ln.parse/ln.parse/tokenizer/Tokenizer.cs

55 lines
1.7 KiB
C#

using System;
using System.Collections.Generic;
using System.Reflection.Metadata.Ecma335;
using System.Text;
namespace ln.parse.tokenizer
{
/// <summary>
/// Splits source text into an array of <see cref="Token"/>s by repeatedly trying
/// a list of <see cref="TokenMatcher"/>s at the current position of a
/// <see cref="SourceBuffer"/>.
/// </summary>
public class Tokenizer
{
    // Matchers are tried in insertion order; the first one reporting a match wins.
    readonly List<TokenMatcher> tokenMatchers = new List<TokenMatcher>();

    /// <summary>Creates a tokenizer with no matchers registered.</summary>
    public Tokenizer()
    {
    }

    /// <summary>Appends a matcher to the end of the matcher list.</summary>
    /// <param name="tokenMatcher">The matcher to register.</param>
    /// <returns>This tokenizer, so calls can be chained.</returns>
    public Tokenizer Add(TokenMatcher tokenMatcher)
    {
        tokenMatchers.Add(tokenMatcher);
        return this;
    }

    /// <summary>Removes the first occurrence of a matcher from the matcher list, if present.</summary>
    /// <param name="tokenMatcher">The matcher to unregister.</param>
    /// <returns>This tokenizer, whether or not the matcher was found.</returns>
    public Tokenizer Remove(TokenMatcher tokenMatcher)
    {
        tokenMatchers.Remove(tokenMatcher);
        return this;
    }

    /// <summary>Tokenizes a string by wrapping it in a new <see cref="SourceBuffer"/>.</summary>
    /// <param name="source">The text to tokenize.</param>
    /// <returns>The tokens in the order they were matched.</returns>
    /// <exception cref="FormatException">No usable token could be matched at some position.</exception>
    public Token[] Parse(string source) => Parse(new SourceBuffer(source));

    /// <summary>
    /// Tokenizes the buffer from its current <c>LinearPosition</c> up to its <c>Length</c>,
    /// advancing <c>LinearPosition</c> by the length of every token produced.
    /// </summary>
    /// <param name="sourceBuffer">The buffer to read from; its position is modified.</param>
    /// <returns>The tokens in the order they were matched (empty if nothing is left to read).</returns>
    /// <exception cref="FormatException">
    /// No matcher produced a token at the current position, or the produced token
    /// has a non-positive length and therefore cannot advance the buffer.
    /// </exception>
    public Token[] Parse(SourceBuffer sourceBuffer)
    {
        List<Token> tokens = new List<Token>();

        while (sourceBuffer.LinearPosition < sourceBuffer.Length)
        {
            Token token = null;

            // First matcher that reports success decides the token.
            foreach (TokenMatcher tokenMatcher in tokenMatchers)
            {
                if (tokenMatcher.Match(sourceBuffer, out token))
                    break;
            }

            // Relies on matchers leaving the out parameter null when they do not match.
            if (token == null)
                throw new FormatException(String.Format("invalid token at {0}", sourceBuffer.TextPosition));

            // A token that consumes nothing would leave LinearPosition unchanged and
            // make this loop spin forever while the token list grows without bound.
            if (token.Length <= 0)
                throw new FormatException(String.Format("zero-length token at {0}", sourceBuffer.TextPosition));

            tokens.Add(token);
            sourceBuffer.LinearPosition += token.Length;
        }

        return tokens.ToArray();
    }

    /// <summary>
    /// Builds a tokenizer pre-loaded with the predefined matchers, registered in this
    /// order: WHITESPACE, FLOAT, INTEGER, STRING, OPERATOR, BRACKET.
    /// </summary>
    /// <returns>A new, independently configurable tokenizer.</returns>
    public static Tokenizer CreateDefaultTokenizer() =>
        new Tokenizer()
            .Add(TokenMatcher.WHITESPACE)
            // NOTE(review): FLOAT is registered before INTEGER, presumably so the integer
            // matcher does not claim the leading digits of a float literal; confirm.
            .Add(TokenMatcher.FLOAT)
            .Add(TokenMatcher.INTEGER)
            .Add(TokenMatcher.STRING)
            .Add(TokenMatcher.OPERATOR)
            .Add(TokenMatcher.BRACKET);
}
}