55 lines
1.7 KiB
C#
55 lines
1.7 KiB
C#
using System;
|
|
using System.Collections.Generic;
|
|
using System.Reflection.Metadata.Ecma335;
|
|
using System.Text;
|
|
|
|
namespace ln.parse.tokenizer
|
|
{
|
|
/// <summary>
/// Splits source text into <see cref="Token"/>s by trying a list of
/// <see cref="TokenMatcher"/>s at the current position of a <see cref="SourceBuffer"/>.
/// Matchers are tried in the order in which they were added; the first one that
/// produces a usable token wins.
/// </summary>
public class Tokenizer
{
    // Registered matchers, in priority order (earlier entries are tried first).
    readonly List<TokenMatcher> tokenMatchers = new List<TokenMatcher>();

    /// <summary>Creates a tokenizer with no matchers registered.</summary>
    public Tokenizer()
    {
    }

    /// <summary>Appends a matcher to the end of the matcher list.</summary>
    /// <param name="tokenMatcher">The matcher to register; must not be null.</param>
    /// <returns>This tokenizer, to allow call chaining.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="tokenMatcher"/> is null.</exception>
    public Tokenizer Add(TokenMatcher tokenMatcher)
    {
        // Reject null here; otherwise the failure would only surface later as a
        // NullReferenceException in the middle of Parse().
        if (tokenMatcher == null)
            throw new ArgumentNullException(nameof(tokenMatcher));

        tokenMatchers.Add(tokenMatcher);
        return this;
    }

    /// <summary>Removes the first occurrence of a matcher, if it is registered.</summary>
    /// <param name="tokenMatcher">The matcher to remove.</param>
    /// <returns>This tokenizer, to allow call chaining.</returns>
    public Tokenizer Remove(TokenMatcher tokenMatcher)
    {
        tokenMatchers.Remove(tokenMatcher);
        return this;
    }

    /// <summary>Tokenizes a string by wrapping it in a new <see cref="SourceBuffer"/>.</summary>
    /// <param name="source">The text to tokenize.</param>
    /// <returns>The tokens in source order.</returns>
    /// <exception cref="FormatException">No matcher produced a token at some position.</exception>
    public Token[] Parse(string source) => Parse(new SourceBuffer(source));

    /// <summary>
    /// Tokenizes the buffer from its current <c>LinearPosition</c> up to its <c>Length</c>,
    /// advancing <c>LinearPosition</c> by the length of each accepted token.
    /// </summary>
    /// <param name="sourceBuffer">The buffer to read from; its position is modified.</param>
    /// <returns>The tokens in source order.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="sourceBuffer"/> is null.</exception>
    /// <exception cref="FormatException">
    /// No registered matcher produced a token of positive length at the current position.
    /// </exception>
    public Token[] Parse(SourceBuffer sourceBuffer)
    {
        if (sourceBuffer == null)
            throw new ArgumentNullException(nameof(sourceBuffer));

        List<Token> tokens = new List<Token>();

        while (sourceBuffer.LinearPosition < sourceBuffer.Length)
        {
            Token token = null;

            foreach (TokenMatcher tokenMatcher in tokenMatchers)
            {
                Token candidate;

                // Accept a candidate only when the matcher reports success AND the
                // token actually consumes input. A zero-length (or negative-length)
                // token would leave the position unchanged and loop forever.
                if (tokenMatcher.Match(sourceBuffer, out candidate)
                    && candidate != null
                    && candidate.Length > 0)
                {
                    token = candidate;
                    break;
                }
            }

            if (token == null)
                throw new FormatException(String.Format("invalid token at {0}", sourceBuffer.TextPosition));

            tokens.Add(token);
            sourceBuffer.LinearPosition += token.Length;
        }

        return tokens.ToArray();
    }

    /// <summary>
    /// Creates a tokenizer with the predefined matchers registered in this order:
    /// WHITESPACE, FLOAT, INTEGER, STRING, OPERATOR, BRACKET.
    /// The order is significant because the first matching matcher wins.
    /// </summary>
    /// <returns>A new, independently configurable tokenizer.</returns>
    public static Tokenizer CreateDefaultTokenizer() =>
        new Tokenizer()
            .Add(TokenMatcher.WHITESPACE)
            .Add(TokenMatcher.FLOAT)
            .Add(TokenMatcher.INTEGER)
            .Add(TokenMatcher.STRING)
            .Add(TokenMatcher.OPERATOR)
            .Add(TokenMatcher.BRACKET);
}
|
|
}
|