55 lines
1.7 KiB
C#
55 lines
1.7 KiB
C#
|
using System;
|
|||
|
using System.Collections.Generic;
|
|||
|
using System.Reflection.Metadata.Ecma335;
|
|||
|
using System.Text;
|
|||
|
|
|||
|
namespace ln.parse.tokenizer
|
|||
|
{
|
|||
|
/// <summary>
/// Turns the contents of a <see cref="SourceBuffer"/> into an array of <see cref="Token"/>s
/// by repeatedly asking a list of <see cref="TokenMatcher"/>s to match at the current position.
/// </summary>
/// <remarks>
/// Matchers are tried in the order in which they were added; the first matcher that
/// reports a match produces the token for that position.
/// </remarks>
public class Tokenizer
{
    // Ordered list of matchers; order is significant because the first match wins.
    private readonly List<TokenMatcher> tokenMatchers = new List<TokenMatcher>();

    /// <summary>Creates a tokenizer with no matchers registered.</summary>
    public Tokenizer()
    {
    }

    /// <summary>Appends a matcher to the end of the matcher list.</summary>
    /// <param name="tokenMatcher">The matcher to register.</param>
    /// <returns>This tokenizer, so calls can be chained.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="tokenMatcher"/> is null.</exception>
    public Tokenizer Add(TokenMatcher tokenMatcher)
    {
        // Reject null here; otherwise the failure only surfaces later, inside Parse.
        if (tokenMatcher == null)
        {
            throw new ArgumentNullException(nameof(tokenMatcher));
        }

        tokenMatchers.Add(tokenMatcher);
        return this;
    }

    /// <summary>Removes the first occurrence of a matcher from the matcher list, if present.</summary>
    /// <param name="tokenMatcher">The matcher to unregister.</param>
    /// <returns>This tokenizer, so calls can be chained.</returns>
    public Tokenizer Remove(TokenMatcher tokenMatcher)
    {
        tokenMatchers.Remove(tokenMatcher);
        return this;
    }

    /// <summary>Wraps <paramref name="source"/> in a new <see cref="SourceBuffer"/> and tokenizes it.</summary>
    /// <param name="source">The text to tokenize.</param>
    /// <returns>The tokens found, in source order.</returns>
    /// <exception cref="FormatException">No matcher produced a usable token at some position.</exception>
    public Token[] Parse(string source) => Parse(new SourceBuffer(source));

    /// <summary>
    /// Tokenizes <paramref name="sourceBuffer"/> from its current linear position up to its length,
    /// advancing the buffer's position past each token that is produced.
    /// </summary>
    /// <param name="sourceBuffer">The buffer to read from; its position is advanced by this call.</param>
    /// <returns>The tokens found, in source order.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="sourceBuffer"/> is null.</exception>
    /// <exception cref="FormatException">
    /// No matcher produced a token at the current position, or the produced token
    /// has a non-positive length and therefore cannot advance the buffer.
    /// </exception>
    public Token[] Parse(SourceBuffer sourceBuffer)
    {
        if (sourceBuffer == null)
        {
            throw new ArgumentNullException(nameof(sourceBuffer));
        }

        List<Token> tokens = new List<Token>();

        while (sourceBuffer.LinearPosition < sourceBuffer.Length)
        {
            Token token = null;

            foreach (TokenMatcher tokenMatcher in tokenMatchers)
            {
                // Only trust the out value when the matcher actually reports a match,
                // so a stale value from a failed matcher can never be taken as a token.
                Token candidate;
                if (tokenMatcher.Match(sourceBuffer, out candidate))
                {
                    token = candidate;
                    break;
                }
            }

            if (token == null)
            {
                throw new FormatException(String.Format("invalid token at {0}", sourceBuffer.TextPosition));
            }

            // The position is advanced by token.Length below; a token that does not
            // move the position forward would keep this loop on the same input forever.
            if (token.Length <= 0)
            {
                throw new FormatException(String.Format("zero-length token at {0}", sourceBuffer.TextPosition));
            }

            tokens.Add(token);
            sourceBuffer.LinearPosition += token.Length;
        }

        return tokens.ToArray();
    }

    /// <summary>
    /// Creates a tokenizer with the predefined matchers registered in this order:
    /// WHITESPACE, FLOAT, INTEGER, STRING, OPERATOR, BRACKET.
    /// </summary>
    /// <remarks>
    /// Because the first matching matcher wins, the registration order decides which
    /// matcher handles input that more than one of them could accept.
    /// </remarks>
    /// <returns>A new, independently configurable tokenizer.</returns>
    public static Tokenizer CreateDefaultTokenizer() =>
        new Tokenizer()
            .Add(TokenMatcher.WHITESPACE)
            .Add(TokenMatcher.FLOAT)
            .Add(TokenMatcher.INTEGER)
            .Add(TokenMatcher.STRING)
            .Add(TokenMatcher.OPERATOR)
            .Add(TokenMatcher.BRACKET);
}
|
|||
|
}
|