62 lines
2.1 KiB
C#
62 lines
2.1 KiB
C#
using System;
|
|
using System.Collections.Generic;
|
|
using System.Reflection.Metadata.Ecma335;
|
|
using System.Text;
|
|
|
|
namespace ln.parse.tokenizer
|
|
{
|
|
/// <summary>
/// Splits source text into <see cref="Token"/>s by trying a list of
/// <see cref="TokenMatcher"/>s at the current position of a <see cref="SourceBuffer"/>.
/// Matchers are tried in the order in which they were added; the first one that
/// reports a match produces the token.
/// </summary>
public class Tokenizer
{
    // Registered matchers, kept in insertion order because that order decides
    // which matcher wins when more than one could match.
    readonly List<TokenMatcher> tokenMatchers = new List<TokenMatcher>();

    /// <summary>Creates a tokenizer with no matchers registered.</summary>
    public Tokenizer()
    {
    }

    /// <summary>Appends <paramref name="tokenMatcher"/> to the matcher list.</summary>
    /// <returns>This instance, so calls can be chained.</returns>
    public Tokenizer Add(TokenMatcher tokenMatcher) { tokenMatchers.Add(tokenMatcher); return this; }

    /// <summary>Removes <paramref name="tokenMatcher"/> from the matcher list.</summary>
    /// <returns>This instance, so calls can be chained.</returns>
    public Tokenizer Remove(TokenMatcher tokenMatcher) { tokenMatchers.Remove(tokenMatcher); return this; }

    /// <summary>Tokenizes <paramref name="source"/> and returns every token.</summary>
    public Token[] Parse(string source) => Parse(new SourceBuffer(source));

    /// <summary>
    /// Tokenizes <paramref name="source"/> and returns only the tokens for which
    /// <paramref name="filter"/> returns <c>true</c>.
    /// </summary>
    public Token[] Parse(string source, Func<Token, bool> filter) => Parse(new SourceBuffer(source), filter);

    /// <summary>Tokenizes the remainder of <paramref name="sourceBuffer"/> and returns every token.</summary>
    public Token[] Parse(SourceBuffer sourceBuffer) => Parse(sourceBuffer, (token) => true);

    /// <summary>
    /// Tokenizes <paramref name="sourceBuffer"/> from its current linear position up to its length.
    /// </summary>
    /// <param name="sourceBuffer">Buffer to read from; its <c>LinearPosition</c> is advanced past each token.</param>
    /// <param name="filter">Predicate deciding whether a matched token is included in the result.</param>
    /// <returns>The accepted tokens in source order.</returns>
    /// <exception cref="FormatException">No registered matcher produced a token at the current position.</exception>
    public Token[] Parse(SourceBuffer sourceBuffer, Func<Token, bool> filter)
    {
        List<Token> tokens = new List<Token>();

        while (sourceBuffer.LinearPosition < sourceBuffer.Length)
        {
            Token token = null;

            // First matcher that reports success wins.
            foreach (TokenMatcher tokenMatcher in tokenMatchers)
            {
                if (tokenMatcher.Match(sourceBuffer, out token))
                    break;
            }

            if (token == null)
            {
                // Show at most 10 characters of the offending text. The length is
                // clamped so that a short remainder does not make Substring throw
                // ArgumentOutOfRangeException and hide the real error.
                var remaining = sourceBuffer.GetCurrentText();
                var preview = remaining.Substring(0, Math.Min(10, remaining.Length));
                throw new FormatException(String.Format("invalid token at {0} [{1}]", sourceBuffer.TextPosition, preview));
            }

            if (filter(token))
                tokens.Add(token);

            // The position advances even for tokens the filter rejected.
            // NOTE(review): a matcher that reports success with a zero-length token
            // would keep this loop from advancing - confirm matchers never do that.
            sourceBuffer.LinearPosition += token.Length;
        }

        return tokens.ToArray();
    }

    /// <summary>
    /// Creates a tokenizer with the predefined matchers registered in this order:
    /// FLOAT, INTEGER, STRING, OPERATOR, BRACKET, IDENTIFIER, WHITESPACE.
    /// </summary>
    public static Tokenizer CreateDefaultTokenizer()
    {
        return new Tokenizer()
            .Add(TokenMatcher.FLOAT)
            .Add(TokenMatcher.INTEGER)
            .Add(TokenMatcher.STRING)
            .Add(TokenMatcher.OPERATOR)
            .Add(TokenMatcher.BRACKET)
            .Add(TokenMatcher.IDENTIFIER)
            .Add(TokenMatcher.WHITESPACE);
    }
}
|
|
}
|