using System;
using System.Collections.Generic;
using System.Reflection.Metadata.Ecma335;
using System.Text;

namespace ln.parse.tokenizer
{
    /// <summary>
    /// Splits a source text into a flat sequence of <see cref="Token"/> instances by
    /// consulting an ordered list of <see cref="TokenMatcher"/> objects at each position.
    /// </summary>
    public class Tokenizer
    {
        // Matchers are consulted in insertion order; the first one that reports a match wins.
        private readonly List<TokenMatcher> tokenMatchers = new List<TokenMatcher>();

        /// <summary>
        /// Creates a tokenizer with no registered matchers.
        /// </summary>
        public Tokenizer()
        {
        }

        /// <summary>
        /// Appends a matcher to the end of the matcher list.
        /// </summary>
        /// <param name="tokenMatcher">The matcher to register.</param>
        /// <returns>This tokenizer, so that calls can be chained.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="tokenMatcher"/> is null.</exception>
        public Tokenizer Add(TokenMatcher tokenMatcher)
        {
            // Reject null here: a null entry would otherwise only fail later, inside Parse.
            if (tokenMatcher == null)
            {
                throw new ArgumentNullException(nameof(tokenMatcher));
            }

            tokenMatchers.Add(tokenMatcher);
            return this;
        }

        /// <summary>
        /// Removes the first occurrence of a matcher from the matcher list, if present.
        /// </summary>
        /// <param name="tokenMatcher">The matcher to unregister.</param>
        /// <returns>This tokenizer, so that calls can be chained.</returns>
        public Tokenizer Remove(TokenMatcher tokenMatcher)
        {
            tokenMatchers.Remove(tokenMatcher);
            return this;
        }

        /// <summary>
        /// Tokenizes a string by wrapping it in a new <see cref="SourceBuffer"/>.
        /// </summary>
        /// <param name="source">The text to tokenize.</param>
        /// <returns>The tokens found, in source order.</returns>
        /// <exception cref="FormatException">No registered matcher produced a token at some position.</exception>
        public Token[] Parse(string source) => Parse(new SourceBuffer(source));

        /// <summary>
        /// Tokenizes the buffer from its current linear position up to its length.
        /// </summary>
        /// <remarks>
        /// The buffer's <c>LinearPosition</c> is advanced by the length of every token that is
        /// accepted. Every accepted token is included in the result.
        /// </remarks>
        /// <param name="sourceBuffer">The buffer to read from.</param>
        /// <returns>The tokens found, in source order.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="sourceBuffer"/> is null.</exception>
        /// <exception cref="FormatException">No registered matcher produced a token at some position.</exception>
        /// <exception cref="InvalidOperationException">
        /// A matcher produced a token with a non-positive length, which would prevent the
        /// tokenizer from making progress.
        /// </exception>
        public Token[] Parse(SourceBuffer sourceBuffer)
        {
            if (sourceBuffer == null)
            {
                throw new ArgumentNullException(nameof(sourceBuffer));
            }

            List<Token> tokens = new List<Token>();

            while (sourceBuffer.LinearPosition < sourceBuffer.Length)
            {
                Token token = null;

                // First matcher that reports success wins; later matchers are not consulted.
                foreach (TokenMatcher tokenMatcher in tokenMatchers)
                {
                    if (tokenMatcher.Match(sourceBuffer, out token))
                    {
                        break;
                    }
                }

                if (token == null)
                {
                    throw new FormatException(String.Format("invalid token at {0}", sourceBuffer.TextPosition));
                }

                // Without forward progress this loop would spin forever and grow the list unbounded.
                if (token.Length <= 0)
                {
                    throw new InvalidOperationException(String.Format(
                        "token matcher produced a token of length {0} at {1}",
                        token.Length,
                        sourceBuffer.TextPosition));
                }

                tokens.Add(token);
                sourceBuffer.LinearPosition += token.Length;
            }

            return tokens.ToArray();
        }

        /// <summary>
        /// Creates a tokenizer with the predefined matchers registered in this order:
        /// whitespace, float, integer, string, operator, bracket.
        /// </summary>
        /// <remarks>
        /// Registration order is significant because the first matching matcher wins.
        /// NOTE(review): FLOAT presumably precedes INTEGER so that the integer part of a
        /// float literal is not consumed on its own; confirm against the matcher definitions.
        /// </remarks>
        /// <returns>A new, independently configurable tokenizer.</returns>
        public static Tokenizer CreateDefaultTokenizer() =>
            new Tokenizer()
                .Add(TokenMatcher.WHITESPACE)
                .Add(TokenMatcher.FLOAT)
                .Add(TokenMatcher.INTEGER)
                .Add(TokenMatcher.STRING)
                .Add(TokenMatcher.OPERATOR)
                .Add(TokenMatcher.BRACKET);
    }
}