From 7f7f1e68be86f3e9a7e10a787579571f8d0c72cd Mon Sep 17 00:00:00 2001 From: Harald Wolff Date: Tue, 24 Nov 2020 18:17:58 +0100 Subject: [PATCH] Alpha Release --- ln.parse.tests/TokenizerTests.cs | 125 +++++++++++++----- ln.parse.tests/complex.txt | 8 ++ ln.parse.tests/ln.parse.tests.csproj | 8 +- ln.parse/ln.parse.csproj | 4 +- .../tokenizer/RegularExpressionMatcher.cs | 10 +- ln.parse/tokenizer/Token.cs | 25 ++-- ln.parse/tokenizer/TokenMatcher.cs | 14 +- ln.parse/tokenizer/Tokenizer.cs | 21 ++- 8 files changed, 149 insertions(+), 66 deletions(-) create mode 100644 ln.parse.tests/complex.txt diff --git a/ln.parse.tests/TokenizerTests.cs b/ln.parse.tests/TokenizerTests.cs index 4f34d88..981faec 100644 --- a/ln.parse.tests/TokenizerTests.cs +++ b/ln.parse.tests/TokenizerTests.cs @@ -1,4 +1,6 @@ using System; +using System.Collections.Generic; +using System.IO; using System.Reflection; using ln.parse.tokenizer; using NUnit.Framework; @@ -7,54 +9,109 @@ namespace ln.parse.tests { public class TokenizerTests { - Tokenizer tokenizer; + StreamWriter output = new StreamWriter(Console.OpenStandardOutput()); + + Tokenizer tokenizer = Tokenizer.CreateDefaultTokenizer(); + + KeyValuePair[] primitiveTests = new KeyValuePair[]{ + new KeyValuePair("0",typeof(Token.IntegerToken)), + new KeyValuePair("1",typeof(Token.IntegerToken)), + new KeyValuePair("2",typeof(Token.IntegerToken)), + new KeyValuePair("3",typeof(Token.IntegerToken)), + new KeyValuePair("4",typeof(Token.IntegerToken)), + new KeyValuePair("5",typeof(Token.IntegerToken)), + new KeyValuePair("6",typeof(Token.IntegerToken)), + new KeyValuePair("7",typeof(Token.IntegerToken)), + new KeyValuePair("8",typeof(Token.IntegerToken)), + new KeyValuePair("9",typeof(Token.IntegerToken)), + new KeyValuePair("10",typeof(Token.IntegerToken)), + new KeyValuePair("100",typeof(Token.IntegerToken)), + new KeyValuePair("453",typeof(Token.IntegerToken)), + new KeyValuePair("75239475",typeof(Token.IntegerToken)), + new KeyValuePair("99999999",typeof(Token.IntegerToken)), + new KeyValuePair("-15362",typeof(Token.IntegerToken)), + new KeyValuePair("-1",typeof(Token.IntegerToken)), + new KeyValuePair("-2",typeof(Token.IntegerToken)), + new KeyValuePair("-3",typeof(Token.IntegerToken)), + new KeyValuePair("-4",typeof(Token.IntegerToken)), + new KeyValuePair("-5",typeof(Token.IntegerToken)), + new KeyValuePair("0.0",typeof(Token.FloatToken)), + new KeyValuePair("-123.456",typeof(Token.FloatToken)), + new KeyValuePair("123.456",typeof(Token.FloatToken)), + new KeyValuePair("987463.234636",typeof(Token.FloatToken)), + new KeyValuePair("-352594.2373782",typeof(Token.FloatToken)), + new KeyValuePair("\"Hallo Welt, ich bin ein \\\"String\\\"!\"",typeof(Token.StringToken)), + new KeyValuePair("\"a simple string\"",typeof(Token.StringToken)), + new KeyValuePair("\"that's it, I can string\"",typeof(Token.StringToken)), + new KeyValuePair("(",typeof(Token.BracketToken)), + new KeyValuePair(")",typeof(Token.BracketToken)), + new KeyValuePair("[",typeof(Token.BracketToken)), + new KeyValuePair("]",typeof(Token.BracketToken)), + new KeyValuePair("{",typeof(Token.BracketToken)), + new KeyValuePair("}",typeof(Token.BracketToken)), + new KeyValuePair("\t",typeof(Token.WhiteSpaceToken)), + new KeyValuePair("Ich",typeof(Token.IdentifierToken)), + new KeyValuePair("IchBinEinIdentifier",typeof(Token.IdentifierToken)), + new KeyValuePair(" ",typeof(Token.WhiteSpaceToken)) + }; - [SetUp] - public void Setup() - { - tokenizer = Tokenizer.CreateDefaultTokenizer(); - } [Test] - public void Test_Integer() + public void Test_0_Primitives() { - Token[] token = tokenizer.Parse("654372"); - - TestContext.Out.WriteLine("Tokens: {0}", token); + foreach (KeyValuePair primTest in primitiveTests) + { + output.WriteLine("Primitive Test: {0} => {1}", primTest.Key, primTest.Value); + output.Flush(); - Assert.AreEqual(1, token.Length); - Assert.IsTrue(token[0] is Token.IntegerToken); - Assert.AreEqual("654372", token[0].Value); + Token[] token = tokenizer.Parse(primTest.Key); - Assert.Pass(); - } - - [Test] - public void Test_Float() - { - Token[] token = tokenizer.Parse("654372.3524"); - - TestContext.Out.WriteLine("Tokens: {0}", token); + output.WriteLine("Token Source: {0}", token[0].TokenSource); + output.WriteLine("Token Value: {0}", token[0].Value); + output.Flush(); - Assert.AreEqual(1, token.Length); - Assert.IsTrue(token[0] is Token.FloatToken); - Assert.AreEqual("654372.3524", token[0].Value); + Assert.AreEqual(1, token.Length); + Assert.AreEqual(primTest.Value, token[0].GetType()); + Assert.AreEqual(primTest.Key, token[0].TokenSource); + } Assert.Pass(); } + string complexSource = null; + [Test] - public void Test_String() + public void Test_1_Complex() { - Token[] token = tokenizer.Parse("\"Hallo Welt, ich bin ein \\\"String\\\"!\""); - - TestContext.Out.WriteLine("Tokens: {0}", token); + using (StreamReader sr = new StreamReader("complex.txt")) + { + complexSource = sr.ReadToEnd(); + } - Assert.AreEqual(1, token.Length); - Assert.IsTrue(token[0] is Token.StringToken); - Assert.AreEqual("\"Hallo Welt, ich bin ein \\\"String\\\"!\"", token[0].Value); + output.WriteLine("--- complex test (no filter) ---"); + output.Flush(); + + Token[] tokens = tokenizer.Parse(complexSource); + + foreach (Token token in tokens) + { + output.WriteLine("Token: {0,-48}: {1}",token.GetType(),token.Value); + } + output.Flush(); + + output.WriteLine("--- complex filter test ---"); + output.Flush(); + + tokens = tokenizer.Parse(complexSource, (token) => !(token is Token.WhiteSpaceToken)); + + foreach (Token token in tokens) + { + output.WriteLine("Token: {0,-48}: {1}",token.GetType(),token.Value); + } + output.Flush(); - Assert.Pass(); } - } + + + } } \ No newline at end of file diff --git a/ln.parse.tests/complex.txt b/ln.parse.tests/complex.txt new file mode 100644 index 0000000..55594b7 --- /dev/null +++ b/ln.parse.tests/complex.txt @@ -0,0 +1,8 @@ +anInteger: 1234; +anFloat: + 567.345; +object "objectKey" { + someIdentifier: "let me be"; +} + +theLastIdentifier: null; diff --git a/ln.parse.tests/ln.parse.tests.csproj b/ln.parse.tests/ln.parse.tests.csproj index 4450e03..37b3b52 100644 --- a/ln.parse.tests/ln.parse.tests.csproj +++ b/ln.parse.tests/ln.parse.tests.csproj @@ -6,14 +6,18 @@ false + + + + - - + + diff --git a/ln.parse/ln.parse.csproj b/ln.parse/ln.parse.csproj index 3334f22..c5b27a7 100644 --- a/ln.parse/ln.parse.csproj +++ b/ln.parse/ln.parse.csproj @@ -2,13 +2,13 @@ netcoreapp3.1 - 0.0.2-test2 + 0.0.3 Harald Wolff-Thobaben l--n.de - + diff --git a/ln.parse/tokenizer/RegularExpressionMatcher.cs b/ln.parse/tokenizer/RegularExpressionMatcher.cs index f255277..c528627 100644 --- a/ln.parse/tokenizer/RegularExpressionMatcher.cs +++ b/ln.parse/tokenizer/RegularExpressionMatcher.cs @@ -8,26 +8,26 @@ namespace ln.parse.tokenizer public class RegularExpressionMatcher : TokenMatcher { Regex regex; - Func createTokenDelegate; + Func createTokenDelegate; - public RegularExpressionMatcher(string pattern,Func createTokenDelegate) + public RegularExpressionMatcher(string pattern,Func createTokenDelegate) :this(pattern) { this.createTokenDelegate = createTokenDelegate; } protected RegularExpressionMatcher(string pattern) { - regex = new Regex(pattern); + regex = new Regex(pattern,RegexOptions.Singleline); } - public virtual Token CreateToken(SourceBuffer sourceBuffer, int start, int length) => createTokenDelegate(sourceBuffer, start, length); + public virtual Token CreateToken(SourceBuffer sourceBuffer, int start, int length, string value) => createTokenDelegate(sourceBuffer, start, length, value); public override bool Match(SourceBuffer sourceBuffer,out Token token) { Match match = regex.Match(sourceBuffer.GetCurrentText()); if ((match != null) && match.Success && (match.Index == 0)) { - token = CreateToken(sourceBuffer, sourceBuffer.LinearPosition, match.Length); + token = CreateToken(sourceBuffer, sourceBuffer.LinearPosition, match.Length, match.Groups["value"].Value); return true; } token = null; diff --git a/ln.parse/tokenizer/Token.cs b/ln.parse/tokenizer/Token.cs index 49addb0..361c22e 100644 --- a/ln.parse/tokenizer/Token.cs +++ b/ln.parse/tokenizer/Token.cs @@ -9,6 +9,7 @@ namespace ln.parse.tokenizer public SourceBuffer SourceBuffer { get; } public int LinearStart { get; } public int Length { get; } + public string Value { get; private set; } public TextPosition TextPosition => SourceBuffer.GetTextPosition(LinearStart); @@ -18,37 +19,43 @@ namespace ln.parse.tokenizer LinearStart = start; Length = length; } + public Token(SourceBuffer sourceBuffer, int start, int length, string value) + { + SourceBuffer = sourceBuffer; + LinearStart = start; + Length = length; + Value = value; + } - public string Value => SourceBuffer.GetText(LinearStart, Length); - + public string TokenSource => SourceBuffer.GetText(LinearStart, Length); public class IntegerToken : Token { - public IntegerToken(SourceBuffer sourceBuffer, int start, int length) : base(sourceBuffer, start, length) { } + public IntegerToken(SourceBuffer sourceBuffer, int start, int length, string value) : base(sourceBuffer, start, length, value) { } } public class FloatToken : Token { - public FloatToken(SourceBuffer sourceBuffer, int start, int length) : base(sourceBuffer, start, length) { } + public FloatToken(SourceBuffer sourceBuffer, int start, int length, string value) : base(sourceBuffer, start, length, value) { } } public class StringToken : Token { - public StringToken(SourceBuffer sourceBuffer, int start, int length) : base(sourceBuffer, start, length) { } + public StringToken(SourceBuffer sourceBuffer, int start, int length, string value) : base(sourceBuffer, start, length, value) { } } public class OperatorToken : Token { - public OperatorToken(SourceBuffer sourceBuffer, int start, int length) : base(sourceBuffer, start, length) { } + public OperatorToken(SourceBuffer sourceBuffer, int start, int length, string value) : base(sourceBuffer, start, length, value) { } } public class WhiteSpaceToken : Token { - public WhiteSpaceToken(SourceBuffer sourceBuffer, int start, int length) : base(sourceBuffer, start, length) { } + public WhiteSpaceToken(SourceBuffer sourceBuffer, int start, int length, string value) : base(sourceBuffer, start, length, value) { } } public class IdentifierToken : Token { - public IdentifierToken(SourceBuffer sourceBuffer, int start, int length) : base(sourceBuffer, start, length) { } + public IdentifierToken(SourceBuffer sourceBuffer, int start, int length, string value) : base(sourceBuffer, start, length, value) { } } public class BracketToken : Token { - public BracketToken(SourceBuffer sourceBuffer, int start, int length) : base(sourceBuffer, start, length) { } + public BracketToken(SourceBuffer sourceBuffer, int start, int length, string value) : base(sourceBuffer, start, length, value) { } } } } diff --git a/ln.parse/tokenizer/TokenMatcher.cs b/ln.parse/tokenizer/TokenMatcher.cs index ad00da8..4c99199 100644 --- a/ln.parse/tokenizer/TokenMatcher.cs +++ b/ln.parse/tokenizer/TokenMatcher.cs @@ -15,13 +15,13 @@ namespace ln.parse.tokenizer public abstract bool Match(SourceBuffer sourceBuffer, out Token token); - public static readonly TokenMatcher INTEGER = new RegularExpressionMatcher("^-?\\d+", (SourceBuffer sourceBuffer, int start, int length) => new Token.IntegerToken(sourceBuffer, start, length)); - public static readonly TokenMatcher FLOAT = new RegularExpressionMatcher("^-?\\d+\\.\\d*", (SourceBuffer sourceBuffer, int start, int length) => new Token.FloatToken(sourceBuffer, start, length)); - public static readonly TokenMatcher STRING = new RegularExpressionMatcher("^\\\"(\\\\\"|.)*?\\\"", (SourceBuffer sourceBuffer, int start, int length) => new Token.StringToken(sourceBuffer, start, length)); - public static readonly TokenMatcher IDENTIFIER = new RegularExpressionMatcher("^\\w][a-zA-Z0-9_]*", (SourceBuffer sourceBuffer, int start, int length) => new Token.IdentifierToken(sourceBuffer, start, length)); - public static readonly TokenMatcher OPERATOR = new RegularExpressionMatcher("\\+|\\-|\\*|\\/|\\||\\&|\\|\\||\\&\\&", (SourceBuffer sourceBuffer, int start, int length) => new Token.OperatorToken(sourceBuffer, start, length)); - public static readonly TokenMatcher WHITESPACE = new RegularExpressionMatcher("^\\s+", (SourceBuffer sourceBuffer, int start, int length) => new Token.WhiteSpaceToken(sourceBuffer, start, length)); - public static readonly TokenMatcher BRACKET = new RegularExpressionMatcher("^\\{|\\}|\\(|\\)|\\[|\\]|", (SourceBuffer sourceBuffer, int start, int length) => new Token.WhiteSpaceToken(sourceBuffer, start, length)); + public static readonly TokenMatcher INTEGER = new RegularExpressionMatcher("^(?-?\\d+)", (SourceBuffer sourceBuffer, int start, int length, string value) => new Token.IntegerToken(sourceBuffer, start, length, value)); + public static readonly TokenMatcher FLOAT = new RegularExpressionMatcher("^(?-?\\d+\\.\\d*)", (SourceBuffer sourceBuffer, int start, int length, string value) => new Token.FloatToken(sourceBuffer, start, length, value)); + public static readonly TokenMatcher STRING = new RegularExpressionMatcher("^\\\"(?(\\\\\"|.)*?)\\\"", (SourceBuffer sourceBuffer, int start, int length, string value) => new Token.StringToken(sourceBuffer, start, length, value)); + public static readonly TokenMatcher IDENTIFIER = new RegularExpressionMatcher("^(?[\\w][a-zA-Z0-9_]*)", (SourceBuffer sourceBuffer, int start, int length, string value) => new Token.IdentifierToken(sourceBuffer, start, length, value)); + public static readonly TokenMatcher OPERATOR = new RegularExpressionMatcher("(?\\+|\\-|\\*|\\/|\\||\\&|\\|\\||\\&\\&|\\;|\\:)", (SourceBuffer sourceBuffer, int start, int length, string value) => new Token.OperatorToken(sourceBuffer, start, length, value)); + public static readonly TokenMatcher WHITESPACE = new RegularExpressionMatcher("^(?\\s+)", (SourceBuffer sourceBuffer, int start, int length, string value) => new Token.WhiteSpaceToken(sourceBuffer, start, length, value)); + public static readonly TokenMatcher BRACKET = new RegularExpressionMatcher("^(?[(){}\\[\\]])", (SourceBuffer sourceBuffer, int start, int length, string value) => new Token.BracketToken(sourceBuffer, start, length, value)); } diff --git a/ln.parse/tokenizer/Tokenizer.cs b/ln.parse/tokenizer/Tokenizer.cs index 82381fd..3211809 100644 --- a/ln.parse/tokenizer/Tokenizer.cs +++ b/ln.parse/tokenizer/Tokenizer.cs @@ -17,7 +17,9 @@ namespace ln.parse.tokenizer public Tokenizer Remove(TokenMatcher tokenMatcher) { tokenMatchers.Remove(tokenMatcher); return this; } public Token[] Parse(string source) => Parse(new SourceBuffer(source)); - public Token[] Parse(SourceBuffer sourceBuffer) + public Token[] Parse(string source,Func filter) => Parse(new SourceBuffer(source), filter); + public Token[] Parse(SourceBuffer sourceBuffer) => Parse(sourceBuffer, (token) => true); + public Token[] Parse(SourceBuffer sourceBuffer,Func filter) { List tokens = new List(); @@ -32,9 +34,11 @@ namespace ln.parse.tokenizer } if (token == null) - throw new FormatException(String.Format("invalid token at {0}",sourceBuffer.TextPosition)); + throw new FormatException(String.Format("invalid token at {0} [{1}]",sourceBuffer.TextPosition,sourceBuffer.GetCurrentText().Substring(0,10))); + + if (filter(token)) + tokens.Add(token); - tokens.Add(token); sourceBuffer.LinearPosition += token.Length; } @@ -42,13 +46,16 @@ namespace ln.parse.tokenizer } - public static Tokenizer CreateDefaultTokenizer() => - new Tokenizer() - .Add(TokenMatcher.WHITESPACE) + public static Tokenizer CreateDefaultTokenizer() + { + return new Tokenizer() .Add(TokenMatcher.FLOAT) .Add(TokenMatcher.INTEGER) .Add(TokenMatcher.STRING) .Add(TokenMatcher.OPERATOR) - .Add(TokenMatcher.BRACKET); + .Add(TokenMatcher.BRACKET) + .Add(TokenMatcher.IDENTIFIER) + .Add(TokenMatcher.WHITESPACE); + } } }