Alpha Release

master
Harald Wolff 2020-11-24 18:17:58 +01:00
parent 998397cbe7
commit 7f7f1e68be
8 changed files with 149 additions and 66 deletions

View File

@ -1,4 +1,6 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Reflection;
using ln.parse.tokenizer;
using NUnit.Framework;
@ -7,54 +9,109 @@ namespace ln.parse.tests
{
/// <summary>
/// NUnit fixture for the default tokenizer: a table of single-token inputs plus a
/// multi-token sample that is read from complex.txt.
/// NOTE(review): this span interleaves removed and added diff lines (duplicate field
/// declarations, consecutive method signatures, stray assertions), so it is not
/// compilable exactly as shown.
/// </summary>
public class TokenizerTests
{
Tokenizer tokenizer;
// Writer over the process's standard output stream; the tests flush it manually after writing.
StreamWriter output = new StreamWriter(Console.OpenStandardOutput());
Tokenizer tokenizer = Tokenizer.CreateDefaultTokenizer();
// Each entry pairs a source text (Key) with the Token subtype (Value) expected for it.
KeyValuePair<string,Type>[] primitiveTests = new KeyValuePair<string, Type>[]{
new KeyValuePair<string,Type>("0",typeof(Token.IntegerToken)),
new KeyValuePair<string,Type>("1",typeof(Token.IntegerToken)),
new KeyValuePair<string,Type>("2",typeof(Token.IntegerToken)),
new KeyValuePair<string,Type>("3",typeof(Token.IntegerToken)),
new KeyValuePair<string,Type>("4",typeof(Token.IntegerToken)),
new KeyValuePair<string,Type>("5",typeof(Token.IntegerToken)),
new KeyValuePair<string,Type>("6",typeof(Token.IntegerToken)),
new KeyValuePair<string,Type>("7",typeof(Token.IntegerToken)),
new KeyValuePair<string,Type>("8",typeof(Token.IntegerToken)),
new KeyValuePair<string,Type>("9",typeof(Token.IntegerToken)),
new KeyValuePair<string,Type>("10",typeof(Token.IntegerToken)),
new KeyValuePair<string,Type>("100",typeof(Token.IntegerToken)),
new KeyValuePair<string,Type>("453",typeof(Token.IntegerToken)),
new KeyValuePair<string,Type>("75239475",typeof(Token.IntegerToken)),
new KeyValuePair<string,Type>("99999999",typeof(Token.IntegerToken)),
new KeyValuePair<string,Type>("-15362",typeof(Token.IntegerToken)),
new KeyValuePair<string,Type>("-1",typeof(Token.IntegerToken)),
new KeyValuePair<string,Type>("-2",typeof(Token.IntegerToken)),
new KeyValuePair<string,Type>("-3",typeof(Token.IntegerToken)),
new KeyValuePair<string,Type>("-4",typeof(Token.IntegerToken)),
new KeyValuePair<string,Type>("-5",typeof(Token.IntegerToken)),
new KeyValuePair<string,Type>("0.0",typeof(Token.FloatToken)),
new KeyValuePair<string,Type>("-123.456",typeof(Token.FloatToken)),
new KeyValuePair<string,Type>("123.456",typeof(Token.FloatToken)),
new KeyValuePair<string,Type>("987463.234636",typeof(Token.FloatToken)),
new KeyValuePair<string,Type>("-352594.2373782",typeof(Token.FloatToken)),
new KeyValuePair<string,Type>("\"Hallo Welt, ich bin ein \\\"String\\\"!\"",typeof(Token.StringToken)),
new KeyValuePair<string,Type>("\"a simple string\"",typeof(Token.StringToken)),
new KeyValuePair<string,Type>("\"that's it, I can string\"",typeof(Token.StringToken)),
new KeyValuePair<string,Type>("(",typeof(Token.BracketToken)),
new KeyValuePair<string,Type>(")",typeof(Token.BracketToken)),
new KeyValuePair<string,Type>("[",typeof(Token.BracketToken)),
new KeyValuePair<string,Type>("]",typeof(Token.BracketToken)),
new KeyValuePair<string,Type>("{",typeof(Token.BracketToken)),
new KeyValuePair<string,Type>("}",typeof(Token.BracketToken)),
new KeyValuePair<string,Type>("\t",typeof(Token.WhiteSpaceToken)),
new KeyValuePair<string,Type>("Ich",typeof(Token.IdentifierToken)),
new KeyValuePair<string,Type>("IchBinEinIdentifier",typeof(Token.IdentifierToken)),
new KeyValuePair<string,Type>(" ",typeof(Token.WhiteSpaceToken))
};
// Assigns a fresh default tokenizer; NUnit runs [SetUp] methods before each test.
[SetUp]
public void Setup()
{
tokenizer = Tokenizer.CreateDefaultTokenizer();
}
// Table-driven check: every primitiveTests entry must tokenize to exactly one token of the
// expected type whose TokenSource equals the input text.
// NOTE(review): lines mentioning "654372" appear to belong to the removed Test_Integer/Test_Float.
[Test]
public void Test_Integer()
public void Test_0_Primitives()
{
Token[] token = tokenizer.Parse("654372");
TestContext.Out.WriteLine("Tokens: {0}", token);
foreach (KeyValuePair<string,Type> primTest in primitiveTests)
{
output.WriteLine("Primitive Test: {0} => {1}", primTest.Key, primTest.Value);
output.Flush();
Assert.AreEqual(1, token.Length);
Assert.IsTrue(token[0] is Token.IntegerToken);
Assert.AreEqual("654372", token[0].Value);
Token[] token = tokenizer.Parse(primTest.Key);
Assert.Pass();
}
[Test]
public void Test_Float()
{
Token[] token = tokenizer.Parse("654372.3524");
TestContext.Out.WriteLine("Tokens: {0}", token);
output.WriteLine("Token Source: {0}", token[0].TokenSource);
output.WriteLine("Token Value: {0}", token[0].Value);
output.Flush();
Assert.AreEqual(1, token.Length);
Assert.IsTrue(token[0] is Token.FloatToken);
Assert.AreEqual("654372.3524", token[0].Value);
// Exactly one token, of the expected runtime type, covering the whole input text.
Assert.AreEqual(1, token.Length);
Assert.AreEqual(primTest.Value, token[0].GetType());
Assert.AreEqual(primTest.Key, token[0].TokenSource);
}
Assert.Pass();
}
// Receives the full text of complex.txt when Test_1_Complex runs.
string complexSource = null;
// Tokenizes complex.txt twice (unfiltered, then with WhiteSpaceToken instances filtered out)
// and prints each token's type and Value.
// NOTE(review): the assertions on token[0] below appear to belong to the removed Test_String.
[Test]
public void Test_String()
public void Test_1_Complex()
{
Token[] token = tokenizer.Parse("\"Hallo Welt, ich bin ein \\\"String\\\"!\"");
TestContext.Out.WriteLine("Tokens: {0}", token);
// Relative path, so it is resolved against the current working directory.
using (StreamReader sr = new StreamReader("complex.txt"))
{
complexSource = sr.ReadToEnd();
}
Assert.AreEqual(1, token.Length);
Assert.IsTrue(token[0] is Token.StringToken);
Assert.AreEqual("\"Hallo Welt, ich bin ein \\\"String\\\"!\"", token[0].Value);
output.WriteLine("--- complex test (no filter) ---");
output.Flush();
Token[] tokens = tokenizer.Parse(complexSource);
foreach (Token token in tokens)
{
output.WriteLine("Token: {0,-48}: {1}",token.GetType(),token.Value);
}
output.Flush();
output.WriteLine("--- complex filter test ---");
output.Flush();
// The filter keeps a token only when it is not a WhiteSpaceToken.
tokens = tokenizer.Parse(complexSource, (token) => !(token is Token.WhiteSpaceToken));
foreach (Token token in tokens)
{
output.WriteLine("Token: {0,-48}: {1}",token.GetType(),token.Value);
}
output.Flush();
Assert.Pass();
}
}
}
}

View File

@ -0,0 +1,8 @@
anInteger: 1234;
anFloat:
567.345;
object "objectKey" {
someIdentifier: "let me be";
}
theLastIdentifier: null;

View File

@ -6,14 +6,18 @@
<IsPackable>false</IsPackable>
</PropertyGroup>
<ItemGroup>
<None Update="complex.txt" CopyToOutputDirectory="PreserveNewest" />
</ItemGroup>
<ItemGroup>
<PackageReference Include="NUnit" Version="3.12.0" />
<PackageReference Include="NUnit3TestAdapter" Version="3.16.1" />
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="16.5.0" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\ln.parse\ln.parse.csproj" />
<ItemGroup>
<ProjectReference Include="..\ln.parse\ln.parse.csproj" />
</ItemGroup>
</Project>

View File

@ -2,13 +2,13 @@
<PropertyGroup>
<TargetFramework>netcoreapp3.1</TargetFramework>
<Version>0.0.2-test2</Version>
<Version>0.0.3</Version>
<Authors>Harald Wolff-Thobaben</Authors>
<Company>l--n.de</Company>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="System.Text.RegularExpressions" Version="4.3.1" />
<PackageReference Include="ln.collections" Version="0.1.0" />
<PackageReference Include="ln.collections" Version="0.1.2" />
</ItemGroup>
</Project>

View File

@ -8,26 +8,26 @@ namespace ln.parse.tokenizer
public class RegularExpressionMatcher : TokenMatcher
{
// Pattern that Match applies to the current text of the source buffer.
Regex regex;
// Factory used by CreateToken to build the token for a successful match.
// NOTE(review): two declarations of the same name — the three-argument form looks like the
// removed diff line, the form with the extra string (captured value) like its replacement.
Func<SourceBuffer, int, int, Token> createTokenDelegate;
Func<SourceBuffer, int, int, string, Token> createTokenDelegate;
// Creates a matcher for `pattern` and stores the factory that builds its tokens.
// The pattern itself is handed to the protected single-argument constructor.
// NOTE(review): two signature lines in a row — the first looks like the removed diff line.
public RegularExpressionMatcher(string pattern,Func<SourceBuffer,int,int,Token> createTokenDelegate)
public RegularExpressionMatcher(string pattern,Func<SourceBuffer,int,int,string,Token> createTokenDelegate)
:this(pattern)
{
this.createTokenDelegate = createTokenDelegate;
}
// Builds the Regex without setting a token factory (createTokenDelegate is not assigned here).
// NOTE(review): the two assignments look like the before/after lines of the diff; executed as
// written, the second one (RegexOptions.Singleline, where '.' also matches '\n') is what remains.
protected RegularExpressionMatcher(string pattern)
{
regex = new Regex(pattern);
regex = new Regex(pattern,RegexOptions.Singleline);
}
// Builds the token for a match by delegating to createTokenDelegate; virtual so subclasses can
// create tokens differently.
// NOTE(review): the three-argument overload looks like the removed diff line; the four-argument
// overload additionally forwards the captured value.
public virtual Token CreateToken(SourceBuffer sourceBuffer, int start, int length) => createTokenDelegate(sourceBuffer, start, length);
public virtual Token CreateToken(SourceBuffer sourceBuffer, int start, int length, string value) => createTokenDelegate(sourceBuffer, start, length, value);
public override bool Match(SourceBuffer sourceBuffer,out Token token)
{
Match match = regex.Match(sourceBuffer.GetCurrentText());
if ((match != null) && match.Success && (match.Index == 0))
{
token = CreateToken(sourceBuffer, sourceBuffer.LinearPosition, match.Length);
token = CreateToken(sourceBuffer, sourceBuffer.LinearPosition, match.Length, match.Groups["value"].Value);
return true;
}
token = null;

View File

@ -9,6 +9,7 @@ namespace ln.parse.tokenizer
// Buffer the token was read from.
public SourceBuffer SourceBuffer { get; }
// Start offset of the token (the `start` constructor argument).
public int LinearStart { get; }
// Length of the token (the `length` constructor argument).
public int Length { get; }
// Value handed to the four-argument constructor; settable only inside Token.
public string Value { get; private set; }
// Position object obtained from the source buffer for LinearStart.
public TextPosition TextPosition => SourceBuffer.GetTextPosition(LinearStart);
@ -18,37 +19,43 @@ namespace ln.parse.tokenizer
LinearStart = start;
Length = length;
}
/// <summary>
/// Creates a token that covers <paramref name="length"/> characters of
/// <paramref name="sourceBuffer"/> starting at <paramref name="start"/> and carries an
/// explicitly supplied value.
/// </summary>
/// <param name="sourceBuffer">Buffer the token was read from.</param>
/// <param name="start">Start offset, stored in LinearStart.</param>
/// <param name="length">Token length, stored in Length.</param>
/// <param name="value">Text stored in Value.</param>
public Token(SourceBuffer sourceBuffer, int start, int length, string value)
{
    this.Value = value;
    this.Length = length;
    this.LinearStart = start;
    this.SourceBuffer = sourceBuffer;
}
// NOTE(review): this computed Value clashes with the auto-property Value declared above it in
// this hunk; it looks like the removed diff line that TokenSource replaces.
public string Value => SourceBuffer.GetText(LinearStart, Length);
// Text of the source buffer covered by the token (LinearStart, Length), independent of Value.
public string TokenSource => SourceBuffer.GetText(LinearStart, Length);
// Token subtype created by TokenMatcher.INTEGER.
// NOTE(review): the three-argument constructor looks like the removed diff line.
public class IntegerToken : Token
{
public IntegerToken(SourceBuffer sourceBuffer, int start, int length) : base(sourceBuffer, start, length) { }
// Forwards all arguments, including the value, to the base Token constructor.
public IntegerToken(SourceBuffer sourceBuffer, int start, int length, string value) : base(sourceBuffer, start, length, value) { }
}
// Token subtype created by TokenMatcher.FLOAT.
// NOTE(review): the three-argument constructor looks like the removed diff line.
public class FloatToken : Token
{
public FloatToken(SourceBuffer sourceBuffer, int start, int length) : base(sourceBuffer, start, length) { }
// Forwards all arguments, including the value, to the base Token constructor.
public FloatToken(SourceBuffer sourceBuffer, int start, int length, string value) : base(sourceBuffer, start, length, value) { }
}
// Token subtype created by TokenMatcher.STRING.
// NOTE(review): the three-argument constructor looks like the removed diff line.
public class StringToken : Token
{
public StringToken(SourceBuffer sourceBuffer, int start, int length) : base(sourceBuffer, start, length) { }
// Forwards all arguments, including the value, to the base Token constructor.
public StringToken(SourceBuffer sourceBuffer, int start, int length, string value) : base(sourceBuffer, start, length, value) { }
}
// Token subtype created by TokenMatcher.OPERATOR.
// NOTE(review): the three-argument constructor looks like the removed diff line.
public class OperatorToken : Token
{
public OperatorToken(SourceBuffer sourceBuffer, int start, int length) : base(sourceBuffer, start, length) { }
// Forwards all arguments, including the value, to the base Token constructor.
public OperatorToken(SourceBuffer sourceBuffer, int start, int length, string value) : base(sourceBuffer, start, length, value) { }
}
// Token subtype created by TokenMatcher.WHITESPACE.
// NOTE(review): the three-argument constructor looks like the removed diff line.
public class WhiteSpaceToken : Token
{
public WhiteSpaceToken(SourceBuffer sourceBuffer, int start, int length) : base(sourceBuffer, start, length) { }
// Forwards all arguments, including the value, to the base Token constructor.
public WhiteSpaceToken(SourceBuffer sourceBuffer, int start, int length, string value) : base(sourceBuffer, start, length, value) { }
}
// Token subtype created by TokenMatcher.IDENTIFIER.
// NOTE(review): the three-argument constructor looks like the removed diff line.
public class IdentifierToken : Token
{
public IdentifierToken(SourceBuffer sourceBuffer, int start, int length) : base(sourceBuffer, start, length) { }
// Forwards all arguments, including the value, to the base Token constructor.
public IdentifierToken(SourceBuffer sourceBuffer, int start, int length, string value) : base(sourceBuffer, start, length, value) { }
}
// Token subtype created by the four-argument TokenMatcher.BRACKET matcher.
// NOTE(review): the three-argument constructor looks like the removed diff line.
public class BracketToken : Token
{
public BracketToken(SourceBuffer sourceBuffer, int start, int length) : base(sourceBuffer, start, length) { }
// Forwards all arguments, including the value, to the base Token constructor.
public BracketToken(SourceBuffer sourceBuffer, int start, int length, string value) : base(sourceBuffer, start, length, value) { }
}
}
}

View File

@ -15,13 +15,13 @@ namespace ln.parse.tokenizer
public abstract bool Match(SourceBuffer sourceBuffer, out Token token);
// Built-in matchers in their three-argument factory form (source buffer, start, length).
// NOTE(review): these look like the removed side of the diff; the same names are declared
// again directly below with a four-argument factory.
public static readonly TokenMatcher INTEGER = new RegularExpressionMatcher("^-?\\d+", (SourceBuffer sourceBuffer, int start, int length) => new Token.IntegerToken(sourceBuffer, start, length));
public static readonly TokenMatcher FLOAT = new RegularExpressionMatcher("^-?\\d+\\.\\d*", (SourceBuffer sourceBuffer, int start, int length) => new Token.FloatToken(sourceBuffer, start, length));
public static readonly TokenMatcher STRING = new RegularExpressionMatcher("^\\\"(\\\\\"|.)*?\\\"", (SourceBuffer sourceBuffer, int start, int length) => new Token.StringToken(sourceBuffer, start, length));
// NOTE(review): this pattern has a ']' without an opening '[' — "^\w][a-zA-Z0-9_]*".
public static readonly TokenMatcher IDENTIFIER = new RegularExpressionMatcher("^\\w][a-zA-Z0-9_]*", (SourceBuffer sourceBuffer, int start, int length) => new Token.IdentifierToken(sourceBuffer, start, length));
public static readonly TokenMatcher OPERATOR = new RegularExpressionMatcher("\\+|\\-|\\*|\\/|\\||\\&|\\|\\||\\&\\&", (SourceBuffer sourceBuffer, int start, int length) => new Token.OperatorToken(sourceBuffer, start, length));
public static readonly TokenMatcher WHITESPACE = new RegularExpressionMatcher("^\\s+", (SourceBuffer sourceBuffer, int start, int length) => new Token.WhiteSpaceToken(sourceBuffer, start, length));
// NOTE(review): this pattern ends in an empty alternative and the factory creates a WhiteSpaceToken.
public static readonly TokenMatcher BRACKET = new RegularExpressionMatcher("^\\{|\\}|\\(|\\)|\\[|\\]|", (SourceBuffer sourceBuffer, int start, int length) => new Token.WhiteSpaceToken(sourceBuffer, start, length));
// Built-in matchers. Every pattern is anchored with '^' and exposes the token text through the
// named group "value", which RegularExpressionMatcher.Match hands to the factory delegate.
// Optionally signed run of digits.
public static readonly TokenMatcher INTEGER = new RegularExpressionMatcher("^(?<value>-?\\d+)", (SourceBuffer sourceBuffer, int start, int length, string value) => new Token.IntegerToken(sourceBuffer, start, length, value));
// Optionally signed digits, a dot, then zero or more digits (so "1." is accepted as well).
public static readonly TokenMatcher FLOAT = new RegularExpressionMatcher("^(?<value>-?\\d+\\.\\d*)", (SourceBuffer sourceBuffer, int start, int length, string value) => new Token.FloatToken(sourceBuffer, start, length, value));
// Double-quoted string; \" inside is consumed as an escaped quote. The surrounding quotes are
// outside the "value" group, so the value excludes them while the match length includes them.
public static readonly TokenMatcher STRING = new RegularExpressionMatcher("^\\\"(?<value>(\\\\\"|.)*?)\\\"", (SourceBuffer sourceBuffer, int start, int length, string value) => new Token.StringToken(sourceBuffer, start, length, value));
// One word character followed by ASCII letters, digits or underscores.
public static readonly TokenMatcher IDENTIFIER = new RegularExpressionMatcher("^(?<value>[\\w][a-zA-Z0-9_]*)", (SourceBuffer sourceBuffer, int start, int length, string value) => new Token.IdentifierToken(sourceBuffer, start, length, value));
// Two-character operators come before their one-character prefixes: .NET alternation takes the
// first alternative that matches, so "|" ahead of "||" would split "||" into two tokens.
// The '^' anchor stops the regex from scanning ahead for an operator further along the text;
// RegularExpressionMatcher.Match only accepts matches at index 0 anyway.
public static readonly TokenMatcher OPERATOR = new RegularExpressionMatcher("^(?<value>\\|\\||\\&\\&|\\+|\\-|\\*|\\/|\\||\\&|\\;|\\:)", (SourceBuffer sourceBuffer, int start, int length, string value) => new Token.OperatorToken(sourceBuffer, start, length, value));
// One or more whitespace characters.
public static readonly TokenMatcher WHITESPACE = new RegularExpressionMatcher("^(?<value>\\s+)", (SourceBuffer sourceBuffer, int start, int length, string value) => new Token.WhiteSpaceToken(sourceBuffer, start, length, value));
// A single round, curly or square bracket.
public static readonly TokenMatcher BRACKET = new RegularExpressionMatcher("^(?<value>[(){}\\[\\]])", (SourceBuffer sourceBuffer, int start, int length, string value) => new Token.BracketToken(sourceBuffer, start, length, value));
}

View File

@ -17,7 +17,9 @@ namespace ln.parse.tokenizer
/// <summary>
/// Removes <paramref name="tokenMatcher"/> from the matcher collection.
/// </summary>
/// <returns>This tokenizer, so calls can be chained.</returns>
public Tokenizer Remove(TokenMatcher tokenMatcher)
{
    tokenMatchers.Remove(tokenMatcher);
    return this;
}
// Tokenizes a string by wrapping it in a new SourceBuffer.
public Token[] Parse(string source) => Parse(new SourceBuffer(source));
// NOTE(review): bodiless signature — looks like the removed diff line whose body now belongs
// to the overload that takes a filter.
public Token[] Parse(SourceBuffer sourceBuffer)
// Tokenizes a string and forwards the filter predicate to the SourceBuffer overload.
public Token[] Parse(string source,Func<Token,bool> filter) => Parse(new SourceBuffer(source), filter);
// Unfiltered overload: supplies a predicate that returns true for every token.
public Token[] Parse(SourceBuffer sourceBuffer) => Parse(sourceBuffer, (token) => true);
public Token[] Parse(SourceBuffer sourceBuffer,Func<Token,bool> filter)
{
List<Token> tokens = new List<Token>();
@ -32,9 +34,11 @@ namespace ln.parse.tokenizer
}
if (token == null)
throw new FormatException(String.Format("invalid token at {0}",sourceBuffer.TextPosition));
throw new FormatException(String.Format("invalid token at {0} [{1}]",sourceBuffer.TextPosition,sourceBuffer.GetCurrentText().Substring(0,10)));
if (filter(token))
tokens.Add(token);
tokens.Add(token);
sourceBuffer.LinearPosition += token.Length;
}
@ -42,13 +46,16 @@ namespace ln.parse.tokenizer
}
// Builds a Tokenizer with the built-in TokenMatcher instances added.
// NOTE(review): the expression-bodied and the block-bodied versions are interleaved here (two
// signatures, two BRACKET lines); the first three lines and the ';'-terminated BRACKET line look
// like the removed side of the diff.
public static Tokenizer CreateDefaultTokenizer() =>
new Tokenizer()
.Add(TokenMatcher.WHITESPACE)
public static Tokenizer CreateDefaultTokenizer()
{
return new Tokenizer()
// FLOAT is added before INTEGER — presumably matchers are tried in insertion order, so that
// "1.5" is not consumed as the integer "1" first; TODO confirm against Parse.
.Add(TokenMatcher.FLOAT)
.Add(TokenMatcher.INTEGER)
.Add(TokenMatcher.STRING)
.Add(TokenMatcher.OPERATOR)
.Add(TokenMatcher.BRACKET);
.Add(TokenMatcher.BRACKET)
.Add(TokenMatcher.IDENTIFIER)
.Add(TokenMatcher.WHITESPACE);
}
}
}