Alpha Release
parent
998397cbe7
commit
7f7f1e68be
|
@ -1,4 +1,6 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Reflection;
|
||||
using ln.parse.tokenizer;
|
||||
using NUnit.Framework;
|
||||
|
@ -7,54 +9,109 @@ namespace ln.parse.tests
|
|||
{
|
||||
public class TokenizerTests
|
||||
{
|
||||
Tokenizer tokenizer;
|
||||
StreamWriter output = new StreamWriter(Console.OpenStandardOutput());
|
||||
|
||||
Tokenizer tokenizer = Tokenizer.CreateDefaultTokenizer();
|
||||
|
||||
KeyValuePair<string,Type>[] primitiveTests = new KeyValuePair<string, Type>[]{
|
||||
new KeyValuePair<string,Type>("0",typeof(Token.IntegerToken)),
|
||||
new KeyValuePair<string,Type>("1",typeof(Token.IntegerToken)),
|
||||
new KeyValuePair<string,Type>("2",typeof(Token.IntegerToken)),
|
||||
new KeyValuePair<string,Type>("3",typeof(Token.IntegerToken)),
|
||||
new KeyValuePair<string,Type>("4",typeof(Token.IntegerToken)),
|
||||
new KeyValuePair<string,Type>("5",typeof(Token.IntegerToken)),
|
||||
new KeyValuePair<string,Type>("6",typeof(Token.IntegerToken)),
|
||||
new KeyValuePair<string,Type>("7",typeof(Token.IntegerToken)),
|
||||
new KeyValuePair<string,Type>("8",typeof(Token.IntegerToken)),
|
||||
new KeyValuePair<string,Type>("9",typeof(Token.IntegerToken)),
|
||||
new KeyValuePair<string,Type>("10",typeof(Token.IntegerToken)),
|
||||
new KeyValuePair<string,Type>("100",typeof(Token.IntegerToken)),
|
||||
new KeyValuePair<string,Type>("453",typeof(Token.IntegerToken)),
|
||||
new KeyValuePair<string,Type>("75239475",typeof(Token.IntegerToken)),
|
||||
new KeyValuePair<string,Type>("99999999",typeof(Token.IntegerToken)),
|
||||
new KeyValuePair<string,Type>("-15362",typeof(Token.IntegerToken)),
|
||||
new KeyValuePair<string,Type>("-1",typeof(Token.IntegerToken)),
|
||||
new KeyValuePair<string,Type>("-2",typeof(Token.IntegerToken)),
|
||||
new KeyValuePair<string,Type>("-3",typeof(Token.IntegerToken)),
|
||||
new KeyValuePair<string,Type>("-4",typeof(Token.IntegerToken)),
|
||||
new KeyValuePair<string,Type>("-5",typeof(Token.IntegerToken)),
|
||||
new KeyValuePair<string,Type>("0.0",typeof(Token.FloatToken)),
|
||||
new KeyValuePair<string,Type>("-123.456",typeof(Token.FloatToken)),
|
||||
new KeyValuePair<string,Type>("123.456",typeof(Token.FloatToken)),
|
||||
new KeyValuePair<string,Type>("987463.234636",typeof(Token.FloatToken)),
|
||||
new KeyValuePair<string,Type>("-352594.2373782",typeof(Token.FloatToken)),
|
||||
new KeyValuePair<string,Type>("\"Hallo Welt, ich bin ein \\\"String\\\"!\"",typeof(Token.StringToken)),
|
||||
new KeyValuePair<string,Type>("\"a simple string\"",typeof(Token.StringToken)),
|
||||
new KeyValuePair<string,Type>("\"that's it, I can string\"",typeof(Token.StringToken)),
|
||||
new KeyValuePair<string,Type>("(",typeof(Token.BracketToken)),
|
||||
new KeyValuePair<string,Type>(")",typeof(Token.BracketToken)),
|
||||
new KeyValuePair<string,Type>("[",typeof(Token.BracketToken)),
|
||||
new KeyValuePair<string,Type>("]",typeof(Token.BracketToken)),
|
||||
new KeyValuePair<string,Type>("{",typeof(Token.BracketToken)),
|
||||
new KeyValuePair<string,Type>("}",typeof(Token.BracketToken)),
|
||||
new KeyValuePair<string,Type>("\t",typeof(Token.WhiteSpaceToken)),
|
||||
new KeyValuePair<string,Type>("Ich",typeof(Token.IdentifierToken)),
|
||||
new KeyValuePair<string,Type>("IchBinEinIdentifier",typeof(Token.IdentifierToken)),
|
||||
new KeyValuePair<string,Type>(" ",typeof(Token.WhiteSpaceToken))
|
||||
};
|
||||
|
||||
[SetUp]
|
||||
public void Setup()
|
||||
{
|
||||
tokenizer = Tokenizer.CreateDefaultTokenizer();
|
||||
}
|
||||
|
||||
[Test]
|
||||
public void Test_Integer()
|
||||
public void Test_0_Primitives()
|
||||
{
|
||||
Token[] token = tokenizer.Parse("654372");
|
||||
|
||||
TestContext.Out.WriteLine("Tokens: {0}", token);
|
||||
foreach (KeyValuePair<string,Type> primTest in primitiveTests)
|
||||
{
|
||||
output.WriteLine("Primitive Test: {0} => {1}", primTest.Key, primTest.Value);
|
||||
output.Flush();
|
||||
|
||||
Assert.AreEqual(1, token.Length);
|
||||
Assert.IsTrue(token[0] is Token.IntegerToken);
|
||||
Assert.AreEqual("654372", token[0].Value);
|
||||
Token[] token = tokenizer.Parse(primTest.Key);
|
||||
|
||||
Assert.Pass();
|
||||
}
|
||||
|
||||
[Test]
|
||||
public void Test_Float()
|
||||
{
|
||||
Token[] token = tokenizer.Parse("654372.3524");
|
||||
|
||||
TestContext.Out.WriteLine("Tokens: {0}", token);
|
||||
output.WriteLine("Token Source: {0}", token[0].TokenSource);
|
||||
output.WriteLine("Token Value: {0}", token[0].Value);
|
||||
output.Flush();
|
||||
|
||||
Assert.AreEqual(1, token.Length);
|
||||
Assert.IsTrue(token[0] is Token.FloatToken);
|
||||
Assert.AreEqual("654372.3524", token[0].Value);
|
||||
Assert.AreEqual(1, token.Length);
|
||||
Assert.AreEqual(primTest.Value, token[0].GetType());
|
||||
Assert.AreEqual(primTest.Key, token[0].TokenSource);
|
||||
}
|
||||
|
||||
Assert.Pass();
|
||||
}
|
||||
|
||||
string complexSource = null;
|
||||
|
||||
[Test]
|
||||
public void Test_String()
|
||||
public void Test_1_Complex()
|
||||
{
|
||||
Token[] token = tokenizer.Parse("\"Hallo Welt, ich bin ein \\\"String\\\"!\"");
|
||||
|
||||
TestContext.Out.WriteLine("Tokens: {0}", token);
|
||||
using (StreamReader sr = new StreamReader("complex.txt"))
|
||||
{
|
||||
complexSource = sr.ReadToEnd();
|
||||
}
|
||||
|
||||
Assert.AreEqual(1, token.Length);
|
||||
Assert.IsTrue(token[0] is Token.StringToken);
|
||||
Assert.AreEqual("\"Hallo Welt, ich bin ein \\\"String\\\"!\"", token[0].Value);
|
||||
output.WriteLine("--- complex test (no filter) ---");
|
||||
output.Flush();
|
||||
|
||||
Token[] tokens = tokenizer.Parse(complexSource);
|
||||
|
||||
foreach (Token token in tokens)
|
||||
{
|
||||
output.WriteLine("Token: {0,-48}: {1}",token.GetType(),token.Value);
|
||||
}
|
||||
output.Flush();
|
||||
|
||||
output.WriteLine("--- complex filter test ---");
|
||||
output.Flush();
|
||||
|
||||
tokens = tokenizer.Parse(complexSource, (token) => !(token is Token.WhiteSpaceToken));
|
||||
|
||||
foreach (Token token in tokens)
|
||||
{
|
||||
output.WriteLine("Token: {0,-48}: {1}",token.GetType(),token.Value);
|
||||
}
|
||||
output.Flush();
|
||||
|
||||
Assert.Pass();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
}
|
|
@ -0,0 +1,8 @@
|
|||
anInteger: 1234;
|
||||
anFloat:
|
||||
567.345;
|
||||
object "objectKey" {
|
||||
someIdentifier: "let me be";
|
||||
}
|
||||
|
||||
theLastIdentifier: null;
|
|
@ -6,14 +6,18 @@
|
|||
<IsPackable>false</IsPackable>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<None Update="complex.txt" CopyToOutputDirectory="PreserveNewest" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="NUnit" Version="3.12.0" />
|
||||
<PackageReference Include="NUnit3TestAdapter" Version="3.16.1" />
|
||||
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="16.5.0" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\ln.parse\ln.parse.csproj" />
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\ln.parse\ln.parse.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
|
|
|
@ -2,13 +2,13 @@
|
|||
|
||||
<PropertyGroup>
|
||||
<TargetFramework>netcoreapp3.1</TargetFramework>
|
||||
<Version>0.0.2-test2</Version>
|
||||
<Version>0.0.3</Version>
|
||||
<Authors>Harald Wolff-Thobaben</Authors>
|
||||
<Company>l--n.de</Company>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="System.Text.RegularExpressions" Version="4.3.1" />
|
||||
<PackageReference Include="ln.collections" Version="0.1.0" />
|
||||
<PackageReference Include="ln.collections" Version="0.1.2" />
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
|
|
|
@ -8,26 +8,26 @@ namespace ln.parse.tokenizer
|
|||
public class RegularExpressionMatcher : TokenMatcher
|
||||
{
|
||||
Regex regex;
|
||||
Func<SourceBuffer, int, int, Token> createTokenDelegate;
|
||||
Func<SourceBuffer, int, int, string, Token> createTokenDelegate;
|
||||
|
||||
public RegularExpressionMatcher(string pattern,Func<SourceBuffer,int,int,Token> createTokenDelegate)
|
||||
public RegularExpressionMatcher(string pattern,Func<SourceBuffer,int,int,string,Token> createTokenDelegate)
|
||||
:this(pattern)
|
||||
{
|
||||
this.createTokenDelegate = createTokenDelegate;
|
||||
}
|
||||
protected RegularExpressionMatcher(string pattern)
|
||||
{
|
||||
regex = new Regex(pattern);
|
||||
regex = new Regex(pattern,RegexOptions.Singleline);
|
||||
}
|
||||
|
||||
public virtual Token CreateToken(SourceBuffer sourceBuffer, int start, int length) => createTokenDelegate(sourceBuffer, start, length);
|
||||
public virtual Token CreateToken(SourceBuffer sourceBuffer, int start, int length, string value) => createTokenDelegate(sourceBuffer, start, length, value);
|
||||
|
||||
public override bool Match(SourceBuffer sourceBuffer,out Token token)
|
||||
{
|
||||
Match match = regex.Match(sourceBuffer.GetCurrentText());
|
||||
if ((match != null) && match.Success && (match.Index == 0))
|
||||
{
|
||||
token = CreateToken(sourceBuffer, sourceBuffer.LinearPosition, match.Length);
|
||||
token = CreateToken(sourceBuffer, sourceBuffer.LinearPosition, match.Length, match.Groups["value"].Value);
|
||||
return true;
|
||||
}
|
||||
token = null;
|
||||
|
|
|
@ -9,6 +9,7 @@ namespace ln.parse.tokenizer
|
|||
public SourceBuffer SourceBuffer { get; }
|
||||
public int LinearStart { get; }
|
||||
public int Length { get; }
|
||||
public string Value { get; private set; }
|
||||
|
||||
public TextPosition TextPosition => SourceBuffer.GetTextPosition(LinearStart);
|
||||
|
||||
|
@ -18,37 +19,43 @@ namespace ln.parse.tokenizer
|
|||
LinearStart = start;
|
||||
Length = length;
|
||||
}
|
||||
public Token(SourceBuffer sourceBuffer, int start, int length, string value)
|
||||
{
|
||||
SourceBuffer = sourceBuffer;
|
||||
LinearStart = start;
|
||||
Length = length;
|
||||
Value = value;
|
||||
}
|
||||
|
||||
public string Value => SourceBuffer.GetText(LinearStart, Length);
|
||||
|
||||
public string TokenSource => SourceBuffer.GetText(LinearStart, Length);
|
||||
|
||||
public class IntegerToken : Token
|
||||
{
|
||||
public IntegerToken(SourceBuffer sourceBuffer, int start, int length) : base(sourceBuffer, start, length) { }
|
||||
public IntegerToken(SourceBuffer sourceBuffer, int start, int length, string value) : base(sourceBuffer, start, length, value) { }
|
||||
}
|
||||
public class FloatToken : Token
|
||||
{
|
||||
public FloatToken(SourceBuffer sourceBuffer, int start, int length) : base(sourceBuffer, start, length) { }
|
||||
public FloatToken(SourceBuffer sourceBuffer, int start, int length, string value) : base(sourceBuffer, start, length, value) { }
|
||||
}
|
||||
public class StringToken : Token
|
||||
{
|
||||
public StringToken(SourceBuffer sourceBuffer, int start, int length) : base(sourceBuffer, start, length) { }
|
||||
public StringToken(SourceBuffer sourceBuffer, int start, int length, string value) : base(sourceBuffer, start, length, value) { }
|
||||
}
|
||||
public class OperatorToken : Token
|
||||
{
|
||||
public OperatorToken(SourceBuffer sourceBuffer, int start, int length) : base(sourceBuffer, start, length) { }
|
||||
public OperatorToken(SourceBuffer sourceBuffer, int start, int length, string value) : base(sourceBuffer, start, length, value) { }
|
||||
}
|
||||
public class WhiteSpaceToken : Token
|
||||
{
|
||||
public WhiteSpaceToken(SourceBuffer sourceBuffer, int start, int length) : base(sourceBuffer, start, length) { }
|
||||
public WhiteSpaceToken(SourceBuffer sourceBuffer, int start, int length, string value) : base(sourceBuffer, start, length, value) { }
|
||||
}
|
||||
public class IdentifierToken : Token
|
||||
{
|
||||
public IdentifierToken(SourceBuffer sourceBuffer, int start, int length) : base(sourceBuffer, start, length) { }
|
||||
public IdentifierToken(SourceBuffer sourceBuffer, int start, int length, string value) : base(sourceBuffer, start, length, value) { }
|
||||
}
|
||||
public class BracketToken : Token
|
||||
{
|
||||
public BracketToken(SourceBuffer sourceBuffer, int start, int length) : base(sourceBuffer, start, length) { }
|
||||
public BracketToken(SourceBuffer sourceBuffer, int start, int length, string value) : base(sourceBuffer, start, length, value) { }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -15,13 +15,13 @@ namespace ln.parse.tokenizer
|
|||
|
||||
public abstract bool Match(SourceBuffer sourceBuffer, out Token token);
|
||||
|
||||
public static readonly TokenMatcher INTEGER = new RegularExpressionMatcher("^-?\\d+", (SourceBuffer sourceBuffer, int start, int length) => new Token.IntegerToken(sourceBuffer, start, length));
|
||||
public static readonly TokenMatcher FLOAT = new RegularExpressionMatcher("^-?\\d+\\.\\d*", (SourceBuffer sourceBuffer, int start, int length) => new Token.FloatToken(sourceBuffer, start, length));
|
||||
public static readonly TokenMatcher STRING = new RegularExpressionMatcher("^\\\"(\\\\\"|.)*?\\\"", (SourceBuffer sourceBuffer, int start, int length) => new Token.StringToken(sourceBuffer, start, length));
|
||||
public static readonly TokenMatcher IDENTIFIER = new RegularExpressionMatcher("^\\w][a-zA-Z0-9_]*", (SourceBuffer sourceBuffer, int start, int length) => new Token.IdentifierToken(sourceBuffer, start, length));
|
||||
public static readonly TokenMatcher OPERATOR = new RegularExpressionMatcher("\\+|\\-|\\*|\\/|\\||\\&|\\|\\||\\&\\&", (SourceBuffer sourceBuffer, int start, int length) => new Token.OperatorToken(sourceBuffer, start, length));
|
||||
public static readonly TokenMatcher WHITESPACE = new RegularExpressionMatcher("^\\s+", (SourceBuffer sourceBuffer, int start, int length) => new Token.WhiteSpaceToken(sourceBuffer, start, length));
|
||||
public static readonly TokenMatcher BRACKET = new RegularExpressionMatcher("^\\{|\\}|\\(|\\)|\\[|\\]|", (SourceBuffer sourceBuffer, int start, int length) => new Token.WhiteSpaceToken(sourceBuffer, start, length));
|
||||
public static readonly TokenMatcher INTEGER = new RegularExpressionMatcher("^(?<value>-?\\d+)", (SourceBuffer sourceBuffer, int start, int length, string value) => new Token.IntegerToken(sourceBuffer, start, length, value));
|
||||
public static readonly TokenMatcher FLOAT = new RegularExpressionMatcher("^(?<value>-?\\d+\\.\\d*)", (SourceBuffer sourceBuffer, int start, int length, string value) => new Token.FloatToken(sourceBuffer, start, length, value));
|
||||
public static readonly TokenMatcher STRING = new RegularExpressionMatcher("^\\\"(?<value>(\\\\\"|.)*?)\\\"", (SourceBuffer sourceBuffer, int start, int length, string value) => new Token.StringToken(sourceBuffer, start, length, value));
|
||||
public static readonly TokenMatcher IDENTIFIER = new RegularExpressionMatcher("^(?<value>[\\w][a-zA-Z0-9_]*)", (SourceBuffer sourceBuffer, int start, int length, string value) => new Token.IdentifierToken(sourceBuffer, start, length, value));
|
||||
public static readonly TokenMatcher OPERATOR = new RegularExpressionMatcher("(?<value>\\+|\\-|\\*|\\/|\\||\\&|\\|\\||\\&\\&|\\;|\\:)", (SourceBuffer sourceBuffer, int start, int length, string value) => new Token.OperatorToken(sourceBuffer, start, length, value));
|
||||
public static readonly TokenMatcher WHITESPACE = new RegularExpressionMatcher("^(?<value>\\s+)", (SourceBuffer sourceBuffer, int start, int length, string value) => new Token.WhiteSpaceToken(sourceBuffer, start, length, value));
|
||||
public static readonly TokenMatcher BRACKET = new RegularExpressionMatcher("^(?<value>[(){}\\[\\]])", (SourceBuffer sourceBuffer, int start, int length, string value) => new Token.BracketToken(sourceBuffer, start, length, value));
|
||||
|
||||
}
|
||||
|
||||
|
|
|
@ -17,7 +17,9 @@ namespace ln.parse.tokenizer
|
|||
public Tokenizer Remove(TokenMatcher tokenMatcher) { tokenMatchers.Remove(tokenMatcher); return this; }
|
||||
|
||||
public Token[] Parse(string source) => Parse(new SourceBuffer(source));
|
||||
public Token[] Parse(SourceBuffer sourceBuffer)
|
||||
public Token[] Parse(string source,Func<Token,bool> filter) => Parse(new SourceBuffer(source), filter);
|
||||
public Token[] Parse(SourceBuffer sourceBuffer) => Parse(sourceBuffer, (token) => true);
|
||||
public Token[] Parse(SourceBuffer sourceBuffer,Func<Token,bool> filter)
|
||||
{
|
||||
List<Token> tokens = new List<Token>();
|
||||
|
||||
|
@ -32,9 +34,11 @@ namespace ln.parse.tokenizer
|
|||
}
|
||||
|
||||
if (token == null)
|
||||
throw new FormatException(String.Format("invalid token at {0}",sourceBuffer.TextPosition));
|
||||
throw new FormatException(String.Format("invalid token at {0} [{1}]",sourceBuffer.TextPosition,sourceBuffer.GetCurrentText().Substring(0,10)));
|
||||
|
||||
if (filter(token))
|
||||
tokens.Add(token);
|
||||
|
||||
tokens.Add(token);
|
||||
sourceBuffer.LinearPosition += token.Length;
|
||||
}
|
||||
|
||||
|
@ -42,13 +46,16 @@ namespace ln.parse.tokenizer
|
|||
}
|
||||
|
||||
|
||||
public static Tokenizer CreateDefaultTokenizer() =>
|
||||
new Tokenizer()
|
||||
.Add(TokenMatcher.WHITESPACE)
|
||||
public static Tokenizer CreateDefaultTokenizer()
|
||||
{
|
||||
return new Tokenizer()
|
||||
.Add(TokenMatcher.FLOAT)
|
||||
.Add(TokenMatcher.INTEGER)
|
||||
.Add(TokenMatcher.STRING)
|
||||
.Add(TokenMatcher.OPERATOR)
|
||||
.Add(TokenMatcher.BRACKET);
|
||||
.Add(TokenMatcher.BRACKET)
|
||||
.Add(TokenMatcher.IDENTIFIER)
|
||||
.Add(TokenMatcher.WHITESPACE);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue