Alpha Commit

master
Harald Wolff-Thobaben 2020-11-19 20:32:34 +01:00
parent e159bbe78a
commit 577e90b54e
7 changed files with 249 additions and 8 deletions

View File

@ -1,8 +0,0 @@
using System;
namespace ln.parse
{
/// <summary>
/// Empty placeholder class; it declares no members.
/// </summary>
public class Class1
{
}
}

View File

@ -4,4 +4,12 @@
<TargetFramework>netcoreapp3.1</TargetFramework>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="System.Text.RegularExpressions" Version="4.3.1" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\ln.collections\ln.collections.csproj" />
</ItemGroup>
</Project>

View File

@ -0,0 +1,37 @@
using System;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;
namespace ln.parse.tokenizer
{
/// <summary>
/// A <see cref="TokenMatcher"/> that recognizes a token by applying a regular
/// expression to the unread part of a <see cref="SourceBuffer"/>, i.e. the text
/// starting at <see cref="SourceBuffer.LinearPosition"/>.
/// </summary>
public class RegularExpressionMatcher : TokenMatcher
{
    readonly Regex regex;
    readonly Func<SourceBuffer, int, int, Token> createTokenDelegate;

    /// <summary>
    /// Creates a matcher for <paramref name="pattern"/> that builds its tokens
    /// through <paramref name="createTokenDelegate"/>.
    /// </summary>
    /// <param name="pattern">Regular expression pattern to match at the current position.</param>
    /// <param name="createTokenDelegate">Factory receiving (buffer, start offset, length).</param>
    public RegularExpressionMatcher(string pattern, Func<SourceBuffer, int, int, Token> createTokenDelegate)
        : this(pattern)
    {
        this.createTokenDelegate = createTokenDelegate;
    }

    /// <summary>
    /// Creates a matcher without a token factory. Subclasses using this constructor
    /// are expected to override <see cref="CreateToken"/>.
    /// </summary>
    /// <param name="pattern">Regular expression pattern to match at the current position.</param>
    protected RegularExpressionMatcher(string pattern)
    {
        regex = new Regex(pattern);
    }

    /// <summary>
    /// Builds the token for a successful match. The default implementation forwards
    /// to the delegate passed to the public constructor.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// No delegate was supplied and the method was not overridden.
    /// </exception>
    public virtual Token CreateToken(SourceBuffer sourceBuffer, int start, int length)
    {
        if (createTokenDelegate == null)
            throw new InvalidOperationException("No token factory delegate was supplied and CreateToken was not overridden.");

        return createTokenDelegate(sourceBuffer, start, length);
    }

    /// <summary>
    /// Tries to match the pattern exactly at the buffer's current linear position.
    /// The buffer's position is not modified.
    /// </summary>
    /// <param name="sourceBuffer">Buffer to read from.</param>
    /// <param name="token">The created token on success; otherwise <c>null</c>.</param>
    /// <returns><c>true</c> if the pattern matched at the current position.</returns>
    public override bool Match(SourceBuffer sourceBuffer, out Token token)
    {
        if (sourceBuffer == null)
            throw new ArgumentNullException("sourceBuffer");

        int position = sourceBuffer.LinearPosition;

        // Match(input, beginning, length) treats the given range as the whole input
        // (so '^' anchors at 'position') without copying the remaining text into a
        // new string on every attempt. Match.Index stays relative to the full input.
        Match match = regex.Match(sourceBuffer.Text, position, sourceBuffer.Length - position);
        if (match.Success && (match.Index == position))
        {
            token = CreateToken(sourceBuffer, position, match.Length);
            return true;
        }

        token = null;
        return false;
    }
}
}

View File

@ -0,0 +1,77 @@
using ln.collections;
using System;
using System.IO;
namespace ln.parse.tokenizer
{
/// <summary>
/// A position inside a text expressed as a line number and a cursor position
/// within that line. <see cref="First"/> is the pair (1, 1).
/// </summary>
public struct TextPosition
{
    /// <summary>Line number of the position.</summary>
    public int LineNo;

    /// <summary>Cursor position within the line.</summary>
    public int CursorPosition;

    /// <summary>Creates a position from a line number and a cursor position.</summary>
    /// <param name="line">Value stored in <see cref="LineNo"/>.</param>
    /// <param name="cursor">Value stored in <see cref="CursorPosition"/>.</param>
    public TextPosition(int line, int cursor)
    {
        CursorPosition = cursor;
        LineNo = line;
    }

    /// <summary>Returns a new position with line 1 and cursor position 1.</summary>
    public static TextPosition First
    {
        get { return new TextPosition(1, 1); }
    }

    /// <summary>Formats the position as "line:cursor".</summary>
    public override string ToString()
    {
        return string.Format("{0}:{1}", LineNo, CursorPosition);
    }
}
/// <summary>
/// Holds a source text together with a current read offset
/// (<see cref="LinearPosition"/>) and translates linear character offsets
/// into line/column positions.
/// </summary>
public class SourceBuffer
{
    readonly string _buffer;

    // Offsets of the first character of every line, in ascending order.
    // lineStarts[0] is always 0; a new line starts right after each '\n'.
    int[] lineStarts;

    int linearPosition;

    /// <summary>Current read offset into the text. The value is not validated.</summary>
    public int LinearPosition { get => linearPosition; set => linearPosition = value; }

    /// <summary>Line/column position of <see cref="LinearPosition"/>.</summary>
    public TextPosition TextPosition => GetTextPosition(linearPosition);

    /// <summary>The complete source text.</summary>
    public string Text => _buffer;

    /// <summary>Number of characters in the source text.</summary>
    public int Length => _buffer.Length;

    /// <summary>Creates a buffer from a character array (a null array yields an empty text).</summary>
    public SourceBuffer(char[] buffer) : this(new string(buffer))
    { }

    /// <summary>Creates a buffer from everything that remains to be read from <paramref name="reader"/>.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
    public SourceBuffer(TextReader reader) : this(ReadAll(reader))
    { }

    /// <summary>Creates a buffer over <paramref name="source"/>.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
    public SourceBuffer(string source)
    {
        if (source == null)
            throw new ArgumentNullException("source");

        _buffer = source;
        IndexLineStarts();
    }

    // Reads the reader to its end, rejecting a null reader with a clear exception.
    private static string ReadAll(TextReader reader)
    {
        if (reader == null)
            throw new ArgumentNullException("reader");

        return reader.ReadToEnd();
    }

    // Builds the lineStarts table: one entry per line, including empty lines
    // and a possibly empty last line after a trailing '\n'.
    private void IndexLineStarts()
    {
        int lineCount = 1;
        for (int n = 0; n < _buffer.Length; n++)
        {
            if (_buffer[n] == '\n')
                lineCount++;
        }

        lineStarts = new int[lineCount];
        int line = 0;
        for (int n = 0; n < _buffer.Length; n++)
        {
            if (_buffer[n] == '\n')
                lineStarts[++line] = n + 1;
        }
    }

    /// <summary>
    /// Translates a linear offset into a line/column position. Both values are
    /// 1-based, so offset 0 maps to <see cref="TextPosition.First"/>. A '\n'
    /// character belongs to the line it terminates. Offsets past the end of the
    /// text are reported relative to the last line.
    /// </summary>
    /// <param name="linearPosition">Zero-based character offset into the text.</param>
    /// <returns>
    /// The position of the offset, or <c>default(TextPosition)</c> for a negative offset.
    /// </returns>
    public TextPosition GetTextPosition(int linearPosition)
    {
        if (linearPosition < 0)
            return default(TextPosition);

        int line = Array.BinarySearch(lineStarts, linearPosition);
        if (line < 0)
        {
            // Not an exact line start: ~line is the index of the first larger
            // entry, so the containing line is the one just before it.
            line = ~line - 1;
        }

        return new TextPosition(line + 1, linearPosition - lineStarts[line] + 1);
    }

    /// <summary>Returns the complete source text.</summary>
    public string GetText() => _buffer;

    /// <summary>Returns the text from <paramref name="linearStart"/> to the end.</summary>
    public string GetText(int linearStart) => _buffer.Substring(linearStart);

    /// <summary>Returns <paramref name="length"/> characters starting at <paramref name="linearStart"/>.</summary>
    public string GetText(int linearStart, int length) => _buffer.Substring(linearStart, length);

    /// <summary>Returns the text from <see cref="LinearPosition"/> to the end.</summary>
    public string GetCurrentText() => _buffer.Substring(linearPosition);
}
}

54
tokenizer/Token.cs 100644
View File

@ -0,0 +1,54 @@
using System;
using System.Collections.Generic;
using System.Text;
namespace ln.parse.tokenizer
{
/// <summary>
/// A span of a <see cref="SourceBuffer"/>, identified by its start offset and
/// length. The nested subclasses add no members; they only distinguish token kinds by type.
/// </summary>
public class Token
{
    /// <summary>Creates a token covering <paramref name="length"/> characters from <paramref name="start"/>.</summary>
    /// <param name="sourceBuffer">Buffer the token refers to.</param>
    /// <param name="start">Linear offset of the token's first character.</param>
    /// <param name="length">Number of characters in the token.</param>
    public Token(SourceBuffer sourceBuffer, int start, int length)
    {
        Length = length;
        LinearStart = start;
        SourceBuffer = sourceBuffer;
    }

    /// <summary>Buffer the token refers to.</summary>
    public SourceBuffer SourceBuffer { get; }

    /// <summary>Linear offset of the token's first character.</summary>
    public int LinearStart { get; }

    /// <summary>Number of characters in the token.</summary>
    public int Length { get; }

    /// <summary>Text position of <see cref="LinearStart"/> as reported by the buffer.</summary>
    public TextPosition TextPosition
    {
        get { return SourceBuffer.GetTextPosition(LinearStart); }
    }

    /// <summary>The token's text, read from the buffer on every access.</summary>
    public string Value
    {
        get { return SourceBuffer.GetText(LinearStart, Length); }
    }

    /// <summary>Token type for integers.</summary>
    public class IntegerToken : Token
    {
        public IntegerToken(SourceBuffer sourceBuffer, int start, int length)
            : base(sourceBuffer, start, length)
        {
        }
    }

    /// <summary>Token type for floating point numbers.</summary>
    public class FloatToken : Token
    {
        public FloatToken(SourceBuffer sourceBuffer, int start, int length)
            : base(sourceBuffer, start, length)
        {
        }
    }

    /// <summary>Token type for strings.</summary>
    public class StringToken : Token
    {
        public StringToken(SourceBuffer sourceBuffer, int start, int length)
            : base(sourceBuffer, start, length)
        {
        }
    }

    /// <summary>Token type for operators.</summary>
    public class OperatorToken : Token
    {
        public OperatorToken(SourceBuffer sourceBuffer, int start, int length)
            : base(sourceBuffer, start, length)
        {
        }
    }

    /// <summary>Token type for whitespace.</summary>
    public class WhiteSpaceToken : Token
    {
        public WhiteSpaceToken(SourceBuffer sourceBuffer, int start, int length)
            : base(sourceBuffer, start, length)
        {
        }
    }

    /// <summary>Token type for identifiers.</summary>
    public class IdentifierToken : Token
    {
        public IdentifierToken(SourceBuffer sourceBuffer, int start, int length)
            : base(sourceBuffer, start, length)
        {
        }
    }

    /// <summary>Token type for brackets.</summary>
    public class BracketToken : Token
    {
        public BracketToken(SourceBuffer sourceBuffer, int start, int length)
            : base(sourceBuffer, start, length)
        {
        }
    }
}
}

View File

@ -0,0 +1,28 @@
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Runtime.InteropServices.ComTypes;
using System.Text;
namespace ln.parse.tokenizer
{
/// <summary>
/// Base class for objects that try to recognize one token at the current position
/// of a <see cref="SourceBuffer"/>. Also exposes a set of ready-made matchers.
/// </summary>
public abstract class TokenMatcher
{
    public TokenMatcher()
    {
    }

    /// <summary>
    /// Tries to recognize a token at the buffer's current position.
    /// </summary>
    /// <param name="sourceBuffer">Buffer to read from.</param>
    /// <param name="token">The recognized token on success; otherwise <c>null</c>.</param>
    /// <returns><c>true</c> if a token was recognized.</returns>
    public abstract bool Match(SourceBuffer sourceBuffer, out Token token);

    /// <summary>Optional minus sign followed by one or more digits.</summary>
    public static readonly TokenMatcher INTEGER = new RegularExpressionMatcher(
        "^-?\\d+",
        (sourceBuffer, start, length) => new Token.IntegerToken(sourceBuffer, start, length));

    /// <summary>Optional minus sign, digits, a literal decimal point and optional fraction digits.</summary>
    // The decimal point is escaped: an unescaped '.' would accept any character
    // after the integer part (e.g. "1a").
    public static readonly TokenMatcher FLOAT = new RegularExpressionMatcher(
        "^-?\\d+\\.\\d*",
        (sourceBuffer, start, length) => new Token.FloatToken(sourceBuffer, start, length));

    /// <summary>Double-quoted string; a backslash-escaped quote does not end the string.</summary>
    public static readonly TokenMatcher STRING = new RegularExpressionMatcher(
        "^\\\"(\\\\\"|.)*?\\\"",
        (sourceBuffer, start, length) => new Token.StringToken(sourceBuffer, start, length));

    /// <summary>ASCII letter or underscore followed by ASCII letters, digits or underscores.</summary>
    public static readonly TokenMatcher IDENTIFIER = new RegularExpressionMatcher(
        "^[a-zA-Z_][a-zA-Z0-9_]*",
        (sourceBuffer, start, length) => new Token.IdentifierToken(sourceBuffer, start, length));

    /// <summary>One of the operators || &amp;&amp; + - * / | &amp;.</summary>
    // Alternation is ordered, so the two-character operators must come before
    // their one-character prefixes. The group makes '^' apply to every alternative.
    public static readonly TokenMatcher OPERATOR = new RegularExpressionMatcher(
        "^(?:\\|\\||&&|\\+|-|\\*|/|\\||&)",
        (sourceBuffer, start, length) => new Token.OperatorToken(sourceBuffer, start, length));

    /// <summary>One or more whitespace characters.</summary>
    public static readonly TokenMatcher WHITESPACE = new RegularExpressionMatcher(
        "^\\s+",
        (sourceBuffer, start, length) => new Token.WhiteSpaceToken(sourceBuffer, start, length));

    /// <summary>A single bracket character: { } ( ) [ ].</summary>
    // A character class avoids an empty alternative (which would match zero
    // characters everywhere) and keeps the '^' anchor effective for all brackets.
    public static readonly TokenMatcher BRACKET = new RegularExpressionMatcher(
        "^[{}()\\[\\]]",
        (sourceBuffer, start, length) => new Token.BracketToken(sourceBuffer, start, length));
}
}

View File

@ -0,0 +1,45 @@
using System;
using System.Collections.Generic;
using System.Reflection.Metadata.Ecma335;
using System.Text;
namespace ln.parse.tokenizer
{
/// <summary>
/// Splits a source text into tokens by repeatedly asking a list of
/// <see cref="TokenMatcher"/> instances, in registration order, to recognize a
/// token at the current position.
/// </summary>
public class Tokenizer
{
    List<TokenMatcher> tokenMatchers = new List<TokenMatcher>();

    public Tokenizer()
    {
    }

    /// <summary>Appends a matcher; matchers are tried in the order they were added.</summary>
    /// <returns>This tokenizer, to allow chaining.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="tokenMatcher"/> is null.</exception>
    public Tokenizer Add(TokenMatcher tokenMatcher)
    {
        if (tokenMatcher == null)
            throw new ArgumentNullException("tokenMatcher");

        tokenMatchers.Add(tokenMatcher);
        return this;
    }

    /// <summary>Removes a previously added matcher, if present.</summary>
    /// <returns>This tokenizer, to allow chaining.</returns>
    public Tokenizer Remove(TokenMatcher tokenMatcher) { tokenMatchers.Remove(tokenMatcher); return this; }

    /// <summary>Tokenizes <paramref name="source"/> from its beginning.</summary>
    public Token[] Parse(string source) => Parse(new SourceBuffer(source));

    /// <summary>
    /// Tokenizes the buffer from its current position to its end. After each
    /// recognized token the buffer's position is moved to the end of that token.
    /// </summary>
    /// <param name="sourceBuffer">Buffer to tokenize; its position is advanced.</param>
    /// <returns>The recognized tokens in source order.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="sourceBuffer"/> is null.</exception>
    /// <exception cref="FormatException">No matcher recognized a token at some position.</exception>
    public Token[] Parse(SourceBuffer sourceBuffer)
    {
        if (sourceBuffer == null)
            throw new ArgumentNullException("sourceBuffer");

        List<Token> tokens = new List<Token>();
        while (sourceBuffer.LinearPosition < sourceBuffer.Length)
        {
            int start = sourceBuffer.LinearPosition;
            Token token = null;
            int end = start;

            foreach (TokenMatcher tokenMatcher in tokenMatchers)
            {
                Token candidate;
                if (!tokenMatcher.Match(sourceBuffer, out candidate) || (candidate == null))
                    continue;

                // Only accept tokens that move the position forward; an empty
                // match would otherwise keep the loop at the same offset forever.
                int candidateEnd = candidate.LinearStart + candidate.Length;
                if (candidateEnd > start)
                {
                    token = candidate;
                    end = candidateEnd;
                    break;
                }
            }

            if (token == null)
                throw new FormatException(String.Format("invalid token at {0}", sourceBuffer.TextPosition));

            tokens.Add(token);

            // Consume the token so the next iteration continues behind it.
            sourceBuffer.LinearPosition = end;
        }
        return tokens.ToArray();
    }
}
}