Alpha Commit
parent
e159bbe78a
commit
577e90b54e
|
@ -4,4 +4,12 @@
|
||||||
<TargetFramework>netcoreapp3.1</TargetFramework>
|
<TargetFramework>netcoreapp3.1</TargetFramework>
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
|
|
||||||
|
<ItemGroup>
|
||||||
|
<PackageReference Include="System.Text.RegularExpressions" Version="4.3.1" />
|
||||||
|
</ItemGroup>
|
||||||
|
|
||||||
|
<ItemGroup>
|
||||||
|
<ProjectReference Include="..\ln.collections\ln.collections.csproj" />
|
||||||
|
</ItemGroup>
|
||||||
|
|
||||||
</Project>
|
</Project>
|
||||||
|
|
|
@ -0,0 +1,37 @@
|
||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Text;
|
||||||
|
using System.Text.RegularExpressions;
|
||||||
|
|
||||||
|
namespace ln.parse.tokenizer
|
||||||
|
{
|
||||||
|
/// <summary>
/// Token matcher that applies a regular expression to the text starting at the
/// buffer's current position and accepts only matches that begin at index 0 of that text.
/// </summary>
public class RegularExpressionMatcher : TokenMatcher
{
    Regex regex;
    Func<SourceBuffer, int, int, Token> createTokenDelegate;

    /// <summary>
    /// Builds a matcher for <paramref name="pattern"/> without a token factory delegate.
    /// <see cref="CreateToken"/> invokes the delegate, so a subclass using this
    /// constructor has to override <see cref="CreateToken"/>.
    /// </summary>
    /// <param name="pattern">Regular expression pattern passed to <see cref="Regex"/>.</param>
    protected RegularExpressionMatcher(string pattern)
    {
        regex = new Regex(pattern);
    }

    /// <summary>
    /// Builds a matcher for <paramref name="pattern"/> whose tokens are produced by
    /// <paramref name="createTokenDelegate"/>.
    /// </summary>
    /// <param name="pattern">Regular expression pattern passed to <see cref="Regex"/>.</param>
    /// <param name="createTokenDelegate">Factory receiving (buffer, start offset, length).</param>
    public RegularExpressionMatcher(string pattern, Func<SourceBuffer, int, int, Token> createTokenDelegate)
        : this(pattern)
    {
        this.createTokenDelegate = createTokenDelegate;
    }

    /// <summary>
    /// Creates the token for a successful match by forwarding to the factory delegate.
    /// </summary>
    /// <param name="sourceBuffer">Buffer the token belongs to.</param>
    /// <param name="start">Linear offset of the token's first character.</param>
    /// <param name="length">Number of characters covered by the token.</param>
    /// <returns>The token returned by the delegate.</returns>
    public virtual Token CreateToken(SourceBuffer sourceBuffer, int start, int length)
    {
        return createTokenDelegate(sourceBuffer, start, length);
    }

    /// <summary>
    /// Runs the regular expression against <c>sourceBuffer.GetCurrentText()</c>.
    /// </summary>
    /// <param name="sourceBuffer">Buffer to read from; its position is not modified here.</param>
    /// <param name="token">The created token on success, otherwise <c>null</c>.</param>
    /// <returns><c>true</c> when the expression matched at index 0 of the current text.</returns>
    public override bool Match(SourceBuffer sourceBuffer, out Token token)
    {
        Match candidate = regex.Match(sourceBuffer.GetCurrentText());

        // Matches that start further into the text do not count.
        bool matchedAtStart = (candidate != null) && candidate.Success && (candidate.Index == 0);
        if (!matchedAtStart)
        {
            token = null;
            return false;
        }

        token = CreateToken(sourceBuffer, sourceBuffer.LinearPosition, candidate.Length);
        return true;
    }
}
|
||||||
|
}
|
|
@ -0,0 +1,77 @@
|
||||||
|
using ln.collections;
|
||||||
|
using System;
|
||||||
|
using System.IO;
|
||||||
|
|
||||||
|
namespace ln.parse.tokenizer
|
||||||
|
{
|
||||||
|
/// <summary>
/// Line number / cursor position pair describing a location in source text.
/// </summary>
public struct TextPosition
{
    /// <summary>Line number component.</summary>
    public int LineNo;

    /// <summary>Cursor (column) component.</summary>
    public int CursorPosition;

    /// <summary>Creates a position from its two components.</summary>
    /// <param name="line">Value stored in <see cref="LineNo"/>.</param>
    /// <param name="cursor">Value stored in <see cref="CursorPosition"/>.</param>
    public TextPosition(int line, int cursor)
    {
        CursorPosition = cursor;
        LineNo = line;
    }

    /// <summary>The position (1, 1).</summary>
    public static TextPosition First
    {
        get { return new TextPosition(1, 1); }
    }

    /// <summary>Formats the position as <c>LineNo:CursorPosition</c>.</summary>
    public override string ToString()
    {
        return string.Format("{0}:{1}", LineNo, CursorPosition);
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Holds the complete source text together with a movable linear read position
/// and translates linear offsets into 1-based line/column positions.
/// </summary>
public class SourceBuffer
{
    readonly string _buffer;

    // Index of line starts: key = linear offset of the first character of a line,
    // value = TextPosition(line number, 1) for that line.
    BTree<int, TextPosition> statTextPositions = new BTree<int, TextPosition>();

    int linearPosition;

    /// <summary>Current linear read offset into the buffer. Not range-checked.</summary>
    public int LinearPosition { get => linearPosition; set => linearPosition = value; }

    /// <summary>Line/column position of <see cref="LinearPosition"/>.</summary>
    public TextPosition TextPosition => GetTextPosition(linearPosition);

    /// <summary>The complete source text.</summary>
    public string Text => _buffer;

    /// <summary>Number of characters in the source text.</summary>
    public int Length => _buffer.Length;

    /// <summary>Creates a buffer from a character array.</summary>
    /// <param name="buffer">Characters forming the source text.</param>
    public SourceBuffer(char[] buffer) : this(new String(buffer))
    { }

    /// <summary>Creates a buffer from everything that remains to be read from <paramref name="reader"/>.</summary>
    /// <param name="reader">Reader that is consumed to its end.</param>
    public SourceBuffer(TextReader reader) : this(reader.ReadToEnd())
    { }

    /// <summary>Creates a buffer over <paramref name="source"/> and indexes its line starts.</summary>
    /// <param name="source">The source text.</param>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> is <c>null</c>.</exception>
    public SourceBuffer(string source)
    {
        _buffer = source ?? throw new ArgumentNullException(nameof(source));
        doStatistics();
    }

    /// <summary>
    /// Records the linear offset of every line start. Line 1 starts at offset 0 and
    /// each '\n' starts a new line at the following offset (also when that offset is
    /// the end of the buffer, so an end-of-input position after a trailing newline resolves).
    /// </summary>
    private void doStatistics()
    {
        int lineNo = 1;
        statTextPositions.Add(0, new TextPosition(lineNo, 1));

        for (int n = 0; n < _buffer.Length; n++)
        {
            if (_buffer[n] == '\n')
            {
                lineNo++;
                statTextPositions.Add(n + 1, new TextPosition(lineNo, 1));
            }
        }
    }

    /// <summary>
    /// Translates a linear offset into a 1-based line/column position.
    /// A '\n' character is reported as the last column of the line it terminates.
    /// </summary>
    /// <param name="linearPosition">Linear offset; values outside [0, Length] are clamped.</param>
    /// <returns>The line number and column of the (clamped) offset.</returns>
    public TextPosition GetTextPosition(int linearPosition)
    {
        int position = Math.Max(0, Math.Min(linearPosition, _buffer.Length));

        // Offset of the first character of the line containing 'position'.
        // LastIndexOf yields -1 when there is no earlier newline, which gives line start 0.
        // The search must not be issued for position 0 (a negative start index throws on a non-empty string).
        int lineStart = (position == 0) ? 0 : _buffer.LastIndexOf('\n', position - 1) + 1;

        // 'lineStart' is always a key recorded by doStatistics, so the lookup resolves
        // to exactly that line's entry.
        statTextPositions.TryGetPreviousOrCurrentValue(lineStart, out TextPosition textPosition);
        textPosition.CursorPosition = position - lineStart + 1;
        return textPosition;
    }

    /// <summary>Returns the complete source text.</summary>
    public string GetText() => _buffer;

    /// <summary>Returns the text from <paramref name="linearStart"/> to the end of the buffer.</summary>
    public string GetText(int linearStart) => _buffer.Substring(linearStart);

    /// <summary>Returns <paramref name="length"/> characters starting at <paramref name="linearStart"/>.</summary>
    public string GetText(int linearStart, int length) => _buffer.Substring(linearStart, length);

    /// <summary>Returns the text from <see cref="LinearPosition"/> to the end of the buffer.</summary>
    public string GetCurrentText() => _buffer.Substring(linearPosition);
}
|
||||||
|
}
|
|
@ -0,0 +1,54 @@
|
||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Text;
|
||||||
|
|
||||||
|
namespace ln.parse.tokenizer
|
||||||
|
{
|
||||||
|
/// <summary>
/// A span of a <see cref="SourceBuffer"/>, identified by its start offset and length.
/// The nested classes are the concrete token categories; they add no members.
/// </summary>
public class Token
{
    /// <summary>Creates a token covering <paramref name="length"/> characters from <paramref name="start"/>.</summary>
    /// <param name="sourceBuffer">Buffer the token refers to.</param>
    /// <param name="start">Linear offset of the first character.</param>
    /// <param name="length">Number of characters covered.</param>
    public Token(SourceBuffer sourceBuffer, int start, int length)
    {
        SourceBuffer = sourceBuffer;
        LinearStart = start;
        Length = length;
    }

    /// <summary>Buffer the token refers to.</summary>
    public SourceBuffer SourceBuffer { get; }

    /// <summary>Linear offset of the token's first character.</summary>
    public int LinearStart { get; }

    /// <summary>Number of characters covered by the token.</summary>
    public int Length { get; }

    /// <summary>Text position the buffer reports for <see cref="LinearStart"/>.</summary>
    public TextPosition TextPosition
    {
        get { return SourceBuffer.GetTextPosition(LinearStart); }
    }

    /// <summary>The token's text, read from the buffer on every access.</summary>
    public string Value
    {
        get { return SourceBuffer.GetText(LinearStart, Length); }
    }

    /// <summary>Token category: integer.</summary>
    public class IntegerToken : Token
    {
        public IntegerToken(SourceBuffer sourceBuffer, int start, int length)
            : base(sourceBuffer, start, length)
        {
        }
    }

    /// <summary>Token category: float.</summary>
    public class FloatToken : Token
    {
        public FloatToken(SourceBuffer sourceBuffer, int start, int length)
            : base(sourceBuffer, start, length)
        {
        }
    }

    /// <summary>Token category: string.</summary>
    public class StringToken : Token
    {
        public StringToken(SourceBuffer sourceBuffer, int start, int length)
            : base(sourceBuffer, start, length)
        {
        }
    }

    /// <summary>Token category: operator.</summary>
    public class OperatorToken : Token
    {
        public OperatorToken(SourceBuffer sourceBuffer, int start, int length)
            : base(sourceBuffer, start, length)
        {
        }
    }

    /// <summary>Token category: whitespace.</summary>
    public class WhiteSpaceToken : Token
    {
        public WhiteSpaceToken(SourceBuffer sourceBuffer, int start, int length)
            : base(sourceBuffer, start, length)
        {
        }
    }

    /// <summary>Token category: identifier.</summary>
    public class IdentifierToken : Token
    {
        public IdentifierToken(SourceBuffer sourceBuffer, int start, int length)
            : base(sourceBuffer, start, length)
        {
        }
    }

    /// <summary>Token category: bracket.</summary>
    public class BracketToken : Token
    {
        public BracketToken(SourceBuffer sourceBuffer, int start, int length)
            : base(sourceBuffer, start, length)
        {
        }
    }
}
|
||||||
|
}
|
|
@ -0,0 +1,28 @@
|
||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Globalization;
|
||||||
|
using System.Runtime.InteropServices.ComTypes;
|
||||||
|
using System.Text;
|
||||||
|
|
||||||
|
namespace ln.parse.tokenizer
|
||||||
|
{
|
||||||
|
/// <summary>
/// Base class for token recognisers, plus a set of ready-made matchers for common
/// token categories. Every predefined pattern is anchored with '^' and can only
/// match a non-empty span.
/// </summary>
public abstract class TokenMatcher
{

    public TokenMatcher()
    {
    }

    /// <summary>
    /// Tries to recognise a token in <paramref name="sourceBuffer"/>.
    /// </summary>
    /// <param name="sourceBuffer">Buffer to inspect.</param>
    /// <param name="token">Receives the recognised token; implementations set it to <c>null</c> on failure.</param>
    /// <returns><c>true</c> when a token was recognised.</returns>
    public abstract bool Match(SourceBuffer sourceBuffer, out Token token);

    /// <summary>Optionally negative run of decimal digits.</summary>
    public static readonly TokenMatcher INTEGER = new RegularExpressionMatcher("^-?\\d+", (SourceBuffer sourceBuffer, int start, int length) => new Token.IntegerToken(sourceBuffer, start, length));

    /// <summary>Digits, a literal decimal point, then optional fraction digits.</summary>
    // The '.' is escaped: unescaped it matched any character (e.g. "1x").
    public static readonly TokenMatcher FLOAT = new RegularExpressionMatcher("^-?\\d+\\.\\d*", (SourceBuffer sourceBuffer, int start, int length) => new Token.FloatToken(sourceBuffer, start, length));

    /// <summary>Double-quoted string; a backslash-escaped quote does not end the string.</summary>
    public static readonly TokenMatcher STRING = new RegularExpressionMatcher("^\\\"(\\\\\"|.)*?\\\"", (SourceBuffer sourceBuffer, int start, int length) => new Token.StringToken(sourceBuffer, start, length));

    /// <summary>ASCII letter or underscore followed by ASCII letters, digits or underscores.</summary>
    // The previous pattern "^\w][a-zA-Z0-9_]*" contained a stray ']' that the regex engine
    // treats as a literal, so only inputs like "a]bc" matched.
    // NOTE(review): the first character is restricted to [a-zA-Z_] so identifiers cannot
    // start with a digit; widen it if "\w" was really intended.
    public static readonly TokenMatcher IDENTIFIER = new RegularExpressionMatcher("^[a-zA-Z_][a-zA-Z0-9_]*", (SourceBuffer sourceBuffer, int start, int length) => new Token.IdentifierToken(sourceBuffer, start, length));

    /// <summary>One of: || &amp;&amp; + - * / | &amp;</summary>
    // Two-character operators are listed first; alternation takes the first alternative
    // that matches, so "|" before "||" made "||" unreachable. The group keeps '^' applied
    // to every alternative.
    public static readonly TokenMatcher OPERATOR = new RegularExpressionMatcher("^(?:\\|\\||&&|\\+|-|\\*|/|\\||&)", (SourceBuffer sourceBuffer, int start, int length) => new Token.OperatorToken(sourceBuffer, start, length));

    /// <summary>One or more whitespace characters.</summary>
    public static readonly TokenMatcher WHITESPACE = new RegularExpressionMatcher("^\\s+", (SourceBuffer sourceBuffer, int start, int length) => new Token.WhiteSpaceToken(sourceBuffer, start, length));

    /// <summary>A single bracket character: { } ( ) [ ]</summary>
    // A character class replaces the old alternation, which ended in an empty alternative
    // (matching zero characters everywhere) and anchored only its first alternative.
    // The factory now creates a BracketToken instead of a WhiteSpaceToken.
    public static readonly TokenMatcher BRACKET = new RegularExpressionMatcher("^[{}()\\[\\]]", (SourceBuffer sourceBuffer, int start, int length) => new Token.BracketToken(sourceBuffer, start, length));

}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,45 @@
|
||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Reflection.Metadata.Ecma335;
|
||||||
|
using System.Text;
|
||||||
|
|
||||||
|
namespace ln.parse.tokenizer
|
||||||
|
{
|
||||||
|
/// <summary>
/// Splits a <see cref="SourceBuffer"/> into tokens by repeatedly trying the registered
/// matchers, in registration order, at the buffer's current position.
/// </summary>
public class Tokenizer
{
    List<TokenMatcher> tokenMatchers = new List<TokenMatcher>();

    public Tokenizer()
    {
    }

    /// <summary>Appends a matcher; matchers are tried in the order they were added.</summary>
    /// <returns>This tokenizer, to allow call chaining.</returns>
    public Tokenizer Add(TokenMatcher tokenMatcher) { tokenMatchers.Add(tokenMatcher); return this; }

    /// <summary>Removes a previously added matcher.</summary>
    /// <returns>This tokenizer, to allow call chaining.</returns>
    public Tokenizer Remove(TokenMatcher tokenMatcher) { tokenMatchers.Remove(tokenMatcher); return this; }

    /// <summary>Tokenizes <paramref name="source"/> from its beginning.</summary>
    /// <param name="source">Text to tokenize.</param>
    /// <returns>The tokens in source order.</returns>
    public Token[] Parse(string source) => Parse(new SourceBuffer(source));

    /// <summary>
    /// Tokenizes <paramref name="sourceBuffer"/> starting at its current
    /// <see cref="SourceBuffer.LinearPosition"/> and advances that position past
    /// every token produced.
    /// </summary>
    /// <param name="sourceBuffer">Buffer to read; its position is moved forward.</param>
    /// <returns>The tokens in source order.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="sourceBuffer"/> is <c>null</c>.</exception>
    /// <exception cref="FormatException">No matcher produced a non-empty token at the current position.</exception>
    public Token[] Parse(SourceBuffer sourceBuffer)
    {
        if (sourceBuffer == null)
        {
            throw new ArgumentNullException(nameof(sourceBuffer));
        }

        List<Token> tokens = new List<Token>();

        while (sourceBuffer.LinearPosition < sourceBuffer.Length)
        {
            int startPosition = sourceBuffer.LinearPosition;
            Token token = null;

            foreach (TokenMatcher tokenMatcher in tokenMatchers)
            {
                // A zero-length token cannot move the cursor, so it is treated as "no match"
                // and the remaining matchers still get their chance.
                if (tokenMatcher.Match(sourceBuffer, out Token candidate) && (candidate != null) && (candidate.Length > 0))
                {
                    token = candidate;
                    break;
                }
            }

            if (token == null)
            {
                throw new FormatException(String.Format("invalid token at {0}", sourceBuffer.TextPosition));
            }

            // Consume the token. Without this the loop condition never changes and the same
            // token is matched forever. Only ever move forward, so a matcher that already
            // advanced the buffer itself is not rewound.
            int tokenEnd = token.LinearStart + token.Length;
            if (sourceBuffer.LinearPosition < tokenEnd)
            {
                sourceBuffer.LinearPosition = tokenEnd;
            }

            // Safety net for tokens whose span lies before the cursor: guarantee termination.
            if (sourceBuffer.LinearPosition <= startPosition)
            {
                throw new FormatException(String.Format("tokenizer made no progress at {0}", sourceBuffer.TextPosition));
            }

            tokens.Add(token);
        }

        return tokens.ToArray();
    }
}
|
||||||
|
}
|
Loading…
Reference in New Issue