Alpha Commit
parent
e159bbe78a
commit
577e90b54e
|
@ -4,4 +4,12 @@
|
|||
<TargetFramework>netcoreapp3.1</TargetFramework>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="System.Text.RegularExpressions" Version="4.3.1" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\ln.collections\ln.collections.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
|
|
|
@ -0,0 +1,37 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text;
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace ln.parse.tokenizer
|
||||
{
|
||||
/// <summary>
/// A <see cref="TokenMatcher"/> that recognises a token by running a regular expression
/// against the text that starts at the buffer's current linear position.
/// </summary>
public class RegularExpressionMatcher : TokenMatcher
{
    readonly Regex regex;
    readonly Func<SourceBuffer, int, int, Token> createTokenDelegate;

    /// <summary>
    /// Creates a matcher that builds its tokens through <paramref name="createTokenDelegate"/>.
    /// </summary>
    /// <param name="pattern">Regular expression pattern handed to <see cref="Regex"/>.</param>
    /// <param name="createTokenDelegate">
    /// Factory called with (source buffer, start offset, length) for every successful match.
    /// </param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="createTokenDelegate"/> is null (or <paramref name="pattern"/> is null,
    /// reported by the <see cref="Regex"/> constructor).
    /// </exception>
    public RegularExpressionMatcher(string pattern, Func<SourceBuffer, int, int, Token> createTokenDelegate)
        : this(pattern)
    {
        // Fail at construction time instead of with a NullReferenceException on first match.
        this.createTokenDelegate = createTokenDelegate ?? throw new ArgumentNullException(nameof(createTokenDelegate));
    }

    /// <summary>
    /// Creates a matcher without a token factory; subclasses using this constructor
    /// must override <see cref="CreateToken"/>.
    /// </summary>
    /// <param name="pattern">Regular expression pattern handed to <see cref="Regex"/>.</param>
    protected RegularExpressionMatcher(string pattern)
    {
        regex = new Regex(pattern);
    }

    /// <summary>
    /// Builds the token for a successful match by invoking the factory delegate.
    /// </summary>
    /// <param name="sourceBuffer">Buffer the token was found in.</param>
    /// <param name="start">Linear start offset of the token.</param>
    /// <param name="length">Length of the matched text.</param>
    /// <returns>The token produced by the factory delegate.</returns>
    /// <exception cref="InvalidOperationException">
    /// No factory delegate was supplied and this method was not overridden.
    /// </exception>
    public virtual Token CreateToken(SourceBuffer sourceBuffer, int start, int length)
    {
        if (createTokenDelegate == null)
        {
            throw new InvalidOperationException("No token factory delegate was supplied; override CreateToken when using the protected constructor.");
        }

        return createTokenDelegate(sourceBuffer, start, length);
    }

    /// <summary>
    /// Tries to match the pattern at the buffer's current position.
    /// </summary>
    /// <param name="sourceBuffer">Buffer to read from; its position is not modified here.</param>
    /// <param name="token">The created token on success, otherwise null.</param>
    /// <returns>True when the pattern matches starting exactly at the current position.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="sourceBuffer"/> is null.</exception>
    public override bool Match(SourceBuffer sourceBuffer, out Token token)
    {
        if (sourceBuffer == null)
        {
            throw new ArgumentNullException(nameof(sourceBuffer));
        }

        // Regex.Match never returns null; a failed search yields a Match with Success == false.
        Match match = regex.Match(sourceBuffer.GetCurrentText());

        // Index == 0 rejects hits further into the remaining text, which unanchored
        // patterns (or alternatives not covered by a leading '^') could otherwise produce.
        if (match.Success && (match.Index == 0))
        {
            token = CreateToken(sourceBuffer, sourceBuffer.LinearPosition, match.Length);
            return true;
        }

        token = null;
        return false;
    }
}
|
||||
}
|
|
@ -0,0 +1,77 @@
|
|||
using ln.collections;
|
||||
using System;
|
||||
using System.IO;
|
||||
|
||||
namespace ln.parse.tokenizer
|
||||
{
|
||||
/// <summary>
/// A pair of line number and cursor position identifying a location in source text.
/// </summary>
public struct TextPosition
{
    /// <summary>Line number component of the position.</summary>
    public int LineNo;

    /// <summary>Cursor position component of the position.</summary>
    public int CursorPosition;

    /// <summary>
    /// Initialises both components.
    /// </summary>
    /// <param name="line">Value stored in <see cref="LineNo"/>.</param>
    /// <param name="cursor">Value stored in <see cref="CursorPosition"/>.</param>
    public TextPosition(int line, int cursor)
    {
        CursorPosition = cursor;
        LineNo = line;
    }

    /// <summary>
    /// A fresh position with line number 1 and cursor position 1.
    /// </summary>
    public static TextPosition First
    {
        get { return new TextPosition(1, 1); }
    }

    /// <summary>
    /// Renders the position as "line:cursor".
    /// </summary>
    public override string ToString()
    {
        return $"{LineNo}:{CursorPosition}";
    }
}
|
||||
|
||||
/// <summary>
/// Holds the complete source text together with a movable linear read position and
/// an index used to translate linear offsets into line/column positions.
/// </summary>
public class SourceBuffer
{
    readonly string _buffer;

    // One entry per line, keyed by the linear offset of the line's first character.
    // The stored value carries the 1-based line number (cursor position 1).
    readonly BTree<int, TextPosition> statTextPositions = new BTree<int, TextPosition>();

    int linearPosition;

    /// <summary>Current linear (character offset) read position. Not range-checked.</summary>
    public int LinearPosition { get => linearPosition; set => linearPosition = value; }

    /// <summary>Line/column position corresponding to <see cref="LinearPosition"/>.</summary>
    public TextPosition TextPosition => GetTextPosition(linearPosition);

    /// <summary>The complete source text.</summary>
    public string Text => _buffer;

    /// <summary>Number of characters in the source text.</summary>
    public int Length => _buffer.Length;

    /// <summary>Creates a buffer from a character array.</summary>
    /// <param name="buffer">Characters forming the source text.</param>
    public SourceBuffer(char[] buffer) : this(new string(buffer))
    { }

    /// <summary>Creates a buffer from everything that remains to be read from a reader.</summary>
    /// <param name="reader">Reader that is consumed to its end.</param>
    /// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
    public SourceBuffer(TextReader reader)
        : this((reader ?? throw new ArgumentNullException(nameof(reader))).ReadToEnd())
    { }

    /// <summary>Creates a buffer over the given source text and indexes its lines.</summary>
    /// <param name="source">The source text.</param>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
    public SourceBuffer(string source)
    {
        _buffer = source ?? throw new ArgumentNullException(nameof(source));
        doStatistics();
    }

    /// <summary>
    /// Builds the line index: offset 0 starts line 1, and every offset directly after
    /// a '\n' starts the next line (including the offset just past a trailing newline).
    /// </summary>
    private void doStatistics()
    {
        int lineNo = 1;
        statTextPositions.Add(0, new TextPosition(lineNo, 1));

        // Scan the whole buffer, last character included.
        for (int n = 0; n < _buffer.Length; n++)
        {
            if (_buffer[n] == '\n')
            {
                lineNo++;
                statTextPositions.Add(n + 1, new TextPosition(lineNo, 1));
            }
        }
    }

    /// <summary>
    /// Translates a linear offset into a 1-based line/column position.
    /// </summary>
    /// <param name="linearPosition">
    /// Character offset into the text. Values outside 0..Length are clamped to that range.
    /// </param>
    /// <returns>Line number and column of the (clamped) offset.</returns>
    public TextPosition GetTextPosition(int linearPosition)
    {
        int position = Math.Max(0, Math.Min(linearPosition, _buffer.Length));

        // BTree is declared in ln.collections (not visible here); this presumably yields the
        // entry with the greatest key <= position, i.e. the line containing the offset.
        statTextPositions.TryGetPreviousOrCurrentValue(position, out TextPosition textPosition);

        // The column is the distance to the line break preceding the offset
        // (-1 when the offset lies on the first line).
        int previousLineBreak = (position > 0) ? _buffer.LastIndexOf('\n', position - 1) : -1;
        textPosition.CursorPosition = position - previousLineBreak;

        return textPosition;
    }

    /// <summary>Returns the complete source text.</summary>
    public string GetText() => _buffer;

    /// <summary>Returns the text from <paramref name="linearStart"/> to the end.</summary>
    public string GetText(int linearStart) => _buffer.Substring(linearStart);

    /// <summary>Returns <paramref name="length"/> characters starting at <paramref name="linearStart"/>.</summary>
    public string GetText(int linearStart, int length) => _buffer.Substring(linearStart, length);

    /// <summary>Returns the text from the current <see cref="LinearPosition"/> to the end.</summary>
    public string GetCurrentText() => _buffer.Substring(linearPosition);
}
|
||||
}
|
|
@ -0,0 +1,54 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text;
|
||||
|
||||
namespace ln.parse.tokenizer
|
||||
{
|
||||
/// <summary>
/// A slice of a <see cref="SourceBuffer"/>, described by its start offset and length.
/// The nested classes are member-less subtypes used to tag the kind of token.
/// </summary>
public class Token
{
    /// <summary>
    /// Stores the buffer reference and the slice boundaries.
    /// </summary>
    /// <param name="sourceBuffer">Buffer the token refers to.</param>
    /// <param name="start">Linear start offset of the token inside the buffer.</param>
    /// <param name="length">Number of characters covered by the token.</param>
    public Token(SourceBuffer sourceBuffer, int start, int length)
    {
        Length = length;
        LinearStart = start;
        SourceBuffer = sourceBuffer;
    }

    /// <summary>Buffer the token refers to.</summary>
    public SourceBuffer SourceBuffer { get; }

    /// <summary>Linear start offset of the token inside the buffer.</summary>
    public int LinearStart { get; }

    /// <summary>Number of characters covered by the token.</summary>
    public int Length { get; }

    /// <summary>Text position the buffer reports for <see cref="LinearStart"/>.</summary>
    public TextPosition TextPosition
    {
        get { return SourceBuffer.GetTextPosition(LinearStart); }
    }

    /// <summary>The token's text, read from the buffer on every access.</summary>
    public string Value
    {
        get { return SourceBuffer.GetText(LinearStart, Length); }
    }

    /// <summary>Token subtype for integers.</summary>
    public class IntegerToken : Token
    {
        public IntegerToken(SourceBuffer sourceBuffer, int start, int length)
            : base(sourceBuffer, start, length)
        {
        }
    }

    /// <summary>Token subtype for floating point numbers.</summary>
    public class FloatToken : Token
    {
        public FloatToken(SourceBuffer sourceBuffer, int start, int length)
            : base(sourceBuffer, start, length)
        {
        }
    }

    /// <summary>Token subtype for strings.</summary>
    public class StringToken : Token
    {
        public StringToken(SourceBuffer sourceBuffer, int start, int length)
            : base(sourceBuffer, start, length)
        {
        }
    }

    /// <summary>Token subtype for operators.</summary>
    public class OperatorToken : Token
    {
        public OperatorToken(SourceBuffer sourceBuffer, int start, int length)
            : base(sourceBuffer, start, length)
        {
        }
    }

    /// <summary>Token subtype for whitespace.</summary>
    public class WhiteSpaceToken : Token
    {
        public WhiteSpaceToken(SourceBuffer sourceBuffer, int start, int length)
            : base(sourceBuffer, start, length)
        {
        }
    }

    /// <summary>Token subtype for identifiers.</summary>
    public class IdentifierToken : Token
    {
        public IdentifierToken(SourceBuffer sourceBuffer, int start, int length)
            : base(sourceBuffer, start, length)
        {
        }
    }

    /// <summary>Token subtype for brackets.</summary>
    public class BracketToken : Token
    {
        public BracketToken(SourceBuffer sourceBuffer, int start, int length)
            : base(sourceBuffer, start, length)
        {
        }
    }
}
|
||||
}
|
|
@ -0,0 +1,28 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Globalization;
|
||||
using System.Runtime.InteropServices.ComTypes;
|
||||
using System.Text;
|
||||
|
||||
namespace ln.parse.tokenizer
|
||||
{
|
||||
/// <summary>
/// Base class for objects that try to recognise one token at the current position of a
/// <see cref="SourceBuffer"/>. Also exposes a set of ready-made regular-expression matchers.
/// </summary>
public abstract class TokenMatcher
{
    public TokenMatcher()
    {
    }

    /// <summary>
    /// Tries to recognise a token at the buffer's current position.
    /// </summary>
    /// <param name="sourceBuffer">Buffer to read from.</param>
    /// <param name="token">The recognised token on success, otherwise null.</param>
    /// <returns>True when a token was recognised.</returns>
    public abstract bool Match(SourceBuffer sourceBuffer, out Token token);

    /// <summary>Optionally negative run of decimal digits.</summary>
    public static readonly TokenMatcher INTEGER = new RegularExpressionMatcher(
        "^-?\\d+",
        (SourceBuffer sourceBuffer, int start, int length) => new Token.IntegerToken(sourceBuffer, start, length));

    /// <summary>
    /// Optionally negative digits, a literal decimal point and optional fraction digits.
    /// The point is escaped; unescaped it matched any character, so "1+2" was a float.
    /// </summary>
    public static readonly TokenMatcher FLOAT = new RegularExpressionMatcher(
        "^-?\\d+\\.\\d*",
        (SourceBuffer sourceBuffer, int start, int length) => new Token.FloatToken(sourceBuffer, start, length));

    /// <summary>Double-quoted string; a backslash-escaped quote does not end the string.</summary>
    public static readonly TokenMatcher STRING = new RegularExpressionMatcher(
        "^\\\"(\\\\\"|.)*?\\\"",
        (SourceBuffer sourceBuffer, int start, int length) => new Token.StringToken(sourceBuffer, start, length));

    /// <summary>
    /// ASCII letter or underscore followed by ASCII letters, digits or underscores.
    /// The previous pattern lacked the opening '[' of its first character class and
    /// therefore required a literal ']' as second character.
    /// </summary>
    public static readonly TokenMatcher IDENTIFIER = new RegularExpressionMatcher(
        "^[a-zA-Z_][a-zA-Z0-9_]*",
        (SourceBuffer sourceBuffer, int start, int length) => new Token.IdentifierToken(sourceBuffer, start, length));

    /// <summary>
    /// One of || &amp;&amp; + - * / | &amp;. The two-character operators are listed first because
    /// regex alternation takes the first alternative that matches; the whole group is anchored
    /// so the engine does not scan the remaining text for a later occurrence.
    /// </summary>
    public static readonly TokenMatcher OPERATOR = new RegularExpressionMatcher(
        "^(?:\\|\\||&&|\\+|-|\\*|/|\\||&)",
        (SourceBuffer sourceBuffer, int start, int length) => new Token.OperatorToken(sourceBuffer, start, length));

    /// <summary>Run of whitespace characters.</summary>
    public static readonly TokenMatcher WHITESPACE = new RegularExpressionMatcher(
        "^\\s+",
        (SourceBuffer sourceBuffer, int start, int length) => new Token.WhiteSpaceToken(sourceBuffer, start, length));

    /// <summary>
    /// A single bracket character: { } ( ) [ ]. Written as a character class so the anchor
    /// covers every bracket and no empty alternative exists (the previous pattern ended in '|'
    /// and thus matched the empty string on any input). Produces a BracketToken; the previous
    /// factory created a WhiteSpaceToken.
    /// </summary>
    public static readonly TokenMatcher BRACKET = new RegularExpressionMatcher(
        "^[{}()\\[\\]]",
        (SourceBuffer sourceBuffer, int start, int length) => new Token.BracketToken(sourceBuffer, start, length));
}
|
||||
|
||||
}
|
|
@ -0,0 +1,45 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Reflection.Metadata.Ecma335;
|
||||
using System.Text;
|
||||
|
||||
namespace ln.parse.tokenizer
|
||||
{
|
||||
/// <summary>
/// Splits source text into tokens by repeatedly asking an ordered list of
/// <see cref="TokenMatcher"/> instances to recognise a token at the current position.
/// </summary>
public class Tokenizer
{
    readonly List<TokenMatcher> tokenMatchers = new List<TokenMatcher>();

    public Tokenizer()
    {
    }

    /// <summary>Appends a matcher; matchers are consulted in the order they were added.</summary>
    /// <param name="tokenMatcher">Matcher to append.</param>
    /// <returns>This tokenizer, to allow call chaining.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="tokenMatcher"/> is null.</exception>
    public Tokenizer Add(TokenMatcher tokenMatcher)
    {
        tokenMatchers.Add(tokenMatcher ?? throw new ArgumentNullException(nameof(tokenMatcher)));
        return this;
    }

    /// <summary>Removes the first occurrence of a matcher, if present.</summary>
    /// <param name="tokenMatcher">Matcher to remove.</param>
    /// <returns>This tokenizer, to allow call chaining.</returns>
    public Tokenizer Remove(TokenMatcher tokenMatcher)
    {
        tokenMatchers.Remove(tokenMatcher);
        return this;
    }

    /// <summary>Tokenizes a string by wrapping it in a new <see cref="SourceBuffer"/>.</summary>
    /// <param name="source">Text to tokenize.</param>
    /// <returns>The recognised tokens in source order.</returns>
    public Token[] Parse(string source) => Parse(new SourceBuffer(source));

    /// <summary>
    /// Tokenizes the buffer from its current position to its end, advancing the
    /// buffer's position past every recognised token.
    /// </summary>
    /// <param name="sourceBuffer">Buffer to tokenize.</param>
    /// <returns>The recognised tokens in source order.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="sourceBuffer"/> is null.</exception>
    /// <exception cref="FormatException">
    /// No matcher recognised a non-empty token at the current position.
    /// </exception>
    public Token[] Parse(SourceBuffer sourceBuffer)
    {
        if (sourceBuffer == null)
        {
            throw new ArgumentNullException(nameof(sourceBuffer));
        }

        List<Token> tokens = new List<Token>();

        while (sourceBuffer.LinearPosition < sourceBuffer.Length)
        {
            int tokenStart = sourceBuffer.LinearPosition;
            Token token = null;

            foreach (TokenMatcher tokenMatcher in tokenMatchers)
            {
                if (tokenMatcher.Match(sourceBuffer, out Token candidate) && (candidate != null))
                {
                    // Consume the token here unless the matcher already moved the position
                    // itself; without this step the loop would never terminate.
                    if (sourceBuffer.LinearPosition == tokenStart)
                    {
                        sourceBuffer.LinearPosition = tokenStart + candidate.Length;
                    }

                    if (sourceBuffer.LinearPosition > tokenStart)
                    {
                        token = candidate;
                        break;
                    }
                }

                // No match, or a match that makes no progress (e.g. zero length):
                // restore the position and let the next matcher try.
                sourceBuffer.LinearPosition = tokenStart;
            }

            if (token == null)
            {
                throw new FormatException(String.Format("invalid token at {0}", sourceBuffer.TextPosition));
            }

            tokens.Add(token);
        }

        return tokens.ToArray();
    }
}
|
||||
}
|
Loading…
Reference in New Issue