diff --git a/Class1.cs b/Class1.cs
deleted file mode 100644
index d4ce454..0000000
--- a/Class1.cs
+++ /dev/null
@@ -1,8 +0,0 @@
-using System;
-
-namespace ln.parse
-{
- public class Class1
- {
- }
-}
diff --git a/ln.parse.csproj b/ln.parse.csproj
index cb63190..d1c7cd1 100644
--- a/ln.parse.csproj
+++ b/ln.parse.csproj
@@ -4,4 +4,12 @@
netcoreapp3.1
+
+
+
+
+
+
+
+
diff --git a/tokenizer/RegularExpressionMatcher.cs b/tokenizer/RegularExpressionMatcher.cs
new file mode 100644
index 0000000..f255277
--- /dev/null
+++ b/tokenizer/RegularExpressionMatcher.cs
@@ -0,0 +1,69 @@
+using System;
+using System.Collections.Generic;
+using System.Text;
+using System.Text.RegularExpressions;
+
+namespace ln.parse.tokenizer
+{
+    /// <summary>
+    /// A <see cref="TokenMatcher"/> that recognises a token by applying a regular
+    /// expression to the unread part of a <see cref="SourceBuffer"/>.
+    /// </summary>
+    public class RegularExpressionMatcher : TokenMatcher
+    {
+        readonly Regex regex;
+        readonly Func<SourceBuffer, int, int, Token> createTokenDelegate;
+
+        /// <summary>
+        /// Creates a matcher for <paramref name="pattern"/> that builds its tokens
+        /// through <paramref name="createTokenDelegate"/>.
+        /// </summary>
+        /// <param name="pattern">Regular expression handed to <see cref="Regex"/>.</param>
+        /// <param name="createTokenDelegate">Factory receiving the buffer, the token start and the token length.</param>
+        /// <exception cref="ArgumentNullException"><paramref name="createTokenDelegate"/> is null.</exception>
+        public RegularExpressionMatcher(string pattern, Func<SourceBuffer, int, int, Token> createTokenDelegate)
+            : this(pattern)
+        {
+            this.createTokenDelegate = createTokenDelegate ?? throw new ArgumentNullException(nameof(createTokenDelegate));
+        }
+
+        /// <summary>
+        /// Creates a matcher without a token factory. A derived class using this
+        /// constructor has to override <see cref="CreateToken"/>.
+        /// </summary>
+        /// <param name="pattern">Regular expression handed to <see cref="Regex"/>.</param>
+        protected RegularExpressionMatcher(string pattern)
+        {
+            regex = new Regex(pattern);
+        }
+
+        /// <summary>
+        /// Builds the token for a successful match. The default implementation
+        /// forwards to the delegate passed to the public constructor.
+        /// </summary>
+        public virtual Token CreateToken(SourceBuffer sourceBuffer, int start, int length) => createTokenDelegate(sourceBuffer, start, length);
+
+        /// <summary>
+        /// Tries to match the pattern at the current position of
+        /// <paramref name="sourceBuffer"/>. The buffer position is not changed.
+        /// </summary>
+        /// <param name="sourceBuffer">Buffer whose unread text is examined.</param>
+        /// <param name="token">The created token on success, otherwise null.</param>
+        /// <returns>true if a non-empty match starts exactly at the current position.</returns>
+        public override bool Match(SourceBuffer sourceBuffer, out Token token)
+        {
+            Match match = regex.Match(sourceBuffer.GetCurrentText());
+
+            // Regex.Match never returns null, so only Success needs checking. An empty
+            // match is rejected: a zero-length token could never move a tokenizer forward.
+            if (match.Success && (match.Index == 0) && (match.Length > 0))
+            {
+                token = CreateToken(sourceBuffer, sourceBuffer.LinearPosition, match.Length);
+                return true;
+            }
+
+            token = null;
+            return false;
+        }
+    }
+}
diff --git a/tokenizer/SourceBuffer.cs b/tokenizer/SourceBuffer.cs
new file mode 100644
index 0000000..fcb55b2
--- /dev/null
+++ b/tokenizer/SourceBuffer.cs
@@ -0,0 +1,117 @@
+using ln.collections;
+using System;
+using System.IO;
+
+namespace ln.parse.tokenizer
+{
+    /// <summary>
+    /// Human readable position inside a text: line number and column, both
+    /// counted from 1 (see <see cref="First"/>).
+    /// </summary>
+    public struct TextPosition
+    {
+        public int LineNo;
+        public int CursorPosition;
+
+        public TextPosition(int line, int cursor)
+        {
+            LineNo = line;
+            CursorPosition = cursor;
+        }
+
+        /// <summary>Position of the first character of a text.</summary>
+        public static TextPosition First => new TextPosition(1, 1);
+
+        /// <summary>Formats the position as "line:column".</summary>
+        public override string ToString() => string.Format("{0}:{1}", LineNo, CursorPosition);
+    }
+
+    /// <summary>
+    /// Holds a complete source text together with a read cursor
+    /// (<see cref="LinearPosition"/>) and translates linear character offsets
+    /// into line/column positions.
+    /// </summary>
+    public class SourceBuffer
+    {
+        readonly string _buffer;
+
+        // Offset of the first character of every line, ascending. Entry i belongs
+        // to line i + 1; entry 0 is always 0, so the array is never empty.
+        readonly int[] lineStarts;
+
+        int linearPosition;
+
+        /// <summary>Offset of the next unread character.</summary>
+        public int LinearPosition { get => linearPosition; set => linearPosition = value; }
+
+        /// <summary>Line/column of <see cref="LinearPosition"/>.</summary>
+        public TextPosition TextPosition => GetTextPosition(linearPosition);
+
+        public string Text => _buffer;
+
+        public int Length => _buffer.Length;
+
+        /// <summary>Creates a buffer from a character array; null yields an empty buffer.</summary>
+        public SourceBuffer(char[] buffer) : this(new string(buffer))
+        { }
+
+        /// <summary>Creates a buffer from everything <paramref name="reader"/> still provides.</summary>
+        /// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
+        public SourceBuffer(TextReader reader) : this((reader ?? throw new ArgumentNullException(nameof(reader))).ReadToEnd())
+        { }
+
+        /// <summary>Creates a buffer over <paramref name="source"/>.</summary>
+        /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
+        public SourceBuffer(string source)
+        {
+            _buffer = source ?? throw new ArgumentNullException(nameof(source));
+            lineStarts = FindLineStarts(_buffer);
+        }
+
+        // Collects the offset behind every '\n' (plus offset 0 for the first line).
+        private static int[] FindLineStarts(string text)
+        {
+            int lineCount = 1;
+            foreach (char c in text)
+            {
+                if (c == '\n')
+                    lineCount++;
+            }
+
+            int[] starts = new int[lineCount];
+            int nextLine = 1;
+            for (int n = 0; n < text.Length; n++)
+            {
+                if (text[n] == '\n')
+                    starts[nextLine++] = n + 1;
+            }
+            return starts;
+        }
+
+        /// <summary>
+        /// Translates a linear offset into a 1-based line/column position.
+        /// A line break character counts as the last column of the line it ends.
+        /// </summary>
+        /// <param name="linearPosition">Offset into the text; negative values are treated as 0.</param>
+        public TextPosition GetTextPosition(int linearPosition)
+        {
+            if (linearPosition < 0)
+                linearPosition = 0;
+
+            // BinarySearch returns the complement of the index of the first larger
+            // entry when there is no exact hit; the line start we want is one before.
+            int line = Array.BinarySearch(lineStarts, linearPosition);
+            if (line < 0)
+                line = ~line - 1;
+
+            return new TextPosition(line + 1, linearPosition - lineStarts[line] + 1);
+        }
+
+        public string GetText() => _buffer;
+        public string GetText(int linearStart) => _buffer.Substring(linearStart);
+        public string GetText(int linearStart, int length) => _buffer.Substring(linearStart, length);
+
+        /// <summary>Text from <see cref="LinearPosition"/> to the end of the buffer.</summary>
+        public string GetCurrentText() => _buffer.Substring(linearPosition);
+    }
+}
diff --git a/tokenizer/Token.cs b/tokenizer/Token.cs
new file mode 100644
index 0000000..49addb0
--- /dev/null
+++ b/tokenizer/Token.cs
@@ -0,0 +1,98 @@
+using System;
+using System.Collections.Generic;
+using System.Text;
+
+namespace ln.parse.tokenizer
+{
+    /// <summary>
+    /// A slice of a <see cref="SourceBuffer"/>, described by a start offset and a
+    /// length. The nested classes tag a slice with its lexical category.
+    /// </summary>
+    public class Token
+    {
+        /// <summary>Stores the buffer and the extent of the token; nothing is validated.</summary>
+        public Token(SourceBuffer sourceBuffer, int start, int length)
+        {
+            Length = length;
+            LinearStart = start;
+            SourceBuffer = sourceBuffer;
+        }
+
+        /// <summary>Buffer the token refers to.</summary>
+        public SourceBuffer SourceBuffer { get; }
+
+        /// <summary>Offset of the first character of the token inside the buffer.</summary>
+        public int LinearStart { get; }
+
+        /// <summary>Number of characters covered by the token.</summary>
+        public int Length { get; }
+
+        /// <summary>Line/column of <see cref="LinearStart"/>, looked up in the buffer on every access.</summary>
+        public TextPosition TextPosition
+        {
+            get { return SourceBuffer.GetTextPosition(LinearStart); }
+        }
+
+        /// <summary>Text covered by the token, read from the buffer on every access.</summary>
+        public string Value
+        {
+            get { return SourceBuffer.GetText(LinearStart, Length); }
+        }
+
+        public class IntegerToken : Token
+        {
+            public IntegerToken(SourceBuffer sourceBuffer, int start, int length)
+                : base(sourceBuffer, start, length)
+            {
+            }
+        }
+
+        public class FloatToken : Token
+        {
+            public FloatToken(SourceBuffer sourceBuffer, int start, int length)
+                : base(sourceBuffer, start, length)
+            {
+            }
+        }
+
+        public class StringToken : Token
+        {
+            public StringToken(SourceBuffer sourceBuffer, int start, int length)
+                : base(sourceBuffer, start, length)
+            {
+            }
+        }
+
+        public class OperatorToken : Token
+        {
+            public OperatorToken(SourceBuffer sourceBuffer, int start, int length)
+                : base(sourceBuffer, start, length)
+            {
+            }
+        }
+
+        public class WhiteSpaceToken : Token
+        {
+            public WhiteSpaceToken(SourceBuffer sourceBuffer, int start, int length)
+                : base(sourceBuffer, start, length)
+            {
+            }
+        }
+
+        public class IdentifierToken : Token
+        {
+            public IdentifierToken(SourceBuffer sourceBuffer, int start, int length)
+                : base(sourceBuffer, start, length)
+            {
+            }
+        }
+
+        public class BracketToken : Token
+        {
+            public BracketToken(SourceBuffer sourceBuffer, int start, int length)
+                : base(sourceBuffer, start, length)
+            {
+            }
+        }
+    }
+}
diff --git a/tokenizer/TokenMatcher.cs b/tokenizer/TokenMatcher.cs
new file mode 100644
index 0000000..cc4e46b
--- /dev/null
+++ b/tokenizer/TokenMatcher.cs
@@ -0,0 +1,48 @@
+using System;
+using System.Collections.Generic;
+using System.Globalization;
+using System.Runtime.InteropServices.ComTypes;
+using System.Text;
+
+namespace ln.parse.tokenizer
+{
+    /// <summary>
+    /// Base class of all token recognisers, plus a set of ready-made matchers
+    /// for common lexical categories.
+    /// </summary>
+    public abstract class TokenMatcher
+    {
+        public TokenMatcher()
+        {
+        }
+
+        /// <summary>Tries to recognise a token in <paramref name="sourceBuffer"/>.</summary>
+        /// <param name="sourceBuffer">Buffer to examine.</param>
+        /// <param name="token">Receives the recognised token.</param>
+        /// <returns>true if a token was recognised.</returns>
+        public abstract bool Match(SourceBuffer sourceBuffer, out Token token);
+
+        // Patterns with alternatives are grouped behind '^': in "^a|b" the anchor
+        // would belong to the first alternative only.
+
+        public static readonly TokenMatcher INTEGER = new RegularExpressionMatcher("^-?\\d+", (SourceBuffer sourceBuffer, int start, int length) => new Token.IntegerToken(sourceBuffer, start, length));
+
+        // The decimal point is escaped; a bare '.' would accept any character ("12x").
+        public static readonly TokenMatcher FLOAT = new RegularExpressionMatcher("^-?\\d+\\.\\d*", (SourceBuffer sourceBuffer, int start, int length) => new Token.FloatToken(sourceBuffer, start, length));
+
+        public static readonly TokenMatcher STRING = new RegularExpressionMatcher("^\\\"(\\\\\"|.)*?\\\"", (SourceBuffer sourceBuffer, int start, int length) => new Token.StringToken(sourceBuffer, start, length));
+
+        // An ASCII letter or underscore, followed by ASCII letters, digits and underscores.
+        public static readonly TokenMatcher IDENTIFIER = new RegularExpressionMatcher("^[a-zA-Z_][a-zA-Z0-9_]*", (SourceBuffer sourceBuffer, int start, int length) => new Token.IdentifierToken(sourceBuffer, start, length));
+
+        // Two-character operators come first: alternation takes the first
+        // alternative that matches, so "|" listed before "||" would hide the longer one.
+        public static readonly TokenMatcher OPERATOR = new RegularExpressionMatcher("^(?:\\|\\||&&|\\+|-|\\*|/|\\||&)", (SourceBuffer sourceBuffer, int start, int length) => new Token.OperatorToken(sourceBuffer, start, length));
+
+        public static readonly TokenMatcher WHITESPACE = new RegularExpressionMatcher("^\\s+", (SourceBuffer sourceBuffer, int start, int length) => new Token.WhiteSpaceToken(sourceBuffer, start, length));
+
+        // Exactly one bracket character. There is deliberately no empty
+        // alternative, which would match the empty string at every position.
+        public static readonly TokenMatcher BRACKET = new RegularExpressionMatcher("^[{}()\\[\\]]", (SourceBuffer sourceBuffer, int start, int length) => new Token.BracketToken(sourceBuffer, start, length));
+    }
+}
diff --git a/tokenizer/Tokenizer.cs b/tokenizer/Tokenizer.cs
new file mode 100644
index 0000000..81f08a2
--- /dev/null
+++ b/tokenizer/Tokenizer.cs
@@ -0,0 +1,77 @@
+using System;
+using System.Collections.Generic;
+using System.Reflection.Metadata.Ecma335;
+using System.Text;
+
+namespace ln.parse.tokenizer
+{
+    /// <summary>
+    /// Splits a source text into tokens by trying a list of
+    /// <see cref="TokenMatcher"/>s at the current buffer position.
+    /// </summary>
+    public class Tokenizer
+    {
+        // Matchers in registration order; the first one that matches wins.
+        readonly List<TokenMatcher> tokenMatchers = new List<TokenMatcher>();
+
+        public Tokenizer()
+        {
+        }
+
+        /// <summary>Appends a matcher and returns this tokenizer for chaining.</summary>
+        /// <exception cref="ArgumentNullException"><paramref name="tokenMatcher"/> is null.</exception>
+        public Tokenizer Add(TokenMatcher tokenMatcher)
+        {
+            tokenMatchers.Add(tokenMatcher ?? throw new ArgumentNullException(nameof(tokenMatcher)));
+            return this;
+        }
+
+        /// <summary>Removes a matcher (if registered) and returns this tokenizer for chaining.</summary>
+        public Tokenizer Remove(TokenMatcher tokenMatcher) { tokenMatchers.Remove(tokenMatcher); return this; }
+
+        /// <summary>Tokenizes <paramref name="source"/> from its beginning.</summary>
+        public Token[] Parse(string source) => Parse(new SourceBuffer(source));
+
+        /// <summary>
+        /// Tokenizes <paramref name="sourceBuffer"/> from its current position to
+        /// its end and leaves the position behind the last token.
+        /// </summary>
+        /// <returns>All recognised tokens in source order.</returns>
+        /// <exception cref="ArgumentNullException"><paramref name="sourceBuffer"/> is null.</exception>
+        /// <exception cref="FormatException">No matcher recognises the text at the current position.</exception>
+        public Token[] Parse(SourceBuffer sourceBuffer)
+        {
+            if (sourceBuffer == null)
+                throw new ArgumentNullException(nameof(sourceBuffer));
+
+            List<Token> tokens = new List<Token>();
+
+            while (sourceBuffer.LinearPosition < sourceBuffer.Length)
+            {
+                Token token = null;
+
+                foreach (TokenMatcher tokenMatcher in tokenMatchers)
+                {
+                    // An empty token cannot move the position forward, so it is
+                    // treated like a failed match and the next matcher is tried.
+                    if (tokenMatcher.Match(sourceBuffer, out Token candidate) && (candidate != null) && (candidate.Length > 0))
+                    {
+                        token = candidate;
+                        break;
+                    }
+                }
+
+                if (token == null)
+                    throw new FormatException(String.Format("invalid token at {0}",sourceBuffer.TextPosition));
+
+                tokens.Add(token);
+
+                // Continue behind the token. Assigning the absolute end (rather than
+                // adding the length) stays correct if a matcher moved the position itself.
+                sourceBuffer.LinearPosition = token.LinearStart + token.Length;
+            }
+
+            return tokens.ToArray();
+        }
+    }
+}