From 44c2c89de0aa61ba6f0530c6b52ee76629f9f683 Mon Sep 17 00:00:00 2001 From: Harald Wolff Date: Wed, 7 Mar 2018 21:05:21 +0100 Subject: [PATCH] Initial Commit --- .gitignore | 40 +++++ SharpLexer.sln | 17 +++ SharpLexer/Fundamentals.xml | 11 ++ SharpLexer/Grammar.cs | 181 +++++++++++++++++++++++ SharpLexer/Lexer.cs | 10 ++ SharpLexer/MainClass.cs | 59 ++++++++ SharpLexer/README.md | 69 +++++++++ SharpLexer/SharpLexer.csproj | 72 +++++++++ SharpLexer/TestGrammar.xml | 23 +++ SharpLexer/buffer/CharacterBuffer.cs | 187 ++++++++++++++++++++++++ SharpLexer/buffer/DefinitionReader.cs | 55 +++++++ SharpLexer/match/Alternative.cs | 54 +++++++ SharpLexer/match/CharacterGroup.cs | 203 ++++++++++++++++++++++++++ SharpLexer/match/CharacterSequence.cs | 43 ++++++ SharpLexer/match/Expression.cs | 53 +++++++ SharpLexer/match/Matchable.cs | 98 +++++++++++++ SharpLexer/match/MatchedDelegate.cs | 8 + SharpLexer/match/MatchedPart.cs | 60 ++++++++ SharpLexer/match/Sequence.cs | 54 +++++++ 19 files changed, 1297 insertions(+) create mode 100644 .gitignore create mode 100644 SharpLexer.sln create mode 100644 SharpLexer/Fundamentals.xml create mode 100644 SharpLexer/Grammar.cs create mode 100644 SharpLexer/Lexer.cs create mode 100644 SharpLexer/MainClass.cs create mode 100644 SharpLexer/README.md create mode 100644 SharpLexer/SharpLexer.csproj create mode 100644 SharpLexer/TestGrammar.xml create mode 100644 SharpLexer/buffer/CharacterBuffer.cs create mode 100644 SharpLexer/buffer/DefinitionReader.cs create mode 100644 SharpLexer/match/Alternative.cs create mode 100644 SharpLexer/match/CharacterGroup.cs create mode 100644 SharpLexer/match/CharacterSequence.cs create mode 100644 SharpLexer/match/Expression.cs create mode 100644 SharpLexer/match/Matchable.cs create mode 100644 SharpLexer/match/MatchedDelegate.cs create mode 100644 SharpLexer/match/MatchedPart.cs create mode 100644 SharpLexer/match/Sequence.cs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..4e82d27 
--- /dev/null +++ b/.gitignore @@ -0,0 +1,40 @@ +# Autosave files +*~ + +# build +[Oo]bj/ +[Bb]in/ +packages/ +TestResults/ + +# globs +Makefile.in +*.DS_Store +*.sln.cache +*.suo +*.cache +*.pidb +*.userprefs +*.usertasks +config.log +config.make +config.status +aclocal.m4 +install-sh +autom4te.cache/ +*.user +*.tar.gz +tarballs/ +test-results/ +Thumbs.db + +# Mac bundle stuff +*.dmg +*.app + +# resharper +*_Resharper.* +*.Resharper + +# dotCover +*.dotCover diff --git a/SharpLexer.sln b/SharpLexer.sln new file mode 100644 index 0000000..be15472 --- /dev/null +++ b/SharpLexer.sln @@ -0,0 +1,17 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 2012 +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SharpLexer", "SharpLexer\SharpLexer.csproj", "{177C81C7-F6E3-494C-8866-2E3E134969C0}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x86 = Debug|x86 + Release|x86 = Release|x86 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {177C81C7-F6E3-494C-8866-2E3E134969C0}.Debug|x86.ActiveCfg = Debug|x86 + {177C81C7-F6E3-494C-8866-2E3E134969C0}.Debug|x86.Build.0 = Debug|x86 + {177C81C7-F6E3-494C-8866-2E3E134969C0}.Release|x86.ActiveCfg = Release|x86 + {177C81C7-F6E3-494C-8866-2E3E134969C0}.Release|x86.Build.0 = Release|x86 + EndGlobalSection +EndGlobal diff --git a/SharpLexer/Fundamentals.xml b/SharpLexer/Fundamentals.xml new file mode 100644 index 0000000..e5d4a19 --- /dev/null +++ b/SharpLexer/Fundamentals.xml @@ -0,0 +1,11 @@ + + + + [\0x0000..\0x0020] + [\0x0021..] 
// Recovered from the collapsed patch: tail of SharpLexer/Fundamentals.xml. The XML tags were lost
// in extraction; only the token bodies survive:
//   [a..zA..Z]
//   [0..9]
//   alpha | digit
//   [1..9]

// File: SharpLexer/Grammar.cs
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Xml;
using lexer.buffer;
using lexer.match;

namespace lexer
{
    /// <summary>
    /// Loads token definitions from grammar XML documents and turns each definition text into a
    /// tree of <see cref="Matchable"/> objects.
    /// </summary>
    public class Grammar
    {
        // Token name -> parsed definition, in the order the tokens were loaded.
        private readonly Dictionary<String, Sequence> sequences = new Dictionary<String, Sequence>();

        /// <summary>Creates an empty grammar; call <see cref="Load(String)"/> to fill it.</summary>
        public Grammar()
        {
        }

        /// <summary>Creates a grammar and loads the tokens of the given XML file.</summary>
        public Grammar(String filename)
        {
            Load(filename);
        }

        /// <summary>Creates a grammar and loads the tokens of the given XML document.</summary>
        public Grammar(XmlDocument xml)
        {
            Load(xml);
        }

        /// <summary>Reads the XML file <paramref name="filename"/> and loads its tokens.</summary>
        /// <exception cref="ArgumentNullException"><paramref name="filename"/> is null.</exception>
        public void Load(String filename)
        {
            if (filename == null)
            {
                throw new ArgumentNullException(nameof(filename));
            }

            XmlDocument xml = new XmlDocument();
            xml.Load(filename);
            Load(xml);
        }

        /// <summary>
        /// Parses every <c>/Grammar/Tokens/Token</c> element of <paramref name="xml"/>. The element
        /// text is the definition; the presence of the <c>grouping</c> and <c>notice</c> attributes
        /// sets the corresponding flags on the resulting sequence.
        /// </summary>
        /// <exception cref="ArgumentNullException"><paramref name="xml"/> is null.</exception>
        /// <exception cref="FormatException">A token has no <c>name</c> attribute or its definition is malformed.</exception>
        /// <exception cref="ArgumentException">A token name is defined twice.</exception>
        public void Load(XmlDocument xml)
        {
            if (xml == null)
            {
                throw new ArgumentNullException(nameof(xml));
            }

            XmlNodeList tokens = xml.SelectNodes("/Grammar/Tokens/Token");
            foreach (XmlNode node in tokens)
            {
                XmlElement ntoken = (XmlElement)node;
                if (!ntoken.HasAttribute("name"))
                {
                    // Previously this surfaced as a NullReferenceException.
                    throw new FormatException("Token element is missing the required 'name' attribute");
                }

                String name = ntoken.GetAttribute("name");
                Console.WriteLine("Loading Token: {0}", name);

                CharacterBuffer chb = new CharacterBuffer(ntoken.InnerText);
                Sequence sequence = parseSequence(chb, name);

                sequence.Grouping = ntoken.HasAttribute("grouping");
                sequence.Notice = ntoken.HasAttribute("notice");

                this.sequences.Add(name, sequence);
            }
        }

        /// <summary>Returns the loaded token definition called <paramref name="name"/>.</summary>
        /// <exception cref="KeyNotFoundException">No token of that name was loaded.</exception>
        public Sequence getSequence(String name)
        {
            return this.sequences[name];
        }

        /// <summary>
        /// Parses a definition starting at the current buffer position. Parsing stops at the end of
        /// the buffer or after the ')' that closes the current group.
        /// </summary>
        /// <param name="chbuffer">Buffer holding the definition text.</param>
        /// <param name="name">
        /// Optional token name. When given, the returned sequence carries that name, so the complete
        /// definition (including all alternatives) is what gets registered under it.
        /// </param>
        /// <returns>The parsed sequence; an <see cref="Alternative"/> when unnamed and the definition contains '|'.</returns>
        /// <exception cref="FormatException">The definition contains an unexpected character.</exception>
        public Sequence parseSequence(CharacterBuffer chbuffer, String name = null)
        {
            Sequence parsed = parseUnnamedSequence(chbuffer);
            if (name == null)
            {
                return parsed;
            }

            // The name is attached only after parsing. Naming the sequence up front meant that for
            // "a | b" only the left-hand part "a" ended up registered under the token name.
            Sequence named = new Sequence(name);
            if (parsed is Alternative)
            {
                named.addMatchable(parsed);
            }
            else
            {
                foreach (Matchable m in parsed.Matchables)
                {
                    named.addMatchable(m);
                }
            }
            return named;
        }

        // Parses one (possibly alternative) sequence without registering any name.
        private Sequence parseUnnamedSequence(CharacterBuffer chbuffer)
        {
            Sequence sequence = new Sequence();

            while (!chbuffer.EndOfBuffer)
            {
                parseWhiteSpace(chbuffer);
                if (chbuffer.EndOfBuffer)
                {
                    break;
                }

                Matchable operand;
                switch (chbuffer.Current)
                {
                    case '[':
                        operand = new CharacterGroup(chbuffer);
                        break;

                    case '"':
                        operand = new CharacterSequence(chbuffer);
                        break;

                    case '(':
                        chbuffer.MoveNext();
                        operand = parseUnnamedSequence(chbuffer);
                        break;

                    case '|':
                        // Everything parsed so far is the left branch, the rest is the right branch.
                        chbuffer.MoveNext();
                        Alternative alt = new Alternative();
                        alt.addMatchable(sequence);
                        alt.addMatchable(parseUnnamedSequence(chbuffer));
                        return alt;

                    case ')':
                        chbuffer.MoveNext();
                        return sequence;

                    default:
                        operand = parseReference(chbuffer);
                        break;
                }

                int min = 1;
                int max = 1;
                parseMinMax(chbuffer, ref min, ref max);
                sequence.addMatchable(new Expression(operand, min, max));
            }

            return sequence;
        }

        // Reads a symbol and resolves it to an already known matchable, or to a deferred reference
        // that is resolved on first use.
        private static Matchable parseReference(CharacterBuffer chbuffer)
        {
            char[] sym = chbuffer.findSymbol();
            if (sym.Length == 0)
            {
                // findSymbol() did not advance; without this check the caller would loop forever.
                throw new FormatException(String.Format(
                    CultureInfo.InvariantCulture,
                    "Unexpected character '{0}' at position {1} in token definition",
                    chbuffer.Current,
                    chbuffer.Position));
            }

            Matchable m = Matchable.getNamedMatchable(sym);
            if (m == null)
            {
                m = new Matchable.DeferredMatchable(sym);
            }
            return m;
        }

        /// <summary>Advances the buffer past characters with a code of 0x20 or below.</summary>
        public void parseWhiteSpace(CharacterBuffer chbuffer)
        {
            while (!chbuffer.EndOfBuffer && (chbuffer.Current <= 0x20))
            {
                chbuffer.MoveNext();
            }
        }

        /// <summary>
        /// Parses an optional repetition suffix at the current position: <c>{n}</c>, <c>{n,}</c>,
        /// <c>{,m}</c>, <c>{n,m}</c> or <c>{,}</c>. Without a suffix, or with an empty <c>{}</c>,
        /// both values are set to 1.
        /// </summary>
        /// <param name="chbuffer">Buffer positioned directly behind the repeated element.</param>
        /// <param name="min">Receives the minimum repetition count (0 when omitted before the comma).</param>
        /// <param name="max">Receives the maximum repetition count (<see cref="int.MaxValue"/> when omitted after the comma).</param>
        /// <exception cref="FormatException">A bound is not a valid integer.</exception>
        public void parseMinMax(CharacterBuffer chbuffer, ref int min, ref int max)
        {
            min = 1;
            max = 1;

            if (chbuffer.Current != '{')
            {
                return;
            }

            chbuffer.MoveNext();
            char[] def = chbuffer.find('}');
            if (def.Length == 0)
            {
                return;
            }

            int pcomma = Array.IndexOf(def, ',');
            if (pcomma < 0)
            {
                // "{n}": exactly n repetitions.
                min = int.Parse(new String(def), CultureInfo.InvariantCulture);
                max = min;
                return;
            }

            min = (pcomma == 0)
                ? 0
                : int.Parse(new String(def, 0, pcomma), CultureInfo.InvariantCulture);

            int afterComma = pcomma + 1;
            max = (afterComma == def.Length)
                ? int.MaxValue
                : int.Parse(new String(def, afterComma, def.Length - afterComma), CultureInfo.InvariantCulture);
        }
    }
}

// File: SharpLexer/Lexer.cs
namespace lexer
{
    /// <summary>Placeholder for the lexer front end; it has no behaviour yet.</summary>
    public class Lexer
    {
        public Lexer()
        {
        }
    }
}
// File: SharpLexer/MainClass.cs
using System;
using lexer.buffer;
using lexer.match;

namespace lexer
{
    /// <summary>
    /// Console demo: loads the bundled grammars, matches a few number literals and then scans a
    /// small multi-line sample, printing every token flagged with "notice".
    /// </summary>
    public class MainClass
    {
        public static void Main(String[] args)
        {
            Grammar grammar = new Grammar();
            // The file ships as "Fundamentals.xml"; the all-lowercase name only resolved on
            // case-insensitive file systems.
            grammar.Load("Fundamentals.xml");
            grammar.Load("TestGrammar.xml");

            Sequence num = grammar.getSequence("number");

            String[] tests = new string[]
            {
                "32",
                "032",
                "-189463738.34gdts"
            };

            Console.WriteLine("Number Defintion: {0}", num.ToString());

            foreach (string l in tests)
            {
                Console.WriteLine("Matching {0}", l);
                MatchedPart[] mp = num.Match(new CharacterBuffer(l));
                if (mp == null)
                {
                    Console.WriteLine("Did not Match!");
                }
                else
                {
                    Console.WriteLine("Matched: {0}", mp.MatchingCharacters().AsString());
                }
            }

            Console.WriteLine("------------------------------------");

            String testsource = @"1
2
""Hallo Welt""
3
15.4
13.765
-3
-14.3
123456.7890
IchBinEinSymbol";

            Sequence testseq = grammar.getSequence("numbersandstrings");
            MatchedPart[] testmp = testseq.MatchNoticeable(new CharacterBuffer(testsource));
            if (testmp == null)
            {
                // MatchNoticeable returns null when nothing matched; enumerating it would throw.
                Console.WriteLine("Did not Match!");
                return;
            }

            // "{0,-10}" left-aligns the name in a 10 character column. The former "{0:-10}" passed
            // "-10" as a format specifier, which has no effect on strings.
            testmp.Notice((matchedPart) => Console.WriteLine("Found {0,-10} = {1}", matchedPart.Matchable.Name, matchedPart.MatchedString));
        }
    }
}

// Recovered from the collapsed patch: head of SharpLexer/README.md, which shares this source line.
//
// # Lexing Classes for .NET / Mono
//
// Regular Expression Language used for definition of tokens:
//
// - Whitespace is ignored
// - '\' is used as escape marker within character lists
//
// - [] define a character list for matching
//     a) [a..z] match every character from 'a' to 'z' (included)
//     b) [g] match character 'g'
//     c) [egt] match one of the given characters
//     // INVALID: d) characters may be defined by the character itself (e.g: H), by numerical value (e.g.: 32 or 0x20)
//     e) [a..i/d..f] matches (continues on the next source line)
characters 'a' to 'i' but excludes characters 'd' to 'f' from matching + f) [a\.z] matches characters 'a','.' and 'z' + g) [abcijkx..] matches characters 'a','b','c','i','j','k' and every character from 'x' to the highest char (0xFFFF) + +- (...) defines a group that is matched as a whole + +- | defines an alternative matching path, e.g. [.] | [,] matches a "." or a ",", but only one character each time matching happens + +- {} defines a repeated match for the preceding expression: + a) no suffix, "{}" or "{1}" match exactly one time + b) {3,} match at least 3 times, without upper limit + c) {1,3} match 1 to 3 times + d) {,3} match 0 to 3 times + +- every other word consisting of the characters 0..9, a..z, A..Z and '_' is considered a reference to another named expression + +examples: + +a numeric literal may be defined by: + + [-]{0,1} [1..9] [0..9]{0,} ( [.] [0..9]{1,} ){0,1} + +a possible string literal: + +nonwhitespace: [\0x0021..] + +string: ["] ( nonwhitespace | ([\\] ["]) ) ["] + + + + +------------------------------------------------------------ + +[-]{0..1} digit19 digit{0..} ( [.] 
digit{1..} ){0..1} + +[-]{0..1} <= Expression + [-] <= Matchable + +digit19{1..1} <= Expression + digit19 <= Matchable + + +Matchable: A singleton matchable object (a token or charactergroup) +Expression: Combine a Matchable with an interval definition + + + + + + + + + diff --git a/SharpLexer/SharpLexer.csproj b/SharpLexer/SharpLexer.csproj new file mode 100644 index 0000000..98dfa28 --- /dev/null +++ b/SharpLexer/SharpLexer.csproj @@ -0,0 +1,72 @@ + + + + Debug + x86 + {177C81C7-F6E3-494C-8866-2E3E134969C0} + Exe + lexer + SharpLexer + v4.5 + + + true + full + false + bin\Debug + DEBUG; + prompt + 4 + x86 + + + true + bin\Release + prompt + 4 + x86 + + + + + + + + + + + + + + + + + + + + + + + + PreserveNewest + + + + PreserveNewest + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/SharpLexer/TestGrammar.xml b/SharpLexer/TestGrammar.xml new file mode 100644 index 0000000..0686940 --- /dev/null +++ b/SharpLexer/TestGrammar.xml @@ -0,0 +1,23 @@ + + + + alpha alphadigit{,} + [-]{,1} digit19 digit{0,} ( [.] digit{1,} ){0,1} + ["] ( [../"] | [\\] ["] ){,} ["] + + number | string + + symbol + symbol + + [+-*\/%=?] 
// Recovered from the collapsed patch: tail of SharpLexer/TestGrammar.xml. The XML tags were lost in
// extraction; only the token bodies survive:
//   (lvalue operator){0,1} rvalue [;]
//   "if" whitespace "(" rvalue ")" statement
//   "{" source "}"
//   operation | conditional | block
//   statement{,}
//   (whitespace{,} (number | string | symbol)){,}

using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Text;
using lexer.buffer;

// Files: SharpLexer/buffer/CharacterBuffer.cs, SharpLexer/buffer/DefinitionReader.cs
namespace lexer.buffer
{
    /// <summary>
    /// A private copy of a character array with a read cursor and a stack of saved cursor
    /// positions. Reading beyond the end yields 0xFFFF, reading before the start yields 0.
    /// </summary>
    public class CharacterBuffer
    {
        private const char BeyondEnd = (char)0xFFFF;

        private readonly char[] characters;
        private int position;

        private readonly Stack<int> positionStack = new Stack<int>();

        /// <summary>Creates a buffer over a copy of <paramref name="characters"/>.</summary>
        /// <exception cref="ArgumentNullException"><paramref name="characters"/> is null.</exception>
        public CharacterBuffer(char[] characters)
        {
            if (characters == null)
            {
                throw new ArgumentNullException(nameof(characters));
            }
            this.characters = (char[])characters.Clone();
        }

        /// <summary>Creates a buffer over the characters of a string.</summary>
        /// <exception cref="ArgumentNullException"><paramref name="characters"/> is null.</exception>
        public CharacterBuffer(String characters)
            : this(toCharArray(characters))
        {
        }

        private static char[] toCharArray(String text)
        {
            if (text == null)
            {
                throw new ArgumentNullException("characters");
            }
            return text.ToCharArray();
        }

        /// <summary>Saves the current position on the position stack.</summary>
        public void Push()
        {
            positionStack.Push(position);
        }

        /// <summary>Restores the most recently saved position.</summary>
        /// <exception cref="InvalidOperationException">No position is saved.</exception>
        public void Pop()
        {
            this.position = positionStack.Pop();
        }

        /// <summary>Unchecked access to the character at absolute index <paramref name="n"/>.</summary>
        public char this[int n]
        {
            get { return this.characters[n]; }
        }

        /// <summary>Number of characters in the buffer.</summary>
        public int Length
        {
            get { return this.characters.Length; }
        }

        /// <summary>Character at the cursor (0xFFFF at or beyond the end).</summary>
        public char Current
        {
            get { return CharAt(this.position); }
        }

        /// <summary>Character directly before the cursor.</summary>
        public char Last { get { return CharAt(this.position - 1); } }

        /// <summary>Character directly behind the cursor.</summary>
        public char Next { get { return CharAt(this.position + 1); } }

        /// <summary>Character two positions behind the cursor.</summary>
        public char NextNext
        {
            // The former hand-written check tested position + 1 but read position + 2, which threw
            // when the cursor was on the second-to-last character.
            get { return CharAt(this.position + 2); }
        }

        /// <summary>
        /// Bounds-tolerant read: returns 0xFFFF for indexes at or beyond the end and 0 for
        /// negative indexes.
        /// </summary>
        public char CharAt(int position)
        {
            if (position >= this.characters.Length)
            {
                return BeyondEnd;
            }
            if (position < 0)
            {
                return (char)0;
            }
            return this.characters[position];
        }

        /// <summary>Absolute cursor position; may be set freely, also beyond the end.</summary>
        public int Position
        {
            get { return this.position; }
            set { this.position = value; }
        }

        /// <summary>Character <paramref name="n"/> positions away from the cursor (see <see cref="CharAt"/>).</summary>
        public char next(int n)
        {
            return CharAt(this.position + n);
        }

        /// <summary>
        /// Advances the cursor by one and returns the character now under it. Note that the end of
        /// the buffer is reported as 0 here, whereas <see cref="Current"/> reports 0xFFFF.
        /// </summary>
        public char MoveNext()
        {
            this.position++;
            if (EndOfBuffer)
            {
                return (char)0;
            }
            return this.characters[this.position];
        }

        /// <summary>True when the cursor is at or beyond the end of the buffer.</summary>
        public bool EndOfBuffer
        {
            get { return this.position >= this.characters.Length; }
        }

        /// <summary>
        /// Returns the characters between a saved position and the cursor. The start is the last
        /// element in the stack's enumeration order, which for <see cref="Stack{T}"/> is the oldest
        /// saved position, not the most recent one.
        /// NOTE(review): confirm whether the most recent position (Peek) was intended.
        /// </summary>
        /// <exception cref="InvalidOperationException">No position is saved.</exception>
        public char[] getSection()
        {
            int start = positionStack.Last();
            return getSection(start, position - start);
        }

        /// <summary>Returns a copy of <paramref name="len"/> characters starting at <paramref name="start"/>.</summary>
        public char[] getSection(int start, int len)
        {
            char[] result = new char[len];
            Array.Copy(this.characters, start, result, 0, len);
            return result;
        }

        /// <summary>See <see cref="find(char[])"/>.</summary>
        public char[] find(char ch)
        {
            return find(new char[] { ch });
        }

        /// <summary>
        /// Collects characters up to the first occurrence of any character in <paramref name="ch"/>
        /// and moves the cursor behind that terminator. When no terminator occurs, the remainder of
        /// the buffer is returned.
        /// </summary>
        public char[] find(char[] ch)
        {
            List<char> collected = new List<char>();
            while (!EndOfBuffer)
            {
                char c = Current;
                MoveNext();
                if (Array.IndexOf(ch, c) >= 0)
                {
                    break;
                }
                collected.Add(c);
            }
            return collected.ToArray();
        }

        /// <summary>See <see cref="findUnescaped(char[])"/>.</summary>
        public char[] findUnescaped(char find)
        {
            return findUnescaped(new char[] { find });
        }

        /// <summary>
        /// Like <see cref="find(char[])"/>, but a terminator preceded by '\' does not terminate.
        /// Escape markers stay in the returned text.
        /// </summary>
        /// <returns>The raw text before the terminator, or null when no unescaped terminator exists.</returns>
        public char[] findUnescaped(char[] find)
        {
            int start = position;

            while (!EndOfBuffer)
            {
                char c = Current;
                if (Array.IndexOf(find, c) >= 0)
                {
                    char[] section = getSection(start, position - start);
                    MoveNext();
                    return section;
                }

                if (c == '\\')
                {
                    // Skip the marker; the MoveNext below then skips the escaped character.
                    MoveNext();
                }
                MoveNext();
            }
            return null;
        }

        /// <summary>
        /// Reads the longest run of letters, digits and '_' at the cursor. Returns an empty array,
        /// without moving the cursor, when the current character is none of those.
        /// </summary>
        public char[] findSymbol()
        {
            List<char> symbol = new List<char>();
            while (!EndOfBuffer && (char.IsLetterOrDigit(Current) || (Current == '_')))
            {
                symbol.Add(Current);
                MoveNext();
            }
#if DEBUG
            Console.WriteLine("findSymbol() = {0}",new String(symbol.ToArray()));
#endif
            return symbol.ToArray();
        }
    }

    /// <summary>
    /// Tokenizes the inside of a character list: resolves '\' escapes (including \0xHHHH) and
    /// reports ".." as <see cref="OP_INTERVAL"/>. Operator values lie above the char range.
    /// </summary>
    public class DefinitionReader
    {
        public const int OP_INTERVAL = 0x00010000;
        public const int OP_EOB = 0x10000000;

        private readonly char[] definition;
        private int position;

        private int currentChar = -1;

        /// <summary>Creates a reader and positions it on the first token.</summary>
        /// <exception cref="ArgumentNullException"><paramref name="definition"/> is null.</exception>
        /// <exception cref="FormatException">The definition starts with a malformed escape.</exception>
        public DefinitionReader(char[] definition)
        {
            if (definition == null)
            {
                throw new ArgumentNullException(nameof(definition));
            }
            this.definition = definition;
            this.position = 0;
            MoveNext();
        }

        /// <summary>Current token: a character code, <see cref="OP_INTERVAL"/> or <see cref="OP_EOB"/>.</summary>
        public int Current
        {
            get { return this.currentChar; }
        }

        /// <summary>Reads the next token and returns it.</summary>
        /// <exception cref="FormatException">An escape is incomplete or has invalid hex digits.</exception>
        public int MoveNext()
        {
            if (this.position >= this.definition.Length)
            {
                currentChar = OP_EOB;
                return currentChar;
            }

            char c = this.definition[this.position++];
            if (c == '\\')
            {
                currentChar = readEscaped();
            }
            else if ((c == '.') && (this.position < this.definition.Length) && (this.definition[this.position] == '.'))
            {
                this.position++;
                currentChar = OP_INTERVAL;
            }
            else
            {
                currentChar = c;
            }
            return currentChar;
        }

        // Reads what follows a '\': either "0x" plus four hex digits or a single literal character.
        private int readEscaped()
        {
            if (this.position >= this.definition.Length)
            {
                throw new FormatException("Escape marker '\\' at end of character list definition");
            }

            char escaped = this.definition[this.position++];
            bool isHex = (escaped == '0')
                && (this.position < this.definition.Length)
                && (this.definition[this.position] == 'x');
            if (!isHex)
            {
                return escaped;
            }

            this.position++; // skip 'x'
            if (this.position + 4 > this.definition.Length)
            {
                throw new FormatException("Escape \\0x must be followed by four hexadecimal digits");
            }

            String hex = new String(this.definition, this.position, 4);
            this.position += 4;
            return (char)int.Parse(hex, NumberStyles.HexNumber, CultureInfo.InvariantCulture);
        }
    }
}

// Files: SharpLexer/match/*.cs
namespace lexer.match
{
    /// <summary>Handler signature for <see cref="Matchable.OnMatched"/>.</summary>
    public delegate void MatchedDelegate(MatchedPart matchedPart);

    /// <summary>Callback signature for <see cref="MatchedPartArray.Notice"/>.</summary>
    public delegate void MatchedPartNoticeDelegate(MatchedPart matchedPart);

    /// <summary>
    /// Base class of everything that can be matched against a <see cref="CharacterBuffer"/>.
    /// Instances constructed with a name are entered into a process-wide registry.
    /// </summary>
    public abstract class Matchable
    {
        static readonly Dictionary<String, Matchable> namedMatchables = new Dictionary<String, Matchable>();

        // Declared by the original API; nothing in this file raises it.
        public event MatchedDelegate OnMatched;

        public String Name { get; private set; }

        /// <summary>When set, <see cref="MatchNoticeable"/> merges all matched parts into one part owned by this instance.</summary>
        public bool Grouping { get; set; }

        /// <summary>When set, parts owned by this instance are reported by <see cref="MatchedPartArray.Notice"/>.</summary>
        public bool Notice { get; set; }

        /// <exception cref="ArgumentException">Another matchable is already registered under <paramref name="name"/>.</exception>
        protected Matchable(String name = null)
        {
            this.Name = name;

            if (name != null)
            {
                namedMatchables.Add(name, this);
            }
        }

        /// <summary>
        /// Runs <see cref="Match"/> and, if <see cref="Grouping"/> is set, collapses a successful
        /// result into a single part attributed to this matchable.
        /// </summary>
        /// <returns>The matched parts, or null when there is no match.</returns>
        public MatchedPart[] MatchNoticeable(CharacterBuffer chbuffer)
        {
            MatchedPart[] mparts = Match(chbuffer);
            if (!Grouping || (mparts == null))
            {
                return mparts;
            }
            return new MatchedPart[] { new MatchedPart(this, mparts.MatchingCharacters()) };
        }

        /// <summary>
        /// Matches this matchable at the current buffer position. The buffer position is the same
        /// after the call as before it.
        /// </summary>
        /// <returns>The matched parts, or null when there is no match.</returns>
        public abstract MatchedPart[] Match(CharacterBuffer chbuffer);

        /// <summary>
        /// Default text form: the name, or the type name for unnamed instances. This used to call
        /// <see cref="ToString(bool)"/>, which calls back into ToString() and so recursed without
        /// end for every subclass that has no override of its own.
        /// </summary>
        public override string ToString()
        {
            return this.Name ?? GetType().Name;
        }

        /// <summary>Returns the name when <paramref name="useSymbol"/> is set and a name exists, else <see cref="ToString()"/>.</summary>
        public virtual String ToString(bool useSymbol)
        {
            if (useSymbol && (this.Name != null))
            {
                return this.Name;
            }
            return ToString();
        }

        /// <summary>Looks up a registered matchable; returns null when the name is unknown.</summary>
        public static Matchable getNamedMatchable(String name)
        {
            Matchable found;
            return namedMatchables.TryGetValue(name, out found) ? found : null;
        }

        /// <summary>Looks up a registered matchable; returns null when the name is unknown.</summary>
        public static Matchable getNamedMatchable(char[] name)
        {
            return getNamedMatchable(new String(name));
        }

        /// <summary>
        /// Reference to a named matchable that did not exist yet when the reference was parsed. The
        /// target is resolved on the first match attempt.
        /// </summary>
        public class DeferredMatchable : Matchable
        {
            Matchable matchable;

            public DeferredMatchable(char[] name)
                : this(new String(name))
            {
            }

            public DeferredMatchable(String name)
                : base()
            {
                // Set directly instead of via base(name) so the reference itself is not registered.
                Name = name;
                matchable = null;
            }

            /// <exception cref="KeyNotFoundException">No matchable of the referenced name is registered.</exception>
            public override MatchedPart[] Match(CharacterBuffer chbuffer)
            {
                if (matchable == null)
                {
                    matchable = getNamedMatchable(Name);
                }
                if (matchable == null)
                {
                    throw new KeyNotFoundException(String.Format("Deferred matchable '{0}' was not found", Name));
                }
                // MatchNoticeable, so a "grouping" target behaves the same as when it is referenced
                // directly.
                return matchable.MatchNoticeable(chbuffer);
            }

            public override string ToString()
            {
                return Name;
            }
        }
    }

    /// <summary>A run of matched characters together with the matchable that produced it.</summary>
    public class MatchedPart
    {
        public Matchable Matchable { get; private set; }
        public char[] MatchedCharacters { get; private set; }

        public int Length { get { return this.MatchedCharacters.Length; } }

        public MatchedPart(Matchable matchable, char[] characters)
        {
            this.Matchable = matchable;
            this.MatchedCharacters = characters;
        }

        public String MatchedString { get { return new String(MatchedCharacters); } }
    }

    /// <summary>Helpers for match results; a null array is treated as "no parts".</summary>
    public static class MatchedPartArray
    {
        /// <summary>Total number of characters in all parts.</summary>
        public static int MatchedLength(this MatchedPart[] parts)
        {
            if (parts == null)
            {
                return 0;
            }

            int total = 0;
            foreach (MatchedPart p in parts)
            {
                total += p.Length;
            }
            return total;
        }

        /// <summary>Concatenated characters of all parts.</summary>
        public static char[] MatchingCharacters(this MatchedPart[] parts)
        {
            if ((parts == null) || (parts.Length == 0))
            {
                return new char[0];
            }

            List<char> characters = new List<char>();
            foreach (MatchedPart mp in parts)
            {
                characters.AddRange(mp.MatchedCharacters);
            }
            return characters.ToArray();
        }

        public static String AsString(this char[] chars)
        {
            return new String(chars);
        }

        /// <summary>Invokes <paramref name="notice"/> for every part whose matchable has <see cref="Matchable.Notice"/> set.</summary>
        public static void Notice(this MatchedPart[] matchedParts, MatchedPartNoticeDelegate notice)
        {
            if (matchedParts == null)
            {
                return;
            }

            foreach (MatchedPart mpart in matchedParts)
            {
                if (mpart.Matchable.Notice)
                {
                    notice(mpart);
                }
            }
        }
    }

    /// <summary>Matches its children one after another; all of them have to match.</summary>
    public class Sequence : Matchable
    {
        public List<Matchable> matchables = new List<Matchable>();

        public Sequence()
        {
        }

        public Sequence(String name)
            : base(name)
        {
        }

        public void addMatchable(Matchable matchable)
        {
            this.matchables.Add(matchable);
        }

        /// <summary>Snapshot of the children.</summary>
        public Matchable[] Matchables { get { return this.matchables.ToArray(); } }

        public override MatchedPart[] Match(CharacterBuffer chbuffer)
        {
            List<MatchedPart> matchedParts = new List<MatchedPart>();
            chbuffer.Push();
            try
            {
                foreach (Matchable m in matchables)
                {
                    MatchedPart[] mparts = m.MatchNoticeable(chbuffer);
                    if (mparts == null)
                    {
                        return null;
                    }
                    matchedParts.AddRange(mparts);
                    chbuffer.Position += mparts.MatchedLength();
                }
                return matchedParts.ToArray();
            }
            finally
            {
                // Restore the start position on every path, including exceptions.
                chbuffer.Pop();
            }
        }

        public override string ToString()
        {
            StringBuilder sb = new StringBuilder();

            sb.Append("( ");
            foreach (Matchable m in this.matchables)
            {
                sb.Append(m.ToString(true));
                sb.Append(' ');
            }
            sb.Append(")");
            return sb.ToString();
        }
    }

    /// <summary>Matches the child that yields the longest match; the first one wins a tie.</summary>
    public class Alternative : Sequence
    {
        public Alternative()
        {
        }

        public override MatchedPart[] Match(CharacterBuffer chbuffer)
        {
            MatchedPart[] best = null;
            int bestLength = -1;

            foreach (Matchable m in matchables)
            {
                chbuffer.Push();
                try
                {
                    MatchedPart[] candidate = m.MatchNoticeable(chbuffer);
                    if (candidate != null)
                    {
                        int candidateLength = candidate.MatchedLength();
                        if ((best == null) || (candidateLength > bestLength))
                        {
                            best = candidate;
                            bestLength = candidateLength;
                        }
                    }
                }
                finally
                {
                    chbuffer.Pop();
                }
            }

            return best;
        }

        public override string ToString()
        {
            StringBuilder sb = new StringBuilder();

            sb.Append("( ");
            for (int n = 0; n < this.matchables.Count; n++)
            {
                if (n > 0)
                {
                    sb.Append(" | ");
                }
                sb.Append(this.matchables[n].ToString(true));
            }
            sb.Append(")");
            return sb.ToString();
        }
    }

    /// <summary>Repeats a matchable between <see cref="Minimum"/> and <see cref="Maximum"/> times, greedily.</summary>
    public class Expression : Matchable
    {
        public Matchable Matchable { get; private set; }

        public int Minimum { get; private set; } = 0;
        public int Maximum { get; private set; } = int.MaxValue;

        /// <exception cref="ArgumentNullException"><paramref name="matchable"/> is null.</exception>
        public Expression(Matchable matchable, int minimum = 1, int maximum = 1)
        {
            if (matchable == null)
            {
                throw new ArgumentNullException(nameof(matchable));
            }
            this.Matchable = matchable;
            this.Minimum = minimum;
            this.Maximum = maximum;
        }

        public override MatchedPart[] Match(CharacterBuffer chbuffer)
        {
            List<MatchedPart> matchedParts = new List<MatchedPart>();
            int count = 0;

            chbuffer.Push();
            try
            {
                while (count < Maximum)
                {
                    MatchedPart[] mparts = Matchable.MatchNoticeable(chbuffer);
                    if (mparts == null)
                    {
                        break;
                    }

                    count++;
                    matchedParts.AddRange(mparts);

                    int consumed = mparts.MatchedLength();
                    if (consumed == 0)
                    {
                        // An empty match would succeed again at the same position on every further
                        // round (up to int.MaxValue of them), so any remaining minimum counts as
                        // satisfied and the loop stops.
                        count = Math.Max(count, Minimum);
                        break;
                    }
                    chbuffer.Position += consumed;
                }
            }
            finally
            {
                chbuffer.Pop();
            }

            return (count < Minimum) ? null : matchedParts.ToArray();
        }

        public override string ToString()
        {
            return String.Format("{0}{{{1},{2}}}",
                                 this.Matchable.ToString(true),
                                 Minimum == 0 ? "" : Minimum.ToString(),
                                 Maximum == int.MaxValue ? "" : Maximum.ToString()
                                );
        }
    }

    /// <summary>
    /// A character list such as <c>[a..z_/q]</c>: included single characters and intervals,
    /// optionally followed by '/' and excluded ones. Exclusions take precedence.
    /// </summary>
    public class CharacterGroup : Matchable
    {
        struct chinterval
        {
            public char first;
            public char last;

            public chinterval(char first, char last)
            {
                this.first = first;
                this.last = last;
            }

            public bool Match(char ch)
            {
                return (ch >= first) && (ch <= last);
            }

            public override string ToString()
            {
                return string.Format("[chinterval min=0x{0:X4} max=0x{1:X4}]", (int)this.first, (int)this.last);
            }
        }

        List<char> includeCharacters = new List<char>();
        List<chinterval> includeIntervals = new List<chinterval>();
        List<char> excludeCharacters = new List<char>();
        List<chinterval> excludeIntervals = new List<chinterval>();

        /// <summary>
        /// Parses a character list starting at the '[' under the cursor and leaves the cursor
        /// behind the closing ']'.
        /// </summary>
        /// <exception cref="ArgumentNullException"><paramref name="chbuffer"/> is null.</exception>
        /// <exception cref="FormatException">The list does not start with '[', is not closed, or contains a malformed escape.</exception>
        public CharacterGroup(CharacterBuffer chbuffer)
        {
            if (chbuffer == null)
            {
                throw new ArgumentNullException(nameof(chbuffer));
            }
            if (chbuffer.Current != '[')
            {
                throw new FormatException("CharacterGroup Definition must start with '['");
            }

            chbuffer.MoveNext();

            char[] idef = chbuffer.findUnescaped(new char[] { ']', '/' });
            if (idef == null)
            {
                // findUnescaped reports a missing terminator as null; this used to end in a
                // NullReferenceException further down.
                throw new FormatException("CharacterGroup Definition is missing the closing ']'");
            }

            char[] edef = null;
            if (chbuffer.Last == '/')
            {
                edef = chbuffer.findUnescaped(']');
                if (edef == null)
                {
                    throw new FormatException("CharacterGroup Definition is missing the closing ']'");
                }
            }

#if DEBUG
            Console.WriteLine("CharacterGroup: include = {0}", new String(idef));
            Console.WriteLine("CharacterGroup: exclude = {0}", new String(edef));
#endif
            parseComponents(includeCharacters, includeIntervals, idef);
            if (edef != null)
            {
                parseComponents(excludeCharacters, excludeIntervals, edef);
            }
        }

        // Splits a definition into single characters and intervals. An interval takes the character
        // before ".." as lower bound (0 if none) and the one after it as upper bound (0xFFFF if none).
        private void parseComponents(List<char> cList, List<chinterval> iList, char[] def)
        {
            DefinitionReader dr = new DefinitionReader(def);

            while (dr.Current != DefinitionReader.OP_EOB)
            {
                if (dr.Current == DefinitionReader.OP_INTERVAL)
                {
                    char min = (char)0;
                    char max = (char)0xffff;

                    if (cList.Count > 0)
                    {
                        min = cList[cList.Count - 1];
                        cList.RemoveAt(cList.Count - 1);
                    }

                    dr.MoveNext();

                    if (dr.Current != DefinitionReader.OP_EOB)
                    {
                        max = (char)dr.Current;
                    }
                    iList.Add(new chinterval(min, max));
                }
                else
                {
                    cList.Add((char)dr.Current);
                }
                dr.MoveNext();
            }
        }

        /// <summary>True when <paramref name="ch"/> is included and not excluded.</summary>
        public bool Match(char ch)
        {
            if (contains(excludeCharacters, excludeIntervals, ch))
            {
                return false;
            }
            return contains(includeCharacters, includeIntervals, ch);
        }

        private static bool contains(List<char> chars, List<chinterval> intervals, char ch)
        {
            foreach (chinterval i in intervals)
            {
                if (i.Match(ch))
                {
                    return true;
                }
            }
            return chars.Contains(ch);
        }

        /// <summary>Matches exactly one character at the cursor.</summary>
        public override MatchedPart[] Match(CharacterBuffer chbuffer)
        {
            if (!chbuffer.EndOfBuffer && Match(chbuffer.Current))
            {
                return new MatchedPart[] { new MatchedPart(this, new char[] { chbuffer.Current }) };
            }
            return null;
        }

        /// <summary>
        /// Renders the list in definition syntax. Characters up to 0x20 are written as \0xHHHH.
        /// </summary>
        public override string ToString()
        {
            StringBuilder sb = new StringBuilder();
            sb.Append("[");
            appendComponents(sb, includeCharacters, includeIntervals);

            if ((excludeIntervals.Count > 0) || (excludeCharacters.Count > 0))
            {
                sb.Append("/");
                // Formerly this printed the include characters a second time, left out the exclude
                // intervals and wrote the escape without "0x".
                appendComponents(sb, excludeCharacters, excludeIntervals);
            }

            sb.Append("]");
            return sb.ToString();
        }

        private static void appendComponents(StringBuilder sb, List<char> chars, List<chinterval> intervals)
        {
            foreach (char ch in chars)
            {
                appendCharacter(sb, ch);
            }
            foreach (chinterval chi in intervals)
            {
                appendCharacter(sb, chi.first);
                sb.Append("..");
                appendCharacter(sb, chi.last);
            }
        }

        private static void appendCharacter(StringBuilder sb, char ch)
        {
            if (ch <= 0x20)
            {
                sb.AppendFormat("\\0x{0:X4}", (int)ch);
            }
            else
            {
                sb.Append(ch);
            }
        }
    }

    /// <summary>A literal character sequence, written in a definition as <c>"text"</c>.</summary>
    public class CharacterSequence : Matchable
    {
        char[] sequence;

        /// <summary>
        /// Parses a quoted literal starting at the '"' under the cursor and leaves the cursor
        /// behind the closing quote. '\' makes the following character literal.
        /// </summary>
        /// <exception cref="ArgumentNullException"><paramref name="chbuffer"/> is null.</exception>
        /// <exception cref="FormatException">The literal does not start with '"' or is not closed.</exception>
        public CharacterSequence(CharacterBuffer chbuffer)
        {
            if (chbuffer == null)
            {
                throw new ArgumentNullException(nameof(chbuffer));
            }
            if (chbuffer.Current != '"')
            {
                throw new FormatException("CharacterSequence must start with \"");
            }

            List<char> characters = new List<char>();

            chbuffer.MoveNext();
            while (!chbuffer.EndOfBuffer && (chbuffer.Current != '"'))
            {
                if (chbuffer.Current == '\\')
                {
                    chbuffer.MoveNext();
                    if (chbuffer.EndOfBuffer)
                    {
                        break;
                    }
                }
                characters.Add(chbuffer.Current);
                chbuffer.MoveNext();
            }

            if (chbuffer.EndOfBuffer)
            {
                // Without this check an unterminated literal kept appending 0xFFFF forever.
                throw new FormatException("CharacterSequence is missing the closing \"");
            }

            sequence = characters.ToArray();
            chbuffer.MoveNext();
        }

        /// <summary>
        /// Matches the literal against the input, starting at the cursor. The quote check that
        /// used to live here belongs to definition parsing (the constructor): on real input it
        /// threw for anything not starting with '"' and compared from one character too far.
        /// </summary>
        public override MatchedPart[] Match(CharacterBuffer chbuffer)
        {
            int start = chbuffer.Position;
            if ((start < 0) || (start + sequence.Length > chbuffer.Length))
            {
                return null;
            }

            for (int n = 0; n < sequence.Length; n++)
            {
                if (chbuffer[start + n] != sequence[n])
                {
                    return null;
                }
            }
            return new MatchedPart[] { new MatchedPart(this, this.sequence) };
        }

        public override string ToString()
        {
            return "\"" + new String(sequence) + "\"";
        }
    }
}