Initial Commit

master
Harald Wolff 2018-03-07 21:05:21 +01:00
commit 44c2c89de0
19 changed files with 1297 additions and 0 deletions

40
.gitignore vendored 100644
View File

@ -0,0 +1,40 @@
# Autosave files
*~
# build
[Oo]bj/
[Bb]in/
packages/
TestResults/
# globs
Makefile.in
*.DS_Store
*.sln.cache
*.suo
*.cache
*.pidb
*.userprefs
*.usertasks
config.log
config.make
config.status
aclocal.m4
install-sh
autom4te.cache/
*.user
*.tar.gz
tarballs/
test-results/
Thumbs.db
# Mac bundle stuff
*.dmg
*.app
# resharper
*_Resharper.*
*.Resharper
# dotCover
*.dotCover

17
SharpLexer.sln 100644
View File

@ -0,0 +1,17 @@
Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 2012
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SharpLexer", "SharpLexer\SharpLexer.csproj", "{177C81C7-F6E3-494C-8866-2E3E134969C0}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|x86 = Debug|x86
Release|x86 = Release|x86
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{177C81C7-F6E3-494C-8866-2E3E134969C0}.Debug|x86.ActiveCfg = Debug|x86
{177C81C7-F6E3-494C-8866-2E3E134969C0}.Debug|x86.Build.0 = Debug|x86
{177C81C7-F6E3-494C-8866-2E3E134969C0}.Release|x86.ActiveCfg = Release|x86
{177C81C7-F6E3-494C-8866-2E3E134969C0}.Release|x86.Build.0 = Release|x86
EndGlobalSection
EndGlobal

View File

@ -0,0 +1,11 @@
<?xml version="1.0" encoding="UTF-8" ?>
<Grammar>
<Tokens>
<Token name="whitespace">[\0x0000..\0x0020]</Token>
<Token name="nonwhitespace">[\0x0021..]</Token>
<Token name="alpha">[a..zA..Z]</Token>
<Token name="digit">[0..9]</Token>
<Token name="alphadigit">alpha | digit</Token>
<Token name="digit19">[1..9]</Token>
</Tokens>
</Grammar>

View File

@ -0,0 +1,181 @@
using System;
using System.Xml;
using System.Collections.Generic;
using lexer.match;
using lexer.buffer;
namespace lexer
{
/// <summary>
/// A set of named token definitions. Definitions are read from XML documents whose
/// <c>/Grammar/Tokens/Token</c> elements carry a <c>name</c> attribute and hold the
/// token expression as their inner text.
/// </summary>
public class Grammar
{
    // Every token loaded so far, keyed by the value of its "name" attribute.
    Dictionary<String, Sequence> sequences = new Dictionary<string, Sequence>();

    /// <summary>Creates an empty grammar; use <c>Load</c> to add tokens.</summary>
    public Grammar()
    {
    }

    /// <summary>Creates a grammar and loads the tokens of the given XML file.</summary>
    public Grammar(String filename)
    {
        Load(filename);
    }

    /// <summary>Creates a grammar and loads the tokens of the given XML document.</summary>
    public Grammar(XmlDocument xml)
    {
        Load(xml);
    }

    /// <summary>Reads the XML file <paramref name="filename"/> and loads its tokens.</summary>
    public void Load(String filename)
    {
        XmlDocument xml = new XmlDocument();
        xml.Load(filename);
        Load(xml);
    }

    /// <summary>
    /// Parses every <c>/Grammar/Tokens/Token</c> element of <paramref name="xml"/> and adds
    /// the resulting sequence to this grammar. The presence of a <c>grouping</c> or
    /// <c>notice</c> attribute (regardless of its value) sets the matching flag.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="xml"/> is null.</exception>
    /// <exception cref="FormatException">A token element has no (or an empty) name.</exception>
    public void Load(XmlDocument xml)
    {
        if (xml == null)
        {
            throw new ArgumentNullException(nameof(xml));
        }

        XmlNodeList tokens = xml.SelectNodes("/Grammar/Tokens/Token");
        foreach (XmlNode node in tokens)
        {
            XmlElement ntoken = (XmlElement)node;

            // GetAttribute returns an empty string when the attribute is absent.
            String tokenName = ntoken.GetAttribute("name");
            if (String.IsNullOrEmpty(tokenName))
            {
                throw new FormatException("Token element is missing the required 'name' attribute");
            }

            Console.WriteLine("Loading Token: {0}", tokenName);

            CharacterBuffer chb = new CharacterBuffer(ntoken.InnerText);
            Sequence sequence = parseSequence(chb, tokenName);
            sequence.Grouping = ntoken.HasAttribute("grouping");
            sequence.Notice = ntoken.HasAttribute("notice");
            this.sequences.Add(tokenName, sequence);
        }
    }

    /// <summary>Returns the token that was loaded under <paramref name="name"/>.</summary>
    /// <exception cref="KeyNotFoundException">No such token has been loaded.</exception>
    public Sequence getSequence(String name)
    {
        Sequence sequence;
        if (!this.sequences.TryGetValue(name, out sequence))
        {
            throw new KeyNotFoundException(String.Format("No token named '{0}' has been loaded", name));
        }
        return sequence;
    }

    /// <summary>
    /// Parses a token expression from the current position of <paramref name="chbuffer"/>.
    /// Parsing stops at the end of the buffer or after a closing ')'.
    /// </summary>
    /// <param name="chbuffer">Buffer positioned at the start of the expression.</param>
    /// <param name="name">Optional name; a named sequence registers itself as a named matchable.</param>
    /// <returns>
    /// The parsed sequence. When a name is given the returned object always carries that
    /// name, also when the expression is an alternative ("a | b").
    /// </returns>
    /// <exception cref="FormatException">The expression contains an unexpected character.</exception>
    public Sequence parseSequence(CharacterBuffer chbuffer, String name = null)
    {
        if (name == null)
        {
            return parseUnnamedSequence(chbuffer);
        }

        // Create the named sequence before parsing so the name is registered at the same
        // point in time as before.
        Sequence named = new Sequence(name);
        Sequence body = parseUnnamedSequence(chbuffer);

        if (body is Alternative)
        {
            // Previously only the first branch ended up under the token name and the
            // Alternative itself stayed unnamed. Wrapping keeps all branches reachable
            // through the name.
            named.addMatchable(body);
        }
        else
        {
            foreach (Matchable m in body.matchables)
            {
                named.addMatchable(m);
            }
        }
        return named;
    }

    // Parses one expression into an unnamed Sequence (or Alternative when a '|' is met).
    private Sequence parseUnnamedSequence(CharacterBuffer chbuffer)
    {
        Sequence sequence = new Sequence();
        int min, max;

        while (!chbuffer.EndOfBuffer)
        {
            parseWhiteSpace(chbuffer);
            if (chbuffer.EndOfBuffer)
                break;

            min = 1;
            max = 1;

            if (chbuffer.Current == '[')
            {
                CharacterGroup cg = new CharacterGroup(chbuffer);
                parseMinMax(chbuffer, ref min, ref max);
                sequence.addMatchable(new Expression(cg, min, max));
            }
            else if (chbuffer.Current == '"')
            {
                CharacterSequence cs = new CharacterSequence(chbuffer);
                parseMinMax(chbuffer, ref min, ref max);
                sequence.addMatchable(new Expression(cs, min, max));
            }
            else if (chbuffer.Current == '(')
            {
                chbuffer.MoveNext();
                Sequence s = parseUnnamedSequence(chbuffer);
                parseMinMax(chbuffer, ref min, ref max);
                sequence.addMatchable(new Expression(s, min, max));
            }
            else if (chbuffer.Current == '|')
            {
                // Everything parsed so far is the first branch, the rest of this
                // (sub-)expression is the second one.
                Alternative alt = new Alternative();
                alt.addMatchable(sequence);
                chbuffer.MoveNext();
                alt.addMatchable(parseUnnamedSequence(chbuffer));
                return alt;
            }
            else if (chbuffer.Current == ')')
            {
                chbuffer.MoveNext();
                return sequence;
            }
            else
            {
                char[] sym = chbuffer.findSymbol();
                if (sym.Length == 0)
                {
                    // findSymbol() did not consume anything; without this check the loop
                    // would never advance.
                    throw new FormatException(String.Format(
                        "Unexpected character '{0}' at position {1} in token definition",
                        chbuffer.Current, chbuffer.Position));
                }

                Matchable m = Matchable.getNamedMatchable(sym);
                if (m == null)
                {
                    // Not defined yet: resolve the reference when it is first matched.
                    m = new Matchable.DeferredMatchable(sym);
                }
                parseMinMax(chbuffer, ref min, ref max);
                sequence.addMatchable(new Expression(m, min, max));
            }
        }
        return sequence;
    }

    /// <summary>Skips all characters with a code of 0x20 or below.</summary>
    public void parseWhiteSpace(CharacterBuffer chbuffer)
    {
        // At the end of the buffer Current yields 0xFFFF, which ends the loop.
        while (chbuffer.Current <= 0x20)
        {
            chbuffer.MoveNext();
        }
    }

    /// <summary>
    /// Parses an optional repetition suffix: "{n}", "{min,}", "{,max}", "{min,max}" or "{,}".
    /// Without a suffix, or with an empty "{}", both values are set to 1.
    /// </summary>
    /// <exception cref="FormatException">A bound is not a valid integer.</exception>
    public void parseMinMax(CharacterBuffer chbuffer, ref int min, ref int max)
    {
        if (chbuffer.Current == '{')
        {
            chbuffer.MoveNext();
            char[] def = chbuffer.find('}');
            if (def.Length > 0)
            {
                int pcomma = Array.IndexOf(def, ',');
                if (pcomma < 0)
                {
                    pcomma = def.Length;    // no comma at all: "{n}"
                }

                if (pcomma == 0)
                {
                    min = 0;                // "{,max}" or "{,}"
                }
                else
                {
                    min = int.Parse(new String(def, 0, pcomma),
                                    System.Globalization.CultureInfo.InvariantCulture);
                }

                if (pcomma == def.Length - 1)
                {
                    max = int.MaxValue;     // trailing comma: no upper bound
                }
                else if (pcomma == def.Length)
                {
                    max = min;              // exact count
                }
                else
                {
                    max = int.Parse(new String(def, pcomma + 1, def.Length - pcomma - 1),
                                    System.Globalization.CultureInfo.InvariantCulture);
                }
                return;
            }
        }
        min = 1;
        max = 1;
    }
}
}

View File

@ -0,0 +1,10 @@
using System;
namespace lexer
{
/// <summary>
/// Placeholder class: it currently has no members besides an empty constructor.
/// </summary>
public class Lexer
{
// Nothing to initialise yet.
public Lexer()
{
}
}
}

View File

@ -0,0 +1,59 @@
using System;
using lexer.match;
using lexer.buffer;
namespace lexer
{
/// <summary>
/// Console demo: loads the two grammar files, matches a few sample inputs against the
/// "number" token and then lists the noticeable tokens found in a multi-line sample.
/// </summary>
public class MainClass
{
    /// <summary>Program entry point; <paramref name="args"/> is not used.</summary>
    public static void Main(String[] args)
    {
        Grammar grammar = new Grammar();

        // The project item is "Fundamentals.xml"; use the same casing so loading also
        // works on case-sensitive file systems.
        grammar.Load("Fundamentals.xml");
        grammar.Load("TestGrammar.xml");

        Sequence num = grammar.getSequence("number");
        String[] tests = new string[]{
            "32",
            "032",
            "-189463738.34gdts"
        };

        Console.WriteLine("Number Defintion: {0}", num.ToString());
        foreach (string l in tests)
        {
            Console.WriteLine("Matching {0}", l);
            MatchedPart[] mp = num.Match(new CharacterBuffer(l));
            if (mp == null)
            {
                Console.WriteLine("Did not Match!");
            }
            else
            {
                Console.WriteLine("Matched: {0}", mp.MatchingCharacters().AsString());
            }
        }

        Console.WriteLine("------------------------------------");

        String testsource = @"1
2
""Hallo Welt""
3
15.4
13.765
-3
-14.3
123456.7890
IchBinEinSymbol";

        Sequence testseq = grammar.getSequence("numbersandstrings");
        MatchedPart[] testmp = testseq.MatchNoticeable(new CharacterBuffer(testsource));
        if (testmp == null)
        {
            // A failed match is reported as null; there is nothing to list then.
            Console.WriteLine("Did not Match!");
            return;
        }

        // "{0,-10}" left-aligns the name in a 10 character column. The former "{0:-10}"
        // was a format specifier, which has no effect on strings.
        testmp.Notice((matchedPart) => Console.WriteLine("Found {0,-10} = {1}", matchedPart.Matchable.Name, matchedPart.MatchedString));
    }
}
}

View File

@ -0,0 +1,69 @@
# Lexing Classes for .NET / Mono
Regular Expression Language used for definition of tokens:
- Whitespace is ignored
- '\' is used as escape marker within character lists
- [] define a character list for matching
a) [a..z] match every character from 'a' to 'z' (included)
b) [g] match character 'g'
c) [egt] match one of the given characters
// INVALID: d) characters may be defined by the character itself (e.g: H), by numerical value (e.g.: 32 or 0x20)
e) [a..i/d..f] matches characters 'a' to 'i' but excludes characters 'd' to 'f' from matching
f) [a\.z] matches characters 'a','.' and 'z'
g) [abcijkx..] matches characters 'a','b','c','i','j','k' and every character from 'x' to highest char (0xFFFF)
- (...) defines a group that is matched as a whole
- | define an alternative matching path, e.g. [.] | [,] matches a "." or a ",", but only one character each time matching happens
- {} defines a repeated match for the prepended expression:
a) "" or "{}" or "{1}" match exactly one time
b) {3,} match at least 3 times to infinite times
c) {1,3} match 1 to 3 times
d) {,3} match 0 to 3 times
- every other word consisting of the characters 0..9,a..z,A..Z is considered reference to another named expression
examples:
a numeric literal may be defined by:
[-]{0,1} [1..9] [0..9]{0,} ( [.] [0..9]{1,} ){0,1}
a possible string literal:
nonwhitespace: [\0x0021..]
string: ["] ( nonwhitespace | ([\\] ["]) ) ["]
------------------------------------------------------------
[-]{0,1} digit19 digit{0,} ( [.] digit{1,} ){0,1}
[-]{0,1} <= Expression
[-] <= Matchable
digit19{1,1} <= Expression
digit19 <= Matchable
Matchable: A singleton matchable object (a token or charactergroup)
Expression: Combine a Matchable with an interval definition

View File

@ -0,0 +1,72 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<PropertyGroup>
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
<Platform Condition=" '$(Platform)' == '' ">x86</Platform>
<ProjectGuid>{177C81C7-F6E3-494C-8866-2E3E134969C0}</ProjectGuid>
<OutputType>Exe</OutputType>
<RootNamespace>lexer</RootNamespace>
<AssemblyName>SharpLexer</AssemblyName>
<TargetFrameworkVersion>v4.5</TargetFrameworkVersion>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|x86' ">
<DebugSymbols>true</DebugSymbols>
<DebugType>full</DebugType>
<Optimize>false</Optimize>
<OutputPath>bin\Debug</OutputPath>
<DefineConstants>DEBUG;</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
<PlatformTarget>x86</PlatformTarget>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|x86' ">
<Optimize>true</Optimize>
<OutputPath>bin\Release</OutputPath>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
<PlatformTarget>x86</PlatformTarget>
</PropertyGroup>
<ItemGroup>
<Reference Include="System.Xml" />
<Reference Include="System.Xml.Linq" />
<Reference Include="System" />
</ItemGroup>
<ItemGroup>
<Compile Include="Lexer.cs" />
<Compile Include="MainClass.cs" />
<Compile Include="buffer\CharacterBuffer.cs" />
<Compile Include="Grammar.cs" />
<Compile Include="buffer\DefinitionReader.cs" />
<Compile Include="match\Matchable.cs" />
<Compile Include="match\MatchedPart.cs" />
<Compile Include="match\MatchedDelegate.cs" />
<Compile Include="match\Expression.cs" />
<Compile Include="match\Sequence.cs" />
<Compile Include="match\Alternative.cs" />
<Compile Include="match\CharacterGroup.cs" />
<Compile Include="match\CharacterSequence.cs" />
</ItemGroup>
<ItemGroup>
<None Include="TestGrammar.xml">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Include="README.md" />
<None Include="Fundamentals.xml">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
</ItemGroup>
<ItemGroup>
<Folder Include="buffer\" />
<Folder Include="match\" />
</ItemGroup>
<Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" />
<ProjectExtensions>
<MonoDevelop>
<Properties>
<Policies>
<DotNetNamingPolicy DirectoryNamespaceAssociation="PrefixedHierarchical" ResourceNamePolicy="FileFormatDefault" />
</Policies>
</Properties>
</MonoDevelop>
</ProjectExtensions>
</Project>

View File

@ -0,0 +1,23 @@
<?xml version="1.0" encoding="UTF-8" ?>
<Grammar>
<Tokens>
<Token name="symbol" notice="yes" grouping="yes">alpha alphadigit{,}</Token>
<Token name="number" notice="yes" grouping="yes">[-]{,1} digit19 digit{0,} ( [.] digit{1,} ){0,1}</Token>
<Token name="string" notice="yes" grouping="yes">["] ( [../"] | [\\] ["] ){,} ["]</Token>
<Token name="const_literal">number | string</Token>
<Token name="lvalue">symbol</Token>
<Token name="rvalue">symbol</Token>
<Token name="operator">[+-*\/%=?]</Token>
<Token name="operation">(lvalue operator){0,1} rvalue [;]</Token>
<Token name="conditional">"if" whitespace "(" rvalue ")" statement</Token>
<Token name="block">"{" source "}"</Token>
<Token name="statement">operation | conditional | block</Token>
<Token name="source">statement{,}</Token>
<Token name="numbersandstrings">(whitespace{,} (number | string | symbol)){,}</Token>
</Tokens>
</Grammar>

View File

@ -0,0 +1,187 @@
using System;
using System.Collections.Generic;
using System.Runtime.CompilerServices;
using System.Linq;
namespace lexer.buffer
{
/// <summary>
/// A read cursor over a private copy of a character array, with a stack of saved
/// positions for backtracking and a few scanning helpers.
/// </summary>
public class CharacterBuffer
{
    char[] characters;
    int position;
    Stack<int> positionStack = new Stack<int>();

    /// <summary>Creates a buffer over a copy of <paramref name="characters"/>.</summary>
    public CharacterBuffer(char[] characters)
    {
        this.characters = new char[characters.Length];
        Array.Copy(characters, this.characters, characters.Length);
    }

    /// <summary>Creates a buffer over the characters of a string.</summary>
    public CharacterBuffer(String characters)
        : this(characters.ToCharArray())
    {
    }

    /// <summary>Saves the current position on the position stack.</summary>
    public void Push()
    {
        positionStack.Push(position);
    }

    /// <summary>Restores the most recently saved position and removes it from the stack.</summary>
    public void Pop()
    {
        this.position = positionStack.Pop();
    }

    /// <summary>Unchecked access to the character at absolute index <paramref name="n"/>.</summary>
    public char this[int n]
    {
        get { return this.characters[n]; }
    }

    /// <summary>Character at the current position (see <see cref="CharAt"/> for out-of-range values).</summary>
    public char Current
    {
        get { return CharAt(this.position); }
    }

    /// <summary>Character directly before the current position.</summary>
    public char Last { get { return CharAt(this.position - 1); } }

    /// <summary>Character directly after the current position.</summary>
    public char Next { get { return CharAt(this.position + 1); } }

    /// <summary>
    /// Bounds-tolerant read: returns 0xFFFF for positions at or past the end and
    /// 0 for negative positions.
    /// </summary>
    public char CharAt(int position)
    {
        if (position >= this.characters.Length)
            return (char)0xFFFF;
        if (position < 0)
            return (char)0;
        return this.characters[position];
    }

    /// <summary>Gets or sets the current position; the value is not range-checked.</summary>
    public int Position
    {
        get { return this.position; }
        set { this.position = value; }
    }

    /// <summary>Character two places after the current position, 0xFFFF past the end.</summary>
    public char NextNext
    {
        // The previous check tested position + 1 but then read position + 2, which
        // threw when exactly one character was left after the current one.
        get { return CharAt(this.position + 2); }
    }

    /// <summary>Character <paramref name="n"/> places after the current position (bounds-tolerant).</summary>
    public char next(int n)
    {
        return CharAt(this.position + n);
    }

    /// <summary>
    /// Advances by one character and returns the new current character. Note that this
    /// returns 0 once the end is reached, whereas <see cref="Current"/> reports 0xFFFF there.
    /// </summary>
    public char MoveNext()
    {
        this.position++;
        if (EndOfBuffer)
            return (char)0;
        return this.characters[this.position];
    }

    /// <summary>True once the position is at or past the end of the characters.</summary>
    public bool EndOfBuffer
    {
        get { return this.position >= this.characters.Length; }
    }

    /// <summary>Returns the characters between a saved position and the current position.</summary>
    public char[] getSection()
    {
        // NOTE(review): enumerating a Stack<T> starts at the most recent entry, so Last()
        // yields the OLDEST saved position - confirm this is intended rather than Peek().
        int start = positionStack.Last();
        return getSection(start, position - start);
    }

    /// <summary>Copies <paramref name="len"/> characters starting at index <paramref name="start"/>.</summary>
    public char[] getSection(int start, int len)
    {
        char[] result = new char[len];
        for (int n = 0; n < len; n++)
        {
            result[n] = this.characters[start + n];
        }
        return result;
    }

    /// <summary>See <see cref="find(char[])"/>; searches for a single character.</summary>
    public char[] find(char ch)
    {
        return find(new char[] { ch });
    }

    /// <summary>
    /// Collects characters from the current position up to (not including) the first
    /// occurrence of any character in <paramref name="ch"/> and moves past that character.
    /// When none is found, everything up to the end of the buffer is returned.
    /// </summary>
    public char[] find(char[] ch)
    {
        List<char> collected = new List<char>();
        while (!EndOfBuffer)
        {
            foreach (char c in ch)
            {
                if (Current == c)
                {
                    MoveNext();
                    return collected.ToArray();
                }
            }
            collected.Add(Current);
            MoveNext();
        }
        return collected.ToArray();
    }

    /// <summary>See <see cref="findUnescaped(char[])"/>; searches for a single character.</summary>
    public char[] findUnescaped(char find)
    {
        return findUnescaped(new char[] { find });
    }

    /// <summary>
    /// Like <see cref="find(char[])"/>, but a character preceded by '\' is never treated
    /// as a delimiter. The backslashes are kept in the returned section.
    /// </summary>
    /// <returns>The characters before the delimiter, or null when no delimiter was found.</returns>
    public char[] findUnescaped(char[] find)
    {
        int n;      // number of characters consumed since the start of the scan
        for (n = 0; !EndOfBuffer; n++)
        {
            foreach (char f in find)
            {
                if (f == Current)
                {
                    char[] r = getSection(position - n, n);
                    MoveNext();
                    return r;
                }
            }
            if (Current == '\\')
            {
                // Skip the escape marker; the loop step below skips the escaped character.
                MoveNext();
                n++;
            }
            MoveNext();
        }
        return null;
    }

    /// <summary>
    /// Consumes and returns the run of letters, digits and '_' at the current position.
    /// The result is empty when the current character is none of these.
    /// </summary>
    public char[] findSymbol()
    {
        List<char> symbol = new List<char>();
        while (!EndOfBuffer)
        {
            if (
                !char.IsDigit(Current) &&
                !char.IsLetter(Current) &&
                (Current != '_')
            )
                break;
            symbol.Add(Current);
            MoveNext();
        }
#if DEBUG
        Console.WriteLine("findSymbol() = {0}",new String(symbol.ToArray()));
#endif
        return symbol.ToArray();
    }
}
}

View File

@ -0,0 +1,55 @@
using System;
namespace lexer.buffer
{
/// <summary>
/// Reads the body of a character group definition one logical item at a time:
/// plain characters, '\'-escaped characters, "\0xHHHH" hexadecimal escapes and the
/// ".." interval operator.
/// </summary>
public class DefinitionReader
{
    /// <summary>Returned for an unescaped ".."; lies outside the range of <c>char</c>.</summary>
    public const int OP_INTERVAL = 0x00010000;
    /// <summary>Returned once the definition is exhausted; lies outside the range of <c>char</c>.</summary>
    public const int OP_EOB = 0x10000000;

    char[] definition;
    int position;
    int currentChar = -1;

    /// <summary>Creates a reader and immediately reads the first item.</summary>
    /// <exception cref="FormatException">The definition starts with a malformed escape.</exception>
    public DefinitionReader(char[] definition)
    {
        this.definition = definition;
        this.position = 0;
        MoveNext();
    }

    /// <summary>The item read last: a character code, <see cref="OP_INTERVAL"/> or <see cref="OP_EOB"/>.</summary>
    public int Current
    {
        get { return this.currentChar; }
    }

    /// <summary>Reads the next item and returns it (also available through <see cref="Current"/>).</summary>
    /// <exception cref="FormatException">
    /// An escape marker is the last character, or a hexadecimal escape is not followed
    /// by four hexadecimal digits.
    /// </exception>
    public int MoveNext()
    {
        if (this.position >= this.definition.Length)
        {
            currentChar = OP_EOB;
            return currentChar;
        }

        currentChar = this.definition[this.position++];

        if (currentChar == '\\')
        {
            if (this.position >= this.definition.Length)
            {
                throw new FormatException("Escape marker '\\' must be followed by a character");
            }
            currentChar = this.definition[this.position++];

            // "\0x" introduces a four digit hexadecimal character code; a "\0" that is
            // not followed by 'x' is simply the character '0'.
            bool isHexEscape = (currentChar == '0')
                && (this.position < this.definition.Length)
                && (this.definition[this.position] == 'x');
            if (isHexEscape)
            {
                this.position++;
                if (this.position + 4 > this.definition.Length)
                {
                    throw new FormatException("Hexadecimal escape '\\0x' must be followed by four hexadecimal digits");
                }
                String hexvalue = new String(this.definition, this.position, 4);
                this.position += 4;
                currentChar = (char)int.Parse(hexvalue,
                                              System.Globalization.NumberStyles.HexNumber,
                                              System.Globalization.CultureInfo.InvariantCulture);
            }
        }
        else if (currentChar == '.')
        {
            // Two unescaped dots form the interval operator; a single dot is a plain character.
            if ((this.position < this.definition.Length) && (this.definition[this.position] == '.'))
            {
                this.position++;
                currentChar = OP_INTERVAL;
            }
        }
        return currentChar;
    }
}
}

View File

@ -0,0 +1,54 @@
using System;
using System.Collections.Generic;
using System.Text;
namespace lexer.match
{
/// <summary>
/// A choice between several matchables: every branch is tried at the same buffer
/// position and the branch with the longest match wins.
/// </summary>
public class Alternative : Sequence
{
    /// <summary>Creates an alternative without branches and without a name.</summary>
    public Alternative()
    {
    }

    /// <summary>
    /// Tries all branches at the current position of <paramref name="chbuffer"/>; the
    /// position is restored after each attempt.
    /// </summary>
    /// <returns>
    /// The parts of the longest matching branch (the earliest one on a tie), or null
    /// when no branch matches.
    /// </returns>
    public override MatchedPart[] Match(buffer.CharacterBuffer chbuffer)
    {
        MatchedPart[] best = null;
        int bestLength = -1;

        foreach (Matchable branch in matchables)
        {
            chbuffer.Push();
            MatchedPart[] candidate = branch.MatchNoticeable(chbuffer);
            chbuffer.Pop();

            if (candidate == null)
            {
                continue;
            }

            int candidateLength = candidate.MatchedLength();
            if ((best == null) || (candidateLength > bestLength))
            {
                best = candidate;
                bestLength = candidateLength;
            }
        }

        return best;
    }

    /// <summary>Renders the branches as "( a | b | c)".</summary>
    public override string ToString()
    {
        StringBuilder text = new StringBuilder("( ");
        bool first = true;
        foreach (Matchable branch in this.matchables)
        {
            if (!first)
            {
                text.Append(" | ");
            }
            text.Append(branch.ToString(true));
            first = false;
        }
        return text.Append(")").ToString();
    }
}
}

View File

@ -0,0 +1,203 @@
using System;
using System.Collections.Generic;
using System.Linq;
using lexer.buffer;
using System.Threading;
using System.Runtime.Remoting.Messaging;
using System.Text;
namespace lexer.match
{
/// <summary>
/// Matches a single character against a bracketed character list such as
/// "[a..z0..9_]" with an optional exclusion part after '/', e.g. "[a..i/d..f]".
/// </summary>
public class CharacterGroup : Matchable
{
    // Inclusive range of characters.
    struct chinterval
    {
        public char first;
        public char last;

        public chinterval(char first, char last)
        {
            this.first = first;
            this.last = last;
        }

        public bool Match(char ch)
        {
            return (ch >= first) && (ch <= last);
        }

        public override string ToString()
        {
            return string.Format("[chinterval min=0x{0:X4} max=0x{1:X4}]", (int)this.first, (int)this.last);
        }
    }

    List<char> includeCharacters = new List<char>();
    List<chinterval> includeIntervals = new List<chinterval>();
    List<char> excludeCharacters = new List<char>();
    List<chinterval> excludeIntervals = new List<chinterval>();

    /// <summary>
    /// Parses a group definition. The buffer must be positioned on the opening '[' and is
    /// left directly behind the closing ']'.
    /// </summary>
    /// <exception cref="FormatException">
    /// The definition does not start with '[' or has no closing ']'.
    /// </exception>
    public CharacterGroup(CharacterBuffer chbuffer)
    {
        if (chbuffer.Current != '[')
        {
            throw new FormatException("CharacterGroup Definition must start with '['");
        }
        chbuffer.MoveNext();

        char[] idef = chbuffer.findUnescaped(new char[] { ']', '/' });
        if (idef == null)
        {
            // findUnescaped() reports a missing delimiter as null.
            throw new FormatException("CharacterGroup Definition must end with ']'");
        }

        char[] edef = null;
        // The character just consumed tells which delimiter ended the include part.
        if (chbuffer.Last == '/')
        {
            edef = chbuffer.findUnescaped(']');
            if (edef == null)
            {
                throw new FormatException("CharacterGroup Definition must end with ']'");
            }
        }
#if DEBUG
        Console.WriteLine("CharacterGroup: include = {0}", new String(idef));
        Console.WriteLine("CharacterGroup: exclude = {0}", new String(edef));
#endif
        parseComponents(includeCharacters, includeIntervals, idef);
        if (edef != null)
        {
            parseComponents(excludeCharacters, excludeIntervals, edef);
        }
    }

    // Splits a definition into single characters (cList) and intervals (iList).
    private void parseComponents(List<char> cList, List<chinterval> iList, char[] def)
    {
        DefinitionReader dr = new DefinitionReader(def);
        while (dr.Current != DefinitionReader.OP_EOB)
        {
            if (dr.Current == DefinitionReader.OP_INTERVAL)
            {
                // An open side of ".." defaults to the lowest / highest character.
                char min = (char)0;
                char max = (char)0xffff;
                if (cList.Count > 0)
                {
                    // The character read just before ".." is the lower bound, not a
                    // stand-alone member.
                    min = cList[cList.Count - 1];
                    cList.RemoveAt(cList.Count - 1);
                }
                dr.MoveNext();
                if (dr.Current != DefinitionReader.OP_EOB)
                {
                    max = (char)dr.Current;
                }
                iList.Add(new chinterval(min, max));
            }
            else
            {
                cList.Add((char)dr.Current);
            }
            dr.MoveNext();
        }
    }

    /// <summary>
    /// True when <paramref name="ch"/> is part of the include list and not part of the
    /// exclude list.
    /// </summary>
    public bool Match(char ch)
    {
        if (contains(excludeCharacters, excludeIntervals, ch))
        {
            return false;
        }
        return contains(includeCharacters, includeIntervals, ch);
    }

    // True when ch lies in one of the intervals or equals one of the characters.
    private static bool contains(List<char> chars, List<chinterval> intervals, char ch)
    {
        foreach (chinterval i in intervals)
        {
            if (i.Match(ch))
            {
                return true;
            }
        }
        return chars.Contains(ch);
    }

    /// <summary>
    /// Matches the current character of <paramref name="chbuffer"/> without moving the position.
    /// </summary>
    /// <returns>A single part holding the character, or null when it does not match.</returns>
    public override MatchedPart[] Match(CharacterBuffer chbuffer)
    {
        if (!chbuffer.EndOfBuffer && Match(chbuffer.Current))
        {
            return new MatchedPart[] { new MatchedPart(this, new char[] { chbuffer.Current }) };
        }
        return null;
    }

    /// <summary>
    /// Renders the group in definition syntax; characters up to 0x20 are written as
    /// "\0xHHHH" escapes.
    /// </summary>
    public override string ToString()
    {
        StringBuilder sb = new StringBuilder();
        sb.Append("[");
        appendComponents(sb, includeCharacters, includeIntervals);
        if ((excludeIntervals.Count > 0) || (excludeCharacters.Count > 0))
        {
            sb.Append("/");
            // This section used to print the include characters again.
            appendComponents(sb, excludeCharacters, excludeIntervals);
        }
        sb.Append("]");
        return sb.ToString();
    }

    // Appends the single characters followed by the intervals ("first..last").
    private static void appendComponents(StringBuilder sb, List<char> chars, List<chinterval> intervals)
    {
        foreach (char ch in chars)
        {
            appendCharacter(sb, ch);
        }
        foreach (chinterval chi in intervals)
        {
            appendCharacter(sb, chi.first);
            sb.Append("..");
            appendCharacter(sb, chi.last);
        }
    }

    // Appends one character, escaping everything up to and including the space.
    private static void appendCharacter(StringBuilder sb, char ch)
    {
        if (ch <= 0x20)
        {
            sb.AppendFormat("\\0x{0:X4}", (int)ch);
        }
        else
        {
            sb.Append(ch);
        }
    }
}
}

View File

@ -0,0 +1,43 @@
using System;
using lexer.buffer;
using System.Collections.Generic;
namespace lexer.match
{
/// <summary>
/// Matches a fixed run of characters that was written as a double-quoted literal in a
/// token definition, e.g. "if".
/// </summary>
public class CharacterSequence : Matchable
{
    char[] sequence;

    /// <summary>
    /// Parses a quoted literal. The buffer must be positioned on the opening '"' and is
    /// left directly behind the closing '"'. A '\' takes the following character literally.
    /// </summary>
    /// <exception cref="FormatException">
    /// The literal does not start with '"' or is not terminated.
    /// </exception>
    public CharacterSequence(CharacterBuffer chbuffer)
    {
        if (chbuffer.Current != '"')
        {
            throw new FormatException("CharacterSequence must start with \"");
        }

        List<char> characters = new List<char>();
        chbuffer.MoveNext();                    // skip the opening quote
        while (true)
        {
            if (chbuffer.EndOfBuffer)
            {
                // Without this check an unterminated literal never left the loop.
                throw new FormatException("CharacterSequence must end with \"");
            }
            if (chbuffer.Current == '"')
            {
                break;
            }
            if (chbuffer.Current == '\\')
            {
                chbuffer.MoveNext();
                if (chbuffer.EndOfBuffer)
                {
                    throw new FormatException("CharacterSequence must end with \"");
                }
            }
            characters.Add(chbuffer.Current);
            chbuffer.MoveNext();
        }
        sequence = characters.ToArray();
        chbuffer.MoveNext();                    // skip the closing quote
    }

    /// <summary>
    /// Compares the literal with the input starting at the current position. The position
    /// of <paramref name="chbuffer"/> is unchanged afterwards.
    /// </summary>
    /// <returns>A single part holding the literal, or null when the input differs.</returns>
    public override MatchedPart[] Match(CharacterBuffer chbuffer)
    {
        // The input is compared from the current character on. The former version
        // demanded a '"' in the input and compared from the character after it.
        chbuffer.Push();
        try
        {
            foreach (char sch in sequence)
            {
                if (chbuffer.EndOfBuffer || (chbuffer.Current != sch))
                {
                    return null;
                }
                chbuffer.MoveNext();
            }
            return new MatchedPart[] { new MatchedPart(this, this.sequence) };
        }
        finally
        {
            chbuffer.Pop();
        }
    }

    /// <summary>Renders the literal in definition syntax, i.e. quoted and escaped.</summary>
    public override string ToString()
    {
        return "\"" + new String(sequence).Replace("\\", "\\\\").Replace("\"", "\\\"") + "\"";
    }
}
}

View File

@ -0,0 +1,53 @@
using System;
using lexer.buffer;
using System.Collections.Generic;
namespace lexer.match
{
/// <summary>
/// A matchable together with a repetition interval: the inner matchable has to match
/// at least <see cref="Minimum"/> times and is matched at most <see cref="Maximum"/> times.
/// </summary>
public class Expression : Matchable
{
    /// <summary>The matchable that is repeated.</summary>
    public Matchable Matchable { get; private set; }
    /// <summary>Smallest number of repetitions that counts as a match.</summary>
    public int Minimum { get; private set; } = 0;
    /// <summary>Largest number of repetitions that is attempted.</summary>
    public int Maximum { get; private set; } = int.MaxValue;

    /// <summary>Creates an expression; by default the matchable must match exactly once.</summary>
    public Expression(Matchable matchable, int minimum = 1, int maximum = 1)
    {
        this.Matchable = matchable;
        this.Minimum = minimum;
        this.Maximum = maximum;
    }

    /// <summary>
    /// Repeatedly matches the inner matchable starting at the current position. The
    /// position of <paramref name="chbuffer"/> is restored before returning.
    /// </summary>
    /// <returns>
    /// The parts of all repetitions (possibly an empty array), or null when fewer than
    /// <see cref="Minimum"/> repetitions matched.
    /// </returns>
    public override MatchedPart[] Match(CharacterBuffer chbuffer)
    {
        List<MatchedPart> matchedParts = new List<MatchedPart>();
        int n;

        chbuffer.Push();
        try
        {
            for (n = 0; n < Maximum; n++)
            {
                MatchedPart[] mparts = Matchable.MatchNoticeable(chbuffer);
                if (mparts == null)
                {
                    break;
                }
                matchedParts.AddRange(mparts);

                int advance = mparts.MatchedLength();
                if (advance == 0)
                {
                    // A match that consumes nothing would succeed again at the same
                    // position on every further iteration. Record it once and count
                    // all remaining repetitions as done, instead of iterating (and
                    // collecting parts) up to Maximum times.
                    n = Maximum;
                    break;
                }
                chbuffer.Position += advance;
            }
        }
        finally
        {
            // Keep the position stack balanced even when the inner match throws.
            chbuffer.Pop();
        }

        if (n < Minimum)
        {
            return null;
        }
        return matchedParts.ToArray();
    }

    /// <summary>
    /// Renders the expression as "inner{min,max}"; a minimum of 0 and an unlimited
    /// maximum are left blank.
    /// </summary>
    public override string ToString()
    {
        return String.Format("{0}{{{1},{2}}}",
                             this.Matchable.ToString(true),
                             Minimum == 0 ? "" : Minimum.ToString(),
                             Maximum == int.MaxValue ? "" : Maximum.ToString()
                            );
    }
}
}

View File

@ -0,0 +1,98 @@
using System;
using lexer.buffer;
using System.Collections.Generic;
namespace lexer.match
{
/// <summary>
/// Base class of everything that can be matched against a <see cref="CharacterBuffer"/>.
/// Matchables created with a name are entered into a static registry and can be looked
/// up through <see cref="getNamedMatchable(String)"/>.
/// </summary>
public abstract class Matchable
{
    public event MatchedDelegate OnMatched;

    /// <summary>Name of this matchable, or null when it is anonymous.</summary>
    public String Name { get; private set; }
    /// <summary>When set, <see cref="MatchNoticeable"/> merges all matched parts into one part owned by this matchable.</summary>
    public bool Grouping { get; set; }
    /// <summary>When set, parts owned by this matchable are reported by the Notice() helper.</summary>
    public bool Notice { get; set; }

    /// <summary>Creates the matchable and, when a name is given, registers it under that name.</summary>
    /// <exception cref="ArgumentException">Another matchable is already registered under <paramref name="name"/>.</exception>
    protected Matchable(String name = null)
    {
        this.Name = name;
        if (name != null)
        {
            namedMatchables.Add(name, this);
        }
    }

    /// <summary>
    /// Calls <see cref="Match"/> and, when <see cref="Grouping"/> is set and the match
    /// succeeded, combines the matched characters into a single part owned by this matchable.
    /// </summary>
    public MatchedPart[] MatchNoticeable(CharacterBuffer chbuffer)
    {
        MatchedPart[] mparts = Match(chbuffer);
        if (Grouping && (mparts != null))
        {
            MatchedPart mp = new MatchedPart(this, mparts.MatchingCharacters());
            return new MatchedPart[] { mp };
        }
        return mparts;
    }

    /// <summary>
    /// Matches this matchable starting at the current position of <paramref name="chbuffer"/>.
    /// </summary>
    /// <returns>The matched parts on success, null when there is no match.</returns>
    public abstract MatchedPart[] Match(CharacterBuffer chbuffer);

    /// <summary>
    /// Fallback text for subclasses without their own rendering: the name, or the type
    /// name for anonymous matchables.
    /// </summary>
    public override string ToString()
    {
        // Must not call ToString(bool): that overload calls back into ToString(), which
        // recursed without end for subclasses that do not override ToString().
        return this.Name ?? GetType().Name;
    }

    /// <summary>
    /// Returns the name when <paramref name="useSymbol"/> is set and a name exists,
    /// otherwise the full rendering of <see cref="ToString()"/>.
    /// </summary>
    public virtual String ToString(bool useSymbol)
    {
        if (useSymbol && (this.Name != null))
        {
            return this.Name;
        }
        return ToString();
    }

    // Registry of all named matchables, shared by the whole process.
    static Dictionary<string, Matchable> namedMatchables = new Dictionary<string, Matchable>();

    /// <summary>Returns the matchable registered under <paramref name="name"/>, or null.</summary>
    public static Matchable getNamedMatchable(String name)
    {
        Matchable found;
        return namedMatchables.TryGetValue(name, out found) ? found : null;
    }

    /// <summary>Returns the matchable registered under <paramref name="name"/>, or null.</summary>
    public static Matchable getNamedMatchable(char[] name)
    {
        return getNamedMatchable(new String(name));
    }

    /// <summary>
    /// Reference to a named matchable that is looked up when it is first matched, so a
    /// definition may refer to a name that is registered later.
    /// </summary>
    public class DeferredMatchable : Matchable
    {
        Matchable matchable;

        public DeferredMatchable(char[] name)
            : this(new String(name)) { }

        public DeferredMatchable(String name)
            : base()
        {
            // The name is set after construction so that the reference itself is not
            // entered into the registry.
            Name = name;
            matchable = null;
        }

        /// <summary>Resolves the reference if necessary and delegates to the target.</summary>
        /// <exception cref="KeyNotFoundException">No matchable is registered under the name.</exception>
        public override MatchedPart[] Match(CharacterBuffer chbuffer)
        {
            if (matchable == null)
            {
                matchable = getNamedMatchable(Name);
            }
            if (matchable == null)
            {
                throw new KeyNotFoundException(String.Format("Deferred matchable '{0}' was not found", Name));
            }
            // MatchNoticeable() so that the target's Grouping applies exactly as it does
            // when the target is referenced directly.
            return matchable.MatchNoticeable(chbuffer);
        }
    }
}
}

View File

@ -0,0 +1,8 @@
using System;
namespace lexer.match {
/// <summary>
/// Handler signature of the <c>Matchable.OnMatched</c> event; receives one matched part.
/// </summary>
public delegate void MatchedDelegate(MatchedPart matchedPart);
}

View File

@ -0,0 +1,60 @@
using System;
using System.Runtime.CompilerServices;
using System.Collections.Generic;
namespace lexer.match
{
/// <summary>Callback invoked by the Notice() helper for every noticeable part.</summary>
public delegate void MatchedPartNoticeDelegate(MatchedPart matchedPart);

/// <summary>
/// One piece of a match result: the characters that were matched and the matchable
/// that owns them.
/// </summary>
public class MatchedPart
{
    /// <summary>Creates a part; the character array is stored as given, not copied.</summary>
    public MatchedPart(Matchable matchable, char[] characters)
    {
        Matchable = matchable;
        MatchedCharacters = characters;
    }

    /// <summary>The matchable that owns this part.</summary>
    public Matchable Matchable { get; private set; }

    /// <summary>The matched characters.</summary>
    public char[] MatchedCharacters { get; private set; }

    /// <summary>Number of matched characters.</summary>
    public int Length => MatchedCharacters.Length;

    /// <summary>The matched characters as a new string.</summary>
    public String MatchedString => new String(MatchedCharacters);
}
/// <summary>
/// Extension helpers for match results. A null array stands for "no match" and is
/// accepted by all helpers that take a <c>MatchedPart[]</c>.
/// </summary>
public static class MatchedPartArray
{
    /// <summary>Total number of characters in all parts; 0 for a null array.</summary>
    public static int MatchedLength(this MatchedPart[] parts)
    {
        if (parts == null)
        {
            return 0;
        }

        int l = 0;
        foreach (MatchedPart p in parts)
        {
            l += p.Length;
        }
        return l;
    }

    /// <summary>Concatenates the characters of all parts; empty for a null or empty array.</summary>
    public static char[] MatchingCharacters(this MatchedPart[] parts)
    {
        if ((parts == null) || (parts.Length == 0))
        {
            return new char[0];
        }

        List<char> characters = new List<char>();
        foreach (MatchedPart mp in parts)
        {
            characters.AddRange(mp.MatchedCharacters);
        }
        return characters.ToArray();
    }

    /// <summary>Creates a string from a character array.</summary>
    public static String AsString(this char[] chars)
    {
        return new String(chars);
    }

    /// <summary>
    /// Invokes <paramref name="notice"/> for every part whose owning matchable has its
    /// Notice flag set. Does nothing for a null array.
    /// </summary>
    public static void Notice(this MatchedPart[] matchedParts, MatchedPartNoticeDelegate notice)
    {
        if (matchedParts == null)
        {
            return;
        }

        foreach (MatchedPart mpart in matchedParts)
        {
            if (mpart.Matchable.Notice)
            {
                notice(mpart);
            }
        }
    }
}
}

View File

@ -0,0 +1,54 @@
using System;
using System.Collections.Generic;
using System.Text;
namespace lexer.match
{
/// <summary>
/// An ordered list of matchables that all have to match one after another.
/// </summary>
public class Sequence : Matchable
{
    /// <summary>The members of the sequence in matching order.</summary>
    public List<Matchable> matchables = new List<Matchable>();

    /// <summary>Creates an anonymous, empty sequence.</summary>
    public Sequence()
    {
    }

    /// <summary>Creates an empty sequence with the given name.</summary>
    public Sequence(String name)
        : base(name)
    {
    }

    /// <summary>A snapshot of the members as an array.</summary>
    public Matchable[] Matchables
    {
        get { return this.matchables.ToArray(); }
    }

    /// <summary>Appends a member to the end of the sequence.</summary>
    public void addMatchable(Matchable matchable)
    {
        this.matchables.Add(matchable);
    }

    /// <summary>
    /// Matches every member in order, each one starting where the previous one ended.
    /// The position of <paramref name="chbuffer"/> is restored before returning.
    /// </summary>
    /// <returns>The parts of all members, or null as soon as one member does not match.</returns>
    public override MatchedPart[] Match(buffer.CharacterBuffer chbuffer)
    {
        List<MatchedPart> collected = new List<MatchedPart>();
        bool complete = true;

        chbuffer.Push();
        foreach (Matchable member in matchables)
        {
            MatchedPart[] parts = member.MatchNoticeable(chbuffer);
            if (parts == null)
            {
                complete = false;
                break;
            }
            collected.AddRange(parts);
            chbuffer.Position += parts.MatchedLength();
        }
        chbuffer.Pop();

        return complete ? collected.ToArray() : null;
    }

    /// <summary>Renders the sequence as "( a b c )".</summary>
    public override string ToString()
    {
        StringBuilder text = new StringBuilder("( ");
        foreach (Matchable member in this.matchables)
        {
            text.Append(member.ToString(true)).Append(' ');
        }
        return text.Append(")").ToString();
    }
}
}