Initial Commit
commit
44c2c89de0
|
@ -0,0 +1,40 @@
|
|||
# Autosave files
|
||||
*~
|
||||
|
||||
# build
|
||||
[Oo]bj/
|
||||
[Bb]in/
|
||||
packages/
|
||||
TestResults/
|
||||
|
||||
# globs
|
||||
Makefile.in
|
||||
*.DS_Store
|
||||
*.sln.cache
|
||||
*.suo
|
||||
*.cache
|
||||
*.pidb
|
||||
*.userprefs
|
||||
*.usertasks
|
||||
config.log
|
||||
config.make
|
||||
config.status
|
||||
aclocal.m4
|
||||
install-sh
|
||||
autom4te.cache/
|
||||
*.user
|
||||
*.tar.gz
|
||||
tarballs/
|
||||
test-results/
|
||||
Thumbs.db
|
||||
|
||||
# Mac bundle stuff
|
||||
*.dmg
|
||||
*.app
|
||||
|
||||
# resharper
|
||||
*_Resharper.*
|
||||
*.Resharper
|
||||
|
||||
# dotCover
|
||||
*.dotCover
|
|
@ -0,0 +1,17 @@
|
|||
|
||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
# Visual Studio 2012
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SharpLexer", "SharpLexer\SharpLexer.csproj", "{177C81C7-F6E3-494C-8866-2E3E134969C0}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|x86 = Debug|x86
|
||||
Release|x86 = Release|x86
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{177C81C7-F6E3-494C-8866-2E3E134969C0}.Debug|x86.ActiveCfg = Debug|x86
|
||||
{177C81C7-F6E3-494C-8866-2E3E134969C0}.Debug|x86.Build.0 = Debug|x86
|
||||
{177C81C7-F6E3-494C-8866-2E3E134969C0}.Release|x86.ActiveCfg = Release|x86
|
||||
{177C81C7-F6E3-494C-8866-2E3E134969C0}.Release|x86.Build.0 = Release|x86
|
||||
EndGlobalSection
|
||||
EndGlobal
|
|
@ -0,0 +1,11 @@
|
|||
<?xml version="1.0" encoding="UTF-8" ?>
|
||||
<Grammar>
|
||||
<Tokens>
|
||||
<Token name="whitespace">[\0x0000..\0x0020]</Token>
|
||||
<Token name="nonwhitespace">[\0x0021..]</Token>
|
||||
<Token name="alpha">[a..zA..Z]</Token>
|
||||
<Token name="digit">[0..9]</Token>
|
||||
<Token name="alphadigit">alpha | digit</Token>
|
||||
<Token name="digit19">[1..9]</Token>
|
||||
</Tokens>
|
||||
</Grammar>
|
|
@ -0,0 +1,181 @@
|
|||
using System;
|
||||
using System.Xml;
|
||||
using System.Collections.Generic;
|
||||
using lexer.match;
|
||||
using lexer.buffer;
|
||||
namespace lexer
|
||||
{
|
||||
/// <summary>
/// Loads token definitions from grammar XML documents and compiles every
/// definition into a <see cref="Sequence"/> that can be looked up by token name.
/// </summary>
public class Grammar
{
    Dictionary<String, Sequence> sequences = new Dictionary<string, Sequence>();

    /// <summary>Creates an empty grammar; use one of the Load overloads to add tokens.</summary>
    public Grammar()
    {
    }

    /// <summary>Creates a grammar and loads the tokens of the given XML file.</summary>
    /// <param name="filename">Path of the grammar XML file.</param>
    public Grammar(String filename)
    {
        Load(filename);
    }

    /// <summary>Creates a grammar and loads the tokens of the given XML document.</summary>
    /// <param name="xml">Document containing /Grammar/Tokens/Token elements.</param>
    public Grammar(XmlDocument xml)
    {
        Load(xml);
    }

    /// <summary>Reads the XML file and adds all its tokens to this grammar.</summary>
    /// <param name="filename">Path of the grammar XML file.</param>
    public void Load(String filename)
    {
        XmlDocument xml = new XmlDocument();
        xml.Load(filename);
        Load(xml);
    }

    /// <summary>
    /// Adds every /Grammar/Tokens/Token element of the document to this grammar.
    /// The presence of the "grouping" and "notice" attributes (whatever their value)
    /// switches the corresponding flag of the token on.
    /// </summary>
    /// <param name="xml">Document containing the token definitions.</param>
    /// <exception cref="FormatException">A token has no name or its definition is malformed.</exception>
    public void Load(XmlDocument xml)
    {
        XmlNodeList tokens = xml.SelectNodes("/Grammar/Tokens/Token");
        foreach (XmlNode node in tokens)
        {
            XmlElement ntoken = (XmlElement)node;

            // GetAttribute returns "" for a missing attribute instead of failing with a null reference.
            String name = ntoken.GetAttribute("name");
            if (name.Length == 0)
            {
                throw new FormatException("Token element is missing its 'name' attribute");
            }
            Console.WriteLine("Loading Token: {0}", name);

            CharacterBuffer chb = new CharacterBuffer(ntoken.InnerText);
            Sequence sequence = parseSequence(chb, name);

            sequence.Grouping = ntoken.HasAttribute("grouping");
            sequence.Notice = ntoken.HasAttribute("notice");

            this.sequences.Add(name, sequence);
        }
    }

    /// <summary>Returns the compiled token with the given name.</summary>
    /// <param name="name">Token name as given in the grammar file.</param>
    /// <exception cref="KeyNotFoundException">No token with that name was loaded.</exception>
    public Sequence getSequence(String name)
    {
        return this.sequences[name];
    }

    /// <summary>
    /// Parses a token definition starting at the current buffer position.
    /// </summary>
    /// <param name="chbuffer">Buffer positioned at the start of the definition.</param>
    /// <param name="name">Optional token name; a named result is always the sequence created for that name.</param>
    /// <returns>The parsed sequence (an <see cref="Alternative"/> for unnamed definitions containing '|').</returns>
    /// <exception cref="FormatException">The definition contains an unexpected character.</exception>
    public Sequence parseSequence(CharacterBuffer chbuffer, String name = null)
    {
        // Create the named sequence before parsing so that a definition referring
        // to its own name resolves to it, exactly as before.
        Sequence named = (name != null) ? new Sequence(name) : null;

        Sequence body = parseSequenceBody(chbuffer);
        if (named == null)
        {
            return body;
        }

        if (body is Alternative)
        {
            // Previously the unnamed Alternative was returned while the name stayed
            // registered on its first branch only; wrap it so the name covers all branches.
            named.addMatchable(body);
        }
        else
        {
            foreach (Matchable m in body.matchables)
            {
                named.addMatchable(m);
            }
        }
        return named;
    }

    // Parses expressions until the end of the buffer, a closing ')' or a '|'.
    private Sequence parseSequenceBody(CharacterBuffer chbuffer)
    {
        Sequence sequence = new Sequence();

        while (!chbuffer.EndOfBuffer)
        {
            parseWhiteSpace(chbuffer);
            if (chbuffer.EndOfBuffer)
                break;

            char current = chbuffer.Current;

            if (current == '|')
            {
                // Everything parsed so far is the first branch, the rest is the second.
                Alternative alt = new Alternative();
                alt.addMatchable(sequence);
                chbuffer.MoveNext();
                alt.addMatchable(parseSequenceBody(chbuffer));
                return alt;
            }

            if (current == ')')
            {
                chbuffer.MoveNext();
                return sequence;
            }

            Matchable operand;
            if (current == '[')
            {
                operand = new CharacterGroup(chbuffer);
            }
            else if (current == '"')
            {
                operand = new CharacterSequence(chbuffer);
            }
            else if (current == '(')
            {
                chbuffer.MoveNext();
                operand = parseSequenceBody(chbuffer);
            }
            else
            {
                char[] sym = chbuffer.findSymbol();
                if (sym.Length == 0)
                {
                    // findSymbol() did not advance; without this check the loop never ends.
                    throw new FormatException(String.Format(
                        "Unexpected character '{0}' at position {1}", current, chbuffer.Position));
                }

                operand = Matchable.getNamedMatchable(sym);
                if (operand == null)
                {
                    // Not defined yet: resolve the name when matching.
                    operand = new Matchable.DeferredMatchable(sym);
                }
            }

            int min = 1;
            int max = 1;
            parseMinMax(chbuffer, ref min, ref max);
            sequence.addMatchable(new Expression(operand, min, max));
        }

        return sequence;
    }

    /// <summary>Advances the buffer past all characters up to and including 0x20.</summary>
    /// <param name="chbuffer">Buffer to advance.</param>
    public void parseWhiteSpace(CharacterBuffer chbuffer)
    {
        // At the end of the buffer Current yields 0xFFFF, which ends the loop.
        while (chbuffer.Current <= 0x20)
        {
            chbuffer.MoveNext();
        }
    }

    /// <summary>
    /// Parses an optional repetition suffix: {n}, {n,}, {,m} or {n,m}.
    /// Without a suffix, or with an empty one, both values are set to 1.
    /// </summary>
    /// <param name="chbuffer">Buffer positioned directly behind the repeated expression.</param>
    /// <param name="min">Receives the minimum repetition count (0 when omitted before the comma).</param>
    /// <param name="max">Receives the maximum repetition count (int.MaxValue when omitted after the comma).</param>
    /// <exception cref="FormatException">The suffix is not closed or a count is not a number.</exception>
    public void parseMinMax(CharacterBuffer chbuffer, ref int min, ref int max)
    {
        if (chbuffer.Current == '{')
        {
            chbuffer.MoveNext();
            String def = new String(chbuffer.find('}'));

            // find() steps over the '}' it found, so Last is '}' exactly when the suffix was closed.
            if (chbuffer.Last != '}')
            {
                throw new FormatException("Repetition definition is missing its closing '}'");
            }

            if (def.Trim().Length > 0)
            {
                int comma = def.IndexOf(',');
                if (comma < 0)
                {
                    min = parseCount(def);
                    max = min;
                }
                else
                {
                    String lower = def.Substring(0, comma).Trim();
                    String upper = def.Substring(comma + 1).Trim();

                    min = (lower.Length == 0) ? 0 : parseCount(lower);
                    max = (upper.Length == 0) ? int.MaxValue : parseCount(upper);
                }
                return;
            }
        }

        min = 1;
        max = 1;
    }

    // Parses one repetition count independent of the current culture.
    private static int parseCount(String text)
    {
        return int.Parse(
            text.Trim(),
            System.Globalization.NumberStyles.Integer,
            System.Globalization.CultureInfo.InvariantCulture);
    }
}
|
||||
}
|
|
@ -0,0 +1,10 @@
|
|||
using System;
|
||||
namespace lexer
|
||||
{
|
||||
/// <summary>
/// Placeholder for the lexer; it currently has no state and no behavior.
/// </summary>
public class Lexer
{
    /// <summary>Creates a new, empty lexer.</summary>
    public Lexer() { }
}
|
||||
}
|
|
@ -0,0 +1,59 @@
|
|||
using System;
|
||||
using lexer.match;
|
||||
using lexer.buffer;
|
||||
namespace lexer
|
||||
{
|
||||
/// <summary>
/// Console driver: loads the two grammar files, matches a few number samples
/// against the "number" token and then lists the noticeable parts of a small test source.
/// </summary>
public class MainClass
{
    /// <summary>Program entry point.</summary>
    /// <param name="args">Command line arguments (not used).</param>
    public static void Main(String[] args)
    {
        Grammar grammar = new Grammar();
        // The project copies "Fundamentals.xml" to the output directory; the name
        // must match exactly on case-sensitive file systems.
        grammar.Load("Fundamentals.xml");
        grammar.Load("TestGrammar.xml");

        Sequence num = grammar.getSequence("number");

        String[] tests = new string[]{
            "32",
            "032",
            "-189463738.34gdts"
        };

        Console.WriteLine("Number Defintion: {0}", num.ToString());

        foreach (string l in tests)
        {
            Console.WriteLine("Matching {0}", l);
            MatchedPart[] mp = num.Match(new CharacterBuffer(l));
            if (mp == null)
            {
                Console.WriteLine("Did not Match!");
            }
            else
            {
                Console.WriteLine("Matched: {0}", mp.MatchingCharacters().AsString());
            }
        }

        Console.WriteLine("------------------------------------");

        String testsource = @"1
2
""Hallo Welt""
3
15.4
13.765
-3
-14.3
123456.7890
IchBinEinSymbol";

        Sequence testseq = grammar.getSequence("numbersandstrings");
        MatchedPart[] testmp = testseq.MatchNoticeable(new CharacterBuffer(testsource));
        if (testmp == null)
        {
            // A failed match is reported as null; do not hand that to Notice().
            Console.WriteLine("Did not Match!");
        }
        else
        {
            // "{0,-10}" left-aligns the name in ten columns; "{0:-10}" was a format
            // specifier, which has no effect on strings.
            testmp.Notice((matchedPart) => Console.WriteLine("Found {0,-10} = {1}", matchedPart.Matchable.Name, matchedPart.MatchedString));
        }
    }
}
|
||||
}
|
|
@ -0,0 +1,69 @@
|
|||
# Lexing Classes for .NET / Mono
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Regular Expression Language used for definition of tokens:
|
||||
|
||||
- Whitespace is ignored
|
||||
- '\' is used as escape marker within character lists
|
||||
|
||||
- [] define a character list for matching
|
||||
a) [a..z] match every character from 'a' to 'z' (included)
|
||||
b) [g] match character 'g'
|
||||
c) [egt] match one of the given characters
|
||||
// INVALID: d) characters may be defined by the character itself (e.g: H), by numerical value (e.g.: 32 or 0x20)
|
||||
e) [a..i/d..f] matches characters 'a' to 'i' but excludes characters 'd' to 'f' from matching
|
||||
f) [a\.z] matches characters 'a','.' and 'z'
|
||||
g) [abcijkx..] matches characters 'a','b','c','i','j','k' and every character from 'x' to highest char (0xFFFF)
|
||||
|
||||
- (...) defines a group that is matched as a whole
|
||||
|
||||
- | define an alternative matching path, e.g. [.] | [,] matches a "." or a ",", but only one character each time matching happens
|
||||
|
||||
- {} defines a repeated match for the prepended expression:
|
||||
a) no suffix, "{}" or "{1}" match exactly one time
|
||||
b) {3,} match at least 3 times to infinite times
|
||||
c) {1,3} match 1 to 3 times
|
||||
d) {,3} match 0 to 3 times
|
||||
|
||||
- every other word consisting of the characters 0..9,a..z,A..Z is considered a reference to another named expression
|
||||
|
||||
examples:
|
||||
|
||||
a numeric literal may be defined by:
|
||||
|
||||
[-]{0,1} [1..9] [0..9]{0,} ( [.] [0..9]{1,} ){0,1}
|
||||
|
||||
a possible string literal:
|
||||
|
||||
nonwhitespace: [\0x0021..]
|
||||
|
||||
string: ["] ( nonwhitespace | ([\\] ["]) ){,} ["]
|
||||
|
||||
|
||||
|
||||
|
||||
------------------------------------------------------------
|
||||
|
||||
[-]{0,1} digit19 digit{0,} ( [.] digit{1,} ){0,1}
|
||||
|
||||
[-]{0,1} <= Expression
|
||||
[-] <= Matchable
|
||||
|
||||
digit19{1,1} <= Expression
|
||||
digit19 <= Matchable
|
||||
|
||||
|
||||
Matchable: A singleton matchable object (a token or charactergroup)
|
||||
Expression: Combine a Matchable with an interval definition
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,72 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<PropertyGroup>
|
||||
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
|
||||
<Platform Condition=" '$(Platform)' == '' ">x86</Platform>
|
||||
<ProjectGuid>{177C81C7-F6E3-494C-8866-2E3E134969C0}</ProjectGuid>
|
||||
<OutputType>Exe</OutputType>
|
||||
<RootNamespace>lexer</RootNamespace>
|
||||
<AssemblyName>SharpLexer</AssemblyName>
|
||||
<TargetFrameworkVersion>v4.5</TargetFrameworkVersion>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|x86' ">
|
||||
<DebugSymbols>true</DebugSymbols>
|
||||
<DebugType>full</DebugType>
|
||||
<Optimize>false</Optimize>
|
||||
<OutputPath>bin\Debug</OutputPath>
|
||||
<DefineConstants>DEBUG;</DefineConstants>
|
||||
<ErrorReport>prompt</ErrorReport>
|
||||
<WarningLevel>4</WarningLevel>
|
||||
<PlatformTarget>x86</PlatformTarget>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|x86' ">
|
||||
<Optimize>true</Optimize>
|
||||
<OutputPath>bin\Release</OutputPath>
|
||||
<ErrorReport>prompt</ErrorReport>
|
||||
<WarningLevel>4</WarningLevel>
|
||||
<PlatformTarget>x86</PlatformTarget>
|
||||
</PropertyGroup>
|
||||
<ItemGroup>
|
||||
<Reference Include="System.Xml" />
|
||||
<Reference Include="System.Xml.Linq" />
|
||||
<Reference Include="System" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Compile Include="Lexer.cs" />
|
||||
<Compile Include="MainClass.cs" />
|
||||
<Compile Include="buffer\CharacterBuffer.cs" />
|
||||
<Compile Include="Grammar.cs" />
|
||||
<Compile Include="buffer\DefinitionReader.cs" />
|
||||
<Compile Include="match\Matchable.cs" />
|
||||
<Compile Include="match\MatchedPart.cs" />
|
||||
<Compile Include="match\MatchedDelegate.cs" />
|
||||
<Compile Include="match\Expression.cs" />
|
||||
<Compile Include="match\Sequence.cs" />
|
||||
<Compile Include="match\Alternative.cs" />
|
||||
<Compile Include="match\CharacterGroup.cs" />
|
||||
<Compile Include="match\CharacterSequence.cs" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="TestGrammar.xml">
|
||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||
</None>
|
||||
<None Include="README.md" />
|
||||
<None Include="Fundamentals.xml">
|
||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||
</None>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Folder Include="buffer\" />
|
||||
<Folder Include="match\" />
|
||||
</ItemGroup>
|
||||
<Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" />
|
||||
<ProjectExtensions>
|
||||
<MonoDevelop>
|
||||
<Properties>
|
||||
<Policies>
|
||||
<DotNetNamingPolicy DirectoryNamespaceAssociation="PrefixedHierarchical" ResourceNamePolicy="FileFormatDefault" />
|
||||
</Policies>
|
||||
</Properties>
|
||||
</MonoDevelop>
|
||||
</ProjectExtensions>
|
||||
</Project>
|
|
@ -0,0 +1,23 @@
|
|||
<?xml version="1.0" encoding="UTF-8" ?>
|
||||
<Grammar>
|
||||
<Tokens>
|
||||
<Token name="symbol" notice="yes" grouping="yes">alpha alphadigit{,}</Token>
|
||||
<Token name="number" notice="yes" grouping="yes">[-]{,1} digit19 digit{0,} ( [.] digit{1,} ){0,1}</Token>
|
||||
<Token name="string" notice="yes" grouping="yes">["] ( [../"] | [\\] ["] ){,} ["]</Token>
|
||||
|
||||
<Token name="const_literal">number | string</Token>
|
||||
|
||||
<Token name="lvalue">symbol</Token>
|
||||
<Token name="rvalue">symbol</Token>
|
||||
|
||||
<Token name="operator">[+-*\/%=?]</Token>
|
||||
<Token name="operation">(lvalue operator){0,1} rvalue [;]</Token>
|
||||
<Token name="conditional">"if" whitespace "(" rvalue ")" statement</Token>
|
||||
<Token name="block">"{" source "}"</Token>
|
||||
<Token name="statement">operation | conditional | block</Token>
|
||||
<Token name="source">statement{,}</Token>
|
||||
|
||||
<Token name="numbersandstrings">(whitespace{,} (number | string | symbol)){,}</Token>
|
||||
|
||||
</Tokens>
|
||||
</Grammar>
|
|
@ -0,0 +1,187 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Linq;
|
||||
namespace lexer.buffer
|
||||
{
|
||||
/// <summary>
/// A character array with a read position and a stack of remembered positions.
/// Reads outside the array do not fail: positions behind the end yield 0xFFFF,
/// positions before the start yield 0.
/// </summary>
public class CharacterBuffer
{
    char[] characters;
    int position;

    Stack<int> positionStack = new Stack<int>();

    /// <summary>Creates a buffer over a private copy of the given characters.</summary>
    /// <param name="characters">Characters to read; must not be null.</param>
    /// <exception cref="ArgumentNullException"><paramref name="characters"/> is null.</exception>
    public CharacterBuffer(char[] characters)
    {
        if (characters == null)
        {
            throw new ArgumentNullException("characters");
        }
        this.characters = new char[characters.Length];
        Array.Copy(characters, this.characters, characters.Length);
    }

    /// <summary>Creates a buffer over the characters of the given string.</summary>
    /// <param name="characters">Text to read; must not be null.</param>
    /// <exception cref="ArgumentNullException"><paramref name="characters"/> is null.</exception>
    public CharacterBuffer(String characters)
        : this(characters == null ? null : characters.ToCharArray())
    {
    }

    /// <summary>Remembers the current position on the position stack.</summary>
    public void Push()
    {
        positionStack.Push(position);
    }

    /// <summary>Restores the most recently remembered position and removes it from the stack.</summary>
    public void Pop()
    {
        this.position = positionStack.Pop();
    }

    /// <summary>Returns the character at absolute index <paramref name="n"/> without range handling.</summary>
    public char this[int n]
    {
        get { return this.characters[n]; }
    }

    /// <summary>Character at the current position (0xFFFF behind the end).</summary>
    public char Current
    {
        get { return CharAt(this.position); }
    }

    /// <summary>Character directly before the current position (0 at the start).</summary>
    public char Last { get { return CharAt(this.position - 1); } }

    /// <summary>Character directly after the current position (0xFFFF behind the end).</summary>
    public char Next { get { return CharAt(this.position + 1); } }

    /// <summary>Range-safe read of an absolute position.</summary>
    /// <param name="position">Absolute index into the buffer.</param>
    /// <returns>The character, 0xFFFF behind the end or 0 before the start.</returns>
    public char CharAt(int position)
    {
        if (position >= this.characters.Length)
            return (char)0xFFFF;
        if (position < 0)
            return (char)0;
        return this.characters[position];
    }

    /// <summary>Current read position; may be set freely.</summary>
    public int Position
    {
        get { return this.position; }
        set { this.position = value; }
    }

    /// <summary>Character two positions after the current one (0xFFFF behind the end).</summary>
    public char NextNext
    {
        // The former check tested position + 1 but indexed position + 2, which
        // failed when the current position was the second to last character.
        get { return CharAt(this.position + 2); }
    }

    /// <summary>Range-safe read relative to the current position.</summary>
    /// <param name="n">Offset from the current position.</param>
    public char next(int n)
    {
        return CharAt(this.position + n);
    }

    /// <summary>
    /// Advances by one character and returns the new current character.
    /// Note that this returns 0 behind the end, whereas <see cref="Current"/> yields 0xFFFF there.
    /// </summary>
    public char MoveNext()
    {
        this.position++;
        if (EndOfBuffer)
            return (char)0;

        return this.characters[this.position];
    }

    /// <summary>True when the position is at or behind the end of the buffer.</summary>
    public bool EndOfBuffer
    {
        get { return this.position >= this.characters.Length; }
    }

    /// <summary>
    /// Returns the characters between the most recently pushed position and the current position.
    /// </summary>
    /// <exception cref="InvalidOperationException">No position has been pushed.</exception>
    public char[] getSection()
    {
        // Peek() is the latest push. LINQ Last() enumerates a stack from the top
        // down and therefore returned the oldest push instead.
        int start = positionStack.Peek();
        return getSection(start, position - start);
    }

    /// <summary>Returns a copy of <paramref name="len"/> characters starting at <paramref name="start"/>.</summary>
    /// <param name="start">Absolute index of the first character.</param>
    /// <param name="len">Number of characters to copy.</param>
    public char[] getSection(int start, int len)
    {
        char[] result = new char[len];
        Array.Copy(this.characters, start, result, 0, len);
        return result;
    }

    /// <summary>Reads up to the next occurrence of <paramref name="ch"/>; see <see cref="find(char[])"/>.</summary>
    public char[] find(char ch)
    {
        return find(new char[] { ch });
    }

    /// <summary>
    /// Collects characters until one of the given terminators is found. The terminator
    /// is consumed but not returned. When no terminator exists, everything up to the
    /// end of the buffer is returned.
    /// </summary>
    /// <param name="ch">Terminating characters.</param>
    public char[] find(char[] ch)
    {
        List<char> collected = new List<char>();
        while (!EndOfBuffer)
        {
            char current = Current;
            MoveNext();
            if (Array.IndexOf(ch, current) >= 0)
            {
                return collected.ToArray();
            }
            collected.Add(current);
        }
        return collected.ToArray();
    }

    /// <summary>Reads up to the next unescaped <paramref name="find"/>; see <see cref="findUnescaped(char[])"/>.</summary>
    public char[] findUnescaped(char find)
    {
        return findUnescaped(new char[] { find });
    }

    /// <summary>
    /// Collects characters until one of the given terminators is found that is not
    /// preceded by a backslash. Escape sequences are returned unchanged (including the
    /// backslash); the terminator is consumed but not returned.
    /// </summary>
    /// <param name="find">Terminating characters.</param>
    /// <returns>The characters before the terminator, or null when the end of the buffer is reached first.</returns>
    public char[] findUnescaped(char[] find)
    {
        int start = position;

        while (!EndOfBuffer)
        {
            if (Array.IndexOf(find, Current) >= 0)
            {
                char[] section = getSection(start, position - start);
                MoveNext();
                return section;
            }

            if (Current == '\\')
            {
                // Step over the backslash so the escaped character is never tested as terminator.
                MoveNext();
            }
            MoveNext();
        }
        return null;
    }

    /// <summary>
    /// Reads the run of letters, digits and underscores at the current position.
    /// The position is left on the first character that does not belong to the run.
    /// </summary>
    /// <returns>The symbol characters; empty when the current character cannot start a symbol.</returns>
    public char[] findSymbol()
    {
        List<char> symbol = new List<char>();
        while (!EndOfBuffer)
        {
            char current = Current;
            bool isSymbolChar = char.IsDigit(current) || char.IsLetter(current) || (current == '_');
            if (!isSymbolChar)
                break;

            symbol.Add(current);
            MoveNext();
        }
#if DEBUG
        Console.WriteLine("findSymbol() = {0}",new String(symbol.ToArray()));
#endif
        return symbol.ToArray();
    }
}
|
||||
}
|
|
@ -0,0 +1,55 @@
|
|||
using System;
|
||||
namespace lexer.buffer
|
||||
{
|
||||
/// <summary>
/// Reads the inner text of a character list definition one item at a time.
/// An item is a plain character, an escaped character ("\c" or "\0xHHHH"),
/// the interval operator ".." or the end-of-buffer marker.
/// </summary>
public class DefinitionReader
{
    /// <summary>Value of <see cref="Current"/> for the interval operator "..".</summary>
    public const int OP_INTERVAL = 0x00010000;
    /// <summary>Value of <see cref="Current"/> once the definition is exhausted.</summary>
    public const int OP_EOB = 0x10000000;

    char[] definition;
    int position;

    int currentChar = -1;

    /// <summary>Creates a reader and positions it on the first item.</summary>
    /// <param name="definition">Definition text without the surrounding brackets.</param>
    /// <exception cref="ArgumentNullException"><paramref name="definition"/> is null.</exception>
    /// <exception cref="FormatException">The first item is a malformed escape sequence.</exception>
    public DefinitionReader(char[] definition)
    {
        if (definition == null)
        {
            throw new ArgumentNullException("definition");
        }
        this.definition = definition;
        this.position = 0;
        MoveNext();
    }

    /// <summary>The current item: a character code, <see cref="OP_INTERVAL"/> or <see cref="OP_EOB"/>.</summary>
    public int Current {
        get { return this.currentChar; }
    }

    /// <summary>Advances to the next item and returns it.</summary>
    /// <returns>The new value of <see cref="Current"/>.</returns>
    /// <exception cref="FormatException">An escape sequence is incomplete or not valid hexadecimal.</exception>
    public int MoveNext() {
        if (this.position >= this.definition.Length)
        {
            currentChar = OP_EOB;
            return currentChar;
        }

        char ch = this.definition[this.position++];
        if (ch == '\\')
        {
            currentChar = readEscaped();
        }
        else if ((ch == '.') && nextIs('.'))
        {
            this.position++;
            currentChar = OP_INTERVAL;
        }
        else
        {
            currentChar = ch;
        }

        return currentChar;
    }

    // True when an unread character exists and equals the expected one.
    private bool nextIs(char expected)
    {
        return (this.position < this.definition.Length) && (this.definition[this.position] == expected);
    }

    // Reads what follows a backslash: "0xHHHH" yields that character code,
    // everything else yields the following character itself.
    private int readEscaped()
    {
        if (this.position >= this.definition.Length)
        {
            throw new FormatException("Character definition ends with an unfinished escape sequence");
        }

        char escaped = this.definition[this.position++];
        if ((escaped != '0') || !nextIs('x'))
        {
            return escaped;
        }

        this.position++; // skip the 'x'
        if (this.position + 4 > this.definition.Length)
        {
            throw new FormatException("Escape sequence \\0x must be followed by four hexadecimal digits");
        }

        String hexvalue = new String(this.definition, this.position, 4);
        this.position += 4;
        return (char)int.Parse(
            hexvalue,
            System.Globalization.NumberStyles.HexNumber,
            System.Globalization.CultureInfo.InvariantCulture);
    }
}
|
||||
}
|
|
@ -0,0 +1,54 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text;
|
||||
namespace lexer.match
|
||||
{
|
||||
/// <summary>
/// A choice between its matchables: every branch is tried at the same buffer
/// position and the longest successful match is returned.
/// </summary>
public class Alternative : Sequence
{
    /// <summary>Creates an alternative without branches.</summary>
    public Alternative()
    {
    }

    /// <summary>
    /// Tries every branch at the current position; the buffer position is unchanged afterwards.
    /// </summary>
    /// <param name="chbuffer">Buffer to match against.</param>
    /// <returns>The parts of the longest matching branch (the first one on a tie), or null when no branch matches.</returns>
    public override MatchedPart[] Match(buffer.CharacterBuffer chbuffer)
    {
        MatchedPart[] best = null;
        int bestLength = -1;

        for (int i = 0; i < matchables.Count; i++)
        {
            chbuffer.Push();
            MatchedPart[] candidate = matchables[i].MatchNoticeable(chbuffer);
            chbuffer.Pop();

            if (candidate == null)
            {
                continue;
            }

            int candidateLength = candidate.MatchedLength();
            if ((best != null) && (candidateLength <= bestLength))
            {
                continue;
            }

            best = candidate;
            bestLength = candidateLength;
        }

        return best;
    }

    /// <summary>Returns the branches separated by " | " and enclosed in parentheses.</summary>
    public override string ToString()
    {
        List<string> branches = new List<string>(this.matchables.Count);
        foreach (Matchable branch in this.matchables)
        {
            branches.Add(branch.ToString(true));
        }

        return "( " + String.Join(" | ", branches.ToArray()) + ")";
    }
}
|
||||
}
|
|
@ -0,0 +1,203 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using lexer.buffer;
|
||||
using System.Threading;
|
||||
using System.Runtime.Remoting.Messaging;
|
||||
using System.Text;
|
||||
namespace lexer.match
|
||||
{
|
||||
|
||||
/// <summary>
/// Matches a single character against a character list definition such as
/// [a..z], [egt] or [a..i/d..f]. Everything after an unescaped '/' is excluded
/// from matching; exclusions take precedence over inclusions.
/// </summary>
public class CharacterGroup : Matchable
{
    // Inclusive character range.
    struct chinterval
    {
        public char first;
        public char last;

        public chinterval(char first, char last)
        {
            this.first = first;
            this.last = last;
        }

        public bool Match(char ch)
        {
            return (ch >= first) && (ch <= last);
        }

        public override string ToString()
        {
            return string.Format("[chinterval min=0x{0:X4} max=0x{1:X4}]", (int)this.first, (int)this.last);
        }
    }

    List<char> includeCharacters = new List<char>();
    List<chinterval> includeIntervals = new List<chinterval>();
    List<char> excludeCharacters = new List<char>();
    List<chinterval> excludeIntervals = new List<chinterval>();

    /// <summary>
    /// Parses a character list starting at the '[' under the buffer position and
    /// leaves the buffer behind the closing ']'.
    /// </summary>
    /// <param name="chbuffer">Buffer positioned on the opening '['.</param>
    /// <exception cref="FormatException">The definition does not start with '[' or is not closed by ']'.</exception>
    public CharacterGroup(CharacterBuffer chbuffer)
    {
        if (chbuffer.Current != '[')
        {
            throw new FormatException("CharacterGroup Definition must start with '['");
        }

        chbuffer.MoveNext();

        char[] idef = chbuffer.findUnescaped(new char[] { ']', '/' });
        if (idef == null)
        {
            // findUnescaped() reports a missing terminator as null.
            throw new FormatException("CharacterGroup Definition must end with ']'");
        }

        char[] edef = null;

        // The terminator has been consumed, so Last tells which one ended the include part.
        if (chbuffer.Last == '/')
        {
            edef = chbuffer.findUnescaped(']');
            if (edef == null)
            {
                throw new FormatException("CharacterGroup Definition must end with ']'");
            }
        }

#if DEBUG
        Console.WriteLine("CharacterGroup: include = {0}", new String(idef));
        Console.WriteLine("CharacterGroup: exclude = {0}", new String(edef));
#endif
        parseComponents(includeCharacters, includeIntervals, idef);
        if (edef != null)
        {
            parseComponents(excludeCharacters, excludeIntervals, edef);
        }
    }

    // Splits a definition into single characters and intervals. An interval takes the
    // character read before ".." as lower bound (0 if there is none) and the item
    // after it as upper bound (0xFFFF if the definition ends there).
    private void parseComponents(List<char> cList, List<chinterval> iList, char[] def)
    {
        DefinitionReader dr = new DefinitionReader(def);
        while (dr.Current != DefinitionReader.OP_EOB)
        {
            if (dr.Current == DefinitionReader.OP_INTERVAL)
            {
                char min = (char)0;
                char max = (char)0xffff;

                if (cList.Count > 0)
                {
                    min = cList.Last();
                    cList.RemoveAt(cList.Count - 1);
                }

                dr.MoveNext();

                if (dr.Current != DefinitionReader.OP_EOB)
                {
                    max = (char)dr.Current;
                }
                iList.Add(new chinterval(min, max));
            }
            else
            {
                cList.Add((char)dr.Current);
            }
            dr.MoveNext();
        }
    }

    /// <summary>Tests a single character against this group.</summary>
    /// <param name="ch">Character to test.</param>
    /// <returns>True when the character is included and not excluded.</returns>
    public bool Match(char ch)
    {
        if (contains(excludeCharacters, excludeIntervals, ch))
        {
            return false;
        }
        return contains(includeCharacters, includeIntervals, ch);
    }

    // True when the character lies in one of the intervals or equals one of the characters.
    private static bool contains(List<char> cList, List<chinterval> iList, char ch)
    {
        foreach (chinterval i in iList)
        {
            if (i.Match(ch))
            {
                return true;
            }
        }
        return cList.Contains(ch);
    }

    /// <summary>Matches the character at the current buffer position; the position is not changed.</summary>
    /// <param name="chbuffer">Buffer to match against.</param>
    /// <returns>One part holding the matched character, or null when it does not match or the buffer is exhausted.</returns>
    public override MatchedPart[] Match(CharacterBuffer chbuffer)
    {
        if (!chbuffer.EndOfBuffer && Match(chbuffer.Current))
        {
            return new MatchedPart[] { new MatchedPart(this, new char[] { chbuffer.Current }) };
        }
        return null;
    }

    /// <summary>
    /// Returns the group in definition syntax: included characters and intervals,
    /// followed by '/' and the excluded ones when exclusions exist.
    /// </summary>
    public override string ToString()
    {
        StringBuilder sb = new StringBuilder();
        sb.Append("[");
        appendComponents(sb, includeCharacters, includeIntervals);

        if ((excludeIntervals.Count > 0) || (excludeCharacters.Count > 0))
        {
            sb.Append("/");
            // This section used to repeat the include characters and dropped the intervals.
            appendComponents(sb, excludeCharacters, excludeIntervals);
        }

        sb.Append("]");

        return sb.ToString();
    }

    // Appends single characters first, then the intervals as "first..last".
    private static void appendComponents(StringBuilder sb, List<char> cList, List<chinterval> iList)
    {
        foreach (char ch in cList)
        {
            appendCharacter(sb, ch);
        }

        foreach (chinterval chi in iList)
        {
            appendCharacter(sb, chi.first);
            sb.Append("..");
            appendCharacter(sb, chi.last);
        }
    }

    // Characters up to 0x20 are written as \0xHHHH so they stay visible.
    private static void appendCharacter(StringBuilder sb, char ch)
    {
        if (ch <= 0x20)
        {
            sb.AppendFormat("\\0x{0:X4}", (int)ch);
        }
        else
        {
            sb.Append(ch);
        }
    }
}
|
||||
}
|
|
@ -0,0 +1,43 @@
|
|||
using System;
|
||||
using lexer.buffer;
|
||||
using System.Collections.Generic;
|
||||
namespace lexer.match
|
||||
{
|
||||
/// <summary>
/// Matches a fixed run of characters, written in a definition as a double-quoted
/// literal in which a backslash takes the following character literally.
/// </summary>
public class CharacterSequence : Matchable
{
    char[] sequence;

    /// <summary>
    /// Parses a quoted literal starting at the '"' under the buffer position and
    /// leaves the buffer behind the closing quote.
    /// </summary>
    /// <param name="chbuffer">Definition buffer positioned on the opening quote.</param>
    /// <exception cref="FormatException">The literal does not start with '"' or is not closed.</exception>
    public CharacterSequence(CharacterBuffer chbuffer)
    {
        if (chbuffer.Current != '"')
        {
            throw new FormatException("CharacterSequence must start with \"");
        }

        List<char> characters = new List<char>();

        chbuffer.MoveNext();
        while (true)
        {
            // Without this check an unterminated literal never left the loop.
            if (chbuffer.EndOfBuffer)
            {
                throw new FormatException("CharacterSequence must end with \"");
            }

            char ch = chbuffer.Current;
            if (ch == '"')
            {
                break;
            }

            if (ch == '\\')
            {
                chbuffer.MoveNext();
                if (chbuffer.EndOfBuffer)
                {
                    throw new FormatException("CharacterSequence must end with \"");
                }
                ch = chbuffer.Current;
            }

            characters.Add(ch);
            chbuffer.MoveNext();
        }

        sequence = characters.ToArray();
        chbuffer.MoveNext();
    }

    /// <summary>
    /// Compares the input at the current buffer position with the literal; the
    /// buffer position is unchanged afterwards.
    /// </summary>
    /// <param name="chbuffer">Input buffer to match against.</param>
    /// <returns>One part holding the literal, or null when the input differs or ends too early.</returns>
    public override MatchedPart[] Match(CharacterBuffer chbuffer)
    {
        // The input is compared from its current character on. The former version
        // required the input itself to start with a quote and skipped its first character.
        chbuffer.Push();
        foreach (char expected in sequence)
        {
            if (chbuffer.EndOfBuffer || (chbuffer.Current != expected))
            {
                chbuffer.Pop();
                return null;
            }
            chbuffer.MoveNext();
        }
        chbuffer.Pop();
        return new MatchedPart[] { new MatchedPart(this, this.sequence) };
    }

    /// <summary>Returns the literal in definition syntax, i.e. quoted and with '"' and '\' escaped.</summary>
    public override string ToString()
    {
        System.Text.StringBuilder sb = new System.Text.StringBuilder();
        sb.Append('"');
        foreach (char ch in sequence)
        {
            if ((ch == '"') || (ch == '\\'))
            {
                sb.Append('\\');
            }
            sb.Append(ch);
        }
        sb.Append('"');
        return sb.ToString();
    }
}
|
||||
}
|
|
@ -0,0 +1,53 @@
|
|||
using System;
|
||||
using lexer.buffer;
|
||||
using System.Collections.Generic;
|
||||
namespace lexer.match
|
||||
{
|
||||
/// <summary>
/// Combines a matchable with a repetition interval: the matchable has to match
/// at least <see cref="Minimum"/> times and is matched at most <see cref="Maximum"/> times.
/// </summary>
public class Expression : Matchable
{
    /// <summary>The repeated matchable.</summary>
    public Matchable Matchable { get; private set; }

    /// <summary>Minimum number of repetitions required for a match.</summary>
    public int Minimum { get; private set; }
    /// <summary>Maximum number of repetitions consumed.</summary>
    public int Maximum { get; private set; }

    /// <summary>Creates an expression; by default the matchable has to match exactly once.</summary>
    /// <param name="matchable">The matchable to repeat.</param>
    /// <param name="minimum">Minimum number of repetitions.</param>
    /// <param name="maximum">Maximum number of repetitions.</param>
    public Expression(Matchable matchable,int minimum = 1,int maximum = 1)
    {
        this.Matchable = matchable;
        this.Minimum = minimum;
        this.Maximum = maximum;
    }

    /// <summary>
    /// Matches the inner matchable repeatedly, starting at the current buffer
    /// position; the buffer position is unchanged afterwards.
    /// </summary>
    /// <param name="chbuffer">Buffer to match against.</param>
    /// <returns>The parts of all repetitions (possibly none), or null when fewer than <see cref="Minimum"/> repetitions matched.</returns>
    public override MatchedPart[] Match(CharacterBuffer chbuffer){
        List<MatchedPart> matchedParts = new List<MatchedPart>();
        int n;
        chbuffer.Push();

        for (n = 0; n < Maximum;n++){
            MatchedPart[] mparts = Matchable.MatchNoticeable(chbuffer);
            if (mparts == null){
                break;
            }
            matchedParts.AddRange(mparts);

            int consumed = mparts.MatchedLength();
            if (consumed == 0){
                // An empty match does not advance the buffer, so every further
                // repetition would succeed the same way. Count them all at once
                // instead of looping up to Maximum (possibly int.MaxValue) times.
                n = Maximum;
                break;
            }
            chbuffer.Position += consumed;
        }

        chbuffer.Pop();

        if (n < Minimum){
            return null;
        }

        return matchedParts.ToArray();
    }

    /// <summary>Returns the expression as "matchable{min,max}"; 0 and int.MaxValue are left out.</summary>
    public override string ToString()
    {
        return String.Format("{0}{{{1},{2}}}",
                             this.Matchable.ToString(true),
                             Minimum == 0 ? "" : Minimum.ToString(),
                             Maximum == int.MaxValue ? "" : Maximum.ToString()
                            );
    }
}
|
||||
}
|
|
@ -0,0 +1,98 @@
|
|||
using System;
|
||||
using lexer.buffer;
|
||||
using System.Collections.Generic;
|
||||
namespace lexer.match
|
||||
{
|
||||
/// <summary>
/// Base class of everything that can be matched against a <see cref="CharacterBuffer"/>.
/// Matchables created with a name are registered in a process-wide table and can be
/// looked up with <see cref="getNamedMatchable(String)"/>.
/// </summary>
public abstract class Matchable
{
    public event MatchedDelegate OnMatched;

    /// <summary>Name of this matchable, or null when it is anonymous.</summary>
    public String Name { get; private set; }

    /// <summary>When set, <see cref="MatchNoticeable"/> merges all matched parts into a single part owned by this matchable.</summary>
    public bool Grouping { get; set; }
    /// <summary>When set, parts owned by this matchable are reported by MatchedPartArray.Notice.</summary>
    public bool Notice { get; set; }

    /// <summary>Creates the matchable and, when a name is given, registers it under that name.</summary>
    /// <param name="name">Name to register, or null for an anonymous matchable.</param>
    /// <exception cref="ArgumentException">A matchable with the same name is already registered.</exception>
    protected Matchable(String name = null)
    {
        this.Name = name;

        if (name != null)
        {
            namedMatchables.Add(name, this);
        }
    }

    /// <summary>
    /// Matches like <see cref="Match"/> and applies <see cref="Grouping"/> to the result.
    /// </summary>
    /// <param name="chbuffer">Buffer to match against.</param>
    /// <returns>The matched parts (a single merged part when grouping), or null when nothing matched.</returns>
    public MatchedPart[] MatchNoticeable(CharacterBuffer chbuffer){
        MatchedPart[] mparts = Match(chbuffer);
        if (Grouping && (mparts != null)){
            MatchedPart mp = new MatchedPart(this, mparts.MatchingCharacters());
            return new MatchedPart[] { mp };
        }
        return mparts;
    }

    /// <summary>
    /// Matches this matchable starting at the current position of the buffer.
    /// </summary>
    /// <param name="chbuffer">Buffer to match against.</param>
    /// <returns>The matched parts when a successful match is found, null otherwise.</returns>
    public abstract MatchedPart[] Match(CharacterBuffer chbuffer);

    /// <summary>
    /// Fallback text for subclasses without their own representation: the name,
    /// or the type name for anonymous matchables.
    /// </summary>
    public override string ToString()
    {
        // Must not delegate to ToString(bool): that overload falls back to this
        // method, and the two used to call each other without end.
        return this.Name ?? GetType().Name;
    }

    /// <summary>Returns the name when requested and available, the full representation otherwise.</summary>
    /// <param name="useSymbol">True to prefer the name over the full representation.</param>
    public virtual String ToString(bool useSymbol)
    {
        if (useSymbol && (this.Name != null))
        {
            return this.Name;
        }
        return ToString();
    }

    static Dictionary<string, Matchable> namedMatchables = new Dictionary<string, Matchable>();

    /// <summary>Looks up a registered matchable by name.</summary>
    /// <param name="name">Name to look up.</param>
    /// <returns>The registered matchable, or null when the name is null or unknown.</returns>
    public static Matchable getNamedMatchable(String name)
    {
        if (name == null)
        {
            return null;
        }

        Matchable found;
        return namedMatchables.TryGetValue(name, out found) ? found : null;
    }

    /// <summary>Looks up a registered matchable by name given as characters.</summary>
    /// <param name="name">Name to look up.</param>
    /// <returns>The registered matchable, or null when the name is unknown.</returns>
    public static Matchable getNamedMatchable(char[] name)
    {
        return getNamedMatchable(new String(name));
    }

    /// <summary>
    /// Stands in for a named matchable that was not yet registered when the reference
    /// was parsed; the name is resolved on the first match. The stand-in itself is not registered.
    /// </summary>
    public class DeferredMatchable : Matchable{

        Matchable matchable;

        /// <summary>Creates a reference to the matchable with the given name.</summary>
        public DeferredMatchable(char[] name)
            :this(new String(name)){}

        /// <summary>Creates a reference to the matchable with the given name.</summary>
        public DeferredMatchable(String name)
            :base(){
            Name = name;
            matchable = null;
        }

        /// <summary>Resolves the referenced matchable if necessary and matches it.</summary>
        /// <param name="chbuffer">Buffer to match against.</param>
        /// <returns>The result of the referenced matchable.</returns>
        /// <exception cref="KeyNotFoundException">No matchable is registered under the name.</exception>
        public override MatchedPart[] Match(CharacterBuffer chbuffer)
        {
            if (matchable == null){
                matchable = getNamedMatchable(Name);
            }
            if (matchable == null)
            {
                throw new KeyNotFoundException(String.Format("Deferred matchable '{0}' was not found", Name));
            }
            // Go through MatchNoticeable so the target's grouping applies exactly as it
            // does for a direct reference, independent of the token definition order.
            return matchable.MatchNoticeable(chbuffer);
        }
    }
}
|
||||
}
|
|
@ -0,0 +1,8 @@
|
|||
using System;
|
||||
|
||||
namespace lexer.match {
|
||||
|
||||
/// <summary>Signature of handlers that receive a single <see cref="MatchedPart"/>.</summary>
/// <param name="matchedPart">The matched part handed to the handler.</param>
public delegate void MatchedDelegate(MatchedPart matchedPart);
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,60 @@
|
|||
using System;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Collections.Generic;
|
||||
namespace lexer.match
|
||||
{
|
||||
/// <summary>Callback signature that receives a single <see cref="MatchedPart"/>.</summary>
/// <param name="matchedPart">The matched part handed to the callback.</param>
public delegate void MatchedPartNoticeDelegate(MatchedPart matchedPart);
|
||||
|
||||
/// <summary>
/// Result of a successful match: the characters that were matched together
/// with the matchable that owns them.
/// </summary>
public class MatchedPart
{
    /// <summary>Creates a part for the given owner and characters.</summary>
    /// <param name="matchable">The matchable this part belongs to.</param>
    /// <param name="characters">The matched characters.</param>
    public MatchedPart(Matchable matchable, char[] characters)
    {
        Matchable = matchable;
        MatchedCharacters = characters;
    }

    /// <summary>The matchable this part belongs to.</summary>
    public Matchable Matchable { get; private set; }

    /// <summary>The matched characters.</summary>
    public char[] MatchedCharacters { get; private set; }

    /// <summary>Number of matched characters.</summary>
    public int Length
    {
        get
        {
            char[] matched = MatchedCharacters;
            return matched.Length;
        }
    }

    /// <summary>The matched characters as a new string.</summary>
    public String MatchedString
    {
        get
        {
            return new String(MatchedCharacters);
        }
    }
}
|
||||
|
||||
/// <summary>
/// Extension methods for match results. A null array stands for "no match" and is
/// treated like an empty result by all methods.
/// </summary>
public static class MatchedPartArray {

    /// <summary>Returns the total number of matched characters.</summary>
    /// <param name="parts">Match result; may be null.</param>
    /// <returns>The sum of all part lengths, 0 for null.</returns>
    public static int MatchedLength(this MatchedPart[] parts){
        if (parts == null){
            return 0;
        }
        int l = 0;
        foreach (MatchedPart p in parts)
            l += p.Length;
        return l;
    }

    /// <summary>Returns the characters of all parts in order.</summary>
    /// <param name="parts">Match result; may be null.</param>
    /// <returns>The concatenated characters, empty for null or an empty result.</returns>
    public static char[] MatchingCharacters(this MatchedPart[] parts) {
        if ((parts == null)||(parts.Length == 0)){
            return new char[0];
        }
        List<char> characters = new List<char>();
        foreach (MatchedPart mp in parts){
            characters.AddRange(mp.MatchedCharacters);
        }
        return characters.ToArray();
    }

    /// <summary>Returns the characters as a string.</summary>
    /// <param name="chars">Characters to convert.</param>
    public static String AsString(this char[] chars){
        return new String(chars);
    }

    /// <summary>Calls the callback for every part whose matchable has its Notice flag set.</summary>
    /// <param name="matchedParts">Match result; may be null.</param>
    /// <param name="notice">Callback to invoke; nothing happens when it is null.</param>
    public static void Notice(this MatchedPart[] matchedParts,MatchedPartNoticeDelegate notice){
        // Consistent with MatchingCharacters(): a failed match (null) reports nothing.
        if ((matchedParts == null)||(notice == null)){
            return;
        }
        foreach (MatchedPart mpart in matchedParts){
            if (mpart.Matchable.Notice){
                notice(mpart);
            }
        }
    }
}
|
||||
|
||||
}
|
|
@ -0,0 +1,54 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text;
|
||||
namespace lexer.match
|
||||
{
|
||||
/// <summary>
/// An ordered list of matchables that have to match one after another.
/// </summary>
public class Sequence : Matchable
{
    public List<Matchable> matchables = new List<Matchable>();

    /// <summary>Creates an anonymous, empty sequence.</summary>
    public Sequence()
    {
    }

    /// <summary>Creates an empty sequence with the given name.</summary>
    /// <param name="name">Name handed to the base class.</param>
    public Sequence(String name)
        : base(name)
    {
    }

    /// <summary>Appends a matchable to the end of the sequence.</summary>
    /// <param name="matchable">The matchable to append.</param>
    public void addMatchable(Matchable matchable)
    {
        matchables.Add(matchable);
    }

    /// <summary>A snapshot of the matchables of this sequence.</summary>
    public Matchable[] Matchables
    {
        get { return matchables.ToArray(); }
    }

    /// <summary>
    /// Matches all elements in order, starting at the current buffer position;
    /// the buffer position is unchanged afterwards.
    /// </summary>
    /// <param name="chbuffer">Buffer to match against.</param>
    /// <returns>The parts of all elements in order, or null as soon as one element does not match.</returns>
    public override MatchedPart[] Match(buffer.CharacterBuffer chbuffer)
    {
        List<MatchedPart> collected = new List<MatchedPart>();
        bool failed = false;

        chbuffer.Push();
        for (int index = 0; index < matchables.Count && !failed; index++)
        {
            MatchedPart[] elementParts = matchables[index].MatchNoticeable(chbuffer);
            if (elementParts != null)
            {
                collected.AddRange(elementParts);
                // Step over what this element matched before trying the next one.
                chbuffer.Position += elementParts.MatchedLength();
            }
            else
            {
                failed = true;
            }
        }
        chbuffer.Pop();

        if (failed)
        {
            return null;
        }
        return collected.ToArray();
    }

    /// <summary>Returns the elements separated by blanks and enclosed in parentheses.</summary>
    public override string ToString()
    {
        StringBuilder text = new StringBuilder("( ");
        for (int index = 0; index < this.matchables.Count; index++)
        {
            text.Append(this.matchables[index].ToString(true)).Append(' ');
        }
        return text.Append(")").ToString();
    }
}
|
||||
}
|
Loading…
Reference in New Issue