master
Harald Wolff 2017-11-23 13:04:51 +01:00
parent 22212dedc5
commit 46f7b96393
13 changed files with 708 additions and 2 deletions

164
CharBuffer.cs 100644
View File

@ -0,0 +1,164 @@
using System;
using sharp.extensions;
using System.Collections.Generic;
using System.Linq;
namespace sharp.parser
{
/// <summary>
/// A cursor over a character array that can also report the line number, the
/// column and the text of the line the cursor is on.
/// </summary>
/// <remarks>
/// Line information is derived from the cursor position and a table of line
/// start offsets that is built once at construction. It therefore stays correct
/// no matter how the cursor is moved (<see cref="MoveNext"/>,
/// <see cref="MoveBack"/> or the <see cref="Position"/> setter). Lines are
/// separated by '\n' only; a preceding '\r' is treated as part of the line.
/// </remarks>
public class CharBuffer
{
    char[] chars;
    int position;

    // Offset of the first character of every line, in ascending order:
    // 0, followed by the index right after each '\n'.
    int[] lineStarts;

    /// <summary>The backing character array itself (not a copy).</summary>
    public char[] Characters { get { return this.chars; } }

    /// <summary>Creates a buffer that reads directly from <paramref name="source"/>.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
    public CharBuffer(char[] source)
    {
        if (source == null)
        {
            throw new ArgumentNullException("source");
        }
        this.chars = source;
        this.initialize();
    }

    /// <summary>Creates a buffer over a copy of the characters of <paramref name="source"/>.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
    public CharBuffer(string source)
    {
        if (source == null)
        {
            throw new ArgumentNullException("source");
        }
        this.chars = source.ToCharArray();
        this.initialize();
    }

    // Resets the cursor and records where every line begins.
    private void initialize()
    {
        this.position = 0;

        List<int> starts = new List<int>();
        starts.Add(0);
        for (int n = 0; n < this.chars.Length; n++)
        {
            if (this.chars[n] == '\n')
            {
                // The next line begins after the line feed, not on it.
                starts.Add(n + 1);
            }
        }
        this.lineStarts = starts.ToArray();
    }

    // Zero-based index of the line containing offset pos (0 <= pos <= chars.Length).
    private int lineIndexOf(int pos)
    {
        int found = Array.BinarySearch(this.lineStarts, pos);
        // A negative result is the complement of the index of the first line start
        // greater than pos; the line containing pos is the one just before it.
        return found >= 0 ? found : ~found - 1;
    }

    // Offset one past the last character of the given line, excluding its '\n'.
    private int lineEndOf(int line)
    {
        if (line + 1 < this.lineStarts.Length)
        {
            return this.lineStarts[line + 1] - 1;
        }
        return this.chars.Length;
    }

    // Text of the given line without its terminating '\n'.
    private string lineText(int line)
    {
        int start = this.lineStarts[line];
        return new string(this.chars, start, lineEndOf(line) - start);
    }

    /// <summary>
    /// Returns the text of the line containing offset <paramref name="pos"/>,
    /// without the terminating '\n'.
    /// </summary>
    /// <returns>The line text, or null when <paramref name="pos"/> lies outside the buffer.</returns>
    public string getLineAt(int pos)
    {
        if (pos < 0 || pos >= this.chars.Length)
        {
            return null;
        }
        return lineText(lineIndexOf(pos));
    }

    /// <summary>
    /// Offset of the cursor. Assigned values are clamped to the range from 0 to the
    /// buffer length (the length itself means "end of buffer").
    /// </summary>
    public int Position
    {
        get { return position; }
        set { this.position = Math.Max(0, Math.Min(value, chars.Length)); }
    }

    /// <summary>Advances the cursor by one character.</summary>
    /// <returns>false when the cursor already is at the end of the buffer.</returns>
    public bool MoveNext()
    {
        if (position < this.chars.Length)
        {
            position++;
            return true;
        }
        return false;
    }

    /// <summary>Moves the cursor back by one character.</summary>
    /// <returns>false when the cursor already is at the start of the buffer.</returns>
    public bool MoveBack()
    {
        if (position > 0)
        {
            position--;
            return true;
        }
        return false;
    }

    /// <summary>True when the cursor is past the last character.</summary>
    public bool EndOfBuffer()
    {
        return (this.position >= this.chars.Length);
    }

    /// <summary>
    /// Skips every character with a code of 0x20 or below, stopping at the first
    /// other character or at the end of the buffer.
    /// </summary>
    public void BypassWhiteSpace()
    {
        while (!EndOfBuffer() && Current <= 0x20)
        {
            MoveNext();
        }
    }

    /// <summary>One-based number of the line the cursor is on.</summary>
    public int CurrentLineNumber { get { return lineIndexOf(this.position) + 1; } }

    /// <summary>One-based column of the cursor within its line.</summary>
    public int CurrentLinePosition { get { return this.position - this.lineStarts[lineIndexOf(this.position)] + 1; } }

    /// <summary>Text of the line the cursor is on, without the terminating '\n'.</summary>
    public string CurrentLine { get { return lineText(lineIndexOf(this.position)); } }

    /// <summary>The character just before the cursor.</summary>
    /// <exception cref="IndexOutOfRangeException">The cursor is at the start of the buffer.</exception>
    public char Last
    {
        get
        {
            if (position > 0)
            {
                return this.chars[position - 1];
            }
            throw new IndexOutOfRangeException("No character before the first one");
        }
    }

    /// <summary>The character at the cursor.</summary>
    /// <exception cref="IndexOutOfRangeException">The cursor is at the end of the buffer.</exception>
    public char Current
    {
        get
        {
            if (position < this.chars.Length)
            {
                return this.chars[position];
            }
            throw new IndexOutOfRangeException("No character after the last one");
        }
    }

    /// <summary>The character one past the cursor.</summary>
    /// <exception cref="IndexOutOfRangeException">There is no character after the current one.</exception>
    public char Next
    {
        get
        {
            if (position < this.chars.Length - 1)
            {
                return this.chars[position + 1];
            }
            throw new IndexOutOfRangeException("No character after the last one");
        }
    }

    /// <summary>Returns <paramref name="len"/> characters starting at the cursor.</summary>
    public string Following(int len)
    {
        // NOTE(review): Segment comes from sharp.extensions; what it does when the
        // requested range runs past the array is not visible here - confirm.
        return new string(this.chars.Segment(position, len));
    }

    /// <summary>Returns the <paramref name="len"/> characters that end just before the cursor.</summary>
    public string Preceding(int len)
    {
        // NOTE(review): same caveat as Following when len exceeds the cursor offset.
        return new string(this.chars.Segment(position - len, len));
    }

    /// <summary>
    /// Consumes the character at the cursor, which must equal <paramref name="ch"/>.
    /// </summary>
    /// <exception cref="ParserFormatException">The current character differs from <paramref name="ch"/>.</exception>
    /// <exception cref="IndexOutOfRangeException">The cursor is at the end of the buffer.</exception>
    public void Pass(char ch)
    {
        if (Current != ch)
        {
            throw new ParserFormatException(String.Format("Expected {0}, but got {1}", ch, Current), CurrentLineNumber, CurrentLinePosition, CurrentLine);
        }
        MoveNext();
    }
}
}

77
CharGroup.cs 100644
View File

@ -0,0 +1,77 @@
using System;
using sharp.extensions;
namespace sharp.parser
{
/// <summary>
/// A set of characters, used to describe which characters are accepted at one
/// step of a lexer or parser path.
/// </summary>
public class CharGroup
{
    /// <summary>'0' through '9'.</summary>
    public static readonly CharGroup digit = new CharGroup('0', '9');
    /// <summary>'0' only.</summary>
    public static readonly CharGroup zero = new CharGroup('0');
    /// <summary>'1' through '9'.</summary>
    public static readonly CharGroup digit19 = new CharGroup('1', '9');
    /// <summary>'+' and '-'.</summary>
    public static readonly CharGroup plusminus = new CharGroup(new char[] { '+', '-' });
    /// <summary>'-' only.</summary>
    // Previously a copy of plusminus, which made "minus" accept '+' as well.
    public static readonly CharGroup minus = new CharGroup(new char[] { '-' });
    /// <summary>'+' only.</summary>
    public static readonly CharGroup plus = new CharGroup(new char[] { '+' });
    /// <summary>'a' through 'z'.</summary>
    public static readonly CharGroup az = new CharGroup('a', 'z');
    /// <summary>'A' through 'Z'.</summary>
    public static readonly CharGroup AZ = new CharGroup('A', 'Z');
    /// <summary>Combination of <see cref="az"/> and <see cref="AZ"/>.</summary>
    // Must stay below az and AZ: static fields are initialized in textual order.
    public static readonly CharGroup aAzZ = az + AZ;
    /// <summary>Line feed (0x0A).</summary>
    public static readonly CharGroup LF = new CharGroup((char)0x0A);
    /// <summary>Carriage return (0x0D).</summary>
    public static readonly CharGroup CR = new CharGroup((char)0x0D);
    /// <summary>Horizontal tab (0x09).</summary>
    public static readonly CharGroup HTAB = new CharGroup((char)0x09);
    /// <summary>The characters 0x09, 0x0A, 0x0B, 0x0C, 0x0D and 0x20.</summary>
    public static readonly CharGroup WS = new CharGroup(new char[] { (char)0x09, (char)0x0A, (char)0x0B, (char)0x0C, (char)0x0D, (char)0x20 });
    /// <summary>'a'-'f', 'A'-'F' and the decimal digits.</summary>
    public static readonly CharGroup hexdigits = new CharGroup('a', 'f') + new CharGroup('A', 'F') + digit;

    char[] chars;

    /// <summary>Creates a group holding the single character <paramref name="ch"/>.</summary>
    public CharGroup(char ch)
    {
        chars = new char[] { ch };
    }

    /// <summary>Creates a group from the given characters.</summary>
    public CharGroup(char[] chars)
    {
        // NOTE(review): Segment(0) comes from sharp.extensions; presumably it
        // returns a copy from index 0 to the end - confirm.
        this.chars = chars.Segment(0);
    }

    /// <summary>
    /// Creates a group holding every character from <paramref name="first"/> to
    /// <paramref name="last"/>, both inclusive.
    /// </summary>
    public CharGroup(char first, char last)
    {
        int span = (int)last - (int)first;
        this.chars = new char[span + 1];
        for (int n = 0; n <= span; n++)
        {
            this.chars[n] = (char)(first + n);
        }
    }

    /// <summary>True when <paramref name="ch"/> is a member of this group.</summary>
    public bool Contains(char ch)
    {
        return Array.IndexOf(chars, ch) >= 0;
    }

    /// <summary>True when this group and <paramref name="other"/> share at least one character.</summary>
    public bool Intersects(CharGroup other)
    {
        foreach (char ch in chars)
        {
            if (other.Contains(ch))
            {
                return true;
            }
        }
        return false;
    }

    /// <summary>Builds a new group from the characters of both operands.</summary>
    public static CharGroup operator +(CharGroup cg1, CharGroup cg2)
    {
        // Combine is a sharp.extensions helper; its handling of duplicates is not visible here.
        return new CharGroup(cg1.chars.Combine(cg2.chars));
    }

    /// <summary>Builds a new group from <paramref name="cg1"/> with the characters of <paramref name="cg2"/> taken out.</summary>
    public static CharGroup operator -(CharGroup cg1, CharGroup cg2)
    {
        // Remove is a sharp.extensions helper.
        return new CharGroup(cg1.chars.Remove(cg2.chars));
    }

    /// <summary>Returns the member characters wrapped in a "[CharGroup '...']" label.</summary>
    public override string ToString()
    {
        return string.Format("[CharGroup '{0}']", new string(this.chars));
    }
}
}

59
Lexer.cs 100644
View File

@ -0,0 +1,59 @@
using System;
using System.Collections.Generic;
namespace sharp.parser
{
/// <summary>
/// Turns the contents of a <see cref="CharBuffer"/> into tokens by trying an
/// ordered list of token definitions at the current buffer position.
/// </summary>
public class Lexer
{
    List<TokenDefinition> tokenDefinitions = new List<TokenDefinition>();

    /// <summary>Creates a lexer without any token definitions.</summary>
    public Lexer()
    {
    }

    /// <summary>Creates a lexer that tries the given definitions in array order.</summary>
    public Lexer(TokenDefinition[] tokenDefinitions)
    {
        this.tokenDefinitions.AddRange(tokenDefinitions);
    }

    /// <summary>Appends a definition; it is tried after all definitions added earlier.</summary>
    public void AddTokenDefinition(TokenDefinition tokenDefinition)
    {
        this.tokenDefinitions.Add(tokenDefinition);
    }

    /// <summary>Removes the first occurrence of the given definition, if present.</summary>
    public void RemoveTokenDefinition(TokenDefinition tokenDefinition)
    {
        this.tokenDefinitions.Remove(tokenDefinition);
    }

    /// <summary>
    /// Tokenizes <paramref name="buffer"/> from its current position to its end.
    /// At each position the first definition that produces a token wins.
    /// </summary>
    /// <returns>The tokens in input order.</returns>
    /// <exception cref="FormatException">No definition matches before the end of the buffer is reached.</exception>
    public Token[] parse(CharBuffer buffer)
    {
        List<Token> found = new List<Token>();
        bool stuck = false;

        while (!stuck && !buffer.EndOfBuffer())
        {
            Token match = null;
            for (int i = 0; match == null && i < tokenDefinitions.Count; i++)
            {
                match = tokenDefinitions[i].tryParse(buffer);
            }

            if (match == null)
            {
                stuck = true;
            }
            else
            {
                // Creating the token already moved the buffer past it.
                found.Add(match);
            }
        }

        if (buffer.EndOfBuffer())
        {
            return found.ToArray();
        }
        throw new FormatException(String.Format("Unexpected character at line {0} position {1}. '{2}'", buffer.CurrentLineNumber, buffer.CurrentLinePosition, buffer.Current));
    }
}
}

148
LexerPathSegment.cs 100644
View File

@ -0,0 +1,148 @@
using System;
using System.Collections.Generic;
namespace sharp.parser
{
/// <summary>
/// One node in a graph of character matchers. A node with a
/// <see cref="CharGroup"/> matches exactly one character and then hands over to
/// its followers; a node without one only groups alternative followers.
/// </summary>
public class LexerPathSegment
{
    /// <summary>Characters accepted by this node, or null for a pure grouping node.</summary>
    public CharGroup CharGroup { get; private set; }

    /// <summary>Whether a match may end on this node when no follower matches.</summary>
    public bool MayFinish { get; set; }

    /// <summary>A snapshot copy of the followers, in the order they are tried.</summary>
    public LexerPathSegment[] Followers { get { return followers.ToArray(); } }

    private List<LexerPathSegment> followers = new List<LexerPathSegment>();

    /// <summary>Creates a grouping node without followers.</summary>
    public LexerPathSegment()
    {
        this.CharGroup = null;
    }

    /// <summary>Creates a grouping node with one follower.</summary>
    public LexerPathSegment(LexerPathSegment follower)
    {
        this.CharGroup = null;
        this.AddFollower(follower);
    }

    /// <summary>Creates a node accepting the characters of <paramref name="charGroup"/>.</summary>
    public LexerPathSegment(CharGroup charGroup)
    {
        this.CharGroup = charGroup;
    }

    /// <summary>Creates a node accepting any of <paramref name="chars"/>.</summary>
    public LexerPathSegment(char[] chars)
        : this(new CharGroup(chars))
    {
    }

    /// <summary>Creates a node accepting only <paramref name="ch"/>.</summary>
    public LexerPathSegment(char ch)
        : this(new CharGroup(ch))
    {
    }

    /// <summary>Creates a node accepting the inclusive range <paramref name="first"/>..<paramref name="last"/>.</summary>
    public LexerPathSegment(char first, char last)
        : this(new CharGroup(first, last))
    {
    }

    /// <summary>Creates a character node and sets <see cref="MayFinish"/>.</summary>
    public LexerPathSegment(CharGroup charGroup, bool mayFinish)
    {
        this.CharGroup = charGroup;
        this.MayFinish = mayFinish;
    }

    /// <summary>Creates a node accepting any of <paramref name="chars"/> and sets <see cref="MayFinish"/>.</summary>
    public LexerPathSegment(char[] chars, bool mayFinish)
        : this(new CharGroup(chars), mayFinish)
    {
    }

    /// <summary>Creates a node accepting only <paramref name="ch"/> and sets <see cref="MayFinish"/>.</summary>
    public LexerPathSegment(char ch, bool mayFinish)
        : this(new CharGroup(ch), mayFinish)
    {
    }

    /// <summary>Creates a node accepting an inclusive range and sets <see cref="MayFinish"/>.</summary>
    public LexerPathSegment(char first, char last, bool mayFinish)
        : this(new CharGroup(first, last), mayFinish)
    {
    }

    /// <summary>Creates a character node with one follower.</summary>
    public LexerPathSegment(CharGroup charGroup, LexerPathSegment follower)
    {
        this.CharGroup = charGroup;
        this.AddFollower(follower);
    }

    /// <summary>Creates a node accepting any of <paramref name="chars"/>, with one follower.</summary>
    public LexerPathSegment(char[] chars, LexerPathSegment follower)
        : this(new CharGroup(chars), follower)
    {
    }

    /// <summary>Creates a node accepting only <paramref name="ch"/>, with one follower.</summary>
    public LexerPathSegment(char ch, LexerPathSegment follower)
        : this(new CharGroup(ch), follower)
    {
    }

    /// <summary>Creates a node accepting an inclusive range, with one follower.</summary>
    public LexerPathSegment(char first, char last, LexerPathSegment follower)
        : this(new CharGroup(first, last), follower)
    {
    }

    /// <summary>Appends <paramref name="path"/> and then every element of <paramref name="paths"/>.</summary>
    public void AddFollower(LexerPathSegment path, params LexerPathSegment[] paths)
    {
        AddFollower(path);
        foreach (LexerPathSegment p in paths)
        {
            AddFollower(p);
        }
    }

    /// <summary>Appends a follower; followers are tried in insertion order.</summary>
    public void AddFollower(LexerPathSegment path)
    {
        followers.Add(path);
    }

    /// <summary>Removes the first occurrence of the given follower, if present.</summary>
    public void RemoveFollower(LexerPathSegment path)
    {
        followers.Remove(path);
    }

    /// <summary>
    /// Tries to match the path starting at this node against the characters at the
    /// buffer's current position. The first follower that matches wins. The buffer
    /// position is the same on return as on entry.
    /// </summary>
    /// <returns>The number of characters matched (always positive), or -1 when the path does not match.</returns>
    public int walk(CharBuffer buffer)
    {
        if (this.CharGroup == null)
        {
            // Grouping node: consumes nothing itself, just delegates.
            foreach (LexerPathSegment next in followers)
            {
                int n = next.walk(buffer);
                if (n > 0)
                {
                    return n;
                }
            }
            return -1;
        }

        // A character node needs a character to consume. At the end of the buffer
        // there is none, so reporting a one-character match here would make the
        // returned length exceed the input (and reading Current would throw).
        if (buffer.EndOfBuffer() || !this.CharGroup.Contains(buffer.Current))
        {
            return -1;
        }

        buffer.MoveNext();
        int tail = -1;
        foreach (LexerPathSegment next in followers)
        {
            tail = next.walk(buffer);
            if (tail > 0)
            {
                break;
            }
        }
        buffer.MoveBack();

        if (tail > 0)
        {
            // This node's character plus whatever the follower matched.
            return tail + 1;
        }
        return MayFinish ? 1 : -1;
    }
}
}

32
Parser.cs 100644
View File

@ -0,0 +1,32 @@
using System;
using System.Collections.Generic;
using System.Collections;
namespace sharp.parser
{
/// <summary>
/// Base class for parsers that first tokenize their input with a
/// <see cref="Lexer"/> and then build a result of type T from the tokens.
/// </summary>
public abstract class Parser<T>
{
    /// <summary>The lexer built from the token definitions given to the constructor.</summary>
    protected Lexer Lexer { get; private set; }

    /// <summary>Creates the parser with a lexer over <paramref name="tokenDefinitions"/>.</summary>
    protected Parser(TokenDefinition[] tokenDefinitions)
    {
        Lexer = new Lexer(tokenDefinitions);
    }

    /// <summary>Runs only the lexer over <paramref name="source"/>.</summary>
    /// <returns>The tokens produced by the lexer.</returns>
    public Token[] Tokenize(char[] source)
    {
        CharBuffer buffer = new CharBuffer(source);
        return Lexer.parse(buffer);
    }

    /// <summary>Parses the characters of <paramref name="source"/>.</summary>
    public T Parse(string source)
    {
        char[] characters = source.ToCharArray();
        return Parse(characters);
    }

    /// <summary>Tokenizes <paramref name="source"/> and passes the tokens to <see cref="ParseTokens"/>.</summary>
    public T Parse(char[] source)
    {
        return ParseTokens(Tokenize(source));
    }

    /// <summary>Builds the parse result from the complete token list.</summary>
    protected abstract T ParseTokens(Token[] tokens);
}
}

View File

@ -0,0 +1,18 @@
using System;
namespace sharp.parser
{
/// <summary>
/// Error raised for malformed input; carries the location and the text of the
/// offending line in addition to the message.
/// </summary>
public class ParserFormatException : Exception
{
    /// <summary>Text of the line the error refers to.</summary>
    public String Line { get; private set; }

    /// <summary>Line number the error refers to, as given by the thrower.</summary>
    public int LineNumber { get; private set; }

    /// <summary>Position within the line, as given by the thrower.</summary>
    public int Position { get; private set; }

    /// <summary>Creates the exception from a message and the error location.</summary>
    /// <param name="message">Description of the error.</param>
    /// <param name="lineno">Value for <see cref="LineNumber"/>.</param>
    /// <param name="pos">Value for <see cref="Position"/>.</param>
    /// <param name="line">Value for <see cref="Line"/>.</param>
    public ParserFormatException(string message, int lineno, int pos, string line)
        : base(message)
    {
        this.LineNumber = lineno;
        this.Position = pos;
        this.Line = line;
    }
}
}

37
ParserPath.cs 100644
View File

@ -0,0 +1,37 @@
using System;
using System.Collections.Generic;
namespace sharp.parser
{
/// <summary>
/// Base class for nodes of a parser path graph; it only manages the ordered
/// list of nodes that may follow this one.
/// </summary>
public abstract class ParserPath
{
    /// <summary>A snapshot copy of the followers, in insertion order.</summary>
    public ParserPath[] Followers { get { return followers.ToArray(); } }

    private List<ParserPath> followers = new List<ParserPath>();

    /// <summary>Creates a node without followers.</summary>
    public ParserPath()
    {
    }

    /// <summary>Creates a node whose followers are the given nodes, in array order.</summary>
    public ParserPath(ParserPath[] followers)
    {
        this.followers.AddRange(followers);
    }

    /// <summary>Appends <paramref name="path"/> and then every element of <paramref name="paths"/>.</summary>
    public void AddFollower(ParserPath path, params ParserPath[] paths)
    {
        this.followers.Add(path);
        for (int i = 0; i < paths.Length; i++)
        {
            this.followers.Add(paths[i]);
        }
    }

    /// <summary>Appends a single follower.</summary>
    public void AddFollower(ParserPath path)
    {
        this.followers.Add(path);
    }

    /// <summary>Removes the first occurrence of the given follower, if present.</summary>
    public void RemoveFollower(ParserPath path)
    {
        this.followers.Remove(path);
    }
}
}

View File

@ -0,0 +1,25 @@
using System;
namespace sharp.parser
{
/// <summary>
/// A <see cref="ParserPath"/> node that is associated with a set of characters.
/// </summary>
public class ParserPathSegment : ParserPath
{
    /// <summary>The characters associated with this node.</summary>
    public CharGroup CharGroup { get; private set; }

    /// <summary>Creates a node for the given character group.</summary>
    public ParserPathSegment(CharGroup charGroup)
    {
        CharGroup = charGroup;
    }

    /// <summary>Creates a node for the single character <paramref name="ch"/>.</summary>
    public ParserPathSegment(char ch)
        : this(new CharGroup(ch))
    {
    }

    /// <summary>Creates a node for the characters in <paramref name="chars"/>.</summary>
    public ParserPathSegment(char[] chars)
        : this(new CharGroup(chars))
    {
    }

    /// <summary>Creates a node for the inclusive range <paramref name="first"/>..<paramref name="last"/>.</summary>
    public ParserPathSegment(char first, char last)
        : this(new CharGroup(first, last))
    {
    }
}
}

34
Token.cs 100644
View File

@ -0,0 +1,34 @@
using System;
using System.Collections.Generic;
using sharp.extensions;
namespace sharp.parser
{
/// <summary>
/// A run of characters in a <see cref="CharBuffer"/> that was recognized by a
/// <see cref="TokenDefinition"/>.
/// </summary>
public class Token
{
    /// <summary>The definition that recognized this token.</summary>
    public TokenDefinition Definition { get; private set; }

    /// <summary>The buffer the token was read from (also exposed as <see cref="Buffer"/>).</summary>
    public CharBuffer charBuffer;

    /// <summary>The buffer the token was read from.</summary>
    public CharBuffer Buffer { get { return charBuffer; } }

    /// <summary>Offset of the token's first character in the buffer.</summary>
    public int Position { get; private set; }

    /// <summary>Number of characters the token spans.</summary>
    public int Len { get; private set; }

    /// <summary>The token text, read from the buffer's characters on every access.</summary>
    public string Value
    {
        get
        {
            char[] slice = charBuffer.Characters.Segment(Position, Len);
            return new String(slice);
        }
    }

    /// <summary>
    /// Creates a token of <paramref name="len"/> characters starting at the
    /// buffer's current position and moves the buffer past it.
    /// </summary>
    public Token(TokenDefinition tdef, CharBuffer buffer, int len)
    {
        Definition = tdef;
        charBuffer = buffer;

        int start = buffer.Position;
        Position = start;
        Len = len;

        // Consume the token so the next match starts right behind it.
        buffer.Position = start + len;
    }

    /// <summary>Returns the definition name and the token text for diagnostics.</summary>
    public override string ToString()
    {
        return string.Format("[Token: {0} Value={1}]", Definition.Name, Value);
    }
}
}

47
TokenDefinition.cs 100644
View File

@ -0,0 +1,47 @@
using System;
using System.Collections.Generic;
namespace sharp.parser
{
/// <summary>
/// A named kind of token, described by one or more alternative lexer paths.
/// </summary>
public class TokenDefinition
{
    /// <summary>Name of the token kind.</summary>
    public String Name { get; private set; }

    /// <summary>A snapshot copy of the alternative path entry points, in the order they are tried.</summary>
    public LexerPathSegment[] PathHeads { get { return this.pathHeads.ToArray(); } }

    private List<LexerPathSegment> pathHeads = new List<LexerPathSegment>();

    /// <summary>Creates a definition without any path.</summary>
    public TokenDefinition(string name)
    {
        this.Name = name;
    }

    /// <summary>Creates a definition with a single path.</summary>
    public TokenDefinition(string name, LexerPathSegment pathHead)
    {
        this.Name = name;
        this.pathHeads.Add(pathHead);
    }

    /// <summary>Creates a definition with several alternative paths, tried in array order.</summary>
    public TokenDefinition(string name, LexerPathSegment[] pathHeads)
    {
        this.Name = name;
        this.pathHeads.AddRange(pathHeads);
    }

    /// <summary>
    /// Tries each path at the buffer's current position and creates a token for
    /// the first one that matches.
    /// </summary>
    /// <returns>
    /// The token (creating it moves the buffer past the matched characters), or
    /// null when no path matches; in that case the buffer position is unchanged.
    /// </returns>
    public Token tryParse(CharBuffer buffer)
    {
        int start = buffer.Position;
        foreach (LexerPathSegment head in pathHeads)
        {
            int matched = head.walk(buffer);

            // Every alternative, and the token itself, must begin at the same
            // offset, so undo any cursor movement the walk may have left behind.
            buffer.Position = start;

            if (matched > 0)
            {
                return new Token(this, buffer, matched);
            }
        }
        return null;
    }

    /// <summary>Returns the definition name for diagnostics.</summary>
    public override string ToString()
    {
        return string.Format("[TokenDefinition: Name={0}]", Name);
    }
}
}

24
TokenQueue.cs 100644
View File

@ -0,0 +1,24 @@
using System;
using System.Collections.Generic;
namespace sharp.parser
{
/// <summary>
/// A queue of tokens with a helper for consuming a token of an expected kind.
/// </summary>
public class TokenQueue : Queue<Token>
{
    /// <summary>Creates an empty queue.</summary>
    public TokenQueue()
    {
    }

    /// <summary>
    /// Removes the token at the head of the queue and checks that its definition
    /// is one of <paramref name="tdefs"/> (compared by reference).
    /// </summary>
    /// <returns>The dequeued token.</returns>
    /// <exception cref="UnexpectedTokenException">
    /// The token's definition is none of the expected ones. The token has been
    /// removed from the queue by then.
    /// </exception>
    public Token Expect(params TokenDefinition[] tdefs)
    {
        Token head = Dequeue();
        for (int i = 0; i < tdefs.Length; i++)
        {
            if (head.Definition == tdefs[i])
            {
                return head;
            }
        }
        throw new UnexpectedTokenException(head, tdefs);
    }
}
}

View File

@ -0,0 +1,18 @@
using System;
namespace sharp.parser
{
/// <summary>
/// Raised when a token of a different kind than the expected ones is found.
/// </summary>
/// <remarks>
/// The description is passed to the <see cref="Exception"/> constructor, so it
/// is what <c>Message</c> returns even when the exception is handled through a
/// base-class reference (logging, <c>ToString()</c>).
/// </remarks>
public class UnexpectedTokenException : Exception
{
    /// <summary>The token that was found.</summary>
    public Token Token { get; private set; }

    /// <summary>The definitions that would have been accepted.</summary>
    public TokenDefinition[] Expected { get; private set; }

    /// <summary>Creates the exception for <paramref name="token"/>, which matched none of <paramref name="expected"/>.</summary>
    public UnexpectedTokenException(Token token, TokenDefinition[] expected)
        : base(describe(token, expected))
    {
        this.Token = token;
        this.Expected = expected;
    }

    // Builds the message text: line number, buffer offset, token text, the
    // accepted definitions and the source line of the token.
    private static string describe(Token token, TokenDefinition[] expected)
    {
        return string.Format("Unexpected Token in Line {0} at position {1}.\nGot {2} but should be one of [{3}]\nLine: {4}", lineNumberOf(token), token.Position, token.Value, string.Join<TokenDefinition>(",", expected), token.charBuffer.getLineAt(token.Position));
    }

    // One-based line of the token: one more than the number of line feeds that
    // precede its first character in the buffer.
    private static int lineNumberOf(Token token)
    {
        char[] text = token.charBuffer.Characters;
        int stop = Math.Min(token.Position, text.Length);
        int line = 1;
        for (int i = 0; i < stop; i++)
        {
            if (text[i] == '\n')
            {
                line++;
            }
        }
        return line;
    }
}
}

View File

@ -4,10 +4,10 @@
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
<Platform Condition=" '$(Platform)' == '' ">x86</Platform>
<ProjectGuid>{32267133-ADB7-4A85-8CF1-03CBDF53715C}</ProjectGuid>
<OutputType>Exe</OutputType>
<OutputType>Library</OutputType>
<RootNamespace>sharp.parser</RootNamespace>
<AssemblyName>sharp.parser</AssemblyName>
<TargetFrameworkVersion>v4.5</TargetFrameworkVersion>
<TargetFrameworkVersion>v4.7</TargetFrameworkVersion>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|x86' ">
<DebugSymbols>true</DebugSymbols>
@ -26,5 +26,28 @@
<WarningLevel>4</WarningLevel>
<PlatformTarget>x86</PlatformTarget>
</PropertyGroup>
<ItemGroup>
<Compile Include="Parser.cs" />
<Compile Include="CharGroup.cs" />
<Compile Include="TokenDefinition.cs" />
<Compile Include="ParserPath.cs" />
<Compile Include="CharBuffer.cs" />
<Compile Include="ParserFormatException.cs" />
<Compile Include="ParserPathSegment.cs" />
<Compile Include="Token.cs" />
<Compile Include="Lexer.cs" />
<Compile Include="LexerPathSegment.cs" />
<Compile Include="TokenQueue.cs" />
<Compile Include="UnexpectedTokenException.cs" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\sharp-extensions\sharp.extensions.csproj">
<Project>{97CA3CA9-98B3-4492-B072-D7A5995B68E9}</Project>
<Name>sharp.extensions</Name>
</ProjectReference>
</ItemGroup>
<ItemGroup>
<Reference Include="System" />
</ItemGroup>
<Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" />
</Project>