// sharp.parser - character buffer, lexer and parser scaffolding.
//
// This text was recovered from a multi-file patch (CharBuffer.cs, CharGroup.cs,
// Lexer.cs, LexerPathSegment.cs, Parser.cs, ParserFormatException.cs,
// ParserPath.cs, ParserPathSegment.cs, Token.cs, TokenDefinition.cs,
// TokenQueue.cs, UnexpectedTokenException.cs) whose line structure and generic
// type arguments had been lost. The types are kept in their original order so
// they can be split back into one file per type.
//
// NOTE(review): the original sources imported `sharp.extensions` for the array
// helpers Segment/Combine/Remove. Those helpers are not visible here, so they
// were replaced by standard-library calls; the assumed semantics are marked at
// each replacement - confirm against sharp.extensions before merging.
using System;
using System.Collections;
using System.Collections.Generic;
using System.Linq;

namespace sharp.parser
{
    /// <summary>
    /// A character array with a movable read position and line/column lookup.
    /// Line and column are always derived from the read position, so they stay
    /// correct no matter how the position was changed (MoveNext, MoveBack or
    /// the Position setter).
    /// </summary>
    public class CharBuffer
    {
        char[] chars;
        int position;

        // Offset of the first character of every line, ascending; entry 0 is
        // always 0 and a new entry follows each '\n'.
        int[] lineStarts;

        /// <summary>The underlying character array (not a copy).</summary>
        public char[] Characters { get { return this.chars; } }

        /// <summary>Creates a buffer that reads directly from <paramref name="source"/>.</summary>
        /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
        public CharBuffer(char[] source)
        {
            if (source == null)
            {
                throw new ArgumentNullException("source");
            }
            this.chars = source;
            this.initialize();
        }

        /// <summary>Creates a buffer over a copy of the characters of <paramref name="source"/>.</summary>
        /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
        public CharBuffer(string source)
        {
            if (source == null)
            {
                throw new ArgumentNullException("source");
            }
            this.chars = source.ToCharArray();
            this.initialize();
        }

        // Resets the read position and indexes the start offset of every line.
        private void initialize()
        {
            this.position = 0;

            List<int> lineStartList = new List<int>();
            lineStartList.Add(0);

            for (int n = 0; n < this.chars.Length; n++)
            {
                if (this.chars[n] == '\n')
                {
                    // The next line begins after the line feed, not on it.
                    lineStartList.Add(n + 1);
                }
            }

            this.lineStarts = lineStartList.ToArray();
        }

        // Index into lineStarts of the line containing pos (pos must be >= 0).
        private int lineIndexAt(int pos)
        {
            int found = Array.BinarySearch(this.lineStarts, pos);
            // A miss yields the complement of the insertion point; the line
            // containing pos is the entry just before that point.
            return found >= 0 ? found : ~found - 1;
        }

        // Offset of the first '\n' at or after p, or the buffer length.
        private int lineEndFrom(int p)
        {
            while (p < this.chars.Length && this.chars[p] != '\n')
            {
                p++;
            }
            return p;
        }

        // Offset where the line containing the read position ends.
        private int lineEnd()
        {
            return lineEndFrom(this.position);
        }

        // Clamps an arbitrary offset into [0, Length].
        private int clamp(int pos)
        {
            if (pos < 0)
            {
                return 0;
            }
            return pos > this.chars.Length ? this.chars.Length : pos;
        }

        /// <summary>
        /// Returns up to <paramref name="len"/> characters starting at
        /// <paramref name="start"/>. The requested range is clipped to the
        /// buffer, so an out-of-range request yields a shorter (possibly empty)
        /// string instead of an exception.
        /// </summary>
        public string Slice(int start, int len)
        {
            // NOTE(review): stands in for sharp.extensions' Segment(start, length),
            // assumed to be a plain sub-array copy - confirm its overrun behaviour.
            if (start < 0)
            {
                len += start;
                start = 0;
            }
            if (start > this.chars.Length)
            {
                start = this.chars.Length;
            }
            if (len > this.chars.Length - start)
            {
                len = this.chars.Length - start;
            }
            return len <= 0 ? String.Empty : new string(this.chars, start, len);
        }

        /// <summary>
        /// Returns the text of the line containing offset <paramref name="pos"/>
        /// without its terminating line feed, or null when
        /// <paramref name="pos"/> lies outside the buffer.
        /// </summary>
        public string getLineAt(int pos)
        {
            if (pos < 0 || pos >= this.chars.Length)
            {
                return null;
            }

            int start = this.lineStarts[lineIndexAt(pos)];
            return Slice(start, lineEndFrom(pos) - start);
        }

        /// <summary>1-based number of the line containing offset <paramref name="pos"/> (clamped to the buffer).</summary>
        public int getLineNumberAt(int pos)
        {
            return lineIndexAt(clamp(pos)) + 1;
        }

        /// <summary>1-based column of offset <paramref name="pos"/> (clamped to the buffer) within its line.</summary>
        public int getLinePositionAt(int pos)
        {
            int p = clamp(pos);
            return p - this.lineStarts[lineIndexAt(p)] + 1;
        }

        /// <summary>
        /// The read position. Assigned values are clamped to [0, Length];
        /// Length means "end of buffer".
        /// </summary>
        public int Position
        {
            get { return this.position; }
            set { this.position = clamp(value); }
        }

        /// <summary>Advances one character. Returns false when already at the end.</summary>
        public bool MoveNext()
        {
            if (this.position < this.chars.Length)
            {
                this.position++;
                return true;
            }
            return false;
        }

        /// <summary>Steps back one character. Returns false when already at the start.</summary>
        public bool MoveBack()
        {
            if (this.position > 0)
            {
                this.position--;
                return true;
            }
            return false;
        }

        /// <summary>True when the read position is past the last character.</summary>
        public bool EndOfBuffer()
        {
            return this.position >= this.chars.Length;
        }

        /// <summary>
        /// Skips every character with a code of 0x20 or below, stopping at the
        /// first other character or at the end of the buffer.
        /// </summary>
        public void BypassWhiteSpace()
        {
            while (!EndOfBuffer() && this.chars[this.position] <= 0x20)
            {
                MoveNext();
            }
        }

        /// <summary>1-based line number of the read position.</summary>
        public int CurrentLineNumber { get { return getLineNumberAt(this.position); } }

        /// <summary>1-based column of the read position within its line.</summary>
        public int CurrentLinePosition { get { return getLinePositionAt(this.position); } }

        /// <summary>Text of the line containing the read position, without the line feed.</summary>
        public string CurrentLine
        {
            get
            {
                int start = this.lineStarts[lineIndexAt(this.position)];
                return Slice(start, lineEnd() - start);
            }
        }

        /// <summary>The character just before the read position.</summary>
        /// <exception cref="IndexOutOfRangeException">The read position is at the start.</exception>
        public char Last
        {
            get
            {
                if (this.position > 0)
                {
                    return this.chars[this.position - 1];
                }
                throw new IndexOutOfRangeException("No character before the first one");
            }
        }

        /// <summary>The character at the read position.</summary>
        /// <exception cref="IndexOutOfRangeException">The read position is at the end.</exception>
        public char Current
        {
            get
            {
                if (this.position < this.chars.Length)
                {
                    return this.chars[this.position];
                }
                throw new IndexOutOfRangeException("No character after the last one");
            }
        }

        /// <summary>The character right after the one at the read position.</summary>
        /// <exception cref="IndexOutOfRangeException">There is no such character.</exception>
        public char Next
        {
            get
            {
                if (this.position < this.chars.Length - 1)
                {
                    return this.chars[this.position + 1];
                }
                throw new IndexOutOfRangeException("No character after the last one");
            }
        }

        /// <summary>Up to <paramref name="len"/> characters starting at the read position.</summary>
        public string Following(int len)
        {
            return Slice(this.position, len);
        }

        /// <summary>Up to <paramref name="len"/> characters ending just before the read position.</summary>
        public string Preceding(int len)
        {
            return Slice(this.position - len, len);
        }

        /// <summary>
        /// Consumes the current character, which must equal <paramref name="ch"/>.
        /// </summary>
        /// <exception cref="ParserFormatException">
        /// The current character differs from <paramref name="ch"/>, or the
        /// buffer is already exhausted.
        /// </exception>
        public void Pass(char ch)
        {
            if (EndOfBuffer())
            {
                throw new ParserFormatException(String.Format("Expected {0}, but reached the end of the input", ch), CurrentLineNumber, CurrentLinePosition, CurrentLine);
            }
            if (Current != ch)
            {
                throw new ParserFormatException(String.Format("Expected {0}, but got {1}", ch, Current), CurrentLineNumber, CurrentLinePosition, CurrentLine);
            }
            MoveNext();
        }
    }

    /// <summary>A set of characters, with a few predefined groups.</summary>
    public class CharGroup
    {
        // Declaration order matters: aAzZ and hexdigits are built from groups
        // declared above them.
        public static readonly CharGroup digit = new CharGroup('0', '9');
        public static readonly CharGroup zero = new CharGroup('0');
        public static readonly CharGroup digit19 = new CharGroup('1', '9');
        public static readonly CharGroup plusminus = new CharGroup(new char[] { '+', '-' });
        public static readonly CharGroup minus = new CharGroup(new char[] { '-' });
        public static readonly CharGroup plus = new CharGroup(new char[] { '+' });
        public static readonly CharGroup az = new CharGroup('a', 'z');
        public static readonly CharGroup AZ = new CharGroup('A', 'Z');
        public static readonly CharGroup aAzZ = az + AZ;
        public static readonly CharGroup LF = new CharGroup((char)0x0A);
        public static readonly CharGroup CR = new CharGroup((char)0x0D);
        public static readonly CharGroup HTAB = new CharGroup((char)0x09);
        public static readonly CharGroup WS = new CharGroup(new char[] { (char)0x09, (char)0x0A, (char)0x0B, (char)0x0C, (char)0x0D, (char)0x20 });
        public static readonly CharGroup hexdigits = new CharGroup('a', 'f') + new CharGroup('A', 'F') + digit;

        char[] chars;

        /// <summary>A group holding the single character <paramref name="ch"/>.</summary>
        public CharGroup(char ch)
        {
            this.chars = new char[] { ch };
        }

        /// <summary>A group holding a private copy of <paramref name="chars"/>.</summary>
        /// <exception cref="ArgumentNullException"><paramref name="chars"/> is null.</exception>
        public CharGroup(char[] chars)
        {
            if (chars == null)
            {
                throw new ArgumentNullException("chars");
            }
            // NOTE(review): replaces chars.Segment(0), assumed to copy the whole array.
            this.chars = (char[])chars.Clone();
        }

        /// <summary>
        /// A group holding every character from <paramref name="first"/> to
        /// <paramref name="last"/> inclusive; empty when last precedes first.
        /// </summary>
        public CharGroup(char first, char last)
        {
            int count = last < first ? 0 : last - first + 1;
            this.chars = new char[count];

            for (int n = 0; n < count; n++)
            {
                this.chars[n] = (char)(first + n);
            }
        }

        /// <summary>True when <paramref name="ch"/> belongs to this group.</summary>
        public bool Contains(char ch)
        {
            return Array.IndexOf(this.chars, ch) >= 0;
        }

        /// <summary>True when this group and <paramref name="other"/> share at least one character.</summary>
        public bool Intersects(CharGroup other)
        {
            foreach (char ch in this.chars)
            {
                if (other.Contains(ch))
                {
                    return true;
                }
            }
            return false;
        }

        /// <summary>A group holding the characters of both operands.</summary>
        public static CharGroup operator +(CharGroup cg1, CharGroup cg2)
        {
            // NOTE(review): replaces Combine(), assumed to be plain concatenation.
            return new CharGroup(cg1.chars.Concat(cg2.chars).ToArray());
        }

        /// <summary>A group holding the characters of <paramref name="cg1"/> that are not in <paramref name="cg2"/>.</summary>
        public static CharGroup operator -(CharGroup cg1, CharGroup cg2)
        {
            // NOTE(review): replaces Remove(), assumed to drop every listed character.
            return new CharGroup(cg1.chars.Where(c => !cg2.Contains(c)).ToArray());
        }

        public override string ToString()
        {
            return string.Format("[CharGroup '{0}']", new string(this.chars));
        }
    }

    /// <summary>Splits a character buffer into tokens using an ordered list of token definitions.</summary>
    public class Lexer
    {
        List<TokenDefinition> tokenDefinitions = new List<TokenDefinition>();

        public Lexer()
        {
        }

        /// <summary>Creates a lexer that tries <paramref name="tokenDefinitions"/> in the given order.</summary>
        public Lexer(TokenDefinition[] tokenDefinitions)
        {
            this.tokenDefinitions.AddRange(tokenDefinitions);
        }

        /// <summary>Appends a definition; it is tried after all earlier ones.</summary>
        public void AddTokenDefinition(TokenDefinition tokenDefinition)
        {
            this.tokenDefinitions.Add(tokenDefinition);
        }

        public void RemoveTokenDefinition(TokenDefinition tokenDefinition)
        {
            this.tokenDefinitions.Remove(tokenDefinition);
        }

        /// <summary>
        /// Tokenizes <paramref name="buffer"/> from its current position to its
        /// end. At every step the first definition that matches wins.
        /// </summary>
        /// <exception cref="ArgumentNullException"><paramref name="buffer"/> is null.</exception>
        /// <exception cref="FormatException">No definition matches at some position.</exception>
        public Token[] parse(CharBuffer buffer)
        {
            if (buffer == null)
            {
                throw new ArgumentNullException("buffer");
            }

            List<Token> tokens = new List<Token>();

            while (!buffer.EndOfBuffer())
            {
                Token t = null;

                foreach (TokenDefinition tdef in this.tokenDefinitions)
                {
                    t = tdef.tryParse(buffer);
                    if (t != null)
                    {
                        break;
                    }
                }

                if (t == null)
                {
                    // Nothing matched before the end of the buffer.
                    throw new FormatException(String.Format("Unexpected character at line {0} position {1}. '{2}'", buffer.CurrentLineNumber, buffer.CurrentLinePosition, buffer.Current));
                }

                // Creating the token already advanced the buffer past it.
                tokens.Add(t);
            }

            return tokens.ToArray();
        }
    }

    /// <summary>
    /// One node of a lexer path: it matches a single character from its
    /// CharGroup and then continues with one of its followers. A node without
    /// a CharGroup consumes nothing and only fans out to its followers.
    /// </summary>
    public class LexerPathSegment
    {
        public CharGroup CharGroup { get; private set; }

        /// <summary>Whether a token may end right after this node matched.</summary>
        public bool MayFinish { get; set; }

        /// <summary>A snapshot of the follower list.</summary>
        public LexerPathSegment[] Followers { get { return this.followers.ToArray(); } }
        private List<LexerPathSegment> followers = new List<LexerPathSegment>();

        public LexerPathSegment()
        {
            this.CharGroup = null;
        }

        public LexerPathSegment(LexerPathSegment follower)
        {
            this.CharGroup = null;
            this.AddFollower(follower);
        }

        public LexerPathSegment(CharGroup charGroup)
        {
            this.CharGroup = charGroup;
        }

        public LexerPathSegment(char[] chars)
            : this(new CharGroup(chars))
        {
        }

        public LexerPathSegment(char ch)
            : this(new CharGroup(ch))
        {
        }

        public LexerPathSegment(char first, char last)
            : this(new CharGroup(first, last))
        {
        }

        public LexerPathSegment(CharGroup charGroup, bool mayFinish)
        {
            this.CharGroup = charGroup;
            this.MayFinish = mayFinish;
        }

        public LexerPathSegment(char[] chars, bool mayFinish)
            : this(new CharGroup(chars), mayFinish)
        {
        }

        public LexerPathSegment(char ch, bool mayFinish)
            : this(new CharGroup(ch), mayFinish)
        {
        }

        public LexerPathSegment(char first, char last, bool mayFinish)
            : this(new CharGroup(first, last), mayFinish)
        {
        }

        public LexerPathSegment(CharGroup charGroup, LexerPathSegment follower)
        {
            this.CharGroup = charGroup;
            this.AddFollower(follower);
        }

        public LexerPathSegment(char[] chars, LexerPathSegment follower)
            : this(new CharGroup(chars), follower)
        {
        }

        public LexerPathSegment(char ch, LexerPathSegment follower)
            : this(new CharGroup(ch), follower)
        {
        }

        public LexerPathSegment(char first, char last, LexerPathSegment follower)
            : this(new CharGroup(first, last), follower)
        {
        }

        /// <summary>Appends <paramref name="path"/> and then every entry of <paramref name="paths"/>.</summary>
        public void AddFollower(LexerPathSegment path, params LexerPathSegment[] paths)
        {
            AddFollower(path);
            foreach (LexerPathSegment p in paths)
            {
                AddFollower(p);
            }
        }

        public void AddFollower(LexerPathSegment path)
        {
            this.followers.Add(path);
        }

        public void RemoveFollower(LexerPathSegment path)
        {
            this.followers.Remove(path);
        }

        /// <summary>
        /// Tries to match this node and its followers at the buffer's current
        /// position. The buffer position is the same on return as on entry.
        /// </summary>
        /// <returns>The number of characters matched, or -1 when nothing matches.</returns>
        public int walk(CharBuffer buffer)
        {
            if (this.CharGroup == null)
            {
                // Pure branching node: the first follower that matches decides.
                foreach (LexerPathSegment next in this.followers)
                {
                    int n = next.walk(buffer);
                    if (n > 0)
                    {
                        return n;
                    }
                }
                return -1;
            }

            // A node with a CharGroup always needs one character. At the end of
            // the buffer there is none, so this node cannot match - even when
            // MayFinish is set, since that only applies after a match.
            if (buffer.EndOfBuffer() || !this.CharGroup.Contains(buffer.Current))
            {
                return -1;
            }

            buffer.MoveNext();
            foreach (LexerPathSegment next in this.followers)
            {
                int n = next.walk(buffer);
                if (n > 0)
                {
                    buffer.MoveBack();
                    return n + 1;
                }
            }
            buffer.MoveBack();

            // No follower continued the match; one character is a match only if
            // the token is allowed to stop here.
            return this.MayFinish ? 1 : -1;
        }
    }

    /// <summary>Base class for parsers that tokenize a source and build a <typeparamref name="T"/> from the tokens.</summary>
    public abstract class Parser<T>
    {
        protected Lexer Lexer { get; private set; }

        protected Parser(TokenDefinition[] tokenDefinitions)
        {
            this.Lexer = new Lexer(tokenDefinitions);
        }

        /// <summary>Runs the lexer over <paramref name="source"/>.</summary>
        public Token[] Tokenize(char[] source)
        {
            return this.Lexer.parse(new CharBuffer(source));
        }

        public T Parse(string source)
        {
            return Parse(source.ToCharArray());
        }

        public T Parse(char[] source)
        {
            Token[] tokens = Tokenize(source);
            return ParseTokens(tokens);
        }

        /// <summary>Builds the result from the token stream.</summary>
        protected abstract T ParseTokens(Token[] tokens);
    }

    /// <summary>A parse error that carries the offending line, its number and the position within it.</summary>
    public class ParserFormatException : Exception
    {
        public String Line { get; private set; }
        public int LineNumber { get; private set; }
        public int Position { get; private set; }

        public ParserFormatException(string message, int lineno, int pos, string line)
            : base(message)
        {
            Line = line;
            LineNumber = lineno;
            Position = pos;
        }
    }

    /// <summary>A node in a parser path graph; holds an ordered list of follower nodes.</summary>
    public abstract class ParserPath
    {
        /// <summary>A snapshot of the follower list.</summary>
        public ParserPath[] Followers { get { return this.followers.ToArray(); } }
        private List<ParserPath> followers = new List<ParserPath>();

        public ParserPath()
        {
        }

        public ParserPath(ParserPath[] followers)
        {
            this.followers.AddRange(followers);
        }

        /// <summary>Appends <paramref name="path"/> and then every entry of <paramref name="paths"/>.</summary>
        public void AddFollower(ParserPath path, params ParserPath[] paths)
        {
            AddFollower(path);
            foreach (ParserPath p in paths)
            {
                AddFollower(p);
            }
        }

        public void AddFollower(ParserPath path)
        {
            this.followers.Add(path);
        }

        public void RemoveFollower(ParserPath path)
        {
            this.followers.Remove(path);
        }
    }

    /// <summary>A parser path node associated with a character group.</summary>
    public class ParserPathSegment : ParserPath
    {
        public CharGroup CharGroup { get; private set; }

        public ParserPathSegment(CharGroup charGroup)
        {
            this.CharGroup = charGroup;
        }

        public ParserPathSegment(char ch)
        {
            this.CharGroup = new CharGroup(ch);
        }

        public ParserPathSegment(char[] chars)
        {
            this.CharGroup = new CharGroup(chars);
        }

        public ParserPathSegment(char first, char last)
        {
            this.CharGroup = new CharGroup(first, last);
        }
    }

    /// <summary>A matched token: a definition plus the buffer range it covers.</summary>
    public class Token
    {
        public TokenDefinition Definition { get; private set; }
        public CharBuffer charBuffer;

        public CharBuffer Buffer { get { return charBuffer; } }

        /// <summary>Offset of the token's first character in the buffer.</summary>
        public int Position { get; private set; }

        /// <summary>Number of characters the token was created with.</summary>
        public int Len { get; private set; }

        /// <summary>The token text, clipped to the buffer bounds.</summary>
        public string Value { get { return charBuffer.Slice(Position, Len); } }

        /// <summary>
        /// Creates a token of <paramref name="len"/> characters starting at the
        /// buffer's current position and advances the buffer past it.
        /// </summary>
        /// <exception cref="ArgumentNullException"><paramref name="buffer"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="len"/> is negative.</exception>
        public Token(TokenDefinition tdef, CharBuffer buffer, int len)
        {
            if (buffer == null)
            {
                throw new ArgumentNullException("buffer");
            }
            if (len < 0)
            {
                throw new ArgumentOutOfRangeException("len");
            }

            this.Definition = tdef;
            this.charBuffer = buffer;
            this.Position = buffer.Position;
            this.Len = len;

            buffer.Position += len;
        }

        public override string ToString()
        {
            return string.Format("[Token: {0} Value={1}]", Definition.Name, Value);
        }
    }

    /// <summary>A named token kind, recognised by any one of its lexer path heads.</summary>
    public class TokenDefinition
    {
        public String Name { get; private set; }

        /// <summary>A snapshot of the path head list.</summary>
        public LexerPathSegment[] PathHeads { get { return this.pathHeads.ToArray(); } }

        private List<LexerPathSegment> pathHeads = new List<LexerPathSegment>();

        public TokenDefinition(string name)
        {
            this.Name = name;
        }

        public TokenDefinition(string name, LexerPathSegment pathHead)
        {
            this.Name = name;
            this.pathHeads.Add(pathHead);
        }

        public TokenDefinition(string name, LexerPathSegment[] pathHeads)
        {
            this.Name = name;
            this.pathHeads.AddRange(pathHeads);
        }

        /// <summary>
        /// Tries every path head at the buffer's current position. The first
        /// head that matches produces a token, which advances the buffer.
        /// </summary>
        /// <returns>The token, or null when no head matches (buffer unchanged).</returns>
        public Token tryParse(CharBuffer buffer)
        {
            foreach (LexerPathSegment head in this.pathHeads)
            {
                int n = head.walk(buffer);
                if (n > 0)
                {
                    return new Token(this, buffer, n);
                }
            }

            return null;
        }

        public override string ToString()
        {
            return string.Format("[TokenDefinition: Name={0}]", Name);
        }
    }

    /// <summary>A queue of tokens with a helper for consuming an expected token kind.</summary>
    public class TokenQueue : Queue<Token>
    {
        public TokenQueue()
        {
        }

        /// <summary>
        /// Dequeues the next token and returns it if its definition is one of
        /// <paramref name="tdefs"/>. The token is removed from the queue even
        /// when it is rejected.
        /// </summary>
        /// <exception cref="UnexpectedTokenException">The token has a different definition.</exception>
        public Token Expect(params TokenDefinition[] tdefs)
        {
            Token t = Dequeue();
            foreach (TokenDefinition tdef in tdefs)
            {
                if (t.Definition == tdef)
                {
                    return t;
                }
            }
            throw new UnexpectedTokenException(t, tdefs);
        }
    }

    /// <summary>Raised when a token does not have any of the expected definitions.</summary>
    public class UnexpectedTokenException : Exception
    {
        public Token Token { get; private set; }
        public TokenDefinition[] Expected { get; private set; }

        /// <exception cref="ArgumentNullException"><paramref name="token"/> is null.</exception>
        public UnexpectedTokenException(Token token, TokenDefinition[] expected)
            : base(buildMessage(token, expected))
        {
            this.Token = token;
            this.Expected = expected;
        }

        // Builds the text handed to the Exception base class, so that Message
        // and ToString() report it even through an Exception-typed reference.
        private static string buildMessage(Token token, TokenDefinition[] expected)
        {
            if (token == null)
            {
                throw new ArgumentNullException("token");
            }

            string[] names = Array.ConvertAll<TokenDefinition, string>(
                expected ?? new TokenDefinition[0],
                d => Convert.ToString(d));
            CharBuffer buffer = token.Buffer;

            return string.Format(
                "Unexpected Token in Line {0} at position {1}.\nGot {2} but should be one of [{3}]\nLine: {4}",
                buffer.getLineNumberAt(token.Position),
                buffer.getLinePositionAt(token.Position),
                token.Value,
                string.Join(",", names),
                buffer.getLineAt(token.Position));
        }
    }
}

// Project file changes carried by the same patch (sharp.parser.csproj). The XML
// markup was lost, so only the surviving values are recorded here:
//   - "Exe" replaced by "Library" (presumably the output type)
//   - "v4.5" replaced by "v4.7" (presumably the target framework version)
//   - a block of added entries (presumably compile items for the new sources)
//   - an added reference to project "sharp.extensions",
//     GUID {97CA3CA9-98B3-4492-B072-D7A5995B68E9}