namespace Parser.Tests
{
    /// <summary>
    /// Tests for <see cref="MLexer"/>. Each case lexes a short snippet with
    /// <c>ParseAll</c> and inspects the resulting tokens; the last token of every
    /// stream is the end-of-file token, so it is included in the expected counts.
    /// </summary>
    public class MLexerShould
    {
        /// <summary>Creates a lexer reading <paramref name="text"/> through a <see cref="TextWindowWithNull"/>.</summary>
        private static MLexer CreateLexer(string text)
        {
            var window = new TextWindowWithNull(text);
            var factory = new PureTokenFactory(window);
            return new MLexer(window, factory);
        }

        /// <summary>
        /// Lexes <paramref name="text"/> and asserts that the token kinds are exactly
        /// <paramref name="expectedKinds"/>, in order (count first, then contents).
        /// </summary>
        private static void AssertTokenKinds(string text, params TokenKind[] expectedKinds)
        {
            var tokens = CreateLexer(text).ParseAll();

            Assert.AreEqual(expectedKinds.Length, tokens.Count);
            CollectionAssert.AreEqual(expectedKinds, tokens.Select(t => t.PureToken.Kind));
        }

        [Test]
        public void ParseSequenceOfIdentifiers()
        {
            var tokens = CreateLexer("undefined is not a function").ParseAll();
            var words = tokens.Take(5).Select(t => t.PureToken).ToArray();

            // Five identifiers plus the end-of-file token.
            Assert.AreEqual(6, tokens.Count);
            CollectionAssert.AreEqual(
                new[] {"undefined", "is", "not", "a", "function"},
                words.Select(w => w.LiteralText));
            CollectionAssert.AreEqual(
                Enumerable.Repeat(TokenKind.Identifier, 5),
                words.Select(w => w.Kind));
        }

        [Test]
        public void ParseIdentifierAndBrackets()
        {
            AssertTokenKinds(
                "undefined()",
                TokenKind.Identifier,
                TokenKind.OpeningBracket,
                TokenKind.ClosingBracket,
                TokenKind.EndOfFile);
        }

        [Test]
        public void ParseTransposeSignAfterClosingSquareBracket()
        {
            AssertTokenKinds(
                "[undefined]'",
                TokenKind.OpeningSquareBracket,
                TokenKind.Identifier,
                TokenKind.ClosingSquareBracket,
                TokenKind.Transpose,
                TokenKind.EndOfFile);
        }

        [Test]
        public void ParseTransposeSignAfterClosingBrace()
        {
            AssertTokenKinds(
                "{undefined}'",
                TokenKind.OpeningBrace,
                TokenKind.Identifier,
                TokenKind.ClosingBrace,
                TokenKind.Transpose,
                TokenKind.EndOfFile);
        }

        [Test]
        public void ParseTransposeSignAfterClosingBracket()
        {
            AssertTokenKinds(
                "undefined()'",
                TokenKind.Identifier,
                TokenKind.OpeningBracket,
                TokenKind.ClosingBracket,
                TokenKind.Transpose,
                TokenKind.EndOfFile);
        }

        [Test]
        public void ParseTransposeSignAfterIdentifier()
        {
            AssertTokenKinds(
                "undefined'",
                TokenKind.Identifier,
                TokenKind.Transpose,
                TokenKind.EndOfFile);
        }

        [Test]
        public void ParseTransposeSignAfterDot()
        {
            AssertTokenKinds(
                "undefined.'",
                TokenKind.Identifier,
                TokenKind.DotTranspose,
                TokenKind.EndOfFile);
        }

        [Test]
        public void ParseDotPowerAfterNumber()
        {
            // The dot belongs to the ".^" operator, not to the number.
            AssertTokenKinds(
                "26.^[1]",
                TokenKind.NumberLiteral,
                TokenKind.DotPower,
                TokenKind.OpeningSquareBracket,
                TokenKind.NumberLiteral,
                TokenKind.ClosingSquareBracket,
                TokenKind.EndOfFile);
        }

        [Test]
        public void ParseDotInNumberBeforeSemicolon()
        {
            // Here the dot is part of the number literal.
            AssertTokenKinds(
                "42.;",
                TokenKind.NumberLiteral,
                TokenKind.Semicolon,
                TokenKind.EndOfFile);
        }

        [Test]
        public void ParseEAfterDotInANumber()
        {
            AssertTokenKinds(
                "42.e-5",
                TokenKind.NumberLiteral,
                TokenKind.EndOfFile);
        }

        [Test]
        public void ParseEmptyLine()
        {
            var tokens = CreateLexer("\n\nfunction shmunction\n\n\n").ParseAll();

            // Two identifiers and end-of-file; the blank lines end up as trivia.
            Assert.AreEqual(3, tokens.Count);
        }

        [Test]
        public void ParseCommentsAfterDotDotDot()
        {
            var tokens = CreateLexer("something ... #$@#%*^!@#\n").ParseAll();

            // Everything from "..." to the end of the line is trivia.
            Assert.AreEqual(2, tokens.Count);
        }

        /// <summary>Concatenating the full text of all tokens must reproduce the input exactly.</summary>
        [TestCase("something ... #$@#%*^!@#\n")]
        [TestCase("undefined is not a function")]
        [TestCase("\n\nfunction shmunction\n\n\n")]
        public void ReconstructTest(string s)
        {
            var tokens = CreateLexer(s).ParseAll();
            var actual = string.Concat(tokens.Select(t => t.FullText));

            Assert.AreEqual(s, actual);
        }
    }
}
namespace Lexer
{
    /// <summary>
    /// A lexer producing tokens of type <typeparamref name="T"/>.
    /// </summary>
    /// <typeparam name="T">Token type; must be a reference type.</typeparam>
    public interface ILexer<T> where T : class
    {
        /// <summary>Reads and returns the next token from the input.</summary>
        T NextToken();

        /// <summary>Reads tokens until the input is exhausted and returns them as a list.</summary>
        List<T> ParseAll();
    }
}
namespace Lexer
{
    /// <summary>
    /// Lexer that turns the characters of an <see cref="ITextWindow"/> into
    /// <see cref="Token"/>s, attaching whitespace and comments to each token as
    /// leading and trailing trivia.
    /// </summary>
    /// <remarks>
    /// End of input is detected by <c>PeekChar</c> returning <c>'\0'</c>, so the window is
    /// expected to behave like <see cref="TextWindowWithNull"/> when reading past the end.
    /// </remarks>
    public class MLexer : ILexer<Token>
    {
        private ITextWindow Window { get; }

        /// <summary>The most recently produced token; <c>null</c> until the first token is lexed.</summary>
        private Token LastToken { get; set; }

        private PureTokenFactory PureTokenFactory { get; }

        /// <param name="window">Source of characters.</param>
        /// <param name="pureTokenFactory">Factory used to build every <see cref="PureToken"/>.</param>
        public MLexer(ITextWindow window, PureTokenFactory pureTokenFactory)
        {
            Window = window;
            PureTokenFactory = pureTokenFactory;
        }

        private static bool IsEolOrEof(char c)
        {
            return c == '\n' || c == '\r' || c == '\0';
        }

        private static bool IsLetter(char c)
        {
            return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
        }

        private static bool IsDigit(char c)
        {
            return c >= '0' && c <= '9';
        }

        private static bool IsLetterOrDigitOrUnderscore(char c)
        {
            return IsLetter(c) || IsDigit(c) || c == '_';
        }

        private static bool IsWhitespace(char c)
        {
            // '\r' is included so that "42.\r\n" lexes the same way as "42.\n".
            return c == ' ' || c == '\t' || c == '\n' || c == '\r';
        }

        private static bool IsExponentMarker(char c)
        {
            return c == 'e' || c == 'E';
        }

        /// <summary>Lexes a comment starting at the current character and running up to (not including) the end of line.</summary>
        private Trivia LexComment()
        {
            // The current character is the comment marker itself, so scanning starts at 1.
            var n = 1;
            while (!IsEolOrEof(Window.PeekChar(n)))
            {
                n++;
            }

            return new Trivia(TriviaType.Comment, Window.GetAndConsumeChars(n));
        }

        /// <summary>
        /// Lexes "..." and the rest of the line as a comment; a line-break character that
        /// directly follows is returned as a separate whitespace trivia.
        /// </summary>
        private List<Trivia> LexCommentAfterDotDotDot()
        {
            var n = 0;
            while (!IsEolOrEof(Window.PeekChar(n)))
            {
                n++;
            }

            var result = new List<Trivia>
            {
                new Trivia(TriviaType.Comment, Window.GetAndConsumeChars(n))
            };

            var character = Window.PeekChar();
            if (character == '\n' || character == '\r')
            {
                Window.ConsumeChar();
                result.Add(new Trivia(TriviaType.Whitespace, character.ToString()));
            }

            return result;
        }

        /// <summary>Moves any pending whitespace from <paramref name="cache"/> into <paramref name="triviaList"/>.</summary>
        private static void FlushWhitespace(StringBuilder cache, List<Trivia> triviaList)
        {
            if (cache.Length > 0)
            {
                triviaList.Add(new Trivia(TriviaType.Whitespace, cache.ToString()));
                cache.Clear();
            }
        }

        /// <summary>
        /// Collects whitespace, '%' comments and "..." comments at the current position.
        /// </summary>
        /// <param name="isTrailing">
        /// When true, collection stops right after the first line-break character, so that
        /// whatever follows becomes leading trivia of the next token.
        /// </param>
        private List<Trivia> LexTrivia(bool isTrailing)
        {
            var triviaList = new List<Trivia>();
            var whiteSpaceCache = new StringBuilder();
            while (true)
            {
                var character = Window.PeekChar();
                switch (character)
                {
                    case ' ':
                    case '\t':
                        Window.ConsumeChar();
                        whiteSpaceCache.Append(character);
                        break;
                    case '\r':
                    case '\n':
                        Window.ConsumeChar();
                        whiteSpaceCache.Append(character);
                        FlushWhitespace(whiteSpaceCache, triviaList);
                        if (isTrailing)
                        {
                            return triviaList;
                        }

                        break;
                    case '%':
                        FlushWhitespace(whiteSpaceCache, triviaList);
                        triviaList.Add(LexComment());
                        break;
                    case '.':
                        FlushWhitespace(whiteSpaceCache, triviaList);
                        if (Window.PeekChar(1) != '.' || Window.PeekChar(2) != '.')
                        {
                            // A single dot is an operator, not trivia.
                            return triviaList;
                        }

                        triviaList.AddRange(LexCommentAfterDotDotDot());
                        break;
                    default:
                        FlushWhitespace(whiteSpaceCache, triviaList);
                        return triviaList;
                }
            }
        }

        /// <summary>Lexes an identifier whose first character (a letter) is the current character.</summary>
        private PureToken ContinueParsingIdentifier()
        {
            var n = 1;
            while (IsLetterOrDigitOrUnderscore(Window.PeekChar(n)))
            {
                n++;
            }

            var identifier = Window.GetAndConsumeChars(n);
            return PureTokenFactory.CreateIdentifier(identifier);
        }

        private enum NumberParsingState
        {
            Start,
            DigitsBeforeDot,
            AfterDot,
            DigitsAfterDot,
            AfterE,
            SignAfterE,
            DigitsAfterE
        }

        private static ParsingException NumberError()
        {
            return new ParsingException("Error while parsing number.");
        }

        /// <summary>
        /// Lexes a number literal of the form digits [ '.' [digits] ] [ ('e'|'E') ['+'|'-'] digits ].
        /// </summary>
        /// <returns>
        /// The number token, or <c>null</c> when the input ends in the middle of an exponent.
        /// </returns>
        /// <exception cref="ParsingException">An unexpected character appears inside the number.</exception>
        private PureToken? ContinueParsingNumber()
        {
            var state = NumberParsingState.Start;
            var left = Window.CharactersLeft();
            var n = 0; // number of characters that belong to the literal so far
            var success = false;
            while (n < left)
            {
                var c = Window.PeekChar(n);
                switch (state)
                {
                    case NumberParsingState.Start:
                        if (!IsDigit(c))
                        {
                            throw NumberError();
                        }

                        state = NumberParsingState.DigitsBeforeDot;
                        break;
                    case NumberParsingState.DigitsBeforeDot:
                        if (c == '.')
                        {
                            state = NumberParsingState.AfterDot;
                        }
                        else if (IsExponentMarker(c))
                        {
                            state = NumberParsingState.AfterE;
                        }
                        else if (!IsDigit(c))
                        {
                            success = true;
                        }

                        break;
                    case NumberParsingState.AfterDot:
                        if (IsDigit(c))
                        {
                            state = NumberParsingState.DigitsAfterDot;
                        }
                        else if (IsExponentMarker(c))
                        {
                            state = NumberParsingState.AfterE;
                        }
                        else if (IsWhitespace(c) || c == ';' || c == ']' || c == ')' || c == '}')
                        {
                            // The dot is a trailing decimal point, as in "42.;".
                            success = true;
                        }
                        else if (c == '^' || c == '*' || c == '/' || c == '\\' || c == '\'')
                        {
                            // The dot starts a dotted operator (".^", ".*", ...): give it back.
                            n -= 1;
                            success = true;
                        }
                        else
                        {
                            throw NumberError();
                        }

                        break;
                    case NumberParsingState.DigitsAfterDot:
                        if (IsExponentMarker(c))
                        {
                            state = NumberParsingState.AfterE;
                        }
                        else if (!IsDigit(c))
                        {
                            success = true;
                        }

                        break;
                    case NumberParsingState.AfterE:
                        if (IsDigit(c))
                        {
                            state = NumberParsingState.DigitsAfterE;
                        }
                        else if (c == '+' || c == '-')
                        {
                            state = NumberParsingState.SignAfterE;
                        }
                        else
                        {
                            throw NumberError();
                        }

                        break;
                    case NumberParsingState.SignAfterE:
                        if (!IsDigit(c))
                        {
                            throw NumberError();
                        }

                        state = NumberParsingState.DigitsAfterE;
                        break;
                    case NumberParsingState.DigitsAfterE:
                        if (!IsDigit(c))
                        {
                            success = true;
                        }

                        break;
                }

                if (success)
                {
                    break;
                }

                n++;
            }

            if (n >= left)
            {
                // Input ended while inside the number: fine unless an exponent is unfinished.
                switch (state)
                {
                    case NumberParsingState.DigitsBeforeDot:
                    case NumberParsingState.AfterDot: // "42." at end of input, like "42.;"
                    case NumberParsingState.DigitsAfterDot:
                    case NumberParsingState.DigitsAfterE:
                        success = true;
                        break;
                }
            }

            if (!success)
            {
                return null;
            }

            var s = Window.GetAndConsumeChars(n);
            return PureTokenFactory.CreateNumberLiteral(s);
        }

        /// <summary>Lexes a single-quoted string literal; the current character is the opening quote.</summary>
        /// <exception cref="ParsingException">The line or the input ends before the closing quote.</exception>
        private PureToken ContinueParsingStringLiteral()
        {
            Window.ConsumeChar(); // opening quote
            var n = 0;
            while (Window.PeekChar(n) != '\'')
            {
                if (IsEolOrEof(Window.PeekChar(n)))
                {
                    throw new ParsingException("Unfinished string literal.");
                }

                n++;
            }

            var literal = Window.GetAndConsumeChars(n);
            Window.ConsumeChar(); // closing quote
            return PureTokenFactory.CreateStringLiteral(literal);
        }

        /// <summary>Lexes a double-quoted string literal; the current character is the opening quote.</summary>
        /// <exception cref="ParsingException">The input ends before the closing quote.</exception>
        private PureToken ContinueParsingDoubleQuotedStringLiteral()
        {
            Window.ConsumeChar(); // opening quote
            var left = Window.CharactersLeft();
            var n = 0;
            while (true)
            {
                if (n >= left)
                {
                    // Without this check the scan would never find a terminator.
                    throw new ParsingException("Unfinished double-quoted string literal.");
                }

                if (Window.PeekChar(n) == '"')
                {
                    break;
                }

                n++;
            }

            var literal = Window.GetAndConsumeChars(n);
            Window.ConsumeChar(); // closing quote
            return PureTokenFactory.CreateDoubleQuotedStringLiteral(literal);
        }

        /// <summary>Consumes one character and returns the punctuation token of the given kind.</summary>
        private PureToken LexSingleCharacter(TokenKind kind)
        {
            Window.ConsumeChar();
            return PureTokenFactory.CreatePunctuation(kind);
        }

        /// <summary>
        /// Consumes one character, plus a second one if it equals <paramref name="second"/>,
        /// and returns the matching two- or one-character punctuation token.
        /// </summary>
        private PureToken LexOneOrTwoCharacters(char second, TokenKind twoCharacterKind, TokenKind oneCharacterKind)
        {
            Window.ConsumeChar();
            if (Window.PeekChar() == second)
            {
                Window.ConsumeChar();
                return PureTokenFactory.CreatePunctuation(twoCharacterKind);
            }

            return PureTokenFactory.CreatePunctuation(oneCharacterKind);
        }

        /// <summary>Lexes '.' or one of the dotted operators (.* ./ .^ .\ .').</summary>
        private PureToken LexDotOperator()
        {
            Window.ConsumeChar();
            switch (Window.PeekChar())
            {
                case '*':
                    return LexSingleCharacter(TokenKind.DotMultiply);
                case '/':
                    return LexSingleCharacter(TokenKind.DotDivide);
                case '^':
                    return LexSingleCharacter(TokenKind.DotPower);
                case '\\':
                    return LexSingleCharacter(TokenKind.DotBackslash);
                case '\'':
                    return LexSingleCharacter(TokenKind.DotTranspose);
                default:
                    return PureTokenFactory.CreatePunctuation(TokenKind.Dot);
            }
        }

        /// <summary>
        /// True when a quote directly after the previous token must be read as the transpose
        /// operator rather than the start of a string literal.
        /// </summary>
        private bool QuoteIsTranspose(List<Trivia> leadingTrivia)
        {
            // No previous token (quote is the first thing in the input): it starts a string.
            if (LastToken == null)
            {
                return false;
            }

            var kind = LastToken.PureToken.Kind;
            var followsOperand = kind == TokenKind.ClosingBrace
                                 || kind == TokenKind.ClosingBracket
                                 || kind == TokenKind.ClosingSquareBracket
                                 || kind == TokenKind.Identifier;

            // Any trivia between the operand and the quote makes it a string literal.
            return followsOperand
                   && LastToken.TrailingTrivia.Count == 0
                   && leadingTrivia.Count == 0;
        }

        /// <summary>Lexes the token at the current position; trivia is handled by the caller.</summary>
        /// <param name="leadingTrivia">Trivia already collected in front of this token.</param>
        /// <exception cref="ParsingException">The current character cannot start a token.</exception>
        private PureToken LexTokenWithoutTrivia(List<Trivia> leadingTrivia)
        {
            var character = Window.PeekChar();
            if (IsLetter(character))
            {
                return ContinueParsingIdentifier();
            }

            if (IsDigit(character))
            {
                var possiblyNumberToken = ContinueParsingNumber();
                if (!possiblyNumberToken.HasValue)
                {
                    throw new ParsingException($"Unexpected character \"{Window.PeekChar()}\" while parsing a number");
                }

                return possiblyNumberToken.Value;
            }

            switch (character)
            {
                case '=':
                    return LexOneOrTwoCharacters('=', TokenKind.Equality, TokenKind.Assignment);
                case '.':
                    return LexDotOperator();
                case '(':
                    return LexSingleCharacter(TokenKind.OpeningBracket);
                case ')':
                    return LexSingleCharacter(TokenKind.ClosingBracket);
                case '[':
                    return LexSingleCharacter(TokenKind.OpeningSquareBracket);
                case ']':
                    return LexSingleCharacter(TokenKind.ClosingSquareBracket);
                case '{':
                    return LexSingleCharacter(TokenKind.OpeningBrace);
                case '}':
                    return LexSingleCharacter(TokenKind.ClosingBrace);
                case ',':
                    return LexSingleCharacter(TokenKind.Comma);
                case ';':
                    return LexSingleCharacter(TokenKind.Semicolon);
                case '&':
                    return LexOneOrTwoCharacters('&', TokenKind.LogicalAnd, TokenKind.BitwiseAnd);
                case '|':
                    return LexOneOrTwoCharacters('|', TokenKind.LogicalOr, TokenKind.BitwiseOr);
                case '<':
                    return LexOneOrTwoCharacters('=', TokenKind.LessOrEqual, TokenKind.Less);
                case '>':
                    return LexOneOrTwoCharacters('=', TokenKind.GreaterOrEqual, TokenKind.Greater);
                case '~':
                    return LexOneOrTwoCharacters('=', TokenKind.Inequality, TokenKind.Not);
                case '+':
                    return LexSingleCharacter(TokenKind.Plus);
                case '-':
                    return LexSingleCharacter(TokenKind.Minus);
                case '*':
                    return LexSingleCharacter(TokenKind.Multiply);
                case '/':
                    return LexSingleCharacter(TokenKind.Divide);
                case '\\':
                    return LexSingleCharacter(TokenKind.Backslash);
                case '^':
                    return LexSingleCharacter(TokenKind.Power);
                case '@':
                    return LexSingleCharacter(TokenKind.At);
                case ':':
                    return LexSingleCharacter(TokenKind.Colon);
                case '?':
                    return LexSingleCharacter(TokenKind.QuestionMark);
                case '\'':
                    if (QuoteIsTranspose(leadingTrivia))
                    {
                        return LexSingleCharacter(TokenKind.Transpose);
                    }

                    return ContinueParsingStringLiteral();
                case '"':
                    return ContinueParsingDoubleQuotedStringLiteral();
                case '\0':
                    return PureTokenFactory.CreateEndOfFileToken();
                default:
                    throw new ParsingException(
                        $"Unknown symbol \"{character}\" at {Window.Position}."
                    );
            }
        }

        /// <summary>
        /// Lexes the next token together with its leading trivia and its trailing trivia
        /// (which extends up to and including the first line break).
        /// </summary>
        /// <exception cref="ParsingException">The input cannot be tokenized.</exception>
        public Token NextToken()
        {
            var leadingTrivia = LexTrivia(false);
            var token = LexTokenWithoutTrivia(leadingTrivia);
            var trailingTrivia = LexTrivia(true);

            var result = new Token(token, leadingTrivia, trailingTrivia);
            LastToken = result;
            return result;
        }

        /// <summary>Lexes the whole input; the last element of the result is the end-of-file token.</summary>
        /// <exception cref="ParsingException">The input cannot be tokenized.</exception>
        public List<Token> ParseAll()
        {
            var result = new List<Token>();
            while (true)
            {
                // NextToken never returns null; failures surface as ParsingException.
                var token = NextToken();
                result.Add(token);
                if (token.PureToken.Kind == TokenKind.EndOfFile)
                {
                    return result;
                }
            }
        }
    }
}
namespace Lexer
{
    /// <summary>
    /// A token without any surrounding trivia: its kind, its text, an optional value
    /// and the position reported for it.
    /// </summary>
    public struct PureToken
    {
        /// <summary>Kind of the token.</summary>
        public TokenKind Kind { get; }

        /// <summary>Text of the token as handed to the constructor.</summary>
        public string LiteralText { get; }

        /// <summary>Optional value attached to the token; may be <c>null</c>.</summary>
        public object Value { get; }

        /// <summary>Position associated with the token.</summary>
        public IPosition Position { get; }

        public PureToken(TokenKind kind, string literalText, object value, IPosition position)
        {
            Position = position;
            Value = value;
            LiteralText = literalText;
            Kind = kind;
        }

        /// <summary>Returns <see cref="LiteralText"/>.</summary>
        public override string ToString()
        {
            return LiteralText;
        }
    }
}
namespace Lexer
{
    /// <summary>
    /// Forward-only cursor over a string that also tracks the current line and column
    /// (both starting at 0).
    /// </summary>
    public class TextWindow : ITextWindow
    {
        protected readonly string Text;
        protected int Offset { get; set; }
        private PositionInsideFile _position;

        /// <summary>
        /// Current position. <see cref="PositionInsideFile"/> is a struct, so every read
        /// returns a copy that is not affected by later consumption.
        /// </summary>
        public IPosition Position => _position;

        /// <param name="text">Text to read.</param>
        /// <param name="fileName">Optional file name stored in the reported positions.</param>
        public TextWindow(string text, string fileName = null)
        {
            Text = text;
            Offset = 0;
            _position = new PositionInsideFile
            {
                File = fileName,
                Line = 0,
                Column = 0
            };
        }

        /// <summary>True when every character has been consumed.</summary>
        public bool IsEof()
        {
            return Offset >= Text.Length;
        }

        /// <summary>Returns the current character without consuming it; throws at end of text.</summary>
        public virtual char PeekChar()
        {
            return Text[Offset];
        }

        /// <summary>Returns the character <paramref name="n"/> places ahead without consuming anything.</summary>
        public virtual char PeekChar(int n)
        {
            return Text[Offset + n];
        }

        /// <summary>
        /// True when the character at <paramref name="index"/> completes a line break:
        /// a '\n', or a '\r' that is not immediately followed by '\n'. In a "\r\n" pair the
        /// break is counted once, on the '\n'.
        /// </summary>
        private bool EndsLineAt(int index)
        {
            var c = Text[index];
            if (c == '\n')
            {
                return true;
            }

            return c == '\r' && (index + 1 >= Text.Length || Text[index + 1] != '\n');
        }

        /// <summary>Consumes the current character and updates line/column; throws at end of text.</summary>
        public void ConsumeChar()
        {
            if (EndsLineAt(Offset))
            {
                _position.Line++;
                _position.Column = 0;
            }
            else
            {
                _position.Column++;
            }

            Offset++;
        }

        /// <summary>
        /// Consumes <paramref name="n"/> characters, tracking line breaks exactly like
        /// repeated calls to <see cref="ConsumeChar"/>.
        /// </summary>
        public void ConsumeChars(int n)
        {
            if (n < 0)
            {
                // Moving backwards: no characters to inspect, keep the plain arithmetic.
                Offset += n;
                _position.Column += n;
                return;
            }

            for (var i = 0; i < n; i++)
            {
                if (Offset < Text.Length)
                {
                    ConsumeChar();
                }
                else
                {
                    // Past the end there is nothing to inspect; just advance as before.
                    Offset++;
                    _position.Column++;
                }
            }
        }

        /// <summary>Returns the current character and consumes it.</summary>
        public char GetAndConsumeChar()
        {
            var c = Text[Offset];
            ConsumeChar();
            return c;
        }

        /// <summary>Returns the next <paramref name="n"/> characters as a string and consumes them.</summary>
        public string GetAndConsumeChars(int n)
        {
            var s = Text.Substring(Offset, n);
            ConsumeChars(n);
            return s;
        }

        /// <summary>Number of characters not yet consumed.</summary>
        public int CharactersLeft()
        {
            return Text.Length - Offset;
        }
    }
}
namespace Lexer
{
    /// <summary>
    /// A <see cref="PureToken"/> together with the trivia (whitespace and comments)
    /// that surrounds it in the source text.
    /// </summary>
    public class Token
    {
        /// <summary>Trivia in front of the token; never <c>null</c>.</summary>
        public List<Trivia> LeadingTrivia { get; }

        /// <summary>Trivia after the token; never <c>null</c>.</summary>
        public List<Trivia> TrailingTrivia { get; }

        public PureToken PureToken { get; }

        /// <summary>Leading trivia text, token text and trailing trivia text concatenated, computed once at construction.</summary>
        public string FullText { get; }

        public TokenKind Kind => PureToken.Kind;

        /// <param name="pureToken">The token itself.</param>
        /// <param name="leadingTrivia">Trivia before the token; <c>null</c> is treated as empty.</param>
        /// <param name="trailingTrivia">Trivia after the token; <c>null</c> is treated as empty.</param>
        public Token(PureToken pureToken, List<Trivia> leadingTrivia, List<Trivia> trailingTrivia)
        {
            PureToken = pureToken;
            // Normalise null to an empty list so that .Count and FullText are always safe.
            LeadingTrivia = leadingTrivia ?? new List<Trivia>();
            TrailingTrivia = trailingTrivia ?? new List<Trivia>();
            FullText = BuildFullText();
        }

        private string BuildFullText()
        {
            var leading = string.Concat(LeadingTrivia.Select(t => t.LiteralText));
            var trailing = string.Concat(TrailingTrivia.Select(t => t.LiteralText));
            return leading + PureToken.LiteralText + trailing;
        }

        /// <summary>Returns <see cref="FullText"/>.</summary>
        public override string ToString() => FullText;
    }
}
namespace Lexer
{
    /// <summary>
    /// A piece of source text that is attached to a token but is not a token itself;
    /// see <see cref="TriviaType"/> for the possible kinds.
    /// </summary>
    public class Trivia
    {
        /// <summary>Kind of this trivia.</summary>
        public TriviaType Type { get; }

        /// <summary>Text of this trivia as handed to the constructor.</summary>
        public string LiteralText { get; }

        /// <param name="type">Kind of the trivia.</param>
        /// <param name="literalText">Text covered by the trivia.</param>
        public Trivia(TriviaType type, string literalText)
        {
            LiteralText = literalText;
            Type = type;
        }
    }
}
'abc', d)"; + var sut = CreateParser(text); + var actual = sut.ParseExpression(); + Assert.IsInstanceOf(actual); + var f = actual as FunctionCallExpressionNode; + Assert.AreEqual(4, f?.Parameters.Parameters.Count); + Assert.AreEqual(text, actual.FullText); + } + + [Test] + public void ParseArrayLiteralExpression() + { + var text = "[a, 2, 'text']"; + var sut = CreateParser(text); + var actual = sut.ParseExpression(); + Assert.IsInstanceOf(actual); + var a = actual as ArrayLiteralExpressionNode; + Assert.AreEqual(3, a?.Elements.Elements.Count); + Assert.AreEqual(text, actual.FullText); + } + + [Test] + public void ParseLeftAssociativeSamePrecedence() + { + var text = "2 + 3 + 4"; + var sut = CreateParser(text); + var actual = sut.ParseExpression(); + Assert.IsInstanceOf(actual); + var e = (BinaryOperationExpressionNode)actual; + Assert.IsInstanceOf(e.Lhs); + Assert.IsInstanceOf(e.Rhs); + Assert.AreEqual(text, actual.FullText); + } + + [Test] + public void ParseLeftAssociativeRaisingPrecedence() + { + var text = "2 + 3 * 4"; + var sut = CreateParser(text); + var actual = sut.ParseExpression(); + Assert.IsInstanceOf(actual); + var e = (BinaryOperationExpressionNode) actual; + Assert.AreEqual(TokenKind.Plus, e.Operation.Token.Kind); + Assert.IsInstanceOf(e.Lhs); + Assert.IsInstanceOf(e.Rhs); + Assert.AreEqual(text, actual.FullText); + } + + [Test] + public void ParseLeftAssociativeLoweringPrecedence() + { + var text = "2 * 3 + 4"; + var sut = CreateParser(text); + var actual = sut.ParseExpression(); + Assert.IsInstanceOf(actual); + var e = (BinaryOperationExpressionNode) actual; + Assert.AreEqual(TokenKind.Plus, e.Operation.Token.Kind); + Assert.IsInstanceOf(e.Lhs); + Assert.IsInstanceOf(e.Rhs); + Assert.AreEqual(text, actual.FullText); + } + + [Test] + public void ParseUnaryOperators() + { + var text = "-42"; + var sut = CreateParser(text); + var actual = sut.ParseExpression(); + Assert.IsInstanceOf(actual); + var e = (UnaryPrefixOperationExpressionNode) actual; + 
Assert.AreEqual(TokenKind.Minus, e.Operation.Token.Kind); + Assert.IsInstanceOf(e.Operand); + Assert.AreEqual(text, actual.FullText); + } + + [Test] + public void ParseMemberAccess() + { + var text = "a.b.c"; + var sut = CreateParser(text); + var actual = sut.ParseExpression(); + Assert.IsInstanceOf(actual); + var m = (MemberAccessNode) actual; + Assert.IsInstanceOf(m.LeftOperand); + Assert.IsInstanceOf(m.RightOperand); + Assert.AreEqual(text, actual.FullText); + } + + [Test] + public void ParseWhileStatement() + { + var text = "while a < b c = d end"; + var sut = CreateParser(text); + var actual = sut.ParseStatement(); + Assert.IsInstanceOf(actual); + Assert.AreEqual(text, actual.FullText); + } + + [Test] + public void ParseWhileStatementWithComma() + { + var text = "while a < b, c = d end"; + var sut = CreateParser(text); + var actual = sut.ParseStatement(); + Assert.IsInstanceOf(actual); + Assert.AreEqual(text, actual.FullText); + } + + [Test] + public void ParseIfStatement() + { + var text = "if 2 < 3 a = b end"; + var sut = CreateParser(text); + var actual = sut.ParseStatement(); + Assert.IsInstanceOf(actual); + Assert.AreEqual(text, actual.FullText); + } + + [Test] + public void ParseIfElseStatement() + { + var text = "if 2 < 3 a = b else c = d end"; + var sut = CreateParser(text); + var actual = sut.ParseStatement(); + Assert.IsInstanceOf(actual); + Assert.AreEqual(text, actual.FullText); + } + + [Test] + public void ParseParenthesizedExpression() + { + var text = "2 * (3 + 4)"; + var sut = CreateParser(text); + var actual = sut.ParseExpression(); + Assert.IsInstanceOf(actual); + var e = (BinaryOperationExpressionNode) actual; + Assert.IsInstanceOf(e.Lhs); + Assert.IsInstanceOf(e.Rhs); + var p = (ParenthesizedExpressionNode) e.Rhs; + Assert.IsInstanceOf(p.Expression); + Assert.AreEqual(text, actual.FullText); + } + + [Test] + public void ParseForStatement() + { + var text = "for i = 1:5 a = i end"; + var sut = CreateParser(text); + var actual = 
sut.ParseStatement(); + Assert.IsInstanceOf(actual); + Assert.AreEqual(text, actual.FullText); + } + + [Test] + public void ParseEmptyArray() + { + var text = "[]"; + var sut = CreateParser(text); + var actual = sut.ParseExpression(); + Assert.IsInstanceOf(actual); + var a = (ArrayLiteralExpressionNode) actual; + Assert.AreEqual(0, a.Elements.Elements.Count); + } + + [Test] + public void ParseCellArrayLiteral() + { + var text = "{ 1 2, 3 }"; + var sut = CreateParser(text); + var actual = sut.ParseExpression(); + Assert.IsInstanceOf(actual); + var a = (CellArrayLiteralExpressionNode) actual; + Assert.AreEqual(3, a.Elements.Elements.Count); + } + } +} \ No newline at end of file diff --git a/Parser.Tests/Parser.Tests.csproj b/Parser.Tests/Parser.Tests.csproj new file mode 100644 index 0000000..3d8d59b --- /dev/null +++ b/Parser.Tests/Parser.Tests.csproj @@ -0,0 +1,17 @@ + + + netcoreapp2.0 + false + + + + + + + + + {B20EDC10-E6E6-4430-8527-B95206DEF941} + Parser + + + \ No newline at end of file diff --git a/Parser/MParser.cs b/Parser/MParser.cs new file mode 100644 index 0000000..31adc4b --- /dev/null +++ b/Parser/MParser.cs @@ -0,0 +1,749 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using Lexer; + +namespace Parser +{ + public class MParser + { + public enum Precedence + { + // see https://mathworks.com/help/matlab/matlab_prog/operator-precedence.html + Expression = 0, + Assignment, + LogicalOr, + LogicalAnd, + BitwiseOr, + BitwiseAnd, + Relational, + Colon, + Additive, + Multiplicative, + Unary, + WeirdPower, + Power + } + + private static Precedence GetPrecedence(TokenKind kind) + { + switch (kind) + { + case TokenKind.Assignment: + return Precedence.Assignment; + case TokenKind.LogicalOr: + return Precedence.LogicalOr; + case TokenKind.LogicalAnd: + return Precedence.LogicalAnd; + case TokenKind.BitwiseOr: + return Precedence.BitwiseOr; + case TokenKind.BitwiseAnd: + return Precedence.BitwiseAnd; + case TokenKind.Less: + case 
TokenKind.LessOrEqual: + case TokenKind.Greater: + case TokenKind.GreaterOrEqual: + case TokenKind.Equality: + case TokenKind.Inequality: + return Precedence.Relational; + case TokenKind.Colon: + return Precedence.Colon; + case TokenKind.Plus: + case TokenKind.Minus: + return Precedence.Additive; + case TokenKind.Multiply: + case TokenKind.DotMultiply: + case TokenKind.Divide: + case TokenKind.DotDivide: + case TokenKind.Backslash: + case TokenKind.DotBackslash: + return Precedence.Multiplicative; + case TokenKind.Not: + return Precedence.Unary; + case TokenKind.Power: + case TokenKind.DotPower: + case TokenKind.Transpose: + case TokenKind.DotTranspose: + return Precedence.Power; + default: + return Precedence.Expression; + } + } + + private List Tokens { get; } + private int _index; + private Token CurrentToken => Tokens[_index]; + private SyntaxFactory Factory { get; } + + public MParser(List tokens) + { + Tokens = tokens; + _index = 0; + Factory = new SyntaxFactory(); + } + + private Token EatToken() + { + var token = Tokens[_index]; + //Console.WriteLine($"{token} at {token.PureToken.Position}"); + _index++; + return token; + } + + private Token EatToken(TokenKind kind) + { + var token = Tokens[_index]; + //Console.WriteLine($"{token} at {token.PureToken.Position}"); + if (token.Kind != kind) + { + throw new ParsingException($"Unexpected token \"{token.PureToken}\" instead of {kind} at {token.PureToken.Position}."); + } + _index++; + return token; + } + + private Token EatIdentifier(string s) + { + var token = Tokens[_index]; + //Console.WriteLine($"{token} at {token.PureToken.Position}"); + if (token.PureToken.Kind != TokenKind.Identifier) + { + throw new ParsingException($"Unexpected token \"{token.PureToken}\" instead of identifier \"{s}\" at {token.PureToken.Position}."); + } + + if (token.PureToken.LiteralText != s) + { + throw new ParsingException($"Unexpected identifier \"{token.PureToken.LiteralText}\" instead of \"{s}\" at 
{token.PureToken.Position}."); + } + _index++; + return token; + } + + private void EatAll() + { + _index = Tokens.Count - 1; + } + + private List ParseFunctionOutputList() + { + var outputs = new List(); + outputs.Add(EatToken(TokenKind.Identifier)); + while (CurrentToken.Kind != TokenKind.ClosingSquareBracket) + { + if (CurrentToken.Kind == TokenKind.Comma) + { + outputs.Add(EatToken()); + } + outputs.Add(EatToken(TokenKind.Identifier)); + } + + return outputs.Select(token => new TokenNode(token) as SyntaxNode).ToList(); + } + + private FunctionOutputDescriptionNode ParseFunctionOutputDescription() + { + if (CurrentToken.Kind == TokenKind.Identifier) + { + var result = EatToken(); + return Factory.FunctionOutputDescription(new List { Factory.Token(result) }); + } else if (CurrentToken.Kind == TokenKind.OpeningSquareBracket) + { + var leftBracket = EatToken(); + var outputs = ParseFunctionOutputList(); + var rightBracket = EatToken(TokenKind.ClosingSquareBracket); + var nodes = new List {Factory.Token(leftBracket)}; + nodes.AddRange(outputs); + nodes.Add(Factory.Token(rightBracket)); + return Factory.FunctionOutputDescription(nodes); + } + throw new ParsingException($"Unexpected token {CurrentToken.PureToken} during parsing function output descritpion at {CurrentToken.PureToken.Position}."); + } + + private ParameterListNode ParseParameterList() + { + var identifierTokens = new List(); + identifierTokens.Add(EatToken(TokenKind.Identifier)); + while (CurrentToken.PureToken.Kind != TokenKind.ClosingBracket) + { + identifierTokens.Add(EatToken(TokenKind.Comma)); + identifierTokens.Add(EatToken(TokenKind.Identifier)); + } + + return Factory.ParameterList(identifierTokens.Select(token => new TokenNode(token) as SyntaxNode).ToList()); + } + + private FunctionInputDescriptionNode ParseFunctionInputDescription() + { + var openingBracket = EatToken(TokenKind.OpeningBracket); + var parameterList = ParseParameterList(); + var closingBracket = 
EatToken(TokenKind.ClosingBracket); + return Factory.FunctionInputDescription( + new TokenNode(openingBracket), + parameterList, + new TokenNode(closingBracket)); + } + + private TokenNode PossibleSemicolonOrComma() + { + if (CurrentToken.Kind == TokenKind.Semicolon + || CurrentToken.Kind == TokenKind.Comma) + { + return Factory.Token(EatToken()); + } + + return null; + } + + private FunctionDeclarationNode ParseFunctionDeclaration() + { + var functionKeyword = EatIdentifier("function"); + var outputDescription = ParseFunctionOutputDescription(); + var assignment = EatToken(TokenKind.Assignment); + var name = EatToken(TokenKind.Identifier); + var inputDescription = ParseFunctionInputDescription(); + var body = ParseStatements(); + TokenNode end = null; + if (CurrentToken.Kind == TokenKind.Identifier + && CurrentToken.PureToken.LiteralText == "end") + { + end = Factory.Token(EatIdentifier("end")); + } + + var semicolonOrComma = PossibleSemicolonOrComma(); + return Factory.FunctionDeclaration( + Factory.Token(functionKeyword), + outputDescription, + Factory.Token(assignment), + Factory.Token(name), + inputDescription, + body, + end, + semicolonOrComma); + } + + private StatementNode ParseClassDeclaration() + { + var node = new TokenNode(CurrentToken); + EatAll(); + return null; + } + + private FunctionCallParameterListNode ParseFunctionCallParameterList() + { + var first = ParseExpression(); + var nodes = new List { first }; + while (CurrentToken.PureToken.Kind != TokenKind.ClosingBracket) + { + nodes.Add(Factory.Token(EatToken(TokenKind.Comma))); + nodes.Add(ParseExpression()); + } + + return Factory.FunctionCallParameterList(nodes); + } + + private ExpressionNode ParseMember() + { + if (CurrentToken.Kind == TokenKind.Identifier) + { + return Factory.IdentifierName(EatToken()); + } + throw new ParsingException($"Unexpected token {CurrentToken.PureToken} at {CurrentToken.PureToken.Position}."); + } + + private ExpressionNode ParsePostfix(ExpressionNode expression) + 
{ + while (true) + { + var token = CurrentToken; + switch(token.Kind) { + case TokenKind.OpeningBrace: // cell array element access + var openingBrace = EatToken(); + var index = ParseExpression(); + var closingBrace = EatToken(TokenKind.ClosingBrace); + expression = Factory.CellArrayElementAccessExpression( + expression, + Factory.Token(openingBrace), + index, + Factory.Token(closingBrace) + ); + break; + case TokenKind.OpeningBracket: // function call + var openingBracket = EatToken(); + var parameters = ParseFunctionCallParameterList(); + var closingBracket = EatToken(TokenKind.ClosingBracket); + expression = Factory.FunctionCallExpression( + expression, + Factory.Token(openingBracket), + parameters, + Factory.Token(closingBracket)); + break; + case TokenKind.Dot: // member access + if (expression is IdentifierNameNode + || expression is MemberAccessNode) + { + var dot = EatToken(); + var member = ParseMember(); + expression = Factory.MemberAccess(expression, Factory.Token(dot), member); + } + else + { + throw new ParsingException( + $"Unexpected token {token.PureToken} at {token.PureToken.Position}."); + } + + break; + case TokenKind.Transpose: + var transposeSign = Factory.Token(EatToken()); + expression = Factory.UnaryPostfixOperationExpression(expression, transposeSign); + break; + default: + return expression; + } + } + } + + private ArrayElementListNode ParseArrayElementList() + { + var nodes = new List {}; + + while (CurrentToken.Kind != TokenKind.ClosingSquareBracket) + { + if (nodes.Count > 0) + { + if (CurrentToken.Kind == TokenKind.Comma + || CurrentToken.Kind == TokenKind.Semicolon) + { + nodes.Add(Factory.Token(EatToken())); + } + } + + nodes.Add(ParseExpression()); + } + + return Factory.ArrayElementList(nodes); + } + + private ArrayElementListNode ParseCellArrayElementList() + { + var nodes = new List {}; + + while (CurrentToken.Kind != TokenKind.ClosingBrace) + { + if (nodes.Count > 0) + { + if (CurrentToken.Kind == TokenKind.Comma + || 
CurrentToken.Kind == TokenKind.Semicolon) + { + nodes.Add(Factory.Token(EatToken())); + } + } + + nodes.Add(ParseExpression()); + } + + return Factory.ArrayElementList(nodes); + } + + private ArrayLiteralExpressionNode ParseArrayLiteral() + { + var openingSquareBracket = EatToken(TokenKind.OpeningSquareBracket); + var elements = ParseArrayElementList(); + var closingSquareBracket = EatToken(TokenKind.ClosingSquareBracket); + return Factory.ArrayLiteralExpression( + Factory.Token(openingSquareBracket), + elements, + Factory.Token(closingSquareBracket)); + } + + private CellArrayLiteralExpressionNode ParseCellArrayLiteral() + { + var openingBrace = EatToken(TokenKind.OpeningBrace); + var elements = ParseCellArrayElementList(); + var closingBrace = EatToken(TokenKind.ClosingBrace); + return Factory.CellArrayLiteralExpression( + Factory.Token(openingBrace), + elements, + Factory.Token(closingBrace)); + } + + private ParenthesizedExpressionNode ParseParenthesizedExpression() + { + var openParen = Factory.Token(EatToken(TokenKind.OpeningBracket)); + var expression = ParseExpression(); + var closeParen = Factory.Token(EatToken(TokenKind.ClosingBracket)); + return Factory.ParenthesizedExpression( + openParen, + expression, + closeParen); + } + + private ExpressionNode ParseTerm() + { + var token = CurrentToken; + ExpressionNode expression = null; + if (token.Kind == TokenKind.Identifier) + { + var term = EatToken(); + expression = Factory.IdentifierName(term); + } + else if (token.Kind == TokenKind.NumberLiteral) + { + var number = EatToken(); + expression = Factory.NumberLiteral(number); + } + else if (token.Kind == TokenKind.StringLiteral) + { + var str = EatToken(); + expression = Factory.StringLiteral(str); + } + else if (token.Kind == TokenKind.OpeningSquareBracket) // array literal expression + { + expression = ParseArrayLiteral(); + } + else if (token.Kind == TokenKind.OpeningBrace) // cell array literal expression + { + expression = ParseCellArrayLiteral(); + } + 
else if (token.Kind == TokenKind.Colon) // for parsing things like a{:} + { + expression = Factory.EmptyExpression(); + } + else if (token.Kind == TokenKind.OpeningBracket) + { + expression = ParseParenthesizedExpression(); + } + + return ParsePostfix(expression); + } + + public ExpressionNode ParseExpression() + { + return ParseSubExpression(Precedence.Expression); + } + + private bool IsUnaryOperator(TokenKind kind) + { + switch (kind) + { + case TokenKind.Plus: + case TokenKind.Minus: + case TokenKind.Not: + return true; + default: + return false; + } + } + + private bool IsBinaryOperator(TokenKind kind) + { + switch (kind) + { + case TokenKind.Assignment: + case TokenKind.LogicalOr: + case TokenKind.LogicalAnd: + case TokenKind.BitwiseOr: + case TokenKind.BitwiseAnd: + case TokenKind.Less: + case TokenKind.LessOrEqual: + case TokenKind.Greater: + case TokenKind.GreaterOrEqual: + case TokenKind.Equality: + case TokenKind.Inequality: + case TokenKind.Colon: + case TokenKind.Plus: + case TokenKind.Minus: + case TokenKind.Multiply: + case TokenKind.DotMultiply: + case TokenKind.Divide: + case TokenKind.DotDivide: + case TokenKind.Backslash: + case TokenKind.DotBackslash: + case TokenKind.Not: + case TokenKind.Power: + case TokenKind.DotPower: + return true; + default: + return false; + } + } + + private bool IsLeftAssociative(TokenKind kind) + { + return true; // TODO: really? 
+ } + + private TokenKind ConvertToUnaryTokenKind(TokenKind kind) + { + switch (kind) + { + case TokenKind.Plus: + return TokenKind.UnaryPlus; + case TokenKind.Minus: + return TokenKind.UnaryMinus; + case TokenKind.Not: + return TokenKind.UnaryNot; + default: + throw new ArgumentException(nameof(kind)); + } + } + + private ExpressionNode ParseSubExpression(Precedence precedence) + { + ExpressionNode lhs = null; + if (IsUnaryOperator(CurrentToken.Kind)) + { + var operation = EatToken(); + var unaryTokenKind = ConvertToUnaryTokenKind(operation.Kind); + var newPrecedence = GetPrecedence(unaryTokenKind); + var operand = ParseSubExpression(newPrecedence); + lhs = Factory.UnaryPrefixOperationExpression(Factory.Token(operation), operand); + } + else + { + lhs = ParseTerm(); + } + while (true) + { + var token = CurrentToken; + if (IsBinaryOperator(token.Kind)) + { + var newPrecedence = GetPrecedence(token.Kind); + if (newPrecedence < precedence) + { + break; + } + + if (newPrecedence == precedence && IsLeftAssociative(token.Kind)) + { + break; + } + + EatToken(); + var rhs = ParseSubExpression(newPrecedence); + if (rhs == null && token.Kind == TokenKind.Colon) // for parsing things like a{:} + { + rhs = Factory.EmptyExpression(); + } + if (token.Kind == TokenKind.Assignment) + { + lhs = Factory.AssignmentExpression(lhs, Factory.Token(token), rhs); + } + else + { + lhs = Factory.BinaryOperationExpression(lhs, Factory.Token(token), rhs); + } + } + else + { + break; + } + } + + return lhs; + } + + private SwitchCaseNode ParseSwitchCase() + { + var caseKeyword = EatIdentifier("case"); + var caseId = EatToken(TokenKind.StringLiteral); + var statementList = ParseStatements(); + return Factory.SwitchCase(Factory.Token(caseKeyword), Factory.Token(caseId), statementList); + } + + private SwitchStatementNode ParseSwitchStatement() + { + var switchKeyword = EatIdentifier("switch"); + var expression = ParseExpression(); + var casesList = new List(); + while (CurrentToken.Kind == 
TokenKind.Identifier + && CurrentToken.PureToken.LiteralText == "case") + { + casesList.Add(ParseSwitchCase()); + } + + var endKeyword = EatIdentifier("end"); + return Factory.SwitchStatement( + Factory.Token(switchKeyword), + expression, + casesList, + Factory.Token(endKeyword)); + } + + public ExpressionStatementNode ParseExpressionStatement() + { + var statement = ParseExpression(); + if (CurrentToken.Kind == TokenKind.Semicolon) + { + var semicolon = EatToken(); + return Factory.ExpressionStatement(statement, Factory.Token(semicolon)); + } + + return Factory.ExpressionStatement(statement); + } + + public WhileStatementNode ParseWhileStatement() + { + var whileKeyword = EatToken(); + var condition = ParseExpression(); + var commas = new List(); + while (CurrentToken.Kind == TokenKind.Comma) + { + commas.Add(Factory.Token(EatToken())); + } + if (commas.Count == 0) + { + commas = null; + } + + var body = ParseStatements(); + var endKeyword = EatIdentifier("end"); + return Factory.WhileStatement( + Factory.Token(whileKeyword), + condition, + body, + Factory.Token(endKeyword), + commas); + } + + public StatementNode ParseStatement() + { + var statement = ParseStatementCore(); + if (statement != null) + { + if (CurrentToken.Kind == TokenKind.Semicolon + || CurrentToken.Kind == TokenKind.Comma) + { + statement = Factory.AppendSemicolonOrComma(statement, Factory.Token(EatToken())); + } + } + + return statement; + } + + public IfStatementNode ParseIfStatement() + { + var ifKeyword = Factory.Token(EatToken()); + var condition = ParseExpression(); + var commas = new List(); + while (CurrentToken.Kind == TokenKind.Comma) + { + commas.Add(Factory.Token(EatToken())); + } + if (commas.Count == 0) + { + commas = null; + } + var body = ParseStatements(); + TokenNode elseKeyword = null; + StatementListNode elseBody = null; + if (CurrentToken.Kind == TokenKind.Identifier + && CurrentToken.PureToken.LiteralText == "else") + { + elseKeyword = Factory.Token(EatToken()); + elseBody = 
ParseStatements(); + } + + var endKeyword = Factory.Token(EatIdentifier("end")); + return Factory.IfStatement( + ifKeyword, + condition, + body, + elseKeyword, + elseBody, + endKeyword, + commas); + } + + public ForStatementNode ParseForStatement() + { + var forKeyword = Factory.Token(EatIdentifier("for")); + var expression = ParseExpression(); + if (!(expression is AssignmentExpressionNode)) + { + throw new ParsingException($"Unexpected expression \"{expression.FullText}\" while parsing FOR statement at {CurrentToken.PureToken.Position}."); + } + + var forAssignment = (AssignmentExpressionNode) expression; + var body = ParseStatements(); + var endKeyword = Factory.Token(EatIdentifier("end")); + return Factory.ForStatement(forKeyword, forAssignment, body, endKeyword); + } + + public StatementNode ParseStatementCore() + { + if (CurrentToken.Kind == TokenKind.Identifier) + { + if (CurrentToken.PureToken.LiteralText == "function") + { + return ParseFunctionDeclaration(); + } + else if (CurrentToken.PureToken.LiteralText == "classdef") + { + return ParseClassDeclaration(); + } + else if (CurrentToken.PureToken.LiteralText == "switch") + { + return ParseSwitchStatement(); + } + else if (CurrentToken.PureToken.LiteralText == "while") + { + return ParseWhileStatement(); + } + else if (CurrentToken.PureToken.LiteralText == "if") + { + return ParseIfStatement(); + } + else if (CurrentToken.PureToken.LiteralText == "case") + { + return null; + } + else if (CurrentToken.PureToken.LiteralText == "else") + { + return null; + } + else if (CurrentToken.PureToken.LiteralText == "end") + { + return null; + } + else if (CurrentToken.PureToken.LiteralText == "for") + { + return ParseForStatement(); + } + + return ParseExpressionStatement(); + } + + if (CurrentToken.Kind == TokenKind.OpeningSquareBracket) + { + return ParseExpressionStatement(); + } + throw new ParsingException($"Unexpected token: \"{CurrentToken.PureToken}\" at {CurrentToken.PureToken.Position}"); + } + + private 
StatementListNode ParseStatements() + { + var statements = new List(); + while (CurrentToken.PureToken.Kind != TokenKind.EndOfFile) + { + var node = ParseStatement(); + if (node == null) + { + break; + } + statements.Add(node); + } + + return Factory.StatementList(statements); + } + + private StatementListNode ParseFile() + { + return ParseStatements(); + } + + public StatementListNode Parse() + { + return ParseFile(); + } + } +} \ No newline at end of file diff --git a/Parser/Parser.csproj b/Parser/Parser.csproj new file mode 100644 index 0000000..4d7af63 --- /dev/null +++ b/Parser/Parser.csproj @@ -0,0 +1,8 @@ + + + netcoreapp2.0 + + + + + \ No newline at end of file diff --git a/Parser/SyntaxFactory.cs b/Parser/SyntaxFactory.cs new file mode 100644 index 0000000..a436c14 --- /dev/null +++ b/Parser/SyntaxFactory.cs @@ -0,0 +1,506 @@ +using System.Collections.Generic; +using System.Linq; +using System.Reflection.Metadata; +using Lexer; + +namespace Parser +{ + public class SyntaxFactory + { + private static SyntaxNode SetParent(SyntaxNode parent) + { + foreach (var node in parent.Children) + { + node.Parent = parent; + } + + return parent; + } + + public FunctionDeclarationNode FunctionDeclaration( + TokenNode token, + FunctionOutputDescriptionNode outputDescription, + TokenNode equalitySign, + TokenNode name, + FunctionInputDescriptionNode inputDescription, + StatementListNode body, + TokenNode end, + TokenNode semicolonOrComma = null) + { + var children = new List + { + token, + outputDescription, + equalitySign, + name, + inputDescription, + body + }; + if (end != null) + { + children.Add(end); + } + if (semicolonOrComma != null) + { + children.Add(semicolonOrComma); + } + var result = + new FunctionDeclarationNode( + children, + token, + outputDescription, + equalitySign, + name, + inputDescription, + body, + end, + semicolonOrComma); + SetParent(result); + return result; + } + + public FunctionOutputDescriptionNode FunctionOutputDescription( + List nodes) + { 
+ var result = new FunctionOutputDescriptionNode( + nodes, + nodes + .Where(node => node is TokenNode && ((TokenNode) node).Token.Kind == TokenKind.Identifier) + .Select(node => node as TokenNode) + .ToList() + ); + SetParent(result); + return result; + } + + public ParameterListNode ParameterList(List nodes) + { + var result = new ParameterListNode( + nodes, + nodes + .Where( + node => node is TokenNode && ((TokenNode) node).Token.Kind != TokenKind.Comma + ).ToList()); + SetParent(result); + return result; + } + + public StatementListNode StatementList(List nodes) + { + var result = new StatementListNode(nodes); + SetParent(result); + return result; + } + + public FunctionInputDescriptionNode FunctionInputDescription( + TokenNode openingBracket, + ParameterListNode parameterList, + TokenNode closingBracket) + { + var children = new List + { + openingBracket, + parameterList, + closingBracket + }; + var result = new FunctionInputDescriptionNode(children, openingBracket, parameterList, closingBracket); + SetParent(result); + return result; + } + + public TokenNode Token(Token token) + { + return new TokenNode(token); + } + + public SwitchStatementNode SwitchStatement( + TokenNode switchKeyword, + ExpressionNode switchExpression, + List cases, + TokenNode endKeyword, + TokenNode semicolonOrComma = null) + { + var children = new List { switchKeyword, switchExpression }; + children.AddRange(cases); + children.Add(endKeyword); + if (semicolonOrComma != null) + { + children.Add(semicolonOrComma); + } + + var result = new SwitchStatementNode( + children, + switchKeyword, + switchExpression, + cases, + endKeyword, + semicolonOrComma); + SetParent(result); + return result; + } + + public SwitchCaseNode SwitchCase( + TokenNode caseKeyword, + TokenNode caseIdentifier, + StatementListNode statementList) + { + var children = new List + { + caseKeyword, + caseIdentifier, + statementList + }; + var result = new SwitchCaseNode(children, caseKeyword, caseIdentifier, statementList); 
+ SetParent(result); + return result; + } + + public AssignmentExpressionNode AssignmentExpression( + ExpressionNode lhs, + TokenNode assignmentSign, + ExpressionNode rhs) + { + var children = new List + { + lhs, + assignmentSign, + rhs + }; + var result = new AssignmentExpressionNode(children, lhs, assignmentSign, rhs); + SetParent(result); + return result; + } + + public UnaryPrefixOperationExpressionNode UnaryPrefixOperationExpression( + TokenNode operation, + ExpressionNode operand) + { + var children = new List + { + operation, + operand + }; + var result = new UnaryPrefixOperationExpressionNode(children, operation, operand); + SetParent(result); + return result; + } + + public UnaryPostfixOperationExpressionNode UnaryPostfixOperationExpression( + ExpressionNode operand, + TokenNode operation) + { + var children = new List + { + operand, + operation + }; + var result = new UnaryPostfixOperationExpressionNode(children, operand, operation); + SetParent(result); + return result; + } + + public BinaryOperationExpressionNode BinaryOperationExpression( + ExpressionNode lhs, + TokenNode operation, + ExpressionNode rhs) + { + var children = new List + { + lhs, + operation, + rhs + }; + var result = new BinaryOperationExpressionNode(children, lhs, operation, rhs); + SetParent(result); + return result; + } + + public IdentifierNameNode IdentifierName( + Token identifier) + { + return new IdentifierNameNode(identifier); + } + + public NumberLiteralNode NumberLiteral( + Token numberLiteral) + { + return new NumberLiteralNode(numberLiteral); + } + + public StringLiteralNode StringLiteral( + Token stringLiteral) + { + return new StringLiteralNode(stringLiteral); + } + + public ExpressionStatementNode ExpressionStatement(ExpressionNode expression) + { + var children = new List {expression}; + var result = new ExpressionStatementNode(children, expression, null); + SetParent(result); + return result; + } + + public ExpressionStatementNode ExpressionStatement(ExpressionNode 
expression, TokenNode semicolonOrComma) + { + var children = new List {expression, semicolonOrComma}; + var result = new ExpressionStatementNode(children, expression, semicolonOrComma); + SetParent(result); + return result; + } + + public CellArrayElementAccessExpressionNode CellArrayElementAccessExpression( + ExpressionNode cellArray, + TokenNode openingBrace, + ExpressionNode index, + TokenNode closingBrace) + { + var children = new List {cellArray, openingBrace, index, closingBrace}; + var result = new CellArrayElementAccessExpressionNode( + children, + cellArray, + openingBrace, + index, + closingBrace); + SetParent(result); + return result; + } + + public FunctionCallExpressionNode FunctionCallExpression( + ExpressionNode functionName, + TokenNode openingBracket, + FunctionCallParameterListNode parameters, + TokenNode closingBracket) + { + var children = new List + { + functionName, + openingBracket, + parameters, + closingBracket + }; + var result = new FunctionCallExpressionNode( + children, + functionName, + openingBracket, + parameters, + closingBracket); + SetParent(result); + return result; + } + + public FunctionCallParameterListNode FunctionCallParameterList(List nodes) + { + var result = new FunctionCallParameterListNode( + nodes, + nodes + .OfType() + .ToList()); + SetParent(result); + return result; + } + + public ArrayElementListNode ArrayElementList(List nodes) + { + var result = new ArrayElementListNode( + nodes, + nodes + .OfType() + .ToList()); + SetParent(result); + return result; + } + + public ArrayLiteralExpressionNode ArrayLiteralExpression( + TokenNode openingSquareBracket, + ArrayElementListNode elements, + TokenNode closingSquareBracket) + { + var children = new List + { + openingSquareBracket, + elements, + closingSquareBracket + }; + var result = new ArrayLiteralExpressionNode( + children, + openingSquareBracket, + elements, + closingSquareBracket); + SetParent(result); + return result; + } + + public CellArrayLiteralExpressionNode 
CellArrayLiteralExpression( + TokenNode openingBrace, + ArrayElementListNode elements, + TokenNode closingBrace) + { + var children = new List + { + openingBrace, + elements, + closingBrace + }; + var result = new CellArrayLiteralExpressionNode( + children, + openingBrace, + elements, + closingBrace); + SetParent(result); + return result; + } + + public EmptyExpressionNode EmptyExpression() + { + return new EmptyExpressionNode(); + } + + public MemberAccessNode MemberAccess( + SyntaxNode leftOperand, + TokenNode dot, + SyntaxNode rightOperand) + { + var children = new List + { + leftOperand, + dot, + rightOperand + }; + var result = new MemberAccessNode( + children, + leftOperand, + dot, + rightOperand); + SetParent(result); + return result; + } + + public WhileStatementNode WhileStatement( + TokenNode whileKeyword, + ExpressionNode condition, + StatementListNode body, + TokenNode end, + List optionalCommasAfterCondition = null, + TokenNode semicolonOrComma = null) + { + var children = new List + { + whileKeyword, + condition, + }; + if (optionalCommasAfterCondition != null) + { + children.AddRange(optionalCommasAfterCondition); + } + + children.Add(body); + children.Add(end); + if (semicolonOrComma != null) + { + children.Add(semicolonOrComma); + } + var result = new WhileStatementNode( + children, + whileKeyword, + condition, + optionalCommasAfterCondition, + body, + end, + semicolonOrComma); + SetParent(result); + return result; + } + + public StatementNode AppendSemicolonOrComma(StatementNode statement, TokenNode semicolonOrComma) + { + statement.SemicolonOrComma = semicolonOrComma; + statement.Children.Add(semicolonOrComma); + statement.Children[statement.Children.Count - 1].Parent = statement; + return statement; + } + + public IfStatementNode IfStatement( + TokenNode ifKeyword, + ExpressionNode condition, + StatementListNode body, + TokenNode elseKeyword, + StatementListNode elseBody, + TokenNode endKeyword, + List optionalCommasAfterCondition = null) + { + 
var children = new List + { + ifKeyword, + condition + }; + if (optionalCommasAfterCondition != null) + { + children.AddRange(optionalCommasAfterCondition); + } + + children.Add(body); + if (elseKeyword != null) + { + children.Add(elseKeyword); + children.Add(elseBody); + } + + children.Add(endKeyword); + var result = new IfStatementNode( + children, + ifKeyword, + condition, + optionalCommasAfterCondition, + body, + elseKeyword, + elseBody, + endKeyword); + SetParent(result); + return result; + } + + public ParenthesizedExpressionNode ParenthesizedExpression( + TokenNode openParen, + ExpressionNode expression, + TokenNode closeParen) + { + var children = new List + { + openParen, + expression, + closeParen + }; + var result = new ParenthesizedExpressionNode( + children, + openParen, + expression, + closeParen); + SetParent(result); + return result; + } + + public ForStatementNode ForStatement( + TokenNode forKeyword, + AssignmentExpressionNode forAssignment, + StatementListNode body, + TokenNode endKeyword) + { + var children = new List + { + forKeyword, + forAssignment, + body, + endKeyword + }; + var result = new ForStatementNode( + children, + forKeyword, + forAssignment, + body, + endKeyword); + SetParent(result); + return result; + } + } +} \ No newline at end of file diff --git a/Parser/SyntaxNode.cs b/Parser/SyntaxNode.cs new file mode 100644 index 0000000..19f1610 --- /dev/null +++ b/Parser/SyntaxNode.cs @@ -0,0 +1,513 @@ +using System.Collections.Generic; +using System.Linq; +using Lexer; + +namespace Parser +{ + public class SyntaxNode + { + public SyntaxNode Parent { get; set; } + public List Children { get; } + + public SyntaxNode(List children) + { + Children = children; + } + + public virtual string FullText => + string.Join("", Children.Select(c => c.FullText)); + } + + public class TokenNode : SyntaxNode + { + public Token Token { get; } + + public TokenNode(Token token) + : base(null) + { + Token = token; + } + + public override string FullText => 
Token.FullText; + } + + public class OutputIdentifierNode : SyntaxNode + { + public OutputIdentifierNode(List children) : base(children) + { + } + } + + public class FunctionOutputDescriptionNode : SyntaxNode + { + public List Outputs { get; } + + public FunctionOutputDescriptionNode(List children, List outputs) : base(children) + { + Outputs = outputs; + } + } + + public class FunctionInputDescriptionNode : SyntaxNode + { + public TokenNode OpeningBracket { get; } + public ParameterListNode Parameters { get; } + public TokenNode ClosingBracket { get; } + public FunctionInputDescriptionNode( + List children, + TokenNode openingBracket, + ParameterListNode parameters, + TokenNode closingBracket) : base(children) + { + OpeningBracket = openingBracket; + Parameters = parameters; + ClosingBracket = closingBracket; + } + } + + public class FunctionDeclarationNode : StatementNode + { + public TokenNode Token { get; } + public FunctionOutputDescriptionNode OutputDescription { get; } + public TokenNode EqualitySign { get; } + public TokenNode Name { get; } + public FunctionInputDescriptionNode InputDescription { get; } + public StatementListNode Body { get; } + public TokenNode End { get; } + + public FunctionDeclarationNode( + List children, + TokenNode token, + FunctionOutputDescriptionNode outputDescription, + TokenNode equalitySign, + TokenNode name, + FunctionInputDescriptionNode inputDescription, + StatementListNode body, + TokenNode end, + TokenNode semicolonOrComma + ) : base(children, semicolonOrComma) + { + Token = token; + OutputDescription = outputDescription; + EqualitySign = equalitySign; + Name = name; + InputDescription = inputDescription; + Body = body; + End = end; + } + } + + public class StatementListNode : SyntaxNode + { + public List Statements => Children; + + public StatementListNode(List children) : base(children) + { + } + } + + public class ParameterListNode : SyntaxNode + { + public List Parameters { get; } + + public ParameterListNode(List 
children, List parameters) : base(children) + { + Parameters = parameters; + } + } + + public class ExpressionNode : SyntaxNode + { + public ExpressionNode(List children) : base(children) + { + } + } + + public class AssignmentExpressionNode : ExpressionNode + { + public ExpressionNode Lhs { get; } + public TokenNode Assignment { get; } + public ExpressionNode Rhs { get; } + + public AssignmentExpressionNode( + List children, + ExpressionNode lhs, + TokenNode assignment, + ExpressionNode rhs) : base(children) + { + Lhs = lhs; + Assignment = assignment; + Rhs = rhs; + } + } + + public class UnaryPrefixOperationExpressionNode : ExpressionNode + { + public TokenNode Operation { get; } + public ExpressionNode Operand { get; } + + public UnaryPrefixOperationExpressionNode( + List children, + TokenNode operation, + ExpressionNode operand) : base(children) + { + Operation = operation; + Operand = operand; + } + } + + public class UnaryPostfixOperationExpressionNode : ExpressionNode + { + public ExpressionNode Operand { get; } + public TokenNode Operation { get; } + public UnaryPostfixOperationExpressionNode( + List children, + ExpressionNode operand, + TokenNode operation) : base(children) + { + Operand = operand; + Operation = operation; + } + } + + public class BinaryOperationExpressionNode : ExpressionNode + { + public ExpressionNode Lhs { get; } + public TokenNode Operation { get; } + public ExpressionNode Rhs { get; } + + public BinaryOperationExpressionNode( + List children, + ExpressionNode lhs, + TokenNode operation, + ExpressionNode rhs) : base(children) + { + Lhs = lhs; + Operation = operation; + Rhs = rhs; + } + } + + public class SwitchStatementNode : StatementNode + { + public TokenNode SwitchKeyword { get; } + public ExpressionNode SwitchExpression { get; } + public List Cases { get; } + public TokenNode EndKeyword { get; } + + public SwitchStatementNode( + List children, + TokenNode switchKeyword, + ExpressionNode switchExpression, + List cases, + TokenNode 
endKeyword, + TokenNode semicolonOrComma + ) : base(children, semicolonOrComma) + { + SwitchKeyword = switchKeyword; + SwitchExpression = switchExpression; + Cases = cases; + EndKeyword = endKeyword; + } + } + + public class SwitchCaseNode : SyntaxNode + { + public TokenNode CaseKeyword { get; } + public TokenNode CaseIdentifier { get; } + public StatementListNode StatementList { get; } + + public SwitchCaseNode( + List children, + TokenNode caseKeyword, + TokenNode caseIdentifier, + StatementListNode statementList + ) : base(children) + { + CaseKeyword = caseKeyword; + CaseIdentifier = caseIdentifier; + StatementList = statementList; + } + } + + public class IdentifierNameNode : ExpressionNode + { + public Token Token { get; } + + public IdentifierNameNode(Token token) + : base(null) + { + Token = token; + } + + public override string FullText => Token.FullText; + } + + public class NumberLiteralNode : ExpressionNode + { + public Token Token { get; } + + public NumberLiteralNode(Token token) : base(null) + { + Token = token; + } + + public override string FullText => Token.FullText; + } + + public class StringLiteralNode : ExpressionNode + { + public Token Token { get; } + + public StringLiteralNode(Token token) : base(null) + { + Token = token; + } + + public override string FullText => Token.FullText; + } + + public class StatementNode : SyntaxNode + { + public TokenNode SemicolonOrComma { get; set; } + + public StatementNode(List children, TokenNode semicolonOrComma = null) : base(children) + { + SemicolonOrComma = semicolonOrComma; + } + } + + public class ExpressionStatementNode : StatementNode + { + public ExpressionNode Expression { get; } + + public ExpressionStatementNode(List children, ExpressionNode expression, TokenNode semicolonOrComma) + : base(children, semicolonOrComma) + { + Expression = expression; + } + } + + public class CellArrayElementAccessExpressionNode : ExpressionNode + { + public ExpressionNode CellArray { get; } + public TokenNode 
OpeningBrace { get; } + public ExpressionNode Index { get; } + public TokenNode ClosingBrace { get; } + + public CellArrayElementAccessExpressionNode( + List children, + ExpressionNode cellArray, + TokenNode openingBrace, + ExpressionNode index, + TokenNode closingBrace) : base(children) + { + CellArray = cellArray; + OpeningBrace = openingBrace; + Index = index; + ClosingBrace = closingBrace; + } + } + + public class FunctionCallExpressionNode : ExpressionNode + { + public ExpressionNode FunctionName { get; } + public TokenNode OpeningBracket { get; } + public FunctionCallParameterListNode Parameters { get; } + public TokenNode ClosingBracket { get; } + + public FunctionCallExpressionNode( + List children, + ExpressionNode functionName, + TokenNode openingBracket, + FunctionCallParameterListNode parameters, + TokenNode closingBracket) : base(children) + { + FunctionName = functionName; + OpeningBracket = openingBracket; + Parameters = parameters; + ClosingBracket = closingBracket; + } + } + + public class FunctionCallParameterListNode : SyntaxNode + { + public List Parameters; + + public FunctionCallParameterListNode( + List children, + List parameters) : base(children) + { + Parameters = parameters; + } + } + + public class ArrayElementListNode : SyntaxNode + { + public List Elements; + + public ArrayElementListNode( + List children, + List elements) : base(children) + { + Elements = elements; + } + } + + public class ArrayLiteralExpressionNode : ExpressionNode + { + public TokenNode OpeningSquareBracket { get; } + public ArrayElementListNode Elements { get; } + public TokenNode ClosingSquareBracket { get; } + + public ArrayLiteralExpressionNode( + List children, + TokenNode openingSquareBracket, + ArrayElementListNode elements, + TokenNode closingSquareBracket) : base(children) + { + OpeningSquareBracket = openingSquareBracket; + Elements = elements; + ClosingSquareBracket = closingSquareBracket; + } + } + + public class CellArrayLiteralExpressionNode : 
ExpressionNode + { + public TokenNode OpeningBrace { get; } + public ArrayElementListNode Elements { get; } + public TokenNode ClosingBrace { get; } + + public CellArrayLiteralExpressionNode( + List children, + TokenNode openingBrace, + ArrayElementListNode elements, + TokenNode closingBrace) : base(children) + { + OpeningBrace = openingBrace; + Elements = elements; + ClosingBrace = closingBrace; + } + } + + public class EmptyExpressionNode : ExpressionNode + { + public EmptyExpressionNode() : base(null) + { + } + } + + public class MemberAccessNode : ExpressionNode + { + public SyntaxNode LeftOperand { get; } + public TokenNode Dot { get; } + public SyntaxNode RightOperand { get; } + + public MemberAccessNode( + List children, + SyntaxNode leftOperand, + TokenNode dot, + SyntaxNode rightOperand) : base(children) + { + LeftOperand = leftOperand; + Dot = dot; + RightOperand = rightOperand; + } + } + + public class WhileStatementNode : StatementNode + { + public TokenNode WhileKeyword { get; } + public ExpressionNode Condition { get; } + public List OptionalCommasAfterCondition { get; } + public StatementListNode Body { get; } + public TokenNode End { get; } + + public WhileStatementNode( + List children, + TokenNode whileKeyword, + ExpressionNode condition, + List optionalCommasAfterCondition, + StatementListNode body, + TokenNode end, + TokenNode semicolonOrComma + ) : base(children, semicolonOrComma) + { + WhileKeyword = whileKeyword; + Condition = condition; + OptionalCommasAfterCondition = optionalCommasAfterCondition; + Body = body; + End = end; + } + } + + public class IfStatementNode : StatementNode + { + public TokenNode IfKeyword { get; } + public ExpressionNode Condition { get; } + public List OptionalCommasAfterCondition { get; } + public StatementListNode Body { get; } + public TokenNode ElseKeyword { get; } + public StatementListNode ElseBody { get; } + public TokenNode EndKeyword { get; } + + public IfStatementNode( + List children, + TokenNode 
ifKeyword, + ExpressionNode condition, + List optionalCommasAfterCondition, + StatementListNode body, + TokenNode elseKeyword, + StatementListNode elseBody, + TokenNode endKeyword + ) : base(children) + { + IfKeyword = ifKeyword; + Condition = condition; + OptionalCommasAfterCondition = optionalCommasAfterCondition; + Body = body; + ElseKeyword = elseKeyword; + ElseBody = elseBody; + EndKeyword = endKeyword; + } + } + + public class ParenthesizedExpressionNode : ExpressionNode + { + public TokenNode OpenParen { get; } + public ExpressionNode Expression { get; } + public TokenNode CloseParen { get; } + + public ParenthesizedExpressionNode( + List children, + TokenNode openParen, + ExpressionNode expression, + TokenNode closeParen) : base(children) + { + OpenParen = openParen; + Expression = expression; + CloseParen = closeParen; + } + } + + public class ForStatementNode : StatementNode + { + public TokenNode ForKeyword { get; } + public AssignmentExpressionNode ForAssignment { get; } + public StatementListNode Body { get; } + public TokenNode EndKeyword { get; } + + public ForStatementNode( + List children, + TokenNode forKeyword, + AssignmentExpressionNode forAssignment, + StatementListNode body, + TokenNode endKeyword) : base(children) + { + ForKeyword = forKeyword; + ForAssignment = forAssignment; + Body = body; + EndKeyword = endKeyword; + } + } +} \ No newline at end of file diff --git a/ProjectConsole/Program.cs b/ProjectConsole/Program.cs index cae6381..f9f4192 100644 --- a/ProjectConsole/Program.cs +++ b/ProjectConsole/Program.cs @@ -1,12 +1,136 @@ using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.IO; +using System.Linq; +using Lexer; +using Parser; namespace ProjectConsole { class Program { + //private const string BaseDirectory = @"C:\Program Files\MATLAB\R2018a\toolbox\matlab\"; + private const string BaseDirectory = @"/Applications/MATLAB_R2017b.app/toolbox/matlab/guide/"; + + private static HashSet skipFiles = new 
HashSet + { + @"codetools\private\template.m", // this is a template, so it contains '$' characters. + @"plottools\+matlab\+graphics\+internal\+propertyinspector\+views\CategoricalHistogramPropertyView.m", // this one contains a 0xA0 character (probably it's 'non-breakable space' in Win-1252). + @"plottools\+matlab\+graphics\+internal\+propertyinspector\+views\PrimitiveHistogram2PropertyView.m", // same + @"plottools\+matlab\+graphics\+internal\+propertyinspector\+views\PrimitiveHistogramPropertyView.m", // same + @"codetools/private/template.m", // this is a template, so it contains '$' characters. + @"plottools/+matlab/+graphics/+internal/+propertyinspector/+views/CategoricalHistogramPropertyView.m", // this one contains a 0xA0 character (probably it's 'non-breakable space' in Win-1252). + @"plottools/+matlab/+graphics/+internal/+propertyinspector/+views/PrimitiveHistogram2PropertyView.m", // same + @"plottools/+matlab/+graphics/+internal/+propertyinspector/+views/PrimitiveHistogramPropertyView.m", // same + }; + + static void ProcessFile(string fileName) + { + var text = File.ReadAllText(fileName); + Console.WriteLine($"Parsing {fileName}..."); + var window = new TextWindowWithNull(text, fileName); + ILexer lexer = new MLexer(window, new PureTokenFactory(window)); + var tokens = lexer.ParseAll(); + //AfterFunction(tokens); + //FirstToken(tokens); + var parser = new MParser(tokens); + var tree = parser.Parse(); + var back = string.Join("", tokens.Select(token => token.FullText)); + if (text != back) + { + throw new ApplicationException(); + } + } + + private static readonly int[] firstTokenCount; + private static readonly int[] afterFunctionCount; + + static Program() + { + var maxKind = ((int[]) typeof(TokenKind).GetEnumValues()).Max(); + firstTokenCount = new int[maxKind + 1]; + afterFunctionCount = new int[maxKind + 1]; + } + + static void AfterFunction(List tokens) + { + for (var i = 0; i < tokens.Count; i++) + { + if (tokens[i].PureToken.Kind == 
TokenKind.Identifier && + tokens[i].PureToken.LiteralText == "function") + { + var nextKind = tokens[i + 1].PureToken.Kind; + afterFunctionCount[(int) nextKind]++; + if (nextKind != TokenKind.Identifier && nextKind != TokenKind.OpeningSquareBracket) + { + Console.WriteLine("===EXAMPLE==="); + Console.WriteLine($"{tokens[i]}{tokens[i+1]}"); + } + } + } + } + + static void FirstToken(List tokens) + { + var firstKind = tokens[0].PureToken.Kind; + firstTokenCount[(int) firstKind]++; + } + + static void AfterFunctionFinish() + { + for (var i = 0; i < afterFunctionCount.Length; i++) + { + Console.WriteLine($"{(TokenKind)i}: {afterFunctionCount[i]}."); + } + } + + static void FirstTokenFinish() + { + for (var i = 0; i < firstTokenCount.Length; i++) + { + if (firstTokenCount[i] != 0) + { + Console.WriteLine($"{(TokenKind) i}: {firstTokenCount[i]}."); + } + } + } + + static int ProcessDirectory(string directory) + { + var counter = 0; + var files = Directory.GetFiles(directory, "*.m"); + foreach (var file in files) + { + var relativePath = Path.GetRelativePath(BaseDirectory, file); + if (skipFiles.Contains(relativePath)) + { + continue; + } + ProcessFile(file); + counter++; + } + + var subDirectories = Directory.GetDirectories(directory); + foreach (var subDirectory in subDirectories) + { + counter += ProcessDirectory(subDirectory); + } + + return counter; + } + static void Main(string[] args) { Console.WriteLine("Hello World!"); + var sw = new Stopwatch(); + sw.Start(); + var processed = ProcessDirectory(BaseDirectory); + sw.Stop(); + Console.WriteLine($"{processed} files parsed. 
Elapsed: {sw.Elapsed}."); + //AfterFunctionFinish(); + //FirstTokenFinish(); + Console.ReadKey(); } } } diff --git a/ProjectConsole/ProjectConsole.csproj b/ProjectConsole/ProjectConsole.csproj index ce1697a..3e31ff2 100644 --- a/ProjectConsole/ProjectConsole.csproj +++ b/ProjectConsole/ProjectConsole.csproj @@ -1,8 +1,9 @@ - - + Exe netcoreapp2.0 - - + + + + \ No newline at end of file diff --git a/Solution.sln b/Solution.sln index ffe4de6..3d26b2d 100644 --- a/Solution.sln +++ b/Solution.sln @@ -5,6 +5,14 @@ VisualStudioVersion = 15.0.27130.2026 MinimumVisualStudioVersion = 10.0.40219.1 Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ProjectConsole", "ProjectConsole\ProjectConsole.csproj", "{5025FD8F-0F1A-43E5-A996-7753BC703D62}" EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Parser", "Parser\Parser.csproj", "{B20EDC10-E6E6-4430-8527-B95206DEF941}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Parser.Tests", "Parser.Tests\Parser.Tests.csproj", "{83008C72-2EFC-41EB-AC8D-023C6AE1709F}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lexer", "Lexer\Lexer.csproj", "{1B8E5BBC-E5CD-427B-A6C7-F30047AA4A39}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lexer.Tests", "Lexer.Tests\Lexer.Tests.csproj", "{7BFEAD86-EAC3-43C8-9388-EBAB377938D4}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -15,6 +23,22 @@ Global {5025FD8F-0F1A-43E5-A996-7753BC703D62}.Debug|Any CPU.Build.0 = Debug|Any CPU {5025FD8F-0F1A-43E5-A996-7753BC703D62}.Release|Any CPU.ActiveCfg = Release|Any CPU {5025FD8F-0F1A-43E5-A996-7753BC703D62}.Release|Any CPU.Build.0 = Release|Any CPU + {B20EDC10-E6E6-4430-8527-B95206DEF941}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {B20EDC10-E6E6-4430-8527-B95206DEF941}.Debug|Any CPU.Build.0 = Debug|Any CPU + {B20EDC10-E6E6-4430-8527-B95206DEF941}.Release|Any CPU.ActiveCfg = Release|Any CPU + 
{B20EDC10-E6E6-4430-8527-B95206DEF941}.Release|Any CPU.Build.0 = Release|Any CPU + {83008C72-2EFC-41EB-AC8D-023C6AE1709F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {83008C72-2EFC-41EB-AC8D-023C6AE1709F}.Debug|Any CPU.Build.0 = Debug|Any CPU + {83008C72-2EFC-41EB-AC8D-023C6AE1709F}.Release|Any CPU.ActiveCfg = Release|Any CPU + {83008C72-2EFC-41EB-AC8D-023C6AE1709F}.Release|Any CPU.Build.0 = Release|Any CPU + {1B8E5BBC-E5CD-427B-A6C7-F30047AA4A39}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {1B8E5BBC-E5CD-427B-A6C7-F30047AA4A39}.Debug|Any CPU.Build.0 = Debug|Any CPU + {1B8E5BBC-E5CD-427B-A6C7-F30047AA4A39}.Release|Any CPU.ActiveCfg = Release|Any CPU + {1B8E5BBC-E5CD-427B-A6C7-F30047AA4A39}.Release|Any CPU.Build.0 = Release|Any CPU + {7BFEAD86-EAC3-43C8-9388-EBAB377938D4}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {7BFEAD86-EAC3-43C8-9388-EBAB377938D4}.Debug|Any CPU.Build.0 = Debug|Any CPU + {7BFEAD86-EAC3-43C8-9388-EBAB377938D4}.Release|Any CPU.ActiveCfg = Release|Any CPU + {7BFEAD86-EAC3-43C8-9388-EBAB377938D4}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE