First version

This commit is contained in:
Alexander Luzgarev 2018-03-31 21:51:53 +02:00
parent 1c98b7e5b2
commit 869f3490f1
28 changed files with 3423 additions and 4 deletions

View File

@ -0,0 +1,14 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- Test project: references the Lexer library and runs its tests with NUnit 3. -->
<PropertyGroup>
<TargetFramework>netcoreapp2.0</TargetFramework>
<!-- Test assemblies are not meant to be packed as NuGet packages. -->
<IsPackable>false</IsPackable>
</PropertyGroup>
<ItemGroup>
<!-- Test host, test framework, and the adapter that lets the test host discover NUnit tests. -->
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="15.5.0" />
<PackageReference Include="NUnit" Version="3.9.0" />
<PackageReference Include="NUnit3TestAdapter" Version="3.9.0" />
</ItemGroup>
<ItemGroup>
<!-- Code under test. -->
<ProjectReference Include="..\Lexer\Lexer.csproj" />
</ItemGroup>
</Project>

209
Lexer.Tests/MLexerShould.cs Normal file
View File

@ -0,0 +1,209 @@
using System.Linq;
using Lexer;
using NUnit.Framework;
namespace Parser.Tests
{
    /// <summary>
    /// Token-level tests for <see cref="MLexer"/>: which token kinds are produced for
    /// small inputs, and whether the original text can be rebuilt from the tokens.
    /// </summary>
    public class MLexerShould
    {
        /// <summary>Builds a lexer over <paramref name="text"/> using a null-terminated window.</summary>
        private static MLexer CreateLexer(string text)
        {
            var window = new TextWindowWithNull(text);
            var factory = new PureTokenFactory(window);
            return new MLexer(window, factory);
        }

        /// <summary>
        /// Lexes <paramref name="text"/> and checks first the token count and then the exact
        /// sequence of token kinds (the end-of-file token included).
        /// </summary>
        private static void AssertKinds(string text, params TokenKind[] expectedKinds)
        {
            var tokens = CreateLexer(text).ParseAll();
            Assert.AreEqual(expectedKinds.Length, tokens.Count);
            CollectionAssert.AreEqual(
                expectedKinds,
                tokens.Select(token => token.PureToken.Kind));
        }

        [Test]
        public void ParseSequenceOfIdentifiers()
        {
            var tokens = CreateLexer("undefined is not a function").ParseAll();

            // Five identifiers plus the end-of-file token.
            Assert.AreEqual(6, tokens.Count);
            var identifiers = tokens.Take(5).ToList();
            CollectionAssert.AreEqual(
                new[] {"undefined", "is", "not", "a", "function"},
                identifiers.Select(token => token.PureToken.LiteralText));
            CollectionAssert.AreEqual(
                Enumerable.Repeat(TokenKind.Identifier, 5),
                identifiers.Select(token => token.PureToken.Kind));
        }

        [Test]
        public void ParseIdentifierAndBrackets()
        {
            AssertKinds(
                "undefined()",
                TokenKind.Identifier,
                TokenKind.OpeningBracket,
                TokenKind.ClosingBracket,
                TokenKind.EndOfFile);
        }

        [Test]
        public void ParseTransposeSignAfterClosingSquareBracket()
        {
            AssertKinds(
                "[undefined]'",
                TokenKind.OpeningSquareBracket,
                TokenKind.Identifier,
                TokenKind.ClosingSquareBracket,
                TokenKind.Transpose,
                TokenKind.EndOfFile);
        }

        [Test]
        public void ParseTransposeSignAfterClosingBrace()
        {
            AssertKinds(
                "{undefined}'",
                TokenKind.OpeningBrace,
                TokenKind.Identifier,
                TokenKind.ClosingBrace,
                TokenKind.Transpose,
                TokenKind.EndOfFile);
        }

        [Test]
        public void ParseTransposeSignAfterClosingBracket()
        {
            AssertKinds(
                "undefined()'",
                TokenKind.Identifier,
                TokenKind.OpeningBracket,
                TokenKind.ClosingBracket,
                TokenKind.Transpose,
                TokenKind.EndOfFile);
        }

        [Test]
        public void ParseTransposeSignAfterIdentifier()
        {
            AssertKinds(
                "undefined'",
                TokenKind.Identifier,
                TokenKind.Transpose,
                TokenKind.EndOfFile);
        }

        [Test]
        public void ParseTransposeSignAfterDot()
        {
            AssertKinds(
                "undefined.'",
                TokenKind.Identifier,
                TokenKind.DotTranspose,
                TokenKind.EndOfFile);
        }

        [Test]
        public void ParseDotPowerAfterNumber()
        {
            // The dot belongs to the ".^" operator, not to the number.
            AssertKinds(
                "26.^[1]",
                TokenKind.NumberLiteral,
                TokenKind.DotPower,
                TokenKind.OpeningSquareBracket,
                TokenKind.NumberLiteral,
                TokenKind.ClosingSquareBracket,
                TokenKind.EndOfFile);
        }

        [Test]
        public void ParseDotInNumberBeforeSemicolon()
        {
            // Here the dot is part of the number literal.
            AssertKinds(
                "42.;",
                TokenKind.NumberLiteral,
                TokenKind.Semicolon,
                TokenKind.EndOfFile);
        }

        [Test]
        public void ParseEAfterDotInANumber()
        {
            AssertKinds(
                "42.e-5",
                TokenKind.NumberLiteral,
                TokenKind.EndOfFile);
        }

        [Test]
        public void ParseEmptyLine()
        {
            // Line breaks are trivia, so only two identifiers and end-of-file remain.
            var tokens = CreateLexer("\n\nfunction shmunction\n\n\n").ParseAll();
            Assert.AreEqual(3, tokens.Count);
        }

        [Test]
        public void ParseCommentsAfterDotDotDot()
        {
            // Everything from "..." to the end of the line is trivia.
            var tokens = CreateLexer("something ... #$@#%*^!@#\n").ParseAll();
            Assert.AreEqual(2, tokens.Count);
        }

        [TestCase("something ... #$@#%*^!@#\n")]
        [TestCase("undefined is not a function")]
        [TestCase("\n\nfunction shmunction\n\n\n")]
        public void ReconstructTest(string s)
        {
            // Concatenating every token's full text (trivia included) must give back the input.
            var pieces = CreateLexer(s).ParseAll().Select(token => token.FullText);
            var rebuilt = string.Join("", pieces);
            Assert.AreEqual(s, rebuilt);
        }
    }
}

View File

@ -0,0 +1,37 @@
using Lexer;
using NUnit.Framework;
namespace Parser.Tests
{
    /// <summary>Tests for the plain <see cref="TextWindow"/>.</summary>
    [TestFixture]
    public class TestWindowShould
    {
        [Test]
        public void ReturnEofGivenEmptyText()
        {
            var window = new TextWindow("");
            var atEnd = window.IsEof();
            Assert.IsTrue(atEnd);
        }

        [Test]
        public void ReturnNotEofGivenNonEmptyText()
        {
            var window = new TextWindow("Text.");
            var atEnd = window.IsEof();
            Assert.IsFalse(atEnd);
        }

        [Test]
        public void ReturnCharsInCorrectOrder()
        {
            var text = "abc";
            var window = new TextWindow(text);

            // Peek must show each character in turn; consuming moves on to the next one.
            foreach (var expected in text)
            {
                Assert.AreEqual(expected, window.PeekChar());
                window.ConsumeChar();
            }

            Assert.IsTrue(window.IsEof());
        }
    }
}

View File

@ -0,0 +1,30 @@
using Lexer;
using NUnit.Framework;
namespace Parser.Tests
{
    /// <summary>
    /// Tests for <see cref="TextWindowWithNull"/>, which reports '\0' instead of
    /// failing when peeking at or past the end of the text.
    /// </summary>
    [TestFixture]
    public class TestWindowWithNullShould
    {
        [Test]
        public void ReturnNullGivenEmptyText()
        {
            var window = new TextWindowWithNull("");
            var peekedIsNull = window.PeekChar() == '\0';
            Assert.IsTrue(peekedIsNull);
        }

        [Test]
        public void ReturnCharsInCorrectOrder()
        {
            var text = "abc";
            var window = new TextWindowWithNull(text);

            foreach (var expected in text)
            {
                Assert.AreEqual(expected, window.PeekChar());
                window.ConsumeChar();
            }

            // Past the last character the window yields the null character.
            Assert.AreEqual('\0', window.PeekChar());
        }
    }
}

10
Lexer/ILexer.cs Normal file
View File

@ -0,0 +1,10 @@
using System.Collections.Generic;
namespace Lexer
{
/// <summary>
/// A lexer that produces tokens of type <typeparamref name="T"/>.
/// </summary>
/// <typeparam name="T">Token type; must be a reference type.</typeparam>
public interface ILexer<T> where T : class
{
/// <summary>Produces the next token from the input.</summary>
T NextToken();
/// <summary>Produces all tokens of the input as a list.</summary>
List<T> ParseAll();
}
}

6
Lexer/IPosition.cs Normal file
View File

@ -0,0 +1,6 @@
namespace Lexer
{
/// <summary>
/// Marker interface for a position inside the lexed input; it declares no members.
/// </summary>
public interface IPosition
{
}
}

15
Lexer/ITextWindow.cs Normal file
View File

@ -0,0 +1,15 @@
namespace Lexer
{
/// <summary>
/// A forward-only cursor over the text being lexed.
/// The member descriptions below follow the TextWindow implementation.
/// </summary>
public interface ITextWindow
{
/// <summary>True when the cursor is at or past the end of the text.</summary>
bool IsEof();
/// <summary>Returns the character at the cursor without advancing.</summary>
char PeekChar();
/// <summary>Returns the character <paramref name="n"/> positions after the cursor without advancing.</summary>
char PeekChar(int n);
/// <summary>Advances the cursor by one character.</summary>
void ConsumeChar();
/// <summary>Advances the cursor by <paramref name="n"/> characters.</summary>
void ConsumeChars(int n);
/// <summary>Returns the character at the cursor and advances past it.</summary>
char GetAndConsumeChar();
/// <summary>Returns the next <paramref name="n"/> characters as a string and advances past them.</summary>
string GetAndConsumeChars(int n);
/// <summary>Number of characters between the cursor and the end of the text.</summary>
int CharactersLeft();
/// <summary>Current position of the cursor.</summary>
IPosition Position { get; }
}
}

5
Lexer/Lexer.csproj Normal file
View File

@ -0,0 +1,5 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- Lexer project: no package or project references, only the target framework is configured. -->
<PropertyGroup>
<TargetFramework>netcoreapp2.0</TargetFramework>
</PropertyGroup>
</Project>

592
Lexer/MLexer.cs Normal file
View File

@ -0,0 +1,592 @@
using System.Collections.Generic;
using System.Text;
namespace Lexer
{
public class MLexer : ILexer<Token>
{
/// <summary>Text the lexer reads from.</summary>
private ITextWindow Window { get; }

/// <summary>
/// Most recent token returned by <see cref="NextToken"/>; stays null until the first token is produced.
/// </summary>
private Token LastToken { get; set; }

/// <summary>Factory used to build the trivia-free part of every token.</summary>
private PureTokenFactory PureTokenFactory { get; }

/// <summary>Creates a lexer that reads from <paramref name="window"/>.</summary>
/// <param name="window">Source of characters.</param>
/// <param name="pureTokenFactory">Factory that creates the pure tokens.</param>
public MLexer(ITextWindow window, PureTokenFactory pureTokenFactory)
{
    PureTokenFactory = pureTokenFactory;
    Window = window;
}
/// <summary>
/// True for a line feed, a carriage return, or the null character
/// (which TextWindowWithNull returns once the text is exhausted).
/// </summary>
private static bool IsEolOrEof(char c)
{
    switch (c)
    {
        case '\n':
        case '\r':
        case '\0':
            return true;
        default:
            return false;
    }
}
/// <summary>
/// Consumes a comment that runs from the current character to the end of the line
/// (the line break itself is left in the window) and returns it as comment trivia.
/// </summary>
private Trivia LexComment()
{
    // The character at offset 0 is always part of the comment; LexTrivia calls this on '%'.
    var length = 1;
    for (; !IsEolOrEof(Window.PeekChar(length)); length++)
    {
    }

    var text = Window.GetAndConsumeChars(length);
    return new Trivia(TriviaType.Comment, text);
}
/// <summary>
/// Consumes everything from the current character (the first dot of "...") up to the end
/// of the line as one comment trivia. If a line-break character follows, that single
/// character is consumed too and returned as a separate whitespace trivia.
/// </summary>
private List<Trivia> LexCommentAfterDotDotDot()
{
    var length = 0;
    while (true)
    {
        if (IsEolOrEof(Window.PeekChar(length)))
        {
            break;
        }

        length++;
    }

    var trivia = new List<Trivia>();
    trivia.Add(new Trivia(TriviaType.Comment, Window.GetAndConsumeChars(length)));

    var next = Window.PeekChar();
    if (next != '\n' && next != '\r')
    {
        // End of input: there is no line break to attach.
        return trivia;
    }

    Window.ConsumeChar();
    trivia.Add(new Trivia(TriviaType.Whitespace, next.ToString()));
    return trivia;
}
/// <summary>
/// Collects whitespace and comments starting at the current window position.
/// </summary>
/// <param name="isTrailing">
/// When true, collection stops right after the first line break that is seen as whitespace,
/// so the trivia belongs to the token before it. A line break consumed as part of a "..."
/// continuation does not stop the collection.
/// </param>
/// <returns>The trivia found, in source order; empty when the next character starts a token.</returns>
private List<Trivia> LexTrivia(bool isTrailing)
{
    var collected = new List<Trivia>();
    var pending = new StringBuilder(); // spaces/tabs not yet turned into a trivia

    while (true)
    {
        var current = Window.PeekChar();

        if (current == ' ' || current == '\t')
        {
            Window.ConsumeChar();
            pending.Append(current);
            continue;
        }

        if (current == '\r' || current == '\n')
        {
            // A line break closes the current run of whitespace.
            Window.ConsumeChar();
            pending.Append(current);
            collected.Add(new Trivia(TriviaType.Whitespace, pending.ToString()));
            if (isTrailing)
            {
                return collected;
            }

            pending.Clear();
            continue;
        }

        // The look-ahead is only evaluated when the current character is a dot.
        var startsContinuation =
            current == '.' && Window.PeekChar(1) == '.' && Window.PeekChar(2) == '.';

        FlushWhitespace(pending, collected);
        if (current == '%')
        {
            collected.Add(LexComment());
        }
        else if (startsContinuation)
        {
            collected.AddRange(LexCommentAfterDotDotDot());
        }
        else
        {
            // Anything else starts a real token.
            return collected;
        }
    }
}

/// <summary>Moves buffered whitespace, if any, into <paramref name="target"/> and empties the buffer.</summary>
private static void FlushWhitespace(StringBuilder buffer, List<Trivia> target)
{
    if (buffer.Length == 0)
    {
        return;
    }

    target.Add(new Trivia(TriviaType.Whitespace, buffer.ToString()));
    buffer.Clear();
}
/// <summary>True for ASCII letters, ASCII digits, and the underscore.</summary>
private static bool IsLetterOrDigitOrUnderscore(char c)
{
    if (c == '_')
    {
        return true;
    }

    if ('0' <= c && c <= '9')
    {
        return true;
    }

    var isLowercase = 'a' <= c && c <= 'z';
    var isUppercase = 'A' <= c && c <= 'Z';
    return isLowercase || isUppercase;
}
/// <summary>
/// Consumes an identifier: the current character (already known to start one) followed by
/// any run of ASCII letters, digits and underscores.
/// </summary>
private PureToken ContinueParsingIdentifier()
{
    var length = 1;
    for (; IsLetterOrDigitOrUnderscore(Window.PeekChar(length)); length++)
    {
    }

    return PureTokenFactory.CreateIdentifier(Window.GetAndConsumeChars(length));
}
// States of the number scanner in ContinueParsingNumber.
// Each state names what the scanner has read most recently.
private enum NumberParsingState
{
// Nothing read yet; a digit is required.
Start,
// Inside the digits before an optional dot.
DigitsBeforeDot,
// The dot has just been read.
AfterDot,
// Inside the digits that follow the dot.
DigitsAfterDot,
// An 'e' or 'E' has just been read; a sign or a digit must follow.
AfterE,
// A '+' or '-' after the exponent marker has just been read; a digit must follow.
SignAfterE,
// Inside the exponent digits.
DigitsAfterE
}
/// <summary>True for the ASCII digits '0' through '9'.</summary>
private static bool IsDigit(char c)
{
    if (c < '0')
    {
        return false;
    }

    return c <= '9';
}
/// <summary>
/// True for a space, a tab, a line feed, or a carriage return.
/// </summary>
/// <remarks>
/// The carriage return is included so that text with "\r\n" line endings is treated like
/// text with "\n" line endings; the rest of the lexer (IsEolOrEof, LexTrivia) already
/// treats '\r' as a line-break character.
/// </remarks>
private static bool IsWhitespace(char c)
{
    return c == ' ' || c == '\t' || c == '\n' || c == '\r';
}
/// <summary>
/// Scans a number literal that starts at the current window position: digits, an optional
/// dot with optional digits, and an optional exponent ('e' or 'E', optional sign, digits).
/// </summary>
/// <returns>
/// The number token, or null when the input ends in the middle of an exponent
/// (for example "1e" or "1e+").
/// </returns>
/// <exception cref="ParsingException">
/// A character that cannot continue or terminate the number was found.
/// </exception>
/// <remarks>
/// A dot directly followed by '^', '*', '/', '\' or a quote is not part of the number:
/// it belongs to the following dot-operator (".^", ".*", ...), so "26.^2" yields "26".
/// A number that ends with a bare dot is accepted at the end of input and before a comma
/// or a carriage return, in the same way as before a space, ';' or a closing bracket.
/// </remarks>
private PureToken? ContinueParsingNumber()
{
    var state = NumberParsingState.Start;
    var n = 0; // number of characters accepted so far
    var left = Window.CharactersLeft();
    var success = false;
    var fail = false;
    while (n < left)
    {
        var c = Window.PeekChar(n);
        switch (state)
        {
            case NumberParsingState.Start:
                if (IsDigit(c))
                {
                    state = NumberParsingState.DigitsBeforeDot;
                }
                else
                {
                    fail = true;
                }

                break;
            case NumberParsingState.DigitsBeforeDot:
                if (c == '.')
                {
                    state = NumberParsingState.AfterDot;
                }
                else if (c == 'e' || c == 'E')
                {
                    state = NumberParsingState.AfterE;
                }
                else if (!IsDigit(c))
                {
                    success = true;
                }

                break;
            case NumberParsingState.AfterDot:
                if (IsDigit(c))
                {
                    state = NumberParsingState.DigitsAfterDot;
                }
                else if (c == 'e' || c == 'E')
                {
                    state = NumberParsingState.AfterE;
                }
                else if (IsWhitespace(c) || c == '\r' || c == ',' || c == ';'
                         || c == ']' || c == ')' || c == '}')
                {
                    // The trailing dot is part of the number ("42.;").
                    success = true;
                }
                else if (c == '^' || c == '*' || c == '/' || c == '\\' || c == '\'')
                {
                    // The dot starts a dot-operator; give it back.
                    n -= 1;
                    success = true;
                }
                else
                {
                    fail = true;
                }

                break;
            case NumberParsingState.DigitsAfterDot:
                if (c == 'e' || c == 'E')
                {
                    state = NumberParsingState.AfterE;
                }
                else if (!IsDigit(c))
                {
                    success = true;
                }

                break;
            case NumberParsingState.AfterE:
                if (IsDigit(c))
                {
                    state = NumberParsingState.DigitsAfterE;
                }
                else if (c == '+' || c == '-')
                {
                    state = NumberParsingState.SignAfterE;
                }
                else
                {
                    fail = true;
                }

                break;
            case NumberParsingState.SignAfterE:
                if (IsDigit(c))
                {
                    state = NumberParsingState.DigitsAfterE;
                }
                else
                {
                    fail = true;
                }

                break;
            case NumberParsingState.DigitsAfterE:
                if (!IsDigit(c))
                {
                    success = true;
                }

                break;
        }

        if (fail)
        {
            throw new ParsingException("Error while parsing number.");
        }

        if (success)
        {
            break;
        }

        n++;
    }

    if (n >= left)
    {
        // The input ended while scanning; accept only states in which the number is complete.
        switch (state)
        {
            case NumberParsingState.DigitsBeforeDot:
            case NumberParsingState.AfterDot:
            case NumberParsingState.DigitsAfterDot:
            case NumberParsingState.DigitsAfterE:
                success = true;
                break;
        }
    }

    if (success)
    {
        var s = Window.GetAndConsumeChars(n);
        return PureTokenFactory.CreateNumberLiteral(s);
    }

    return null;
}
/// <summary>
/// Consumes a single-quoted string literal; the window must be positioned on the opening quote.
/// </summary>
/// <exception cref="ParsingException">
/// The line or the input ends before the closing quote.
/// </exception>
private PureToken ContinueParsingStringLiteral()
{
    Window.ConsumeChar(); // opening quote

    var length = 0;
    for (var next = Window.PeekChar(length); next != '\''; next = Window.PeekChar(length))
    {
        if (IsEolOrEof(next))
        {
            throw new ParsingException("Unfinished string literal.");
        }

        length++;
    }

    var content = Window.GetAndConsumeChars(length);
    Window.ConsumeChar(); // closing quote
    return PureTokenFactory.CreateStringLiteral(content);
}
/// <summary>
/// Consumes a double-quoted string literal; the window must be positioned on the opening quote.
/// </summary>
/// <exception cref="ParsingException">
/// The line or the input ends before the closing quote. Without this check the scan would
/// never terminate on a window that returns '\0' past the end of the text, and would fail
/// with an index error on a window that does not.
/// </exception>
private PureToken ContinueParsingDoubleQuotedStringLiteral()
{
    Window.ConsumeChar(); // opening quote
    var n = 0;
    while (Window.PeekChar(n) != '"')
    {
        // Same rule as for single-quoted literals: a literal must close on its own line.
        if (IsEolOrEof(Window.PeekChar(n)))
        {
            throw new ParsingException("Unfinished double-quoted string literal.");
        }

        n++;
    }

    var literal = Window.GetAndConsumeChars(n);
    Window.ConsumeChar(); // closing quote
    return PureTokenFactory.CreateDoubleQuotedStringLiteral(literal);
}
/// <summary>
/// Lexes one token (without trivia) starting at the current window position.
/// </summary>
/// <param name="leadingTrivia">
/// Trivia collected directly before this token; used to decide whether a quote is a
/// transpose operator or the start of a string literal.
/// </param>
/// <returns>The token; an end-of-file token when the window yields '\0'.</returns>
/// <exception cref="ParsingException">
/// The character cannot start any token, or a number or string literal is malformed.
/// </exception>
private PureToken LexTokenWithoutTrivia(List<Trivia> leadingTrivia)
{
    var character = Window.PeekChar();

    // Identifiers start with an ASCII letter.
    if ((character >= 'a' && character <= 'z') || (character >= 'A' && character <= 'Z'))
    {
        return ContinueParsingIdentifier();
    }

    if (IsDigit(character))
    {
        var possiblyNumberToken = ContinueParsingNumber();
        if (possiblyNumberToken == null)
        {
            throw new ParsingException($"Unexpected character \"{Window.PeekChar()}\" while parsing a number");
        }

        return possiblyNumberToken.Value;
    }

    switch (character)
    {
        case '=':
            return LexPunctuationWithOptionalSuffix('=', TokenKind.Equality, TokenKind.Assignment);
        case '.':
            return LexDotPunctuation();
        case '(':
            return LexPunctuation(TokenKind.OpeningBracket);
        case ')':
            return LexPunctuation(TokenKind.ClosingBracket);
        case '[':
            return LexPunctuation(TokenKind.OpeningSquareBracket);
        case ']':
            return LexPunctuation(TokenKind.ClosingSquareBracket);
        case '{':
            return LexPunctuation(TokenKind.OpeningBrace);
        case '}':
            return LexPunctuation(TokenKind.ClosingBrace);
        case ',':
            return LexPunctuation(TokenKind.Comma);
        case ';':
            return LexPunctuation(TokenKind.Semicolon);
        case '&':
            return LexPunctuationWithOptionalSuffix('&', TokenKind.LogicalAnd, TokenKind.BitwiseAnd);
        case '|':
            return LexPunctuationWithOptionalSuffix('|', TokenKind.LogicalOr, TokenKind.BitwiseOr);
        case '<':
            return LexPunctuationWithOptionalSuffix('=', TokenKind.LessOrEqual, TokenKind.Less);
        case '>':
            return LexPunctuationWithOptionalSuffix('=', TokenKind.GreaterOrEqual, TokenKind.Greater);
        case '~':
            return LexPunctuationWithOptionalSuffix('=', TokenKind.Inequality, TokenKind.Not);
        case '+':
            return LexPunctuation(TokenKind.Plus);
        case '-':
            return LexPunctuation(TokenKind.Minus);
        case '*':
            return LexPunctuation(TokenKind.Multiply);
        case '/':
            return LexPunctuation(TokenKind.Divide);
        case '\\':
            return LexPunctuation(TokenKind.Backslash);
        case '^':
            return LexPunctuation(TokenKind.Power);
        case '@':
            return LexPunctuation(TokenKind.At);
        case ':':
            return LexPunctuation(TokenKind.Colon);
        case '?':
            return LexPunctuation(TokenKind.QuestionMark);
        case '\'':
            if (IsTransposePosition(leadingTrivia))
            {
                return LexPunctuation(TokenKind.Transpose);
            }

            return ContinueParsingStringLiteral();
        case '"':
            return ContinueParsingDoubleQuotedStringLiteral();
        case '\0':
            // Nothing is consumed for end of file.
            return PureTokenFactory.CreateEndOfFileToken();
        default:
            throw new ParsingException(
                $"Unknown symbol \"{character}\" at {Window.Position}."
            );
    }
}

/// <summary>Consumes one character and returns a punctuation token of the given kind.</summary>
private PureToken LexPunctuation(TokenKind kind)
{
    Window.ConsumeChar();
    return PureTokenFactory.CreatePunctuation(kind);
}

/// <summary>
/// Consumes one character; if the next character equals <paramref name="suffix"/> it is
/// consumed as well and a <paramref name="withSuffix"/> token is returned, otherwise a
/// <paramref name="withoutSuffix"/> token is returned.
/// </summary>
private PureToken LexPunctuationWithOptionalSuffix(char suffix, TokenKind withSuffix, TokenKind withoutSuffix)
{
    Window.ConsumeChar();
    if (Window.PeekChar() == suffix)
    {
        Window.ConsumeChar();
        return PureTokenFactory.CreatePunctuation(withSuffix);
    }

    return PureTokenFactory.CreatePunctuation(withoutSuffix);
}

/// <summary>
/// Consumes a dot and, when it is followed by '*', '/', '^', '\' or a quote, that character
/// too, returning the matching dot-operator token; otherwise returns a plain dot token.
/// </summary>
private PureToken LexDotPunctuation()
{
    Window.ConsumeChar();
    switch (Window.PeekChar())
    {
        case '*':
            return LexPunctuation(TokenKind.DotMultiply);
        case '/':
            return LexPunctuation(TokenKind.DotDivide);
        case '^':
            return LexPunctuation(TokenKind.DotPower);
        case '\\':
            return LexPunctuation(TokenKind.DotBackslash);
        case '\'':
            return LexPunctuation(TokenKind.DotTranspose);
        default:
            return PureTokenFactory.CreatePunctuation(TokenKind.Dot);
    }
}

/// <summary>
/// Decides whether a quote at the current position is a transpose operator: it must directly
/// follow (no trivia in between) an identifier or a closing bracket of any kind.
/// </summary>
private bool IsTransposePosition(List<Trivia> leadingTrivia)
{
    // No token has been produced yet, so a quote can only open a string literal.
    // LastToken is null here; dereferencing it used to crash on input that starts with a quote.
    if (LastToken == null)
    {
        return false;
    }

    switch (LastToken.PureToken.Kind)
    {
        case TokenKind.ClosingBrace:
        case TokenKind.ClosingBracket:
        case TokenKind.ClosingSquareBracket:
        case TokenKind.Identifier:
            return LastToken.TrailingTrivia.Count == 0 && leadingTrivia.Count == 0;
        default:
            return false;
    }
}
/// <summary>
/// Lexes the next token together with the trivia before it and the trivia after it up to
/// the end of the line, and remembers it as the last token.
/// </summary>
public Token NextToken()
{
    var before = LexTrivia(false);
    var pureToken = LexTokenWithoutTrivia(before);
    var after = LexTrivia(true);

    LastToken = new Token(pureToken, before, after);
    return LastToken;
}
/// <summary>
/// Lexes the rest of the input and returns every token, the end-of-file token included as
/// the last element.
/// </summary>
/// <exception cref="ParsingException">A token could not be produced.</exception>
public List<Token> ParseAll()
{
    var tokens = new List<Token>();
    Token current;
    do
    {
        current = NextToken();
        if (current == null)
        {
            throw new ParsingException($"Unexpected character: '{Window.PeekChar()}' at {Window.Position}.");
        }

        tokens.Add(current);
    } while (current.PureToken.Kind != TokenKind.EndOfFile);

    return tokens;
}
}
}

View File

@ -0,0 +1,9 @@
using System;
namespace Lexer
{
    /// <summary>
    /// Error raised when the input cannot be lexed or parsed.
    /// </summary>
    public class ParsingException : Exception
    {
        /// <summary>Creates the exception with <paramref name="text"/> as its message.</summary>
        public ParsingException(string text)
            : base(text)
        {
        }
    }
}

View File

@ -0,0 +1,14 @@
namespace Lexer
{
    /// <summary>
    /// A line/column position, optionally tied to a file name.
    /// </summary>
    public struct PositionInsideFile : IPosition
    {
        /// <summary>File name, or null when the text does not come from a named file.</summary>
        public string File { get; set; }

        /// <summary>Line number.</summary>
        public int Line { get; set; }

        /// <summary>Column number.</summary>
        public int Column { get; set; }

        /// <summary>
        /// Formats the position as "line L, column C", followed by " of FILE" when a file name is set.
        /// </summary>
        public override string ToString()
        {
            var location = $"line {Line}, column {Column}";
            if (File == null)
            {
                return location;
            }

            return location + $" of {File}";
        }
    }
}

20
Lexer/PureToken.cs Normal file
View File

@ -0,0 +1,20 @@
namespace Lexer
{
    /// <summary>
    /// A token without its surrounding trivia: kind, source text, optional value and position.
    /// </summary>
    public struct PureToken
    {
        /// <summary>Kind of the token.</summary>
        public TokenKind Kind { get; }

        /// <summary>Text of the token as passed in by its creator.</summary>
        public string LiteralText { get; }

        /// <summary>Value attached to the token; may be null.</summary>
        public object Value { get; }

        /// <summary>Position recorded for the token.</summary>
        public IPosition Position { get; }

        /// <summary>Creates a token from its four parts; nothing is validated.</summary>
        public PureToken(TokenKind kind, string literalText, object value, IPosition position)
        {
            Position = position;
            Value = value;
            LiteralText = literalText;
            Kind = kind;
        }

        /// <summary>Returns <see cref="LiteralText"/>.</summary>
        public override string ToString()
        {
            return LiteralText;
        }
    }
}

94
Lexer/PureTokenFactory.cs Normal file
View File

@ -0,0 +1,94 @@
namespace Lexer
{
/// <summary>
/// Creates <see cref="PureToken"/> values. Every token is stamped with the window's
/// position at the moment the factory method is called.
/// </summary>
public class PureTokenFactory
{
private ITextWindow Window { get; }
/// <summary>Creates a factory that reads positions from <paramref name="window"/>.</summary>
public PureTokenFactory(ITextWindow window)
{
Window = window;
}
// Text of each punctuation token, indexed by the numeric value of its TokenKind.
// The order and the number of entries must stay in sync with the TokenKind enum;
// kinds without a fixed text have a null entry.
private static readonly string[] PureTokenOfKind =
{
null, // None = 0,
null, // EndOfFile = 1,
null, // Identifier = 2,
null, // NumberLiteral = 3,
null, // StringLiteral = 4,
// Indices 5-19: DoubleQuotedStringLiteral = 5, followed by values TokenKind does not define.
null, null, null, null, null, null, null, null, null, null, null, null, null, null, null,
"=", // Assignment = 20,
"==", // Equality = 21,
"~=", // Inequality = 22,
"&&", // LogicalAnd = 23,
"||", // LogicalOr = 24,
"&", // BitwiseAnd = 25,
"|", // BitwiseOr = 26,
"<", // Less = 27,
"<=", // LessOrEqual = 28,
">", // Greater = 29,
">=", // GreaterOrEqual = 30,
"~", // Not = 31,
"+", // Plus = 32,
"-", // Minus = 33,
"*", // Multiply = 34,
"/", // Divide = 35,
"^", // Power = 36,
"\\", // Backslash = 37,
"'", // Transpose = 38,
".*", // DotMultiply = 39,
"./", // DotDivide = 40,
".^", // DotPower = 41,
".\\", // DotBackslash = 42,
".'", // DotTranspose = 43,
"@", // At = 44,
":", // Colon = 45,
"?", // QuestionMark = 46,
",", // Comma = 47,
";", // Semicolon = 48,
"{", // OpeningBrace = 49,
"}", // ClosingBrace = 50,
"[", // OpeningSquareBracket = 51,
"]", // ClosingSquareBracket = 52,
"(", // OpeningBracket = 53,
")", // ClosingBracket = 54,
".", // Dot = 55,
"...", // DotDotDot = 56,
"+", // UnaryPlus = 57,
"-", // UnaryMinus = 58,
"~", // UnaryNot = 59,
};
/// <summary>
/// Creates a punctuation token whose text is looked up in the table above by the numeric
/// value of <paramref name="kind"/>; the token carries no value.
/// </summary>
public PureToken CreatePunctuation(TokenKind kind)
{
return new PureToken(kind, PureTokenOfKind[(int)kind], null, Window.Position);
}
/// <summary>Creates an identifier token with text <paramref name="s"/> and no value.</summary>
public PureToken CreateIdentifier(string s)
{
return new PureToken(TokenKind.Identifier, s, null, Window.Position);
}
/// <summary>Creates a number token with text <paramref name="s"/>; the value is not computed yet.</summary>
public PureToken CreateNumberLiteral(string s)
{
return new PureToken(TokenKind.NumberLiteral, s, null, Window.Position); // TODO: actually parse number (here or in the lexer?)
}
/// <summary>
/// Creates a string token: the text is <paramref name="s"/> wrapped in single quotes,
/// the value is <paramref name="s"/> itself.
/// </summary>
public PureToken CreateStringLiteral(string s)
{
return new PureToken(TokenKind.StringLiteral, "'" + s + "'", s, Window.Position);
}
/// <summary>
/// Creates a double-quoted string token: the text is <paramref name="s"/> wrapped in
/// double quotes, the value is <paramref name="s"/> itself.
/// </summary>
public PureToken CreateDoubleQuotedStringLiteral(string s)
{
return new PureToken(TokenKind.DoubleQuotedStringLiteral, "\"" + s + "\"", s, Window.Position);
}
/// <summary>Creates the end-of-file token, which has empty text and no value.</summary>
public PureToken CreateEndOfFileToken()
{
return new PureToken(TokenKind.EndOfFile, "", null, Window.Position);
}
}
}

78
Lexer/TextWindow.cs Normal file
View File

@ -0,0 +1,78 @@
namespace Lexer
{
    /// <summary>
    /// A forward-moving cursor over a string that also keeps track of the line and column
    /// of the cursor (both start at 0).
    /// </summary>
    /// <remarks>
    /// A line break is "\n", "\r\n" (counted once), or a "\r" that is not followed by "\n".
    /// Peeking or consuming a single character at the end of the text throws
    /// <see cref="System.IndexOutOfRangeException"/>; see TextWindowWithNull for a variant
    /// whose peek operations return '\0' instead.
    /// </remarks>
    public class TextWindow : ITextWindow
    {
        /// <summary>The whole text being read.</summary>
        protected readonly string Text;

        /// <summary>Index of the next character to read.</summary>
        protected int Offset { get; set; }

        private PositionInsideFile _position;

        /// <summary>A snapshot of the current line/column position.</summary>
        public IPosition Position => _position;

        /// <summary>Creates a window positioned at the start of <paramref name="text"/>.</summary>
        /// <param name="text">Text to read.</param>
        /// <param name="fileName">Optional file name reported in positions.</param>
        public TextWindow(string text, string fileName = null)
        {
            Text = text;
            Offset = 0;
            _position = new PositionInsideFile
            {
                File = fileName,
                Line = 0,
                Column = 0
            };
        }

        /// <summary>True when the cursor is at or past the end of the text.</summary>
        public bool IsEof()
        {
            return Offset >= Text.Length;
        }

        /// <summary>Returns the character at the cursor without advancing.</summary>
        public virtual char PeekChar()
        {
            return Text[Offset];
        }

        /// <summary>Returns the character <paramref name="n"/> positions after the cursor without advancing.</summary>
        public virtual char PeekChar(int n)
        {
            return Text[Offset + n];
        }

        /// <summary>Advances past the character at the cursor and updates line and column.</summary>
        public void ConsumeChar()
        {
            TrackPosition(Offset);
            Offset++;
        }

        /// <summary>
        /// Advances the cursor by <paramref name="n"/> characters, updating line and column
        /// for every character that is skipped (line breaks included).
        /// </summary>
        public void ConsumeChars(int n)
        {
            var end = Offset + n;
            var trackedEnd = end < Text.Length ? end : Text.Length;
            var index = Offset;
            for (; index < trackedEnd; index++)
            {
                TrackPosition(index);
            }

            // Whatever could not be inspected (a move past the end of the text, or a
            // negative n) only shifts the column.
            _position.Column += end - index;
            Offset = end;
        }

        /// <summary>Returns the character at the cursor and advances past it.</summary>
        public char GetAndConsumeChar()
        {
            var c = Text[Offset];
            ConsumeChar();
            return c;
        }

        /// <summary>Returns the next <paramref name="n"/> characters and advances past them.</summary>
        public string GetAndConsumeChars(int n)
        {
            var s = Text.Substring(Offset, n);
            ConsumeChars(n);
            return s;
        }

        /// <summary>Number of characters between the cursor and the end of the text.</summary>
        public int CharactersLeft()
        {
            return Text.Length - Offset;
        }

        /// <summary>
        /// Updates line and column for stepping over the character at <paramref name="index"/>.
        /// </summary>
        private void TrackPosition(int index)
        {
            var c = Text[index];

            // In "\r\n" only the '\n' starts the new line; otherwise the pair would count as two lines.
            var isCarriageReturnOfCrLf =
                c == '\r' && index + 1 < Text.Length && Text[index + 1] == '\n';

            if (c == '\n' || (c == '\r' && !isCarriageReturnOfCrLf))
            {
                _position.Line++;
                _position.Column = 0;
            }
            else
            {
                _position.Column++;
            }
        }
    }
}

View File

@ -0,0 +1,19 @@
namespace Lexer
{
    /// <summary>
    /// A <see cref="TextWindow"/> whose peek operations return the null character ('\0')
    /// at or past the end of the text instead of indexing outside the string.
    /// </summary>
    public class TextWindowWithNull : TextWindow
    {
        /// <summary>Creates a window positioned at the start of <paramref name="text"/>.</summary>
        public TextWindowWithNull(string text, string fileName = null)
            : base(text, fileName)
        {
        }

        /// <summary>Returns the character at the cursor, or '\0' at the end of the text.</summary>
        public override char PeekChar()
        {
            if (IsEof())
            {
                return '\0';
            }

            return base.PeekChar();
        }

        /// <summary>
        /// Returns the character <paramref name="n"/> positions after the cursor, or '\0' when
        /// that position is at or past the end of the text.
        /// </summary>
        public override char PeekChar(int n)
        {
            var isInsideText = Offset + n < Text.Length;
            return isInsideText ? base.PeekChar(n) : '\0';
        }
    }
}

32
Lexer/Token.cs Normal file
View File

@ -0,0 +1,32 @@
using System.Collections.Generic;
using System.Linq;
namespace Lexer
{
    /// <summary>
    /// A token together with the trivia (whitespace and comments) around it.
    /// </summary>
    public class Token
    {
        /// <summary>Trivia that precedes the token.</summary>
        public List<Trivia> LeadingTrivia { get; }

        /// <summary>Trivia that follows the token.</summary>
        public List<Trivia> TrailingTrivia { get; }

        /// <summary>The token itself, without trivia.</summary>
        public PureToken PureToken { get; }

        /// <summary>
        /// Leading trivia text, token text and trailing trivia text, concatenated in that order.
        /// Computed once in the constructor.
        /// </summary>
        public string FullText { get; }

        /// <summary>Shortcut for the kind of <see cref="PureToken"/>.</summary>
        public TokenKind Kind => PureToken.Kind;

        /// <summary>Creates a token from its pure part and the trivia on both sides.</summary>
        public Token(PureToken pureToken, List<Trivia> leadingTrivia, List<Trivia> trailingTrivia)
        {
            PureToken = pureToken;
            LeadingTrivia = leadingTrivia;
            TrailingTrivia = trailingTrivia;
            FullText = TextOf(leadingTrivia) + pureToken.LiteralText + TextOf(trailingTrivia);
        }

        /// <summary>Concatenates the literal text of every trivia in the list.</summary>
        private static string TextOf(List<Trivia> trivia)
        {
            return string.Concat(trivia.Select(t => t.LiteralText));
        }

        /// <summary>Returns <see cref="FullText"/>.</summary>
        public override string ToString()
        {
            return FullText;
        }
    }
}

54
Lexer/TokenKind.cs Normal file
View File

@ -0,0 +1,54 @@
namespace Lexer
{
/// <summary>
/// Kinds of tokens. The numeric values are significant: PureTokenFactory uses them as
/// indices into its table of punctuation texts, so values must not be renumbered without
/// updating that table.
/// </summary>
public enum TokenKind
{
// Special kinds, identifiers and literals.
None = 0,
EndOfFile = 1,
Identifier = 2,
NumberLiteral = 3,
StringLiteral = 4,
DoubleQuotedStringLiteral = 5,
// Operators and punctuation; values 6-19 are not defined.
Assignment = 20,
Equality = 21,
Inequality = 22,
LogicalAnd = 23,
LogicalOr = 24,
BitwiseAnd = 25,
BitwiseOr = 26,
Less = 27,
LessOrEqual = 28,
Greater = 29,
GreaterOrEqual = 30,
Not = 31,
Plus = 32,
Minus = 33,
Multiply = 34,
Divide = 35,
Power = 36,
Backslash = 37,
Transpose = 38,
DotMultiply = 39,
DotDivide = 40,
DotPower = 41,
DotBackslash = 42,
DotTranspose = 43,
At = 44,
Colon = 45,
QuestionMark = 46,
Comma = 47,
Semicolon = 48,
OpeningBrace = 49,
ClosingBrace = 50,
OpeningSquareBracket = 51,
ClosingSquareBracket = 52,
OpeningBracket = 53,
ClosingBracket = 54,
Dot = 55,
DotDotDot = 56,
// Unary tokens are not recognized during lexing; they are recognized contextually while parsing.
UnaryPlus = 57,
UnaryMinus = 58,
UnaryNot = 59,
}
}

14
Lexer/Trivia.cs Normal file
View File

@ -0,0 +1,14 @@
namespace Lexer
{
    /// <summary>
    /// A piece of source text that is not a token: whitespace or a comment.
    /// </summary>
    public class Trivia
    {
        /// <summary>Whether this is whitespace or a comment.</summary>
        public TriviaType Type { get; }

        /// <summary>The exact source text of the trivia.</summary>
        public string LiteralText { get; }

        /// <summary>Creates a trivia of the given type with the given text.</summary>
        public Trivia(TriviaType type, string literalText)
        {
            LiteralText = literalText;
            Type = type;
        }
    }
}

8
Lexer/TriviaType.cs Normal file
View File

@ -0,0 +1,8 @@
namespace Lexer
{
/// <summary>Kinds of trivia the lexer attaches to tokens.</summary>
public enum TriviaType
{
// Spaces, tabs and line breaks.
Whitespace,
// A '%' comment, or the text from "..." to the end of the line.
Comment
}
}

View File

@ -0,0 +1,217 @@
using System.Collections.Generic;
using Lexer;
using NUnit.Framework;
namespace Parser.Tests
{
    /// <summary>
    /// Tests for <see cref="MParser"/>: each test lexes a small source text, parses it as an
    /// expression or a statement, and checks the type and shape of the resulting node.
    /// </summary>
    public class MParserShould
    {
        /// <summary>Lexes <paramref name="text"/> completely and returns a parser over the tokens.</summary>
        private static MParser CreateParser(string text)
        {
            var window = new TextWindowWithNull(text);
            var tokens = new MLexer(window, new PureTokenFactory(window)).ParseAll();
            return new MParser(tokens);
        }

        [Test]
        public void ParseAssignmentExpression()
        {
            const string source = "a = b";
            var node = CreateParser(source).ParseExpression();
            Assert.IsInstanceOf<AssignmentExpressionNode>(node);
            Assert.AreEqual(source, node.FullText);
        }

        [Test]
        public void ParseSimpleStatement()
        {
            const string source = "a = b";
            var node = CreateParser(source).ParseStatement();
            Assert.IsInstanceOf<ExpressionStatementNode>(node);
            Assert.AreEqual(source, node.FullText);
        }

        [Test]
        public void ParseFunctionCallExpression()
        {
            const string source = "func(a, 2, 'abc', d)";
            var node = CreateParser(source).ParseExpression();
            Assert.IsInstanceOf<FunctionCallExpressionNode>(node);
            var call = node as FunctionCallExpressionNode;
            Assert.AreEqual(4, call?.Parameters.Parameters.Count);
            Assert.AreEqual(source, node.FullText);
        }

        [Test]
        public void ParseArrayLiteralExpression()
        {
            const string source = "[a, 2, 'text']";
            var node = CreateParser(source).ParseExpression();
            Assert.IsInstanceOf<ArrayLiteralExpressionNode>(node);
            var array = node as ArrayLiteralExpressionNode;
            Assert.AreEqual(3, array?.Elements.Elements.Count);
            Assert.AreEqual(source, node.FullText);
        }

        [Test]
        public void ParseLeftAssociativeSamePrecedence()
        {
            // (2 + 3) + 4: the left operand is itself a binary operation.
            const string source = "2 + 3 + 4";
            var node = CreateParser(source).ParseExpression();
            Assert.IsInstanceOf<BinaryOperationExpressionNode>(node);
            var sum = (BinaryOperationExpressionNode)node;
            Assert.IsInstanceOf<BinaryOperationExpressionNode>(sum.Lhs);
            Assert.IsInstanceOf<NumberLiteralNode>(sum.Rhs);
            Assert.AreEqual(source, node.FullText);
        }

        [Test]
        public void ParseLeftAssociativeRaisingPrecedence()
        {
            // 2 + (3 * 4): the product binds tighter and ends up on the right.
            const string source = "2 + 3 * 4";
            var node = CreateParser(source).ParseExpression();
            Assert.IsInstanceOf<BinaryOperationExpressionNode>(node);
            var sum = (BinaryOperationExpressionNode)node;
            Assert.AreEqual(TokenKind.Plus, sum.Operation.Token.Kind);
            Assert.IsInstanceOf<NumberLiteralNode>(sum.Lhs);
            Assert.IsInstanceOf<BinaryOperationExpressionNode>(sum.Rhs);
            Assert.AreEqual(source, node.FullText);
        }

        [Test]
        public void ParseLeftAssociativeLoweringPrecedence()
        {
            // (2 * 3) + 4: the product ends up on the left.
            const string source = "2 * 3 + 4";
            var node = CreateParser(source).ParseExpression();
            Assert.IsInstanceOf<BinaryOperationExpressionNode>(node);
            var sum = (BinaryOperationExpressionNode)node;
            Assert.AreEqual(TokenKind.Plus, sum.Operation.Token.Kind);
            Assert.IsInstanceOf<BinaryOperationExpressionNode>(sum.Lhs);
            Assert.IsInstanceOf<NumberLiteralNode>(sum.Rhs);
            Assert.AreEqual(source, node.FullText);
        }

        [Test]
        public void ParseUnaryOperators()
        {
            const string source = "-42";
            var node = CreateParser(source).ParseExpression();
            Assert.IsInstanceOf<UnaryPrefixOperationExpressionNode>(node);
            var negation = (UnaryPrefixOperationExpressionNode)node;
            Assert.AreEqual(TokenKind.Minus, negation.Operation.Token.Kind);
            Assert.IsInstanceOf<NumberLiteralNode>(negation.Operand);
            Assert.AreEqual(source, node.FullText);
        }

        [Test]
        public void ParseMemberAccess()
        {
            // (a.b).c: member access nests to the left.
            const string source = "a.b.c";
            var node = CreateParser(source).ParseExpression();
            Assert.IsInstanceOf<MemberAccessNode>(node);
            var access = (MemberAccessNode)node;
            Assert.IsInstanceOf<MemberAccessNode>(access.LeftOperand);
            Assert.IsInstanceOf<IdentifierNameNode>(access.RightOperand);
            Assert.AreEqual(source, node.FullText);
        }

        [Test]
        public void ParseWhileStatement()
        {
            const string source = "while a < b c = d end";
            var node = CreateParser(source).ParseStatement();
            Assert.IsInstanceOf<WhileStatementNode>(node);
            Assert.AreEqual(source, node.FullText);
        }

        [Test]
        public void ParseWhileStatementWithComma()
        {
            const string source = "while a < b, c = d end";
            var node = CreateParser(source).ParseStatement();
            Assert.IsInstanceOf<WhileStatementNode>(node);
            Assert.AreEqual(source, node.FullText);
        }

        [Test]
        public void ParseIfStatement()
        {
            const string source = "if 2 < 3 a = b end";
            var node = CreateParser(source).ParseStatement();
            Assert.IsInstanceOf<IfStatementNode>(node);
            Assert.AreEqual(source, node.FullText);
        }

        [Test]
        public void ParseIfElseStatement()
        {
            const string source = "if 2 < 3 a = b else c = d end";
            var node = CreateParser(source).ParseStatement();
            Assert.IsInstanceOf<IfStatementNode>(node);
            Assert.AreEqual(source, node.FullText);
        }

        [Test]
        public void ParseParenthesizedExpression()
        {
            const string source = "2 * (3 + 4)";
            var node = CreateParser(source).ParseExpression();
            Assert.IsInstanceOf<BinaryOperationExpressionNode>(node);
            var product = (BinaryOperationExpressionNode)node;
            Assert.IsInstanceOf<NumberLiteralNode>(product.Lhs);
            Assert.IsInstanceOf<ParenthesizedExpressionNode>(product.Rhs);
            var parenthesized = (ParenthesizedExpressionNode)product.Rhs;
            Assert.IsInstanceOf<BinaryOperationExpressionNode>(parenthesized.Expression);
            Assert.AreEqual(source, node.FullText);
        }

        [Test]
        public void ParseForStatement()
        {
            const string source = "for i = 1:5 a = i end";
            var node = CreateParser(source).ParseStatement();
            Assert.IsInstanceOf<ForStatementNode>(node);
            Assert.AreEqual(source, node.FullText);
        }

        [Test]
        public void ParseEmptyArray()
        {
            const string source = "[]";
            var node = CreateParser(source).ParseExpression();
            Assert.IsInstanceOf<ArrayLiteralExpressionNode>(node);
            var array = (ArrayLiteralExpressionNode)node;
            Assert.AreEqual(0, array.Elements.Elements.Count);
        }

        [Test]
        public void ParseCellArrayLiteral()
        {
            // Elements may be separated by spaces as well as by commas.
            const string source = "{ 1 2, 3 }";
            var node = CreateParser(source).ParseExpression();
            Assert.IsInstanceOf<CellArrayLiteralExpressionNode>(node);
            var cellArray = (CellArrayLiteralExpressionNode)node;
            Assert.AreEqual(3, cellArray.Elements.Elements.Count);
        }
    }
}

View File

@ -0,0 +1,17 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- Test project: references the Parser project and runs its tests with NUnit 3. -->
<PropertyGroup>
<TargetFramework>netcoreapp2.0</TargetFramework>
<!-- Test assemblies are not meant to be packed as NuGet packages. -->
<IsPackable>false</IsPackable>
</PropertyGroup>
<ItemGroup>
<!-- Test host, test framework, and the adapter that lets the test host discover NUnit tests. -->
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="15.5.0" />
<PackageReference Include="NUnit" Version="3.9.0" />
<PackageReference Include="NUnit3TestAdapter" Version="3.9.0" />
</ItemGroup>
<ItemGroup>
<!-- Code under test. NOTE(review): the tests also use Lexer types directly; presumably these arrive through Parser's own reference to Lexer - confirm. -->
<ProjectReference Include="..\Parser\Parser.csproj">
<Project>{B20EDC10-E6E6-4430-8527-B95206DEF941}</Project>
<Name>Parser</Name>
</ProjectReference>
</ItemGroup>
</Project>

749
Parser/MParser.cs Normal file
View File

@ -0,0 +1,749 @@
using System;
using System.Collections.Generic;
using System.Linq;
using Lexer;
namespace Parser
{
public class MParser
{
/// <summary>
/// Operator binding levels, weakest first. The expression parser compares
/// these values numerically, so a larger value binds more tightly.
/// </summary>
public enum Precedence
{
    // see https://mathworks.com/help/matlab/matlab_prog/operator-precedence.html
    Expression = 0,
    Assignment = 1,
    LogicalOr = 2,
    LogicalAnd = 3,
    BitwiseOr = 4,
    BitwiseAnd = 5,
    Relational = 6,
    Colon = 7,
    Additive = 8,
    Multiplicative = 9,
    Unary = 10,
    // NOTE(review): no token kind is mapped to this level by GetPrecedence; confirm its intended use.
    WeirdPower = 11,
    Power = 12
}
/// <summary>
/// Maps an operator token kind to its binding level.
/// </summary>
/// <param name="kind">Token kind to classify.</param>
/// <returns>
/// The precedence of the operator, or <see cref="Precedence.Expression"/>
/// for any kind that is not listed here.
/// </returns>
private static Precedence GetPrecedence(TokenKind kind)
{
    switch (kind)
    {
        case TokenKind.Assignment:
            return Precedence.Assignment;
        case TokenKind.LogicalOr:
            return Precedence.LogicalOr;
        case TokenKind.LogicalAnd:
            return Precedence.LogicalAnd;
        case TokenKind.BitwiseOr:
            return Precedence.BitwiseOr;
        case TokenKind.BitwiseAnd:
            return Precedence.BitwiseAnd;
        case TokenKind.Less:
        case TokenKind.LessOrEqual:
        case TokenKind.Greater:
        case TokenKind.GreaterOrEqual:
        case TokenKind.Equality:
        case TokenKind.Inequality:
            return Precedence.Relational;
        case TokenKind.Colon:
            return Precedence.Colon;
        case TokenKind.Plus:
        case TokenKind.Minus:
            return Precedence.Additive;
        case TokenKind.Multiply:
        case TokenKind.DotMultiply:
        case TokenKind.Divide:
        case TokenKind.DotDivide:
        case TokenKind.Backslash:
        case TokenKind.DotBackslash:
            return Precedence.Multiplicative;
        case TokenKind.Not:
        // The prefix-operator kinds produced by ConvertToUnaryTokenKind must bind
        // at the unary level; without these cases they fell through to
        // Precedence.Expression and a prefix operator swallowed the rest of the
        // expression (e.g. "-a + b" was parsed as "-(a + b)").
        case TokenKind.UnaryPlus:
        case TokenKind.UnaryMinus:
        case TokenKind.UnaryNot:
            return Precedence.Unary;
        case TokenKind.Power:
        case TokenKind.DotPower:
        case TokenKind.Transpose:
        case TokenKind.DotTranspose:
            return Precedence.Power;
        default:
            return Precedence.Expression;
    }
}
/// <summary>The token sequence being parsed.</summary>
private List<Token> Tokens { get; }

/// <summary>Index into <see cref="Tokens"/> of the next token to consume.</summary>
private int _index;

/// <summary>The token at the current position; reading it does not consume it.</summary>
private Token CurrentToken
{
    get { return Tokens[_index]; }
}

/// <summary>Factory used to build every syntax node produced by this parser.</summary>
private SyntaxFactory Factory { get; }

/// <summary>
/// Creates a parser positioned at the first token of <paramref name="tokens"/>.
/// </summary>
/// <param name="tokens">Tokens to parse.</param>
public MParser(List<Token> tokens)
{
    Factory = new SyntaxFactory();
    Tokens = tokens;
    _index = 0;
}
/// <summary>
/// Consumes the current token, whatever its kind, and advances by one.
/// </summary>
/// <returns>The token that was consumed.</returns>
private Token EatToken()
{
    var eaten = Tokens[_index];
    _index += 1;
    return eaten;
}
/// <summary>
/// Consumes the current token, requiring it to be of the given kind.
/// </summary>
/// <param name="kind">The kind the current token must have.</param>
/// <returns>The token that was consumed.</returns>
/// <exception cref="ParsingException">The current token has a different kind; nothing is consumed.</exception>
private Token EatToken(TokenKind kind)
{
    var token = Tokens[_index];
    if (token.Kind == kind)
    {
        _index += 1;
        return token;
    }

    throw new ParsingException($"Unexpected token \"{token.PureToken}\" instead of {kind} at {token.PureToken.Position}.");
}
/// <summary>
/// Consumes the current token, requiring it to be an identifier whose
/// literal text equals <paramref name="s"/> (used for keywords).
/// </summary>
/// <param name="s">Expected identifier text.</param>
/// <returns>The token that was consumed.</returns>
/// <exception cref="ParsingException">
/// The current token is not an identifier, or its text differs; nothing is consumed.
/// </exception>
private Token EatIdentifier(string s)
{
    var token = Tokens[_index];
    var isIdentifier = token.PureToken.Kind == TokenKind.Identifier;
    if (!isIdentifier)
    {
        throw new ParsingException($"Unexpected token \"{token.PureToken}\" instead of identifier \"{s}\" at {token.PureToken.Position}.");
    }

    if (token.PureToken.LiteralText == s)
    {
        _index += 1;
        return token;
    }

    throw new ParsingException($"Unexpected identifier \"{token.PureToken.LiteralText}\" instead of \"{s}\" at {token.PureToken.Position}.");
}
/// <summary>
/// Jumps to the last token in the list, skipping everything in between.
/// </summary>
private void EatAll() => _index = Tokens.Count - 1;
/// <summary>
/// Parses the identifiers (and any commas between them) of a bracketed
/// output list, stopping in front of the closing square bracket.
/// </summary>
/// <returns>One <see cref="TokenNode"/> per consumed token, in source order.</returns>
private List<SyntaxNode> ParseFunctionOutputList()
{
    var consumed = new List<Token> { EatToken(TokenKind.Identifier) };
    while (CurrentToken.Kind != TokenKind.ClosingSquareBracket)
    {
        // A comma between two outputs is accepted but not required.
        if (CurrentToken.Kind == TokenKind.Comma)
        {
            consumed.Add(EatToken());
        }

        consumed.Add(EatToken(TokenKind.Identifier));
    }

    var nodes = new List<SyntaxNode>(consumed.Count);
    foreach (var token in consumed)
    {
        nodes.Add(new TokenNode(token));
    }

    return nodes;
}
/// <summary>
/// Parses the output part of a function declaration: either a single
/// identifier or a list of identifiers in square brackets.
/// </summary>
/// <returns>A node holding every consumed token.</returns>
/// <exception cref="ParsingException">
/// The current token is neither an identifier nor an opening square bracket.
/// </exception>
private FunctionOutputDescriptionNode ParseFunctionOutputDescription()
{
    if (CurrentToken.Kind == TokenKind.Identifier)
    {
        var result = EatToken();
        return Factory.FunctionOutputDescription(new List<SyntaxNode> { Factory.Token(result) });
    }

    if (CurrentToken.Kind == TokenKind.OpeningSquareBracket)
    {
        var leftBracket = EatToken();
        var outputs = ParseFunctionOutputList();
        var rightBracket = EatToken(TokenKind.ClosingSquareBracket);
        var nodes = new List<SyntaxNode> { Factory.Token(leftBracket) };
        nodes.AddRange(outputs);
        nodes.Add(Factory.Token(rightBracket));
        return Factory.FunctionOutputDescription(nodes);
    }

    // Message spelling fixed: it previously read "descritpion".
    throw new ParsingException($"Unexpected token {CurrentToken.PureToken} during parsing function output description at {CurrentToken.PureToken.Position}.");
}
/// <summary>
/// Parses a comma-separated list of parameter names, stopping in front of
/// the closing bracket. The list may be empty, as in <c>function r = f()</c>;
/// previously an empty list was rejected because the first identifier was
/// required unconditionally.
/// </summary>
/// <returns>A parameter list node holding the identifiers and commas in source order.</returns>
/// <exception cref="ParsingException">
/// The tokens do not alternate between identifiers and commas.
/// </exception>
private ParameterListNode ParseParameterList()
{
    var nodes = new List<SyntaxNode>();
    if (CurrentToken.PureToken.Kind != TokenKind.ClosingBracket)
    {
        nodes.Add(new TokenNode(EatToken(TokenKind.Identifier)));
        while (CurrentToken.PureToken.Kind != TokenKind.ClosingBracket)
        {
            nodes.Add(new TokenNode(EatToken(TokenKind.Comma)));
            nodes.Add(new TokenNode(EatToken(TokenKind.Identifier)));
        }
    }

    return Factory.ParameterList(nodes);
}
/// <summary>
/// Parses the bracketed parameter list of a function declaration.
/// </summary>
/// <returns>A node holding the opening bracket, the parameters and the closing bracket.</returns>
/// <exception cref="ParsingException">An expected bracket is missing.</exception>
private FunctionInputDescriptionNode ParseFunctionInputDescription()
{
    var open = new TokenNode(EatToken(TokenKind.OpeningBracket));
    var parameters = ParseParameterList();
    var close = new TokenNode(EatToken(TokenKind.ClosingBracket));
    return Factory.FunctionInputDescription(open, parameters, close);
}
/// <summary>
/// Consumes the current token if it is a semicolon or a comma.
/// </summary>
/// <returns>A node for the consumed separator, or <c>null</c> when there is none.</returns>
private TokenNode PossibleSemicolonOrComma()
{
    var kind = CurrentToken.Kind;
    var isSeparator = kind == TokenKind.Semicolon || kind == TokenKind.Comma;
    return isSeparator ? Factory.Token(EatToken()) : null;
}
/// <summary>
/// Parses a function declaration of the form
/// <c>function OUTPUTS = NAME(PARAMETERS) BODY [end] [; or ,]</c>.
/// </summary>
/// <returns>The declaration node.</returns>
/// <exception cref="ParsingException">A required part of the header is missing.</exception>
private FunctionDeclarationNode ParseFunctionDeclaration()
{
    var keyword = Factory.Token(EatIdentifier("function"));
    var outputs = ParseFunctionOutputDescription();
    var equalsSign = Factory.Token(EatToken(TokenKind.Assignment));
    var functionName = Factory.Token(EatToken(TokenKind.Identifier));
    var inputs = ParseFunctionInputDescription();
    var statements = ParseStatements();

    // The closing "end" keyword is optional.
    var hasEnd = CurrentToken.Kind == TokenKind.Identifier
        && CurrentToken.PureToken.LiteralText == "end";
    var endKeyword = hasEnd ? Factory.Token(EatIdentifier("end")) : null;
    var trailingSeparator = PossibleSemicolonOrComma();

    return Factory.FunctionDeclaration(
        keyword,
        outputs,
        equalsSign,
        functionName,
        inputs,
        statements,
        endKeyword,
        trailingSeparator);
}
/// <summary>
/// Placeholder for <c>classdef</c> parsing: no node is built. The parser
/// jumps to the last token and reports "no statement".
/// </summary>
/// <returns>Always <c>null</c>.</returns>
private StatementNode ParseClassDeclaration()
{
    // The original also built a TokenNode here that was never used; it has been removed.
    EatAll();
    return null;
}
/// <summary>
/// Parses the comma-separated arguments between the brackets of a call,
/// stopping in front of the closing bracket. The list may be empty, as in
/// <c>f()</c>.
/// </summary>
/// <returns>A node holding the argument expressions and commas in source order.</returns>
/// <exception cref="ParsingException">
/// An argument cannot be parsed, or a comma is missing between two arguments.
/// </exception>
private FunctionCallParameterListNode ParseFunctionCallParameterList()
{
    var nodes = new List<SyntaxNode>();

    // An empty list used to put a null "expression" into the children and
    // crash with a NullReferenceException when parents were assigned.
    if (CurrentToken.PureToken.Kind == TokenKind.ClosingBracket)
    {
        return Factory.FunctionCallParameterList(nodes);
    }

    nodes.Add(ParseArgument());
    while (CurrentToken.PureToken.Kind != TokenKind.ClosingBracket)
    {
        nodes.Add(Factory.Token(EatToken(TokenKind.Comma)));
        nodes.Add(ParseArgument());
    }

    return Factory.FunctionCallParameterList(nodes);
}

/// <summary>Parses one call argument, rejecting positions where no expression starts.</summary>
private ExpressionNode ParseArgument()
{
    var argument = ParseExpression();
    if (argument == null)
    {
        throw new ParsingException($"Unexpected token {CurrentToken.PureToken} at {CurrentToken.PureToken.Position}.");
    }

    return argument;
}
/// <summary>
/// Parses the name that follows the dot in a member access.
/// </summary>
/// <returns>An identifier name node for the consumed token.</returns>
/// <exception cref="ParsingException">The current token is not an identifier.</exception>
private ExpressionNode ParseMember()
{
    if (CurrentToken.Kind != TokenKind.Identifier)
    {
        throw new ParsingException($"Unexpected token {CurrentToken.PureToken} at {CurrentToken.PureToken.Position}.");
    }

    return Factory.IdentifierName(EatToken());
}
/// <summary>
/// Repeatedly wraps <paramref name="expression"/> in the postfix
/// constructs that follow it: cell element access <c>{...}</c>, call
/// <c>(...)</c>, member access <c>.name</c> and the transpose operators.
/// </summary>
/// <param name="expression">
/// The expression parsed so far; may be <c>null</c> when the caller found no term.
/// </param>
/// <returns>
/// The outermost expression once the current token starts no further postfix
/// construct (the unchanged input if there was none).
/// </returns>
/// <exception cref="ParsingException">
/// A closing token is missing, or a dot follows something that is neither a
/// name nor a member access.
/// </exception>
private ExpressionNode ParsePostfix(ExpressionNode expression)
{
    while (true)
    {
        var token = CurrentToken;
        switch (token.Kind)
        {
            case TokenKind.OpeningBrace: // cell array element access
            {
                var openingBrace = EatToken();
                var index = ParseExpression();
                var closingBrace = EatToken(TokenKind.ClosingBrace);
                expression = Factory.CellArrayElementAccessExpression(
                    expression,
                    Factory.Token(openingBrace),
                    index,
                    Factory.Token(closingBrace));
                break;
            }
            case TokenKind.OpeningBracket: // function call
            {
                var openingBracket = EatToken();
                var parameters = ParseFunctionCallParameterList();
                var closingBracket = EatToken(TokenKind.ClosingBracket);
                expression = Factory.FunctionCallExpression(
                    expression,
                    Factory.Token(openingBracket),
                    parameters,
                    Factory.Token(closingBracket));
                break;
            }
            case TokenKind.Dot: // member access
            {
                // Only a plain name or another member access may be followed by a dot.
                if (!(expression is IdentifierNameNode) && !(expression is MemberAccessNode))
                {
                    throw new ParsingException(
                        $"Unexpected token {token.PureToken} at {token.PureToken.Position}.");
                }

                var dot = EatToken();
                var member = ParseMember();
                expression = Factory.MemberAccess(expression, Factory.Token(dot), member);
                break;
            }
            case TokenKind.Transpose:
            // The dot-transpose token was previously not accepted here, although
            // GetPrecedence groups it with Transpose; it is a postfix operator too.
            case TokenKind.DotTranspose:
            {
                var transposeSign = Factory.Token(EatToken());
                expression = Factory.UnaryPostfixOperationExpression(expression, transposeSign);
                break;
            }
            default:
                return expression;
        }
    }
}
/// <summary>
/// Parses the elements of an array literal, stopping in front of the
/// closing square bracket.
/// </summary>
/// <returns>A node holding the element expressions and separators in source order.</returns>
/// <exception cref="ParsingException">An element cannot be parsed.</exception>
private ArrayElementListNode ParseArrayElementList()
{
    return ParseElementList(TokenKind.ClosingSquareBracket);
}

/// <summary>
/// Parses the elements of a cell array literal, stopping in front of the
/// closing brace.
/// </summary>
/// <returns>A node holding the element expressions and separators in source order.</returns>
/// <exception cref="ParsingException">An element cannot be parsed.</exception>
private ArrayElementListNode ParseCellArrayElementList()
{
    return ParseElementList(TokenKind.ClosingBrace);
}

/// <summary>
/// Shared implementation of the two element-list parsers: reads expressions,
/// optionally separated by a comma or semicolon, until the current token has
/// kind <paramref name="closingKind"/>.
/// </summary>
private ArrayElementListNode ParseElementList(TokenKind closingKind)
{
    var nodes = new List<SyntaxNode>();
    while (CurrentToken.Kind != closingKind)
    {
        var atSeparator = CurrentToken.Kind == TokenKind.Comma
            || CurrentToken.Kind == TokenKind.Semicolon;
        if (nodes.Count > 0 && atSeparator)
        {
            nodes.Add(Factory.Token(EatToken()));

            // A separator directly before the closing token (e.g. "[1 2;]")
            // ends the list instead of producing a null element.
            if (CurrentToken.Kind == closingKind)
            {
                break;
            }
        }

        var element = ParseExpression();
        if (element == null)
        {
            // No expression starts here (for instance at end of file). Without
            // this check the loop never advanced and kept adding nulls forever.
            throw new ParsingException($"Unexpected token {CurrentToken.PureToken} at {CurrentToken.PureToken.Position}.");
        }

        nodes.Add(element);
    }

    return Factory.ArrayElementList(nodes);
}
/// <summary>
/// Parses an array literal: <c>[</c>, its elements, <c>]</c>.
/// </summary>
/// <returns>The array literal node.</returns>
/// <exception cref="ParsingException">A square bracket is missing.</exception>
private ArrayLiteralExpressionNode ParseArrayLiteral()
{
    var open = Factory.Token(EatToken(TokenKind.OpeningSquareBracket));
    var contents = ParseArrayElementList();
    var close = Factory.Token(EatToken(TokenKind.ClosingSquareBracket));
    return Factory.ArrayLiteralExpression(open, contents, close);
}
/// <summary>
/// Parses a cell array literal: <c>{</c>, its elements, <c>}</c>.
/// </summary>
/// <returns>The cell array literal node.</returns>
/// <exception cref="ParsingException">A brace is missing.</exception>
private CellArrayLiteralExpressionNode ParseCellArrayLiteral()
{
    var open = Factory.Token(EatToken(TokenKind.OpeningBrace));
    var contents = ParseCellArrayElementList();
    var close = Factory.Token(EatToken(TokenKind.ClosingBrace));
    return Factory.CellArrayLiteralExpression(open, contents, close);
}
/// <summary>
/// Parses an expression enclosed in round brackets.
/// </summary>
/// <returns>A node holding both brackets and the inner expression.</returns>
/// <exception cref="ParsingException">A bracket is missing.</exception>
private ParenthesizedExpressionNode ParseParenthesizedExpression()
{
    var left = EatToken(TokenKind.OpeningBracket);
    var leftNode = Factory.Token(left);
    var inner = ParseExpression();
    var right = EatToken(TokenKind.ClosingBracket);
    var rightNode = Factory.Token(right);
    return Factory.ParenthesizedExpression(leftNode, inner, rightNode);
}
/// <summary>
/// Parses a primary expression (name, number, string, array or cell array
/// literal, parenthesized expression) followed by any postfix constructs.
/// </summary>
/// <returns>
/// The parsed expression. When the current token starts none of the known
/// terms, <c>null</c> is handed to <see cref="ParsePostfix"/> and its result
/// is returned.
/// </returns>
private ExpressionNode ParseTerm()
{
    ExpressionNode term;
    switch (CurrentToken.Kind)
    {
        case TokenKind.Identifier:
            term = Factory.IdentifierName(EatToken());
            break;
        case TokenKind.NumberLiteral:
            term = Factory.NumberLiteral(EatToken());
            break;
        case TokenKind.StringLiteral:
            term = Factory.StringLiteral(EatToken());
            break;
        case TokenKind.OpeningSquareBracket: // array literal expression
            term = ParseArrayLiteral();
            break;
        case TokenKind.OpeningBrace: // cell array literal expression
            term = ParseCellArrayLiteral();
            break;
        case TokenKind.Colon: // for parsing things like a{:}
            // The colon itself is not consumed here.
            term = Factory.EmptyExpression();
            break;
        case TokenKind.OpeningBracket:
            term = ParseParenthesizedExpression();
            break;
        default:
            term = null;
            break;
    }

    return ParsePostfix(term);
}
/// <summary>
/// Parses a full expression starting at the current token, using the
/// weakest precedence level as the lower bound.
/// </summary>
/// <returns>The parsed expression, as returned by <c>ParseSubExpression</c>.</returns>
public ExpressionNode ParseExpression() => ParseSubExpression(Precedence.Expression);
/// <summary>
/// Tells whether a token of this kind can act as a prefix operator.
/// </summary>
/// <param name="kind">Token kind to test.</param>
/// <returns><c>true</c> for plus, minus and not; otherwise <c>false</c>.</returns>
private bool IsUnaryOperator(TokenKind kind)
{
    return kind == TokenKind.Plus
        || kind == TokenKind.Minus
        || kind == TokenKind.Not;
}
/// <summary>
/// Tells whether a token of this kind is treated as an infix operator by
/// the expression parser.
/// </summary>
/// <param name="kind">Token kind to test.</param>
/// <returns><c>true</c> for the kinds listed below; otherwise <c>false</c>.</returns>
private bool IsBinaryOperator(TokenKind kind)
{
    var isBinary = false;
    switch (kind)
    {
        // assignment
        case TokenKind.Assignment:
        // logical and bitwise
        case TokenKind.LogicalOr:
        case TokenKind.LogicalAnd:
        case TokenKind.BitwiseOr:
        case TokenKind.BitwiseAnd:
        // relational
        case TokenKind.Less:
        case TokenKind.LessOrEqual:
        case TokenKind.Greater:
        case TokenKind.GreaterOrEqual:
        case TokenKind.Equality:
        case TokenKind.Inequality:
        // range
        case TokenKind.Colon:
        // arithmetic
        case TokenKind.Plus:
        case TokenKind.Minus:
        case TokenKind.Multiply:
        case TokenKind.DotMultiply:
        case TokenKind.Divide:
        case TokenKind.DotDivide:
        case TokenKind.Backslash:
        case TokenKind.DotBackslash:
        // NOTE(review): Not is also accepted as an infix operator here; confirm this is intended.
        case TokenKind.Not:
        case TokenKind.Power:
        case TokenKind.DotPower:
            isBinary = true;
            break;
    }

    return isBinary;
}
/// <summary>
/// Tells whether the operator groups from the left. Every operator is
/// currently reported as left-associative; <paramref name="kind"/> is ignored.
/// </summary>
/// <param name="kind">Operator token kind.</param>
/// <returns>Always <c>true</c>.</returns>
private bool IsLeftAssociative(TokenKind kind) => true; // TODO: really?
/// <summary>
/// Maps the token kind of a prefix operator to its dedicated unary kind.
/// </summary>
/// <param name="kind">Plus, minus or not.</param>
/// <returns>The corresponding unary token kind.</returns>
/// <exception cref="ArgumentException">
/// <paramref name="kind"/> is not one of the three prefix operators.
/// </exception>
private TokenKind ConvertToUnaryTokenKind(TokenKind kind)
{
    switch (kind)
    {
        case TokenKind.Plus:
            return TokenKind.UnaryPlus;
        case TokenKind.Minus:
            return TokenKind.UnaryMinus;
        case TokenKind.Not:
            return TokenKind.UnaryNot;
        default:
            // The single-string constructor takes the message, not the parameter
            // name, so the original produced an exception whose message was just
            // "kind". Supply a real message and pass the name as paramName.
            throw new ArgumentException($"Token kind {kind} is not a unary operator.", nameof(kind));
    }
}
/// <summary>
/// Parses an operand (optionally preceded by a prefix operator) and then
/// folds in the infix operators that follow it, as long as they bind more
/// tightly than <paramref name="precedence"/>.
/// </summary>
/// <param name="precedence">
/// Binding level of the enclosing operator; an infix operator at or below
/// this level is left for the caller.
/// </param>
/// <returns>
/// The parsed expression; the result of <c>ParseTerm</c> (possibly
/// <c>null</c>) when no operator is involved.
/// </returns>
private ExpressionNode ParseSubExpression(Precedence precedence)
{
ExpressionNode lhs = null;
if (IsUnaryOperator(CurrentToken.Kind))
{
// Prefix operator: its operand is parsed with the level GetPrecedence
// reports for the corresponding unary token kind.
var operation = EatToken();
var unaryTokenKind = ConvertToUnaryTokenKind(operation.Kind);
var newPrecedence = GetPrecedence(unaryTokenKind);
var operand = ParseSubExpression(newPrecedence);
lhs = Factory.UnaryPrefixOperationExpression(Factory.Token(operation), operand);
}
else
{
lhs = ParseTerm();
}
// Fold in infix operators; each iteration makes the expression built so
// far the left operand of the next operator.
while (true)
{
var token = CurrentToken;
if (IsBinaryOperator(token.Kind))
{
var newPrecedence = GetPrecedence(token.Kind);
// A weaker operator belongs to an enclosing call.
if (newPrecedence < precedence)
{
break;
}
// Equal level and left-associative: stop so that the caller combines
// first, which makes the operators group from the left.
if (newPrecedence == precedence && IsLeftAssociative(token.Kind))
{
break;
}
EatToken();
var rhs = ParseSubExpression(newPrecedence);
if (rhs == null && token.Kind == TokenKind.Colon) // for parsing things like a{:}
{
rhs = Factory.EmptyExpression();
}
// Assignment gets its own node type; every other operator becomes a
// binary operation.
if (token.Kind == TokenKind.Assignment)
{
lhs = Factory.AssignmentExpression(lhs, Factory.Token(token), rhs);
}
else
{
lhs = Factory.BinaryOperationExpression(lhs, Factory.Token(token), rhs);
}
}
else
{
break;
}
}
return lhs;
}
/// <summary>
/// Parses one <c>case</c> branch: the keyword, a string literal label and
/// the statements that follow.
/// </summary>
/// <returns>The case node.</returns>
/// <exception cref="ParsingException">
/// The keyword is not <c>case</c> or the label is not a string literal.
/// </exception>
private SwitchCaseNode ParseSwitchCase()
{
    var keyword = Factory.Token(EatIdentifier("case"));
    var label = Factory.Token(EatToken(TokenKind.StringLiteral));
    var branchBody = ParseStatements();
    return Factory.SwitchCase(keyword, label, branchBody);
}
/// <summary>
/// Parses <c>switch EXPRESSION</c>, any number of <c>case</c> branches and
/// the closing <c>end</c>.
/// </summary>
/// <returns>The switch statement node.</returns>
/// <exception cref="ParsingException">A keyword is missing.</exception>
private SwitchStatementNode ParseSwitchStatement()
{
    var keyword = Factory.Token(EatIdentifier("switch"));
    var subject = ParseExpression();

    var branches = new List<SwitchCaseNode>();
    for (;;)
    {
        var next = CurrentToken;
        var startsCase = next.Kind == TokenKind.Identifier
            && next.PureToken.LiteralText == "case";
        if (!startsCase)
        {
            break;
        }

        branches.Add(ParseSwitchCase());
    }

    var terminator = Factory.Token(EatIdentifier("end"));
    return Factory.SwitchStatement(keyword, subject, branches, terminator);
}
/// <summary>
/// Parses an expression used as a statement, together with a directly
/// following semicolon if there is one.
/// </summary>
/// <returns>The expression statement node.</returns>
public ExpressionStatementNode ParseExpressionStatement()
{
    var expression = ParseExpression();
    if (CurrentToken.Kind != TokenKind.Semicolon)
    {
        return Factory.ExpressionStatement(expression);
    }

    return Factory.ExpressionStatement(expression, Factory.Token(EatToken()));
}
/// <summary>
/// Parses a while loop: keyword, condition, optional commas, body and the
/// closing <c>end</c>. The current token is consumed as the keyword without
/// being checked.
/// </summary>
/// <returns>The while statement node.</returns>
/// <exception cref="ParsingException">The closing <c>end</c> is missing.</exception>
public WhileStatementNode ParseWhileStatement()
{
    var keyword = Factory.Token(EatToken());
    var loopCondition = ParseExpression();

    // Stays null when no comma follows the condition.
    List<TokenNode> separators = null;
    while (CurrentToken.Kind == TokenKind.Comma)
    {
        if (separators == null)
        {
            separators = new List<TokenNode>();
        }

        separators.Add(Factory.Token(EatToken()));
    }

    var loopBody = ParseStatements();
    var terminator = Factory.Token(EatIdentifier("end"));
    return Factory.WhileStatement(keyword, loopCondition, loopBody, terminator, separators);
}
/// <summary>
/// Parses one statement and attaches a directly following semicolon or
/// comma to it.
/// </summary>
/// <returns>
/// The statement, or <c>null</c> when <see cref="ParseStatementCore"/> found none.
/// </returns>
public StatementNode ParseStatement()
{
    var statement = ParseStatementCore();
    if (statement == null)
    {
        return null;
    }

    var kind = CurrentToken.Kind;
    if (kind == TokenKind.Semicolon || kind == TokenKind.Comma)
    {
        var separator = Factory.Token(EatToken());
        statement = Factory.AppendSemicolonOrComma(statement, separator);
    }

    return statement;
}
/// <summary>
/// Parses an if statement: keyword, condition, optional commas, body, an
/// optional <c>else</c> part and the closing <c>end</c>. The current token
/// is consumed as the keyword without being checked.
/// </summary>
/// <returns>The if statement node.</returns>
/// <exception cref="ParsingException">The closing <c>end</c> is missing.</exception>
public IfStatementNode ParseIfStatement()
{
    var keyword = Factory.Token(EatToken());
    var test = ParseExpression();

    // Stays null when no comma follows the condition.
    List<TokenNode> separators = null;
    while (CurrentToken.Kind == TokenKind.Comma)
    {
        if (separators == null)
        {
            separators = new List<TokenNode>();
        }

        separators.Add(Factory.Token(EatToken()));
    }

    var thenBody = ParseStatements();

    TokenNode elseKeyword = null;
    StatementListNode elseBody = null;
    var hasElse = CurrentToken.Kind == TokenKind.Identifier
        && CurrentToken.PureToken.LiteralText == "else";
    if (hasElse)
    {
        elseKeyword = Factory.Token(EatToken());
        elseBody = ParseStatements();
    }

    var terminator = Factory.Token(EatIdentifier("end"));
    return Factory.IfStatement(keyword, test, thenBody, elseKeyword, elseBody, terminator, separators);
}
/// <summary>
/// Parses a for loop: <c>for</c>, an assignment expression, the body and
/// the closing <c>end</c>.
/// </summary>
/// <returns>The for statement node.</returns>
/// <exception cref="ParsingException">
/// A keyword is missing, or what follows <c>for</c> is not an assignment.
/// </exception>
public ForStatementNode ParseForStatement()
{
    var forKeyword = Factory.Token(EatIdentifier("for"));
    var expression = ParseExpression();
    var forAssignment = expression as AssignmentExpressionNode;
    if (forAssignment == null)
    {
        // ParseExpression may return null; "?." keeps the intended
        // ParsingException from being replaced by a NullReferenceException.
        throw new ParsingException($"Unexpected expression \"{expression?.FullText}\" while parsing FOR statement at {CurrentToken.PureToken.Position}.");
    }

    var body = ParseStatements();
    var endKeyword = Factory.Token(EatIdentifier("end"));
    return Factory.ForStatement(forKeyword, forAssignment, body, endKeyword);
}
/// <summary>
/// Parses one statement without its trailing separator, choosing the
/// statement form from the current token.
/// </summary>
/// <returns>
/// The statement; <c>null</c> when the current identifier is <c>case</c>,
/// <c>else</c> or <c>end</c> (which close the enclosing block), or when the
/// selected sub-parser returns <c>null</c>.
/// </returns>
/// <exception cref="ParsingException">
/// The current token is neither an identifier nor an opening square bracket.
/// </exception>
public StatementNode ParseStatementCore()
{
    if (CurrentToken.Kind == TokenKind.OpeningSquareBracket)
    {
        return ParseExpressionStatement();
    }

    if (CurrentToken.Kind != TokenKind.Identifier)
    {
        throw new ParsingException($"Unexpected token: \"{CurrentToken.PureToken}\" at {CurrentToken.PureToken.Position}");
    }

    switch (CurrentToken.PureToken.LiteralText)
    {
        case "function":
            return ParseFunctionDeclaration();
        case "classdef":
            return ParseClassDeclaration();
        case "switch":
            return ParseSwitchStatement();
        case "while":
            return ParseWhileStatement();
        case "if":
            return ParseIfStatement();
        case "for":
            return ParseForStatement();
        case "case":
        case "else":
        case "end":
            // Block terminators are left for the enclosing construct to consume.
            return null;
        default:
            return ParseExpressionStatement();
    }
}
/// <summary>
/// Parses statements until the end-of-file token is reached or
/// <see cref="ParseStatement"/> reports that no statement starts here.
/// </summary>
/// <returns>A list node holding the parsed statements in source order.</returns>
private StatementListNode ParseStatements()
{
    var collected = new List<SyntaxNode>();
    StatementNode next;
    while (CurrentToken.PureToken.Kind != TokenKind.EndOfFile
        && (next = ParseStatement()) != null)
    {
        collected.Add(next);
    }

    return Factory.StatementList(collected);
}
/// <summary>
/// Parses the token list as a sequence of statements.
/// </summary>
/// <returns>The statement list produced by <see cref="ParseStatements"/>.</returns>
private StatementListNode ParseFile() => ParseStatements();
/// <summary>
/// Entry point: parses the tokens given to the constructor.
/// </summary>
/// <returns>The statement list produced by <see cref="ParseFile"/>.</returns>
public StatementListNode Parse() => ParseFile();
}
}

8
Parser/Parser.csproj Normal file
View File

@ -0,0 +1,8 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>netcoreapp2.0</TargetFramework>
</PropertyGroup>
<ItemGroup>
<ProjectReference Include="..\Lexer\Lexer.csproj" />
</ItemGroup>
</Project>

506
Parser/SyntaxFactory.cs Normal file
View File

@ -0,0 +1,506 @@
using System.Collections.Generic;
using System.Linq;
using System.Reflection.Metadata;
using Lexer;
namespace Parser
{
public class SyntaxFactory
{
/// <summary>
/// Makes <paramref name="parent"/> the <c>Parent</c> of each of its direct children.
/// </summary>
/// <param name="parent">Node whose children are updated.</param>
/// <returns>The same <paramref name="parent"/> instance.</returns>
private static SyntaxNode SetParent(SyntaxNode parent)
{
    parent.Children.ForEach(child => child.Parent = parent);
    return parent;
}
/// <summary>
/// Builds a function declaration node. The children are the given parts in
/// order; <paramref name="end"/> and <paramref name="semicolonOrComma"/>
/// are included only when they are not <c>null</c>.
/// </summary>
/// <returns>The new node, set as parent of all its children.</returns>
public FunctionDeclarationNode FunctionDeclaration(
    TokenNode token,
    FunctionOutputDescriptionNode outputDescription,
    TokenNode equalitySign,
    TokenNode name,
    FunctionInputDescriptionNode inputDescription,
    StatementListNode body,
    TokenNode end,
    TokenNode semicolonOrComma = null)
{
    var parts = new List<SyntaxNode>
    {
        token, outputDescription, equalitySign, name, inputDescription, body
    };

    // Optional trailing tokens appear in the tree only when present.
    foreach (var optional in new[] { end, semicolonOrComma })
    {
        if (optional != null)
        {
            parts.Add(optional);
        }
    }

    var declaration = new FunctionDeclarationNode(
        parts,
        token,
        outputDescription,
        equalitySign,
        name,
        inputDescription,
        body,
        end,
        semicolonOrComma);
    SetParent(declaration);
    return declaration;
}
/// <summary>
/// Builds the output description of a function declaration.
/// </summary>
/// <param name="nodes">All nodes of the description, in source order.</param>
/// <returns>
/// A node whose children are <paramref name="nodes"/> and whose outputs are
/// the token nodes among them that wrap an identifier.
/// </returns>
public FunctionOutputDescriptionNode FunctionOutputDescription(
    List<SyntaxNode> nodes)
{
    var outputs = nodes
        .OfType<TokenNode>()
        .Where(tokenNode => tokenNode.Token.Kind == TokenKind.Identifier)
        .ToList();
    var description = new FunctionOutputDescriptionNode(nodes, outputs);
    SetParent(description);
    return description;
}
/// <summary>
/// Builds a parameter list node.
/// </summary>
/// <param name="nodes">All nodes of the list, in source order.</param>
/// <returns>
/// A node whose children are <paramref name="nodes"/> and whose parameters
/// are the token nodes among them that are not commas.
/// </returns>
public ParameterListNode ParameterList(List<SyntaxNode> nodes)
{
    var parameters = new List<SyntaxNode>();
    foreach (var node in nodes)
    {
        var tokenNode = node as TokenNode;
        if (tokenNode != null && tokenNode.Token.Kind != TokenKind.Comma)
        {
            parameters.Add(node);
        }
    }

    var list = new ParameterListNode(nodes, parameters);
    SetParent(list);
    return list;
}
/// <summary>
/// Builds a statement list node with <paramref name="nodes"/> as its children.
/// </summary>
/// <returns>The new node, set as parent of all its children.</returns>
public StatementListNode StatementList(List<SyntaxNode> nodes)
{
    var list = new StatementListNode(nodes);
    SetParent(list);
    return list;
}
/// <summary>
/// Builds the bracketed input description of a function declaration.
/// </summary>
/// <returns>The new node, set as parent of the three given parts.</returns>
public FunctionInputDescriptionNode FunctionInputDescription(
    TokenNode openingBracket,
    ParameterListNode parameterList,
    TokenNode closingBracket)
{
    var description = new FunctionInputDescriptionNode(
        new List<SyntaxNode> { openingBracket, parameterList, closingBracket },
        openingBracket,
        parameterList,
        closingBracket);
    SetParent(description);
    return description;
}
/// <summary>
/// Wraps a token in a syntax node.
/// </summary>
/// <param name="token">Token to wrap.</param>
/// <returns>A new <see cref="TokenNode"/> for <paramref name="token"/>.</returns>
public TokenNode Token(Token token) => new TokenNode(token);
/// <summary>
/// Builds a switch statement node. Children: keyword, expression, every
/// case, the end keyword and, when given, the trailing separator.
/// </summary>
/// <returns>The new node, set as parent of all its children.</returns>
public SwitchStatementNode SwitchStatement(
    TokenNode switchKeyword,
    ExpressionNode switchExpression,
    List<SwitchCaseNode> cases,
    TokenNode endKeyword,
    TokenNode semicolonOrComma = null)
{
    var parts = new List<SyntaxNode>();
    parts.Add(switchKeyword);
    parts.Add(switchExpression);
    parts.AddRange(cases);
    parts.Add(endKeyword);
    if (semicolonOrComma != null)
    {
        parts.Add(semicolonOrComma);
    }

    var statement = new SwitchStatementNode(
        parts,
        switchKeyword,
        switchExpression,
        cases,
        endKeyword,
        semicolonOrComma);
    SetParent(statement);
    return statement;
}
/// <summary>
/// Builds one case branch of a switch statement.
/// </summary>
/// <returns>The new node, set as parent of the three given parts.</returns>
public SwitchCaseNode SwitchCase(
    TokenNode caseKeyword,
    TokenNode caseIdentifier,
    StatementListNode statementList)
{
    var branch = new SwitchCaseNode(
        new List<SyntaxNode> { caseKeyword, caseIdentifier, statementList },
        caseKeyword,
        caseIdentifier,
        statementList);
    SetParent(branch);
    return branch;
}
/// <summary>
/// Builds an assignment expression node from target, sign and value.
/// </summary>
/// <returns>The new node, set as parent of the three given parts.</returns>
public AssignmentExpressionNode AssignmentExpression(
    ExpressionNode lhs,
    TokenNode assignmentSign,
    ExpressionNode rhs)
{
    var assignment = new AssignmentExpressionNode(
        new List<SyntaxNode> { lhs, assignmentSign, rhs },
        lhs,
        assignmentSign,
        rhs);
    SetParent(assignment);
    return assignment;
}
/// <summary>
/// Builds a prefix operation node: operator first, operand second.
/// </summary>
/// <returns>The new node, set as parent of both parts.</returns>
public UnaryPrefixOperationExpressionNode UnaryPrefixOperationExpression(
    TokenNode operation,
    ExpressionNode operand)
{
    var prefix = new UnaryPrefixOperationExpressionNode(
        new List<SyntaxNode> { operation, operand },
        operation,
        operand);
    SetParent(prefix);
    return prefix;
}
/// <summary>
/// Builds a postfix operation node: operand first, operator second.
/// </summary>
/// <returns>The new node, set as parent of both parts.</returns>
public UnaryPostfixOperationExpressionNode UnaryPostfixOperationExpression(
    ExpressionNode operand,
    TokenNode operation)
{
    var postfix = new UnaryPostfixOperationExpressionNode(
        new List<SyntaxNode> { operand, operation },
        operand,
        operation);
    SetParent(postfix);
    return postfix;
}
/// <summary>
/// Builds a binary operation node from left operand, operator and right operand.
/// </summary>
/// <returns>The new node, set as parent of the three given parts.</returns>
public BinaryOperationExpressionNode BinaryOperationExpression(
    ExpressionNode lhs,
    TokenNode operation,
    ExpressionNode rhs)
{
    var binary = new BinaryOperationExpressionNode(
        new List<SyntaxNode> { lhs, operation, rhs },
        lhs,
        operation,
        rhs);
    SetParent(binary);
    return binary;
}
/// <summary>
/// Builds an identifier name node for the given token.
/// </summary>
public IdentifierNameNode IdentifierName(Token identifier) => new IdentifierNameNode(identifier);
/// <summary>
/// Builds a number literal node for the given token.
/// </summary>
public NumberLiteralNode NumberLiteral(Token numberLiteral) => new NumberLiteralNode(numberLiteral);
/// <summary>
/// Builds a string literal node for the given token.
/// </summary>
public StringLiteralNode StringLiteral(Token stringLiteral) => new StringLiteralNode(stringLiteral);
/// <summary>
/// Builds an expression statement without a trailing separator.
/// </summary>
/// <returns>The new node, whose only child is <paramref name="expression"/>.</returns>
public ExpressionStatementNode ExpressionStatement(ExpressionNode expression)
{
    var statement = new ExpressionStatementNode(
        new List<SyntaxNode> { expression },
        expression,
        null);
    SetParent(statement);
    return statement;
}
/// <summary>
/// Builds an expression statement followed by a separator token.
/// </summary>
/// <returns>The new node, set as parent of both parts.</returns>
public ExpressionStatementNode ExpressionStatement(ExpressionNode expression, TokenNode semicolonOrComma)
{
    var statement = new ExpressionStatementNode(
        new List<SyntaxNode> { expression, semicolonOrComma },
        expression,
        semicolonOrComma);
    SetParent(statement);
    return statement;
}
/// <summary>
/// Builds a cell array element access node: target, <c>{</c>, index, <c>}</c>.
/// </summary>
/// <returns>The new node, set as parent of the four given parts.</returns>
public CellArrayElementAccessExpressionNode CellArrayElementAccessExpression(
    ExpressionNode cellArray,
    TokenNode openingBrace,
    ExpressionNode index,
    TokenNode closingBrace)
{
    var parts = new List<SyntaxNode>();
    parts.Add(cellArray);
    parts.Add(openingBrace);
    parts.Add(index);
    parts.Add(closingBrace);

    var access = new CellArrayElementAccessExpressionNode(
        parts,
        cellArray,
        openingBrace,
        index,
        closingBrace);
    SetParent(access);
    return access;
}
/// <summary>
/// Builds a call node: callee, opening bracket, arguments, closing bracket.
/// </summary>
/// <returns>The new node, set as parent of the four given parts.</returns>
public FunctionCallExpressionNode FunctionCallExpression(
    ExpressionNode functionName,
    TokenNode openingBracket,
    FunctionCallParameterListNode parameters,
    TokenNode closingBracket)
{
    var call = new FunctionCallExpressionNode(
        new List<SyntaxNode> { functionName, openingBracket, parameters, closingBracket },
        functionName,
        openingBracket,
        parameters,
        closingBracket);
    SetParent(call);
    return call;
}
/// <summary>
/// Builds the argument list of a call.
/// </summary>
/// <param name="nodes">All nodes of the list, in source order.</param>
/// <returns>
/// A node whose children are <paramref name="nodes"/>; the expression nodes
/// among them are passed as the second constructor argument.
/// </returns>
public FunctionCallParameterListNode FunctionCallParameterList(List<SyntaxNode> nodes)
{
    var arguments = nodes.OfType<ExpressionNode>().ToList();
    var list = new FunctionCallParameterListNode(nodes, arguments);
    SetParent(list);
    return list;
}
/// <summary>
/// Builds the element list of an array or cell array literal.
/// </summary>
/// <param name="nodes">All nodes of the list, in source order.</param>
/// <returns>
/// A node whose children are <paramref name="nodes"/>; the expression nodes
/// among them are passed as the second constructor argument.
/// </returns>
public ArrayElementListNode ArrayElementList(List<SyntaxNode> nodes)
{
    var elements = nodes.OfType<ExpressionNode>().ToList();
    var list = new ArrayElementListNode(nodes, elements);
    SetParent(list);
    return list;
}
/// <summary>
/// Builds an array literal node: <c>[</c>, elements, <c>]</c>.
/// </summary>
/// <returns>The new node, set as parent of the three given parts.</returns>
public ArrayLiteralExpressionNode ArrayLiteralExpression(
    TokenNode openingSquareBracket,
    ArrayElementListNode elements,
    TokenNode closingSquareBracket)
{
    var literal = new ArrayLiteralExpressionNode(
        new List<SyntaxNode> { openingSquareBracket, elements, closingSquareBracket },
        openingSquareBracket,
        elements,
        closingSquareBracket);
    SetParent(literal);
    return literal;
}
/// <summary>
/// Builds a cell array literal node: <c>{</c>, elements, <c>}</c>.
/// </summary>
/// <returns>The new node, set as parent of the three given parts.</returns>
public CellArrayLiteralExpressionNode CellArrayLiteralExpression(
    TokenNode openingBrace,
    ArrayElementListNode elements,
    TokenNode closingBrace)
{
    var literal = new CellArrayLiteralExpressionNode(
        new List<SyntaxNode> { openingBrace, elements, closingBrace },
        openingBrace,
        elements,
        closingBrace);
    SetParent(literal);
    return literal;
}
/// <summary>
/// Builds a new empty expression node.
/// </summary>
public EmptyExpressionNode EmptyExpression() => new EmptyExpressionNode();
/// <summary>
/// Builds a member access node: left operand, dot, right operand.
/// </summary>
/// <returns>The new node, set as parent of the three given parts.</returns>
public MemberAccessNode MemberAccess(
    SyntaxNode leftOperand,
    TokenNode dot,
    SyntaxNode rightOperand)
{
    var access = new MemberAccessNode(
        new List<SyntaxNode> { leftOperand, dot, rightOperand },
        leftOperand,
        dot,
        rightOperand);
    SetParent(access);
    return access;
}
/// <summary>
/// Builds a while statement node. Children: keyword, condition, the commas
/// after the condition (when a list is given), body, end and, when given,
/// the trailing separator.
/// </summary>
/// <returns>The new node, set as parent of all its children.</returns>
public WhileStatementNode WhileStatement(
    TokenNode whileKeyword,
    ExpressionNode condition,
    StatementListNode body,
    TokenNode end,
    List<TokenNode> optionalCommasAfterCondition = null,
    TokenNode semicolonOrComma = null)
{
    var parts = new List<SyntaxNode>();
    parts.Add(whileKeyword);
    parts.Add(condition);
    if (optionalCommasAfterCondition != null)
    {
        parts.AddRange(optionalCommasAfterCondition);
    }

    parts.AddRange(new SyntaxNode[] { body, end });
    if (semicolonOrComma != null)
    {
        parts.Add(semicolonOrComma);
    }

    var loop = new WhileStatementNode(
        parts,
        whileKeyword,
        condition,
        optionalCommasAfterCondition,
        body,
        end,
        semicolonOrComma);
    SetParent(loop);
    return loop;
}
/// <summary>
/// Attaches a trailing separator to an existing statement: stores it in
/// <c>SemicolonOrComma</c>, appends it to the children and sets its parent.
/// </summary>
/// <returns>The same <paramref name="statement"/> instance, modified in place.</returns>
public StatementNode AppendSemicolonOrComma(StatementNode statement, TokenNode semicolonOrComma)
{
    statement.SemicolonOrComma = semicolonOrComma;

    var children = statement.Children;
    children.Add(semicolonOrComma);
    // The element just appended is the last child.
    var appended = children[children.Count - 1];
    appended.Parent = statement;
    return statement;
}
/// <summary>
/// Builds an if statement node. Children: keyword, condition, the commas
/// after the condition (when a list is given), body, the else keyword with
/// its body (when the keyword is not <c>null</c>) and the end keyword.
/// </summary>
/// <returns>The new node, set as parent of all its children.</returns>
public IfStatementNode IfStatement(
    TokenNode ifKeyword,
    ExpressionNode condition,
    StatementListNode body,
    TokenNode elseKeyword,
    StatementListNode elseBody,
    TokenNode endKeyword,
    List<TokenNode> optionalCommasAfterCondition = null)
{
    var parts = new List<SyntaxNode>();
    parts.Add(ifKeyword);
    parts.Add(condition);
    if (optionalCommasAfterCondition != null)
    {
        parts.AddRange(optionalCommasAfterCondition);
    }

    parts.Add(body);
    if (elseKeyword != null)
    {
        parts.AddRange(new SyntaxNode[] { elseKeyword, elseBody });
    }

    parts.Add(endKeyword);

    var statement = new IfStatementNode(
        parts,
        ifKeyword,
        condition,
        optionalCommasAfterCondition,
        body,
        elseKeyword,
        elseBody,
        endKeyword);
    SetParent(statement);
    return statement;
}
/// <summary>
/// Builds a parenthesized expression node: opening bracket, expression,
/// closing bracket.
/// </summary>
/// <returns>The new node, set as parent of the three given parts.</returns>
public ParenthesizedExpressionNode ParenthesizedExpression(
    TokenNode openParen,
    ExpressionNode expression,
    TokenNode closeParen)
{
    var parenthesized = new ParenthesizedExpressionNode(
        new List<SyntaxNode> { openParen, expression, closeParen },
        openParen,
        expression,
        closeParen);
    SetParent(parenthesized);
    return parenthesized;
}
/// <summary>
/// Builds a for statement node: keyword, loop assignment, body, end keyword.
/// </summary>
/// <returns>The new node, set as parent of the four given parts.</returns>
public ForStatementNode ForStatement(
    TokenNode forKeyword,
    AssignmentExpressionNode forAssignment,
    StatementListNode body,
    TokenNode endKeyword)
{
    var loop = new ForStatementNode(
        new List<SyntaxNode> { forKeyword, forAssignment, body, endKeyword },
        forKeyword,
        forAssignment,
        body,
        endKeyword);
    SetParent(loop);
    return loop;
}
}
}

513
Parser/SyntaxNode.cs Normal file
View File

@ -0,0 +1,513 @@
using System.Collections.Generic;
using System.Linq;
using Lexer;
namespace Parser
{
/// <summary>
/// Base class of all syntax tree nodes: a parent link plus an ordered list
/// of children.
/// </summary>
public class SyntaxNode
{
    /// <summary>Creates a node that uses <paramref name="children"/> as its child list.</summary>
    /// <param name="children">Child nodes in source order; stored as given, not copied.</param>
    public SyntaxNode(List<SyntaxNode> children)
    {
        Children = children;
    }

    /// <summary>The node this node is a child of; assigned from outside the class.</summary>
    public SyntaxNode Parent { get; set; }

    /// <summary>The list passed to the constructor.</summary>
    public List<SyntaxNode> Children { get; }

    /// <summary>The concatenation of the <c>FullText</c> of every child, in order.</summary>
    public virtual string FullText
    {
        get { return string.Concat(Children.Select(child => child.FullText)); }
    }
}
/// <summary>
/// Leaf node wrapping a single token.
/// </summary>
public class TokenNode : SyntaxNode
{
    /// <summary>Creates a node for <paramref name="token"/>.</summary>
    public TokenNode(Token token)
        : base(null) // no child list is created, so Children is null for this leaf
    {
        Token = token;
    }

    /// <summary>The wrapped token.</summary>
    public Token Token { get; }

    /// <summary>The <c>FullText</c> of the wrapped token.</summary>
    public override string FullText
    {
        get { return Token.FullText; }
    }
}
/// <summary>
/// Syntax node for an output identifier; adds nothing to <see cref="SyntaxNode"/>.
/// </summary>
public class OutputIdentifierNode : SyntaxNode
{
    /// <summary>Creates the node with the given children.</summary>
    public OutputIdentifierNode(List<SyntaxNode> children)
        : base(children)
    {
    }
}
/// <summary>
/// The output part of a function declaration.
/// </summary>
public class FunctionOutputDescriptionNode : SyntaxNode
{
    /// <summary>Creates the node.</summary>
    /// <param name="children">All nodes of the description, in source order.</param>
    /// <param name="outputs">The nodes that name the outputs.</param>
    public FunctionOutputDescriptionNode(List<SyntaxNode> children, List<TokenNode> outputs)
        : base(children)
    {
        Outputs = outputs;
    }

    /// <summary>The output list passed to the constructor.</summary>
    public List<TokenNode> Outputs { get; }
}
/// <summary>
/// The bracketed parameter list of a function declaration.
/// </summary>
public class FunctionInputDescriptionNode : SyntaxNode
{
    /// <summary>Creates the node from its children and its three named parts.</summary>
    public FunctionInputDescriptionNode(
        List<SyntaxNode> children,
        TokenNode openingBracket,
        ParameterListNode parameters,
        TokenNode closingBracket)
        : base(children)
    {
        ClosingBracket = closingBracket;
        Parameters = parameters;
        OpeningBracket = openingBracket;
    }

    /// <summary>The opening bracket token.</summary>
    public TokenNode OpeningBracket { get; }

    /// <summary>The parameters between the brackets.</summary>
    public ParameterListNode Parameters { get; }

    /// <summary>The closing bracket token.</summary>
    public TokenNode ClosingBracket { get; }
}
/// <summary>
/// A function declaration statement.
/// </summary>
public class FunctionDeclarationNode : StatementNode
{
    /// <summary>
    /// Creates the node from its children and its named parts;
    /// <paramref name="semicolonOrComma"/> is handed to the base class.
    /// </summary>
    public FunctionDeclarationNode(
        List<SyntaxNode> children,
        TokenNode token,
        FunctionOutputDescriptionNode outputDescription,
        TokenNode equalitySign,
        TokenNode name,
        FunctionInputDescriptionNode inputDescription,
        StatementListNode body,
        TokenNode end,
        TokenNode semicolonOrComma)
        : base(children, semicolonOrComma)
    {
        Token = token;
        OutputDescription = outputDescription;
        EqualitySign = equalitySign;
        Name = name;
        InputDescription = inputDescription;
        Body = body;
        End = end;
    }

    /// <summary>The first token of the declaration, as passed to the constructor.</summary>
    public TokenNode Token { get; }

    /// <summary>The output description.</summary>
    public FunctionOutputDescriptionNode OutputDescription { get; }

    /// <summary>The sign between outputs and name.</summary>
    public TokenNode EqualitySign { get; }

    /// <summary>The function name token.</summary>
    public TokenNode Name { get; }

    /// <summary>The input description.</summary>
    public FunctionInputDescriptionNode InputDescription { get; }

    /// <summary>The statements of the function body.</summary>
    public StatementListNode Body { get; }

    /// <summary>The value passed as <c>end</c> to the constructor.</summary>
    public TokenNode End { get; }
}
/// <summary>
/// A sequence of statements.
/// </summary>
public class StatementListNode : SyntaxNode
{
    /// <summary>Creates the list with the given children.</summary>
    public StatementListNode(List<SyntaxNode> children)
        : base(children)
    {
    }

    /// <summary>The same list instance as <c>Children</c>.</summary>
    public List<SyntaxNode> Statements
    {
        get { return Children; }
    }
}
/// <summary>
/// The parameter list of a function declaration.
/// </summary>
public class ParameterListNode : SyntaxNode
{
    /// <summary>Creates the node.</summary>
    /// <param name="children">All nodes of the list, in source order.</param>
    /// <param name="parameters">The nodes that are parameters.</param>
    public ParameterListNode(List<SyntaxNode> children, List<SyntaxNode> parameters)
        : base(children)
    {
        Parameters = parameters;
    }

    /// <summary>The parameter list passed to the constructor.</summary>
    public List<SyntaxNode> Parameters { get; }
}
/// <summary>
/// Base class of the expression nodes; adds nothing to <see cref="SyntaxNode"/>.
/// </summary>
public class ExpressionNode : SyntaxNode
{
    /// <summary>Creates the node with the given children.</summary>
    public ExpressionNode(List<SyntaxNode> children)
        : base(children)
    {
    }
}
/// <summary>
/// An assignment expression.
/// </summary>
public class AssignmentExpressionNode : ExpressionNode
{
    /// <summary>Creates the node from its children and its three named parts.</summary>
    public AssignmentExpressionNode(
        List<SyntaxNode> children,
        ExpressionNode lhs,
        TokenNode assignment,
        ExpressionNode rhs)
        : base(children)
    {
        Rhs = rhs;
        Assignment = assignment;
        Lhs = lhs;
    }

    /// <summary>The left operand.</summary>
    public ExpressionNode Lhs { get; }

    /// <summary>The assignment sign token.</summary>
    public TokenNode Assignment { get; }

    /// <summary>The right operand.</summary>
    public ExpressionNode Rhs { get; }
}
/// <summary>
/// A prefix operator applied to an operand.
/// </summary>
public class UnaryPrefixOperationExpressionNode : ExpressionNode
{
    /// <summary>Creates the node from its children, operator and operand.</summary>
    public UnaryPrefixOperationExpressionNode(
        List<SyntaxNode> children,
        TokenNode operation,
        ExpressionNode operand)
        : base(children)
    {
        Operand = operand;
        Operation = operation;
    }

    /// <summary>The operator token.</summary>
    public TokenNode Operation { get; }

    /// <summary>The operand.</summary>
    public ExpressionNode Operand { get; }
}
/// <summary>
/// A postfix operator applied to an operand.
/// </summary>
public class UnaryPostfixOperationExpressionNode : ExpressionNode
{
    /// <summary>Creates the node from its children, operand and operator.</summary>
    public UnaryPostfixOperationExpressionNode(
        List<SyntaxNode> children,
        ExpressionNode operand,
        TokenNode operation)
        : base(children)
    {
        Operation = operation;
        Operand = operand;
    }

    /// <summary>The operand.</summary>
    public ExpressionNode Operand { get; }

    /// <summary>The operator token.</summary>
    public TokenNode Operation { get; }
}
/// <summary>
/// An infix operator applied to two operands.
/// </summary>
public class BinaryOperationExpressionNode : ExpressionNode
{
    /// <summary>Creates the node from its children and its three named parts.</summary>
    public BinaryOperationExpressionNode(
        List<SyntaxNode> children,
        ExpressionNode lhs,
        TokenNode operation,
        ExpressionNode rhs)
        : base(children)
    {
        Rhs = rhs;
        Operation = operation;
        Lhs = lhs;
    }

    /// <summary>The left operand.</summary>
    public ExpressionNode Lhs { get; }

    /// <summary>The operator token.</summary>
    public TokenNode Operation { get; }

    /// <summary>The right operand.</summary>
    public ExpressionNode Rhs { get; }
}
/// <summary>
/// A switch statement.
/// </summary>
public class SwitchStatementNode : StatementNode
{
    /// <summary>
    /// Creates the node from its children and its named parts;
    /// <paramref name="semicolonOrComma"/> is handed to the base class.
    /// </summary>
    public SwitchStatementNode(
        List<SyntaxNode> children,
        TokenNode switchKeyword,
        ExpressionNode switchExpression,
        List<SwitchCaseNode> cases,
        TokenNode endKeyword,
        TokenNode semicolonOrComma)
        : base(children, semicolonOrComma)
    {
        EndKeyword = endKeyword;
        Cases = cases;
        SwitchExpression = switchExpression;
        SwitchKeyword = switchKeyword;
    }

    /// <summary>The <c>switch</c> keyword token.</summary>
    public TokenNode SwitchKeyword { get; }

    /// <summary>The expression being switched on.</summary>
    public ExpressionNode SwitchExpression { get; }

    /// <summary>The case branches, as passed to the constructor.</summary>
    public List<SwitchCaseNode> Cases { get; }

    /// <summary>The closing keyword token.</summary>
    public TokenNode EndKeyword { get; }
}
/// <summary>
/// Syntax node for one case of a switch statement: the case keyword, the
/// case identifier token and the statements belonging to the case.
/// </summary>
public class SwitchCaseNode : SyntaxNode
{
    /// <summary>Token node for the case keyword.</summary>
    public TokenNode CaseKeyword { get; }

    /// <summary>Token node identifying the case.</summary>
    public TokenNode CaseIdentifier { get; }

    /// <summary>Statement list of the case.</summary>
    public StatementListNode StatementList { get; }

    /// <summary>Creates the node and stores its parts.</summary>
    /// <param name="children">Child nodes, forwarded to the base constructor.</param>
    /// <param name="caseKeyword">Stored in <see cref="CaseKeyword"/>.</param>
    /// <param name="caseIdentifier">Stored in <see cref="CaseIdentifier"/>.</param>
    /// <param name="statementList">Stored in <see cref="StatementList"/>.</param>
    public SwitchCaseNode(
        List<SyntaxNode> children,
        TokenNode caseKeyword,
        TokenNode caseIdentifier,
        StatementListNode statementList)
        : base(children)
    {
        this.CaseKeyword = caseKeyword;
        this.CaseIdentifier = caseIdentifier;
        this.StatementList = statementList;
    }
}
/// <summary>
/// Expression node that wraps a single identifier token. It passes
/// <c>null</c> as its child list and reports the token's text as its own.
/// </summary>
public class IdentifierNameNode : ExpressionNode
{
    /// <summary>The wrapped token.</summary>
    public Token Token { get; }

    /// <summary>Creates the node around the given token.</summary>
    /// <param name="token">Stored in <see cref="Token"/>.</param>
    public IdentifierNameNode(Token token)
        : base(null)
    {
        this.Token = token;
    }

    /// <summary>Full text of the node, taken directly from the wrapped token.</summary>
    public override string FullText
    {
        get { return this.Token.FullText; }
    }
}
/// <summary>
/// Expression node that wraps a single number-literal token. It passes
/// <c>null</c> as its child list and reports the token's text as its own.
/// </summary>
public class NumberLiteralNode : ExpressionNode
{
    /// <summary>The wrapped token.</summary>
    public Token Token { get; }

    /// <summary>Creates the node around the given token.</summary>
    /// <param name="token">Stored in <see cref="Token"/>.</param>
    public NumberLiteralNode(Token token)
        : base(null)
    {
        this.Token = token;
    }

    /// <summary>Full text of the node, taken directly from the wrapped token.</summary>
    public override string FullText
    {
        get { return this.Token.FullText; }
    }
}
/// <summary>
/// Expression node that wraps a single string-literal token. It passes
/// <c>null</c> as its child list and reports the token's text as its own.
/// </summary>
public class StringLiteralNode : ExpressionNode
{
    /// <summary>The wrapped token.</summary>
    public Token Token { get; }

    /// <summary>Creates the node around the given token.</summary>
    /// <param name="token">Stored in <see cref="Token"/>.</param>
    public StringLiteralNode(Token token)
        : base(null)
    {
        this.Token = token;
    }

    /// <summary>Full text of the node, taken directly from the wrapped token.</summary>
    public override string FullText
    {
        get { return this.Token.FullText; }
    }
}
/// <summary>
/// Base class for statement nodes. Besides its children, a statement carries
/// an optional token node for the semicolon or comma associated with it.
/// </summary>
public class StatementNode : SyntaxNode
{
    /// <summary>
    /// Semicolon-or-comma token node of the statement. It is <c>null</c> when
    /// none was supplied, and is settable after construction.
    /// </summary>
    public TokenNode SemicolonOrComma { get; set; }

    /// <summary>Creates a statement node.</summary>
    /// <param name="children">Child nodes, forwarded to the base constructor.</param>
    /// <param name="semicolonOrComma">Initial value of <see cref="SemicolonOrComma"/>; defaults to <c>null</c>.</param>
    public StatementNode(List<SyntaxNode> children, TokenNode semicolonOrComma = null)
        : base(children)
    {
        this.SemicolonOrComma = semicolonOrComma;
    }
}
/// <summary>
/// Statement node that consists of a single expression.
/// </summary>
public class ExpressionStatementNode : StatementNode
{
    /// <summary>The expression making up the statement.</summary>
    public ExpressionNode Expression { get; }

    /// <summary>Creates the node and stores the expression.</summary>
    /// <param name="children">Child nodes, forwarded to the base constructor.</param>
    /// <param name="expression">Stored in <see cref="Expression"/>.</param>
    /// <param name="semicolonOrComma">Forwarded to the <see cref="StatementNode"/> constructor.</param>
    public ExpressionStatementNode(
        List<SyntaxNode> children,
        ExpressionNode expression,
        TokenNode semicolonOrComma)
        : base(children, semicolonOrComma)
    {
        this.Expression = expression;
    }
}
/// <summary>
/// Expression node for accessing an element of a cell array: the cell-array
/// expression, an opening brace, the index expression and a closing brace.
/// </summary>
public class CellArrayElementAccessExpressionNode : ExpressionNode
{
    /// <summary>Expression producing the cell array being indexed.</summary>
    public ExpressionNode CellArray { get; }

    /// <summary>Token node for the opening brace.</summary>
    public TokenNode OpeningBrace { get; }

    /// <summary>Index expression between the braces.</summary>
    public ExpressionNode Index { get; }

    /// <summary>Token node for the closing brace.</summary>
    public TokenNode ClosingBrace { get; }

    /// <summary>Creates the node and stores its parts.</summary>
    /// <param name="children">Child nodes, forwarded to the base constructor.</param>
    /// <param name="cellArray">Stored in <see cref="CellArray"/>.</param>
    /// <param name="openingBrace">Stored in <see cref="OpeningBrace"/>.</param>
    /// <param name="index">Stored in <see cref="Index"/>.</param>
    /// <param name="closingBrace">Stored in <see cref="ClosingBrace"/>.</param>
    public CellArrayElementAccessExpressionNode(
        List<SyntaxNode> children,
        ExpressionNode cellArray,
        TokenNode openingBrace,
        ExpressionNode index,
        TokenNode closingBrace)
        : base(children)
    {
        this.CellArray = cellArray;
        this.OpeningBrace = openingBrace;
        this.Index = index;
        this.ClosingBrace = closingBrace;
    }
}
/// <summary>
/// Expression node for a function call: the expression naming the function,
/// an opening bracket, the parameter list and a closing bracket.
/// </summary>
public class FunctionCallExpressionNode : ExpressionNode
{
    /// <summary>Expression naming the function being called.</summary>
    public ExpressionNode FunctionName { get; }

    /// <summary>Token node for the opening bracket.</summary>
    public TokenNode OpeningBracket { get; }

    /// <summary>Parameter list between the brackets.</summary>
    public FunctionCallParameterListNode Parameters { get; }

    /// <summary>Token node for the closing bracket.</summary>
    public TokenNode ClosingBracket { get; }

    /// <summary>Creates the node and stores its parts.</summary>
    /// <param name="children">Child nodes, forwarded to the base constructor.</param>
    /// <param name="functionName">Stored in <see cref="FunctionName"/>.</param>
    /// <param name="openingBracket">Stored in <see cref="OpeningBracket"/>.</param>
    /// <param name="parameters">Stored in <see cref="Parameters"/>.</param>
    /// <param name="closingBracket">Stored in <see cref="ClosingBracket"/>.</param>
    public FunctionCallExpressionNode(
        List<SyntaxNode> children,
        ExpressionNode functionName,
        TokenNode openingBracket,
        FunctionCallParameterListNode parameters,
        TokenNode closingBracket)
        : base(children)
    {
        this.FunctionName = functionName;
        this.OpeningBracket = openingBracket;
        this.Parameters = parameters;
        this.ClosingBracket = closingBracket;
    }
}
/// <summary>
/// Syntax node for the parameter list of a function call.
/// </summary>
public class FunctionCallParameterListNode : SyntaxNode
{
    /// <summary>
    /// Parameter expressions of the call, as supplied to the constructor.
    /// Exposed as a get-only property (rather than a public field) so the
    /// reference cannot be reassigned after construction, matching the other
    /// node classes in this file.
    /// </summary>
    public List<ExpressionNode> Parameters { get; }

    /// <summary>Creates the node and stores the parameter expressions.</summary>
    /// <param name="children">Child nodes, forwarded to the base constructor.</param>
    /// <param name="parameters">Stored in <see cref="Parameters"/> (the same list instance, not a copy).</param>
    public FunctionCallParameterListNode(
        List<SyntaxNode> children,
        List<ExpressionNode> parameters)
        : base(children)
    {
        Parameters = parameters;
    }
}
/// <summary>
/// Syntax node for the list of element expressions inside an array or
/// cell-array literal.
/// </summary>
public class ArrayElementListNode : SyntaxNode
{
    /// <summary>
    /// Element expressions, as supplied to the constructor. Exposed as a
    /// get-only property (rather than a public field) so the reference cannot
    /// be reassigned after construction, matching the other node classes in
    /// this file.
    /// </summary>
    public List<ExpressionNode> Elements { get; }

    /// <summary>Creates the node and stores the element expressions.</summary>
    /// <param name="children">Child nodes, forwarded to the base constructor.</param>
    /// <param name="elements">Stored in <see cref="Elements"/> (the same list instance, not a copy).</param>
    public ArrayElementListNode(
        List<SyntaxNode> children,
        List<ExpressionNode> elements)
        : base(children)
    {
        Elements = elements;
    }
}
/// <summary>
/// Expression node for an array literal: an opening square bracket, the
/// element list and a closing square bracket.
/// </summary>
public class ArrayLiteralExpressionNode : ExpressionNode
{
    /// <summary>Token node for the opening square bracket.</summary>
    public TokenNode OpeningSquareBracket { get; }

    /// <summary>Element list between the brackets.</summary>
    public ArrayElementListNode Elements { get; }

    /// <summary>Token node for the closing square bracket.</summary>
    public TokenNode ClosingSquareBracket { get; }

    /// <summary>Creates the node and stores its parts.</summary>
    /// <param name="children">Child nodes, forwarded to the base constructor.</param>
    /// <param name="openingSquareBracket">Stored in <see cref="OpeningSquareBracket"/>.</param>
    /// <param name="elements">Stored in <see cref="Elements"/>.</param>
    /// <param name="closingSquareBracket">Stored in <see cref="ClosingSquareBracket"/>.</param>
    public ArrayLiteralExpressionNode(
        List<SyntaxNode> children,
        TokenNode openingSquareBracket,
        ArrayElementListNode elements,
        TokenNode closingSquareBracket)
        : base(children)
    {
        this.OpeningSquareBracket = openingSquareBracket;
        this.Elements = elements;
        this.ClosingSquareBracket = closingSquareBracket;
    }
}
/// <summary>
/// Expression node for a cell-array literal: an opening brace, the element
/// list and a closing brace.
/// </summary>
public class CellArrayLiteralExpressionNode : ExpressionNode
{
    /// <summary>Token node for the opening brace.</summary>
    public TokenNode OpeningBrace { get; }

    /// <summary>Element list between the braces.</summary>
    public ArrayElementListNode Elements { get; }

    /// <summary>Token node for the closing brace.</summary>
    public TokenNode ClosingBrace { get; }

    /// <summary>Creates the node and stores its parts.</summary>
    /// <param name="children">Child nodes, forwarded to the base constructor.</param>
    /// <param name="openingBrace">Stored in <see cref="OpeningBrace"/>.</param>
    /// <param name="elements">Stored in <see cref="Elements"/>.</param>
    /// <param name="closingBrace">Stored in <see cref="ClosingBrace"/>.</param>
    public CellArrayLiteralExpressionNode(
        List<SyntaxNode> children,
        TokenNode openingBrace,
        ArrayElementListNode elements,
        TokenNode closingBrace)
        : base(children)
    {
        this.OpeningBrace = openingBrace;
        this.Elements = elements;
        this.ClosingBrace = closingBrace;
    }
}
/// <summary>
/// Expression node that carries no data of its own.
/// </summary>
public class EmptyExpressionNode : ExpressionNode
{
    /// <summary>
    /// Creates the node, passing <c>null</c> as the child list.
    /// NOTE(review): this class does not override FullText, so confirm that
    /// the SyntaxNode base tolerates a null child list.
    /// </summary>
    public EmptyExpressionNode()
        : base(null)
    {
    }
}
/// <summary>
/// Expression node for a member access: a left operand, the dot token and a
/// right operand. Both operands are typed as plain <see cref="SyntaxNode"/>.
/// </summary>
public class MemberAccessNode : ExpressionNode
{
    /// <summary>Node on the left of the dot.</summary>
    public SyntaxNode LeftOperand { get; }

    /// <summary>Token node for the dot.</summary>
    public TokenNode Dot { get; }

    /// <summary>Node on the right of the dot.</summary>
    public SyntaxNode RightOperand { get; }

    /// <summary>Creates the node and stores its parts.</summary>
    /// <param name="children">Child nodes, forwarded to the base constructor.</param>
    /// <param name="leftOperand">Stored in <see cref="LeftOperand"/>.</param>
    /// <param name="dot">Stored in <see cref="Dot"/>.</param>
    /// <param name="rightOperand">Stored in <see cref="RightOperand"/>.</param>
    public MemberAccessNode(
        List<SyntaxNode> children,
        SyntaxNode leftOperand,
        TokenNode dot,
        SyntaxNode rightOperand)
        : base(children)
    {
        this.LeftOperand = leftOperand;
        this.Dot = dot;
        this.RightOperand = rightOperand;
    }
}
/// <summary>
/// Statement node for a <c>while</c> loop: the while keyword, the condition,
/// any commas following the condition, the body and the end token.
/// </summary>
public class WhileStatementNode : StatementNode
{
    /// <summary>Token node for the while keyword.</summary>
    public TokenNode WhileKeyword { get; }

    /// <summary>Loop condition expression.</summary>
    public ExpressionNode Condition { get; }

    /// <summary>Comma token nodes that follow the condition, as supplied to the constructor.</summary>
    public List<TokenNode> OptionalCommasAfterCondition { get; }

    /// <summary>Statement list forming the loop body.</summary>
    public StatementListNode Body { get; }

    /// <summary>Token node for the end keyword.</summary>
    public TokenNode End { get; }

    /// <summary>Creates the node and stores its parts.</summary>
    /// <param name="children">Child nodes, forwarded to the base constructor.</param>
    /// <param name="whileKeyword">Stored in <see cref="WhileKeyword"/>.</param>
    /// <param name="condition">Stored in <see cref="Condition"/>.</param>
    /// <param name="optionalCommasAfterCondition">Stored in <see cref="OptionalCommasAfterCondition"/>.</param>
    /// <param name="body">Stored in <see cref="Body"/>.</param>
    /// <param name="end">Stored in <see cref="End"/>.</param>
    /// <param name="semicolonOrComma">Forwarded to the <see cref="StatementNode"/> constructor.</param>
    public WhileStatementNode(
        List<SyntaxNode> children,
        TokenNode whileKeyword,
        ExpressionNode condition,
        List<TokenNode> optionalCommasAfterCondition,
        StatementListNode body,
        TokenNode end,
        TokenNode semicolonOrComma)
        : base(children, semicolonOrComma)
    {
        this.WhileKeyword = whileKeyword;
        this.Condition = condition;
        this.OptionalCommasAfterCondition = optionalCommasAfterCondition;
        this.Body = body;
        this.End = end;
    }
}
/// <summary>
/// Statement node for an <c>if</c> statement: the if keyword, the condition,
/// any commas following the condition, the body, an else keyword with its
/// body, and the end keyword.
/// </summary>
public class IfStatementNode : StatementNode
{
    /// <summary>Token node for the if keyword.</summary>
    public TokenNode IfKeyword { get; }

    /// <summary>Condition expression.</summary>
    public ExpressionNode Condition { get; }

    /// <summary>Comma token nodes that follow the condition, as supplied to the constructor.</summary>
    public List<TokenNode> OptionalCommasAfterCondition { get; }

    /// <summary>Statement list executed when the condition holds.</summary>
    public StatementListNode Body { get; }

    /// <summary>
    /// Token node for the else keyword.
    /// NOTE(review): presumably null when the statement has no else branch; confirm against the parser.
    /// </summary>
    public TokenNode ElseKeyword { get; }

    /// <summary>
    /// Statement list of the else branch.
    /// NOTE(review): presumably null when the statement has no else branch; confirm against the parser.
    /// </summary>
    public StatementListNode ElseBody { get; }

    /// <summary>Token node for the end keyword.</summary>
    public TokenNode EndKeyword { get; }

    /// <summary>Creates the node and stores its parts.</summary>
    /// <param name="children">Child nodes, forwarded to the base constructor.</param>
    /// <param name="ifKeyword">Stored in <see cref="IfKeyword"/>.</param>
    /// <param name="condition">Stored in <see cref="Condition"/>.</param>
    /// <param name="optionalCommasAfterCondition">Stored in <see cref="OptionalCommasAfterCondition"/>.</param>
    /// <param name="body">Stored in <see cref="Body"/>.</param>
    /// <param name="elseKeyword">Stored in <see cref="ElseKeyword"/>.</param>
    /// <param name="elseBody">Stored in <see cref="ElseBody"/>.</param>
    /// <param name="endKeyword">Stored in <see cref="EndKeyword"/>.</param>
    /// <param name="semicolonOrComma">
    /// Optional trailing token forwarded to the <see cref="StatementNode"/>
    /// constructor, mirroring the switch and while statement nodes. Defaults
    /// to <c>null</c>, which is what the base constructor received before
    /// this parameter existed, so existing callers are unaffected.
    /// </param>
    public IfStatementNode(
        List<SyntaxNode> children,
        TokenNode ifKeyword,
        ExpressionNode condition,
        List<TokenNode> optionalCommasAfterCondition,
        StatementListNode body,
        TokenNode elseKeyword,
        StatementListNode elseBody,
        TokenNode endKeyword,
        TokenNode semicolonOrComma = null)
        : base(children, semicolonOrComma)
    {
        IfKeyword = ifKeyword;
        Condition = condition;
        OptionalCommasAfterCondition = optionalCommasAfterCondition;
        Body = body;
        ElseKeyword = elseKeyword;
        ElseBody = elseBody;
        EndKeyword = endKeyword;
    }
}
/// <summary>
/// Expression node for an expression wrapped in parentheses.
/// </summary>
public class ParenthesizedExpressionNode : ExpressionNode
{
    /// <summary>Token node for the opening parenthesis.</summary>
    public TokenNode OpenParen { get; }

    /// <summary>The wrapped expression.</summary>
    public ExpressionNode Expression { get; }

    /// <summary>Token node for the closing parenthesis.</summary>
    public TokenNode CloseParen { get; }

    /// <summary>Creates the node and stores its parts.</summary>
    /// <param name="children">Child nodes, forwarded to the base constructor.</param>
    /// <param name="openParen">Stored in <see cref="OpenParen"/>.</param>
    /// <param name="expression">Stored in <see cref="Expression"/>.</param>
    /// <param name="closeParen">Stored in <see cref="CloseParen"/>.</param>
    public ParenthesizedExpressionNode(
        List<SyntaxNode> children,
        TokenNode openParen,
        ExpressionNode expression,
        TokenNode closeParen)
        : base(children)
    {
        this.OpenParen = openParen;
        this.Expression = expression;
        this.CloseParen = closeParen;
    }
}
/// <summary>
/// Statement node for a <c>for</c> loop: the for keyword, the assignment
/// that drives the loop, the body and the end keyword.
/// </summary>
public class ForStatementNode : StatementNode
{
    /// <summary>Token node for the for keyword.</summary>
    public TokenNode ForKeyword { get; }

    /// <summary>Assignment expression following the for keyword.</summary>
    public AssignmentExpressionNode ForAssignment { get; }

    /// <summary>Statement list forming the loop body.</summary>
    public StatementListNode Body { get; }

    /// <summary>Token node for the end keyword.</summary>
    public TokenNode EndKeyword { get; }

    /// <summary>Creates the node and stores its parts.</summary>
    /// <param name="children">Child nodes, forwarded to the base constructor.</param>
    /// <param name="forKeyword">Stored in <see cref="ForKeyword"/>.</param>
    /// <param name="forAssignment">Stored in <see cref="ForAssignment"/>.</param>
    /// <param name="body">Stored in <see cref="Body"/>.</param>
    /// <param name="endKeyword">Stored in <see cref="EndKeyword"/>.</param>
    /// <param name="semicolonOrComma">
    /// Optional trailing token forwarded to the <see cref="StatementNode"/>
    /// constructor, mirroring the switch and while statement nodes. Defaults
    /// to <c>null</c>, which is what the base constructor received before
    /// this parameter existed, so existing callers are unaffected.
    /// </param>
    public ForStatementNode(
        List<SyntaxNode> children,
        TokenNode forKeyword,
        AssignmentExpressionNode forAssignment,
        StatementListNode body,
        TokenNode endKeyword,
        TokenNode semicolonOrComma = null)
        : base(children, semicolonOrComma)
    {
        ForKeyword = forKeyword;
        ForAssignment = forAssignment;
        Body = body;
        EndKeyword = endKeyword;
    }
}
}

View File

@ -1,12 +1,136 @@
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using Lexer;
using Parser;
namespace ProjectConsole
{
/// <summary>
/// Console driver that walks a directory tree, lexes and parses every
/// <c>*.m</c> file in it, and checks that concatenating the tokens' full text
/// reproduces the file contents exactly.
/// </summary>
class Program
{
    // Default root directory; can be overridden by the first command-line argument.
    //private const string BaseDirectory = @"C:\Program Files\MATLAB\R2018a\toolbox\matlab\";
    private const string BaseDirectory = @"/Applications/MATLAB_R2017b.app/toolbox/matlab/guide/";

    // Files that are not processed, given relative to the base directory with
    // '/' as separator. ProcessDirectory normalizes '\' to '/' before the
    // lookup, so one entry per file covers both separator styles.
    private static readonly HashSet<string> skipFiles = new HashSet<string>
    {
        @"codetools/private/template.m", // this is a template, so it contains '$' characters.
        @"plottools/+matlab/+graphics/+internal/+propertyinspector/+views/CategoricalHistogramPropertyView.m", // this one contains a 0xA0 character (probably it's 'non-breakable space' in Win-1252).
        @"plottools/+matlab/+graphics/+internal/+propertyinspector/+views/PrimitiveHistogram2PropertyView.m", // same
        @"plottools/+matlab/+graphics/+internal/+propertyinspector/+views/PrimitiveHistogramPropertyView.m", // same
    };

    // Per-TokenKind counters filled by the optional diagnostics
    // (FirstToken / AfterFunction); indexed by the enum's integer value.
    private static readonly int[] firstTokenCount;
    private static readonly int[] afterFunctionCount;

    /// <summary>
    /// Sizes the diagnostic counter arrays so that every <c>TokenKind</c>
    /// value can be used as an index.
    /// </summary>
    static Program()
    {
        var maxKind = ((int[]) typeof(TokenKind).GetEnumValues()).Max();
        firstTokenCount = new int[maxKind + 1];
        afterFunctionCount = new int[maxKind + 1];
    }

    /// <summary>
    /// Lexes and parses a single file and verifies the token round trip.
    /// </summary>
    /// <param name="fileName">Path of the file to process.</param>
    /// <exception cref="ApplicationException">
    /// The concatenated full text of the tokens differs from the file contents.
    /// </exception>
    static void ProcessFile(string fileName)
    {
        var text = File.ReadAllText(fileName);
        Console.WriteLine($"Parsing {fileName}...");
        var window = new TextWindowWithNull(text, fileName);
        ILexer<Token> lexer = new MLexer(window, new PureTokenFactory(window));
        var tokens = lexer.ParseAll();
        //AfterFunction(tokens);
        //FirstToken(tokens);
        var parser = new MParser(tokens);
        // The resulting tree is not inspected here; the call is made so that
        // the parser runs over every file.
        parser.Parse();
        var back = string.Join("", tokens.Select(token => token.FullText));
        if (text != back)
        {
            throw new ApplicationException(
                $"Round-trip check failed for '{fileName}': the concatenated token text differs from the file contents.");
        }
    }

    /// <summary>
    /// Diagnostic: for every identifier token spelled <c>function</c>, counts
    /// the kind of the token that follows it and prints the pair when that
    /// kind is neither an identifier nor an opening square bracket.
    /// </summary>
    /// <param name="tokens">Tokens of one file.</param>
    static void AfterFunction(List<Token> tokens)
    {
        // Stop one short of the end: a trailing "function" token has no
        // successor to look at, and indexing past the list would throw.
        for (var i = 0; i + 1 < tokens.Count; i++)
        {
            var current = tokens[i].PureToken;
            if (current.Kind != TokenKind.Identifier || current.LiteralText != "function")
            {
                continue;
            }

            var nextKind = tokens[i + 1].PureToken.Kind;
            afterFunctionCount[(int) nextKind]++;
            if (nextKind != TokenKind.Identifier && nextKind != TokenKind.OpeningSquareBracket)
            {
                Console.WriteLine("===EXAMPLE===");
                Console.WriteLine($"{tokens[i]}{tokens[i+1]}");
            }
        }
    }

    /// <summary>
    /// Diagnostic: counts the kind of the first token of a file.
    /// </summary>
    /// <param name="tokens">Tokens of one file; an empty list is ignored.</param>
    static void FirstToken(List<Token> tokens)
    {
        if (tokens.Count == 0)
        {
            return;
        }

        var firstKind = tokens[0].PureToken.Kind;
        firstTokenCount[(int) firstKind]++;
    }

    /// <summary>
    /// Prints the counters collected by <see cref="AfterFunction"/>, one line
    /// per <c>TokenKind</c> value (including zero counts).
    /// </summary>
    static void AfterFunctionFinish()
    {
        for (var i = 0; i < afterFunctionCount.Length; i++)
        {
            Console.WriteLine($"{(TokenKind)i}: {afterFunctionCount[i]}.");
        }
    }

    /// <summary>
    /// Prints the non-zero counters collected by <see cref="FirstToken"/>.
    /// </summary>
    static void FirstTokenFinish()
    {
        for (var i = 0; i < firstTokenCount.Length; i++)
        {
            if (firstTokenCount[i] != 0)
            {
                Console.WriteLine($"{(TokenKind) i}: {firstTokenCount[i]}.");
            }
        }
    }

    /// <summary>
    /// Processes every <c>*.m</c> file in <paramref name="directory"/> and,
    /// recursively, in its subdirectories, skipping the files listed in the
    /// skip list.
    /// </summary>
    /// <param name="directory">Directory to process.</param>
    /// <param name="baseDirectory">
    /// Root against which skip-list paths are resolved; defaults to the
    /// built-in base directory.
    /// </param>
    /// <returns>Number of files that were processed (skipped files are not counted).</returns>
    static int ProcessDirectory(string directory, string baseDirectory = BaseDirectory)
    {
        var counter = 0;
        foreach (var file in Directory.GetFiles(directory, "*.m"))
        {
            // Normalize separators so the '/'-separated skip list matches
            // whichever separator the relative path comes back with.
            var relativePath = Path.GetRelativePath(baseDirectory, file).Replace('\\', '/');
            if (skipFiles.Contains(relativePath))
            {
                continue;
            }

            ProcessFile(file);
            counter++;
        }

        foreach (var subDirectory in Directory.GetDirectories(directory))
        {
            counter += ProcessDirectory(subDirectory, baseDirectory);
        }

        return counter;
    }

    /// <summary>
    /// Entry point: processes the directory tree and reports the number of
    /// files parsed and the elapsed time.
    /// </summary>
    /// <param name="args">
    /// Optional: the first argument overrides the built-in base directory.
    /// </param>
    static void Main(string[] args)
    {
        Console.WriteLine("Hello World!");
        var baseDirectory = args.Length > 0 ? args[0] : BaseDirectory;
        if (!Directory.Exists(baseDirectory))
        {
            // Report a missing root clearly instead of failing with an
            // unhandled DirectoryNotFoundException from Directory.GetFiles.
            Console.Error.WriteLine($"Directory not found: {baseDirectory}");
            Environment.ExitCode = 1;
            return;
        }

        var sw = Stopwatch.StartNew();
        var processed = ProcessDirectory(baseDirectory, baseDirectory);
        sw.Stop();
        Console.WriteLine($"{processed} files parsed. Elapsed: {sw.Elapsed}.");
        //AfterFunctionFinish();
        //FirstTokenFinish();

        // Console.ReadKey throws InvalidOperationException when standard
        // input is redirected (e.g. when run from a script or CI), so only
        // wait for a key press on an interactive console.
        if (!Console.IsInputRedirected)
        {
            Console.ReadKey();
        }
    }
}
}

View File

@ -1,8 +1,9 @@
<Project Sdk="Microsoft.NET.Sdk">
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>netcoreapp2.0</TargetFramework>
</PropertyGroup>
<ItemGroup>
<ProjectReference Include="..\Parser\Parser.csproj" />
</ItemGroup>
</Project>

View File

@ -5,6 +5,14 @@ VisualStudioVersion = 15.0.27130.2026
MinimumVisualStudioVersion = 10.0.40219.1
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ProjectConsole", "ProjectConsole\ProjectConsole.csproj", "{5025FD8F-0F1A-43E5-A996-7753BC703D62}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Parser", "Parser\Parser.csproj", "{B20EDC10-E6E6-4430-8527-B95206DEF941}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Parser.Tests", "Parser.Tests\Parser.Tests.csproj", "{83008C72-2EFC-41EB-AC8D-023C6AE1709F}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lexer", "Lexer\Lexer.csproj", "{1B8E5BBC-E5CD-427B-A6C7-F30047AA4A39}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lexer.Tests", "Lexer.Tests\Lexer.Tests.csproj", "{7BFEAD86-EAC3-43C8-9388-EBAB377938D4}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
@ -15,6 +23,22 @@ Global
{5025FD8F-0F1A-43E5-A996-7753BC703D62}.Debug|Any CPU.Build.0 = Debug|Any CPU
{5025FD8F-0F1A-43E5-A996-7753BC703D62}.Release|Any CPU.ActiveCfg = Release|Any CPU
{5025FD8F-0F1A-43E5-A996-7753BC703D62}.Release|Any CPU.Build.0 = Release|Any CPU
{B20EDC10-E6E6-4430-8527-B95206DEF941}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{B20EDC10-E6E6-4430-8527-B95206DEF941}.Debug|Any CPU.Build.0 = Debug|Any CPU
{B20EDC10-E6E6-4430-8527-B95206DEF941}.Release|Any CPU.ActiveCfg = Release|Any CPU
{B20EDC10-E6E6-4430-8527-B95206DEF941}.Release|Any CPU.Build.0 = Release|Any CPU
{83008C72-2EFC-41EB-AC8D-023C6AE1709F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{83008C72-2EFC-41EB-AC8D-023C6AE1709F}.Debug|Any CPU.Build.0 = Debug|Any CPU
{83008C72-2EFC-41EB-AC8D-023C6AE1709F}.Release|Any CPU.ActiveCfg = Release|Any CPU
{83008C72-2EFC-41EB-AC8D-023C6AE1709F}.Release|Any CPU.Build.0 = Release|Any CPU
{1B8E5BBC-E5CD-427B-A6C7-F30047AA4A39}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{1B8E5BBC-E5CD-427B-A6C7-F30047AA4A39}.Debug|Any CPU.Build.0 = Debug|Any CPU
{1B8E5BBC-E5CD-427B-A6C7-F30047AA4A39}.Release|Any CPU.ActiveCfg = Release|Any CPU
{1B8E5BBC-E5CD-427B-A6C7-F30047AA4A39}.Release|Any CPU.Build.0 = Release|Any CPU
{7BFEAD86-EAC3-43C8-9388-EBAB377938D4}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{7BFEAD86-EAC3-43C8-9388-EBAB377938D4}.Debug|Any CPU.Build.0 = Debug|Any CPU
{7BFEAD86-EAC3-43C8-9388-EBAB377938D4}.Release|Any CPU.ActiveCfg = Release|Any CPU
{7BFEAD86-EAC3-43C8-9388-EBAB377938D4}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE