using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Text;

namespace Parser.Internal
{
    /// <summary>
    /// Lexer that turns the characters of an <see cref="ITextWindow"/> into
    /// <see cref="SyntaxToken"/>s with leading and trailing trivia attached.
    /// Problems found while lexing are collected in <see cref="Diagnostics"/>.
    /// </summary>
    internal class MLexerGreen : ILexer<(SyntaxToken, Position)>
    {
        /// <summary>Mutable scratch record filled in while a single token is being lexed.</summary>
        private class TokenInfo
        {
            public TokenKind Kind { get; set; } = TokenKind.None;
            public string Text { get; set; } = "";
            public string StringValue { get; set; } = "";
            public double DoubleValue { get; set; }
            public bool ImaginaryFlag { get; set; }
        }

        /// <summary>Result of scanning the body of a quoted string literal.</summary>
        private enum StringLexStatus
        {
            Ok,
            UnexpectedEndOfFile,
            UnexpectedEndOfLine,
        }

        private enum NumberParsingState
        {
            Start,
            DigitsBeforeDot,
            AfterDot,
            DigitsAfterDot,
            AfterE,
            SignAfterE,
            DigitsAfterE
        }

        private ITextWindow Window { get; }

        /// <summary>The token most recently returned by <see cref="NextToken"/>.</summary>
        private SyntaxToken LastToken { get; set; }

        /// <summary>
        /// Number of tokens produced since the last token whose trailing trivia
        /// contained a newline; 0 means the next token starts a line.
        /// </summary>
        private int TokensSinceNewLine { get; set; }

        /// <summary>Opening bracket kinds that have not been closed yet.</summary>
        private Stack<TokenKind> TokenStack { get; }

        public DiagnosticsBag Diagnostics { get; } = new DiagnosticsBag();

        public MLexerGreen(ITextWindow window)
        {
            Window = window;
            LastToken = SyntaxToken.NoneToken;
            TokenStack = new Stack<TokenKind>();
        }

        /// <summary>
        /// Lexes a comment starting at the current '%' character. A "%{" at the start of a
        /// line is handed to <see cref="LexMultilineComment"/>; otherwise the comment runs
        /// to the end of the line (the line terminator itself is not consumed).
        /// </summary>
        private SyntaxTrivia LexComment()
        {
            if (TokensSinceNewLine == 0 && Window.PeekChar(1) == '{')
            {
                return LexMultilineComment();
            }

            var length = 1;
            while (!SyntaxFacts.IsEolOrEof(Window.PeekChar(length)))
            {
                length++;
            }

            return TokenFactory.CreateTrivia(TokenKind.CommentToken, Window.GetAndConsumeChars(length));
        }

        /// <summary>
        /// Lexes a comment that starts with "%{". If anything other than whitespace follows
        /// "%{" on the same line, the comment is treated as a one-line comment. Otherwise it
        /// extends up to and including the first "%}". Reaching the end of input first is
        /// reported as a diagnostic and the rest of the input becomes the comment.
        /// </summary>
        private SyntaxTrivia LexMultilineComment()
        {
            var n = 2; // skip the "%{" opener
            var previousWasPercent = false;
            var onOpeningLine = true;
            while (true)
            {
                var c = Window.PeekChar(n);
                if (c == '\0')
                {
                    Diagnostics.ReportUnexpectedEndOfFile(new TextSpan(Window.Position.Offset, 0));
                    return TokenFactory.CreateTrivia(TokenKind.CommentToken, Window.GetAndConsumeChars(n));
                }

                if (c == '\n' || (c == '\r' && Window.PeekChar(n + 1) == '\n'))
                {
                    onOpeningLine = false;
                }

                if (onOpeningLine && !SyntaxFacts.IsWhitespace(c))
                {
                    // Text after "%{" on the opening line: this is a one-line comment.
                    while (!SyntaxFacts.IsEolOrEof(Window.PeekChar(n)))
                    {
                        n++;
                    }

                    return TokenFactory.CreateTrivia(TokenKind.CommentToken, Window.GetAndConsumeChars(n));
                }

                if (previousWasPercent && c == '}')
                {
                    // Include the closing '}' in the comment text.
                    return TokenFactory.CreateTrivia(TokenKind.CommentToken, Window.GetAndConsumeChars(n + 1));
                }

                previousWasPercent = c == '%';
                n++;
            }
        }

        /// <summary>
        /// Lexes "..." and everything after it on the line as a comment, then consumes the
        /// line terminator as whitespace trivia (not newline trivia), so the token counter
        /// kept by <see cref="NextToken"/> is not reset by it.
        /// </summary>
        private List<SyntaxTrivia> LexCommentAfterDotDotDot()
        {
            var length = 0;
            while (!SyntaxFacts.IsEolOrEof(Window.PeekChar(length)))
            {
                length++;
            }

            var comment = TokenFactory.CreateTrivia(TokenKind.CommentToken, Window.GetAndConsumeChars(length));
            var result = new List<SyntaxTrivia> { comment };

            var character = Window.PeekChar();
            if (character == '\r' && Window.PeekChar(1) == '\n')
            {
                // Consume CRLF as one unit; otherwise the leftover '\n' would be lexed
                // as a newline trivia by LexTrivia.
                result.Add(TokenFactory.CreateTrivia(TokenKind.WhitespaceToken, Window.GetAndConsumeChars(2)));
            }
            else if (character == '\n' || character == '\r')
            {
                Window.ConsumeChar();
                result.Add(TokenFactory.CreateTrivia(TokenKind.WhitespaceToken, character.ToString()));
            }

            return result;
        }

        /// <summary>
        /// Collects whitespace, newline and comment trivia from the current position.
        /// </summary>
        /// <param name="isTrailing">
        /// When true, scanning stops right after the first newline trivia.
        /// </param>
        private List<SyntaxTrivia> LexTrivia(bool isTrailing)
        {
            var triviaList = new List<SyntaxTrivia>();
            var pendingWhitespace = new StringBuilder();

            // Emits accumulated spaces/tabs as a single whitespace trivia.
            void FlushWhitespace()
            {
                if (pendingWhitespace.Length > 0)
                {
                    triviaList.Add(TokenFactory.CreateTrivia(TokenKind.WhitespaceToken, pendingWhitespace.ToString()));
                }

                pendingWhitespace.Clear();
            }

            while (true)
            {
                var character = Window.PeekChar();
                switch (character)
                {
                    case ' ':
                    case '\t':
                        Window.ConsumeChar();
                        pendingWhitespace.Append(character);
                        break;

                    case '\r':
                    case '\n':
                        FlushWhitespace();
                        Window.ConsumeChar();
                        triviaList.Add(TokenFactory.CreateTrivia(TokenKind.NewlineToken, character.ToString()));
                        if (isTrailing)
                        {
                            return triviaList;
                        }

                        break;

                    case '%':
                        FlushWhitespace();
                        triviaList.Add(LexComment());
                        break;

                    case '.':
                        FlushWhitespace();
                        if (Window.PeekChar(1) == '.' && Window.PeekChar(2) == '.')
                        {
                            triviaList.AddRange(LexCommentAfterDotDotDot());
                            break;
                        }

                        // A lone '.' belongs to the next token.
                        return triviaList;

                    default:
                        FlushWhitespace();
                        return triviaList;
                }
            }
        }

        private static bool IsAsciiLetter(char c)
        {
            return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
        }

        private static bool IsLetterOrDigitOrUnderscore(char c)
        {
            return IsAsciiLetter(c) || (c >= '0' && c <= '9') || (c == '_');
        }

        private static bool IsExponentMarker(char c)
        {
            return c == 'e' || c == 'E';
        }

        /// <summary>
        /// Lexes an identifier; the caller has already checked that the current
        /// character is an ASCII letter.
        /// </summary>
        private bool ContinueLexingIdentifier(ref TokenInfo tokenInfo)
        {
            var length = 1;
            while (IsLetterOrDigitOrUnderscore(Window.PeekChar(length)))
            {
                length++;
            }

            tokenInfo.Kind = TokenKind.IdentifierToken;
            tokenInfo.Text = Window.GetAndConsumeChars(length);
            return true;
        }

        /// <summary>
        /// Lexes everything up to the next space, line terminator or end of input as an
        /// unquoted string literal.
        /// </summary>
        private bool ContinueParsingUnquotedStringLiteral(ref TokenInfo tokenInfo)
        {
            var length = 0;
            while (true)
            {
                var c = Window.PeekChar(length);

                // '\r' is a terminator too, so CRLF input does not leave a stray '\r'
                // at the end of the literal.
                if (c == ' ' || c == '\n' || c == '\r' || c == '\0')
                {
                    var literal = Window.GetAndConsumeChars(length);
                    tokenInfo.Kind = TokenKind.UnquotedStringLiteralToken;
                    tokenInfo.Text = literal;
                    tokenInfo.StringValue = literal;
                    return true;
                }

                length++;
            }
        }

        /// <summary>
        /// Lexes a number literal (digits, optional fraction, optional exponent, optional
        /// trailing 'i'/'j' imaginary suffix) with a small state machine.
        /// </summary>
        /// <returns>
        /// True when a well-formed literal was consumed. False when the literal is malformed;
        /// in that case the characters scanned so far are still consumed and stored as a
        /// number-literal token so the lexer always makes progress.
        /// </returns>
        private bool ContinueLexingNumber(ref TokenInfo tokenInfo)
        {
            var state = NumberParsingState.Start;
            var n = 0;
            var left = Window.CharactersLeft();
            var success = false;
            var fail = false;

            while (n < left)
            {
                var c = Window.PeekChar(n);
                switch (state)
                {
                    case NumberParsingState.Start:
                        if (!SyntaxFacts.IsDigitOrDot(c))
                        {
                            throw new InvalidOperationException(
                                $"Unexpected symbol '{c}' at the beginning of number literal.");
                        }

                        state = NumberParsingState.DigitsBeforeDot;
                        break;

                    case NumberParsingState.DigitsBeforeDot:
                        if (SyntaxFacts.IsDigit(c))
                        {
                            // stay in this state
                        }
                        else if (c == '.')
                        {
                            state = NumberParsingState.AfterDot;
                        }
                        else if (IsExponentMarker(c))
                        {
                            state = NumberParsingState.AfterE;
                        }
                        else
                        {
                            success = true;
                        }

                        break;

                    case NumberParsingState.AfterDot:
                        if (SyntaxFacts.IsDigit(c))
                        {
                            state = NumberParsingState.DigitsAfterDot;
                        }
                        else if (IsExponentMarker(c))
                        {
                            state = NumberParsingState.AfterE;
                        }
                        else if (SyntaxFacts.IsWhitespace(c) || c == ';' || c == ']' || c == ')' || c == '}')
                        {
                            success = true;
                        }
                        else if (c == '^' || c == '*' || c == '/' || c == '\\' || c == '\'')
                        {
                            // The dot starts a dotted operator (".*", ".^", ...): give it back.
                            n -= 1;
                            success = true;
                        }
                        else
                        {
                            fail = true;
                        }

                        break;

                    case NumberParsingState.DigitsAfterDot:
                        if (SyntaxFacts.IsDigit(c))
                        {
                            // stay in this state
                        }
                        else if (IsExponentMarker(c))
                        {
                            state = NumberParsingState.AfterE;
                        }
                        else
                        {
                            success = true;
                        }

                        break;

                    case NumberParsingState.AfterE:
                        if (SyntaxFacts.IsDigit(c))
                        {
                            state = NumberParsingState.DigitsAfterE;
                        }
                        else if (c == '+' || c == '-')
                        {
                            state = NumberParsingState.SignAfterE;
                        }
                        else
                        {
                            fail = true;
                        }

                        break;

                    case NumberParsingState.SignAfterE:
                        if (SyntaxFacts.IsDigit(c))
                        {
                            state = NumberParsingState.DigitsAfterE;
                        }
                        else
                        {
                            fail = true;
                        }

                        break;

                    case NumberParsingState.DigitsAfterE:
                        if (!SyntaxFacts.IsDigit(c))
                        {
                            success = true;
                        }

                        break;
                }

                if (fail)
                {
                    tokenInfo.Kind = TokenKind.NumberLiteralToken;
                    tokenInfo.Text = Window.GetAndConsumeChars(n);
                    return false;
                }

                if (success)
                {
                    break;
                }

                n++;
            }

            if (!success && n >= left)
            {
                // Input ended while scanning: that is fine unless an exponent is incomplete.
                success = state is NumberParsingState.DigitsBeforeDot
                    or NumberParsingState.DigitsAfterDot
                    or NumberParsingState.DigitsAfterE
                    or NumberParsingState.AfterDot;
            }

            if (!success)
            {
                // Input ended in the middle of an exponent (e.g. "1e" or "1e+"). Consume what
                // was scanned, exactly like the failure branch above; returning without
                // consuming would make NextToken yield the same empty token forever.
                tokenInfo.Kind = TokenKind.NumberLiteralToken;
                tokenInfo.Text = Window.GetAndConsumeChars(n);
                return false;
            }

            tokenInfo.Kind = TokenKind.NumberLiteralToken;

            Range rangeToParse = ..;
            var suffix = Window.PeekChar(n);
            if (suffix == 'i' || suffix == 'j')
            {
                // Imaginary suffix is part of the token text but not of the numeric value.
                tokenInfo.ImaginaryFlag = true;
                n++;
                rangeToParse = ..^1;
            }

            var text = Window.GetAndConsumeChars(n);
            tokenInfo.Text = text;
            tokenInfo.DoubleValue = ParseDoubleValue(text[rangeToParse]) ?? double.NaN;
            return true;
        }

        /// <summary>
        /// Parses <paramref name="s"/> as a double using the invariant culture, so that
        /// '.' is always the decimal separator regardless of the machine's locale.
        /// </summary>
        /// <returns>The parsed value, or null when the text is not a valid number.</returns>
        private double? ParseDoubleValue(string s)
        {
            if (double.TryParse(s, NumberStyles.Float, CultureInfo.InvariantCulture, out var doubleValue))
            {
                return doubleValue;
            }

            return null;
        }

        /// <summary>
        /// Lexes a string literal delimited by <paramref name="quote"/>. A doubled quote
        /// inside the literal stands for one quote character in the value. The literal must
        /// end before the end of the line; otherwise a diagnostic is reported.
        /// </summary>
        /// <returns>True when the closing quote was found.</returns>
        private bool ContinueLexingGeneralStringLiteral(ref TokenInfo tokenInfo, char quote)
        {
            var status = StringLexStatus.Ok;
            Window.ConsumeChar(); // opening quote
            var textBuilder = new StringBuilder();
            textBuilder.Append(quote);
            var valueBuilder = new StringBuilder();

            var n = 0;
            while (true)
            {
                var c = Window.PeekChar(n);
                if (c == quote)
                {
                    if (Window.PeekChar(n + 1) != quote)
                    {
                        break; // closing quote
                    }

                    // Escaped (doubled) quote: flush what was scanned, then restart
                    // scanning right after the pair.
                    var piece = Window.GetAndConsumeChars(n);
                    textBuilder.Append(piece);
                    valueBuilder.Append(piece);
                    Window.ConsumeChar();
                    Window.ConsumeChar();
                    textBuilder.Append(quote);
                    textBuilder.Append(quote);
                    valueBuilder.Append(quote);
                    n = 0;
                    continue;
                }

                if (SyntaxFacts.IsEof(c))
                {
                    status = StringLexStatus.UnexpectedEndOfFile;
                    break;
                }

                if (SyntaxFacts.IsEol(c))
                {
                    status = StringLexStatus.UnexpectedEndOfLine;
                    break;
                }

                n++;
            }

            var lastPiece = Window.GetAndConsumeChars(n);
            textBuilder.Append(lastPiece);
            valueBuilder.Append(lastPiece);

            switch (status)
            {
                case StringLexStatus.Ok:
                    Window.ConsumeChar(); // closing quote
                    textBuilder.Append(quote);
                    break;
                case StringLexStatus.UnexpectedEndOfFile:
                    Diagnostics.ReportUnexpectedEndOfFile(new TextSpan(Window.Position.Offset, 1));
                    break;
                case StringLexStatus.UnexpectedEndOfLine:
                    Diagnostics.ReportUnexpectedEOLWhileParsingString(new TextSpan(Window.Position.Offset, 1));
                    break;
                default:
                    throw new InvalidOperationException($"Unexpected status of parsing string literal: {status}.");
            }

            tokenInfo.Text = textBuilder.ToString();
            tokenInfo.StringValue = valueBuilder.ToString();
            return status == StringLexStatus.Ok;
        }

        private bool ContinueLexingStringLiteral(ref TokenInfo tokenInfo)
        {
            ContinueLexingGeneralStringLiteral(ref tokenInfo, '\'');
            tokenInfo.Kind = TokenKind.StringLiteralToken;
            return true;
        }

        private bool ContinueLexingDoubleQuotedStringLiteral(ref TokenInfo tokenInfo)
        {
            ContinueLexingGeneralStringLiteral(ref tokenInfo, '"');
            tokenInfo.Kind = TokenKind.DoubleQuotedStringLiteralToken;
            return true;
        }

        /// <summary>Consumes one character and records it as a token of the given kind.</summary>
        private bool ConsumeSingleCharToken(TokenInfo tokenInfo, TokenKind kind)
        {
            Window.ConsumeChar();
            tokenInfo.Kind = kind;
            return true;
        }

        /// <summary>
        /// Consumes one character and, if it is followed by <paramref name="second"/>,
        /// consumes that too; records the matching token kind.
        /// </summary>
        private bool ConsumeOneOrTwoCharToken(
            TokenInfo tokenInfo,
            char second,
            TokenKind twoCharKind,
            TokenKind oneCharKind)
        {
            Window.ConsumeChar();
            if (Window.PeekChar() == second)
            {
                Window.ConsumeChar();
                tokenInfo.Kind = twoCharKind;
            }
            else
            {
                tokenInfo.Kind = oneCharKind;
            }

            return true;
        }

        /// <summary>Lexes a number and reports a diagnostic if it turned out malformed.</summary>
        private void LexNumberAndReportFailure(ref TokenInfo tokenInfo)
        {
            if (!ContinueLexingNumber(ref tokenInfo))
            {
                Diagnostics.ReportUnexpectedCharacterWhileParsingNumber(
                    new TextSpan(Window.Position.Offset, 1),
                    Window.PeekChar());
            }
        }

        /// <summary>
        /// Lexes a single token at the current position, filling <paramref name="tokenInfo"/>.
        /// Trivia is handled by the caller; <paramref name="leadingTrivia"/> is only consulted
        /// to decide whether an apostrophe is a postfix operator or opens a string literal.
        /// </summary>
        private bool LexTokenWithoutTrivia(List<SyntaxTrivia> leadingTrivia, ref TokenInfo tokenInfo)
        {
            var character = Window.PeekChar();
            if (character == '\0')
            {
                tokenInfo.Kind = TokenKind.EndOfFileToken;
                tokenInfo.Text = "";
                return true;
            }

            // Second token on a line, outside any brackets, following a non-keyword identifier
            // that has trailing trivia, and not starting with '=' or '(': lex the rest of the
            // word as an unquoted string literal.
            if (TokensSinceNewLine == 1
                && !TokenStack.Any()
                && LastToken.Kind == TokenKind.IdentifierToken
                && LastToken.TrailingTrivia is not null
                && character != '='
                && character != '('
                && !SyntaxFacts.Keywords.Contains(LastToken.Text))
            {
                return ContinueParsingUnquotedStringLiteral(ref tokenInfo);
            }

            // Once an unquoted literal was produced, the following words on the same line
            // are unquoted literals as well.
            if (LastToken?.Kind == TokenKind.UnquotedStringLiteralToken
                && !TokenStack.Any()
                && TokensSinceNewLine > 0)
            {
                return ContinueParsingUnquotedStringLiteral(ref tokenInfo);
            }

            if (IsAsciiLetter(character))
            {
                return ContinueLexingIdentifier(ref tokenInfo);
            }

            if (character >= '0' && character <= '9')
            {
                LexNumberAndReportFailure(ref tokenInfo);
                return true;
            }

            switch (character)
            {
                case '=':
                    return ConsumeOneOrTwoCharToken(tokenInfo, '=', TokenKind.EqualsEqualsToken, TokenKind.EqualsToken);

                case '.':
                    if (SyntaxFacts.IsDigit(Window.PeekChar(1)))
                    {
                        // ".5" style number literal.
                        LexNumberAndReportFailure(ref tokenInfo);
                        return true;
                    }

                    Window.ConsumeChar();
                    switch (Window.PeekChar())
                    {
                        case '*':
                            return ConsumeSingleCharToken(tokenInfo, TokenKind.DotStarToken);
                        case '/':
                            return ConsumeSingleCharToken(tokenInfo, TokenKind.DotSlashToken);
                        case '^':
                            return ConsumeSingleCharToken(tokenInfo, TokenKind.DotCaretToken);
                        case '\\':
                            return ConsumeSingleCharToken(tokenInfo, TokenKind.DotBackslashToken);
                        case '\'':
                            return ConsumeSingleCharToken(tokenInfo, TokenKind.DotApostropheToken);
                        default:
                            tokenInfo.Kind = TokenKind.DotToken;
                            return true;
                    }

                case '(':
                    return ConsumeSingleCharToken(tokenInfo, TokenKind.OpenParenthesisToken);
                case ')':
                    return ConsumeSingleCharToken(tokenInfo, TokenKind.CloseParenthesisToken);
                case '[':
                    return ConsumeSingleCharToken(tokenInfo, TokenKind.OpenSquareBracketToken);
                case ']':
                    return ConsumeSingleCharToken(tokenInfo, TokenKind.CloseSquareBracketToken);
                case '{':
                    return ConsumeSingleCharToken(tokenInfo, TokenKind.OpenBraceToken);
                case '}':
                    return ConsumeSingleCharToken(tokenInfo, TokenKind.CloseBraceToken);
                case ',':
                    return ConsumeSingleCharToken(tokenInfo, TokenKind.CommaToken);
                case ';':
                    return ConsumeSingleCharToken(tokenInfo, TokenKind.SemicolonToken);

                case '&':
                    return ConsumeOneOrTwoCharToken(tokenInfo, '&', TokenKind.AmpersandAmpersandToken, TokenKind.AmpersandToken);
                case '|':
                    return ConsumeOneOrTwoCharToken(tokenInfo, '|', TokenKind.PipePipeToken, TokenKind.PipeToken);
                case '<':
                    return ConsumeOneOrTwoCharToken(tokenInfo, '=', TokenKind.LessOrEqualsToken, TokenKind.LessToken);
                case '>':
                    return ConsumeOneOrTwoCharToken(tokenInfo, '=', TokenKind.GreaterOrEqualsToken, TokenKind.GreaterToken);
                case '~':
                    return ConsumeOneOrTwoCharToken(tokenInfo, '=', TokenKind.TildeEqualsToken, TokenKind.TildeToken);

                case '+':
                    return ConsumeSingleCharToken(tokenInfo, TokenKind.PlusToken);
                case '-':
                    return ConsumeSingleCharToken(tokenInfo, TokenKind.MinusToken);
                case '*':
                    return ConsumeSingleCharToken(tokenInfo, TokenKind.StarToken);
                case '/':
                    return ConsumeSingleCharToken(tokenInfo, TokenKind.SlashToken);
                case '\\':
                    return ConsumeSingleCharToken(tokenInfo, TokenKind.BackslashToken);
                case '^':
                    return ConsumeSingleCharToken(tokenInfo, TokenKind.CaretToken);
                case '@':
                    return ConsumeSingleCharToken(tokenInfo, TokenKind.AtToken);
                case ':':
                    return ConsumeSingleCharToken(tokenInfo, TokenKind.ColonToken);
                case '?':
                    return ConsumeSingleCharToken(tokenInfo, TokenKind.QuestionToken);

                case '\'':
                    // Directly after a closing bracket or identifier, with no trivia in
                    // between, the apostrophe is an operator; otherwise it opens a string.
                    var followsOperand = LastToken != null
                        && (LastToken.Kind == TokenKind.CloseBraceToken
                            || LastToken.Kind == TokenKind.CloseParenthesisToken
                            || LastToken.Kind == TokenKind.CloseSquareBracketToken
                            || LastToken.Kind == TokenKind.IdentifierToken);
                    if (followsOperand && LastToken.TrailingTrivia is null && leadingTrivia.Count == 0)
                    {
                        return ConsumeSingleCharToken(tokenInfo, TokenKind.ApostropheToken);
                    }

                    return ContinueLexingStringLiteral(ref tokenInfo);

                case '"':
                    return ContinueLexingDoubleQuotedStringLiteral(ref tokenInfo);

                default:
                    Diagnostics.ReportUnknownSymbol(new TextSpan(Window.Position.Offset, 1), character);
                    Window.ConsumeChar();
                    tokenInfo.Kind = TokenKind.BadToken;
                    tokenInfo.Text = character.ToString();
                    return true;
            }
        }

        /// <summary>
        /// Lexes the next token together with its leading and trailing trivia, updates the
        /// line/bracket bookkeeping and reports unbalanced brackets.
        /// </summary>
        /// <returns>The token and the window position at which the token itself starts.</returns>
        public (SyntaxToken, Position) NextToken()
        {
            var leadingTrivia = LexTrivia(false);
            var position = Window.Position;

            var tokenInfo = new TokenInfo();
            LexTokenWithoutTrivia(leadingTrivia, ref tokenInfo);
            var trailingTrivia = LexTrivia(true);

            if (trailingTrivia.Any(t => t.Kind == TokenKind.NewlineToken))
            {
                TokensSinceNewLine = 0;
            }
            else
            {
                TokensSinceNewLine++;
            }

            if (SyntaxFacts.IsOpeningToken(tokenInfo.Kind))
            {
                TokenStack.Push(tokenInfo.Kind);
            }

            // NOTE(review): the diagnostics below use the window position *after* the token
            // and its trailing trivia, not the token's own start - confirm this is intended.
            if (SyntaxFacts.IsClosingToken(tokenInfo.Kind))
            {
                if (TokenStack.Count > 0 && TokenStack.Peek() == SyntaxFacts.OpeningFromClosing(tokenInfo.Kind))
                {
                    TokenStack.Pop();
                }
                else
                {
                    Diagnostics.ReportUnmatchedCloseParenthesis(
                        new TextSpan(Window.Position.Offset, 1),
                        tokenInfo.Kind);
                }
            }

            if (tokenInfo.Kind == TokenKind.EndOfFileToken && TokenStack.Any())
            {
                Diagnostics.ReportUnmatchedOpenParenthesisByEndOfFile(
                    new TextSpan(Window.Position.Offset, 1));
            }

            var result = Create(tokenInfo, leadingTrivia, trailingTrivia);
            LastToken = result;
            return (result, position);
        }

        /// <summary>Builds the <see cref="SyntaxToken"/> that corresponds to the lexed info.</summary>
        private SyntaxToken Create(
            TokenInfo tokenInfo,
            List<SyntaxTrivia> leadingTrivia,
            List<SyntaxTrivia> trailingTrivia)
        {
            switch (tokenInfo.Kind)
            {
                case TokenKind.IdentifierToken:
                    return TokenFactory.CreateIdentifier(
                        tokenInfo.Text,
                        leadingTrivia,
                        trailingTrivia);

                case TokenKind.UnquotedStringLiteralToken:
                    return TokenFactory.CreateUnquotedStringLiteral(
                        tokenInfo.Text,
                        tokenInfo.StringValue,
                        leadingTrivia,
                        trailingTrivia);

                case TokenKind.NumberLiteralToken:
                    return TokenFactory.CreateTokenWithValueAndTrivia(
                        tokenInfo.Kind,
                        tokenInfo.Text,
                        tokenInfo.DoubleValue,
                        leadingTrivia,
                        trailingTrivia);

                case TokenKind.StringLiteralToken:
                case TokenKind.DoubleQuotedStringLiteralToken:
                    return TokenFactory.CreateTokenWithValueAndTrivia(
                        tokenInfo.Kind,
                        tokenInfo.Text,
                        tokenInfo.StringValue,
                        leadingTrivia,
                        trailingTrivia);

                default:
                    return TokenFactory.CreateTokenWithTrivia(
                        tokenInfo.Kind,
                        leadingTrivia,
                        trailingTrivia);
            }
        }

        /// <summary>
        /// Lexes the whole input and returns every token, the end-of-file token included.
        /// </summary>
        /// <exception cref="ParsingException">Thrown when a null token is produced.</exception>
        public List<(SyntaxToken, Position)> ParseAll()
        {
            var result = new List<(SyntaxToken, Position)>();
            while (true)
            {
                var pair = NextToken();
                var (token, _) = pair;
                if (token == null)
                {
                    throw new ParsingException($"Unexpected character: '{Window.PeekChar()}' at {Window.Position}.");
                }

                result.Add(pair);
                if (token.Kind == TokenKind.EndOfFileToken)
                {
                    return result;
                }
            }
        }
    }
}