More diagnostics for the lexer

This commit is contained in:
Alexander Luzgarev 2018-10-18 18:48:08 +02:00
parent 989f368a28
commit 9490864ef3
5 changed files with 72 additions and 26 deletions

View File

@ -76,13 +76,7 @@ namespace ConsoleDemo
private static void ParserDemo()
{
Console.WriteLine("Hello World!");
var text = @"
function [a, b c] = functionName(d, e, f)
a = d + e;
end
%{
comment
";
var text = @"x = 'abc";
var window = new TextWindowWithNull(text, "noname");
var parser = CreateParser(window);
var tree = parser.Parse();

View File

@ -25,6 +25,21 @@ namespace Parser.Internal
Report(span, "Unexpected end of file.");
}
/// <summary>
/// Forwards an "unexpected character while parsing a number" message to <c>Report</c>.
/// </summary>
/// <param name="span">Span handed to <c>Report</c> unchanged.</param>
/// <param name="c">Character that is embedded (single-quoted) in the message text.</param>
internal void ReportUnexpectedCharacterWhileParsingNumber(TextSpan span, char c)
    => Report(span, $"Unexpected character '{c}' while parsing a number.");
/// <summary>
/// Forwards an "unexpected end of line while parsing a string literal" message to <c>Report</c>.
/// </summary>
/// <param name="span">Span handed to <c>Report</c> unchanged.</param>
internal void ReportUnexpectedEOLWhileParsingString(TextSpan span)
    => Report(span, "Unexpected end of line while parsing a string literal.");
/// <summary>
/// Forwards an "unknown symbol" message to <c>Report</c>.
/// </summary>
/// <param name="span">Span handed to <c>Report</c> unchanged.</param>
/// <param name="c">Character that is embedded (single-quoted) in the message text.</param>
internal void ReportUnknownSymbol(TextSpan span, char c)
    => Report(span, $"Unknown symbol '{c}'.");
public IEnumerator<Diagnostic> GetEnumerator()
{
return _diagnostics.GetEnumerator();

View File

@ -1,4 +1,5 @@
using System.Collections.Generic;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
@ -230,7 +231,7 @@ namespace Parser.Internal
}
else
{
fail = true;
throw new Exception($"Unexpected symbol '{c}' at the beginning of number literal.");
}
break;
case NumberParsingState.DigitsBeforeDot:
@ -328,7 +329,10 @@ namespace Parser.Internal
if (fail)
{
throw new ParsingException("Error while parsing number.");
var s = Window.GetAndConsumeChars(n);
tokenInfo.Kind = TokenKind.NumberLiteral;
tokenInfo.Text = s;
return false;
}
if (success)
@ -368,6 +372,7 @@ namespace Parser.Internal
private bool ContinueLexingGeneralStringLiteral(ref TokenInfo tokenInfo, char quote)
{
var status = 0; // no errors
Window.ConsumeChar();
var textBuilder = new StringBuilder();
textBuilder.Append(quote);
@ -394,9 +399,15 @@ namespace Parser.Internal
break;
}
}
if (SyntaxFacts.IsEolOrEof(Window.PeekChar(n)))
if (SyntaxFacts.IsEof(Window.PeekChar(n)))
{
throw new ParsingException("Unfinished string literal.");
status = 1;
break;
}
if (SyntaxFacts.IsEol(Window.PeekChar(n)))
{
status = 2;
break;
}
n++;
}
@ -404,11 +415,24 @@ namespace Parser.Internal
var lastPiece = Window.GetAndConsumeChars(n);
textBuilder.Append(lastPiece);
valueBuilder.Append(lastPiece);
switch (status) {
case 0:
Window.ConsumeChar();
textBuilder.Append(quote);
break;
case 1:
Diagnostics.ReportUnexpectedEndOfFile(new TextSpan(Window.Position.Offset, 1));
break;
case 2:
Diagnostics.ReportUnexpectedEOLWhileParsingString(new TextSpan(Window.Position.Offset, 1));
break;
default:
throw new Exception($"Unexpected status of parsing string literal: {status}.");
}
tokenInfo.Text = textBuilder.ToString();
tokenInfo.StringValue = valueBuilder.ToString();
return true;
return status == 0;
}
private bool ContinueLexingStringLiteral(ref TokenInfo tokenInfo)
@ -520,7 +544,7 @@ namespace Parser.Internal
var parsedNumber = ContinueLexingNumber(ref tokenInfo);
if (!parsedNumber)
{
throw new ParsingException($"Unexpected character \"{Window.PeekChar()}\" while parsing a number");
Diagnostics.ReportUnexpectedCharacterWhileParsingNumber(new TextSpan(Window.Position.Offset, 1), Window.PeekChar());
}
return true;
case '=':
@ -542,7 +566,7 @@ namespace Parser.Internal
var possiblyNumberToken2 = ContinueLexingNumber(ref tokenInfo);
if (!possiblyNumberToken2)
{
throw new ParsingException($"Unexpected character \"{Window.PeekChar()}\" while parsing a number");
Diagnostics.ReportUnexpectedCharacterWhileParsingNumber(new TextSpan(Window.Position.Offset, 1), Window.PeekChar());
}
return true;
@ -732,9 +756,11 @@ namespace Parser.Internal
tokenInfo.Kind = TokenKind.EndOfFile;
return true;
default:
throw new ParsingException(
$"Unknown symbol \"{character}\" at {Window.Position}."
);
Diagnostics.ReportUnknownSymbol(new TextSpan(Window.Position.Offset, 1), character);
Window.ConsumeChar();
tokenInfo.Kind = TokenKind.BadToken;
tokenInfo.Text = character.ToString();
return true;
}
}

View File

@ -94,6 +94,16 @@ namespace Parser.Internal
return c == '\n' || c == '\r' || c == '\0';
}
/// <summary>
/// Tells whether <paramref name="c"/> is the NUL character (<c>'\0'</c>).
/// </summary>
/// <param name="c">Character to test.</param>
/// <returns><c>true</c> only when <paramref name="c"/> equals <c>'\0'</c>.</returns>
public static bool IsEof(char c) => c == '\0';
/// <summary>
/// Tells whether <paramref name="c"/> is a line-feed or carriage-return character.
/// </summary>
/// <param name="c">Character to test.</param>
/// <returns><c>true</c> for <c>'\n'</c> and <c>'\r'</c>; <c>false</c> for every other character, including <c>'\0'</c>.</returns>
public static bool IsEol(char c)
{
    switch (c)
    {
        case '\n':
        case '\r':
            return true;
        default:
            return false;
    }
}
public static bool IsWhitespace(char c)
{
return c == ' ' || c == '\t' || c == '\n';

View File

@ -6,22 +6,23 @@
// SYNTAX TOKENS
None = 0,
BadToken = 1,
// The lexer puts a virtual "end of file" token at the end of the parsed file.
EndOfFile = 1,
EndOfFile = 2,
// Identifier: could be a reserved word, a variable name, a class name, etc.
Identifier = 2,
Identifier = 3,
// Number literal: 123, 45.678, 2e-5, etc.
NumberLiteral = 3,
NumberLiteral = 4,
// String literal: 'abc', '123', etc. The "usual" string literals are single-quoted and are just char arrays.
StringLiteral = 4,
StringLiteral = 5,
// Double-quoted string literal: "abc", "123", etc. These are the "new" string literals that are more like strings
// and less like char arrays (for example, char arrays could be columns instead of rows, or even multi-dimensional).
DoubleQuotedStringLiteral = 5,
DoubleQuotedStringLiteral = 6,
// This is for supporting "command statements" like
// > cd some/+folder/
// In this example, "some/+folder/" should be treated as a string literal (for example, the "+" there should be a part
// of it, and not parsed as a binary operator).
UnquotedStringLiteral = 6,
UnquotedStringLiteral = 7,
// trivia