More diagnostics for the lexer
This commit is contained in:
parent
989f368a28
commit
9490864ef3
@ -76,13 +76,7 @@ namespace ConsoleDemo
|
||||
private static void ParserDemo()
|
||||
{
|
||||
Console.WriteLine("Hello World!");
|
||||
var text = @"
|
||||
function [a, b c] = functionName(d, e, f)
|
||||
a = d + e;
|
||||
end
|
||||
%{
|
||||
comment
|
||||
";
|
||||
var text = @"x = 'abc";
|
||||
var window = new TextWindowWithNull(text, "noname");
|
||||
var parser = CreateParser(window);
|
||||
var tree = parser.Parse();
|
||||
|
@ -25,6 +25,21 @@ namespace Parser.Internal
|
||||
Report(span, "Unexpected end of file.");
|
||||
}
|
||||
|
||||
internal void ReportUnexpectedCharacterWhileParsingNumber(TextSpan span, char c)
|
||||
{
|
||||
Report(span, $"Unexpected character '{c}' while parsing a number.");
|
||||
}
|
||||
|
||||
internal void ReportUnexpectedEOLWhileParsingString(TextSpan span)
|
||||
{
|
||||
Report(span, "Unexpected end of line while parsing a string literal.");
|
||||
}
|
||||
|
||||
internal void ReportUnknownSymbol(TextSpan span, char c)
|
||||
{
|
||||
Report(span, $"Unknown symbol '{c}'.");
|
||||
}
|
||||
|
||||
public IEnumerator<Diagnostic> GetEnumerator()
|
||||
{
|
||||
return _diagnostics.GetEnumerator();
|
||||
|
@ -1,4 +1,5 @@
|
||||
using System.Collections.Generic;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
|
||||
@ -230,7 +231,7 @@ namespace Parser.Internal
|
||||
}
|
||||
else
|
||||
{
|
||||
fail = true;
|
||||
throw new Exception($"Unexpected symbol '{c}' at the beginning of number literal.");
|
||||
}
|
||||
break;
|
||||
case NumberParsingState.DigitsBeforeDot:
|
||||
@ -328,7 +329,10 @@ namespace Parser.Internal
|
||||
|
||||
if (fail)
|
||||
{
|
||||
throw new ParsingException("Error while parsing number.");
|
||||
var s = Window.GetAndConsumeChars(n);
|
||||
tokenInfo.Kind = TokenKind.NumberLiteral;
|
||||
tokenInfo.Text = s;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (success)
|
||||
@ -368,6 +372,7 @@ namespace Parser.Internal
|
||||
|
||||
private bool ContinueLexingGeneralStringLiteral(ref TokenInfo tokenInfo, char quote)
|
||||
{
|
||||
var status = 0; // no errors
|
||||
Window.ConsumeChar();
|
||||
var textBuilder = new StringBuilder();
|
||||
textBuilder.Append(quote);
|
||||
@ -394,9 +399,15 @@ namespace Parser.Internal
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (SyntaxFacts.IsEolOrEof(Window.PeekChar(n)))
|
||||
if (SyntaxFacts.IsEof(Window.PeekChar(n)))
|
||||
{
|
||||
throw new ParsingException("Unfinished string literal.");
|
||||
status = 1;
|
||||
break;
|
||||
}
|
||||
if (SyntaxFacts.IsEol(Window.PeekChar(n)))
|
||||
{
|
||||
status = 2;
|
||||
break;
|
||||
}
|
||||
n++;
|
||||
}
|
||||
@ -404,11 +415,24 @@ namespace Parser.Internal
|
||||
var lastPiece = Window.GetAndConsumeChars(n);
|
||||
textBuilder.Append(lastPiece);
|
||||
valueBuilder.Append(lastPiece);
|
||||
switch (status) {
|
||||
case 0:
|
||||
Window.ConsumeChar();
|
||||
textBuilder.Append(quote);
|
||||
break;
|
||||
case 1:
|
||||
Diagnostics.ReportUnexpectedEndOfFile(new TextSpan(Window.Position.Offset, 1));
|
||||
break;
|
||||
case 2:
|
||||
Diagnostics.ReportUnexpectedEOLWhileParsingString(new TextSpan(Window.Position.Offset, 1));
|
||||
break;
|
||||
default:
|
||||
throw new Exception($"Unexpected status of parsing string literal: {status}.");
|
||||
}
|
||||
|
||||
tokenInfo.Text = textBuilder.ToString();
|
||||
tokenInfo.StringValue = valueBuilder.ToString();
|
||||
return true;
|
||||
return status == 0;
|
||||
}
|
||||
|
||||
private bool ContinueLexingStringLiteral(ref TokenInfo tokenInfo)
|
||||
@ -520,7 +544,7 @@ namespace Parser.Internal
|
||||
var parsedNumber = ContinueLexingNumber(ref tokenInfo);
|
||||
if (!parsedNumber)
|
||||
{
|
||||
throw new ParsingException($"Unexpected character \"{Window.PeekChar()}\" while parsing a number");
|
||||
Diagnostics.ReportUnexpectedCharacterWhileParsingNumber(new TextSpan(Window.Position.Offset, 1), Window.PeekChar());
|
||||
}
|
||||
return true;
|
||||
case '=':
|
||||
@ -542,7 +566,7 @@ namespace Parser.Internal
|
||||
var possiblyNumberToken2 = ContinueLexingNumber(ref tokenInfo);
|
||||
if (!possiblyNumberToken2)
|
||||
{
|
||||
throw new ParsingException($"Unexpected character \"{Window.PeekChar()}\" while parsing a number");
|
||||
Diagnostics.ReportUnexpectedCharacterWhileParsingNumber(new TextSpan(Window.Position.Offset, 1), Window.PeekChar());
|
||||
}
|
||||
|
||||
return true;
|
||||
@ -732,9 +756,11 @@ namespace Parser.Internal
|
||||
tokenInfo.Kind = TokenKind.EndOfFile;
|
||||
return true;
|
||||
default:
|
||||
throw new ParsingException(
|
||||
$"Unknown symbol \"{character}\" at {Window.Position}."
|
||||
);
|
||||
Diagnostics.ReportUnknownSymbol(new TextSpan(Window.Position.Offset, 1), character);
|
||||
Window.ConsumeChar();
|
||||
tokenInfo.Kind = TokenKind.BadToken;
|
||||
tokenInfo.Text = character.ToString();
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -94,6 +94,16 @@ namespace Parser.Internal
|
||||
return c == '\n' || c == '\r' || c == '\0';
|
||||
}
|
||||
|
||||
public static bool IsEof(char c)
|
||||
{
|
||||
return c == '\0';
|
||||
}
|
||||
|
||||
public static bool IsEol(char c)
|
||||
{
|
||||
return c == '\n' || c == '\r';
|
||||
}
|
||||
|
||||
public static bool IsWhitespace(char c)
|
||||
{
|
||||
return c == ' ' || c == '\t' || c == '\n';
|
||||
|
@ -6,22 +6,23 @@
|
||||
// SYNTAX TOKENS
|
||||
|
||||
None = 0,
|
||||
BadToken = 1,
|
||||
// The lexer puts a virtual "end of file" token at the end of the parsed file.
|
||||
EndOfFile = 1,
|
||||
EndOfFile = 2,
|
||||
// Identifier: could be a reserved word, a variable name, a class name, etc.
|
||||
Identifier = 2,
|
||||
Identifier = 3,
|
||||
// Number literal: 123, 45.678, 2e-5, etc.
|
||||
NumberLiteral = 3,
|
||||
NumberLiteral = 4,
|
||||
// String literal: 'abc', '123', etc. The "usual" string literals are single-quoted and are just char arrays.
|
||||
StringLiteral = 4,
|
||||
StringLiteral = 5,
|
||||
// Double-quoted string literal: "abc", "123", etc. These are the "new" string literals that are more like strings
|
||||
// and less like char arrays (for example, char arrays could be columns instead of rows, or even multi-dimensional).
|
||||
DoubleQuotedStringLiteral = 5,
|
||||
DoubleQuotedStringLiteral = 6,
|
||||
// This is for supporting "command statements" like
|
||||
// > cd some/+folder/
|
||||
// In this example, "some/+folder/" should be treated as a string literal (for example, "+" there should be a part
|
||||
// of it, and not parsed as a binary operator).
|
||||
UnquotedStringLiteral = 6,
|
||||
UnquotedStringLiteral = 7,
|
||||
|
||||
// trivia
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user