More diagnostics for the lexer
This commit is contained in:
parent
989f368a28
commit
9490864ef3
@ -76,13 +76,7 @@ namespace ConsoleDemo
|
|||||||
private static void ParserDemo()
|
private static void ParserDemo()
|
||||||
{
|
{
|
||||||
Console.WriteLine("Hello World!");
|
Console.WriteLine("Hello World!");
|
||||||
var text = @"
|
var text = @"x = 'abc";
|
||||||
function [a, b c] = functionName(d, e, f)
|
|
||||||
a = d + e;
|
|
||||||
end
|
|
||||||
%{
|
|
||||||
comment
|
|
||||||
";
|
|
||||||
var window = new TextWindowWithNull(text, "noname");
|
var window = new TextWindowWithNull(text, "noname");
|
||||||
var parser = CreateParser(window);
|
var parser = CreateParser(window);
|
||||||
var tree = parser.Parse();
|
var tree = parser.Parse();
|
||||||
|
@ -25,6 +25,21 @@ namespace Parser.Internal
|
|||||||
Report(span, "Unexpected end of file.");
|
Report(span, "Unexpected end of file.");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
internal void ReportUnexpectedCharacterWhileParsingNumber(TextSpan span, char c)
|
||||||
|
{
|
||||||
|
Report(span, $"Unexpected character '{c}' while parsing a number.");
|
||||||
|
}
|
||||||
|
|
||||||
|
internal void ReportUnexpectedEOLWhileParsingString(TextSpan span)
|
||||||
|
{
|
||||||
|
Report(span, "Unexpected end of line while parsing a string literal.");
|
||||||
|
}
|
||||||
|
|
||||||
|
internal void ReportUnknownSymbol(TextSpan span, char c)
|
||||||
|
{
|
||||||
|
Report(span, $"Unknown symbol '{c}'.");
|
||||||
|
}
|
||||||
|
|
||||||
public IEnumerator<Diagnostic> GetEnumerator()
|
public IEnumerator<Diagnostic> GetEnumerator()
|
||||||
{
|
{
|
||||||
return _diagnostics.GetEnumerator();
|
return _diagnostics.GetEnumerator();
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
using System.Collections.Generic;
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
using System.Linq;
|
using System.Linq;
|
||||||
using System.Text;
|
using System.Text;
|
||||||
|
|
||||||
@ -230,7 +231,7 @@ namespace Parser.Internal
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
fail = true;
|
throw new Exception($"Unexpected symbol '{c}' at the beginning of number literal.");
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case NumberParsingState.DigitsBeforeDot:
|
case NumberParsingState.DigitsBeforeDot:
|
||||||
@ -328,7 +329,10 @@ namespace Parser.Internal
|
|||||||
|
|
||||||
if (fail)
|
if (fail)
|
||||||
{
|
{
|
||||||
throw new ParsingException("Error while parsing number.");
|
var s = Window.GetAndConsumeChars(n);
|
||||||
|
tokenInfo.Kind = TokenKind.NumberLiteral;
|
||||||
|
tokenInfo.Text = s;
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (success)
|
if (success)
|
||||||
@ -368,6 +372,7 @@ namespace Parser.Internal
|
|||||||
|
|
||||||
private bool ContinueLexingGeneralStringLiteral(ref TokenInfo tokenInfo, char quote)
|
private bool ContinueLexingGeneralStringLiteral(ref TokenInfo tokenInfo, char quote)
|
||||||
{
|
{
|
||||||
|
var status = 0; // no errors
|
||||||
Window.ConsumeChar();
|
Window.ConsumeChar();
|
||||||
var textBuilder = new StringBuilder();
|
var textBuilder = new StringBuilder();
|
||||||
textBuilder.Append(quote);
|
textBuilder.Append(quote);
|
||||||
@ -394,9 +399,15 @@ namespace Parser.Internal
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (SyntaxFacts.IsEolOrEof(Window.PeekChar(n)))
|
if (SyntaxFacts.IsEof(Window.PeekChar(n)))
|
||||||
{
|
{
|
||||||
throw new ParsingException("Unfinished string literal.");
|
status = 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (SyntaxFacts.IsEol(Window.PeekChar(n)))
|
||||||
|
{
|
||||||
|
status = 2;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
n++;
|
n++;
|
||||||
}
|
}
|
||||||
@ -404,11 +415,24 @@ namespace Parser.Internal
|
|||||||
var lastPiece = Window.GetAndConsumeChars(n);
|
var lastPiece = Window.GetAndConsumeChars(n);
|
||||||
textBuilder.Append(lastPiece);
|
textBuilder.Append(lastPiece);
|
||||||
valueBuilder.Append(lastPiece);
|
valueBuilder.Append(lastPiece);
|
||||||
Window.ConsumeChar();
|
switch (status) {
|
||||||
textBuilder.Append(quote);
|
case 0:
|
||||||
|
Window.ConsumeChar();
|
||||||
|
textBuilder.Append(quote);
|
||||||
|
break;
|
||||||
|
case 1:
|
||||||
|
Diagnostics.ReportUnexpectedEndOfFile(new TextSpan(Window.Position.Offset, 1));
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
Diagnostics.ReportUnexpectedEOLWhileParsingString(new TextSpan(Window.Position.Offset, 1));
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
throw new Exception($"Unexpected status of parsing string literal: {status}.");
|
||||||
|
}
|
||||||
|
|
||||||
tokenInfo.Text = textBuilder.ToString();
|
tokenInfo.Text = textBuilder.ToString();
|
||||||
tokenInfo.StringValue = valueBuilder.ToString();
|
tokenInfo.StringValue = valueBuilder.ToString();
|
||||||
return true;
|
return status == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
private bool ContinueLexingStringLiteral(ref TokenInfo tokenInfo)
|
private bool ContinueLexingStringLiteral(ref TokenInfo tokenInfo)
|
||||||
@ -520,7 +544,7 @@ namespace Parser.Internal
|
|||||||
var parsedNumber = ContinueLexingNumber(ref tokenInfo);
|
var parsedNumber = ContinueLexingNumber(ref tokenInfo);
|
||||||
if (!parsedNumber)
|
if (!parsedNumber)
|
||||||
{
|
{
|
||||||
throw new ParsingException($"Unexpected character \"{Window.PeekChar()}\" while parsing a number");
|
Diagnostics.ReportUnexpectedCharacterWhileParsingNumber(new TextSpan(Window.Position.Offset, 1), Window.PeekChar());
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
case '=':
|
case '=':
|
||||||
@ -542,7 +566,7 @@ namespace Parser.Internal
|
|||||||
var possiblyNumberToken2 = ContinueLexingNumber(ref tokenInfo);
|
var possiblyNumberToken2 = ContinueLexingNumber(ref tokenInfo);
|
||||||
if (!possiblyNumberToken2)
|
if (!possiblyNumberToken2)
|
||||||
{
|
{
|
||||||
throw new ParsingException($"Unexpected character \"{Window.PeekChar()}\" while parsing a number");
|
Diagnostics.ReportUnexpectedCharacterWhileParsingNumber(new TextSpan(Window.Position.Offset, 1), Window.PeekChar());
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
@ -732,9 +756,11 @@ namespace Parser.Internal
|
|||||||
tokenInfo.Kind = TokenKind.EndOfFile;
|
tokenInfo.Kind = TokenKind.EndOfFile;
|
||||||
return true;
|
return true;
|
||||||
default:
|
default:
|
||||||
throw new ParsingException(
|
Diagnostics.ReportUnknownSymbol(new TextSpan(Window.Position.Offset, 1), character);
|
||||||
$"Unknown symbol \"{character}\" at {Window.Position}."
|
Window.ConsumeChar();
|
||||||
);
|
tokenInfo.Kind = TokenKind.BadToken;
|
||||||
|
tokenInfo.Text = character.ToString();
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -94,6 +94,16 @@ namespace Parser.Internal
|
|||||||
return c == '\n' || c == '\r' || c == '\0';
|
return c == '\n' || c == '\r' || c == '\0';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static bool IsEof(char c)
|
||||||
|
{
|
||||||
|
return c == '\0';
|
||||||
|
}
|
||||||
|
|
||||||
|
public static bool IsEol(char c)
|
||||||
|
{
|
||||||
|
return c == '\n' || c == '\r';
|
||||||
|
}
|
||||||
|
|
||||||
public static bool IsWhitespace(char c)
|
public static bool IsWhitespace(char c)
|
||||||
{
|
{
|
||||||
return c == ' ' || c == '\t' || c == '\n';
|
return c == ' ' || c == '\t' || c == '\n';
|
||||||
|
@ -6,22 +6,23 @@
|
|||||||
// SYNTAX TOKENS
|
// SYNTAX TOKENS
|
||||||
|
|
||||||
None = 0,
|
None = 0,
|
||||||
|
BadToken = 1,
|
||||||
// The lexer puts a virtual "end of file" token at the end of the parsed file.
|
// The lexer puts a virtual "end of file" token at the end of the parsed file.
|
||||||
EndOfFile = 1,
|
EndOfFile = 2,
|
||||||
// Identifier: could be a reserved word, a variable name, a class name, etc.
|
// Identifier: could be a reserved word, a variable name, a class name, etc.
|
||||||
Identifier = 2,
|
Identifier = 3,
|
||||||
// Number literal: 123, 45.678, 2e-5, etc.
|
// Number literal: 123, 45.678, 2e-5, etc.
|
||||||
NumberLiteral = 3,
|
NumberLiteral = 4,
|
||||||
// String literal: 'abc', '123', etc. The "usual" string literals are single-quoted and are just char arrays.
|
// String literal: 'abc', '123', etc. The "usual" string literals are single-quoted and are just char arrays.
|
||||||
StringLiteral = 4,
|
StringLiteral = 5,
|
||||||
// Double-quoted string literal: "abc", "123", etc. These are the "new" string literals that are more like strings
|
// Double-quoted string literal: "abc", "123", etc. These are the "new" string literals that are more like strings
|
||||||
// and less like char arrays (for example, char arrays could be columns instead of rows, or even multi-dimensional).
|
// and less like char arrays (for example, char arrays could be columns instead of rows, or even multi-dimensional).
|
||||||
DoubleQuotedStringLiteral = 5,
|
DoubleQuotedStringLiteral = 6,
|
||||||
// This is for supporting "command statements" like
|
// This is for supporting "command statements" like
|
||||||
// > cd some/+folder/
|
// > cd some/+folder/
|
||||||
// In this example, "some/+folder/" should be treated as a string literal (for example, "+" there should be a part
|
// In this example, "some/+folder/" should be treated as a string literal (for example, "+" there should be a part
|
||||||
// of it, and not parsed as a binary operator).
|
// of it, and not parsed as a binary operator).
|
||||||
UnquotedStringLiteral = 6,
|
UnquotedStringLiteral = 7,
|
||||||
|
|
||||||
// trivia
|
// trivia
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user