More diagnostics for the lexer

This commit is contained in:
Alexander Luzgarev 2018-10-18 18:48:08 +02:00
parent 989f368a28
commit 9490864ef3
5 changed files with 72 additions and 26 deletions

View File

@ -76,13 +76,7 @@ namespace ConsoleDemo
private static void ParserDemo() private static void ParserDemo()
{ {
Console.WriteLine("Hello World!"); Console.WriteLine("Hello World!");
var text = @" var text = @"x = 'abc";
function [a, b c] = functionName(d, e, f)
a = d + e;
end
%{
comment
";
var window = new TextWindowWithNull(text, "noname"); var window = new TextWindowWithNull(text, "noname");
var parser = CreateParser(window); var parser = CreateParser(window);
var tree = parser.Parse(); var tree = parser.Parse();

View File

@ -25,6 +25,21 @@ namespace Parser.Internal
Report(span, "Unexpected end of file."); Report(span, "Unexpected end of file.");
} }
/// <summary>
/// Reports that the character <paramref name="c"/> was not expected while a number was being parsed.
/// </summary>
/// <param name="span">Text span handed to <c>Report</c> together with the message.</param>
/// <param name="c">Offending character; it is embedded in the message text.</param>
internal void ReportUnexpectedCharacterWhileParsingNumber(TextSpan span, char c)
    => Report(span, $"Unexpected character '{c}' while parsing a number.");
/// <summary>
/// Reports that an end of line was hit while a string literal was being parsed.
/// </summary>
/// <param name="span">Text span handed to <c>Report</c> together with the message.</param>
internal void ReportUnexpectedEOLWhileParsingString(TextSpan span)
    => Report(span, "Unexpected end of line while parsing a string literal.");
/// <summary>
/// Reports that the symbol <paramref name="c"/> is not known.
/// </summary>
/// <param name="span">Text span handed to <c>Report</c> together with the message.</param>
/// <param name="c">Unrecognized character; it is embedded in the message text.</param>
internal void ReportUnknownSymbol(TextSpan span, char c)
    => Report(span, $"Unknown symbol '{c}'.");
public IEnumerator<Diagnostic> GetEnumerator() public IEnumerator<Diagnostic> GetEnumerator()
{ {
return _diagnostics.GetEnumerator(); return _diagnostics.GetEnumerator();

View File

@ -1,4 +1,5 @@
using System.Collections.Generic; using System;
using System.Collections.Generic;
using System.Linq; using System.Linq;
using System.Text; using System.Text;
@ -230,7 +231,7 @@ namespace Parser.Internal
} }
else else
{ {
fail = true; throw new Exception($"Unexpected symbol '{c}' at the beginning of number literal.");
} }
break; break;
case NumberParsingState.DigitsBeforeDot: case NumberParsingState.DigitsBeforeDot:
@ -328,7 +329,10 @@ namespace Parser.Internal
if (fail) if (fail)
{ {
throw new ParsingException("Error while parsing number."); var s = Window.GetAndConsumeChars(n);
tokenInfo.Kind = TokenKind.NumberLiteral;
tokenInfo.Text = s;
return false;
} }
if (success) if (success)
@ -368,6 +372,7 @@ namespace Parser.Internal
private bool ContinueLexingGeneralStringLiteral(ref TokenInfo tokenInfo, char quote) private bool ContinueLexingGeneralStringLiteral(ref TokenInfo tokenInfo, char quote)
{ {
var status = 0; // no errors
Window.ConsumeChar(); Window.ConsumeChar();
var textBuilder = new StringBuilder(); var textBuilder = new StringBuilder();
textBuilder.Append(quote); textBuilder.Append(quote);
@ -394,9 +399,15 @@ namespace Parser.Internal
break; break;
} }
} }
if (SyntaxFacts.IsEolOrEof(Window.PeekChar(n))) if (SyntaxFacts.IsEof(Window.PeekChar(n)))
{ {
throw new ParsingException("Unfinished string literal."); status = 1;
break;
}
if (SyntaxFacts.IsEol(Window.PeekChar(n)))
{
status = 2;
break;
} }
n++; n++;
} }
@ -404,11 +415,24 @@ namespace Parser.Internal
var lastPiece = Window.GetAndConsumeChars(n); var lastPiece = Window.GetAndConsumeChars(n);
textBuilder.Append(lastPiece); textBuilder.Append(lastPiece);
valueBuilder.Append(lastPiece); valueBuilder.Append(lastPiece);
Window.ConsumeChar(); switch (status) {
textBuilder.Append(quote); case 0:
Window.ConsumeChar();
textBuilder.Append(quote);
break;
case 1:
Diagnostics.ReportUnexpectedEndOfFile(new TextSpan(Window.Position.Offset, 1));
break;
case 2:
Diagnostics.ReportUnexpectedEOLWhileParsingString(new TextSpan(Window.Position.Offset, 1));
break;
default:
throw new Exception($"Unexpected status of parsing string literal: {status}.");
}
tokenInfo.Text = textBuilder.ToString(); tokenInfo.Text = textBuilder.ToString();
tokenInfo.StringValue = valueBuilder.ToString(); tokenInfo.StringValue = valueBuilder.ToString();
return true; return status == 0;
} }
private bool ContinueLexingStringLiteral(ref TokenInfo tokenInfo) private bool ContinueLexingStringLiteral(ref TokenInfo tokenInfo)
@ -520,7 +544,7 @@ namespace Parser.Internal
var parsedNumber = ContinueLexingNumber(ref tokenInfo); var parsedNumber = ContinueLexingNumber(ref tokenInfo);
if (!parsedNumber) if (!parsedNumber)
{ {
throw new ParsingException($"Unexpected character \"{Window.PeekChar()}\" while parsing a number"); Diagnostics.ReportUnexpectedCharacterWhileParsingNumber(new TextSpan(Window.Position.Offset, 1), Window.PeekChar());
} }
return true; return true;
case '=': case '=':
@ -542,7 +566,7 @@ namespace Parser.Internal
var possiblyNumberToken2 = ContinueLexingNumber(ref tokenInfo); var possiblyNumberToken2 = ContinueLexingNumber(ref tokenInfo);
if (!possiblyNumberToken2) if (!possiblyNumberToken2)
{ {
throw new ParsingException($"Unexpected character \"{Window.PeekChar()}\" while parsing a number"); Diagnostics.ReportUnexpectedCharacterWhileParsingNumber(new TextSpan(Window.Position.Offset, 1), Window.PeekChar());
} }
return true; return true;
@ -732,9 +756,11 @@ namespace Parser.Internal
tokenInfo.Kind = TokenKind.EndOfFile; tokenInfo.Kind = TokenKind.EndOfFile;
return true; return true;
default: default:
throw new ParsingException( Diagnostics.ReportUnknownSymbol(new TextSpan(Window.Position.Offset, 1), character);
$"Unknown symbol \"{character}\" at {Window.Position}." Window.ConsumeChar();
); tokenInfo.Kind = TokenKind.BadToken;
tokenInfo.Text = character.ToString();
return true;
} }
} }

View File

@ -94,6 +94,16 @@ namespace Parser.Internal
return c == '\n' || c == '\r' || c == '\0'; return c == '\n' || c == '\r' || c == '\0';
} }
/// <summary>
/// Checks whether <paramref name="c"/> is the NUL character ('\0'), which serves as the end-of-file marker.
/// </summary>
/// <param name="c">Character to test.</param>
/// <returns><c>true</c> only when <paramref name="c"/> equals '\0'.</returns>
public static bool IsEof(char c) => c == '\0';
/// <summary>
/// Checks whether <paramref name="c"/> is an end-of-line character.
/// </summary>
/// <param name="c">Character to test.</param>
/// <returns><c>true</c> for line feed ('\n') or carriage return ('\r'); <c>false</c> for anything else.</returns>
public static bool IsEol(char c)
{
    switch (c)
    {
        case '\n':
        case '\r':
            return true;
        default:
            return false;
    }
}
public static bool IsWhitespace(char c) public static bool IsWhitespace(char c)
{ {
return c == ' ' || c == '\t' || c == '\n'; return c == ' ' || c == '\t' || c == '\n';

View File

@ -6,22 +6,23 @@
// SYNTAX TOKENS // SYNTAX TOKENS
None = 0, None = 0,
BadToken = 1,
// The lexer puts a virtual "end of file" token at the end of the parsed file. // The lexer puts a virtual "end of file" token at the end of the parsed file.
EndOfFile = 1, EndOfFile = 2,
// Identifier: could be a reserved word, a variable name, a class name, etc. // Identifier: could be a reserved word, a variable name, a class name, etc.
Identifier = 2, Identifier = 3,
// Number literal: 123, 45.678, 2e-5, etc. // Number literal: 123, 45.678, 2e-5, etc.
NumberLiteral = 3, NumberLiteral = 4,
// String literal: 'abc', '123', etc. The "usual" string literals are single-quoted and are just char arrays. // String literal: 'abc', '123', etc. The "usual" string literals are single-quoted and are just char arrays.
StringLiteral = 4, StringLiteral = 5,
// Double-quoted string literal: "abc", "123", etc. These are the "new" string literals that are more like strings // Double-quoted string literal: "abc", "123", etc. These are the "new" string literals that are more like strings
// and less like char arrays (for example, char arrays could be columns instead of rows, or even multi-dimensional). // and less like char arrays (for example, char arrays could be columns instead of rows, or even multi-dimensional).
DoubleQuotedStringLiteral = 5, DoubleQuotedStringLiteral = 6,
// This is for supporting "command statements" like // This is for supporting "command statements" like
// > cd some/+folder/ // > cd some/+folder/
// In this example, "some/+folder/" should be treated as a string literal (for example, the "+" there should be a part // In this example, "some/+folder/" should be treated as a string literal (for example, the "+" there should be a part
// of it, and not parsed as a binary operator). // of it, and not parsed as a binary operator).
UnquotedStringLiteral = 6, UnquotedStringLiteral = 7,
// trivia // trivia