# error.py - error handling for DHParser
#
# Copyright 2016 by Eckhart Arnold (arnold@badw.de)
# Bavarian Academy of Sciences an Humanities (badw.de)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied. See the License for the specific language governing
# permissions and limitations under the License.
"""
Module ``error`` defines class Error and a few helpful functions that are
needed for error reporting of DHParser. Usually, what is of interest are
the string representations of the error objects. For example::
from DHParser import compile_source, has_errors
result, errors, ast = compile_source(source, preprocessor, grammar,
transformer, compiler)
if errors:
for error in errors:
print(error)
if has_errors(errors):
print("There have been fatal errors!")
sys.exit(1)
else:
print("There have been warnings, but no errors.")
The central class of module DHParser's ``error`` is the ``Error``-class.
The easiest way to create an error object is by instantiating
the Error class with an error message and a source position::
>>> error = Error('Something went wrong', 123)
>>> print(error)
Error (1000): Something went wrong
However, in order to report errors, usually at least a line and
column-number
"""
from __future__ import annotations
from collections import namedtuple
import functools
import os
from typing import Iterable, Iterator, Union, List, Sequence, Callable
from DHParser.toolkit import linebreaks, line_col, is_filename, TypeAlias
__all__ = ('SourceMap',
'SourceLocation',
'SourceMapFunc',
'ErrorCode',
'Error',
'is_fatal',
'is_error',
'is_warning',
'has_errors',
'only_errors',
'add_source_locations',
'canonical_error_strings',
'NO_ERROR',
'NOTICE',
'WARNING',
'ERROR',
'FATAL',
'HIGHEST',
'RESUME_NOTICE',
'REDECLARED_TOKEN_WARNING',
'UNUSED_ERROR_HANDLING_WARNING',
'LEFT_RECURSION_WARNING',
'INFINITE_LOOP_WARNING',
'UNDEFINED_SYMBOL_IN_TRANSTABLE_WARNING',
'CANNOT_VERIFY_TRANSTABLE_WARNING',
'CAPTURE_DROPPED_CONTENT_WARNING',
'ZERO_LENGTH_CAPTURE_POSSIBLE_WARNING',
'OPTIONAL_REDUNDANTLY_NESTED_WARNING',
'UNCONNECTED_SYMBOL_WARNING',
'REDUNDANT_PARSER_WARNING',
'UNUSED_MACRO_ARGUMENTS_WARNING',
'REORDERING_OF_ALTERNATIVES_REQUIRED',
'MANDATORY_CONTINUATION',
'MANDATORY_CONTINUATION_AT_EOF',
'MANDATORY_CONTINUATION_AT_EOF_NON_ROOT',
'CAPTURE_STACK_NOT_EMPTY_NON_ROOT_ONLY',
'AUTOCAPTURED_SYMBOL_NOT_CLEARED_NON_ROOT',
'ERROR_WHILE_RECOVERING_FROM_ERROR',
'PARSER_NEVER_TOUCHES_DOCUMENT',
'PARSER_NEVER_TOUCHES_DOCUMENT',
'PARSER_LOOKAHEAD_FAILURE_ONLY',
'PARSER_STOPPED_BEFORE_END',
'PARSER_STOPPED_ON_RETRY',
'PARSER_LOOKAHEAD_MATCH_ONLY',
'CUSTOM_PARSER_FAILURE',
'UNDEFINED_SYMBOL',
'WRONG_NUMBER_OF_ARGUMENTS',
'UNKNOWN_MACRO_ARGUMENT',
'UNDEFINED_MACRO',
'RECURSIVE_MACRO_CALL',
'CAPTURE_STACK_NOT_EMPTY',
'CAPTURE_STACK_NOT_EMPTY_WARNING',
'AUTOCAPTURED_SYMBOL_NOT_CLEARED',
'MALFORMED_ERROR_STRING',
'AMBIGUOUS_ERROR_HANDLING',
'REDEFINED_DIRECTIVE',
'UNDEFINED_RETRIEVE',
'DIRECTIVE_FOR_NONEXISTANT_SYMBOL',
'INAPPROPRIATE_SYMBOL_FOR_DIRECTIVE',
'PEG_EXPRESSION_IN_DIRECTIVE_WO_BRACKETS',
'CAPTURE_WITHOUT_PARSERNAME',
'LOOKAHEAD_WITH_OPTIONAL_PARSER',
'BADLY_NESTED_OPTIONAL_PARSER',
'BAD_MANDATORY_SETUP',
'DUPLICATE_PARSERS_IN_ALTERNATIVE',
'SYMBOL_NAME_IS_PYTHON_KEYWORD',
'BAD_ORDER_OF_ALTERNATIVES',
'BAD_REPETITION_COUNT',
'MALFORMED_REGULAR_EXPRESSION',
'EMPTY_GRAMMAR_ERROR',
'STRUCTURAL_ERROR_IN_AST',
'TREE_PROCESSING_CRASH',
'COMPILER_CRASH',
'AST_TRANSFORM_CRASH',
'RECURSION_DEPTH_LIMIT_HIT')
#######################################################################
#
# source mapping
#
#######################################################################
# class SourceMap(NamedTuple):
# original_name: str # nome or path or uri of the original source file
# positions: List[int] # a list of locations
# offsets: List[int] # the corresponding offsets to be added from these locations onward
# file_names: List[str] # list of file_names to which the source locations relate
# originals_dict: Dict[str, Union[str, StringView]] # File names => (included) source texts
# SourceMap = NamedTuple('SourceMap',
# [('original_name', str),
# ('positions', List[int]),
# ('offsets', List[int]),
# ('file_names', List[str]),
# ('originals_dict', Dict[str, Union[str, StringView]])])
# SourceMap.__module__ = __name__
SourceMap = namedtuple('SourceMap',
['original_name', ## type: str
'positions', ## type: List[int]
'offsets', ## type: List[int]
'file_names', ## type: List[str]
'originals_dict', ## type: Dict[str, Union[str, StringView]]
], module=__name__)
# class SourceLocation(NamedTuple):
# original_name: str # the file name (or path or uri) of the source code
# original_text: Union[str, StringView] # the source code itself
# pos: int # a position within the code
# SourceLocation = NamedTuple('SourceLocation',
# [('original_name', str),
# ('original_text', Union[str, StringView]),
# ('pos', int)])
SourceLocation = namedtuple('SourceLocation',
['original_name', ## type: str
'original_text', ## type: Union[str, StringView]
'pos', ## type: int
], module=__name__)
SourceMapFunc: TypeAlias = Union[Callable[[int], SourceLocation], functools.partial]
#######################################################################
#
# error codes
#
#######################################################################
[docs]
class ErrorCode(int):
pass
# error levels
NO_ERROR = ErrorCode(0)
NOTICE = ErrorCode(1)
WARNING = ErrorCode(100)
ERROR = ErrorCode(1000)
FATAL = ErrorCode(10000)
HIGHEST = FATAL
# notice codes
RESUME_NOTICE = ErrorCode(50)
# warning codes
REDECLARED_TOKEN_WARNING = ErrorCode(120)
UNUSED_ERROR_HANDLING_WARNING = ErrorCode(130)
LEFT_RECURSION_WARNING = ErrorCode(140) # obsolete!
INFINITE_LOOP_WARNING = ErrorCode(150)
UNDEFINED_SYMBOL_IN_TRANSTABLE_WARNING = ErrorCode(610)
CANNOT_VERIFY_TRANSTABLE_WARNING = ErrorCode(620)
CAPTURE_DROPPED_CONTENT_WARNING = ErrorCode(630)
CAPTURE_STACK_NOT_EMPTY_WARNING = ErrorCode(640)
ZERO_LENGTH_CAPTURE_POSSIBLE_WARNING = ErrorCode(650)
OPTIONAL_REDUNDANTLY_NESTED_WARNING = ErrorCode(660)
UNCONNECTED_SYMBOL_WARNING = ErrorCode(670)
REDUNDANT_PARSER_WARNING = ErrorCode(680)
UNUSED_MACRO_ARGUMENTS_WARNING = ErrorCode(690)
REORDERING_OF_ALTERNATIVES_REQUIRED = ErrorCode(710)
# error codes
MANDATORY_CONTINUATION = ErrorCode(1010)
MANDATORY_CONTINUATION_AT_EOF = ErrorCode(1015)
MANDATORY_CONTINUATION_AT_EOF_NON_ROOT = ErrorCode(1017)
PARSER_NEVER_TOUCHES_DOCUMENT = ErrorCode(1020)
PARSER_LOOKAHEAD_FAILURE_ONLY = ErrorCode(1030)
PARSER_STOPPED_BEFORE_END = ErrorCode(1040)
PARSER_STOPPED_ON_RETRY = ErrorCode(1042)
PARSER_LOOKAHEAD_MATCH_ONLY = ErrorCode(1045)
CAPTURE_STACK_NOT_EMPTY = ErrorCode(1050)
CAPTURE_STACK_NOT_EMPTY_NON_ROOT_ONLY = ErrorCode(1052)
AUTOCAPTURED_SYMBOL_NOT_CLEARED = ErrorCode(1055)
AUTOCAPTURED_SYMBOL_NOT_CLEARED_NON_ROOT = ErrorCode(1057)
MALFORMED_ERROR_STRING = ErrorCode(1060)
AMBIGUOUS_ERROR_HANDLING = ErrorCode(1070)
REDEFINED_DIRECTIVE = ErrorCode(1080)
UNDEFINED_RETRIEVE = ErrorCode(1090)
DIRECTIVE_FOR_NONEXISTANT_SYMBOL = ErrorCode(1100)
INAPPROPRIATE_SYMBOL_FOR_DIRECTIVE = ErrorCode(1110)
PEG_EXPRESSION_IN_DIRECTIVE_WO_BRACKETS = ErrorCode(1120)
CUSTOM_PARSER_FAILURE = ErrorCode(1130)
UNDEFINED_SYMBOL = ErrorCode(1140)
WRONG_NUMBER_OF_ARGUMENTS = ErrorCode(1160)
UNKNOWN_MACRO_ARGUMENT = ErrorCode(1170)
UNDEFINED_MACRO = ErrorCode(1180)
RECURSIVE_MACRO_CALL = ErrorCode(1190)
ERROR_WHILE_RECOVERING_FROM_ERROR = ErrorCode(1301)
# EBNF-specific and static analysis errors
CAPTURE_WITHOUT_PARSERNAME = ErrorCode(1510)
LOOKAHEAD_WITH_OPTIONAL_PARSER = ErrorCode(1520)
BADLY_NESTED_OPTIONAL_PARSER = ErrorCode(1530)
BAD_MANDATORY_SETUP = ErrorCode(1550)
SYMBOL_NAME_IS_PYTHON_KEYWORD = ErrorCode(1555)
DUPLICATE_PARSERS_IN_ALTERNATIVE = ErrorCode(1560)
BAD_ORDER_OF_ALTERNATIVES = ErrorCode(1570)
BAD_REPETITION_COUNT = ErrorCode(1580)
MALFORMED_REGULAR_EXPRESSION = ErrorCode(1585)
EMPTY_GRAMMAR_ERROR = ErrorCode(1590)
# Other Errors
PYTHON_ERROR_IN_TEST = ErrorCode(1710)
# fatal errors
TREE_PROCESSING_CRASH = ErrorCode(10100)
COMPILER_CRASH = ErrorCode(10200)
AST_TRANSFORM_CRASH = ErrorCode(10300)
RECURSION_DEPTH_LIMIT_HIT = ErrorCode(10400)
STRUCTURAL_ERROR_IN_AST = ErrorCode(10500)
#######################################################################
#
# class Error
#
#######################################################################
[docs]
class Error:
"""The Error class encapsulates the all information for a single
error.
:ivar message: the error message as text string
:ivar pos: the position where the error occurred in the preprocessed text
:ivar code: the error-code, which also indicates the severity of the
error::
========= ===========
code severity
========= ===========
0 no error
< 100 notice
< 1000 warning
< 10000 error
>= 10000 fatal error
========= ===========
In cas of a fatal error (error code >= 10000), no further compilation
stages will be processed, because it is assumed that the syntax tree
is too distorted for further processing.
:ivar orig_pos: the position of the error in the original source file,
not in the preprocessed document. This is a write-once value!
:ivar orig_doc: the name or path or url of the original source file to
which ``orig_pos`` is related. This is relevant, if the preprocessed
document has been plugged together from several source files.
:ivar line: the line number where the error occurred in the original text.
Lines are counted from 1 onward.
:ivar column: the column where the error occurred in the original text.
Columns are counted from 1 onward.
:ivar length: the length in characters of the faulty passage (default is 1)
:ivar end_line: the line number of the position after the last character
covered by the error in the original source.
:ivar end_column: the column number of the position after the last character
covered by the error in the original source.
:ivar related: a sequence of related errors.
"""
__slots__ = ['message', 'code', '_pos', 'line', 'column', 'length',
'end_line', 'end_column', 'related', 'orig_pos', 'orig_doc',
'relatedUri']
def __init__(self, message: str, pos: int, code: ErrorCode = ERROR,
line: int = -1, column: int = -1, length: int = 1,
related: Sequence['Error'] = [],
orig_pos: int = -1, orig_doc: str = '') -> None:
assert isinstance(code, ErrorCode)
assert not isinstance(pos, ErrorCode)
assert code >= 0
assert pos >= 0
assert length >= 1
self.message = message # type: str
self._pos = pos # type: int
# Add some logic to avoid double assignment of the same error code?
# Problem: Same code might legitimately be used by two different parsers/compilers
self.code = code # type: ErrorCode
self.orig_pos = orig_pos # type: int
self.orig_doc = orig_doc # type: str
self.line = line # type: int
self.column = column # type: int
# support for Language Server Protocol Diagnostics
# see: https://microsoft.github.io/language-server-protocol/specifications/specification-current/#diagnostic
self.length = length # type: int
self.end_line = -1 # type: int
self.end_column = -1 # type: int
self.related = tuple(related) # type: Sequence['Error']
def _normalize_msg(self, msg: str) -> str:
"""A hack to support recognition of duplicate error messages when using
produced as articaft by the seed and grow algorithm that catches
left-recursion in the parser. See :py:meth:`~parse.Forward.__call__`."""
if self.code == RESUME_NOTICE:
i = msg.find('with ')
if i > 0: msg = msg[:i]
return msg
def __eq__(self, other):
return self._normalize_msg(self.message) == self._normalize_msg(other.message) \
and self.code == other.code \
and self._pos == other._pos # and self.length == other.length
def __hash__(self):
return hash((self._normalize_msg(self.message), self.code, self._pos))
def __str__(self):
if self.orig_doc and self.orig_doc != 'UNKNOWN_FILE':
prefix = self.orig_doc + ':'
else: prefix = ''
if self.line > 0:
# prefix += "%i:%i: " % (max(self.line, 0), max(self.column, 0))
prefix += f"{max(self.line, 0)}:{max(self.column, 0)}: "
# return prefix + "%s (%i): %s" % (self.severity, self.code, self.message)
return prefix + f"{self.severity} ({self.code}): {self.message}"
def __repr__(self):
return 'Error("%s", %s, %i, %i, %i, %i)' \
% (self.message, repr(self.code), self.pos, self.orig_pos, self.line, self.column)
@property
def pos(self) -> int:
return self._pos
@pos.setter
def pos(self, value: int):
self._pos = value
# reset line and column values, because they might now not be valid anymore
self.orig_pos = -1
self.line, self.column = -1, -1
self.end_line, self.end_column = -1, -1
@property
def severity(self):
"""Returns a string representation of the error level, e.g. "warning"."""
if self.code < WARNING:
return "Notice"
elif self.code < ERROR:
return "Warning"
elif self.code < FATAL:
return "Error"
else:
return "Fatal"
[docs]
def visualize(self, document: str) -> str:
"""Shows the line of the document and the position where the error
occurred."""
start = document.rfind('\n', 0, self.pos) + 1
stop = document.find('\n', self.pos)
return document[start:stop] + '\n' + ' ' * (self.pos - start) + '^\n'
[docs]
def signature(self) -> bytes:
"""Returns a signature to quickly check the equality of errors"""
return (self.line << 32 | self.column << 16 | self.code).to_bytes(8, 'big')
[docs]
def range_obj(self) -> dict:
"""Returns the range (position plus length) of the error as an LSP-Range-Object.
https://microsoft.github.io/language-server-protocol/specifications/specification-current/#range
"""
assert self.line >= 1 and self.column >= 1 and self.end_line >= 1 and self.end_column >= 1
return {'start': {'line': self.line - 1, 'character': self.column - 1},
'end': {'line': self.end_line - 1, 'character': self.end_column - 1}}
[docs]
def diagnostic_obj(self) -> dict:
"""Returns the Error as Language Server Protocol Diagnostic object.
https://microsoft.github.io/language-server-protocol/specifications/specification-current/#diagnostic
"""
def relatedObj(relatedError: 'Error') -> dict:
uri = relatedError.orig_doc
return {
'location': {'uri': uri, 'range': relatedError.range_obj()},
'message': relatedError.message
}
if self.code < WARNING:
severity = 3
elif self.code < ERROR:
severity = 2
else:
severity = 1
diagnostic = {
'range': self.range_obj(),
'severity': severity,
'code': self.code,
'source': 'DHParser',
'message': self.message,
# 'tags': []
}
if self.related:
diagnostic['relatedInformation'] = [relatedObj(err) for err in self.related]
return diagnostic
[docs]
def is_warning(code: Union[Error, int]) -> bool:
"""Returns True, if error is merely a warning or a message."""
if isinstance(code, Error): code = code.code
return code < ERROR
[docs]
def is_error(code: Union[Error, int]) -> bool:
"""Returns True, if error is a (fatal) error, not just a warning."""
if isinstance(code, Error): code = code.code
return code >= ERROR
[docs]
def is_fatal(code: Union[Error, int]) -> bool:
"""Returns True, ir error is fatal. Fatal errors are typically raised
when a crash (i.e. Python exception) occurs at later stages of the
processing pipeline (e.g. ast transformation, compiling). """
if isinstance(code, Error): code = code.code
return code >= FATAL
# def Warning(message: str, pos, code: ErrorCode = WARNING,
# orig_pos: int = -1, line: int = -1, column: int = -1) -> Error:
# """
# Syntactic sugar for creating Error-objects that contain only a warning.
# Raises a ValueError if `code`` is not within the range for warnings.
# """
# if not is_warning(code):
# raise ValueError("Tried to create a warning with a error code {}. "
# "Warning codes must be smaller than {}".format(code, ERROR))
# return Error(message, pos, code, orig_pos, line, column)
[docs]
def has_errors(messages: Iterable[Error], level: ErrorCode = ERROR) -> bool:
"""
Returns True, if at least one entry in ``messages`` has at
least the given error ``level``.
"""
for err_obj in messages:
if err_obj.code >= level:
return True
return False
[docs]
def only_errors(messages: Iterable[Error], level: ErrorCode = ERROR) -> Iterator[Error]:
"""
Returns an Iterator that yields only those messages that have
at least the given error level.
"""
return (err for err in messages if err.code >= level)
#######################################################################
#
# support for canonical representation, i.e.
# filename:line:column:severity (code):error string
#
#######################################################################
[docs]
def add_source_locations(errors: List[Error], source_mapping: SourceMapFunc):
"""Adds (or adjusts) line and column numbers of error messages inplace.
Args:
errors: The list of errors as returned by the method
``errors()`` of a Node object
source_mapping: A function that maps error positions to their
positions in the original source file.
"""
lb_dict = {}
for err in errors:
if err.pos < 0:
raise ValueError(f'Illegal error position: {err.pos} Must be >= 0!')
if err.orig_pos < 0: # do not overwrite orig_pos if already set
err.orig_doc, orig_text, err.orig_pos = source_mapping(err.pos)
lbreaks = lb_dict.setdefault(orig_text, linebreaks(orig_text))
err.line, err.column = line_col(lbreaks, err.orig_pos)
if err.orig_pos + err.length > lbreaks[-1]:
err.length = lbreaks[-1] - err.orig_pos # err.length should not exceed text length
err.end_line, err.end_column = line_col(lbreaks, err.orig_pos + err.length)
[docs]
def canonical_error_strings(errors: List[Error]) -> List[str]:
"""Returns the list of error strings in canonical form that can be parsed by most
editors, i.e. "relative filepath : line : column : severity (code) : error string"
"""
if errors:
error_strings = []
for err in errors:
source_file_name = err.orig_doc
if source_file_name and is_filename(source_file_name):
cwd = os.getcwd()
if source_file_name.startswith(cwd):
rel_path = source_file_name[len(cwd):]
else:
rel_path = source_file_name
err_str = str(err)
err_str = err_str[err_str.find(':'):]
error_strings.append(rel_path + err_str)
else:
error_strings.append(str(err))
else:
error_strings = []
return error_strings