Source code for error

# error.py - error handling for DHParser
#
# Copyright 2016  by Eckhart Arnold (arnold@badw.de)
#                 Bavarian Academy of Sciences and Humanities (badw.de)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.  See the License for the specific language governing
# permissions and limitations under the License.

"""
Module ``error`` defines class Error and a few helpful functions that are
needed for error reporting of DHParser. Usually, what is of interest are
the string representations of the error objects. For example::

    from DHParser import compile_source, has_errors

    result, errors, ast = compile_source(source, preprocessor, grammar,
                                         transformer, compiler)
    if errors:
        for error in errors:
            print(error)

        if has_errors(errors):
            print("There have been fatal errors!")
            sys.exit(1)
        else:
            print("There have been warnings, but no errors.")

The central class of module DHParser's ``error``  is the ``Error``-class.
The easiest way to create an error object is by instantiating
the Error class with an error message and a source position::

    >>> error = Error('Something went wrong', 123)
    >>> print(error)
    Error (1000): Something went wrong

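However, in order to report errors, usually at least a line and
column number are needed as well. These can be added to error objects
after the fact with :py:func:`add_source_locations`.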

"""

from __future__ import annotations

from collections import namedtuple
import functools
import os
from typing import Iterable, Iterator, Union, List, Sequence, Callable

from DHParser.toolkit import linebreaks, line_col, is_filename, TypeAlias


# Public names of this module. Every error-code constant defined below is
# exported exactly once, so that ``from DHParser.error import *`` makes the
# complete set of codes available.
__all__ = ('SourceMap',
           'SourceLocation',
           'SourceMapFunc',
           'ErrorCode',
           'Error',
           'is_fatal',
           'is_error',
           'is_warning',
           'has_errors',
           'only_errors',
           'add_source_locations',
           'canonical_error_strings',
           'NO_ERROR',
           'NOTICE',
           'WARNING',
           'ERROR',
           'FATAL',
           'HIGHEST',
           'RESUME_NOTICE',
           'REDECLARED_TOKEN_WARNING',
           'UNUSED_ERROR_HANDLING_WARNING',
           'LEFT_RECURSION_WARNING',
           'INFINITE_LOOP_WARNING',
           'UNDEFINED_SYMBOL_IN_TRANSTABLE_WARNING',
           'CANNOT_VERIFY_TRANSTABLE_WARNING',
           'CAPTURE_DROPPED_CONTENT_WARNING',
           'ZERO_LENGTH_CAPTURE_POSSIBLE_WARNING',
           'OPTIONAL_REDUNDANTLY_NESTED_WARNING',
           'UNCONNECTED_SYMBOL_WARNING',
           'REDUNDANT_PARSER_WARNING',
           'UNUSED_MACRO_ARGUMENTS_WARNING',
           'REORDERING_OF_ALTERNATIVES_REQUIRED',
           'MANDATORY_CONTINUATION',
           'MANDATORY_CONTINUATION_AT_EOF',
           'MANDATORY_CONTINUATION_AT_EOF_NON_ROOT',
           'CAPTURE_STACK_NOT_EMPTY_NON_ROOT_ONLY',
           'AUTOCAPTURED_SYMBOL_NOT_CLEARED_NON_ROOT',
           'ERROR_WHILE_RECOVERING_FROM_ERROR',
           'PARSER_NEVER_TOUCHES_DOCUMENT',
           'PARSER_LOOKAHEAD_FAILURE_ONLY',
           'PARSER_STOPPED_BEFORE_END',
           'PARSER_STOPPED_ON_RETRY',
           'PARSER_LOOKAHEAD_MATCH_ONLY',
           'CUSTOM_PARSER_FAILURE',
           'UNDEFINED_SYMBOL',
           'WRONG_NUMBER_OF_ARGUMENTS',
           'UNKNOWN_MACRO_ARGUMENT',
           'UNDEFINED_MACRO',
           'RECURSIVE_MACRO_CALL',
           'CAPTURE_STACK_NOT_EMPTY',
           'CAPTURE_STACK_NOT_EMPTY_WARNING',
           'AUTOCAPTURED_SYMBOL_NOT_CLEARED',
           'MALFORMED_ERROR_STRING',
           'AMBIGUOUS_ERROR_HANDLING',
           'REDEFINED_DIRECTIVE',
           'UNDEFINED_RETRIEVE',
           'DIRECTIVE_FOR_NONEXISTANT_SYMBOL',
           'INAPPROPRIATE_SYMBOL_FOR_DIRECTIVE',
           'PEG_EXPRESSION_IN_DIRECTIVE_WO_BRACKETS',
           'CAPTURE_WITHOUT_PARSERNAME',
           'LOOKAHEAD_WITH_OPTIONAL_PARSER',
           'BADLY_NESTED_OPTIONAL_PARSER',
           'BAD_MANDATORY_SETUP',
           'DUPLICATE_PARSERS_IN_ALTERNATIVE',
           'SYMBOL_NAME_IS_PYTHON_KEYWORD',
           'BAD_ORDER_OF_ALTERNATIVES',
           'BAD_REPETITION_COUNT',
           'MALFORMED_REGULAR_EXPRESSION',
           'EMPTY_GRAMMAR_ERROR',
           'PYTHON_ERROR_IN_TEST',
           'STRUCTURAL_ERROR_IN_AST',
           'TREE_PROCESSING_CRASH',
           'COMPILER_CRASH',
           'AST_TRANSFORM_CRASH',
           'RECURSION_DEPTH_LIMIT_HIT')


#######################################################################
#
#  source mapping
#
#######################################################################


# class SourceMap(NamedTuple):
#     original_name: str           # name or path or uri of the original source file
#     positions: List[int]        # a list of locations
#     offsets: List[int]          # the corresponding offsets to be added from these locations onward
#     file_names: List[str]       # list of file_names to which the source locations relate
#     originals_dict: Dict[str, Union[str, StringView]]  # File names => (included) source texts

# SourceMap = NamedTuple('SourceMap',
#     [('original_name', str),
#      ('positions', List[int]),
#      ('offsets', List[int]),
#      ('file_names', List[str]),
#      ('originals_dict', Dict[str, Union[str, StringView]])])
# SourceMap.__module__ = __name__

# SourceMap is a plain named tuple (no typed NamedTuple is used here); the
# intended type of each field is given in the comment next to its name.
SourceMap = namedtuple(
    'SourceMap',
    ('original_name',    # str: name or path or uri of the original source file
     'positions',        # List[int]: a list of locations
     'offsets',          # List[int]: the offsets to be added from these locations onward
     'file_names',       # List[str]: file names to which the source locations relate
     'originals_dict'),  # Dict[str, Union[str, StringView]]: file name => (included) source text
    module=__name__)

# class SourceLocation(NamedTuple):
#     original_name: str          # the file name (or path or uri) of the source code
#     original_text: Union[str, StringView]  # the source code itself
#     pos: int                  # a position within the code

# SourceLocation = NamedTuple('SourceLocation',
#     [('original_name', str),
#      ('original_text', Union[str, StringView]),
#      ('pos', int)])

# SourceLocation is a plain named tuple; the intended type of each field is
# given in the comment next to its name.
SourceLocation = namedtuple(
    'SourceLocation',
    ('original_name',  # str: the file name (or path or uri) of the source code
     'original_text',  # Union[str, StringView]: the source code itself
     'pos'),           # int: a position within the code
    module=__name__)

# Type of the callables that map a position (an int, e.g. ``err.pos`` as
# passed by add_source_locations()) to a SourceLocation.
# functools.partial objects are admitted explicitly as well.
SourceMapFunc: TypeAlias = Union[Callable[[int], SourceLocation], functools.partial]


#######################################################################
#
#  error codes
#
#######################################################################



[docs]
class ErrorCode(int):
    """Integer subtype that marks a number as an error code.

    The class adds no behavior of its own to ``int``; it merely allows
    error codes to be told apart from other integers by their type.
    """



# error levels

# Severity thresholds. Concrete error codes are classified by comparing
# them against these values (see Error.severity).
NO_ERROR = ErrorCode(0)
NOTICE = ErrorCode(1)
WARNING = ErrorCode(100)
ERROR = ErrorCode(1000)
FATAL = ErrorCode(10000)
# the highest severity threshold is the one for fatal errors
HIGHEST = FATAL

# notice codes

# --- notices (codes below WARNING) ---

RESUME_NOTICE = ErrorCode(50)

# --- warnings (codes from WARNING up to, but excluding, ERROR) ---

REDECLARED_TOKEN_WARNING = ErrorCode(120)
UNUSED_ERROR_HANDLING_WARNING = ErrorCode(130)
LEFT_RECURSION_WARNING = ErrorCode(140)  # obsolete!
INFINITE_LOOP_WARNING = ErrorCode(150)

UNDEFINED_SYMBOL_IN_TRANSTABLE_WARNING = ErrorCode(610)
CANNOT_VERIFY_TRANSTABLE_WARNING = ErrorCode(620)
CAPTURE_DROPPED_CONTENT_WARNING = ErrorCode(630)
CAPTURE_STACK_NOT_EMPTY_WARNING = ErrorCode(640)
ZERO_LENGTH_CAPTURE_POSSIBLE_WARNING = ErrorCode(650)
OPTIONAL_REDUNDANTLY_NESTED_WARNING = ErrorCode(660)
UNCONNECTED_SYMBOL_WARNING = ErrorCode(670)
REDUNDANT_PARSER_WARNING = ErrorCode(680)
UNUSED_MACRO_ARGUMENTS_WARNING = ErrorCode(690)

REORDERING_OF_ALTERNATIVES_REQUIRED = ErrorCode(710)

# error codes

# --- errors (codes from ERROR up to, but excluding, FATAL) ---

MANDATORY_CONTINUATION = ErrorCode(1010)
MANDATORY_CONTINUATION_AT_EOF = ErrorCode(1015)
MANDATORY_CONTINUATION_AT_EOF_NON_ROOT = ErrorCode(1017)
PARSER_NEVER_TOUCHES_DOCUMENT = ErrorCode(1020)
PARSER_LOOKAHEAD_FAILURE_ONLY = ErrorCode(1030)
PARSER_STOPPED_BEFORE_END = ErrorCode(1040)
PARSER_STOPPED_ON_RETRY = ErrorCode(1042)
PARSER_LOOKAHEAD_MATCH_ONLY = ErrorCode(1045)
CAPTURE_STACK_NOT_EMPTY = ErrorCode(1050)
CAPTURE_STACK_NOT_EMPTY_NON_ROOT_ONLY = ErrorCode(1052)
AUTOCAPTURED_SYMBOL_NOT_CLEARED = ErrorCode(1055)
AUTOCAPTURED_SYMBOL_NOT_CLEARED_NON_ROOT = ErrorCode(1057)
MALFORMED_ERROR_STRING = ErrorCode(1060)
AMBIGUOUS_ERROR_HANDLING = ErrorCode(1070)
REDEFINED_DIRECTIVE = ErrorCode(1080)
UNDEFINED_RETRIEVE = ErrorCode(1090)
DIRECTIVE_FOR_NONEXISTANT_SYMBOL = ErrorCode(1100)
INAPPROPRIATE_SYMBOL_FOR_DIRECTIVE = ErrorCode(1110)
PEG_EXPRESSION_IN_DIRECTIVE_WO_BRACKETS = ErrorCode(1120)
CUSTOM_PARSER_FAILURE = ErrorCode(1130)
UNDEFINED_SYMBOL = ErrorCode(1140)
WRONG_NUMBER_OF_ARGUMENTS = ErrorCode(1160)
UNKNOWN_MACRO_ARGUMENT = ErrorCode(1170)
UNDEFINED_MACRO = ErrorCode(1180)
RECURSIVE_MACRO_CALL = ErrorCode(1190)

ERROR_WHILE_RECOVERING_FROM_ERROR = ErrorCode(1301)

# --- EBNF-specific and static analysis errors ---

CAPTURE_WITHOUT_PARSERNAME = ErrorCode(1510)
LOOKAHEAD_WITH_OPTIONAL_PARSER = ErrorCode(1520)
BADLY_NESTED_OPTIONAL_PARSER = ErrorCode(1530)
BAD_MANDATORY_SETUP = ErrorCode(1550)
SYMBOL_NAME_IS_PYTHON_KEYWORD = ErrorCode(1555)
DUPLICATE_PARSERS_IN_ALTERNATIVE = ErrorCode(1560)
BAD_ORDER_OF_ALTERNATIVES = ErrorCode(1570)
BAD_REPETITION_COUNT = ErrorCode(1580)
MALFORMED_REGULAR_EXPRESSION = ErrorCode(1585)
EMPTY_GRAMMAR_ERROR = ErrorCode(1590)

# --- other errors ---

PYTHON_ERROR_IN_TEST = ErrorCode(1710)

# fatal errors

# --- fatal errors (codes from FATAL upward) ---

TREE_PROCESSING_CRASH = ErrorCode(10100)
COMPILER_CRASH = ErrorCode(10200)
AST_TRANSFORM_CRASH = ErrorCode(10300)
RECURSION_DEPTH_LIMIT_HIT = ErrorCode(10400)
STRUCTURAL_ERROR_IN_AST = ErrorCode(10500)


#######################################################################
#
#  class Error
#
#######################################################################



[docs]
class Error:
    """The Error class encapsulates all the information for a single
    error.

    :ivar message:  the error message as text string
    :ivar pos:  the position where the error occurred in the preprocessed text
    :ivar code:  the error-code, which also indicates the severity of the
        error::

               ========= ===========
               code      severity
               ========= ===========
               0         no error
               < 100     notice
               < 1000    warning
               < 10000   error
               >= 10000  fatal error
               ========= ===========

        In case of a fatal error (error code >= 10000), no further compilation
        stages will be processed, because it is assumed that the syntax tree
        is too distorted for further processing.

    :ivar orig_pos:  the position of the error in the original source file,
        not in the preprocessed document. This is a write-once value!
    :ivar orig_doc:  the name or path or url of the original source file to
        which ``orig_pos`` is related. This is relevant, if the preprocessed
        document has been plugged together from several source files.
    :ivar line:  the line number where the error occurred in the original text.
        Lines are counted from 1 onward.
    :ivar column:  the column where the error occurred in the original text.
        Columns are counted from 1 onward.
    :ivar length:  the length in characters of the faulty passage (default is 1)
    :ivar end_line:  the line number of the position after the last character
        covered by the error in the original source.
    :ivar end_column:  the column number of the position after the last character
        covered by the error in the original source.
    :ivar related: a sequence of related errors.
    """

    # NOTE: 'relatedUri' is declared as a slot, but it is not assigned by
    # any method of this class.
    __slots__ = ['message', 'code', '_pos', 'line', 'column', 'length',
                 'end_line', 'end_column', 'related', 'orig_pos', 'orig_doc',
                 'relatedUri']

    def __init__(self, message: str, pos: int, code: ErrorCode = ERROR,
                 line: int = -1, column: int = -1, length: int = 1,
                 related: Sequence['Error'] = (),
                 orig_pos: int = -1, orig_doc: str = '') -> None:
        """Initializes the error object.

        :param message:  the error message
        :param pos:  the (non-negative) position of the error
        :param code:  the error code; must be an instance of ErrorCode
        :param line:  the line number, or -1 if not (yet) known
        :param column:  the column number, or -1 if not (yet) known
        :param length:  the length of the faulty passage (at least 1)
        :param related:  related errors; stored as a tuple
        :param orig_pos:  the position in the original source, or -1 if
            not (yet) known
        :param orig_doc:  name of the original source document
        """
        assert isinstance(code, ErrorCode)
        # guards against accidentally passing the error code as position
        assert not isinstance(pos, ErrorCode)
        assert code >= 0
        assert pos >= 0
        assert length >= 1
        self.message = message    # type: str
        self._pos = pos           # type: int
        # Add some logic to avoid double assignment of the same error code?
        # Problem: Same code might legitimately be used by two different parsers/compilers
        self.code = code          # type: ErrorCode
        self.orig_pos = orig_pos  # type: int
        self.orig_doc = orig_doc  # type: str
        self.line = line          # type: int
        self.column = column      # type: int
        # support for Language Server Protocol Diagnostics
        # see: https://microsoft.github.io/language-server-protocol/specifications/specification-current/#diagnostic
        self.length = length      # type: int
        self.end_line = -1        # type: int
        self.end_column = -1      # type: int
        self.related = tuple(related)   # type: Sequence['Error']

    def _normalize_msg(self, msg: str) -> str:
        """Returns the part of the message that is used for comparing and
        hashing errors.

        For errors with the code ``RESUME_NOTICE`` everything from the
        first occurrence of ``'with '`` onward is cut off (unless the
        message starts with it); all other messages are returned unchanged.
        This is a hack to support the recognition of duplicate error
        messages that are produced as an artifact by the seed and grow
        algorithm that catches left-recursion in the parser.
        See :py:meth:`~parse.Forward.__call__`."""
        if self.code == RESUME_NOTICE:
            i = msg.find('with ')
            if i > 0:
                msg = msg[:i]
        return msg

    def __eq__(self, other):
        """Two errors are equal, if their normalized messages, their codes
        and their positions are equal. The length is not considered."""
        if not isinstance(other, Error):
            # let Python fall back to its default handling instead of
            # failing with an AttributeError on foreign objects
            return NotImplemented
        return self._normalize_msg(self.message) == self._normalize_msg(other.message) \
            and self.code == other.code \
            and self._pos == other._pos  # and self.length == other.length

    def __hash__(self) -> int:
        # must be based on the same values as __eq__
        return hash((self._normalize_msg(self.message), self.code, self._pos))

    def __str__(self) -> str:
        """Returns the error as "document:line:column: Severity (code): message".
        The document is left out if it is empty or 'UNKNOWN_FILE'; line and
        column are left out if the line number has not been set."""
        if self.orig_doc and self.orig_doc != 'UNKNOWN_FILE':
            prefix = self.orig_doc + ':'
        else:
            prefix = ''
        if self.line > 0:
            prefix += f"{max(self.line, 0)}:{max(self.column, 0)}: "
        return prefix + f"{self.severity} ({self.code}): {self.message}"

    def __repr__(self) -> str:
        return 'Error("%s", %s, %i, %i, %i, %i)' \
               % (self.message, repr(self.code), self.pos, self.orig_pos, self.line, self.column)

    @property
    def pos(self) -> int:
        """The position of the error. Assigning a new position resets
        ``orig_pos`` as well as the line and column values to -1."""
        return self._pos

    @pos.setter
    def pos(self, value: int):
        self._pos = value
        # reset line and column values, because they might now not be valid anymore
        self.orig_pos = -1
        self.line, self.column = -1, -1
        self.end_line, self.end_column = -1, -1

    @property
    def severity(self) -> str:
        """Returns a string representation of the error level, i.e. one of
        "Notice", "Warning", "Error" or "Fatal"."""
        if self.code < WARNING:
            return "Notice"
        elif self.code < ERROR:
            return "Warning"
        elif self.code < FATAL:
            return "Error"
        else:
            return "Fatal"

    def visualize(self, document: str) -> str:
        """Shows the line of the document and the position where the error
        occurred.

        :param document:  the text that ``self.pos`` refers to
        :returns:  the line that contains ``self.pos``, followed by a second
            line with a caret (^) underneath the error position. Both lines
            end with a linefeed.
        """
        start = document.rfind('\n', 0, self.pos) + 1
        stop = document.find('\n', self.pos)
        if stop < 0:
            # the error is located in the last line and the document does
            # not end with a linefeed: the line extends to the document's end
            stop = len(document)
        return document[start:stop] + '\n' + ' ' * (self.pos - start) + '^\n'

    def signature(self) -> bytes:
        """Returns a signature to quickly check the equality of errors.

        Line, column and code are packed into one integer (line shifted
        by 32 bits, column by 16 bits; the fields are not masked) which is
        returned as 8 bytes in big-endian order.

        :raises OverflowError:  if line or column is negative, e.g. because
            they have not been set, yet, or if the packed value does not fit
            into 8 bytes.
        """
        return (self.line << 32 | self.column << 16 | self.code).to_bytes(8, 'big')

    def range_obj(self) -> dict:
        """Returns the range (position plus length) of the error as an LSP-Range-Object.
        https://microsoft.github.io/language-server-protocol/specifications/specification-current/#range

        Line and column numbers are converted from the 1-based counting
        of this class to 0-based values. The line and column values as
        well as the end_line and end_column values must have been set
        (i.e. be >= 1) before calling this method.
        """
        assert self.line >= 1 and self.column >= 1 and self.end_line >= 1 and self.end_column >= 1
        return {'start': {'line': self.line - 1, 'character': self.column - 1},
                'end': {'line': self.end_line - 1, 'character': self.end_column - 1}}

    def diagnostic_obj(self) -> dict:
        """Returns the Error as Language Server Protocol Diagnostic object.
        https://microsoft.github.io/language-server-protocol/specifications/specification-current/#diagnostic

        Related errors, if any, are added under the key 'relatedInformation'
        with their ``orig_doc`` as uri.
        """
        def related_obj(related_error: 'Error') -> dict:
            return {
                'location': {'uri': related_error.orig_doc,
                             'range': related_error.range_obj()},
                'message': related_error.message
            }

        # LSP DiagnosticSeverity: 1 = Error, 2 = Warning, 3 = Information
        if self.code < WARNING:
            severity = 3
        elif self.code < ERROR:
            severity = 2
        else:
            severity = 1

        diagnostic = {
            'range': self.range_obj(),
            'severity': severity,
            'code': self.code,
            'source': 'DHParser',
            'message': self.message,
            # 'tags': []
        }
        if self.related:
            diagnostic['relatedInformation'] = [related_obj(err) for err in self.related]
        return diagnostic





[docs]
def is_warning(code: Union[Error, int]) -> bool:
    """Tells whether ``code`` lies below the ``ERROR`` threshold, i.e.
    whether it is merely a warning or a notice.

    ``code`` can either be a plain error code or an ``Error`` object,
    in which case the error code of that object is examined.
    """
    level = code.code if isinstance(code, Error) else code
    return level < ERROR




[docs]
def is_error(code: Union[Error, int]) -> bool:
    """Tells whether ``code`` reaches at least the ``ERROR`` threshold,
    i.e. whether it is an error or a fatal error, not just a warning.

    ``code`` can either be a plain error code or an ``Error`` object,
    in which case the error code of that object is examined.
    """
    level = code.code if isinstance(code, Error) else code
    return level >= ERROR




[docs]
def is_fatal(code: Union[Error, int]) -> bool:
    """Tells whether ``code`` reaches the ``FATAL`` threshold. Fatal
    errors are typically raised when a crash (i.e. Python exception)
    occurs at later stages of the processing pipeline (e.g. ast
    transformation, compiling).

    ``code`` can either be a plain error code or an ``Error`` object,
    in which case the error code of that object is examined.
    """
    level = code.code if isinstance(code, Error) else code
    return level >= FATAL



# def Warning(message: str, pos, code: ErrorCode = WARNING,
#             orig_pos: int = -1, line: int = -1, column: int = -1) -> Error:
#     """
#     Syntactic sugar for creating Error-objects that contain only a warning.
#     Raises a ValueError if `code`` is not within the range for warnings.
#     """
#     if not is_warning(code):
#         raise ValueError("Tried to create a warning with a error code {}. "
#                          "Warning codes must be smaller than {}".format(code, ERROR))
#     return Error(message, pos, code, orig_pos, line, column)



[docs]
def has_errors(messages: Iterable[Error], level: ErrorCode = ERROR) -> bool:
    """
    Checks whether ``messages`` contains any entry the error code of
    which is greater than or equal to ``level``. The check stops at the
    first such entry.
    """
    return any(entry.code >= level for entry in messages)




[docs]
def only_errors(messages: Iterable[Error], level: ErrorCode = ERROR) -> Iterator[Error]:
    """
    Filters ``messages`` lazily: The returned iterator yields, in their
    original order, exactly those entries the error code of which is
    greater than or equal to ``level``.
    """
    selected = (entry for entry in messages if entry.code >= level)
    return selected



#######################################################################
#
# support for canonical representation, i.e.
# filename:line:column:severity (code):error string
#
#######################################################################



[docs]
def add_source_locations(errors: List[Error], source_mapping: SourceMapFunc) -> None:
    """Adds (or adjusts) line and column numbers of error messages inplace.

    Errors, the ``orig_pos`` of which has already been set (i.e. is not
    negative), are left untouched.

    Args:
        errors:  The list of errors as returned by the method
            ``errors()`` of a Node object
        source_mapping:  A function that maps error positions to their
            positions in the original source file.

    Raises:
        ValueError:  if the position of an error is negative.
    """
    lb_cache = {}  # original text => linebreaks of that text
    for err in errors:
        if err.pos < 0:
            raise ValueError(f'Illegal error position: {err.pos} Must be >= 0!')
        if err.orig_pos >= 0:
            continue  # do not overwrite orig_pos if already set
        err.orig_doc, orig_text, err.orig_pos = source_mapping(err.pos)
        # Compute the linebreaks only once per text. (dict.setdefault() would
        # evaluate linebreaks(orig_text) on every call, even on a cache hit.)
        lbreaks = lb_cache.get(orig_text)
        if lbreaks is None:
            lbreaks = lb_cache[orig_text] = linebreaks(orig_text)
        err.line, err.column = line_col(lbreaks, err.orig_pos)
        if err.orig_pos + err.length > lbreaks[-1]:
            err.length = lbreaks[-1] - err.orig_pos  # err.length should not exceed text length
        err.end_line, err.end_column = line_col(lbreaks, err.orig_pos + err.length)




[docs]
def canonical_error_strings(errors: List[Error]) -> List[str]:
    """Returns the list of error strings in canonical form that can be parsed by most
    editors, i.e. "relative filepath : line : column : severity (code) : error string"

    If the source document of an error is a file that lies inside the
    current working directory, its path is given relative to the current
    working directory. Otherwise, the name of the source document is used
    as it is. Errors without a source document, or with one that
    ``is_filename()`` does not accept as a file name, are rendered with
    ``str(error)``.

    Args:
        errors:  the errors to be rendered; may be empty.

    Returns:
        a list with one string for each error, in the same order.
    """
    error_strings = []
    for err in errors or []:
        err_str = str(err)
        source_file_name = err.orig_doc
        if source_file_name and is_filename(source_file_name):
            # os.path.join(..., '') appends a path separator if needed. Thus,
            # only files inside the working directory match (not, say,
            # "/home/user2/..." if the working directory is "/home/user"),
            # and the remainder does not start with a separator, which
            # would make it look like an absolute path.
            cwd_prefix = os.path.join(os.getcwd(), '')
            if source_file_name.startswith(cwd_prefix):
                rel_path = source_file_name[len(cwd_prefix):]
            else:
                rel_path = source_file_name
            # Strip exactly the leading "file name:" from the error string
            # rather than cutting at the first colon, because the file name
            # may contain colons itself (e.g. "C:\...") or may be missing
            # from the error string altogether.
            doc_prefix = source_file_name + ':'
            if err_str.startswith(doc_prefix):
                err_str = err_str[len(doc_prefix):]
            err_str = rel_path + ':' + err_str
        error_strings.append(err_str)
    return error_strings