Source code for transform

# transform.py - transformation functions for converting the
#                concrete into the abstract syntax tree
#
# Copyright 2016  by Eckhart Arnold (arnold@badw.de)
#                 Bavarian Academy of Sciences an Humanities (badw.de)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.  See the License for the specific language governing
# permissions and limitations under the License.


"""
Module ``transform`` contains the functions for transforming the
concrete syntax tree (CST) into an abstract syntax tree (AST).

As these functions are very generic, they can in principle be
used for any kind of tree transformations, not necessarily only
for CST -> AST transformations.
"""

from __future__ import annotations

import collections.abc
from functools import partial, singledispatch, reduce
import inspect
import operator
from typing import AbstractSet, ByteString, Callable, cast, Container, Dict, \
    Tuple, List, Sequence, Union, Optional

try:
    import cython
except ImportError:
    import DHParser.externallibs.shadow_cython as cython

from DHParser.error import ErrorCode, AST_TRANSFORM_CRASH, ERROR
from DHParser.nodetree import Node, WHITESPACE_PTYPE, TOKEN_PTYPE, LEAF_PTYPES, PLACEHOLDER, \
    RootNode, parse_sxpr, flatten_sxpr, Path
from DHParser.toolkit import issubtype, isgenerictype, expand_table, smart_list, re, \
    deprecation_warning, TypeAlias


__all__ = ('TransformationDict',
           'TransformationProc',
           'TransformationTableType',
           'ConditionFunc',
           'KeyFunc',
           'TransformerFunc',
           'TransformerCallable',
           'transformation_factory',
           'key_node_name',
           'Filter',
           'BLOCK_LEAVES',
           'BLOCK_ANONYMOUS_LEAVES',
           'BLOCK_CHILDREN',
           'traverse',
           'transformer',
           'merge_treetops',
           'always',
           'never',
           'neg',
           'any_of',
           'all_of',
           'is_named',
           'update_attr',
           'swap_attributes',
           'replace_by_single_child',
           'replace_by_children',
           'reduce_single_child',
           'replace_or_reduce',
           'change_name',
           # 'change_tag_name',
           'replace_child_names',
           'replace_tag_names',
           'collapse',
           'join_content',
           'pick_longest_content',
           'fix_content',
           'collapse_children_if',
           'transform_result',
           'replace_content_with',
           'add_attributes',
           'del_attributes',
           'normalize_whitespace',
           'merge_adjacent',
           'merge_leaves',
           'merge_connected',
           'merge_results',
           'move_fringes',
           'left_associative',
           'lean_left',
           'apply_if',
           'apply_unless',
           'apply_ifelse',
           'traverse_locally',
           'is_anonymous',
           'is_anonymous_leaf',
           'contains_only_whitespace',
           'is_empty',
           'is_token',
           'is_one_of',
           'not_one_of',
           'name_matches',
           'has_attr',
           'content_matches',
           'has_content',
           'has_ancestor',
           'has_parent',
           'has_descendant',
           'has_child',
           'has_children',
           'has_sibling',
           'lstrip',
           'rstrip',
           'strip',
           'keep_children',
           'keep_children_if',
           'keep_tokens',
           'keep_nodes',
           'keep_content',
           'remove_children_if',
           'remove_children',
           'remove_content',
           # 'remove_first',
           # 'remove_last',
           'remove_whitespace',
           'remove_empty',
           'remove_anonymous_empty',
           'remove_anonymous_tokens',
           'remove_brackets',
           'remove_infix_operator',
           'remove_tokens',
           'remove',
           'remove_if',
           'flatten',
           'forbid',
           'require',
           'assert_content',
           'AT_THE_END',
           'node_maker',
           'delimit_children',
           'positions_of',
           'PositionType',
           'normalize_position_representation',
           'insert',
           'add_error',
           'error_on',
           'assert_has_children',
           'peek')


class Filter:
    def __call__(self, children: Tuple[Node, ...]) -> Tuple[Node, ...]:
        raise NotImplementedError


TransformationProc: TypeAlias = Callable[[Path], None]
TransformationDict: TypeAlias = Dict[str, Union[Callable, Sequence[Callable]]]
TransformationCache: TypeAlias = Dict[str, Tuple[Sequence[Filter], Sequence[Callable]]]
TransformationTableType: TypeAlias = Dict[str, Union[Sequence[Callable], TransformationDict]]
ConditionFunc: TypeAlias = Callable  # Callable[[Path], bool]
KeyFunc: TypeAlias = Callable[[Node], str]
CriteriaType: TypeAlias = Union[int, str, Callable]
TransformerFunc: TypeAlias = Union[Callable[[RootNode], RootNode], partial]
TransformerCallable = TransformerFunc  # Deprecated: Use TransformerFunc!
TransformerFactory: TypeAlias = Callable[[], TransformerFunc]


[docs] def transformation_factory(t1=None, t2=None, t3=None, t4=None, t5=None): """ Creates factory functions from transformation-functions that dispatch on the first parameter after the path parameter. Decorating a transformation-function that has more than merely the ``path``-parameter with ``transformation_factory`` creates a function with the same name, which returns a partial-function that takes just the path-parameter. Additionally, there is some syntactic sugar for transformation-functions that receive a collection as their second parameter and do not have any further parameters. In this case a list of parameters passed to the factory function will be converted into a single collection-parameter. The primary benefit is the readability of the transformation-tables. Usage:: @transformation_factory(AbstractSet[str]) def remove_tokens(path, tokens): ... or, alternatively:: @transformation_factory def remove_tokens(path, tokens: AbstractSet[str]): ... Example:: trans_table = { 'expression': remove_tokens('+', '-') } instead of:: trans_table = { 'expression': partial(remove_tokens, tokens={'+', '-'}) } Parameters: t1: type of the second argument of the transformation function, only necessary if the transformation functions' parameter list does not have type annotations. """ def real_type(t): """get type from string alias, e.g. "Dict" -> typing.Dict.""" if isinstance(t, str): # ensure compatibility with python versions return eval(t.replace('unicode', 'str')) # with alternative type handling. elif isinstance(t, ByteString): return eval(t.replace(b'unicode', b'str')) return t def type_guard(t): """Raises an error if type `t` is a generic type or could be mistaken for the type of the canonical first parameter "Path" of transformation functions. Returns `t`.""" # if isinstance(t, GenericMeta): # raise TypeError("Generic Type %s not permitted\n in transformation_factory " # "decorator. Use the equivalent non-generic type instead!" # % str(t)) t = real_type(t) if isgenerictype(t): raise TypeError("Generic Type %s not permitted\n in transformation_factory " "decorator. Use the equivalent non-generic type instead!" % str(t)) if issubtype(Path, t): raise TypeError("Sequence type %s not permitted\nin transformation_factory " "decorator, because it could be mistaken for a base class " "of Path\nwhich is the type of the canonical first " "argument of transformation functions. Try 'tuple' instead!" % str(t)) return t def decorator(f): nonlocal t1 sig = inspect.signature(f) params = list(sig.parameters.values())[1:] if len(params) == 0: return f # '@transformer' not needed w/o free parameters assert t1 or params[0].annotation != params[0].empty, \ "No type information on second parameter found! Please, use type " \ "annotation or provide the type information via transformer-decorator." f = singledispatch(f) p1type = real_type(params[0].annotation) if t1 is None: t1 = type_guard(p1type) elif issubtype(p1type, type_guard(t1)): try: if len(params) == 1 and issubtype(p1type, Container) \ and not (issubtype(p1type, str) or issubtype(p1type, ByteString)): def gen_special(*args): c = set(args) if issubtype(p1type, AbstractSet) else \ tuple(args) if issubtype(p1type, Sequence) else args d = {params[0].name: c} return partial(f, **d) assert p1type.__args__ is not None, "Please use more specific container " \ "type, e.g. %s[Callable] instead of %s, in signature of function " \ "%s !" % (str(p1type), str(p1type), f.__name__) for alt_t in p1type.__args__: f.register(type_guard(alt_t), gen_special) # f.register(type_guard(p1type.__args__[0]), gen_special) except AttributeError: pass # Union Type does not allow subclassing, but is not needed here else: raise TypeError("Annotated type %s is not a subclass of decorated type %s !" % (str(p1type), str(t1))) def gen_partial(*args, **kwargs): d = {p.name: arg for p, arg in zip(params, args)} d.update(kwargs) return partial(f, **d) for t in (t1, t2, t3, t4, t5): if t: f.register(type_guard(t), gen_partial) else: break return f if isinstance(t1, type(lambda: 1)): # Provide for the case that transformation_factory has been # written as plain decorator and not as a function call that # returns the decorator proper. func = t1 t1 = None return decorator(func) else: return decorator
[docs] def key_node_name(node: Node) -> str: """ Returns the tag name of the node as key for selecting transformations from the transformation table in function `traverse`. """ return node.name
class BlockChildren(Filter): def __call__(self, children: Tuple[Node, ...]) -> Tuple[Node, ...]: return () class BlockLeaves(Filter): def __call__(self, children: Tuple[Node, ...]) -> Tuple[Node, ...]: return tuple(child for child in children if child._children) class BlockAnonymousLeaves(Filter): def __call__(self, children: Tuple[Node, ...]) -> Tuple[Node, ...]: try: return tuple(child for child in children if child._children or not child.name[0] == ':') except IndexError: return tuple(child for child in children if child._children or not child.anonymous) BLOCK_CHILDREN = BlockChildren() BLOCK_LEAVES = BlockLeaves() BLOCK_ANONYMOUS_LEAVES = BlockAnonymousLeaves()
[docs] def traverse(tree: Node, transformation_table: TransformationTableType, key_func: KeyFunc = key_node_name) -> Node: """ Traverses the syntax tree starting with the given ``node`` depth first and applies the sequences of callback-functions registered in the ``transformation_table``-dictionary. The most important use case is the transformation of a concrete syntax tree into an abstract tree (AST). But it is also imaginable to employ tree-traversal for the semantic analysis of the AST. In order to assign sequences of callback-functions to nodes, a dictionary ("processing table") is used. The keys usually represent tag names, but any other key function is possible. There exist three special keys: - '<': always called (before any other processing function) - '*': called for those nodes for which no (other) processing function appears in the table - '>': always called (after any other processing function) :param tree: The root-node of the syntax tree to be traversed :param transformation_table: A mapping node key -> sequence of functions that will be applied to matching nodes in order. This dictionary is interpreted as a ``compact_table``. See :func:`expand_table` or :func:`EBNFCompiler.EBNFTransTable` :param key_func: A mapping key_func(node) -> keystr. The default key_func yields node.name. :returns: The tree that has been transformed in-place. The returned object is the same that has been passed in parameter tree, but be aware that this tree has been changed in-place! Example:: table = { "term": [replace_by_single_child, flatten], "factor, flowmarker, retrieveop": replace_by_single_child } traverse(node, table) """ # Is this optimization really needed? if '__cache__' in transformation_table: # assume that processing table has already been expanded table = transformation_table # type: TransformationTableType cache = cast(TransformationDict, transformation_table['__cache__']) # type: TransformationDict else: # normalize transformation_table entries by turning single values # into lists with a single value table = {name: cast(Sequence[Callable], smart_list(call)) for name, call in list(transformation_table.items())} table = expand_table(table) # substitute key for insignificant whitespace assert '+' not in table, 'Symbol "+" in processing table is obsolete, use "<" instead' if '~' in table: if ':Whitespace' in table: raise AssertionError( '"~" is a synonym for ":Whitespace" in the processing table. ' 'To avoid confusion, choose either of the two, but do not use ' 'both at the same time!') whitespace_transformation = table['~'] del table['~'] table[':Whitespace'] = whitespace_transformation # cache expanded table cache = cast(TransformationDict, table.setdefault('__cache__', cast(TransformationDict, dict()))) # change processing table in place, so its already expanded and cache filled next time transformation_table.clear() transformation_table.update(table) def split_filter(callables: Sequence[Callable]) -> Tuple[List[Filter], List[Callable]]: i = 0 filter = [] for callable in callables: if isinstance(callable, Filter): filter.append(callable) i += 1 else: break callables = list(callables[i:]) assert not any(isinstance(callable, Filter) for callable in callables) return filter, callables def traverse_recursive(path): nonlocal cache node = path[-1] key = key_func(node) try: filters, sequence = cache[key] except KeyError: filters, pre = split_filter(table.get('<', [])) assert BLOCK_CHILDREN not in filters more_filters, main = split_filter(table.get(key, table.get('*', []))) post = table.get('>', []) assert not any(isinstance(callable, Filter) for callable in post) sequence = pre + main + post all_filters = filters + more_filters if BLOCK_CHILDREN in all_filters: all_filters = [BLOCK_CHILDREN] cache[key] = (all_filters, sequence) children = node._children for filter in filters: children = filter(children) if children: path.append(PLACEHOLDER) for child in children: path[-1] = child traverse_recursive(path) # depth first path.pop() for call in sequence: try: call(path) except Exception as ae: raise AssertionError('An exception occurred when transforming "%s" with %s:\n%s' % (key, str(call), ae.__class__.__name__ + ': ' + str(ae))) traverse_recursive([tree]) return tree
# assert transformation_table['__cache__']
[docs] def transformer(tree: RootNode, transformation_table: TransformationTableType, key_func: KeyFunc = key_node_name, src_stage: str = '', dst_stage: str = '') -> RootNode: """ Same as :func:`traverse`, but expects a node of type RootNode to be passed in parameter ``tree`` and retruns this RootNode. Furthermore, the names of the source and destination stages can be passed optionally in the parameters ``src_stage`` and ``dst_stage``. If these parameters are not empty strings, the ``tree.stage`` will be checked against ``src_stage`` before transforming the tree and set to ``dst_stage`` after the transformation. See :func:`traverse` for the first three parameters and the general explanation of what ``transform`` does. :param src_stage: The name of the source stage or the empty string (default) if the source stage shall not be checked. :param dst_stage: The name of the destination stage or the empty string (default) :raises: ValueError, if ``tree.stage != src_stage`` """ assert isinstance(tree, RootNode) if src_stage and tree.stage and tree.stage.lower() != src_stage.lower(): raise ValueError(f'Tree in stage "{src_stage}" expected, but "{tree.stage}" found!') tree = traverse(tree, transformation_table, key_func=key_func) if not isinstance(tree, RootNode): tree = RootNode(tree) tree.stage = dst_stage return tree
####################################################################### # # specialized full tree transformations # #######################################################################
[docs] def merge_treetops(node: Node): """Recursively traverses the tree and "merges" nodes that contain only anonymous child nodes that are leaves. "mergeing" here means the node's result will be replaced by the merged content of the children. """ if node._children: crunch = True for child in node._children: if child._children: merge_treetops(child) crunch = False elif not child.anonymous: crunch = False if crunch: node.result = node.content
####################################################################### # # meta transformations, i.e. transformations that call other # transformations # #######################################################################
[docs] @transformation_factory(dict) def traverse_locally(path: Path, transformation_table: dict, # actually: TransformationTableType key_func: Callable = key_node_name): # actually: KeyFunc """ Transforms the syntax tree starting from the last node in the path according to the given transformation table. The purpose of this function is to apply certain transformations locally, i.e. only for those nodes that have the last node in the path as their parent node. """ traverse(path[-1], transformation_table, key_func)
def transformation_guard(value) -> None: if value is not None: raise AssertionError('Transformation of a value instead of None!') def condition_guard(value) -> bool: if value is None: raise AssertionError('Condition returned None instead of a bool!') return value def apply_transformations(path: Path, transformation: Union[Callable, Sequence[Callable]]): """Applies a single or a sequence of transformations to a path.""" if callable(transformation): transformation_guard(transformation(path)) else: assert isinstance(transformation, tuple) for trans in cast(tuple, transformation): transformation_guard(trans(path))
[docs] @transformation_factory(collections.abc.Callable, tuple) def apply_if(path: Path, transformation: Union[Callable, Tuple[Callable, ...]], condition: Callable): """Applies a transformation only if a certain condition is met.""" if condition_guard(condition(path)): apply_transformations(path, transformation)
[docs] @transformation_factory(collections.abc.Callable, tuple) def apply_ifelse(path: Path, if_transformation: Union[Callable, Tuple[Callable, ...]], else_transformation: Union[Callable, Tuple[Callable, ...]], condition: Callable): """Applies a one particular transformation if a certain condition is met and another transformation otherwise.""" if condition_guard(condition(path)): apply_transformations(path, if_transformation) else: apply_transformations(path, else_transformation)
[docs] @transformation_factory(collections.abc.Callable, tuple) def apply_unless(path: Path, transformation: Union[Callable, Tuple[Callable, ...]], condition: Callable): """Applies a transformation if a certain condition is *not* met.""" if not condition_guard(condition(path)): apply_transformations(path, transformation)
## boolean operators
[docs] def always(path: Path) -> bool: """Always returns True, no matter what the state of the path is.""" return True
[docs] def never(path: Path) -> bool: """Always returns True, no matter what the state of the path is.""" return False
[docs] @transformation_factory(collections.abc.Callable) def neg(path: Path, bool_func: collections.abc.Callable) -> Optional[bool]: """Returns the inverted boolean result of ``bool_func(path)``, unless the result is None. In that case None is passed through.""" cond = bool_func(path) if cond is None: return None return not cond
[docs] @transformation_factory(collections.abc.Set) def any_of(path: Path, bool_func_set: AbstractSet[collections.abc.Callable]) -> bool: """Returns True, if any of the bool functions in `bool_func_set` evaluate to True for the given path.""" return any(bf(path) for bf in bool_func_set)
[docs] @transformation_factory(collections.abc.Set) def all_of(path: Path, bool_func_set: AbstractSet[collections.abc.Callable]) -> bool: """Returns True, if all of the bool functions in `bool_func_set` evaluate to True for the given path.""" return all(bf(path) for bf in bool_func_set)
####################################################################### # # conditionals that determine whether the path (or the last node in # the path for that matter) fulfill a specific condition. # --------------------------------------------------------------------- # # The path of a node is understood as a list of all parent nodes # leading up to and including the node itself. If represented as list, # the last element of the list is the node itself. # ####################################################################### @transformation_factory(collections.abc.Callable) def three_valued(path: Path, cond_true: Callable, cond_false: Callable) -> Optional[bool]: """Returns True, if ``cond_true`` evaluates to True, Returns False, if ``cond_true`` does not evaluate to True but cond_false evaluates to True. Returns None, otherwise. Note that this means that the first parameter has precedence. Expresses as truth-table, this looks like:: T T -> T T F -> T F T -> F F F -> None """ if cond_true(path): return True elif cond_false(path): return False else: return None def is_single_child(path: Path) -> bool: """Returns ``True`` if the current node does not have any siblings.""" return len(path[-2]._children) == 1 # TODO: ambiguous: named, tagging...
[docs] def is_named(path: Path) -> bool: """Returns ``True`` if the current node's parser is a named parser.""" # return not path[-1].anonymous tn = path[-1].name return bool(tn) and tn[0] != ':'
[docs] def is_anonymous(path: Path) -> bool: """Returns ``True`` if the current node is anonymous.""" # return path[-1].anonymous tn = path[-1].name return not tn or tn[0] == ':'
[docs] def is_anonymous_leaf(path: Path) -> bool: """Returns `True` if path ends in an anonymous leaf-node""" # return not path[-1].children and path[-1].anonymous node = path[-1] if node._children: return False tn = node.name return not tn or tn[0] == ':'
RX_WHITESPACE = re.compile(r'\s*$')
[docs] def contains_only_whitespace(path: Path) -> bool: r"""Returns ``True`` for nodes that contain only whitespace regardless of the name, i.e. nodes the content of which matches the regular expression /\s*/, including empty nodes. Note, that this is not true for anonymous whitespace nodes that contain comments.""" return bool(RX_WHITESPACE.match(path[-1].content))
[docs] def is_empty(path: Path) -> bool: """Returns ``True`` if the current node's content is empty.""" return not path[-1].result
[docs] @transformation_factory(collections.abc.Set) def is_token(path: Path, tokens: AbstractSet[str] = frozenset()) -> bool: """ Checks whether the last node in the path has the name ":Text" and it's content matches one of the given tokens. Leading and trailing whitespace-tokens will be ignored. In case an empty set of tokens is passed, any token is a match. """ node = path[-1] return node.name == TOKEN_PTYPE and (not tokens or node.content in tokens)
[docs] @transformation_factory(collections.abc.Set) def is_one_of(path: Path, name_set: AbstractSet[str]) -> bool: """Returns true, if the node's name is one of the given tag names.""" return path[-1].name in name_set
[docs] @transformation_factory(collections.abc.Set) def not_one_of(path: Path, name_set: AbstractSet[str]) -> bool: """Returns true, if the node's name is not one of the given tag names.""" return path[-1].name not in name_set
[docs] @transformation_factory(str) def name_matches(path: Path, regexp: str) -> bool: """ Returns true, if the node's name matches the regular expression `regexp` completely. For example, ':.*' matches all anonymous nodes. """ if not regexp.endswith('$'): regexp = f"(?:{regexp})$" return bool(re.match(regexp, path[-1].name))
[docs] @transformation_factory def content_matches(path: Path, regexp: str) -> bool: """ Checks a node's content against a regular expression. In contrast to ``re.match`` the regular expression must match the complete string and not just the beginning of the string to succeed! """ if not regexp.endswith('$'): regexp = f"(?:{regexp})$" return bool(re.match(regexp, path[-1].content))
[docs] @transformation_factory def has_content(path: Path, content: str) -> bool: """Checks a node's content against a given string. This is faster than content_matches for mere string comparisons.""" return path[-1].content == content
[docs] @transformation_factory(str) def has_attr(path: Path, attr: str, value: Optional[str] = None) -> bool: """ Returns true, if the node has the attribute ``attr`` and its value equals ``value``. If ``value`` is None, True is returned if the attribute exists, no matter what it value is. """ node = path[-1] if value is None: return node.has_attr(attr) else: return node.get_attr(attr, None) == value
[docs] @transformation_factory(collections.abc.Set) def has_ancestor(path: Path, name_set: AbstractSet[str], generations: int = -1, until: Union[AbstractSet[str], str] = frozenset()) -> bool: """ Checks whether a node with one of the given tag names appears somewhere in the path before the last node in the path. :param generations: determines how deep `has_ancestor` should dive into the ancestry. "1" means only the immediate parents wil be considered, "2" means also the grandparents, ans so on. A value smaller or equal zero means all ancestors will be considered. :param until: node-names which, when reached, will stop `has_ancestor` from searching further, even if the `generations`-parameter would allow a deeper search. """ if until: if isinstance(until, str): until = {until} for i in range(len(path) - 1, -1, -1): if path[i].name in until: break trl = path[i:] else: trl = path if generations <= 0: return any(nd.name in name_set for nd in trl) for i in range(2, min(generations + 2, len(trl) + 1)): if trl[-i].name in name_set: return True return False
[docs] @transformation_factory(collections.abc.Set) def has_parent(path: Path, name_set: AbstractSet[str]) -> bool: """Checks whether the immediate predecessor in the path has one of the given tags.""" return has_ancestor(path, name_set, 1)
[docs] def has_children(path: Path) -> bool: """Checks whether last node in path has children.""" return bool(path[-1]._children)
@transformation_factory(collections.abc.Set) def has_descendant(path: Path, name_set: AbstractSet[str], generations: int = -1, until: Union[AbstractSet[str], str] = frozenset()) -> bool: assert generations != 0 if until: if isinstance(until, str): until = {until} else: until = frozenset() if path[-1].name in until: for child in path[-1]._children: if child.name in name_set: return True return False for child in path[-1]._children: if child.name in name_set: return True if (generations < 0 or generations > 1) \ and has_descendant(path + [child], name_set, generations - 1, until): return True return False
[docs] @transformation_factory(collections.abc.Set) def has_child(path: Path, name_set: AbstractSet[str]) -> bool: """Checks whether at least one child (i.e. immediate descendant) has one of the given tags.""" return has_descendant(path, name_set, 1)
@transformation_factory(collections.abc.Set) def has_sibling(path: Path, name_set: AbstractSet[str]): if len(path) >= 2: node = path[-1] for child in path[-2]._children: if child != node and child.name in name_set: return True return False ####################################################################### # # utility functions # #######################################################################
[docs] def update_attr(dest: Node, src: Union[Node, Tuple[Node, ...]], root: RootNode): """ Adds all attributes from `src` to `dest` and transfers all errors from `src` to `dest`. This is needed, in order to keep the attributes if the child node is going to be eliminated. """ if isinstance(src, Node): src = (Node,) for s in src: # update attributes if s != dest and hasattr(s, '_attributes'): for k, v in s.attr.items(): if k in dest.attr and v != dest.attr[k]: raise ValueError('Conflicting attribute values %s and %s for key %s ' 'when reducing %s to %s ! Tree transformation stopped.' % (v, dest.attr[k], k, str(src), str(dest))) dest.attr[k] = v # transfer errors try: root.transfer_errors(s, dest) # ids = id(s) # if ids in root.error_nodes: # root.error_nodes.setdefault(id(dest), []).extend(root.error_nodes[ids]) # del root.error_nodes[ids] except AttributeError: # root was not of type RootNode, probably a doc-test pass
[docs] def swap_attributes(node: Node, other: Node): """ Exchanges the attributes between node and other. This might be needed when re-arranging trees. """ NA = node.has_attr() OA = other.has_attr() if NA or OA: save = node._attributes if NA else None if OA: node._attributes = other._attributes elif NA: node._attributes = None if NA: other._attributes = save elif OA: other._attributes = None
def _replace_by(node: Node, child: Node, root: RootNode): """ Replaces node's contents by child's content including the tag name. """ # if node.anonymous or not child.anonymous: # node.name = child.name nd_tn = node.name ch_tn = child.name if not nd_tn or nd_tn[0] == ':' or (ch_tn and ch_tn[0] != ':'): node.name = ch_tn node._set_result(child._result) update_attr(node, (child,), root) def _reduce_child(node: Node, child: Node, root: RootNode): """ Sets node's results to the child's result, keeping node's name.Example:: >>> nested = RootNode(parse_sxpr('(i (span `(style "letter-spacing:.25pt") "m."))')) >>> _reduce_child(nested, nested[0], nested) >>> nested.as_sxpr() '(i `(style "letter-spacing:.25pt") "m.")' """ node._set_result(child._result) update_attr(node, (child,), root) # update_attr(child, (node,), root) # if child.has_attr(): # node._attributes = child._attributes ####################################################################### # # rearranging transformations # # - tree may be rearranged (e.g.flattened) # - nodes that are not leaves may be dropped # - order is preserved # - leave content is preserved (though not necessarily the leaves # themselves) # #######################################################################
[docs] def replace_by_single_child(path: Path): """ Removes single branch node, replacing it by its immediate descendant. Replacement only takes place, if the last node in the path has exactly one child. Attributes will be merged. In case one and the same attribute is defined for the child as well as the parent, the child's attribute value take precedence. """ node = path[-1] if len(node._children) == 1: _replace_by(node, node._children[0], cast(RootNode, path[0]))
[docs] def replace_by_children(path: Path): """ Eliminates the last node in the path by replacing it with its children. The attributes of this node will be dropped. In case the last node is the root-note (i.e. len(path) == 1), it will only be eliminated, if there is but one child. WARNING: This should never be followed by move_fringes() in the transformation list!!! """ try: parent = path[-2] except IndexError: replace_by_single_child(path) return node = path[-1] if node._children: result = parent._children # type: Tuple[Node, ...] i = result.index(node) parent._set_result(result[:i] + node._children + result[i + 1:])
[docs] def reduce_single_child(path: Path): """ Reduces a single branch node by transferring the result of its immediate descendant to this node, but keeping this node's parser entry. Reduction only takes place if the last node in the path has exactly one child. Attributes will be merged. In case one and the same attribute is defined for the child as well as the parent, the parent's attribute value take precedence. """ node = path[-1] if len(node._children) == 1: _reduce_child(node, node._children[0], cast(RootNode, path[0]))
[docs] @transformation_factory(collections.abc.Callable) def replace_or_reduce(path: Path, condition: Callable = is_named): """ Replaces node by a single child, if condition is True on child. Reduces the child, if condition is not True and not None. If the condition is None nothing is changed. """ node = path[-1] if len(node._children) == 1: child = node._children[0] cond = condition(path) if cond: _replace_by(node, child, cast(RootNode, path[0])) elif cond is not None: _reduce_child(node, child, cast(RootNode, path[0]))
[docs] @transformation_factory(str) def change_name(path: Path, name: str): """ Changes the tag name of the last node in the path. Parameters: path: the path where the parser shall be replaced name: The new tag name. """ node = path[-1] # # ZOMBIE_TAGS will not be changed, so that errors don't get overlooked # if node.name != ZOMBIE_TAG: node.name = name
# @transformation_factory(str) # def change_tag_name(path: Path, name: str): # deprecation_warning('"DHParser.transform.change_tag_name()" is deprecated. ' # 'Use "change_name()" instead!') # change_name(path, name)
[docs] @transformation_factory(dict) def replace_child_names(path: Path, replacements: Dict[str, str]): """ Replaces the tag names of the children of the last node in the path according to the replacement dictionary. :param path: The current path (i.e. list of ancestors and current node) :param replacements: A dictionary of names. Each tag name of a child node that exists as a key in the dictionary will be replaced by the value for that key. """ # assert ZOMBIE_TAG not in replacements, 'Replacing ZOMBIE_TAGS is not allowed, " \ # "because they result from errors that could otherwise be overlooked, subsequently!' for child in path[-1]._children: original_name = child.name child.name = replacements.get(original_name, original_name)
@transformation_factory(dict) def replace_tag_names(path: Path, replacements: Dict[str, str]): deprecation_warning('"DHParser.transform.replace_tag_names()" is deprecated. ' 'Use "replace_child_names()" instead!') replace_child_names(path, replacements)
[docs] @transformation_factory(collections.abc.Callable) def flatten(path: Path, condition: Callable = is_anonymous, recursive: bool = True): """ Flattens all children, that fulfill the given ``condition`` (default: all unnamed children). Flattening means that wherever a node has child nodes, the child nodes are inserted in place of the node. If the parameter ``recursive`` is ``True`` the same will recursively be done with the child-nodes, first. In other words, all leaves of this node and its child nodes are collected in-order as direct children of this node. Applying flatten recursively will result in these kinds of structural transformation:: (1 (+ 2) (+ 3)) -> (1 + 2 + 3) (1 (+ (2 + (3)))) -> (1 + 2 + 3) """ node = path[-1] if node._children: new_result = [] # type: List[Node] path.append(PLACEHOLDER) for child in node._children: path[-1] = child if child._children and condition(path): if recursive: flatten(path, condition, recursive) new_result.extend(child._children) update_attr(node, (child,), cast(RootNode, path[0])) else: new_result.append(child) path.pop() node._set_result(tuple(new_result))
[docs] def collapse(path: Path): """ Collapses all sub-nodes of a node by replacing them with the string representation of the node. USE WITH CARE! >>> sxpr = '(place (abbreviation "p.") (page "26") (superscript "b") (mark ",") (page "18"))' >>> tree = parse_sxpr(sxpr) >>> collapse([tree]) >>> print(flatten_sxpr(tree.as_sxpr())) (place "p.26b,18") """ node = path[-1] for descendant in node.select_if(Node.has_attr): node.attr.update(descendant.attr) node.result = node.content
MergeRule = Callable[[Sequence[Node]], Node] def join_content(package: Sequence[Node]) -> Node: return Node('joined', "".join(nd.content for nd in package), True) def pick_longest_content(package: Sequence[Node]) -> Node: longest = '' L = 0 for nd in package: content = nd.content ndL = len(content) if ndL > L: longest = content L = ndL return Node('joined', longest, True) def fix_content(fixed_content: str) -> MergeRule: def fix(package: Sequence[Node]) -> Node: nonlocal fixed_content return Node('joined', fixed_content) return fix
[docs] @transformation_factory(collections.abc.Callable) def collapse_children_if(path: Path, condition: Callable, target_name: str, merge_rule: MergeRule = join_content): """ (Recursively) merges the content of all adjacent child nodes that fulfill the given `condition` into a single leaf node with the tag-name `target_tag`. Nodes that do not fulfil the condition will be preserved. >>> sxpr = '(place (abbreviation "p.") (page "26") (superscript "b") (mark ",") (page "18"))' >>> tree = parse_sxpr(sxpr) >>> collapse_children_if([tree], not_one_of({'superscript', 'subscript'}), 'text') >>> print(flatten_sxpr(tree.as_sxpr())) (place (text "p.26") (superscript "b") (text ",18")) See `test_transform.TestComplexTransformations` for more examples. """ assert isinstance(target_name, str) # TODO: Delete this when safe node = path[-1] if not node._children: return # do nothing if its a leaf node package = [] # type: List[Node] result = [] # type: List[Node] def close_package(): nonlocal package, target_name, merge_rule if package: merged_node = merge_rule(package) merged_node.name = target_name attrs = dict() for nd in package: if nd.has_attr(): attrs.update(nd.attr) result.append(merged_node.with_attr(attrs)) package = [] for child in node._children: if condition(path + [child]): if child._children: collapse_children_if(path + [child], condition, target_name) for c in child._children: if condition(path + [child, c]): package.append(c) else: close_package() result.append(c) close_package() else: package.append(child) else: close_package() result.append(child) close_package() node.result = tuple(result)
[docs] @transformation_factory(collections.abc.Callable) def merge_adjacent(path: Path, condition: Callable, preferred_name: str = ''): """ Merges adjacent nodes that fulfill the given `condition`. It is assumed that ``condition`` is never true for leaf-nodes and non-leaf-nodes alike. Otherwise, a type-error might ensue! The merged node's name will be set to the value ``preferred_name`` unless that value is the empty string. In this case the name of the first node of the merge will be chosen. (Note that the assignment of the preferred name only happens if a merge actually took place, i.e. if there are at least two nodes that have been merged. `´merge_adjacent()`` will not rename single nodes.) 'merge_adjacent' differs from :py:func:`collapse_children_if` in two respects: 1. The merged nodes are not "collapsed" to their string content. 2. The naming rule for merged nodes is different, in so far as the 'preferred_name' passed to `merge_adjacent` is only used if it actually occurs among the nodes to be merged. This, if 'merge_adjacent' is substituted for 'collapse_children_if' in doc-string example of the latter function, the example yields:: >>> sxpr = '(place (abbreviation "p.") (page "26") (superscript "b") (mark ",") (page "18"))' >>> tree = parse_sxpr(sxpr) >>> merge_adjacent([tree], not_one_of({'superscript', 'subscript'}), '') >>> print(flatten_sxpr(tree.as_sxpr())) (place (abbreviation "p.26") (superscript "b") (mark ",18")) """ node = path[-1] children = node._children if children: new_result = [] i = 0 L = len(children) while i < L: if condition(path + [children[i]]): initial = () if children[i]._children else '' k = i i += 1 while i < L and condition(path + [children[i]]): i += 1 if i > k: adjacent = children[k:i] head = adjacent[0] names = {nd.name for nd in adjacent} head.result = reduce(operator.add, (nd.result for nd in adjacent), initial) update_attr(head, adjacent[1:], cast(RootNode, path[0])) if preferred_name and len(adjacent) > 1: # in names: head.name = preferred_name new_result.append(head) else: new_result.append(children[i]) i += 1 node._set_result(tuple(new_result))
merge_leaves = merge_adjacent(is_anonymous_leaf)
[docs] @transformation_factory(collections.abc.Callable) def merge_connected(path: Path, content: Callable, delimiter: Callable, content_name: str = '', delimiter_name: str = ''): """ Merges sequences of content and delimiters. Other than `merge_adjacent()`, which does not make this distinction, delimiters at the fringe of content blocks are not included in the merge. :param path: The path, i.e. list of "ancestor" nodes, ranging from the root node (`path[0]`) to the current node (`path[-1]`) :param content: Condition to identify content nodes. (Path -> bool) :param delimiter: Condition to identify delimiter nodes. (Path -> bool) :param content_name: tag name for the merged content blocks :param delimiter_name: tag name for the merged delimiters at the fringe ATTENTION: The condition to identify content nodes and the condition to identify delimiter nodes must never come true for one and the same node!!! """ # first, merge all delimiters merge_adjacent(path, delimiter, delimiter_name) node = path[-1] children = node._children if children: new_result = [] i = 0 L = len(children) while i < L: if content(path + [children[i]]): initial = () if children[i]._children else '' k = i i += 1 while i < L and (content(path + [children[i]]) or delimiter(path + [children[i]])): i += 1 if delimiter(path + [children[i - 1]]): i -= 1 if i > k: adjacent = children[k:i] head = adjacent[0] names = {nd.name for nd in adjacent} head.result = reduce(operator.add, (nd.result for nd in adjacent), initial) update_attr(head, adjacent[1:], cast(RootNode, path[0])) if content_name in names: head.name = content_name new_result.append(head) else: new_result.append(children[i]) i += 1 node._set_result(tuple(new_result))
[docs] def merge_results(dest: Node, src: Tuple[Node, ...], root: RootNode) -> bool: """ Merges the results of nodes `src` and writes them to the result of `dest` type-safely, if all src nodes are leaf-nodes (in which case their result-strings are concatenated) or none are leaf-nodes (in which case the tuples of children are concatenated). Returns `True` in case of a successful merge, `False` if some source nodes were leaf-nodes and some weren't and the merge could thus not be done. Example: >>> head, tail = Node('head', '123'), Node('tail', '456') >>> merge_results(head, (head, tail), head) # merge head and tail (in that order) into head True >>> str(head) '123456' """ if all(nd._children for nd in src): dest.result = reduce(operator.add, (nd._children for nd in src[1:]), src[0]._children) update_attr(dest, src, root) return True elif all(not nd._children for nd in src): dest.result = reduce(operator.add, (nd.content for nd in src[1:]), src[0].content) update_attr(dest, src, root) return True return False
[docs] @transformation_factory(collections.abc.Callable) @cython.locals(a=cython.int, b=cython.int, i=cython.int) def move_fringes(path: Path, condition: Callable, merge: bool = True): """ Moves adjacent nodes on the left and right fringe that fulfill the given condition to the parent node. If the `merge`-flag is set, a moved node will be merged with its predecessor (or successor, respectively) in the parent node in case it also fulfills the given `condition`. Example: >>> tree = parse_sxpr('''(paragraph ... (sentence ... (word "Hello ") ... (S " ") ... (word "world,") ... (S " ")) ... (sentence ... (word "said") ... (S " ") ... (word "Hal.")))''') >>> tree = traverse(tree, {'sentence': move_fringes(is_one_of({'S'}))}) >>> print(tree.as_sxpr()) (paragraph (sentence (word "Hello ") (S " ") (word "world,")) (S " ") (sentence (word "said") (S " ") (word "Hal."))) In this example the blank at the end of the first sentence has been moved BETWEEN the two sentences. This is desirable, because if you extract a sentence from the data, most likely you are not interested in the trailing blank. Of course, this situation can best be avoided by a careful formulation of the grammar in the first place. WARNING: This function should never follow replace_by_children() in the transformation list!!! """ node = path[-1] if len(path) <= 1 or not node._children: return parent = path[-2] assert node in parent._children, \ ("Node %s (%s) is not among its parent's children, any more! " "This could be due to a transformation that manipulates the parent's result earlier " "in the transformation pipeline, like replace_by_children.") % (node.name, id(node)) children = node._children a, b = 0, len(children) while a < b: if condition(path + [children[a]]): a += 1 elif condition(path + [children[b - 1]]): b -= 1 else: break before = children[:a] after = children[b:] children = children[a:b] if before or after: node._set_result(children) parent_children = parent._children for i, child in enumerate(parent_children): if id(child) == id(node): break a = i - 1 b = i + 1 # merge adjacent nodes that fulfil the condition if merge: while a >= 0 and condition([parent_children[a]]): a -= 1 prevN = parent_children[a + 1:i] N = len(parent_children) while b < N and condition([parent_children[b]]): b += 1 nextN = parent_children[i + 1:b] if len(before) + len(prevN) > 1: target = before[-1] if before else prevN[0] if merge_results(target, prevN + before, cast(RootNode, path[0])): before = (target,) before = before or prevN if len(after) + len(nextN) > 1: target = after[0] if after else nextN[-1] if merge_results(target, after + nextN, cast(RootNode, path[0])): after = (target,) after = after or nextN parent._set_result(parent_children[:a + 1] + before + (node,) + after + parent_children[b:])
[docs] def left_associative(path: Path): """ Rearranges a flat node with infix operators into a left associative tree. """ node = path[-1] if len(node._children) >= 3: assert (len(node._children) + 1) % 2 == 0 rest = list(node._children) left, rest = rest[0], rest[1:] while rest: infix, right, rest = rest[0], rest[1], rest[2:] assert not infix._children assert infix.name[0:1] != ":" left = Node(infix.name, (left, right)) node.result = (left,)
[docs] @transformation_factory(collections.abc.Set) def lean_left(path: Path, operators: AbstractSet[str]): """ Turns a right leaning tree into a left leaning tree: (op1 a (op2 b c)) -> (op2 (op1 a b) c) If a left-associative operator is parsed with a right-recursive parser, `lean_left` can be used to rearrange the tree structure so that it properly reflects the order of association. This transformation is needed, if you want to get the order of precedence right, when writing a grammar, say, for arithmetic that avoids left-recursion. (DHParser does support left-recursion but left-recursive grammars might not be compatible with other PEG-frameworks any more.) ATTENTION: This transformation function moves forward recursively, so grouping nodes must not be eliminated during traversal! This must be done in a second pass. """ node = path[-1] assert node._children and len(node._children) == 2 assert node.name in operators right = node._children[1] if right.name in operators: assert right._children and len(right._children) == 2 a, b, c = node._children[0], right._children[0], right._children[1] op1 = node.name op2 = right.name right.result = (a, b) right.name = op1 node.result = (right, c) node.name = op2 swap_attributes(node, right) # continue recursively on the left branch lean_left([right], operators)
####################################################################### # # destructive transformations: # # - nodes and content may be dropped entirely (if deemed irrelevant) # - no promise that order will be preserved # #######################################################################
[docs] @transformation_factory(collections.abc.Callable) def lstrip(path: Path, condition: Callable = contains_only_whitespace): """Recursively removes all leading child-nodes that fulfill a given condition.""" node = path[-1] i = 1 while i > 0 and node._children: node_children = node._children lstrip(path + [node_children[0]], condition) i, L = 0, len(node_children) while i < L and condition(path + [node_children[i]]): i += 1 if i > 0: node._set_result(node_children[i:])
[docs] @transformation_factory(collections.abc.Callable) def rstrip(path: Path, condition: Callable = contains_only_whitespace): """Recursively removes all trailing nodes that fulfill a given condition.""" node = path[-1] i, L = 0, len(node._children) while i < L and node._children: node_children = node._children rstrip(path + [node_children[-1]], condition) L = len(node_children) i = L while i > 0 and condition(path + [node_children[i - 1]]): i -= 1 if i < L: node._set_result(node_children[:i])
[docs] @transformation_factory(collections.abc.Callable) def strip(path: Path, condition: Callable = contains_only_whitespace): """Removes leading and trailing child-nodes that fulfill a given condition.""" lstrip(path, condition) rstrip(path, condition)
[docs] @transformation_factory # (slice) def keep_children(path: Path, section: slice = slice(None)): """Keeps only child-nodes which fall into a slice of the result field.""" node = path[-1] if node._children: node._set_result(node._children[section])
[docs] @transformation_factory(collections.abc.Callable) def keep_children_if(path: Path, condition: Callable): """Removes all children for which `condition()` returns `True`.""" node = path[-1] if node._children: node._set_result(tuple(c for c in node._children if condition(path + [c])))
[docs] @transformation_factory(collections.abc.Set) def keep_tokens(path: Path, tokens: AbstractSet[str] = frozenset()): """Removes any among a particular set of tokens from the immediate descendants of a node. If ``tokens`` is the empty set, all tokens are removed.""" keep_children_if(path, partial(is_token, tokens=tokens))
[docs] @transformation_factory(collections.abc.Set) def keep_nodes(path: Path, names: AbstractSet[str]): """Removes children by tag name.""" keep_children_if(path, partial(is_one_of, name_set=names))
[docs] @transformation_factory def keep_content(path: Path, regexp: str): """Removes children depending on their string value.""" keep_children_if(path, partial(content_matches, regexp=regexp))
[docs] @transformation_factory(collections.abc.Callable) def remove_children_if(path: Path, condition: Callable): """Removes all children for which `condition()` returns `True`.""" node = path[-1] if node._children: node._set_result(tuple(c for c in node._children if not condition(path + [c])))
remove_whitespace = remove_children_if(is_one_of(WHITESPACE_PTYPE)) remove_empty = remove_children_if(is_empty) remove_anonymous_empty = remove_children_if(lambda trl: is_empty(trl) and is_anonymous(trl)) remove_anonymous_tokens = remove_children_if(lambda trl: is_token(trl) and is_anonymous(trl)) remove_infix_operator = keep_children(slice(0, None, 2)) # remove_single_child = apply_if(keep_children(slice(0)), lambda trl: len(trl[-1].children) == 1) # def remove_first(path: Path): # """Removes the first non-whitespace child.""" # node = path[-1] # if node.children: # for i, child in enumerate(node.children): # if child.name != WHITESPACE_PTYPE: # break # else: # return # node.result = node.children[:i] + node.children[i + 1:] # # # def remove_last(path: Path): # """Removes the last non-whitespace child.""" # node = path[-1] # if node.children: # for i, child in enumerate(reversed(node.children)): # if child.name != WHITESPACE_PTYPE: # break # else: # return # i = len(node.children) - i - 1 # node.result = node.children[:i] + node.children[i + 1:]
[docs] def remove_brackets(path: Path): """Removes any leading or trailing sequence of whitespaces, tokens or regexps.""" children = path[-1]._children if children: disposables = LEAF_PTYPES i = 0 while (i < len(children) and (children[i].name in disposables or (children[i].name == ':Series' and all(c.name in disposables for c in children[i]._children)))): i += 1 k = len(children) while (k > 0 and (children[k - 1].name in disposables or (children[k - 1].name == ':Series' and all(c.name in disposables for c in children[k - 1]._children)))): k -= 1 path[-1]._set_result(children[i:k])
[docs] @transformation_factory(collections.abc.Set) def remove_tokens(path: Path, tokens: AbstractSet[str] = frozenset()): """Removes any among a particular set of tokens from the immediate descendants of a node. If ``tokens`` is the empty set, all tokens are removed.""" remove_children_if(path, partial(is_token, tokens=tokens))
[docs] @transformation_factory(collections.abc.Set) def remove_children(path: Path, names: AbstractSet[str]): """Removes children by tag name.""" remove_children_if(path, partial(is_one_of, name_set=names))
[docs] @transformation_factory def remove_content(path: Path, regexp: str): """Removes children depending on their string value.""" remove_children_if(path, partial(content_matches, regexp=regexp))
[docs] def remove(path: Path): """Removes node unconditionally.""" try: parent = path[-2] except IndexError: return node = path[-1] parent._set_result(tuple(nd for nd in parent._children if nd != node))
[docs] @transformation_factory(collections.abc.Callable) def remove_if(path: Path, condition: Callable): """Removes node if condition is `True`""" if condition(path): remove(path)
####################################################################### # # constructive transformations: # # nodes may be added (attention: position value ) # #######################################################################
[docs] @transformation_factory(collections.abc.Callable) def transform_result(path: Path, func: Callable): # Callable[[ResultType], ResultType] """ Replaces the result of the node. ``func`` takes the node's result as an argument an returns the mapped result. """ node = path[-1] node.result = func(node.result)
[docs] @transformation_factory # (str) def replace_content_with(path: Path, content: str): # Callable[[Node], ResultType] """ Replaces the content of the node with the given text content. """ node = path[-1] node.result = content
[docs] def normalize_whitespace(path): """ Normalizes Whitespace inside a leaf node, i.e. any sequence of whitespaces, tabs and line feeds will be replaced by a single whitespace. Empty (i.e. zero-length) Whitespace remains empty, however. """ node = path[-1] assert not node._children if path[-1].name == WHITESPACE_PTYPE: if node.result: node.result = ' ' else: node.result = re.sub(r'\s+', ' ', node.result)
[docs] @transformation_factory def add_attributes(path: Path, attributes: dict): # Dict[str, str] """ Adds the attributes in the given dictionary to the XML-attributes of the last node in the given path. """ path[-1].attr.update(attributes)
[docs] @transformation_factory def del_attributes(path: Path, attributes: collections.abc.Set = frozenset()): # Set[str] """ Removes XML-attributes from the last node in the given path. If the given set is empty, all attributes will be deleted. """ node = path[-1] if node.has_attr(): if attributes: for attr in attributes: try: del node.attr[attr] except KeyError: pass else: node.attr = dict()
NodeGenerator = Callable[[], Node] DynamicResultType = Union[Tuple[NodeGenerator, ...], NodeGenerator, str] AT_THE_END = 2**32 # VERY VERY last position in a tuple of childe nodes
[docs] def node_maker(name: str, result: DynamicResultType, attributes: dict = {}) -> Callable: """ Returns a parameter-free function that upon calling returns a freshly instantiated node with the given result, where `result` can again contain recursively nested node-factory functions which will be evaluated before instantiating the node. Example: >>> factory = node_maker('d', (node_maker('c', ','), node_maker('l', ' '))) >>> node = factory() >>> node.serialize() '(d (c ",") (l " "))' """ def dynamic_result(result: DynamicResultType) -> Union[Tuple[Node, ...], Node, str]: if isinstance(result, str): return result elif isinstance(result, tuple): return tuple(node_generator() for node_generator in result) else: assert isinstance(result, Callable) return result() def create_leaf() -> Node: assert isinstance(result, str) return Node(name, result, True).with_attr(attributes) def create_branch() -> Node: return Node(name, dynamic_result(result)).with_attr(attributes) if isinstance(result, str): return create_leaf else: return create_branch
[docs] @transformation_factory(collections.abc.Set) def positions_of(path: Path, names: AbstractSet[str] = frozenset()) -> Tuple[int, ...]: """Returns a (potentially empty) tuple of the positions of the children that have one of the given `names`. """ return tuple(i for i, c in enumerate(path[-1]._children) if c.name in names)
@transformation_factory def delimiter_positions(path: Path): """Returns a tuple of positions "between" all children.""" return tuple(range(1, len(path[-1]._children))) PositionType = Union[int, tuple, Callable]
[docs] def normalize_position_representation(path: Path, position: PositionType) -> Tuple[int, ...]: """Converts a position-representation in any of the forms that `PositionType` allows into a (possibly empty) tuple of integers.""" if callable(position): position = position(path) if isinstance(position, int): return (position,) elif not position: # empty tuple or None return () else: # assert isinstance(position, tuple) and all(isinstance(i, int) for i in position) return position
[docs] @transformation_factory(int, tuple, collections.abc.Callable) def insert(path: Path, position: PositionType, node_factory: Callable): """Inserts a delimiter at a specific position within the children. If `position` is `None` nothing will be inserted. Position values greater or equal the number of children mean that the delimiter will be appended to the tuple of children. Example:: insert(pos_of('paragraph'), node_maker('LF', '\\n')) """ pos_tuple = normalize_position_representation(path, position) if not pos_tuple: return node = path[-1] children = list(node._children) assert children or not node.result, "Cannot add nodes to a leaf-node!" L = len(children) pos_tuple = sorted(tuple((p if p >= 0 else (p + L)) for p in pos_tuple), reverse=True) for n in pos_tuple: n = min(L, n) text_pos = (children[n - 1].pos + children[n - 1].strlen()) if n > 0 else node.pos children.insert(n, node_factory().with_pos(text_pos)) node.result = tuple(children)
[docs] @transformation_factory(collections.abc.Callable) def delimit_children(path: Path, node_factory: Callable): """Add a delimiter drawn from the `node_factory` between all children.""" insert(path, delimiter_positions, node_factory)
######################################################################## # # AST semantic validation functions (EXPERIMENTAL!!!) # ######################################################################## # @transformation_factory
[docs] @transformation_factory(str) def add_error(path: Path, error_msg: str, error_code: ErrorCode = ERROR): """ Raises an error unconditionally. This makes sense in case illegal paths are encoded in the syntax to provide more accurate error messages. """ node = path[-1] assert isinstance(path[0], RootNode) root = cast(RootNode, path[0]) if not error_msg: error_msg = "Syntax Error" try: root.new_error(node, error_msg.format( name=node.name, content=node.content, pos=node.pos), error_code) except KeyError as key_error: root.new_error( node, 'Schlüssel %s nicht erlaubt in Format-Zeichenkette: "%s"! ' 'Erlaubt sind "name", "content", "pos"' % (str(key_error), error_msg), AST_TRANSFORM_CRASH)
[docs] @transformation_factory(collections.abc.Callable) def error_on(path: Path, condition: Callable, error_msg: str = '', error_code: ErrorCode = ERROR): """ Checks for `condition`; adds an error or warning message if condition is not met. """ if condition(path): if not error_msg: cond_name = condition.__name__ if hasattr(condition, '__name__') \ else condition.__class__.__name__ if hasattr(condition, '__class__') \ else '<unknown>' error_msg = "transform.error_on: Failed to meet condition " + cond_name add_error(path, error_msg, error_code)
# # @transformation_factory(collections.abc.Callable) # def warn_on(path: Path, condition: Callable, warning: str = ''): # """ # Checks for `condition`; adds an warning message if condition is not met. # """ # node = path[-1] # if not condition(path): # if warning: # node.add_error(warning % node.name if warning.find("%s") > 0 else warning, # WARNING) # else: # cond_name = condition.__name__ if hasattr(condition, '__name__') \ # else condition.__class__.__name__ if hasattr(condition, '__class__') \ # else '<unknown>' # path[0].new_error(node, "transform.warn_on: Failed to meet condition " + cond_name, # WARNING) assert_has_children = error_on(lambda nd: nd._children, 'Element "%s" has no children') @transformation_factory def assert_content(path: Path, regexp: str): node = path[-1] if not content_matches(path, regexp): cast(RootNode, path[0]).new_error(node, 'Element "%s" violates %s on %s' % (node.name, str(regexp), node.content)) @transformation_factory(collections.abc.Set) def require(path: Path, child_tags: AbstractSet[str]): node = path[-1] for child in node._children: if child.name not in child_tags: cast(RootNode, path[0]).new_error(node, 'Element "%s" is not allowed inside "%s".' % (child.name, node.name)) @transformation_factory(collections.abc.Set) def forbid(path: Path, child_tags: AbstractSet[str]): node = path[-1] for child in node._children: if child.name in child_tags: cast(RootNode, path[0]).new_error(node, 'Element "%s" cannot be nested inside "%s".' % (child.name, node.name))
[docs] def peek(path: Path): """For debugging: Prints the last node in the path as S-expression.""" print(path[-1].as_sxpr(compact=True))