10.12., 9:00 - 11:00: Due to updates GitLab may be unavailable for some minutes between 09:00 and 11:00.

Commit e07e8b97 authored by eckhart's avatar eckhart

- DHParser/syntaxtree.py: more docstrings; adjusted examples to refactoring

parent 366020e0
......@@ -92,11 +92,11 @@ from DHParser import logging, is_filename, load_if_file, \\
Lookbehind, Lookahead, Alternative, Pop, Token, DropToken, Synonym, AllOf, SomeOf, \\
Unordered, Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, Capture, \\
ZeroOrMore, Forward, NegativeLookahead, Required, mixin_comment, compile_source, \\
grammar_changed, last_value, counterpart, accumulate, PreprocessorFunc, \\
grammar_changed, last_value, counterpart, accumulate, PreprocessorFunc, is_empty, \\
Node, TransformationFunc, TransformationDict, transformation_factory, traverse, \\
remove_children_if, move_adjacent, normalize_whitespace, is_anonymous, matches_re, \\
reduce_single_child, replace_by_single_child, replace_or_reduce, remove_whitespace, \\
remove_expendables, remove_empty, remove_tokens, flatten, is_insignificant_whitespace, is_empty, \\
remove_expendables, remove_empty, remove_tokens, flatten, is_insignificant_whitespace, \\
is_expendable, collapse, collapse_if, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE, \\
remove_nodes, remove_content, remove_brackets, replace_parser, remove_anonymous_tokens, \\
keep_children, is_one_of, not_one_of, has_content, apply_if, remove_first, remove_last, \\
......
......@@ -285,11 +285,14 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
raise ValueError('Leave node cannot contain other nodes')
def equals(self, other):
def equals(self, other: 'Node') -> bool:
"""
Equality of nodes: Two nodes are considered as equal, if their tag
name is the same, if their results are equal and if their attributes
and attribute values are the same.
Equality of value: Two nodes are considered as having the same value,
if their tag name is the same, if their results are equal and
if their attributes and attribute values are the same.
Returns True, if the tree originating in node `self` is equal by
value to the tree originating in node `other`.
"""
if self.tag_name == other.tag_name and self.compare_attr(other):
if self.children:
......@@ -314,7 +317,7 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
return surrogate
def is_anonymous(self):
def is_anonymous(self) -> bool:
return not self.tag_name or self.tag_name[0] == ':'
......@@ -729,6 +732,16 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
class FrozenNode(Node):
"""
FrozenNode is an immutable kind of Node, i.e. it must not be changed
after initialization. The purpose is mainly to allow certain kinds of
optimization, like not having to instantiate empty nodes (because they
are always the same and will be dropped while parsing, anyway).
Frozen nodes must be used only temporarily during parsing or
tree-transformation and should not occur in the product of the
transformation any more. This can be verified with `tree_sanity_check()`.
"""
def __init__(self, tag_name: str, result: ResultType) -> None:
if isinstance(result, str) or isinstance(result, StringView):
......@@ -750,41 +763,55 @@ class FrozenNode(Node):
def attr(self):
raise AssertionError("Attributes cannot be accessed on a frozen node")
# @property
# def errors(self) -> List[Error]:
# return ()
#
# @errors.setter
# def errors(self, errors: List[Error]):
# if errors:
# raise AssertionError('Cannot assign error list to frozen node')
def with_pos(self, pos: int) -> 'Node':
pass
PLACEHOLDER = Node('__PLACEHOLDER__', '')
PLACEHOLDER = FrozenNode('__PLACEHOLDER__', '')
def tree_sanity_check(tree: Node) -> bool:
    """
    Sanity check for syntax trees: One and the same node must never appear
    twice in the syntax tree. Frozen Nodes (EMPTY_NODE, PLACEHOLDER)
    should only exist temporarily and must have been dropped or eliminated
    before any kind of tree generation (i.e. parsing) or transformation
    is finished.

    :param tree: the root of the tree to be checked
    :return: True, if the tree is `sane`, False otherwise.
    """
    node_set = set()
    for node in tree.select(lambda nd: True, include_root=True):
        # BUG FIX: the committed line tested `isinstance(Node, FrozenNode)`,
        # i.e. the *class* Node, which is always False, so frozen nodes were
        # never reported. The instance `node` must be tested instead.
        if node in node_set or isinstance(node, FrozenNode):
            return False
        node_set.add(node)
    return True
class RootNode(Node):
"""TODO: Add Documentation!!!
errors (list): A list of all errors that have occurred so far during
"""The root node for the syntax tree is a special kind of node that keeps
and manages global properties of the tree as a whole. These are first and
foremost the list of errors that occurred during tree generation
(i.e. parsing) or any transformation of the tree. Other properties concern
the customization of the XML-serialization.
The root node can be instantiated before the tree is fully parsed. This is
necessary, because the root node is needed for managing error messages
during the parsing process, already. In order to connect the root node to
the tree, when parsing is finished, the swallow()-method must be called.
errors (list): A list of all errors that have occurred so far during
processing (i.e. parsing, AST-transformation, compiling)
of this tree.
error_flag (int): the highest warning or error level of all errors
that occurred.
inline_tags (set of strings): see `Node.as_xml()` for an explanation.
omit_tags (set of strings): see `Node.as_xml()` for an explanation.
empty_tags (set of strings): see `Node.as_xml()` for an explanation.
"""
def __init__(self, node: Optional[Node] = None):
......@@ -1037,7 +1064,8 @@ def parse_xml(xml: Union[str, StringView]) -> Node:
# mock_parsers = {TOKEN_PTYPE: PlainText}
def parse_attributes(s: StringView) -> Tuple[StringView, OrderedDict]:
"""Parses a sqeuence of XML-Attributes. Returns the string-slice
"""
Parses a sequence of XML-Attributes. Returns the string-slice
beginning after the end of the attr.
"""
attributes = OrderedDict() # type: OrderedDict[str, str]
......@@ -1049,7 +1077,8 @@ def parse_xml(xml: Union[str, StringView]) -> Node:
return (s[restart:], attributes)
def parse_opening_tag(s: StringView) -> Tuple[StringView, str, OrderedDict, bool]:
"""Parses an opening tag. Returns the string segment following the
"""
Parses an opening tag. Returns the string segment following
the opening tag, the tag name, a dictionary of attr and
a flag indicating whether the tag is actually a solitary tag as
indicated by a slash at the end, i.e. <br/>.
......@@ -1064,7 +1093,8 @@ def parse_xml(xml: Union[str, StringView]) -> Node:
return s[i + 1:], tagname, attributes, s[i - 1] == "/"
def parse_closing_tag(s: StringView) -> Tuple[StringView, str]:
"""Parses a closing tag and returns the string segment, just after
"""
Parses a closing tag and returns the string segment, just after
the closing tag.
"""
match = s.match(re.compile(r'</\s*(?P<tagname>[\w:]+)>'))
......@@ -1073,7 +1103,8 @@ def parse_xml(xml: Union[str, StringView]) -> Node:
return s[s.index(match.end()):], tagname
def parse_leaf_content(s: StringView) -> Tuple[StringView, StringView]:
"""Parses a piece of the content of a tag, just until the next opening,
"""
Parses a piece of the content of a tag, just until the next opening,
closing or solitary tag is reached.
"""
i = 0
......@@ -1082,7 +1113,8 @@ def parse_xml(xml: Union[str, StringView]) -> Node:
return s[i:], s[:i]
def parse_full_content(s: StringView) -> Tuple[StringView, Node]:
"""Parses the full content of a tag, starting right at the beginning
"""
Parses the full content of a tag, starting right at the beginning
of the opening tag and ending right after the closing tag.
"""
res = [] # type: List[Node]
......@@ -1112,6 +1144,9 @@ def parse_xml(xml: Union[str, StringView]) -> Node:
def parse_tree(xml_or_sxpr: str) -> Optional[Node]:
"""
Parses either XML or S-expressions. Which of these is detected automatically.
"""
if re.match('\s*<', xml_or_sxpr):
return parse_xml(xml_or_sxpr)
elif re.match('\s*\(', xml_or_sxpr):
......
......@@ -25,7 +25,7 @@ from DHParser import logging, is_filename, load_if_file, \
ZeroOrMore, Forward, NegativeLookahead, Required, mixin_comment, compile_source, \
grammar_changed, last_value, counterpart, accumulate, PreprocessorFunc, \
Node, TransformationFunc, TransformationDict, transformation_factory, traverse, \
remove_children_if, move_whitespace, normalize_whitespace, is_anonymous, matches_re, \
remove_children_if, move_adjacent, normalize_whitespace, is_anonymous, matches_re, \
reduce_single_child, replace_by_single_child, replace_or_reduce, remove_whitespace, \
remove_expendables, remove_empty, remove_tokens, flatten, is_insignificant_whitespace, is_empty, \
is_expendable, collapse, collapse_if, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE, \
......@@ -59,7 +59,7 @@ class ArithmeticGrammar(Grammar):
r"""Parser for an Arithmetic source file.
"""
expression = Forward()
source_hash__ = "50681341ebb2536b3eadd7eb5540ece0"
source_hash__ = "d77842f8b59d2ec3736b21778c0c9c78"
parser_initialization__ = ["upon instantiation"]
resume_rules__ = {}
COMMENT__ = r'#.*'
......
......@@ -59,7 +59,7 @@ class ArithmeticGrammar(Grammar):
digit = Forward()
expression = Forward()
variable = Forward()
source_hash__ = "120070baa84f5a2bd1bbb900627078fc"
source_hash__ = "cf537b22b7a1a2a58c426f99f784285d"
parser_initialization__ = ["upon instantiation"]
resume_rules__ = {}
COMMENT__ = r''
......
......@@ -57,7 +57,7 @@ class BibTeXGrammar(Grammar):
r"""Parser for a BibTeX source file.
"""
text = Forward()
source_hash__ = "e402951b290cb0fce63ba0cbca3f23e9"
source_hash__ = "f0e945d8b504317cdfb6e08fd2fcf596"
parser_initialization__ = ["upon instantiation"]
resume_rules__ = {}
COMMENT__ = r'(?i)%.*(?:\n|$)'
......@@ -85,12 +85,12 @@ class BibTeXGrammar(Grammar):
def get_grammar() -> BibTeXGrammar:
global GLOBALS
try:
grammar = GLOBALS.BibTeX_1_grammar_singleton
grammar = GLOBALS.BibTeX_00000001_grammar_singleton
except AttributeError:
GLOBALS.BibTeX_1_grammar_singleton = BibTeXGrammar()
GLOBALS.BibTeX_00000001_grammar_singleton = BibTeXGrammar()
if hasattr(get_grammar, 'python_src__'):
GLOBALS.BibTeX_1_grammar_singleton.python_src__ = get_grammar.python_src__
grammar = GLOBALS.BibTeX_1_grammar_singleton
GLOBALS.BibTeX_00000001_grammar_singleton.python_src__ = get_grammar.python_src__
grammar = GLOBALS.BibTeX_00000001_grammar_singleton
return grammar
......
......@@ -25,7 +25,7 @@ from DHParser import logging, is_filename, load_if_file, \
ZeroOrMore, Forward, NegativeLookahead, Required, mixin_comment, compile_source, \
grammar_changed, last_value, counterpart, accumulate, PreprocessorFunc, \
Node, TransformationFunc, TransformationDict, transformation_factory, traverse, \
remove_children_if, move_whitespace, normalize_whitespace, is_anonymous, matches_re, \
remove_children_if, move_adjacent, normalize_whitespace, is_anonymous, matches_re, \
reduce_single_child, replace_by_single_child, replace_or_reduce, remove_whitespace, \
remove_expendables, remove_empty, remove_tokens, flatten, is_insignificant_whitespace, is_empty, \
is_expendable, collapse, collapse_if, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE, \
......@@ -59,7 +59,7 @@ class EBNFGrammar(Grammar):
r"""Parser for an EBNF source file.
"""
expression = Forward()
source_hash__ = "a7119a157d38270e4215972858d0b930"
source_hash__ = "de6d0516ea104e7d8318b998e488b2d1"
parser_initialization__ = ["upon instantiation"]
resume_rules__ = {}
COMMENT__ = r'#.*(?:\n|$)'
......
......@@ -57,7 +57,7 @@ class EBNFGrammar(Grammar):
r"""Parser for an EBNF source file.
"""
expression = Forward()
source_hash__ = "5e9e65a057bec7da29989dba47f40394"
source_hash__ = "7ca2bbabfc9bc19ec54e2318bbc4c9c2"
parser_initialization__ = ["upon instantiation"]
resume_rules__ = {}
COMMENT__ = r'#.*(?:\n|$)'
......
......@@ -25,7 +25,7 @@ from DHParser import logging, is_filename, load_if_file, \
ZeroOrMore, Forward, NegativeLookahead, Required, mixin_comment, compile_source, \
grammar_changed, last_value, counterpart, accumulate, PreprocessorFunc, \
Node, TransformationFunc, TransformationDict, transformation_factory, traverse, \
remove_children_if, move_whitespace, normalize_whitespace, is_anonymous, matches_re, \
remove_children_if, move_adjacent, normalize_whitespace, is_anonymous, matches_re, \
reduce_single_child, replace_by_single_child, replace_or_reduce, remove_whitespace, \
remove_expendables, remove_empty, remove_tokens, flatten, is_insignificant_whitespace, is_empty, \
is_expendable, collapse, collapse_if, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE, \
......@@ -58,7 +58,7 @@ def get_preprocessor() -> PreprocessorFunc:
class Lyrik_explicit_whitespaceGrammar(Grammar):
r"""Parser for a Lyrik_explicit_whitespace source file.
"""
source_hash__ = "824c3970f8997489b9a0faa53f2dff51"
source_hash__ = "bcb3cee425961a2148941b492e614bd2"
parser_initialization__ = ["upon instantiation"]
resume_rules__ = {}
COMMENT__ = r''
......
......@@ -67,7 +67,7 @@ class XMLGrammar(Grammar):
extSubsetDecl = Forward()
ignoreSectContents = Forward()
markupdecl = Forward()
source_hash__ = "afe79281456bb2625a0c90c58a699d32"
source_hash__ = "3b6f8c0aafa133d9139684e42a30adfa"
parser_initialization__ = ["upon instantiation"]
resume_rules__ = {}
COMMENT__ = r''
......
......@@ -25,7 +25,7 @@ from DHParser import logging, is_filename, load_if_file, Grammar, Compiler, nil_
ZeroOrMore, Forward, NegativeLookahead, Required, mixin_comment, compile_source, \
grammar_changed, last_value, counterpart, accumulate, PreprocessorFunc, \
Node, TransformationFunc, TransformationDict, transformation_factory, traverse, \
remove_children_if, move_whitespace, normalize_whitespace, is_anonymous, matches_re, \
remove_children_if, move_adjacent, normalize_whitespace, is_anonymous, matches_re, \
reduce_single_child, replace_by_single_child, replace_or_reduce, remove_whitespace, \
remove_expendables, remove_empty, remove_tokens, flatten, is_insignificant_whitespace, is_empty, \
is_expendable, collapse, collapse_if, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE, \
......@@ -60,7 +60,7 @@ class XMLSnippetGrammar(Grammar):
"""
Name = Forward()
element = Forward()
source_hash__ = "49e51a7b2ad79e95ba239427830ba02f"
source_hash__ = "ef0fa6d8c7a96ee0fe2a8e209c3f2ae9"
parser_initialization__ = ["upon instantiation"]
resume_rules__ = {}
COMMENT__ = r''
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment