In January 2021 we will introduce a 10 GB quota for project repositories. Higher limits for individual projects will be available on request. Please see https://doku.lrz.de/display/PUBLIC/GitLab for more information.

Commit 426f5db8 authored by eckhart's avatar eckhart

- changed semantics of Node.__getitem__

parent e2a544b0
......@@ -14,8 +14,8 @@ testdata/*.pdf
*~
*.old
DEBUG*
LOGS/
REPORT/
LOGS
REPORT
external_resources/
tmp/*
test/tmp*
......
......@@ -372,9 +372,19 @@ def log_ST(syntax_tree, log_file_name):
f.write(syntax_tree.as_sxpr())
def log_parsing_history(grammar, log_file_name: str = '', html: bool=True) -> None:
def log_parsing_history(grammar, log_file_name: str = '', html: bool=False) -> None:
"""
Writes a log of the parsing history of the most recently parsed document.
Parameters:
grammar (Grammar): The Grammar object from which the parsing history
shall be logged.
log_file_name (str): The (base-)name of the log file to be written.
If no name is given (default), then the class name of the grammar
object will be used.
html (bool): If true, the log will be output as html-Table,
otherwise (default) as plain text. (Browsers might take a few seconds or
minutes to display the table for long histories.)
"""
def write_log(history, log_name):
htm = '.html' if html else ''
......@@ -419,7 +429,7 @@ def log_parsing_history(grammar, log_file_name: str = '', html: bool=True) -> No
if record.node.error_flag:
append_line(errors_only, line)
write_log(full_history, log_file_name + '_full')
if len(full_history) > 250:
write_log(full_history[-200:], log_file_name + '_full.tail')
if len(full_history) > 500:
write_log(full_history[-500:], log_file_name + '_full.tail')
write_log(match_history, log_file_name + '_match')
write_log(errors_only, log_file_name + '_errors')
......@@ -30,7 +30,7 @@ import copy
from DHParser.error import Error, linebreaks, line_col
from DHParser.stringview import StringView
from DHParser.toolkit import re, typing
from typing import Callable, cast, Iterator, List, Union, Tuple, Optional
from typing import Callable, cast, Iterator, List, Set, Union, Tuple, Optional
__all__ = ('ParserBase',
......@@ -314,48 +314,54 @@ class Node(collections.abc.Sized):
def __getitem__(self, index_or_tagname: Union[int, str]) -> 'Node':
    """
    Returns the child node with the given index if ``index_or_tagname``
    is an integer, or the first child node with the given tag name if it
    is a string. Only the immediate children of this node are examined.
    Examples::

        >>> tree = mock_syntax_tree('(a (b "X") (X (c "d")) (e (X "F")))')
        >>> flatten_sxpr(tree[0].as_sxpr())
        '(b "X")'
        >>> flatten_sxpr(tree["X"].as_sxpr())
        '(X (c "d"))'

    Args:
        index_or_tagname (int or str): Either the index of a child node
            or a tag name.
    Returns:
        Node: The child at the given index, or the first child whose
            ``tag_name`` equals the given tag name.
    Raises:
        ValueError: If this node has no children.
        KeyError: If a tag name was given and no child carries it.
        An integer index is passed straight to ``self.children``; whatever
        that sequence raises for an out-of-range index is propagated.
    """
    children = self.children
    if not children:
        # Checked first, so that leaf nodes fail the same way for indices
        # and for tag names.
        raise ValueError('Leave nodes have no children that can be indexed!')
    if isinstance(index_or_tagname, int):
        return children[index_or_tagname]
    for child in children:
        if child.tag_name == index_or_tagname:
            return child
    raise KeyError(index_or_tagname)
def __contains__(self, tag_name: str) -> bool:
    """
    Returns True if an immediate child with the given tag name exists.

    Args:
        tag_name (str): The tag name that is searched for among the
            immediate children of this node.
    Returns:
        bool: True, if at least one child node has the given tag name,
            False otherwise.
    Raises:
        ValueError: If this node has no children.
    """
    children = self.children
    if not children:
        raise ValueError('Leave node cannot contain other nodes')
    return any(child.tag_name == tag_name for child in children)
@property # this needs to be a (dynamic) property, in case sef.parser gets updated
......@@ -406,7 +412,7 @@ class Node(collections.abc.Sized):
@property
def content(self) -> str:
def content(self) -> Union[StringView, str]:
"""
Returns content as string, omitting error messages.
"""
......@@ -414,6 +420,7 @@ class Node(collections.abc.Sized):
if self.children:
self._content = "".join(child.content for child in self.children)
else:
# self._content = self._result
self._content = str(self._result)
self._result = self._content # self._result might be more efficient as a string!?
return self._content
......@@ -635,13 +642,13 @@ class Node(collections.abc.Sized):
return self._tree_repr(' ', opening, closing, density=1)
def find(self, match_function: Callable, include_root: bool=True) -> Iterator['Node']:
def select(self, match_function: Callable, include_root: bool=True) -> Iterator['Node']:
"""
Finds nodes in the tree that fulfill a given criterion.
`find` is a generator that yields all nodes for which the
`select` is a generator that yields all nodes for which the
given `match_function` evaluates to True. The tree is
traversed pre-order.
traversed pre-order, depth last.
Args:
match_function (function): A function that takes as Node
......@@ -656,19 +663,38 @@ class Node(collections.abc.Sized):
yield self
else:
for child in self.children:
for node in child.find(match_function, True):
for node in child.select(match_function, True):
yield node
def select_tags(self, tag_names: Union[str, Set[str]],
                include_root: bool=True) -> Iterator['Node']:
    """
    Returns an iterator that runs through all nodes that have one of the
    given tag names. The selection itself is delegated to ``self.select``.

    Example::

        >>> tree = mock_syntax_tree('(a (b "X") (X (c "d")) (e (X "F")))')
        >>> list(flatten_sxpr(item.as_sxpr()) for item in tree.select_tags("X", False))
        ['(X (c "d"))', '(X "F")']
        >>> list(flatten_sxpr(item.as_sxpr()) for item in tree.select_tags({"X", "b"}, False))
        ['(b "X")', '(X (c "d"))', '(X "F")']
        >>> any(tree.select_tags('a', False))
        False
        >>> list(flatten_sxpr(item.as_sxpr()) for item in tree.select_tags('a', True))
        ['(a (b "X") (X (c "d")) (e (X "F")))']

    Args:
        tag_names (str or set): A single tag name or a set of tag names
            that is being searched for.
        include_root (bool): Passed on to ``self.select``. If False, only
            descendant nodes will be checked for a match.
    Yields:
        Node: All nodes whose tag name is one of ``tag_names``.
    """
    if isinstance(tag_names, str):
        # Wrap the name in a one-element set. Passing the string itself to
        # frozenset() would iterate over its characters, so that "abc"
        # would match the tags "a", "b" and "c" instead of "abc".
        tag_names = frozenset({tag_names})
    return self.select(lambda node: node.tag_name in tag_names, include_root)
def tree_size(self) -> int:
......
......@@ -98,7 +98,7 @@ def is_filename(strg: str) -> bool:
"""Tries to guess whether string ``s`` is a file name."""
return strg.find('\n') < 0 and strg[:1] != " " and strg[-1:] != " " \
and all(strg.find(ch) < 0 for ch in '*?"<>|')
# and strg.find('*') < 0 and strg.find('?') < 0
# and strg.select('*') < 0 and strg.select('?') < 0
#######################################################################
......
......@@ -38,9 +38,9 @@ all_symbols.sort()
def start(module):
    """
    Returns the index of the first character after the line on which the
    ``__all__`` declaration of a module's source text is closed.

    The position is found in three steps: the first occurrence of
    ``'__all__'``, the next ``')'`` after it, and the next line break
    after that parenthesis.

    Args:
        module (str): The source text of a module.
    Returns:
        int: The index directly behind that line break. ``str.find``
            reports a missing marker as -1 and this value is passed on
            unchecked, so the result is only meaningful if ``module``
            contains a parenthesized ``__all__`` declaration.
    """
    # `module` is a plain string: it has `find`, but no `select` method.
    pos = module.find('__all__')
    pos = module.find(')', pos)
    return module.find('\n', pos) + 1
......
......@@ -43,12 +43,15 @@ more of the modules further above in the list, but not the other way round:
- syntaxtree.py -- syntax tree classes for DHParser
- transform.py -- transformation functions for converting the concrete
into the abstract syntax tree
- logging.py -- logging and debugging for DHParser
- parse.py -- parser combinators for DHParser
- transform.py -- transformation functions for converting the concrete
into the abstract syntax tree
- compile.py -- abstract base class for compilers that transform an AST
into something useful
- ebnf.py -- EBNF -> Python-Parser compilation for DHParser
......
......@@ -344,17 +344,16 @@ def streamline_whitespace(context):
return
node = context[-1]
assert node.tag_name in ['WSPC', ':Whitespace']
s = str(node)
c = s.find('%')
n = s.find('\n')
if c >= 0:
s = node.content
if s.find('%') >= 0:
node.result = '\n'
# c = s.find('%')
# node.result = (' ' if (n >= c) or (n < 0) else '\n')+ s[c:].rstrip(' \t')
# node.parser = MockParser('COMMENT', '')
elif s.find('\n') >= 0:
node.result = '\n'
else:
node.result = ' '
node.result = ' ' if s else ''
def watch(node):
......@@ -482,205 +481,252 @@ class LaTeXCompiler(Compiler):
assert re.match('\w+\Z', grammar_name)
def on_latexdoc(self, node):
    """
    Compiles the 'preamble' and the 'document' child of the given node,
    in that order, and returns the node itself.

    The results of the two ``self.compile`` calls are not used here.
    """
    for part in ('preamble', 'document'):
        self.compile(node[part])
    return node
def on_preamble(self, node):
    """Returns the preamble node unchanged."""
    return node
# def on_document(self, node):
# return node
# def on_frontpages(self, node):
# return node
# def on_Chapters(self, node):
# return node
# def on_Chapter(self, node):
# return node
# def on_Sections(self, node):
# return node
# def on_Section(self, node):
# return node
# def on_SubSections(self, node):
# return node
# def on_SubSection(self, node):
# return node
def on_document(self, node):
pass
# def on_SubSubSections(self, node):
# return node
def on_frontpages(self, node):
pass
# def on_SubSubSection(self, node):
# return node
def on_Chapters(self, node):
pass
# def on_Paragraphs(self, node):
# return node
def on_Chapter(self, node):
pass
# def on_Paragraph(self, node):
# return node
def on_Sections(self, node):
pass
# def on_SubParagraphs(self, node):
# return node
def on_Section(self, node):
pass
# def on_SubParagraph(self, node):
# return node
def on_SubSections(self, node):
pass
# def on_Bibliography(self, node):
# return node
def on_SubSection(self, node):
pass
# def on_Index(self, node):
# return node
def on_SubSubSections(self, node):
pass
# def on_heading(self, node):
# return node
def on_SubSubSection(self, node):
pass
# def on_block_environment(self, node):
# return node
def on_Paragraphs(self, node):
pass
# def on_known_environment(self, node):
# return node
def on_Paragraph(self, node):
pass
# def on_generic_block(self, node):
# return node
def on_SubParagraphs(self, node):
pass
# def on_begin_generic_block(self, node):
# return node
def on_SubParagraph(self, node):
pass
# def on_end_generic_block(self, node):
# return node
def on_Bibliography(self, node):
pass
# def on_itemize(self, node):
# return node
def on_Index(self, node):
pass
# def on_enumerate(self, node):
# return node
def on_block_environment(self, node):
pass
# def on_item(self, node):
# return node
def on_known_environment(self, node):
pass
# def on_figure(self, node):
# return node
def on_generic_block(self, node):
pass
# def on_quotation(self, node):
# return node
def on_begin_generic_block(self, node):
pass
# def on_verbatim(self, node):
# return node
def on_end_generic_block(self, node):
pass
# def on_tabular(self, node):
# return node
def on_itemize(self, node):
pass
# def on_tabular_row(self, node):
# return node
def on_enumerate(self, node):
pass
# def on_tabular_cell(self, node):
# return node
def on_item(self, node):
pass
# def on_tabular_config(self, node):
# return node
def on_figure(self, node):
pass
# def on_block_of_paragraphs(self, node):
# return node
def on_quotation(self, node):
pass
# def on_sequence(self, node):
# return node
def on_verbatim(self, node):
pass
# def on_paragraph(self, node):
# return node
def on_table(self, node):
pass
# def on_text_element(self, node):
# return node
def on_table_config(self, node):
pass
# def on_line_element(self, node):
# return node
def on_block_of_paragraphs(self, node):
pass
# def on_inline_environment(self, node):
# return node
def on_sequence(self, node):
pass
# def on_known_inline_env(self, node):
# return node
def on_paragraph(self, node):
pass
# def on_generic_inline_env(self, node):
# return node
def on_text_element(self, node):
pass
# def on_begin_inline_env(self, node):
# return node
def on_inline_environment(self, node):
pass
# def on_end_inline_env(self, node):
# return node
def on_known_inline_env(self, node):
pass
# def on_begin_environment(self, node):
# return node
def on_generic_inline_env(self, node):
pass
# def on_end_environment(self, node):
# return node
def on_begin_inline_env(self, node):
pass
# def on_inline_math(self, node):
# return node
def on_begin_environment(self, node):
pass
# def on_command(self, node):
# return node
def on_end_environment(self, node):
pass
# def on_known_command(self, node):
# return node
def on_inline_math(self, node):
pass
# def on_text_command(self, node):
# return node
def on_command(self, node):
pass
# def on_generic_command(self, node):
# return node
def on_known_command(self, node):
pass
# def on_footnote(self, node):
# return node
def on_generic_command(self, node):
pass
# def on_includegraphics(self, node):
# return node
def on_footnote(self, node):
pass
# def on_caption(self, node):
# return node
def on_includegraphics(self, node):
pass
# def on_multicolumn(self, node):
# return node
def on_caption(self, node):
pass
# def on_hline(self, node):
# return node
def on_config(self, node):
pass
# def on_cline(self, node):
# return node
def on_block(self, node):
pass
# def on_config(self, node):
# return node
def on_text(self, node):
pass
# def on_cfg_text(self, node):
# return node
def on_cfgtext(self, node):
pass
# def on_block(self, node):
# return node
def on_word_sequence(self, node):
pass
# def on_text(self, node):
# return node
def on_no_command(self, node):
pass
# def on_no_command(self, node):
# return node
def on_blockcmd(self, node):
pass
# def on_blockcmd(self, node):
# return node
def on_structural(self, node):
pass
# def on_structural(self, node):
# return node
def on_CMDNAME(self, node):
pass
# def on_CMDNAME(self, node):
# return node
def on_NAME(self, node):
pass
# def on_TXTCOMMAND(self, node):
# return node
def on_ESCAPED(self, node):
pass
# def on_ESCAPED(self, node):
# return node
def on_BRACKETS(self, node):
pass
# def on_SPECIAL(self, node):
# return node
def on_TEXTCHUNK(self, node):
pass
# def on_BRACKETS(self, node):
# return node
def on_WSPC(self, node):
pass
# def on_LINEFEED(self, node):
# return node
def on_LF(self, node):
pass
# def on_NAME(self, node):
# return node
def on_PARSEP(self, node):
pass
# def on_INTEGER(self, node):
# return node
def on_LB(self, node):
pass
# def on_TEXTCHUNK(self, node):
# return node
def on_BACKSLASH(self, node):
pass
# def on_LF(self, node):
# return node
def on_EOF(self, node):
pass
# def on_LFF(self, node):
# return node
# def on_PARSEP(self, node):
# return node
# def on_WSPC(self, node):
# return node
# def on_GAP(self, node):
# return node
# def on_NEW_LINE(self, node):
# return node
# def on_LB(self, node):
# return node