11.08., 9:00 - 11:00: Due to updates GitLab will be unavailable for some minutes between 09:00 and 11:00.

Commit a73fbdb5 authored by eckhart's avatar eckhart

No tail parser logs any more, because the size of history logs can be configured anyway.

parent 536565a6
......@@ -240,7 +240,7 @@ CONFIG_PRESET['left_recursion'] = True
# Possible values are:
# 'XML' - output as XML
# 'S-expression' - output as S-expression, i.e. a list-like format
# 'compact' - compact tree output, i.e. children a represented on
# 'indented' - compact tree output, i.e. children are represented on
# indented lines with no opening or closing tags, brackets
# etc.
# 'smart' - serialize as S-expression if the S-expression fits on
......@@ -273,8 +273,9 @@ CONFIG_PRESET['default_serialization'] = SXPRESSION_SERIALIZATION
# Default value: 120
CONFIG_PRESET['flatten_sxpr_threshold'] = 120
# Defines the maximum number of LINES before the "smart" serialization
# will switch from S-expression output to compact output
# Defines the maximum number of LINES before the "S-expression" serialization
# will switch to a compact output where the closing brackets are placed on
# the same line as the last line of the content.
CONFIG_PRESET['compact_sxpr_threshold'] = 25
......@@ -455,6 +456,11 @@ CONFIG_PRESET['log_server'] = False
# Default value: False
CONFIG_PRESET['echo_server_log'] = False
# Maximum size (i.e. number of parsing steps before the parsing ended)
# that are logged to the html-parsing-history-file
# Default value: 10000
CONFIG_PRESET['log_size_threshold'] = 10000
########################################################################
#
......
......@@ -513,10 +513,6 @@ def log_ST(syntax_tree, log_file_name) -> bool:
return False
LOG_SIZE_THRESHOLD = 10000 # maximum number of history records to log
LOG_TAIL_THRESHOLD = 500 # maximum number of history records for "tail log"
def log_parsing_history(grammar, log_file_name: str = '', html: bool = True) -> bool:
"""
Writes a log of the parsing history of the most recently parsed document, if
......@@ -565,24 +561,21 @@ def log_parsing_history(grammar, log_file_name: str = '', html: bool = True) ->
elif log_file_name.lower().endswith('.log'):
log_file_name = log_file_name[:-4]
full_history = ['<h1>Full parsing history of "%s"</h1>' % log_file_name] # type: List[str]
history = ['<h1>Parsing history of "%s"</h1>' % log_file_name] # type: List[str]
LOG_SIZE_THRESHOLD = get_config_value('log_size_threshold')
if len(grammar.history__) > LOG_SIZE_THRESHOLD:
warning = ('Sorry, man, %iK history records is just too many! '
'Only looking at the last %iK records.'
% (len(grammar.history__) // 1000, LOG_SIZE_THRESHOLD // 1000))
html_warning = '<p><strong>' + warning + '</strong></p>'
full_history.append(html_warning)
history.append(html_warning)
lead_in = '\n'. join(['<table>', HistoryRecord.COLGROUP, HistoryRecord.HEADINGS])
full_history.append(lead_in)
history.append(lead_in)
for record in grammar.history__[-LOG_SIZE_THRESHOLD:]:
line = record.as_html_tr() if html else (str(record) + '\n')
append_line(full_history, line)
append_line(history, line)
write_log(full_history, log_file_name + '_full')
if len(full_history) > LOG_TAIL_THRESHOLD + 10:
heading = '<h1>Last 500 records of parsing history of "%s"</h1>' % log_file_name + lead_in
write_log([heading] + full_history[-LOG_TAIL_THRESHOLD:], log_file_name + '_full.tail')
write_log(history, log_file_name)
return True
......@@ -816,9 +816,6 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
def select_children(self, criterion: CriteriaType, reverse: bool = False) -> Iterator['Node']:
"""Returns an iterator over all direct children of a node that fulfill `criterion`."""
# if not self._children and self.result:
# raise ValueError("Leaf-Node %s does not have any children to iterate over"
# % self.serialize())
match_function = create_match_function(criterion)
if reverse:
for child in reversed(tuple(self.select_children(criterion, False))):
......@@ -1198,13 +1195,12 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
# txt.append(str(id(node))) # for debugging
if node.has_attr():
txt.extend(' `(%s "%s")' % (k, v) for k, v in node.attr.items())
if src:
line, col = line_col(lbreaks, node.pos)
txt.append(' `(pos %i %i %i)' % (node.pos, line, col))
elif src is not None and node._pos >= 0:
txt.append(' `(pos %i)' % node.pos)
# if node.tag_name == ZOMBIE_TAG:
# print(node.pos, id(node), id(node) in root.error_nodes, root.get_errors(node))
if node._pos >= 0:
if src:
line, col = line_col(lbreaks, node.pos)
txt.append(' `(pos %i %i %i)' % (node.pos, line, col))
elif src is not None:
txt.append(' `(pos %i)' % node.pos)
if root and id(node) in root.error_nodes and not node.has_attr('err'):
txt.append(" `(%s)" % '; '.join(str(err) for err in root.get_errors(node)))
return "".join(txt)
......@@ -1300,7 +1296,7 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
"""Serialize node or tree as JSON-serializable nested list."""
jo = [self.tag_name,
[nd.to_json_obj() for nd in self._children] if self._children else str(self.result)]
pos = self.pos
pos = self._pos
if pos >= 0:
jo.append(pos)
if self.has_attr():
......@@ -1338,7 +1334,7 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
"""
Serializes the tree starting with `node` either as S-expression, XML, JSON,
or in compact form. Possible values for `how` are 'S-expression', 'XML',
'JSON', 'compact' and 'smart' accordingly, or 'AST', 'CST', 'default' in
'JSON', 'indented' accordingly, or 'AST', 'CST', 'default' in
which case the value of respective configuration variable determines the
serialization format. (See module `configuration.py`.)
"""
......
......@@ -43,7 +43,7 @@ def cpu_profile(func, repetitions=1):
# after your program ends
stats = pstats.Stats(profile)
stats.strip_dirs()
stats.sort_stats('time').print_stats(80)
stats.sort_stats('time').print_stats(20)
return success
......@@ -51,11 +51,18 @@ def profile_serializing():
with open(os.path.join(scriptpath, 'data', 'inferus.ausgabe.xml')) as f:
data = f.read()
tree = parse_xml(data)
success = cpu_profile(tree.as_xml, 100)
print('XML')
cpu_profile(tree.as_xml, 100)
print('S-Expression')
print(tree.as_sxpr())
cpu_profile(lambda :tree.as_sxpr(compact=True), 100)
print('json')
cpu_profile(tree.as_json, 100)
with open(os.path.join(scriptpath, 'data', 'testdoc3.xml')) as f:
data = f.read()
tree = parse_xml(data)
success = cpu_profile(tree.as_xml, 100)
print('XML')
cpu_profile(tree.as_xml, 100)
if __name__ == "__main__":
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment