Currently, job artifacts in CI/CD pipelines on LRZ GitLab never expire. Starting Wed 26.1.2022, the default expiration time will be 30 days (the GitLab default). Artifacts that already exist in completed jobs will not be affected by the change. The latest artifacts for all jobs in the latest successful pipelines will be kept. More information: https://gitlab.lrz.de/help/user/admin_area/settings/continuous_integration.html#default-artifacts-expiration

Commit d8cc42fb authored by di68kap's avatar di68kap
Browse files

- syntaxtree.py: fixed "parse_xml()"

parent 9f1872d8
......@@ -38,7 +38,7 @@ from DHParser.log import is_logging, HistoryRecord
from DHParser.preprocess import BEGIN_TOKEN, END_TOKEN, RX_TOKEN_NAME
from DHParser.stringview import StringView, EMPTY_STRING_VIEW
from DHParser.syntaxtree import Node, RootNode, ParserBase, WHITESPACE_PTYPE, \
TOKEN_PTYPE, ZOMBIE_PARSER
PLAINTEXT_PTYPE, TOKEN_PTYPE, ZOMBIE_PARSER
from DHParser.toolkit import sane_parser_name, escape_control_characters, re, typing
from typing import Callable, cast, Dict, DefaultDict, List, Set, Tuple, Union, Optional
......@@ -894,6 +894,7 @@ class PlainText(Parser):
>>> Grammar(while_token)("while").content
'while'
"""
assert PLAINTEXT_PTYPE == ":PlainText"
def __init__(self, text: str, name: str = '') -> None:
super().__init__(name)
......
......@@ -118,6 +118,7 @@ class StringView(collections.abc.Sized):
return self.fullstring
# since the slice is being copied now, anyway, the copy might
# as well be stored in the string view
# return self.text[self.begin:self.end] # use this for debugging!
self.fullstring = self.text[self.begin:self.end]
return self.fullstring
......
......@@ -36,6 +36,7 @@ from typing import Callable, cast, Iterator, List, AbstractSet, Set, Union, Tupl
__all__ = ('ParserBase',
'WHITESPACE_PTYPE',
'PLAINTEXT_PTYPE',
'TOKEN_PTYPE',
'MockParser',
'ZombieParser',
......@@ -109,6 +110,7 @@ class ParserBase:
WHITESPACE_PTYPE = ':Whitespace'
PLAINTEXT_PTYPE = ':PlainText'
TOKEN_PTYPE = ':Token'
......@@ -873,13 +875,16 @@ def parse_sxpr(sxpr: str) -> Node:
return inner_parser(sxpr)
RX_WHITESPACE_TAIL = re.compile(r'\s*$')
def parse_xml(xml: str) -> Node:
"""
Generates a tree of nodes from a (Pseudo-)XML-source.
"""
xml = StringView(xml)
PlainText = MockParser('', ':PlainText')
mock_parsers = {':PlainText': PlainText}
PlainText = MockParser('', PLAINTEXT_PTYPE)
mock_parsers = {PLAINTEXT_PTYPE: PlainText}
def parse_attributes(s: StringView) -> Tuple[StringView, OrderedDict]:
"""Parses a sqeuence of XML-Attributes. Returns the string-slice
......@@ -900,7 +905,8 @@ def parse_xml(xml: str) -> Node:
match = s.match(re.compile(r'<\s*(?P<tagname>[\w:]+)\s*'))
assert match
tagname = match.groupdict()['tagname']
s, attributes = parse_attributes(s[match.end() - s.begin:])
section = s[match.end() - s.begin:]
s, attributes = parse_attributes(section)
i = s.find('>')
assert i >= 0
return s[i+1:], tagname, attributes, s[i-1] == "/"
......@@ -931,22 +937,24 @@ def parse_xml(xml: str) -> Node:
if not solitary:
while s and not s[:2] == "</":
s, leaf = parse_leaf_content(s)
if not s.match(re.compile("\s*$")):
if not leaf.match(RX_WHITESPACE_TAIL):
result.append(Node(PlainText, leaf))
if s[:1] == "<" and s[:2] != "</":
s, child = parse_full_content(s)
result.append(child)
s, closing_tagname = parse_closing_tag(s)
assert tagname == closing_tagname
if len(result) == 1 and isinstance(result[0].parser == PlainText):
if len(result) == 1 and result[0].parser.ptype == PLAINTEXT_PTYPE:
result = result[0].result
else:
result = tuple(result)
return Node(mock_parsers.setdefault(tagname, MockParser(name, ":" + class_name)), result)
return s, Node(mock_parsers.setdefault(tagname, MockParser(name, ":" + class_name)), result)
match_header = xml.search(re.compile(r'<(?!\?)'))
start = match_header.start() if match_header else 0
return parse_full_content(xml[start:])
_, tree = parse_full_content(xml[start:])
assert _.match(RX_WHITESPACE_TAIL)
return tree
# if __name__ == "__main__":
# st = parse_sxpr("(alpha (beta (gamma i\nj\nk) (delta y)) (epsilon z))")
......
......@@ -51,7 +51,15 @@ class TestParseXML:
fxml = flatten_xml(xml)
assert fxml == '<a><b>c</b><d><e>f</e><h>i</h></d></a>'
tree2 = parse_xml(fxml)
print(tree2.as_sxpr())
assert fxml == flatten_xml(tree2.as_xml())
def test_plaintext_handling(self):
    """Check how parse_xml treats text that sits next to child elements.

    First case: text on either side of a child element must appear as
    ``:PlainText`` nodes, with its spacing intact, in the flattened
    S-expression. Second case: a document whose only text outside
    ``<b>`` is whitespace must serialize to flattened XML without it.
    """
    # Mixed content: text before and after the <b> child.
    mixed_tree = parse_xml('<a>alpha <b>beta</b> gamma</a>')
    expected_sxpr = '(a (:PlainText "alpha ") (b "beta") (:PlainText " gamma"))'
    assert flatten_sxpr(mixed_tree.as_sxpr()) == expected_sxpr

    # Whitespace-only text around and between the tags.
    padded_tree = parse_xml(' <a> <b>beta</b> </a> ')
    expected_xml = '<a><b>beta</b></a>'
    assert flatten_xml(padded_tree.as_xml()) == expected_xml
class TestNode:
"""
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment