diff --git a/DHParser/parse.py b/DHParser/parse.py index 5a5a8d3ed74b4de742d64d8316b84aa1e7e2afcc..100a53ac50e2d49609c6427ffc3e83d902c1dbba 100644 --- a/DHParser/parse.py +++ b/DHParser/parse.py @@ -38,7 +38,7 @@ from DHParser.log import is_logging, HistoryRecord from DHParser.preprocess import BEGIN_TOKEN, END_TOKEN, RX_TOKEN_NAME from DHParser.stringview import StringView, EMPTY_STRING_VIEW from DHParser.syntaxtree import Node, RootNode, ParserBase, WHITESPACE_PTYPE, \ - TOKEN_PTYPE, ZOMBIE_PARSER + PLAINTEXT_PTYPE, TOKEN_PTYPE, ZOMBIE_PARSER from DHParser.toolkit import sane_parser_name, escape_control_characters, re, typing from typing import Callable, cast, Dict, DefaultDict, List, Set, Tuple, Union, Optional @@ -894,6 +894,7 @@ class PlainText(Parser): >>> Grammar(while_token)("while").content 'while' """ + assert PLAINTEXT_PTYPE == ":PlainText" def __init__(self, text: str, name: str = '') -> None: super().__init__(name) diff --git a/DHParser/stringview.py b/DHParser/stringview.py index 4484676ff1e73c0027716ca1f7b0e746f39f7ad8..54a12bd6abcc7ced54b29bdc92f845740c86366e 100644 --- a/DHParser/stringview.py +++ b/DHParser/stringview.py @@ -118,6 +118,7 @@ class StringView(collections.abc.Sized): return self.fullstring # since the slice is being copyied now, anyway, the copy might # as well be stored in the string view + # return self.text[self.begin:self.end] # use this for debugging! self.fullstring = self.text[self.begin:self.end] return self.fullstring diff --git a/DHParser/syntaxtree.py b/DHParser/syntaxtree.py index c2b5ba5dcd265ef5fc0a00de5546871e46ea124d..a884b0f9f5306ce0147ba12b02c0821625b1fdf9 100644 --- a/DHParser/syntaxtree.py +++ b/DHParser/syntaxtree.py @@ -36,6 +36,7 @@ from typing import Callable, cast, Iterator, List, AbstractSet, Set, Union, Tupl __all__ = ('ParserBase', 'WHITESPACE_PTYPE', + 'PLAINTEXT_PTYPE', 'TOKEN_PTYPE', 'MockParser', 'ZombieParser', @@ -109,6 +110,7 @@ class ParserBase: WHITESPACE_PTYPE = ':Whitespace' +PLAINTEXT_PTYPE = ':PlainText' TOKEN_PTYPE = ':Token' @@ -873,13 +875,16 @@ def parse_sxpr(sxpr: str) -> Node: return inner_parser(sxpr) +RX_WHITESPACE_TAIL = re.compile(r'\s*$') + + def parse_xml(xml: str) -> Node: """ Generates a tree of nodes from a (Pseudo-)XML-source. """ xml = StringView(xml) - PlainText = MockParser('', ':PlainText') - mock_parsers = {':PlainText': PlainText} + PlainText = MockParser('', PLAINTEXT_PTYPE) + mock_parsers = {PLAINTEXT_PTYPE: PlainText} def parse_attributes(s: StringView) -> Tuple[StringView, OrderedDict]: """Parses a sqeuence of XML-Attributes. Returns the string-slice @@ -900,7 +905,8 @@ def parse_xml(xml: str) -> Node: match = s.match(re.compile(r'<\s*(?P[\w:]+)\s*')) assert match tagname = match.groupdict()['tagname'] - s, attributes = parse_attributes(s[match.end() - s.begin:]) + section = s[match.end() - s.begin:] + s, attributes = parse_attributes(section) i = s.find('>') assert i >= 0 return s[i+1:], tagname, attributes, s[i-1] == "/" @@ -931,22 +937,24 @@ def parse_xml(xml: str) -> Node: if not solitary: while s and not s[:2] == "cfi' tree2 = parse_xml(fxml) - print(tree2.as_sxpr()) + assert fxml == flatten_xml(tree2.as_xml()) + + def test_plaintext_handling(self): + tree = parse_xml('alpha beta gamma') + assert flatten_sxpr(tree.as_sxpr()) == \ + '(a (:PlainText "alpha ") (b "beta") (:PlainText " gamma"))' + tree = parse_xml(' beta ') + assert flatten_xml(tree.as_xml()) == 'beta' + class TestNode: """