Commit 65158426 authored by di68kap

- parser and ebnf-compiler refactoring finished; examples need to be tested...

parent 77f7890b
@@ -90,7 +90,7 @@ except ImportError:
from DHParser import logging, is_filename, load_if_file, \
Grammar, Compiler, nil_preprocessor, PreprocessorToken, Whitespace, \
Lookbehind, Lookahead, Alternative, Pop, Token, Synonym, AllOf, SomeOf, Unordered, \
Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, _RE, Capture, \
Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, Capture, \
ZeroOrMore, Forward, NegativeLookahead, Required, mixin_comment, compile_source, \
grammar_changed, last_value, counterpart, accumulate, PreprocessorFunc, \
Node, TransformationFunc, TransformationDict, transformation_factory, \
......
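Note on this hunk: the private `_RE` combinator is dropped from the public imports. Elsewhere in this commit its effect is spelled out explicitly, either via the `RE`/`TKN` shorthands used by the test suite or by composing `RegExp` with the `wsp__` whitespace parser in generated grammars. A sketch of the assumed equivalences (not part of the diff):

    # old spelling           new spelling (assumed)
    # _RE('\\d+')        ->  RE('\\d+')                      # test-suite shorthand
    #                    ->  Series(RegExp('\\d+'), wsp__)   # generated-grammar spelling
    # _Token("(")        ->  TKN("(")                        # test-suite shorthand
    #                    ->  Series(Token("("), wsp__)       # generated-grammar spelling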
@@ -30,8 +30,8 @@ from functools import partial
from DHParser.compile import CompilerError, Compiler
from DHParser.error import Error
from DHParser.parse import Grammar, mixin_comment, Forward, RegExp, Whitespace, _RE, \
NegativeLookahead, Alternative, Series, Option, OneOrMore, ZeroOrMore, Token, _Token
from DHParser.parse import Grammar, mixin_comment, Forward, RegExp, Whitespace, \
NegativeLookahead, Alternative, Series, Option, OneOrMore, ZeroOrMore, Token
from DHParser.preprocess import nil_preprocessor, PreprocessorFunc
from DHParser.syntaxtree import Node, WHITESPACE_PTYPE, TOKEN_PTYPE
from DHParser.toolkit import load_if_file, escape_re, md5, sane_parser_name, re, expand_table, \
@@ -132,32 +132,35 @@ class EBNFGrammar(Grammar):
WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
wspL__ = ''
wspR__ = WSP_RE__
whitespace__ = Whitespace(WSP_RE__)
wsp__ = Whitespace(WSP_RE__)
EOF = NegativeLookahead(RegExp('.'))
list_ = Series(RegExp('\\w+'), whitespace__, ZeroOrMore(Series(_Token(","), RegExp('\\w+'), whitespace__)))
whitespace = Series(RegExp('~'), whitespace__)
regexp = Series(RegExp('/(?:\\\\/|[^/])*?/'), whitespace__)
plaintext = Series(RegExp('`(?:[^"]|\\\\")*?`'), whitespace__)
literal = Alternative(Series(RegExp('"(?:[^"]|\\\\")*?"'), whitespace__), Series(RegExp("'(?:[^']|\\\\')*?'"), whitespace__))
symbol = Series(RegExp('(?!\\d)\\w+'), whitespace__)
option = Series(_Token("["), expression, _Token("]"), mandatory=1)
repetition = Series(_Token("{"), expression, _Token("}"), mandatory=1)
oneormore = Series(_Token("{"), expression, _Token("}+"))
unordered = Series(_Token("<"), expression, _Token(">"), mandatory=1)
group = Series(_Token("("), expression, _Token(")"), mandatory=1)
retrieveop = Alternative(_Token("::"), _Token(":"))
flowmarker = Alternative(_Token("!"), _Token("&"), _Token("-!"), _Token("-&"))
factor = Alternative(Series(Option(flowmarker), Option(retrieveop), symbol, NegativeLookahead(_Token("="))),
list_ = Series(RegExp('\\w+'), wsp__, ZeroOrMore(Series(Series(Token(","), wsp__), RegExp('\\w+'), wsp__)))
whitespace = Series(RegExp('~'), wsp__)
regexp = Series(RegExp('/(?:\\\\/|[^/])*?/'), wsp__)
plaintext = Series(RegExp('`(?:[^"]|\\\\")*?`'), wsp__)
literal = Alternative(Series(RegExp('"(?:[^"]|\\\\")*?"'), wsp__), Series(RegExp("'(?:[^']|\\\\')*?'"), wsp__))
symbol = Series(RegExp('(?!\\d)\\w+'), wsp__)
option = Series(Series(Token("["), wsp__), expression, Series(Token("]"), wsp__), mandatory=1)
repetition = Series(Series(Token("{"), wsp__), expression, Series(Token("}"), wsp__), mandatory=1)
oneormore = Series(Series(Token("{"), wsp__), expression, Series(Token("}+"), wsp__))
unordered = Series(Series(Token("<"), wsp__), expression, Series(Token(">"), wsp__), mandatory=1)
group = Series(Series(Token("("), wsp__), expression, Series(Token(")"), wsp__), mandatory=1)
retrieveop = Alternative(Series(Token("::"), wsp__), Series(Token(":"), wsp__))
flowmarker = Alternative(Series(Token("!"), wsp__), Series(Token("&"), wsp__),
Series(Token("-!"), wsp__), Series(Token("-&"), wsp__))
factor = Alternative(Series(Option(flowmarker), Option(retrieveop), symbol,
NegativeLookahead(Series(Token("="), wsp__))),
Series(Option(flowmarker), literal), Series(Option(flowmarker), plaintext),
Series(Option(flowmarker), regexp), Series(Option(flowmarker), whitespace),
Series(Option(flowmarker), oneormore),
Series(Option(flowmarker), group),
Series(Option(flowmarker), oneormore), Series(Option(flowmarker), group),
Series(Option(flowmarker), unordered), repetition, option)
term = OneOrMore(Series(Option(_Token("§")), factor))
expression.set(Series(term, ZeroOrMore(Series(_Token("|"), term))))
directive = Series(_Token("@"), symbol, _Token("="), Alternative(regexp, literal, list_), mandatory=1)
definition = Series(symbol, _Token("="), expression, mandatory=1)
syntax = Series(Option(Series(whitespace__, RegExp(''))), ZeroOrMore(Alternative(definition, directive)), EOF, mandatory=2)
term = OneOrMore(Series(Option(Series(Token("§"), wsp__)), factor))
expression.set(Series(term, ZeroOrMore(Series(Series(Token("|"), wsp__), term))))
directive = Series(Series(Token("@"), wsp__), symbol, Series(Token("="), wsp__),
Alternative(regexp, literal, list_), mandatory=1)
definition = Series(symbol, Series(Token("="), wsp__), expression, mandatory=1)
syntax = Series(Option(Series(wsp__, RegExp(''))),
ZeroOrMore(Alternative(definition, directive)), EOF, mandatory=2)
root__ = syntax
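The recurring rewrite in this hunk: every former `_Token("x")` becomes `Series(Token("x"), wsp__)`, moving the whitespace handling out of the token class into an explicit series. Side by side, using the `option` rule from above:

    # old: option = Series(_Token("["), expression, _Token("]"), mandatory=1)
    # new:
    option = Series(Series(Token("["), wsp__), expression,
                    Series(Token("]"), wsp__), mandatory=1)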
@@ -459,7 +462,7 @@ class EBNFCompiler(Compiler):
elif rule.startswith('Synonym'):
transformations = '[reduce_single_child]'
transtable.append(' "' + name + '": %s,' % transformations)
transtable.append(' ":_Token, :_RE": reduce_single_child,')
transtable.append(' ":Token": reduce_single_child,')
transtable += [' "*": replace_by_single_child', '}', '']
transtable += [TRANSFORMER_FACTORY.format(NAME=self.grammar_name)]
return '\n'.join(transtable)
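For illustration, an excerpt of the kind of table this code now emits (grammar and rule names hypothetical):

    Example_AST_transformation_table = {
        "alias": [reduce_single_child],   # a rule compiled as a Synonym
        ":Token": reduce_single_child,    # single catch-all; was ":_Token, :_RE" before
        "*": replace_by_single_child
    }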
@@ -777,9 +780,7 @@ class EBNFCompiler(Compiler):
Compiles any non-terminal, where `parser_class` indicates the Parser class
name for the particular non-terminal.
"""
# print(node.as_sxpr())
arguments = [self.compile(r) for r in node.children] + custom_args
# node.error_flag = max(node.error_flag, max(t.error_flag for t in node.children))
return parser_class + '(' + ', '.join(arguments) + ')'
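Illustration (hypothetical node): if the node's two children compile to the argument strings 'literal' and 'regexp', the method returns the Python source fragment for the matching parser call:

    # self.non_terminal(node, 'Alternative')  ->  "Alternative(literal, regexp)"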
@@ -931,7 +932,13 @@ class EBNFCompiler(Compiler):
def on_plaintext(self, node: Node) -> str:
return 'Token(' + node.content.replace('\\', r'\\') + ')'
tk = node.content.replace('\\', r'\\')
rpl = '"' if tk.find('"') < 0 else "'" if tk.find("'") < 0 else ''
if rpl:
tk = rpl + tk[1:-1] + rpl
else:
tk = rpl + tk.replace('"', '\\"')[1:-1] + rpl
return 'Token(' + tk + ')'
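A standalone sketch of this quote normalization (hypothetical helper name, mirroring the logic above): the backtick delimiters are stripped and the text is re-quoted with whichever quote character does not occur in it.

    def plaintext_to_token_arg(content: str) -> str:
        tk = content.replace('\\', r'\\')                  # double up backslashes
        # pick a quote character that is absent from the text, if any
        rpl = '"' if tk.find('"') < 0 else "'" if tk.find("'") < 0 else ''
        if rpl:
            tk = rpl + tk[1:-1] + rpl                      # swap backticks for quotes
        else:
            tk = rpl + tk.replace('"', '\\"')[1:-1] + rpl  # escape embedded double quotes
        return 'Token(' + tk + ')'

    # plaintext_to_token_arg('`cat`')  ->  'Token("cat")'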
def on_regexp(self, node: Node) -> str:
......
@@ -110,7 +110,7 @@ class ParserBase:
WHITESPACE_PTYPE = ':Whitespace'
TOKEN_PTYPE = ':_Token'
TOKEN_PTYPE = ':Token'
class MockParser(ParserBase):
@@ -936,8 +936,8 @@ def parse_xml(xml: str) -> Node:
Generates a tree of nodes from a (Pseudo-)XML-source.
"""
xml = StringView(xml)
PlainText = MockParser('', PLAINTEXT_PTYPE)
mock_parsers = {PLAINTEXT_PTYPE: PlainText}
PlainText = MockParser('', TOKEN_PTYPE)
mock_parsers = {TOKEN_PTYPE: PlainText}
def parse_attributes(s: StringView) -> Tuple[StringView, OrderedDict]:
"""Parses a sqeuence of XML-Attributes. Returns the string-slice
@@ -997,7 +997,7 @@ def parse_xml(xml: str) -> Node:
result.append(child)
s, closing_tagname = parse_closing_tag(s)
assert tagname == closing_tagname
if len(result) == 1 and result[0].parser.ptype == PLAINTEXT_PTYPE:
if len(result) == 1 and result[0].parser.ptype == TOKEN_PTYPE:
result = result[0].result
else:
result = tuple(result)
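Assumed effect of this branch (cf. the parse_xml test further down): a node whose only child is a `:Token` is collapsed to that child's string result, while mixed content stays a tuple of nodes.

    # parse_xml('<b>beta</b>')               ->  (b "beta")
    # parse_xml('<a>alpha <b>beta</b></a>')  ->  (a (:Token "alpha ") (b "beta"))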
......
@@ -133,7 +133,6 @@ class TestEBNFParser:
def test_RE(self):
gr = get_ebnf_grammar()
print(gr.regexp.parsers)
m = gr.regexp.parsers[0].regexp.match(r'/[\\\\]/ xxx /')
rs = m.group()
assert rs.find('x') < 0, rs  # rs is already a string, not a match object
......
@@ -27,7 +27,7 @@ sys.path.extend(['../', './'])
from DHParser.toolkit import compile_python_object
from DHParser.log import logging, is_logging, log_ST
from DHParser.error import Error
from DHParser.parse import Retrieve, Grammar, Forward, _Token, ZeroOrMore, _RE, \
from DHParser.parse import Retrieve, Grammar, Forward, TKN, ZeroOrMore, RE, \
RegExp, Lookbehind, NegativeLookahead, OneOrMore, Series, Alternative, AllOf, SomeOf, UnknownParserError
from DHParser import compile_source
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler
@@ -261,10 +261,10 @@ class TestGrammar:
INTEGER = /\d+/~
'''
expression = Forward()
INTEGER = _RE('\\d+')
factor = INTEGER | _Token("(") + expression + _Token(")")
term = factor + ZeroOrMore((_Token("*") | _Token("/")) + factor)
expression.set(term + ZeroOrMore((_Token("+") | _Token("-")) + term))
INTEGER = RE('\\d+')
factor = INTEGER | TKN("(") + expression + TKN(")")
term = factor + ZeroOrMore((TKN("*") | TKN("/")) + factor)
expression.set(term + ZeroOrMore((TKN("+") | TKN("-")) + term))
root__ = expression
grammar = Arithmetic()
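A quick usage sketch for the refactored test grammar (illustrative input):

    st = grammar('2 * (3 + 4)')
    assert not st.error_flag    # a well-formed expression parses without errors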
@@ -304,7 +304,7 @@ class TestSeries:
assert st.collect_errors()[0].code == Error.MANDATORY_CONTINUATION
def test_series_composition(self):
TA, TB, TC, TD, TE = (_Token(b) for b in "ABCDE")
TA, TB, TC, TD, TE = (TKN(b) for b in "ABCDE")
s1 = Series(TA, TB, TC, mandatory=2)
s2 = Series(TD, TE)
@@ -342,23 +342,23 @@ class TestSeries:
class TestAllOfSomeOf:
def test_allOf_order(self):
"""Test that parsers of an AllOf-List can match in arbitrary order."""
prefixes = AllOf(_Token("A"), _Token("B"))
prefixes = AllOf(TKN("A"), TKN("B"))
assert Grammar(prefixes)('A B').content == 'A B'
assert Grammar(prefixes)('B A').content == 'B A'
# alternative form
prefixes = AllOf(Series(_Token("B"), _Token("A")))
prefixes = AllOf(Series(TKN("B"), TKN("A")))
assert Grammar(prefixes)('A B').content == 'A B'
def test_allOf_completeness(self):
"""Test that an error is raised if not all parsers of an AllOf-List
match."""
prefixes = AllOf(_Token("A"), _Token("B"))
prefixes = AllOf(TKN("A"), TKN("B"))
assert Grammar(prefixes)('B').error_flag
def test_allOf_redundance(self):
"""Test that one and the same parser may be listed several times
and must be matched several times accordingly."""
prefixes = AllOf(_Token("A"), _Token("B"), _Token("A"))
prefixes = AllOf(TKN("A"), TKN("B"), TKN("A"))
assert Grammar(prefixes)('A A B').content == 'A A B'
assert Grammar(prefixes)('A B A').content == 'A B A'
assert Grammar(prefixes)('B A A').content == 'B A A'
@@ -366,18 +366,18 @@ class TestAllOfSomeOf:
def test_someOf_order(self):
"""Test that parsers of an AllOf-List can match in arbitrary order."""
prefixes = SomeOf(_Token("A"), _Token("B"))
prefixes = SomeOf(TKN("A"), TKN("B"))
assert Grammar(prefixes)('A B').content == 'A B'
assert Grammar(prefixes)('B A').content == 'B A'
# alternative form
prefixes = SomeOf(Alternative(_Token("B"), _Token("A")))
prefixes = SomeOf(Alternative(TKN("B"), TKN("A")))
assert Grammar(prefixes)('A B').content == 'A B'
assert Grammar(prefixes)('B').content == 'B'
def test_someOf_redundance(self):
"""Test that one and the same parser may be listed several times
and must be matched several times accordingly."""
prefixes = SomeOf(_Token("A"), _Token("B"), _Token("A"))
prefixes = SomeOf(TKN("A"), TKN("B"), TKN("A"))
assert Grammar(prefixes)('A A B').content == 'A A B'
assert Grammar(prefixes)('A B A').content == 'A B A'
assert Grammar(prefixes)('B A A').content == 'B A A'
......
@@ -55,7 +55,7 @@ class TestParseXML:
def test_plaintext_handling(self):
tree = parse_xml('<a>alpha <b>beta</b> gamma</a>')
assert flatten_sxpr(tree.as_sxpr()) == \
'(a (:PlainText "alpha ") (b "beta") (:PlainText " gamma"))'
'(a (:Token "alpha ") (b "beta") (:Token " gamma"))'
tree = parse_xml(' <a> <b>beta</b> </a> ')
assert flatten_xml(tree.as_xml()) == '<a><b>beta</b></a>'
@@ -100,14 +100,13 @@ class TestNode:
def test_equality2(self):
ebnf = 'term = term ("*"|"/") factor | factor\nfactor = /[0-9]+/~'
att = {"term": [replace_by_single_child, flatten],
att = {"term": [remove_expendables, replace_by_single_child, flatten],
"factor": [remove_expendables, reduce_single_child],
(TOKEN_PTYPE): [remove_expendables, reduce_single_child],
"?": [remove_expendables, replace_by_single_child]}
"*": [remove_expendables, replace_by_single_child]}
parser = grammar_provider(ebnf)()
tree = parser("20 / 4 * 3")
traverse(tree, att)
compare_tree = parse_sxpr("(term (term (factor 20) (:_Token /) (factor 4)) (:_Token *) (factor 3))")
compare_tree = parse_sxpr("(term (term (factor 20) (:Token /) (factor 4)) (:Token *) (factor 3))")
assert tree == compare_tree, tree.as_sxpr()
def test_copy(self):
......
@@ -162,9 +162,9 @@ class TestGrammarTest:
3: "20 / 4 * 3"
},
"ast": {
'1*': "(term (factor 4) (:_Token *) (factor 5))",
2: "(term (factor 20) (:_Token /) (factor 4))",
3: "(term (term (factor 20) (:_Token /) (factor 4)) (:_Token *) (factor 3))"
'1*': "(term (factor 4) (:Token *) (factor 5))",
2: "(term (factor 20) (:Token /) (factor 4))",
3: "(term (term (factor 20) (:Token /) (factor 4)) (:Token *) (factor 3))"
},
"fail": {
4: "4 + 5",
@@ -186,9 +186,9 @@ class TestGrammarTest:
3: "20 / 4 * 3"
},
"ast": {
1: "(term (factor 4) (:_Token *) (factor 5))",
2: "(term (factor 20) (:_Token /) (factor 4))",
3: "(term (term (factor 19) (:_Token /) (factor 4)) (:_Token *) (factor 3))" # error 19 != 20
1: "(term (factor 4) (:Token *) (factor 5))",
2: "(term (factor 20) (:Token /) (factor 4))",
3: "(term (term (factor 19) (:Token /) (factor 4)) (:Token *) (factor 3))" # error 19 != 20
},
"fail": {
4: "4 * 5", # error: this should match
......