Commit 65158426 authored by di68kap

- parser and ebnf-compiler refactoring finished; examples need to be tested...

parent 77f7890b
@@ -90,7 +90,7 @@ except ImportError:
 from DHParser import logging, is_filename, load_if_file, \
     Grammar, Compiler, nil_preprocessor, PreprocessorToken, Whitespace, \
     Lookbehind, Lookahead, Alternative, Pop, Token, Synonym, AllOf, SomeOf, Unordered, \
-    Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, _RE, Capture, \
+    Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, Capture, \
     ZeroOrMore, Forward, NegativeLookahead, Required, mixin_comment, compile_source, \
     grammar_changed, last_value, counterpart, accumulate, PreprocessorFunc, \
     Node, TransformationFunc, TransformationDict, transformation_factory, \
...
@@ -30,8 +30,8 @@ from functools import partial
 from DHParser.compile import CompilerError, Compiler
 from DHParser.error import Error
-from DHParser.parse import Grammar, mixin_comment, Forward, RegExp, Whitespace, _RE, \
-    NegativeLookahead, Alternative, Series, Option, OneOrMore, ZeroOrMore, Token, _Token
+from DHParser.parse import Grammar, mixin_comment, Forward, RegExp, Whitespace, \
+    NegativeLookahead, Alternative, Series, Option, OneOrMore, ZeroOrMore, Token
 from DHParser.preprocess import nil_preprocessor, PreprocessorFunc
 from DHParser.syntaxtree import Node, WHITESPACE_PTYPE, TOKEN_PTYPE
 from DHParser.toolkit import load_if_file, escape_re, md5, sane_parser_name, re, expand_table, \
@@ -132,32 +132,35 @@ class EBNFGrammar(Grammar):
     WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
     wspL__ = ''
     wspR__ = WSP_RE__
-    whitespace__ = Whitespace(WSP_RE__)
+    wsp__ = Whitespace(WSP_RE__)
     EOF = NegativeLookahead(RegExp('.'))
-    list_ = Series(RegExp('\\w+'), whitespace__, ZeroOrMore(Series(_Token(","), RegExp('\\w+'), whitespace__)))
-    whitespace = Series(RegExp('~'), whitespace__)
-    regexp = Series(RegExp('/(?:\\\\/|[^/])*?/'), whitespace__)
-    plaintext = Series(RegExp('`(?:[^"]|\\\\")*?`'), whitespace__)
-    literal = Alternative(Series(RegExp('"(?:[^"]|\\\\")*?"'), whitespace__), Series(RegExp("'(?:[^']|\\\\')*?'"), whitespace__))
-    symbol = Series(RegExp('(?!\\d)\\w+'), whitespace__)
-    option = Series(_Token("["), expression, _Token("]"), mandatory=1)
-    repetition = Series(_Token("{"), expression, _Token("}"), mandatory=1)
-    oneormore = Series(_Token("{"), expression, _Token("}+"))
-    unordered = Series(_Token("<"), expression, _Token(">"), mandatory=1)
-    group = Series(_Token("("), expression, _Token(")"), mandatory=1)
-    retrieveop = Alternative(_Token("::"), _Token(":"))
-    flowmarker = Alternative(_Token("!"), _Token("&"), _Token("-!"), _Token("-&"))
-    factor = Alternative(Series(Option(flowmarker), Option(retrieveop), symbol, NegativeLookahead(_Token("="))),
+    list_ = Series(RegExp('\\w+'), wsp__, ZeroOrMore(Series(Series(Token(","), wsp__), RegExp('\\w+'), wsp__)))
+    whitespace = Series(RegExp('~'), wsp__)
+    regexp = Series(RegExp('/(?:\\\\/|[^/])*?/'), wsp__)
+    plaintext = Series(RegExp('`(?:[^"]|\\\\")*?`'), wsp__)
+    literal = Alternative(Series(RegExp('"(?:[^"]|\\\\")*?"'), wsp__), Series(RegExp("'(?:[^']|\\\\')*?'"), wsp__))
+    symbol = Series(RegExp('(?!\\d)\\w+'), wsp__)
+    option = Series(Series(Token("["), wsp__), expression, Series(Token("]"), wsp__), mandatory=1)
+    repetition = Series(Series(Token("{"), wsp__), expression, Series(Token("}"), wsp__), mandatory=1)
+    oneormore = Series(Series(Token("{"), wsp__), expression, Series(Token("}+"), wsp__))
+    unordered = Series(Series(Token("<"), wsp__), expression, Series(Token(">"), wsp__), mandatory=1)
+    group = Series(Series(Token("("), wsp__), expression, Series(Token(")"), wsp__), mandatory=1)
+    retrieveop = Alternative(Series(Token("::"), wsp__), Series(Token(":"), wsp__))
+    flowmarker = Alternative(Series(Token("!"), wsp__), Series(Token("&"), wsp__),
+                             Series(Token("-!"), wsp__), Series(Token("-&"), wsp__))
+    factor = Alternative(Series(Option(flowmarker), Option(retrieveop), symbol,
+                                NegativeLookahead(Series(Token("="), wsp__))),
                          Series(Option(flowmarker), literal), Series(Option(flowmarker), plaintext),
                          Series(Option(flowmarker), regexp), Series(Option(flowmarker), whitespace),
-                         Series(Option(flowmarker), oneormore),
-                         Series(Option(flowmarker), group),
+                         Series(Option(flowmarker), oneormore), Series(Option(flowmarker), group),
                          Series(Option(flowmarker), unordered), repetition, option)
-    term = OneOrMore(Series(Option(_Token("§")), factor))
-    expression.set(Series(term, ZeroOrMore(Series(_Token("|"), term))))
-    directive = Series(_Token("@"), symbol, _Token("="), Alternative(regexp, literal, list_), mandatory=1)
-    definition = Series(symbol, _Token("="), expression, mandatory=1)
-    syntax = Series(Option(Series(whitespace__, RegExp(''))), ZeroOrMore(Alternative(definition, directive)), EOF, mandatory=2)
+    term = OneOrMore(Series(Option(Series(Token("§"), wsp__)), factor))
+    expression.set(Series(term, ZeroOrMore(Series(Series(Token("|"), wsp__), term))))
+    directive = Series(Series(Token("@"), wsp__), symbol, Series(Token("="), wsp__),
+                       Alternative(regexp, literal, list_), mandatory=1)
+    definition = Series(symbol, Series(Token("="), wsp__), expression, mandatory=1)
+    syntax = Series(Option(Series(wsp__, RegExp(''))),
                    ZeroOrMore(Alternative(definition, directive)), EOF, mandatory=2)
     root__ = syntax
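Note on the pattern behind most of the churn in this hunk: the private _Token wrapper, which consumed trailing whitespace implicitly, is gone; grammar classes now spell that handling out as Series(Token(...), wsp__). A minimal sketch of the idiom, assuming the public names behave as used above (toy grammar, not code from this commit):

from DHParser.parse import Grammar, RegExp, Series, Token, Whitespace, ZeroOrMore

class WordList(Grammar):
    # toy analogue of `list_` above: words separated by commas
    wsp__ = Whitespace(r'\s*')            # insignificant whitespace, now explicit
    word = Series(RegExp(r'\w+'), wsp__)
    # old style: ZeroOrMore(Series(_Token(","), RegExp(r'\w+'), whitespace__))
    root__ = Series(word, ZeroOrMore(Series(Series(Token(","), wsp__), word)))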
@@ -459,7 +462,7 @@ class EBNFCompiler(Compiler):
             elif rule.startswith('Synonym'):
                 transformations = '[reduce_single_child]'
             transtable.append('    "' + name + '": %s,' % transformations)
-        transtable.append('    ":_Token, :_RE": reduce_single_child,')
+        transtable.append('    ":Token": reduce_single_child,')
         transtable += ['    "*": replace_by_single_child', '}', '']
         transtable += [TRANSFORMER_FACTORY.format(NAME=self.grammar_name)]
         return '\n'.join(transtable)
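Context for the one-line change above: these appends emit the AST transformation table of a generated parser module, and token nodes are now keyed by the single ptype ":Token", since the ":_RE" parser type no longer exists. A sketch of the emitted table's shape (grammar name, per-rule entry, and the package-level re-exports are assumptions for illustration):

from DHParser import reduce_single_child, replace_by_single_child  # assumed re-exports

Example_AST_transformation_table = {
    "definition": [],                  # per-rule entries, one per grammar symbol
    ":Token": reduce_single_child,     # the entry rewritten above
    "*": replace_by_single_child,      # catch-all
}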
@@ -777,9 +780,7 @@ class EBNFCompiler(Compiler):
         Compiles any non-terminal, where `parser_class` indicates the Parser class
         name for the particular non-terminal.
         """
-        # print(node.as_sxpr())
         arguments = [self.compile(r) for r in node.children] + custom_args
-        # node.error_flag = max(node.error_flag, max(t.error_flag for t in node.children))
         return parser_class + '(' + ', '.join(arguments) + ')'
@@ -931,7 +932,13 @@ class EBNFCompiler(Compiler):
     def on_plaintext(self, node: Node) -> str:
-        return 'Token(' + node.content.replace('\\', r'\\') + ')'
+        tk = node.content.replace('\\', r'\\')
+        rpl = '"' if tk.find('"') < 0 else "'" if tk.find("'") < 0 else ''
+        if rpl:
+            tk = rpl + tk[1:-1] + rpl
+        else:
+            tk = rpl + tk.replace('"', '\\"')[1:-1] + rpl
+        return 'Token(' + tk + ')'

     def on_regexp(self, node: Node) -> str:
...
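The new quoting logic above picks a delimiter that does not occur inside the plaintext literal, and falls back to escaping double quotes. A standalone replica to sanity-check it (the helper name is hypothetical, not part of the commit):

def plaintext_to_token(content: str) -> str:
    # mirrors the body of on_plaintext above
    tk = content.replace('\\', r'\\')
    rpl = '"' if tk.find('"') < 0 else "'" if tk.find("'") < 0 else ''
    if rpl:
        tk = rpl + tk[1:-1] + rpl
    else:
        tk = rpl + tk.replace('"', '\\"')[1:-1] + rpl
    return 'Token(' + tk + ')'

assert plaintext_to_token('`plain`') == 'Token("plain")'          # no quotes inside: use "..."
assert plaintext_to_token('`say "hi"`') == "Token('say \"hi\"')"  # contains ": fall back to '...'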
This diff is collapsed.
@@ -110,7 +110,7 @@ class ParserBase:
 WHITESPACE_PTYPE = ':Whitespace'
-TOKEN_PTYPE = ':_Token'
+TOKEN_PTYPE = ':Token'


 class MockParser(ParserBase):
@@ -936,8 +936,8 @@ def parse_xml(xml: str) -> Node:
     Generates a tree of nodes from a (Pseudo-)XML-source.
     """
     xml = StringView(xml)
-    PlainText = MockParser('', PLAINTEXT_PTYPE)
-    mock_parsers = {PLAINTEXT_PTYPE: PlainText}
+    PlainText = MockParser('', TOKEN_PTYPE)
+    mock_parsers = {TOKEN_PTYPE: PlainText}

     def parse_attributes(s: StringView) -> Tuple[StringView, OrderedDict]:
         """Parses a sequence of XML-Attributes. Returns the string-slice
@@ -997,7 +997,7 @@ def parse_xml(xml: str) -> Node:
             result.append(child)
         s, closing_tagname = parse_closing_tag(s)
         assert tagname == closing_tagname
-        if len(result) == 1 and result[0].parser.ptype == PLAINTEXT_PTYPE:
+        if len(result) == 1 and result[0].parser.ptype == TOKEN_PTYPE:
             result = result[0].result
         else:
             result = tuple(result)
...
@@ -133,7 +133,6 @@ class TestEBNFParser:
     def test_RE(self):
         gr = get_ebnf_grammar()
-        print(gr.regexp.parsers)
         m = gr.regexp.parsers[0].regexp.match(r'/[\\\\]/ xxx /')
         rs = m.group()
         assert rs.find('x') < 0, rs.group()
...
@@ -27,7 +27,7 @@ sys.path.extend(['../', './'])
 from DHParser.toolkit import compile_python_object
 from DHParser.log import logging, is_logging, log_ST
 from DHParser.error import Error
-from DHParser.parse import Retrieve, Grammar, Forward, _Token, ZeroOrMore, _RE, \
+from DHParser.parse import Retrieve, Grammar, Forward, TKN, ZeroOrMore, RE, \
     RegExp, Lookbehind, NegativeLookahead, OneOrMore, Series, Alternative, AllOf, SomeOf, UnknownParserError
 from DHParser import compile_source
 from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler
@@ -261,10 +261,10 @@ class TestGrammar:
             INTEGER = /\d+/~
             '''
         expression = Forward()
-        INTEGER = _RE('\\d+')
-        factor = INTEGER | _Token("(") + expression + _Token(")")
-        term = factor + ZeroOrMore((_Token("*") | _Token("/")) + factor)
-        expression.set(term + ZeroOrMore((_Token("+") | _Token("-")) + term))
+        INTEGER = RE('\\d+')
+        factor = INTEGER | TKN("(") + expression + TKN(")")
+        term = factor + ZeroOrMore((TKN("*") | TKN("/")) + factor)
+        expression.set(term + ZeroOrMore((TKN("+") | TKN("-")) + term))
         root__ = expression

     grammar = Arithmetic()
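A quick usage sketch for the rewritten fixture, reusing only the call style this test module itself exercises; the input is an assumption chosen without whitespace, and the snippet is illustrative rather than part of the commit:

st = grammar('3*(4+5)')   # a Grammar instance is callable on source text
assert not st.error_flag  # error_flag is set on the returned tree on failure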
@@ -304,7 +304,7 @@ class TestSeries:
         assert st.collect_errors()[0].code == Error.MANDATORY_CONTINUATION

     def test_series_composition(self):
-        TA, TB, TC, TD, TE = (_Token(b) for b in "ABCDE")
+        TA, TB, TC, TD, TE = (TKN(b) for b in "ABCDE")
         s1 = Series(TA, TB, TC, mandatory=2)
         s2 = Series(TD, TE)
@@ -342,23 +342,23 @@ class TestSeries:
 class TestAllOfSomeOf:
     def test_allOf_order(self):
         """Test that parsers of an AllOf-List can match in arbitrary order."""
-        prefixes = AllOf(_Token("A"), _Token("B"))
+        prefixes = AllOf(TKN("A"), TKN("B"))
         assert Grammar(prefixes)('A B').content == 'A B'
         assert Grammar(prefixes)('B A').content == 'B A'
         # alternative form
-        prefixes = AllOf(Series(_Token("B"), _Token("A")))
+        prefixes = AllOf(Series(TKN("B"), TKN("A")))
         assert Grammar(prefixes)('A B').content == 'A B'

     def test_allOf_completeness(self):
         """Test that an error is raised if not all parsers of an AllOf-List
         match."""
-        prefixes = AllOf(_Token("A"), _Token("B"))
+        prefixes = AllOf(TKN("A"), TKN("B"))
         assert Grammar(prefixes)('B').error_flag

     def test_allOf_redundance(self):
         """Test that one and the same parser may be listed several times
         and must be matched several times accordingly."""
-        prefixes = AllOf(_Token("A"), _Token("B"), _Token("A"))
+        prefixes = AllOf(TKN("A"), TKN("B"), TKN("A"))
         assert Grammar(prefixes)('A A B').content == 'A A B'
         assert Grammar(prefixes)('A B A').content == 'A B A'
         assert Grammar(prefixes)('B A A').content == 'B A A'
...
@@ -366,18 +366,18 @@ class TestAllOfSomeOf:
     def test_someOf_order(self):
         """Test that parsers of a SomeOf-List can match in arbitrary order."""
-        prefixes = SomeOf(_Token("A"), _Token("B"))
+        prefixes = SomeOf(TKN("A"), TKN("B"))
         assert Grammar(prefixes)('A B').content == 'A B'
         assert Grammar(prefixes)('B A').content == 'B A'
         # alternative form
-        prefixes = SomeOf(Alternative(_Token("B"), _Token("A")))
+        prefixes = SomeOf(Alternative(TKN("B"), TKN("A")))
         assert Grammar(prefixes)('A B').content == 'A B'
         assert Grammar(prefixes)('B').content == 'B'

     def test_someOf_redundance(self):
         """Test that one and the same parser may be listed several times
         and must be matched several times accordingly."""
-        prefixes = SomeOf(_Token("A"), _Token("B"), _Token("A"))
+        prefixes = SomeOf(TKN("A"), TKN("B"), TKN("A"))
         assert Grammar(prefixes)('A A B').content == 'A A B'
         assert Grammar(prefixes)('A B A').content == 'A B A'
         assert Grammar(prefixes)('B A A').content == 'B A A'
...
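The behavioral contrast these two test classes pin down, condensed from the assertions above (names as imported at the top of this test module): AllOf requires every listed parser to match, in any order, while SomeOf accepts any non-empty subset.

assert Grammar(AllOf(TKN("A"), TKN("B")))('B').error_flag                    # 'A' missing -> error
assert Grammar(SomeOf(Alternative(TKN("B"), TKN("A"))))('B').content == 'B'  # subset suffices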
@@ -55,7 +55,7 @@ class TestParseXML:
     def test_plaintext_handling(self):
         tree = parse_xml('<a>alpha <b>beta</b> gamma</a>')
         assert flatten_sxpr(tree.as_sxpr()) == \
-            '(a (:PlainText "alpha ") (b "beta") (:PlainText " gamma"))'
+            '(a (:Token "alpha ") (b "beta") (:Token " gamma"))'
         tree = parse_xml(' <a> <b>beta</b> </a> ')
         assert flatten_xml(tree.as_xml()) == '<a><b>beta</b></a>'
@@ -100,14 +100,13 @@ class TestNode:
     def test_equality2(self):
         ebnf = 'term = term ("*"|"/") factor | factor\nfactor = /[0-9]+/~'
-        att = {"term": [replace_by_single_child, flatten],
+        att = {"term": [remove_expendables, replace_by_single_child, flatten],
                "factor": [remove_expendables, reduce_single_child],
-               (TOKEN_PTYPE): [remove_expendables, reduce_single_child],
-               "?": [remove_expendables, replace_by_single_child]}
+               "*": [remove_expendables, replace_by_single_child]}
         parser = grammar_provider(ebnf)()
         tree = parser("20 / 4 * 3")
         traverse(tree, att)
-        compare_tree = parse_sxpr("(term (term (factor 20) (:_Token /) (factor 4)) (:_Token *) (factor 3))")
+        compare_tree = parse_sxpr("(term (term (factor 20) (:Token /) (factor 4)) (:Token *) (factor 3))")
         assert tree == compare_tree, tree.as_sxpr()

     def test_copy(self):
...
@@ -162,9 +162,9 @@ class TestGrammarTest:
                 3: "20 / 4 * 3"
             },
             "ast": {
-                '1*': "(term (factor 4) (:_Token *) (factor 5))",
-                2: "(term (factor 20) (:_Token /) (factor 4))",
-                3: "(term (term (factor 20) (:_Token /) (factor 4)) (:_Token *) (factor 3))"
+                '1*': "(term (factor 4) (:Token *) (factor 5))",
+                2: "(term (factor 20) (:Token /) (factor 4))",
+                3: "(term (term (factor 20) (:Token /) (factor 4)) (:Token *) (factor 3))"
             },
             "fail": {
                 4: "4 + 5",
...
@@ -186,9 +186,9 @@ class TestGrammarTest:
                 3: "20 / 4 * 3"
             },
             "ast": {
-                1: "(term (factor 4) (:_Token *) (factor 5))",
-                2: "(term (factor 20) (:_Token /) (factor 4))",
-                3: "(term (term (factor 19) (:_Token /) (factor 4)) (:_Token *) (factor 3))"  # error 19 != 20
+                1: "(term (factor 4) (:Token *) (factor 5))",
+                2: "(term (factor 20) (:Token /) (factor 4))",
+                3: "(term (term (factor 19) (:Token /) (factor 4)) (:Token *) (factor 3))"  # error 19 != 20
             },
             "fail": {
                 4: "4 * 5",  # error: this should match
...