Commit f53a902c authored by Eckhart Arnold's avatar Eckhart Arnold
Browse files

EBNF Syntax of AllOf-, SomeOf-parsers implementes + tests

parent cd78155a
...@@ -71,7 +71,7 @@ except ImportError: ...@@ -71,7 +71,7 @@ except ImportError:
import re import re
from DHParser import logging, is_filename, load_if_file, \\ from DHParser import logging, is_filename, load_if_file, \\
Grammar, Compiler, nil_preprocessor, \\ Grammar, Compiler, nil_preprocessor, \\
Lookbehind, Lookahead, Alternative, Pop, Required, Token, Synonym, \\ Lookbehind, Lookahead, Alternative, Pop, Required, Token, Synonym, AllOf, SomeOf, Unordered, \\
Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, RE, Capture, \\ Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, RE, Capture, \\
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \\ ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \\
last_value, counterpart, accumulate, PreprocessorFunc, \\ last_value, counterpart, accumulate, PreprocessorFunc, \\
......
...@@ -19,20 +19,19 @@ permissions and limitations under the License. ...@@ -19,20 +19,19 @@ permissions and limitations under the License.
import keyword import keyword
from collections import OrderedDict from collections import OrderedDict
from functools import partial from functools import partial
from typing import Callable, Dict, List, Set, Tuple
from DHParser.toolkit import load_if_file, escape_re, md5, sane_parser_name, re, typing from DHParser.error import Error
from DHParser.parser import Grammar, mixin_comment, nil_preprocessor, Forward, RegExp, RE, \ from DHParser.parser import Grammar, mixin_comment, nil_preprocessor, Forward, RegExp, RE, \
NegativeLookahead, Alternative, Series, Option, OneOrMore, ZeroOrMore, Token, \ NegativeLookahead, Alternative, Series, Option, OneOrMore, ZeroOrMore, Token, \
Required, Compiler, PreprocessorFunc Compiler, PreprocessorFunc
from DHParser.syntaxtree import Node, TransformationFunc, WHITESPACE_PTYPE, TOKEN_PTYPE from DHParser.syntaxtree import Node, TransformationFunc, WHITESPACE_PTYPE, TOKEN_PTYPE
from DHParser.error import Error from DHParser.toolkit import load_if_file, escape_re, md5, sane_parser_name, re
from DHParser.transform import traverse, remove_brackets, \ from DHParser.transform import traverse, remove_brackets, \
reduce_single_child, replace_by_single_child, remove_expendables, \ reduce_single_child, replace_by_single_child, remove_expendables, \
remove_tokens, flatten, forbid, assert_content, remove_infix_operator remove_tokens, flatten, forbid, assert_content, remove_infix_operator
from DHParser.versionnumber import __version__ from DHParser.versionnumber import __version__
from typing import Callable, Dict, List, Set, Tuple, Union
__all__ = ('get_ebnf_preprocessor', __all__ = ('get_ebnf_preprocessor',
'get_ebnf_grammar', 'get_ebnf_grammar',
'get_ebnf_transformer', 'get_ebnf_transformer',
...@@ -66,79 +65,6 @@ def get_ebnf_preprocessor() -> PreprocessorFunc: ...@@ -66,79 +65,6 @@ def get_ebnf_preprocessor() -> PreprocessorFunc:
######################################################################## ########################################################################
# class EBNFGrammar(Grammar):
# r"""Parser for an EBNF_variant source file, with this grammar:
#
# # EBNF-Grammar in EBNF
#
# @ comment = /#.*(?:\n|$)/ # comments start with '#' and eat all chars up to and including '\n'
# @ whitespace = /\s*/ # whitespace includes linefeed
# @ literalws = right # trailing whitespace of literals will be ignored tacitly
#
# syntax = [~//] { definition | directive } §EOF
# definition = symbol §"=" §expression
# directive = "@" §symbol §"=" §( regexp | literal | list_ )
#
# expression = term { "|" term }
# term = { factor }+
# factor = [flowmarker] [retrieveop] symbol !"=" # negative lookahead to be sure it's not a definition
# | [flowmarker] literal
# | [flowmarker] regexp
# | [flowmarker] group
# | [flowmarker] oneormore
# | repetition
# | option
#
# flowmarker = "!" | "&" | "§" # '!' negative lookahead, '&' positive lookahead, '§' required
# | "-!" | "-&" # '-' negative lookbehind, '-&' positive lookbehind
# retrieveop = "::" | ":" # '::' pop, ':' retrieve
#
# group = "(" expression §")"
# oneormore = "{" expression "}+"
# repetition = "{" expression §"}"
# option = "[" expression §"]"
#
# symbol = /(?!\d)\w+/~ # e.g. expression, factor, parameter_list
# literal = /"(?:[^"]|\\")*?"/~ # e.g. "(", '+', 'while'
# | /'(?:[^']|\\')*?'/~ # whitespace following literals will be ignored tacitly.
# regexp = /~?\/(?:\\\/|[^\/])*?\/~?/~ # e.g. /\w+/, ~/#.*(?:\n|$)/~
# # '~' is a whitespace-marker, if present leading or trailing
# # whitespace of a regular expression will be ignored tacitly.
# list_ = /\w+/~ { "," /\w+/~ } # comma separated list of symbols, e.g. BEGIN_LIST, END_LIST,
# # BEGIN_QUOTE, END_QUOTE ; see CommonMark/markdown.py for an exmaple
# EOF = !/./
# """
# expression = Forward()
# source_hash__ = "4735db10f0b79d44209d1de0184b2ca0"
# parser_initialization__ = "upon instantiation"
# COMMENT__ = r'#.*(?:\n|$)'
# WHITESPACE__ = r'\s*'
# WSP__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
# wspL__ = ''
# wspR__ = WSP__
# EOF = NegativeLookahead(RegExp('.'))
# list_ = Series(RE('\\w+'), ZeroOrMore(Series(Token(","), RE('\\w+'))))
# regexp = RE('~?/(?:\\\\/|[^/])*?/~?')
# literal = Alternative(RE('"(?:[^"]|\\\\")*?"'), RE("'(?:[^']|\\\\')*?'"))
# symbol = RE('(?!\\d)\\w+')
# option = Series(Token("["), expression, Required(Token("]")))
# repetition = Series(Token("{"), expression, Required(Token("}")))
# oneormore = Series(Token("{"), expression, Token("}+"))
# group = Series(Token("("), expression, Required(Token(")")))
# retrieveop = Alternative(Token("::"), Token(":"))
# flowmarker = Alternative(Token("!"), Token("&"), Token("§"), Token("-!"), Token("-&"))
# factor = Alternative(Series(Option(flowmarker), Option(retrieveop), symbol, NegativeLookahead(Token("="))),
# Series(Option(flowmarker), literal), Series(Option(flowmarker), regexp),
# Series(Option(flowmarker), group), Series(Option(flowmarker), oneormore), repetition, option)
# term = OneOrMore(factor)
# expression.set(Series(term, ZeroOrMore(Series(Token("|"), term))))
# directive = Series(Token("@"), Required(symbol), Required(Token("=")),
# Required(Alternative(regexp, literal, list_)))
# definition = Series(symbol, Required(Token("=")), Required(expression))
# syntax = Series(Option(RE('', wR='', wL=WSP__)), ZeroOrMore(Alternative(definition, directive)), Required(EOF))
# root__ = syntax
class EBNFGrammar(Grammar): class EBNFGrammar(Grammar):
r"""Parser for an EBNF source file, with this grammar: r"""Parser for an EBNF source file, with this grammar:
...@@ -157,8 +83,9 @@ class EBNFGrammar(Grammar): ...@@ -157,8 +83,9 @@ class EBNFGrammar(Grammar):
factor = [flowmarker] [retrieveop] symbol !"=" # negative lookahead to be sure it's not a definition factor = [flowmarker] [retrieveop] symbol !"=" # negative lookahead to be sure it's not a definition
| [flowmarker] literal | [flowmarker] literal
| [flowmarker] regexp | [flowmarker] regexp
| [flowmarker] group
| [flowmarker] oneormore | [flowmarker] oneormore
| [flowmarker] group
| [flowmarker] unordered
| repetition | repetition
| option | option
...@@ -166,10 +93,11 @@ class EBNFGrammar(Grammar): ...@@ -166,10 +93,11 @@ class EBNFGrammar(Grammar):
| "-!" | "-&" # '-' negative lookbehind, '-&' positive lookbehind | "-!" | "-&" # '-' negative lookbehind, '-&' positive lookbehind
retrieveop = "::" | ":" # '::' pop, ':' retrieve retrieveop = "::" | ":" # '::' pop, ':' retrieve
group = "(" expression §")" group = "(" §expression ")"
unordered = "<" §expression ">" # elements of expression in arbitrary order
oneormore = "{" expression "}+" oneormore = "{" expression "}+"
repetition = "{" expression §"}" repetition = "{" §expression "}"
option = "[" expression §"]" option = "[" §expression "]"
symbol = /(?!\d)\w+/~ # e.g. expression, factor, parameter_list symbol = /(?!\d)\w+/~ # e.g. expression, factor, parameter_list
literal = /"(?:[^"]|\\")*?"/~ # e.g. "(", '+', 'while' literal = /"(?:[^"]|\\")*?"/~ # e.g. "(", '+', 'while'
...@@ -182,8 +110,8 @@ class EBNFGrammar(Grammar): ...@@ -182,8 +110,8 @@ class EBNFGrammar(Grammar):
EOF = !/./ EOF = !/./
""" """
expression = Forward() expression = Forward()
source_hash__ = "a131abc5259738631000cda90d2fc65b" source_hash__ = "3fc9f5a340f560e847d9af0b61a68743"
tialization__ = "upon instantiation" parser_initialization__ = "upon instantiation"
COMMENT__ = r'#.*(?:\n|$)' COMMENT__ = r'#.*(?:\n|$)'
WHITESPACE__ = r'\s*' WHITESPACE__ = r'\s*'
WSP__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__) WSP__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
...@@ -197,17 +125,22 @@ class EBNFGrammar(Grammar): ...@@ -197,17 +125,22 @@ class EBNFGrammar(Grammar):
option = Series(Token("["), expression, Token("]"), mandatory=1) option = Series(Token("["), expression, Token("]"), mandatory=1)
repetition = Series(Token("{"), expression, Token("}"), mandatory=1) repetition = Series(Token("{"), expression, Token("}"), mandatory=1)
oneormore = Series(Token("{"), expression, Token("}+")) oneormore = Series(Token("{"), expression, Token("}+"))
unordered = Series(Token("<"), expression, Token(">"), mandatory=1)
group = Series(Token("("), expression, Token(")"), mandatory=1) group = Series(Token("("), expression, Token(")"), mandatory=1)
retrieveop = Alternative(Token("::"), Token(":")) retrieveop = Alternative(Token("::"), Token(":"))
flowmarker = Alternative(Token("!"), Token("&"), Token("-!"), Token("-&")) flowmarker = Alternative(Token("!"), Token("&"), Token("-!"), Token("-&"))
factor = Alternative(Series(Option(flowmarker), Option(retrieveop), symbol, NegativeLookahead(Token("="))), factor = Alternative(
Series(Option(flowmarker), literal), Series(Option(flowmarker), regexp), Series(Option(flowmarker), Option(retrieveop), symbol, NegativeLookahead(Token("="))),
Series(Option(flowmarker), group), Series(Option(flowmarker), oneormore), repetition, option) Series(Option(flowmarker), literal), Series(Option(flowmarker), regexp),
Series(Option(flowmarker), oneormore), Series(Option(flowmarker), group),
Series(Option(flowmarker), unordered), repetition, option)
term = OneOrMore(Series(Option(Token("§")), factor)) term = OneOrMore(Series(Option(Token("§")), factor))
expression.set(Series(term, ZeroOrMore(Series(Token("|"), term)))) expression.set(Series(term, ZeroOrMore(Series(Token("|"), term))))
directive = Series(Token("@"), symbol, Token("="), Alternative(regexp, literal, list_), mandatory=1) directive = Series(Token("@"), symbol, Token("="), Alternative(regexp, literal, list_),
mandatory=1)
definition = Series(symbol, Token("="), expression, mandatory=1) definition = Series(symbol, Token("="), expression, mandatory=1)
syntax = Series(Option(RE('', wR='', wL=WSP__)), ZeroOrMore(Alternative(definition, directive)), EOF, mandatory=2) syntax = Series(Option(RE('', wR='', wL=WSP__)), ZeroOrMore(Alternative(definition, directive)),
EOF, mandatory=2)
root__ = syntax root__ = syntax
...@@ -275,6 +208,8 @@ EBNF_AST_transformation_table = { ...@@ -275,6 +208,8 @@ EBNF_AST_transformation_table = {
replace_by_single_child, replace_by_single_child,
"group": "group":
[remove_brackets, replace_by_single_child], [remove_brackets, replace_by_single_child],
"unordered":
remove_brackets,
"oneormore, repetition, option": "oneormore, repetition, option":
[reduce_single_child, remove_brackets, [reduce_single_child, remove_brackets,
forbid('repetition', 'option', 'oneormore'), assert_content(r'(?!§)')], forbid('repetition', 'option', 'oneormore'), assert_content(r'(?!§)')],
...@@ -893,6 +828,9 @@ class EBNFCompiler(Compiler): ...@@ -893,6 +828,9 @@ class EBNFCompiler(Compiler):
raise EBNFCompilerError("Group nodes should have been eliminated by " raise EBNFCompilerError("Group nodes should have been eliminated by "
"AST transformation!") "AST transformation!")
def on_unordered(self, node) -> str:
# TODO: implementation must support AllOf as well as SomeOf
return self.non_terminal(node, 'Unordered')
def on_symbol(self, node: Node) -> str: # called only for symbols on the right hand side! def on_symbol(self, node: Node) -> str: # called only for symbols on the right hand side!
symbol = str(node) # ; assert result == cast(str, node.result) symbol = str(node) # ; assert result == cast(str, node.result)
......
...@@ -93,6 +93,7 @@ __all__ = ('PreprocessorFunc', ...@@ -93,6 +93,7 @@ __all__ = ('PreprocessorFunc',
'Alternative', 'Alternative',
'AllOf', 'AllOf',
'SomeOf', 'SomeOf',
'Unordered',
'FlowOperator', 'FlowOperator',
'Required', 'Required',
'Lookahead', 'Lookahead',
...@@ -110,7 +111,6 @@ __all__ = ('PreprocessorFunc', ...@@ -110,7 +111,6 @@ __all__ = ('PreprocessorFunc',
'compile_source') 'compile_source')
######################################################################## ########################################################################
# #
# Grammar and parsing infrastructure # Grammar and parsing infrastructure
...@@ -1633,6 +1633,17 @@ class SomeOf(NaryOperator): ...@@ -1633,6 +1633,17 @@ class SomeOf(NaryOperator):
return '<' + ' | '.join(parser.repr for parser in self.parsers) + '>' return '<' + ' | '.join(parser.repr for parser in self.parsers) + '>'
def Unordered(parser: NaryOperator, name: str = '') -> NaryOperator:
"""Returns an AllOf- or SomeOf-parser depending on whether `parser`
is a Series (AllOf) or an Alternative (SomeOf).
"""
if isinstance(parser, Series):
return AllOf(parser, name=name)
elif isinstance(parser, Alternative):
return SomeOf(parser, name=name)
else:
raise AssertionError("Unordered can take only Series or Alternative as parser.")
######################################################################## ########################################################################
# #
......
...@@ -15,7 +15,7 @@ factor = [flowmarker] [retrieveop] symbol !"=" # negative lookahead to be ...@@ -15,7 +15,7 @@ factor = [flowmarker] [retrieveop] symbol !"=" # negative lookahead to be
| [flowmarker] regexp | [flowmarker] regexp
| [flowmarker] oneormore | [flowmarker] oneormore
| [flowmarker] group | [flowmarker] group
# | [flowmarker] set | [flowmarker] unordered
| repetition | repetition
| option | option
......
...@@ -397,6 +397,18 @@ class TestFlowControlOperators: ...@@ -397,6 +397,18 @@ class TestFlowControlOperators:
# print(error) # print(error)
class TestAllSome:
def test_all(self):
ebnf = 'prefix = <"A" "B">'
grammar = grammar_provider(ebnf)()
assert grammar('B A').content() == 'B A'
def test_some(self):
ebnf = 'prefix = <"A" | "B">'
grammar = grammar_provider(ebnf)()
assert grammar('B A').content() == 'B A'
assert grammar('B').content() == 'B'
if __name__ == "__main__": if __name__ == "__main__":
from DHParser.testing import runner from DHParser.testing import runner
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment