Commit a901f263 authored by Eckhart Arnold
Browse files

ebnf.py: new EBNF-parser and -compiler tested :-)

parent 9e99cf2b
......@@ -96,7 +96,7 @@ except ImportError:
import re
from DHParser import start_logging, suspend_logging, resume_logging, is_filename, load_if_file, \\
Grammar, Compiler, nil_preprocessor, PreprocessorToken, Whitespace, Drop, \\
Lookbehind, Lookahead, Alternative, Pop, Token, Synonym, AllOf, SomeOf, \\
Lookbehind, Lookahead, Alternative, Pop, Token, Synonym, Interleave, \\
Unordered, Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, Capture, \\
ZeroOrMore, Forward, NegativeLookahead, Required, mixin_comment, compile_source, \\
grammar_changed, last_value, matching_bracket, PreprocessorFunc, is_empty, remove_if, \\
......@@ -148,7 +148,7 @@ class EBNFGrammar(Grammar):
@ whitespace = /\s*/ # whitespace includes linefeed
@ literalws = right # trailing whitespace of literals will be ignored tacitly
@ drop = whitespace # do not include whitespace in concrete syntax tree
@ anonymous = pure_elem, element
@ anonymous = pure_elem # remove this element early, if possible
#: top-level
......@@ -203,7 +203,7 @@ class EBNFGrammar(Grammar):
element = Forward()
expression = Forward()
source_hash__ = "d69cbf8e455c989628fc2dd75aa97bb6"
anonymous__ = re.compile('pure_elem$|element$')
anonymous__ = re.compile('pure_elem$')
static_analysis_pending__ = [True]
parser_initialization__ = ["upon instantiation"]
COMMENT__ = r'#.*(?:\n|$)'
......@@ -323,8 +323,12 @@ EBNF_AST_transformation_table = {
[flatten, remove_tokens('@', '=', ',')],
"expression":
[replace_by_single_child, flatten, remove_tokens('|')],
"sequence, interleave":
"sequence":
[replace_by_single_child, flatten],
"interleave":
[replace_by_single_child, flatten, remove_tokens('°')],
"lookaround":
[],
"term, pure_elem, element":
[replace_by_single_child],
"flowmarker, retrieveop":
......@@ -1607,25 +1611,33 @@ class EBNFCompiler(Compiler):
def on_sequence(self, node) -> str:
new_result, custom_args = self._error_customization(node)
mock_node = Node(node.tag_name, new_result)
filtered_result, custom_args = self._error_customization(node)
mock_node = Node(node.tag_name, filtered_result)
return self.non_terminal(mock_node, 'Series', custom_args)
def on_interleave(self, node) -> str:
children = []
repetitions = []
children, custom_args = self._error_customization(node)
for child in children:
filtered_result, custom_args = self._error_customization(node)
for child in filtered_result:
if child.tag_name == "oneormore":
repetitions.append((1, INFINITE))
assert len(child.children) == 1
children.append(child.children[0])
elif child.tag_name == "repetition":
repetitions.append((0, INFINITE))
elif child.rag_name == "option":
assert len(child.children) == 1
children.append(child.children[0])
elif child.tag_name == "option":
repetitions.append((0, 1))
assert len(child.children) == 1
children.append(child.children[0])
else:
repetitions.append((1, 1))
custom_args.append('repetitions=%' % str(repetitions))
mock_node = Node(node.tag_name, children)
children.append(child)
custom_args.append('repetitions={}'.format(repetitions))
mock_node = Node(node.tag_name, tuple(children))
return self.non_terminal(mock_node, 'Interleave', custom_args)
......@@ -1666,7 +1678,7 @@ class EBNFCompiler(Compiler):
def on_element(self, node: Node) -> str:
assert node.children
assert len(node.children) == 2
assert node.children[0].tag_name == "retrieve_op"
assert node.children[0].tag_name == "retrieveop"
assert node.children[1].tag_name == "symbol"
prefix = node.children[0].content # type: str
arg = node.children[1].content # type: str
......
......@@ -394,7 +394,8 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
self.children = result
self._result = result or ''
else:
assert result is not None
# assert isinstance(result, StringView) \
# or isinstance(result, str)
self.children = tuple()
self._result = result
......
......@@ -4,7 +4,7 @@
@ whitespace = /\s*/ # whitespace includes linefeed
@ literalws = right # trailing whitespace of literals will be ignored tacitly
@ drop = whitespace # do not include whitespace in concrete syntax tree
@ anonymous = pure_elem, element
@ anonymous = pure_elem
#: top-level
......
......@@ -118,8 +118,8 @@ class NewEBNFGrammar(Grammar):
"""
element = Forward()
expression = Forward()
source_hash__ = "d69cbf8e455c989628fc2dd75aa97bb6"
anonymous__ = re.compile('pure_elem$|element$')
source_hash__ = "abd9bf9a97d8534bd6d7eaf27e1e4b8b"
anonymous__ = re.compile('pure_elem$')
static_analysis_pending__ = [True]
parser_initialization__ = ["upon instantiation"]
COMMENT__ = r'#.*(?:\n|$)'
......
......@@ -68,8 +68,8 @@ block_environment = known_environment | generic_block
known_environment = itemize | enumerate | figure | tabular | quotation
| verbatim
generic_block = begin_generic_block sequence §end_generic_block
begin_generic_block = -&_LB begin_environment LFF
end_generic_block = -&_LB end_environment LFF
begin_generic_block = <-&_LB begin_environment LFF
end_generic_block = <-&_LB end_environment LFF
itemize = "\begin{itemize}" [_WSPC] { item } §"\end{itemize}"
enumerate = "\begin{enumerate}" [_WSPC] {item } §"\end{enumerate}"
......@@ -100,9 +100,9 @@ line_element = text | block |inline_environment |command
inline_environment = known_inline_env | generic_inline_env
known_inline_env = inline_math
generic_inline_env = begin_inline_env ~ paragraph §end_inline_env
begin_inline_env = (-!_LB begin_environment) | (begin_environment !LFF)
begin_inline_env = (<-!_LB begin_environment) | (begin_environment !LFF)
end_inline_env = end_environment
## (-!_LB end_environment) | (end_environment !LFF) # ambiguity with generic_block when EOF
## (<-!_LB end_environment) | (end_environment !LFF) # ambiguity with generic_block when EOF
begin_environment = /\\begin{/ §NAME /}/
end_environment = /\\end{/ §::NAME /}/
......
......@@ -61,7 +61,7 @@ class LaTeXGrammar(Grammar):
paragraph = Forward()
tabular_config = Forward()
text_element = Forward()
source_hash__ = "678cf594491b6ade65a4dd1f20897e61"
source_hash__ = "eec38ff3979d07f4dc4d4699dee8b080"
anonymous__ = re.compile('_WSPC$|_GAP$|_LB$|_PARSEP$|block_environment$|known_environment$|text_element$|line_element$|inline_environment$|known_inline_env$|begin_inline_env$|end_inline_env$|command$|known_command$')
static_analysis_pending__ = [True]
parser_initialization__ = ["upon instantiation"]
......
......@@ -104,12 +104,12 @@ def get_grammar() -> Lyrik_explicit_whitespaceGrammar:
"""Returns a thread/process-exclusive Lyrik_explicit_whitespaceGrammar-singleton."""
THREAD_LOCALS = access_thread_locals()
try:
grammar = THREAD_LOCALS.Lyrik_explicit_whitespace_00000002_grammar_singleton
grammar = THREAD_LOCALS.Lyrik_explicit_whitespace_00000001_grammar_singleton
except AttributeError:
THREAD_LOCALS.Lyrik_explicit_whitespace_00000002_grammar_singleton = Lyrik_explicit_whitespaceGrammar()
THREAD_LOCALS.Lyrik_explicit_whitespace_00000001_grammar_singleton = Lyrik_explicit_whitespaceGrammar()
if hasattr(get_grammar, 'python_src__'):
THREAD_LOCALS.Lyrik_explicit_whitespace_00000002_grammar_singleton.python_src__ = get_grammar.python_src__
grammar = THREAD_LOCALS.Lyrik_explicit_whitespace_00000002_grammar_singleton
THREAD_LOCALS.Lyrik_explicit_whitespace_00000001_grammar_singleton.python_src__ = get_grammar.python_src__
grammar = THREAD_LOCALS.Lyrik_explicit_whitespace_00000001_grammar_singleton
if get_config_value('resume_notices'):
resume_notices_on(grammar)
elif get_config_value('history_tracking'):
......
......@@ -36,7 +36,7 @@ from DHParser import start_logging, suspend_logging, resume_logging, is_filename
remove_anonymous_empty, keep_nodes, traverse_locally, strip, lstrip, rstrip, \
replace_content, replace_content_by, forbid, assert_content, remove_infix_operator, \
add_error, error_on, recompile_grammar, left_associative, lean_left, set_config_value, \
chain, get_config_value, XML_SERIALIZATION, SXPRESSION_SERIALIZATION, \
get_config_value, XML_SERIALIZATION, SXPRESSION_SERIALIZATION, \
COMPACT_SERIALIZATION, JSON_SERIALIZATION, access_thread_locals, access_presets, \
finalize_presets, ErrorCode, RX_NEVER_MATCH, set_tracer, resume_notices_on, \
trace_history, has_descendant, neg, has_parent
......
......@@ -36,7 +36,7 @@ from DHParser import start_logging, suspend_logging, resume_logging, is_filename
remove_anonymous_empty, keep_nodes, traverse_locally, strip, lstrip, rstrip, \
replace_content, replace_content_by, forbid, assert_content, remove_infix_operator, \
add_error, error_on, recompile_grammar, left_associative, lean_left, set_config_value, \
chain, get_config_value, XML_SERIALIZATION, SXPRESSION_SERIALIZATION, \
get_config_value, XML_SERIALIZATION, SXPRESSION_SERIALIZATION, \
COMPACT_SERIALIZATION, JSON_SERIALIZATION, access_thread_locals, access_presets, \
finalize_presets, ErrorCode, RX_NEVER_MATCH, set_tracer, resume_notices_on, \
trace_history, has_descendant, neg, has_parent
......
......@@ -483,16 +483,16 @@ class TestWhitespace:
assert not cst.error_flag
class TestAllSome:
class TestInterleave:
def test_all(self):
ebnf = 'prefix = <"A" "B">'
ebnf = 'prefix = "A" ° "B"'
grammar = grammar_provider(ebnf)()
assert len(grammar.prefix.parsers) > 1
assert grammar('B A').content == 'B A'
assert grammar('A B').content == 'A B'
def test_some(self):
ebnf = 'prefix = <"A" | "B">'
ebnf = 'prefix = "A"? ° "B"?'
grammar = grammar_provider(ebnf)()
assert len(grammar.prefix.parsers) > 1
assert grammar('B A').content == 'B A'
......@@ -734,13 +734,13 @@ class TestInSeriesResume:
assert len(errors) >= 1 # cannot really recover from permutation errors
class TestAllOfResume:
class TestInterleaveResume:
def setup(self):
lang = """
document = allof
@ allof_error = '{} erwartet, {} gefunden :-('
@ allof_skip = "D", "E", "F", "G"
allof = < "A" "B" § "C" "D" "E" "F" "G" >
allof = "A" ° "B" ° §"C" ° "D" ° "E" ° "F" ° "G"
"""
self.gr = grammar_provider(lang)()
......@@ -758,7 +758,7 @@ class TestAllOfResume:
@ flow_resume = "."
flow = allof | series
@ allof_error = '{} erwartet, {} gefunden :-('
allof = < "A" "B" § "C" "D" "E" "F" "G" >
allof = "A" ° "B" ° §"C" ° "D" ° "E" ° "F" ° "G"
series = "E" "X" "Y" "Z"
"""
gr = grammar_provider(lang)()
......@@ -781,7 +781,7 @@ class TestAllOfResume:
flow = allof | series
@ allof_error = '{} erwartet, {} gefunden :-('
@ allof_resume = "E", "A"
allof = < "A" "B" § "C" "D" "E" "F" "G" >
allof = "A" ° "B" ° §"C" °"D" ° "E" ° "F" ° "G"
@ series_resume = "E", "A"
series = "E" "X" §"Y" "Z"
"""
......
......@@ -572,11 +572,11 @@ class TestErrorRecovery:
assert 'Skipping' in str(st.errors_sorted[1])
def test_AllOf_skip(self):
def test_Interleave_skip(self):
lang = """
document = allof | /.*/
@allof_skip = /[A-Z]/
allof = < "A" §"B" "C" "D" >
allof = "A" ° §"B" ° "C" ° "D"
"""
parser = grammar_provider(lang)()
st = parser('CADB')
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment