Commit 625ad39d authored by di68kap
Browse files

- parser.py: bug: pos values not initialized in nodes in history record, in...

- parser.py: bug: pos values not initialized in nodes in history record, in case root parser did not match
parent 90515440
...@@ -76,11 +76,11 @@ from DHParser.parsers import Grammar, Compiler, nil_scanner, \\ ...@@ -76,11 +76,11 @@ from DHParser.parsers import Grammar, Compiler, nil_scanner, \\
Lookbehind, Lookahead, Alternative, Pop, Required, Token, \\ Lookbehind, Lookahead, Alternative, Pop, Required, Token, \\
Optional, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Sequence, RE, Capture, \\ Optional, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Sequence, RE, Capture, \\
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \\ ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \\
nop_filter, counterpart_filter, accumulating_filter nop_filter, counterpart_filter, accumulating_filter, ScannerFunc
from DHParser.syntaxtree import Node, traverse, remove_enclosing_delimiters, \\ from DHParser.syntaxtree import Node, traverse, remove_enclosing_delimiters, \\
remove_children_if, reduce_single_child, replace_by_single_child, remove_whitespace, \\ remove_children_if, reduce_single_child, replace_by_single_child, remove_whitespace, \\
no_operation, remove_expendables, remove_tokens, flatten, is_whitespace, is_expendable, \\ no_operation, remove_expendables, remove_tokens, flatten, is_whitespace, is_expendable, \\
collapse, map_content, WHITESPACE_PTYPE, TOKEN_PTYPE collapse, map_content, WHITESPACE_PTYPE, TOKEN_PTYPE, TransformerFunc
''' '''
......
...@@ -252,13 +252,13 @@ CompilerFactoryFunc = Callable[[], Compiler] ...@@ -252,13 +252,13 @@ CompilerFactoryFunc = Callable[[], Compiler]
SCANNER_FACTORY = ''' SCANNER_FACTORY = '''
def get_scanner(): def get_scanner() -> ScannerFunc:
return {NAME}Scanner return {NAME}Scanner
''' '''
GRAMMAR_FACTORY = ''' GRAMMAR_FACTORY = '''
def get_grammar(): def get_grammar() -> {NAME}Grammar:
global thread_local_{NAME}_grammar_singleton global thread_local_{NAME}_grammar_singleton
try: try:
grammar = thread_local_{NAME}_grammar_singleton grammar = thread_local_{NAME}_grammar_singleton
...@@ -270,13 +270,13 @@ def get_grammar(): ...@@ -270,13 +270,13 @@ def get_grammar():
TRANSFORMER_FACTORY = ''' TRANSFORMER_FACTORY = '''
def get_transformer(): def get_transformer() -> TransformerFunc:
return {NAME}Transform return {NAME}Transform
''' '''
COMPILER_FACTORY = ''' COMPILER_FACTORY = '''
def get_compiler(grammar_name="{NAME}", grammar_source=""): def get_compiler(grammar_name="{NAME}", grammar_source="") -> {NAME}Compiler:
global thread_local_{NAME}_compiler_singleton global thread_local_{NAME}_compiler_singleton
try: try:
compiler = thread_local_{NAME}_compiler_singleton compiler = thread_local_{NAME}_compiler_singleton
......
...@@ -412,6 +412,13 @@ class Grammar: ...@@ -412,6 +412,13 @@ class Grammar:
stitches.append(Node(None, skip)) stitches.append(Node(None, skip))
stitches[-1].add_error(error_msg) stitches[-1].add_error(error_msg)
if self.history_tracking: if self.history_tracking:
# some parsers may have matched and left history records with nodes != None.
# Because these are not connected to the stitched root node, their pos
# properties will not be initialized by setting the root node's pos property
# to zero. Therefore, their pos properties need to be initialized here
for record in self.history:
if record.node and record.node._pos < 0:
record.node.pos = 0
record = HistoryRecord(self.call_stack.copy(), stitches[-1], len(rest)) record = HistoryRecord(self.call_stack.copy(), stitches[-1], len(rest))
self.history.append(record) self.history.append(record)
self.history_tracking = False self.history_tracking = False
......
...@@ -204,7 +204,7 @@ def load_if_file(text_or_file) -> str: ...@@ -204,7 +204,7 @@ def load_if_file(text_or_file) -> str:
content = f.read() content = f.read()
return content return content
except FileNotFoundError as error: except FileNotFoundError as error:
if re.fullmatch(r'[\w/:\\]+', text_or_file): if re.fullmatch(r'[\w/:. \\]+', text_or_file):
raise FileNotFoundError('Not a valid file: ' + text_or_file + '\nAdd "\\n" ' raise FileNotFoundError('Not a valid file: ' + text_or_file + '\nAdd "\\n" '
'to distinguish source data from a file name!') 'to distinguish source data from a file name!')
else: else:
......
...@@ -19,11 +19,11 @@ from DHParser.parsers import Grammar, Compiler, nil_scanner, \ ...@@ -19,11 +19,11 @@ from DHParser.parsers import Grammar, Compiler, nil_scanner, \
Lookbehind, Lookahead, Alternative, Pop, Required, Token, \ Lookbehind, Lookahead, Alternative, Pop, Required, Token, \
Optional, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Sequence, RE, Capture, \ Optional, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Sequence, RE, Capture, \
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \ ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \
nop_filter, counterpart_filter, accumulating_filter nop_filter, counterpart_filter, accumulating_filter, ScannerFunc
from DHParser.syntaxtree import Node, traverse, remove_enclosing_delimiters, \ from DHParser.syntaxtree import Node, traverse, remove_enclosing_delimiters, \
remove_children_if, reduce_single_child, replace_by_single_child, remove_whitespace, \ remove_children_if, reduce_single_child, replace_by_single_child, remove_whitespace, \
no_operation, remove_expendables, remove_tokens, flatten, is_whitespace, is_expendable, \ no_operation, remove_expendables, remove_tokens, flatten, is_whitespace, is_expendable, \
collapse, map_content, WHITESPACE_PTYPE, TOKEN_PTYPE collapse, map_content, WHITESPACE_PTYPE, TOKEN_PTYPE, TransformerFunc
####################################################################### #######################################################################
...@@ -35,7 +35,7 @@ from DHParser.syntaxtree import Node, traverse, remove_enclosing_delimiters, \ ...@@ -35,7 +35,7 @@ from DHParser.syntaxtree import Node, traverse, remove_enclosing_delimiters, \
def LyrikScanner(text): def LyrikScanner(text):
return text return text
def get_scanner(): def get_scanner() -> ScannerFunc:
return LyrikScanner return LyrikScanner
...@@ -111,7 +111,7 @@ class LyrikGrammar(Grammar): ...@@ -111,7 +111,7 @@ class LyrikGrammar(Grammar):
gedicht = Sequence(bibliographisches, OneOrMore(LEERZEILE), Optional(serie), Required(titel), Required(text), RE('\\s*', wR=''), Required(ENDE)) gedicht = Sequence(bibliographisches, OneOrMore(LEERZEILE), Optional(serie), Required(titel), Required(text), RE('\\s*', wR=''), Required(ENDE))
root__ = gedicht root__ = gedicht
def get_grammar(): def get_grammar() -> LyrikGrammar:
global thread_local_Lyrik_grammar_singleton global thread_local_Lyrik_grammar_singleton
try: try:
grammar = thread_local_Lyrik_grammar_singleton grammar = thread_local_Lyrik_grammar_singleton
...@@ -159,7 +159,7 @@ Lyrik_AST_transformation_table = { ...@@ -159,7 +159,7 @@ Lyrik_AST_transformation_table = {
LyrikTransform = partial(traverse, processing_table=Lyrik_AST_transformation_table) LyrikTransform = partial(traverse, processing_table=Lyrik_AST_transformation_table)
def get_transformer(): def get_transformer() -> TransformerFunc:
return LyrikTransform return LyrikTransform
...@@ -250,7 +250,7 @@ class LyrikCompiler(Compiler): ...@@ -250,7 +250,7 @@ class LyrikCompiler(Compiler):
pass pass
def get_compiler(grammar_name="Lyrik", grammar_source=""): def get_compiler(grammar_name="Lyrik", grammar_source="") -> LyrikCompiler:
global thread_local_Lyrik_compiler_singleton global thread_local_Lyrik_compiler_singleton
try: try:
compiler = thread_local_Lyrik_compiler_singleton compiler = thread_local_Lyrik_compiler_singleton
...@@ -285,7 +285,7 @@ def compile_src(source): ...@@ -285,7 +285,7 @@ def compile_src(source):
if __name__ == "__main__": if __name__ == "__main__":
if len(sys.argv) == 1: if len(sys.argv) == 1:
sys.argv.append('Lyrisches_Intermezzo_iV.txt') sys.argv.append("Lyrisches_Intermezzo_IV.txt")
if len(sys.argv) > 1: if len(sys.argv) > 1:
result, errors, ast = compile_src(sys.argv[1]) result, errors, ast = compile_src(sys.argv[1])
if errors: if errors:
......
...@@ -27,6 +27,7 @@ sys.path.extend(['../', './']) ...@@ -27,6 +27,7 @@ sys.path.extend(['../', './'])
from DHParser.parsers import Grammar, Compiler from DHParser.parsers import Grammar, Compiler
from DHParser.dsl import compile_on_disk, run_compiler, compileEBNF, parser_factory, \ from DHParser.dsl import compile_on_disk, run_compiler, compileEBNF, parser_factory, \
load_compiler_suite load_compiler_suite
from DHParser.toolkit import is_filename
ARITHMETIC_EBNF = """ ARITHMETIC_EBNF = """
...@@ -57,12 +58,12 @@ class TestCompileFunctions: ...@@ -57,12 +58,12 @@ class TestCompileFunctions:
class TestCompilerGeneration: class TestCompilerGeneration:
trivial_lang = """ trivial_lang = """
text = { word | WSPC } "." text = { word | WSPC } "." [/\s/]
word = /\w+/ word = /\w+/
WSPC = /\s+/ WSPC = /\s+/
""" """
tmp = 'tmp/' if os.path.isdir('tmp') else ('test/tmp/') tmp = 'tmp/' if os.path.isdir('tmp') else ('test/tmp/')
trivial_text = u"""Es war ein Koenig in Thule.""" trivial_text = u"""Es war ein Koenig in Thule.\n"""
grammar_name = tmp + "TestCompilerGeneration.ebnf" grammar_name = tmp + "TestCompilerGeneration.ebnf"
compiler_name = tmp + "TestCompilerGenerationCompiler.py" compiler_name = tmp + "TestCompilerGenerationCompiler.py"
text_name = tmp + "TestCompilerGeneration_text.txt" text_name = tmp + "TestCompilerGeneration_text.txt"
...@@ -105,6 +106,7 @@ class TestCompilerGeneration: ...@@ -105,6 +106,7 @@ class TestCompilerGeneration:
assert compiler_suite == compiler_suite_2nd_run assert compiler_suite == compiler_suite_2nd_run
# test compiling with a generated compiler suite # test compiling with a generated compiler suite
# assert is_filename(self.text_name)
errors = compile_on_disk(self.text_name, self.compiler_name) errors = compile_on_disk(self.text_name, self.compiler_name)
assert not errors, str(errors) assert not errors, str(errors)
assert os.path.exists(self.result_name) assert os.path.exists(self.result_name)
......
...@@ -25,7 +25,7 @@ import sys ...@@ -25,7 +25,7 @@ import sys
sys.path.extend(['../', './']) sys.path.extend(['../', './'])
from DHParser import parsers from DHParser import parsers
from DHParser.toolkit import is_logging, compile_python_object from DHParser.toolkit import is_logging, logging, compile_python_object
from DHParser.syntaxtree import no_operation, traverse, remove_expendables, \ from DHParser.syntaxtree import no_operation, traverse, remove_expendables, \
replace_by_single_child, reduce_single_child, flatten, TOKEN_PTYPE replace_by_single_child, reduce_single_child, flatten, TOKEN_PTYPE
from DHParser.parsers import compile_source from DHParser.parsers import compile_source
...@@ -120,6 +120,27 @@ class TestRegex: ...@@ -120,6 +120,27 @@ class TestRegex:
assert not result.error_flag assert not result.error_flag
class TestGrammar:
    """Tests for position bookkeeping of nodes produced by a grammar."""

    def test_pos_values_initialized(self):
        """Check that every node in the parser history has an initialized pos.

        A small EBNF grammar is compiled to Python source, a parser object
        is built from that source and then run on an input without any
        newline (so ``LEERZEILE`` cannot match). Afterwards every history
        record that carries a node must report a non-negative ``pos``.
        """
        grammar = r"""@whitespace = horizontal
haupt = textzeile LEERZEILE
textzeile = { WORT }+
WORT = /[^ \t]+/~
LEERZEILE = /\n[ \t]*(?=\n)/~
"""
        parser_source, messages, _ = compile_source(
            grammar, None, get_ebnf_grammar(),
            get_ebnf_transformer(), get_ebnf_compiler("PosTest"))
        assert parser_source
        assert not messages
        # NOTE(review): indentation is lost in this view; the statements below
        # are assumed to run inside the logging context -- confirm against the
        # original file.
        with logging("LOGS"):
            # Raw string: '\w' in a plain literal is an invalid escape sequence
            # and triggers a Deprecation-/SyntaxWarning on newer Pythons.
            parser = compile_python_object(DHPARSER_IMPORTS + parser_source,
                                           r'\w+Grammar$')()
            parser("no_file_name*")
            for record in parser.history:
                assert not record.node or record.node.pos >= 0
if __name__ == "__main__": if __name__ == "__main__":
from DHParser.testing import runner from DHParser.testing import runner
runner("", globals()) runner("", globals())
...@@ -56,7 +56,7 @@ class TestToolkit: ...@@ -56,7 +56,7 @@ class TestToolkit:
assert load_if_file('this_is_code_and_not_a_file\n') assert load_if_file('this_is_code_and_not_a_file\n')
# neither will text that does not look like a file name # neither will text that does not look like a file name
s = "this is code and not a file" s = "this is code * and not a file"
assert s == load_if_file(s) assert s == load_if_file(s)
# not a file and not mistaken for a file # not a file and not mistaken for a file
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment