Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
D
DHParser
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Locked Files
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Iterations
Merge Requests
0
Merge Requests
0
Requirements
Requirements
List
Security & Compliance
Security & Compliance
Dependency List
License Compliance
Operations
Operations
Incidents
Analytics
Analytics
Code Review
Insights
Issue
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
badw-it
DHParser
Commits
6e5b22ea
Commit
6e5b22ea
authored
Feb 17, 2019
by
eckhart
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
- Early static analysis... work in progress!!!
parent
61e8e4f8
Changes
9
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
241 additions
and
91 deletions
+241
-91
DHParser/dsl.py
DHParser/dsl.py
+21
-50
DHParser/ebnf.py
DHParser/ebnf.py
+57
-3
DHParser/parse.py
DHParser/parse.py
+89
-24
examples/BibTeX/BibTeX.ebnf
examples/BibTeX/BibTeX.ebnf
+2
-2
examples/BibTeX/BibTeXCompiler.py
examples/BibTeX/BibTeXCompiler.py
+3
-3
examples/BibTeX/grammar_tests/REPORT/01_test_entry.md
examples/BibTeX/grammar_tests/REPORT/01_test_entry.md
+1
-1
test/test_ebnf.py
test/test_ebnf.py
+5
-2
test/test_parse.py
test/test_parse.py
+53
-4
test/test_testing.py
test/test_testing.py
+10
-2
No files found.
DHParser/dsl.py
View file @
6e5b22ea
...
...
@@ -26,8 +26,9 @@ import os
import
platform
import
stat
import
DHParser.ebnf
from
DHParser.compile
import
Compiler
,
compile_source
from
DHParser.ebnf
import
EBNFCompiler
,
grammar_changed
,
\
from
DHParser.ebnf
import
EBNFCompiler
,
grammar_changed
,
DHPARSER_IMPORTS
,
\
get_ebnf_preprocessor
,
get_ebnf_grammar
,
get_ebnf_transformer
,
get_ebnf_compiler
,
\
PreprocessorFactoryFunc
,
ParserFactoryFunc
,
TransformerFactoryFunc
,
CompilerFactoryFunc
from
DHParser.error
import
Error
,
is_error
,
has_errors
,
only_errors
...
...
@@ -37,12 +38,12 @@ from DHParser.preprocess import nil_preprocessor, PreprocessorFunc
from
DHParser.syntaxtree
import
Node
from
DHParser.transform
import
TransformationFunc
from
DHParser.toolkit
import
load_if_file
,
is_python_code
,
compile_python_object
,
\
re
,
typing
from
typing
import
Any
,
cast
,
List
,
Tuple
,
Union
,
Iterator
,
Iterable
,
Optional
,
Callable
re
from
typing
import
Any
,
cast
,
List
,
Tuple
,
Union
,
Iterator
,
Iterable
,
Optional
,
\
Callable
,
Generator
__all__
=
(
'DHPARSER_IMPORTS'
,
'GrammarError'
,
__all__
=
(
'DefinitionError'
,
'CompilationError'
,
'load_compiler_suite'
,
'compileDSL'
,
...
...
@@ -71,41 +72,6 @@ AST_SECTION = "AST SECTION - Can be edited. Changes will be preserved."
COMPILER_SECTION
=
"COMPILER SECTION - Can be edited. Changes will be preserved."
END_SECTIONS_MARKER
=
"END OF DHPARSER-SECTIONS"
dhparserdir
=
os
.
path
.
dirname
(
os
.
path
.
dirname
(
os
.
path
.
realpath
(
__file__
)))
DHPARSER_IMPORTS
=
'''
import collections
from functools import partial
import os
import sys
sys.path.append(r'{dhparserdir}')
try:
import regex as re
except ImportError:
import re
from DHParser import logging, is_filename, load_if_file,
\\
Grammar, Compiler, nil_preprocessor, PreprocessorToken, Whitespace, DropWhitespace,
\\
Lookbehind, Lookahead, Alternative, Pop, Token, DropToken, Synonym, AllOf, SomeOf,
\\
Unordered, Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, Capture,
\\
ZeroOrMore, Forward, NegativeLookahead, Required, mixin_comment, compile_source,
\\
grammar_changed, last_value, counterpart, accumulate, PreprocessorFunc, is_empty,
\\
Node, TransformationFunc, TransformationDict, transformation_factory, traverse,
\\
remove_children_if, move_adjacent, normalize_whitespace, is_anonymous, matches_re,
\\
reduce_single_child, replace_by_single_child, replace_or_reduce, remove_whitespace,
\\
remove_expendables, remove_empty, remove_tokens, flatten, is_insignificant_whitespace,
\\
is_expendable, collapse, collapse_if, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE,
\\
remove_nodes, remove_content, remove_brackets, replace_parser, remove_anonymous_tokens,
\\
keep_children, is_one_of, not_one_of, has_content, apply_if, remove_first, remove_last,
\\
remove_anonymous_empty, keep_nodes, traverse_locally, strip, lstrip, rstrip,
\\
replace_content, replace_content_by, forbid, assert_content, remove_infix_operator,
\\
flatten_anonymous_nodes, error_on, recompile_grammar, GLOBALS
'''
.
format
(
dhparserdir
=
dhparserdir
)
DHPARSER_MAIN
=
'''
def compile_src(source, log_dir=''):
"""Compiles ``source`` and returns (result, errors, ast).
...
...
@@ -156,19 +122,23 @@ class DSLException(Exception):
"""
Base class for DSL-exceptions.
"""
def
__init__
(
self
,
errors
):
def
__init__
(
self
,
errors
:
Union
[
List
[
Error
],
Generator
[
Error
,
None
,
None
]]
):
assert
isinstance
(
errors
,
Iterator
)
or
isinstance
(
errors
,
list
)
\
or
isinstance
(
errors
,
tuple
)
self
.
errors
=
errors
self
.
errors
=
list
(
errors
)
def
__str__
(
self
):
return
'
\n
'
.
join
(
str
(
err
)
for
err
in
self
.
errors
)
if
len
(
self
.
errors
)
==
1
:
return
str
(
self
.
errors
[
0
])
return
'
\n
'
+
'
\n
'
.
join
((
"%i. "
%
(
i
+
1
)
+
str
(
err
))
for
i
,
err
in
enumerate
(
self
.
errors
))
# return '\n'.join(str(err) for err in self.errors)
class
Grammar
Error
(
DSLException
):
class
Definition
Error
(
DSLException
):
"""
Raised when (already) the grammar of a domain specific language (DSL)
contains errors.
contains errors.
Usually, these are repackaged parse.GrammarError(s).
"""
def
__init__
(
self
,
errors
,
grammar_src
):
super
().
__init__
(
errors
)
...
...
@@ -178,7 +148,8 @@ class GrammarError(DSLException):
class
CompilationError
(
DSLException
):
"""
Raised when a string or file in a domain specific language (DSL)
contains errors.
contains errors. These can also contain definition errors that
have been caught early.
"""
def
__init__
(
self
,
errors
,
dsl_text
,
dsl_grammar
,
AST
,
result
):
super
().
__init__
(
errors
)
...
...
@@ -215,7 +186,7 @@ def grammar_instance(grammar_representation) -> Tuple[Grammar, str]:
get_ebnf_grammar
(),
get_ebnf_transformer
(),
get_ebnf_compiler
())
parser_py
=
cast
(
str
,
result
)
if
has_errors
(
messages
):
raise
Grammar
Error
(
only_errors
(
messages
),
grammar_src
)
raise
Definition
Error
(
only_errors
(
messages
),
grammar_src
)
parser_root
=
compile_python_object
(
DHPARSER_IMPORTS
+
parser_py
,
r
'\w+Grammar$'
)()
else
:
# assume that dsl_grammar is a ParserHQ-object or Grammar class
...
...
@@ -360,7 +331,7 @@ def load_compiler_suite(compiler_suite: str) -> \
get_ebnf_transformer
(),
get_ebnf_compiler
(
compiler_suite
,
source
))
if
has_errors
(
messages
):
raise
Grammar
Error
(
only_errors
(
messages
),
source
)
raise
Definition
Error
(
only_errors
(
messages
),
source
)
preprocessor
=
get_ebnf_preprocessor
parser
=
get_ebnf_grammar
ast
=
get_ebnf_transformer
...
...
@@ -476,7 +447,7 @@ def compile_on_disk(source_file: str, compiler_suite="", extension=".xml") -> It
ebnf_compiler
=
cast
(
EBNFCompiler
,
compiler1
)
global
SECTION_MARKER
,
RX_SECTION_MARKER
,
PREPROCESSOR_SECTION
,
PARSER_SECTION
,
\
AST_SECTION
,
COMPILER_SECTION
,
END_SECTIONS_MARKER
,
RX_WHITESPACE
,
\
DHPARSER_MAIN
,
DHPARSER_IMPORTS
DHPARSER_MAIN
f
=
None
try
:
f
=
open
(
rootname
+
'Compiler.py'
,
'r'
,
encoding
=
"utf-8"
)
...
...
@@ -503,7 +474,7 @@ def compile_on_disk(source_file: str, compiler_suite="", extension=".xml") -> It
if
RX_WHITESPACE
.
fullmatch
(
outro
):
outro
=
DHPARSER_MAIN
.
format
(
NAME
=
compiler_name
)
if
RX_WHITESPACE
.
fullmatch
(
imports
):
imports
=
DHPARSER_IMPORTS
imports
=
DHP
arser
.
ebnf
.
DHP
ARSER_IMPORTS
if
RX_WHITESPACE
.
fullmatch
(
preprocessor
):
preprocessor
=
ebnf_compiler
.
gen_preprocessor_skeleton
()
if
RX_WHITESPACE
.
fullmatch
(
ast
):
...
...
DHParser/ebnf.py
View file @
6e5b22ea
...
...
@@ -32,11 +32,12 @@ import os
from
DHParser.compile
import
CompilerError
,
Compiler
,
compile_source
,
visitor_name
from
DHParser.error
import
Error
from
DHParser.parse
import
Grammar
,
mixin_comment
,
Forward
,
RegExp
,
Whitespace
,
\
NegativeLookahead
,
Alternative
,
Series
,
Option
,
OneOrMore
,
ZeroOrMore
,
Token
NegativeLookahead
,
Alternative
,
Series
,
Option
,
OneOrMore
,
ZeroOrMore
,
Token
,
\
GrammarError
from
DHParser.preprocess
import
nil_preprocessor
,
PreprocessorFunc
from
DHParser.syntaxtree
import
Node
,
WHITESPACE_PTYPE
,
TOKEN_PTYPE
from
DHParser.toolkit
import
load_if_file
,
escape_re
,
md5
,
sane_parser_name
,
re
,
expand_table
,
\
GLOBALS
,
CONFIG_PRESET
,
get_config_value
,
unrepr
,
typing
GLOBALS
,
CONFIG_PRESET
,
get_config_value
,
unrepr
,
compile_python_object
,
typing
from
DHParser.transform
import
TransformationFunc
,
traverse
,
remove_brackets
,
\
reduce_single_child
,
replace_by_single_child
,
remove_expendables
,
\
remove_tokens
,
flatten
,
forbid
,
assert_content
...
...
@@ -67,6 +68,48 @@ __all__ = ('get_ebnf_preprocessor',
########################################################################
CONFIG_PRESET
[
'add_grammar_source_to_parser_docstring'
]
=
False
CONFIG_PRESET
[
'early_static_analysis'
]
=
True
# do a static analysis right after ebnf compilation
########################################################################
#
# source code support
#
########################################################################
dhparserdir
=
os
.
path
.
dirname
(
os
.
path
.
dirname
(
os
.
path
.
realpath
(
__file__
)))
DHPARSER_IMPORTS
=
'''
import collections
from functools import partial
import os
import sys
sys.path.append(r'{dhparserdir}')
try:
import regex as re
except ImportError:
import re
from DHParser import logging, is_filename, load_if_file,
\\
Grammar, Compiler, nil_preprocessor, PreprocessorToken, Whitespace, DropWhitespace,
\\
Lookbehind, Lookahead, Alternative, Pop, Token, DropToken, Synonym, AllOf, SomeOf,
\\
Unordered, Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, Capture,
\\
ZeroOrMore, Forward, NegativeLookahead, Required, mixin_comment, compile_source,
\\
grammar_changed, last_value, counterpart, accumulate, PreprocessorFunc, is_empty,
\\
Node, TransformationFunc, TransformationDict, transformation_factory, traverse,
\\
remove_children_if, move_adjacent, normalize_whitespace, is_anonymous, matches_re,
\\
reduce_single_child, replace_by_single_child, replace_or_reduce, remove_whitespace,
\\
remove_expendables, remove_empty, remove_tokens, flatten, is_insignificant_whitespace,
\\
is_expendable, collapse, collapse_if, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE,
\\
remove_nodes, remove_content, remove_brackets, replace_parser, remove_anonymous_tokens,
\\
keep_children, is_one_of, not_one_of, has_content, apply_if, remove_first, remove_last,
\\
remove_anonymous_empty, keep_nodes, traverse_locally, strip, lstrip, rstrip,
\\
replace_content, replace_content_by, forbid, assert_content, remove_infix_operator,
\\
flatten_anonymous_nodes, error_on, recompile_grammar, GLOBALS
'''
.
format
(
dhparserdir
=
dhparserdir
)
########################################################################
...
...
@@ -799,6 +842,7 @@ class EBNFCompiler(Compiler):
+
' source file'
+
(
'. Grammar:'
if
self
.
grammar_source
and
show_source
else
'.'
)]
definitions
.
append
((
'parser_initialization__'
,
'["upon instantiation"]'
))
definitions
.
append
((
'static_analysis_pending__'
,
'True'
))
if
self
.
grammar_source
:
definitions
.
append
((
'source_hash__'
,
'"%s"'
%
md5
(
self
.
grammar_source
,
__version__
)))
...
...
@@ -877,7 +921,17 @@ class EBNFCompiler(Compiler):
# node.error_flag = max(node.error_flag, nd.error_flag)
self
.
definitions
.
update
(
definitions
)
return
self
.
assemble_parser
(
definitions
,
node
)
grammar_python_src
=
self
.
assemble_parser
(
definitions
,
node
)
if
get_config_value
(
'early_static_analysis'
):
grammar_class
=
compile_python_object
(
DHPARSER_IMPORTS
+
grammar_python_src
,
self
.
grammar_name
)
try
:
_
=
grammar_class
()
except
GrammarError
as
error
:
for
sym
,
prs
,
err
in
error
.
errors
:
symdef_node
=
self
.
rules
[
sym
][
0
]
err
.
pos
=
self
.
rules
[
sym
][
0
].
pos
self
.
tree
.
add_error
(
symdef_node
,
err
)
return
grammar_python_src
def
on_definition
(
self
,
node
:
Node
)
->
Tuple
[
str
,
str
]:
...
...
DHParser/parse.py
View file @
6e5b22ea
...
...
@@ -46,6 +46,8 @@ from typing import Callable, cast, List, Tuple, Set, Dict, DefaultDict, Union, O
__all__
=
(
'Parser'
,
'UnknownParserError'
,
'GrammarErrorType'
,
'GrammarError'
,
'Grammar'
,
'EMPTY_NODE'
,
'PreprocessorToken'
,
...
...
@@ -369,13 +371,13 @@ class Parser:
# don't track returning parsers except in case an error has occurred
# remaining = len(rest)
if
grammar
.
moving_forward__
:
record
=
HistoryRecord
(
grammar
.
call_stack__
,
node
or
EMPTY_NODE
,
text
,
record
=
HistoryRecord
(
grammar
.
call_stack__
,
node
,
text
,
grammar
.
line_col__
(
text
))
grammar
.
history__
.
append
(
record
)
elif
node
:
nid
=
id
(
node
)
# type: int
if
nid
in
grammar
.
tree__
.
error_nodes
:
record
=
HistoryRecord
(
grammar
.
call_stack__
,
node
or
EMPTY_NODE
,
text
,
record
=
HistoryRecord
(
grammar
.
call_stack__
,
node
,
text
,
grammar
.
line_col__
(
text
),
grammar
.
tree__
.
error_nodes
[
nid
])
grammar
.
history__
.
append
(
record
)
...
...
@@ -514,6 +516,24 @@ class UnknownParserError(KeyError):
is referred to that does not exist."""
GrammarErrorType
=
List
[
Tuple
[
str
,
Parser
,
Error
]]
# TODO: replace with a named tuple?
class
GrammarError
(
Exception
):
"""GrammarError will be raised if static analysis reveals errors
in the grammar.
"""
def
__init__
(
self
,
static_analysis_result
:
List
[
GrammarErrorType
]):
assert
static_analysis_result
# must not be empty
self
.
errors
=
static_analysis_result
def
__str__
(
self
):
if
len
(
self
.
errors
)
==
1
:
return
str
(
self
.
errors
[
0
][
2
])
return
'
\n
'
+
'
\n
'
.
join
((
"%i. "
%
(
i
+
1
)
+
str
(
err_tuple
[
2
]))
for
i
,
err_tuple
in
enumerate
(
self
.
errors
))
class
Grammar
:
r
"""
Class Grammar directs the parsing process and stores global state
...
...
@@ -609,6 +629,14 @@ class Grammar:
field contains a value other than "done". A value of "done" indicates
that the class has already been initialized.
static_analysis_pending__: True as long as no static analysis (see the method
`static_analysis` for more information) has been done to check the
parser tree for correctness (e.g. no infinite loops). Static analysis
is done at instantiation and the flag is then set to False, but it
can also be carried out once the class has been generated
(by DHParser.ebnf.EBNFCompiler), in which case the flag can already be
set to False in the definition of the grammar class.
python__src__: For the purpose of debugging and inspection, this field can
take the python src of the concrete grammar class
(see `dsl.grammar_provider`).
...
...
@@ -710,7 +738,7 @@ class Grammar:
# some default values
# COMMENT__ = r'' # type: str # r'#.*(?:\n|$)'
# WSP_RE__ = mixin_comment(whitespace=r'[\t ]*', comment=COMMENT__) # type: str
static_analysis_
done__
=
False
static_analysis_
pending__
=
True
# type: bool
@
classmethod
...
...
@@ -771,12 +799,12 @@ class Grammar:
assert
'root_parser__'
in
self
.
__dict__
assert
self
.
root_parser__
==
self
.
__dict__
[
'root_parser__'
]
if
not
self
.
__class__
.
static_analysis_done
__
:
if
self
.
__class__
.
static_analysis_pending
__
:
try
:
result
=
self
.
static_analysis
()
if
result
:
raise
AssertionError
(
str
(
result
)
)
self
.
__class__
.
static_analysis_
done__
=
Tru
e
raise
GrammarError
(
result
)
self
.
__class__
.
static_analysis_
pending__
=
Fals
e
except
(
NameError
,
AttributeError
):
pass
# don't fail the initialization of PLACEHOLDER
...
...
@@ -875,6 +903,40 @@ class Grammar:
predecessors to the node."""
return
predecessors
[
-
1
].
pos
+
len
(
predecessors
[
-
1
])
if
predecessors
else
0
def
lookahead_failure_only
(
parser
):
"""EXPERIMENTAL!
Checks whether the failure to match the document was only due to a
succeeding lookahead parser, which is a common design pattern that can
break test cases. Testing for this case makes it possible to modify the
error message, so that the testing framework can know that the failure
is only a test-case artifact and not a real failure.
(See test/test_testing.TestLookahead!)
"""
last_record
=
self
.
history__
[
-
2
]
if
len
(
self
.
history__
)
>
1
else
None
# type: Optional[HistoryRecord]
# # TODO: Checking match status of history__[-2] is inaccurate if ending
# # lookahead parser is part of an Alternative-parser !!!
# # (Need a test-case!)
# return last_record and parser != self.root_parser__ \
# and last_record.status == HistoryRecord.MATCH \
# and last_record.node.pos \
# + len(last_record.node) >= len(self.document__) \
# and any(tn in self and isinstance(self[tn], Lookahead)
# or tn[0] == ':' and issubclass(eval(tn[1:]), Lookahead)
# for tn in last_record.call_stack)
last_record
=
self
.
history__
[
-
2
]
if
len
(
self
.
history__
)
>
1
else
None
# type: Optional[HistoryRecord]
# TODO: Checking match status of history__[-2] is inaccurate if ending
# lookahead parser is part of an Alternative-parser !!!
# (Need a test-case!)
return
last_record
and
parser
!=
self
.
root_parser__
\
and
any
(
self
.
history__
[
i
].
status
==
HistoryRecord
.
MATCH
\
and
self
.
history__
[
i
].
node
.
pos
\
+
len
(
self
.
history__
[
i
].
node
)
>=
len
(
self
.
document__
)
\
and
any
(
tn
in
self
and
isinstance
(
self
[
tn
],
Lookahead
)
or
tn
[
0
]
==
':'
and
issubclass
(
eval
(
tn
[
1
:]),
Lookahead
)
for
tn
in
self
.
history__
[
i
].
call_stack
)
for
i
in
range
(
-
2
,
-
len
(
self
.
history__
)
-
1
,
-
1
))
# assert isinstance(document, str), type(document)
if
self
.
_dirty_flag__
:
self
.
_reset__
()
...
...
@@ -901,9 +963,16 @@ class Grammar:
result
,
_
=
parser
(
rest
)
if
result
is
None
:
result
=
Node
(
ZOMBIE_TAG
,
''
).
with_pos
(
0
)
self
.
tree__
.
new_error
(
result
,
'Parser "%s" did not match empty document.'
%
str
(
parser
),
Error
.
PARSER_DID_NOT_MATCH
)
if
lookahead_failure_only
(
parser
):
self
.
tree__
.
new_error
(
result
,
'Parser "%s" did not match empty document except for lookahead'
%
str
(
parser
),
Error
.
PARSER_LOOKAHEAD_MATCH_ONLY
)
else
:
self
.
tree__
.
new_error
(
result
,
'Parser "%s" did not match empty document.'
%
str
(
parser
),
Error
.
PARSER_DID_NOT_MATCH
)
while
rest
and
len
(
stitches
)
<
MAX_DROPOUTS
:
result
,
rest
=
parser
(
rest
)
if
rest
:
...
...
@@ -916,15 +985,7 @@ class Grammar:
str
(
HistoryRecord
.
last_match
(
self
.
history__
)))
# Check if a Lookahead-Parser did match. Needed for testing, because
# in a test case this is not necessarily an error.
last_record
=
self
.
history__
[
-
2
]
if
len
(
self
.
history__
)
>
1
else
None
# type: Optional[HistoryRecord]
if
last_record
and
parser
!=
self
.
root_parser__
\
and
last_record
.
status
==
HistoryRecord
.
MATCH
\
and
last_record
.
node
.
pos
\
+
len
(
last_record
.
node
)
>=
len
(
self
.
document__
)
\
and
any
(
tn
in
self
and
isinstance
(
self
[
tn
],
Lookahead
)
or
tn
[
0
]
==
':'
and
issubclass
(
eval
(
tn
[
1
:]),
Lookahead
)
for
tn
in
last_record
.
call_stack
):
if
lookahead_failure_only
(
parser
):
error_msg
=
'Parser did not match except for lookahead! '
+
err_info
error_code
=
Error
.
PARSER_LOOKAHEAD_MATCH_ONLY
else
:
...
...
@@ -1021,8 +1082,10 @@ class Grammar:
return
line_col
(
self
.
document_lbreaks__
,
self
.
document_length__
-
len
(
text
))
def
static_analysis
(
self
)
->
List
[
Tuple
[
str
,
Parser
,
Error
]
]:
def
static_analysis
(
self
)
->
List
[
GrammarErrorType
]:
"""
EXPERIMENTAL (does not catch infinite loops due to regular expressions...)
Checks the parser tree statically for possible errors. At the moment only
infinite loops will be detected.
:return: a list of error-tuples consisting of the narrowest containing
...
...
@@ -1030,7 +1093,7 @@ class Grammar:
the actual parser that failed and an error object.
"""
containing_named_parser
=
''
# type: str
error_list
=
[]
# type: List[
Tuple[str, Parser, Error]
]
error_list
=
[]
# type: List[
GrammarErrorType
]
def
visit_parser
(
parser
:
Parser
)
->
None
:
nonlocal
containing_named_parser
,
error_list
...
...
@@ -1038,7 +1101,7 @@ class Grammar:
containing_named_parser
=
parser
.
pname
if
isinstance
(
parser
,
ZeroOrMore
)
or
isinstance
(
parser
,
OneOrMore
):
inner_parser
=
cast
(
UnaryParser
,
parser
).
parser
tree
=
self
(
''
,
inner_parser
)
tree
=
self
(
''
,
inner_parser
,
True
)
if
not
tree
.
error_flag
:
if
not
parser
.
pname
:
msg
=
'Parser "%s" in %s can become caught up in an infinite loop!'
\
...
...
@@ -2057,8 +2120,10 @@ class Lookahead(FlowParser):
"""
def
_parse
(
self
,
text
:
StringView
)
->
Tuple
[
Optional
[
Node
],
StringView
]:
node
,
_
=
self
.
parser
(
text
)
if
self
.
sign
(
node
is
not
None
):
return
Node
(
self
.
tag_name
,
''
),
text
if
(
self
.
sign
(
node
is
not
None
)
# static analysis requires lookahead to be disabled at document end
or
(
self
.
grammar
.
static_analysis_pending__
and
not
text
)):
return
Node
(
self
.
tag_name
,
''
)
if
self
.
pname
else
EMPTY_NODE
,
text
else
:
return
None
,
text
...
...
@@ -2220,7 +2285,7 @@ class Retrieve(Parser):
stack
=
self
.
grammar
.
variables__
[
self
.
symbol
.
pname
]
value
=
self
.
filter
(
stack
)
except
(
KeyError
,
IndexError
):
node
=
Node
(
self
.
tag_name
,
''
)
node
=
Node
(
self
.
tag_name
,
''
)
.
with_pos
(
self
.
grammar
.
document_length__
-
len
(
text
))
self
.
grammar
.
tree__
.
new_error
(
node
,
dsl_error_msg
(
self
,
"'%s' undefined or exhausted."
%
self
.
symbol
.
pname
))
return
node
,
text
...
...
examples/BibTeX/BibTeX.ebnf
View file @
6e5b22ea
...
...
@@ -43,7 +43,7 @@ text = { CONTENT_STRING | "{" text "}" }
WORD = /\w+/~
NO_BLANK_STRING = /[^ \t\n,%]+/~
COMMA_TERMINATED_STRING = { /[^,%]+/ |
/(?=%)
/~ }
CONTENT_STRING = { /[^{}%]+/ |
/(?=%)
/~ }+
COMMA_TERMINATED_STRING = { /[^,%]+/ |
&/%
/~ }
CONTENT_STRING = { /[^{}%]+/ |
&/%
/~ }+
EOF = !/./
\ No newline at end of file
examples/BibTeX/BibTeXCompiler.py
View file @
6e5b22ea
...
...
@@ -57,7 +57,7 @@ class BibTeXGrammar(Grammar):
r
"""Parser for a BibTeX source file.
"""
text
=
Forward
()
source_hash__
=
"
d9a1a1b431a3185dab127be165a37719
"
source_hash__
=
"
ece0314c999ac86f22796331c05efd62
"
parser_initialization__
=
[
"upon instantiation"
]
resume_rules__
=
{}
COMMENT__
=
r
'//'
...
...
@@ -65,8 +65,8 @@ class BibTeXGrammar(Grammar):
WSP_RE__
=
mixin_comment
(
whitespace
=
WHITESPACE__
,
comment
=
COMMENT__
)
wsp__
=
Whitespace
(
WSP_RE__
)
EOF
=
NegativeLookahead
(
RegExp
(
'(?i).'
))
CONTENT_STRING
=
OneOrMore
(
Alternative
(
RegExp
(
'(?i)[^{}%]+'
),
Series
(
RegExp
(
'(?i)(?=%)'
),
wsp__
)))
COMMA_TERMINATED_STRING
=
ZeroOrMore
(
Alternative
(
RegExp
(
'(?i)[^,%]+'
),
Series
(
RegExp
(
'(?i)(?=%)'
),
wsp__
)))
CONTENT_STRING
=
OneOrMore
(
Alternative
(
RegExp
(
'(?i)[^{}%]+'
),
Series
(
Lookahead
(
RegExp
(
'(?i)%'
)
),
wsp__
)))
COMMA_TERMINATED_STRING
=
ZeroOrMore
(
Alternative
(
RegExp
(
'(?i)[^,%]+'
),
Series
(
Lookahead
(
RegExp
(
'(?i)%'
)
),
wsp__
)))
NO_BLANK_STRING
=
Series
(
RegExp
(
'(?i)[^
\\
t
\\
n,%]+'
),
wsp__
)
WORD
=
Series
(
RegExp
(
'(?i)
\\
w+'
),
wsp__
)
text
.
set
(
ZeroOrMore
(
Alternative
(
CONTENT_STRING
,
Series
(
Series
(
Token
(
"{"
),
wsp__
),
text
,
Series
(
Token
(
"}"
),
wsp__
)))))
...
...
examples/BibTeX/grammar_tests/REPORT/01_test_entry.md
View file @
6e5b22ea
...
...
@@ -68,7 +68,7 @@ Match test "entry" for parser "entry" failed:
organization = {Wikipedia}
}
6:68: Error (1090): DSL parser specification error: Infinite Loop encountered. Caught by parser "CONTENT_STRING = {/(?i)[^{}%]+/ |
/(?i)(?=%)
/ ~}+".
6:68: Error (1090): DSL parser specification error: Infinite Loop encountered. Caught by parser "CONTENT_STRING = {/(?i)[^{}%]+/ |
&/(?i)%
/ ~}+".
Call stack: entry->:ZeroOrMore->:Series->content->:Series->text->:Alternative->CONTENT_STRING->:Alternative->:Series->:Whitespace
6:68: Error (1010): '}' ~ expected, "%E2\%80\%9" found!
6:69: Error (1040): Parser stopped before end! trying to recover but stopping history recording at this point.
...
...
test/test_ebnf.py
View file @
6e5b22ea
...
...
@@ -31,8 +31,8 @@ from DHParser import compile_source
from
DHParser.error
import
has_errors
,
Error
from
DHParser.syntaxtree
import
WHITESPACE_PTYPE
from
DHParser.ebnf
import
get_ebnf_grammar
,
get_ebnf_transformer
,
EBNFTransform
,
\
get_ebnf_compiler
,
compile_ebnf
from
DHParser.dsl
import
CompilationError
,
compileDSL
,
DHPARSER_IMPORTS
,
grammar_provider
get_ebnf_compiler
,
compile_ebnf
,
DHPARSER_IMPORTS
from
DHParser.dsl
import
CompilationError
,
compileDSL
,
grammar_provider
from
DHParser.testing
import
grammar_unit
...
...
@@ -724,6 +724,9 @@ class TestAllOfResume:
assert
len
(
st
.
errors_sorted
)
==
1
class
TestStaticAnalysis
:
pass
if
__name__
==
"__main__"
:
from
DHParser.testing
import
runner
...
...
test/test_parse.py
View file @
6e5b22ea
...
...
@@ -29,10 +29,10 @@ from DHParser.log import logging, is_logging, log_ST, log_parsing_history
from
DHParser.error
import
Error
,
is_error
from
DHParser.parse
import
Parser
,
Grammar
,
Forward
,
TKN
,
ZeroOrMore
,
RE
,
\
RegExp
,
Lookbehind
,
NegativeLookahead
,
OneOrMore
,
Series
,
Alternative
,
AllOf
,
SomeOf
,
\
UnknownParserError
,
MetaParser
,
EMPTY_NODE
UnknownParserError
,
MetaParser
,
GrammarError
,
EMPTY_NODE
from
DHParser
import
compile_source
from
DHParser.ebnf
import
get_ebnf_grammar
,
get_ebnf_transformer
,
get_ebnf_compiler
from
DHParser.dsl
import
grammar_provider
,
DHPARSER_IMPORTS
from
DHParser.ebnf
import
get_ebnf_grammar
,
get_ebnf_transformer
,
get_ebnf_compiler
,
DHPARSER_IMPORTS
from
DHParser.dsl
import
grammar_provider
,
CompilationError
from
DHParser.syntaxtree
import
Node
...
...
@@ -115,7 +115,16 @@ class TestInfiLoopsAndRecursion:
def
test_infinite_loops
(
self
):
minilang
=
"""forever = { // }
\n
"""
snippet
=
" "
parser
=
grammar_provider
(
minilang
)()
try
:
parser_class
=
grammar_provider
(
minilang
)
except
CompilationError
as
error
:
assert
all
(
e
.
code
==
Error
.
INFINITE_LOOP
for
e
in
error
.
errors
)
print
(
error
)
save
=
get_config_value
(
'early_static_analysis'
)
set_config_value
(
'early_static_analysis'
,
False
)
parser_class
=
grammar_provider
(
minilang
)
parser
=
parser_class
()
set_config_value
(
'early_static_analysis'
,
save
)
syntax_tree
=
parser
(
snippet
)
assert
any
(
e
.
code
==
Error
.
INFINITE_LOOP
for
e
in
syntax_tree
.
errors
)
res
=
parser
.
static_analysis
()
...
...
@@ -837,6 +846,46 @@ class TestMetaParser:
assert
rv
[
-
1
].
tag_name
!=
EMPTY_NODE
.
tag_name
,
rv
[
-
1
].
tag_name
class
TestStaticAnalysis
:
bibtex_grammar
=
"""# bad bibtex-grammar
@ whitespace = /\s*/
@ ignorecase = True
@ comment = //
bibliography = { preamble | comment | entry }
preamble = "@Preamble{" /"/ pre_code /"/~ §"}"
pre_code = { /[^"%]+/ | /%.*
\n
/ }
comment = "@Comment{" text §"}"
entry = /@/ type "{" key { "," field §"=" content } [","] §"}"
type = WORD
key = NO_BLANK_STRING
field = WORD
content = "{" text "}" | plain_content
plain_content = COMMA_TERMINATED_STRING
text = { CONTENT_STRING | "{" text "}" }
WORD = /\w+/~
NO_BLANK_STRING = /[^
\t\n
,%]+/~
COMMA_TERMINATED_STRING = { /[^,%]+/ | &/%/~ } # BOOM !!!
CONTENT_STRING = { /[^{}%]+/ | &/%/~ }+ # BOOM !!!
EOF = !/./
"""
def
test_static_analysis
(
self
):
gr_class
=
grammar_provider
(
self
.
bibtex_grammar
,
'BibTex'
)
try
:
gr_instance
=
gr_class
()
except
GrammarError
as
error
:
affected_parsers
=
{
e
[
0
]
for
e
in
error
.
errors
}
assert
affected_parsers
==
{
'CONTENT_STRING'
,
'COMMA_TERMINATED_STRING'
}
assert
all
(
e
[
2
].
code
==
Error
.
INFINITE_LOOP
for
e
in
error
.
errors
)
if
__name__
==
"__main__"
:
from
DHParser.testing
import
runner
...
...
test/test_testing.py
View file @
6e5b22ea
...
...
@@ -30,6 +30,7 @@ from DHParser.syntaxtree import parse_sxpr, flatten_sxpr, TOKEN_PTYPE
from
DHParser.transform
import
traverse
,
remove_expendables
,
remove_empty
,
\
replace_by_single_child
,
reduce_single_child
,
flatten
from
DHParser.dsl
import
grammar_provider
from
DHParser.error
import
Error
from
DHParser.testing
import
get_report
,
grammar_unit
,
unit_from_file
,
\
reset_unit
from
DHParser.log
import
logging
...
...
@@ -261,8 +262,8 @@ class TestLookahead:
"category"
:
{
"match"
:
{
1
:
"""Mountains: big:
K2"""
,
2
:
"""Rivers:"""
#
allowed because lookahead failure occurs at end of file and is mandatory!
K2"""
,
# case 1: matches only with lookahead (but should not fail in a test)
2
:
"""Rivers:"""
#
case 2: lookahead failure occurs at end of file and is mandatory (should not fail as a test)
},
"fail"
:
{
6
:
"""Mountains: big:"""
...
...
@@ -310,6 +311,13 @@ class TestLookahead:
assert
not
cst
.
error_flag
def
test_unit_lookahead
(
self
):
gr
=
self
.
grammar_fac
()
# Case 1: Lookahead string is part of the test case; parser fails but for the lookahead
result
=
gr
(
self
.
cases
[
'category'
][
'match'
][
1
],
'category'
,
True
)
assert
any
(
e
.
code
==
Error
.
PARSER_LOOKAHEAD_MATCH_ONLY
for
e
in
result
.
errors
)
# Case 2: Lookahead string is not part of the test case; parser matches but for the mandatory continuation
result
=
gr
(
self
.
cases
[
'category'
][
'match'
][
2
],
'category'
,
True
)
assert
any
(
e
.
code
==
Error
.
MANDATORY_CONTINUATION_AT_EOF
for
e
in
result
.
errors
)
errata
=
grammar_unit
(
self
.
cases
,
self
.
grammar_fac
,
self
.
trans_fac
)
assert
not
errata
,
str
(
errata
)