Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
9.2.2023: Due to updates GitLab will be unavailable for some minutes between 9:00 and 11:00.
Open sidebar
badw-it
DHParser
Commits
f9ea4738
Commit
f9ea4738
authored
Feb 10, 2019
by
eckhart
Browse files
- DHParser/parse.py Parser.__call__() and class Grammar: added left recursion warning
parent
a33838c1
Changes
13
Hide whitespace changes
Inline
Side-by-side
DHParser/dsl.py
View file @
f9ea4738
...
...
@@ -102,7 +102,7 @@ from DHParser import logging, is_filename, load_if_file, \\
keep_children, is_one_of, not_one_of, has_content, apply_if, remove_first, remove_last,
\\
remove_anonymous_empty, keep_nodes, traverse_locally, strip, lstrip, rstrip,
\\
replace_content, replace_content_by, forbid, assert_content, remove_infix_operator,
\\
reduce
_anonymous_nodes, error_on, recompile_grammar, GLOBALS
flatten
_anonymous_nodes, error_on, recompile_grammar, GLOBALS
'''
.
format
(
dhparserdir
=
dhparserdir
)
...
...
DHParser/ebnf.py
View file @
f9ea4738
...
...
@@ -60,6 +60,15 @@ __all__ = ('get_ebnf_preprocessor',
'CompilerFactoryFunc'
)
########################################################################
#
# Presets
#
########################################################################
CONFIG_PRESET
[
'add_grammar_source_to_parser_docstring'
]
=
False
########################################################################
#
# EBNF scanning
...
...
@@ -562,7 +571,7 @@ class EBNFCompiler(Compiler):
tt_name
=
self
.
grammar_name
+
'_AST_transformation_table'
transtable
=
[
tt_name
+
' = {'
,
' # AST Transformations for the '
+
self
.
grammar_name
+
'-grammar'
]
transtable
.
append
(
' "<":
reduce
_anonymous_nodes,'
)
transtable
.
append
(
' "<":
flatten
_anonymous_nodes,'
)
for
name
in
self
.
rules
:
transformations
=
'[]'
# rule = self.definitions[name]
...
...
@@ -1276,11 +1285,3 @@ def compile_ebnf(ebnf_source: str, branding: str = 'DSL') \
get_ebnf_transformer
(),
get_ebnf_compiler
(
branding
,
ebnf_source
))
########################################################################
#
# Presets
#
########################################################################
CONFIG_PRESET
[
'add_grammar_source_to_parser_docstring'
]
=
False
DHParser/error.py
View file @
f9ea4738
...
...
@@ -73,6 +73,7 @@ class Error:
REDECLARED_TOKEN_WARNING
=
ErrorCode
(
120
)
UNUSED_ERROR_HANDLING_WARNING
=
ErrorCode
(
130
)
LEFT_RECURSION_WARING
=
ErrorCode
(
140
)
UNDEFINED_SYMBOL_IN_TRANSTABLE_WARNING
=
ErrorCode
(
610
)
...
...
DHParser/parse.pxd
View file @
f9ea4738
...
...
@@ -39,6 +39,7 @@ cdef class Grammar:
cdef
public
list
history__
cdef
public
bint
moving_forward__
cdef
public
set
recursion_locations__
cdef
public
int
last_recursion_location__
cdef
class
PreprocessorToken
(
Parser
):
pass
...
...
DHParser/parse.py
View file @
f9ea4738
...
...
@@ -39,7 +39,8 @@ from DHParser.preprocess import BEGIN_TOKEN, END_TOKEN, RX_TOKEN_NAME
from
DHParser.stringview
import
StringView
,
EMPTY_STRING_VIEW
from
DHParser.syntaxtree
import
Node
,
FrozenNode
,
RootNode
,
WHITESPACE_PTYPE
,
\
TOKEN_PTYPE
,
ZOMBIE_TAG
,
ResultType
from
DHParser.toolkit
import
sane_parser_name
,
escape_control_characters
,
re
,
typing
,
cython
from
DHParser.toolkit
import
sane_parser_name
,
escape_control_characters
,
get_config_value
,
\
CONFIG_PRESET
,
re
,
typing
,
cython
from
typing
import
Callable
,
cast
,
List
,
Tuple
,
Set
,
Dict
,
DefaultDict
,
Union
,
Optional
,
Any
...
...
@@ -82,8 +83,13 @@ __all__ = ('Parser',
'Forward'
)
########################################################################
#
# Presets
#
########################################################################
EMPTY_NODE
=
FrozenNode
(
':EMPTY__'
,
''
)
CONFIG_PRESET
[
'flatten_tree_while_parsing'
]
=
True
########################################################################
...
...
@@ -98,6 +104,7 @@ LEFT_RECURSION_DEPTH = 8 # type: int
# set too high. PyPy allows higher values than CPython
MAX_DROPOUTS
=
3
# type: int
# stop trying to recover parsing after so many errors
EMPTY_NODE
=
FrozenNode
(
':EMPTY__'
,
''
)
class
ParserError
(
Exception
):
...
...
@@ -330,6 +337,11 @@ class Parser:
if
location
in
self
.
visited
:
node
,
rest
=
self
.
visited
[
location
]
# TODO: maybe add a warning about occurrence of left-recursion here?
if
location
!=
grammar
.
last_recursion_location__
:
grammar
.
tree__
.
add_error
(
node
,
Error
(
"Left recursion encountered. "
"Refactor grammar to avoid slow parsing."
,
node
.
pos
,
Error
.
LEFT_RECURSION_WARING
))
grammar
.
last_recursion_location__
=
location
# don't overwrite any positive match (i.e. node not None) in the cache
# and don't add empty entries for parsers returning from left recursive calls!
elif
grammar
.
memoization__
:
...
...
@@ -671,8 +683,11 @@ class Grammar:
recursion_locations__: Stores the locations where left recursion was
detected. Needed to provide minimal memoization for the left
recursion detection algorithm, but, strictly speaking, superfluous
if full memoization is enabled. (See :func:`add_parser_guard` and its
local function :func:`guarded_call`)
if full memoization is enabled. (See :func:`Parser.__call__()`)
last_recursion_location__: Last location where left recursion was
detected. This is used to avoid reduplicating warning messages
about left recursion.
memoization__: Turns full memoization on or off. Turning memoization off
results in less memory usage and sometimes reduced parsing time.
...
...
@@ -682,6 +697,12 @@ class Grammar:
left_recursion_handling__: Turns left recursion handling on or off.
If turned off, a recursion error will result in case of left
recursion.
flatten_tree__: If True (default), anonymous nodes will be flattened
during parsing already. This greatly reduces the concrete syntax
tree and simplifies and speeds up abstract syntax tree generation.
The initial value will be read from the config variable
'flatten_tree_while_parsing' upon class instantiation.
"""
python_src__
=
''
# type: str
root__
=
PARSER_PLACEHOLDER
# type: Parser
...
...
@@ -736,6 +757,7 @@ class Grammar:
self
.
history_tracking__
=
False
# type: bool
self
.
memoization__
=
True
# type: bool
self
.
left_recursion_handling__
=
True
# type: bool
self
.
flatten_tree__
=
get_config_value
(
'flatten_tree_while_parsing'
)
# type: bool
self
.
_reset__
()
# prepare parsers in the class, first
...
...
@@ -784,6 +806,7 @@ class Grammar:
# also needed for call stack tracing
self
.
moving_forward__
=
False
# type: bool
self
.
recursion_locations__
=
set
()
# type: Set[int]
self
.
last_recursion_location__
=
-
1
# type: int
@
property
...
...
@@ -1247,40 +1270,52 @@ class DropWhitespace(Whitespace):
class
MetaParser
(
Parser
):
# TODO: Allow to turn optimization off
def
_return_value
(
self
,
node
:
Optional
[
Node
])
->
Node
:
# Node(self.tag_name, node) # unoptimized code
"""
Generate a return node if a single node has been returned from
any descendant parsers. Empty nodes will be dropped silently.
If `self` is an unnamed parser, a non-empty descendant node
will be passed through. If the descendant node is anonymous,
it will be dropped and only its result will be kept.
In all other cases or if the optimization is turned off by
setting `grammar.flatten_tree__` to False, a new node will be
generated and the descendant node will be its single child.
"""
assert
node
is
None
or
isinstance
(
node
,
Node
)
if
node
:
if
self
.
pname
:
if
node
.
tag_name
[
0
]
==
':'
:
# faster than node.is_anonymous()
return
Node
(
self
.
tag_name
,
node
.
_result
)
return
Node
(
self
.
tag_name
,
node
)
return
node
if
self
.
pname
:
return
Node
(
self
.
tag_name
,
())
# type: Node
return
EMPTY_NODE
# avoid creation of a node object for anonymous empty nodes
if
self
.
grammar
.
flatten_tree__
:
if
node
:
if
self
.
pname
:
if
node
.
tag_name
[
0
]
==
':'
:
# faster than node.is_anonymous()
return
Node
(
self
.
tag_name
,
node
.
_result
)
return
Node
(
self
.
tag_name
,
node
)
return
node
elif
self
.
pname
:
return
Node
(
self
.
tag_name
,
())
# type: Node
return
EMPTY_NODE
# avoid creation of a node object for anonymous empty nodes
return
Node
(
self
.
tag_name
,
node
or
())
# unoptimized code
@
cython
.
locals
(
N
=
cython
.
int
)
def
_return_values
(
self
,
results
:
Tuple
[
Node
,
...])
->
Node
:
# return Node(self.tag_name, results) # unoptimized code
assert
isinstance
(
results
,
tuple
)
N
=
len
(
results
)
if
N
>
1
:
nr
=
[]
for
child
in
results
:
if
child
.
children
and
child
.
tag_name
[
0
]
==
':'
:
# faster than c.is_anonymous():
nr
.
extend
(
child
.
children
)
else
:
nr
.
append
(
child
)
return
Node
(
self
.
tag_name
,
tuple
(
nr
))
if
self
.
grammar
.
flatten_tree__
:
nr
=
[]
for
child
in
results
:
if
child
.
children
and
child
.
tag_name
[
0
]
==
':'
:
# faster than c.is_anonymous():
nr
.
extend
(
child
.
children
)
else
:
nr
.
append
(
child
)
return
Node
(
self
.
tag_name
,
tuple
(
nr
))
return
Node
(
self
.
tag_name
,
results
)
# unoptimized code
elif
N
==
1
:
return
self
.
_return_value
(
results
[
0
])
elif
self
.
pname
:
return
Node
(
self
.
tag_name
,
())
return
EMPTY_NODE
# avoid creation of a node object for anonymous empty nodes
elif
self
.
grammar
.
flatten_tree__
:
if
self
.
pname
:
return
Node
(
self
.
tag_name
,
())
return
EMPTY_NODE
# avoid creation of a node object for anonymous empty nodes
return
Node
(
self
.
tag_name
,
results
)
# unoptimized code
class
UnaryParser
(
MetaParser
):
...
...
DHParser/transform.py
View file @
f9ea4738
...
...
@@ -47,7 +47,7 @@ __all__ = ('TransformationDict',
'traverse'
,
'is_named'
,
'update_attr'
,
'
reduce
_anonymous_nodes'
,
'
flatten
_anonymous_nodes'
,
'replace_by_single_child'
,
'reduce_single_child'
,
'replace_or_reduce'
,
...
...
@@ -559,12 +559,13 @@ def _reduce_child(node: Node, child: Node):
# _reduce_child(context[-1], child)
def
reduce
_anonymous_nodes
(
context
:
List
[
Node
]):
def
flatten
_anonymous_nodes
(
context
:
List
[
Node
]):
"""
Reduces (non-recursively) all anonymous non-leaf children by adding
their result to the result of the last node in the context. If the
last node is anonymous itself, it will be replaced by a single child.
Also drops any empty anonymous nodes.
Flattens non-recursively all anonymous non-leaf children by adding
their result to the result of the parent node. Empty anonymous children
will be dropped altogether. If the parent node (i.e. `context[-1]`) is
anonymous itself and has only one child node, it will be replaced by
its single child node.
"""
node
=
context
[
-
1
]
if
node
.
children
:
...
...
examples/Arithmetic/ArithmeticCompiler.py
View file @
f9ea4738
...
...
@@ -33,7 +33,7 @@ from DHParser import logging, is_filename, load_if_file, \
keep_children
,
is_one_of
,
not_one_of
,
has_content
,
apply_if
,
remove_first
,
remove_last
,
\
remove_anonymous_empty
,
keep_nodes
,
traverse_locally
,
strip
,
lstrip
,
rstrip
,
\
replace_content
,
replace_content_by
,
forbid
,
assert_content
,
remove_infix_operator
,
\
error_on
,
recompile_grammar
,
reduce
_anonymous_nodes
,
GLOBALS
error_on
,
recompile_grammar
,
flatten
_anonymous_nodes
,
GLOBALS
#######################################################################
...
...
@@ -98,7 +98,7 @@ def get_grammar() -> ArithmeticGrammar:
Arithmetic_AST_transformation_table
=
{
# AST Transformations for the Arithmetic-grammar
"<"
:
reduce
_anonymous_nodes
,
"<"
:
flatten
_anonymous_nodes
,
"expression"
:
[],
"term"
:
[
reduce_single_child
],
"factor"
:
[
reduce_single_child
],
...
...
examples/EBNF/EBNFCompiler.py
View file @
f9ea4738
...
...
@@ -33,7 +33,7 @@ from DHParser import logging, is_filename, load_if_file, \
keep_children
,
is_one_of
,
not_one_of
,
has_content
,
apply_if
,
remove_first
,
remove_last
,
\
remove_anonymous_empty
,
keep_nodes
,
traverse_locally
,
strip
,
lstrip
,
rstrip
,
\
replace_content
,
replace_content_by
,
forbid
,
assert_content
,
remove_infix_operator
,
\
reduce
_anonymous_nodes
,
error_on
,
recompile_grammar
,
GLOBALS
flatten
_anonymous_nodes
,
error_on
,
recompile_grammar
,
GLOBALS
#######################################################################
...
...
@@ -107,7 +107,7 @@ def get_grammar() -> EBNFGrammar:
EBNF_AST_transformation_table
=
{
# AST Transformations for the EBNF-grammar
"<"
:
reduce
_anonymous_nodes
,
"<"
:
flatten
_anonymous_nodes
,
"syntax"
:
[],
"definition"
:
[],
"directive"
:
[],
...
...
examples/LaTeX/LaTeXCompiler.py
View file @
f9ea4738
...
...
@@ -22,7 +22,7 @@ from DHParser import is_filename, Grammar, Compiler, Lookbehind, Alternative, Po
ZeroOrMore
,
Forward
,
NegativeLookahead
,
mixin_comment
,
compile_source
,
\
PreprocessorFunc
,
TransformationDict
,
\
Node
,
TransformationFunc
,
traverse
,
remove_children_if
,
is_anonymous
,
\
reduce_single_child
,
replace_by_single_child
,
remove_whitespace
,
reduce
_anonymous_nodes
,
\
reduce_single_child
,
replace_by_single_child
,
remove_whitespace
,
flatten
_anonymous_nodes
,
\
flatten
,
is_empty
,
collapse
,
replace_content
,
replace_content_by
,
remove_brackets
,
\
is_one_of
,
traverse_locally
,
remove_tokens
,
remove_nodes
,
TOKEN_PTYPE
,
Error
,
GLOBALS
from
DHParser.log
import
logging
...
...
@@ -219,7 +219,7 @@ drop_expendables = remove_children_if(lambda context: is_empty(context) or
LaTeX_AST_transformation_table
=
{
# AST Transformations for the LaTeX-grammar
"<"
:
[
reduce
_anonymous_nodes
,
flatten_structure
],
"<"
:
[
flatten
_anonymous_nodes
,
flatten_structure
],
"latexdoc"
:
[],
"preamble"
:
[
traverse_locally
({
'<'
:
remove_whitespace
,
'block'
:
replace_by_single_child
})],
"document"
:
[
flatten_structure
],
...
...
examples/XML/XMLCompiler.py
View file @
f9ea4738
...
...
@@ -26,7 +26,7 @@ from DHParser import logging, is_filename, load_if_file, \
ZeroOrMore
,
Forward
,
NegativeLookahead
,
Required
,
mixin_comment
,
compile_source
,
\
grammar_changed
,
last_value
,
counterpart
,
accumulate
,
PreprocessorFunc
,
\
Node
,
TransformationFunc
,
TransformationDict
,
Token
,
DropToken
,
DropWhitespace
,
\
traverse
,
remove_children_if
,
is_anonymous
,
GLOBALS
,
reduce
_anonymous_nodes
,
\
traverse
,
remove_children_if
,
is_anonymous
,
GLOBALS
,
flatten
_anonymous_nodes
,
\
reduce_single_child
,
replace_by_single_child
,
replace_or_reduce
,
remove_whitespace
,
\
remove_expendables
,
remove_empty
,
remove_tokens
,
flatten
,
is_whitespace
,
\
is_empty
,
is_expendable
,
collapse
,
replace_content
,
WHITESPACE_PTYPE
,
TOKEN_PTYPE
,
\
...
...
@@ -194,7 +194,7 @@ def get_grammar() -> XMLGrammar:
XML_AST_transformation_table
=
{
# AST Transformations for the XML-grammar
"<"
:
[
reduce
_anonymous_nodes
,
remove_empty
,
remove_anonymous_tokens
,
remove_whitespace
,
remove_nodes
(
"S"
)],
"<"
:
[
flatten
_anonymous_nodes
,
remove_empty
,
remove_anonymous_tokens
,
remove_whitespace
,
remove_nodes
(
"S"
)],
"document"
:
[
flatten
(
lambda
context
:
context
[
-
1
].
tag_name
==
'prolog'
,
recursive
=
False
)],
"prolog"
:
[],
"XMLDecl"
:
[],
...
...
examples/XMLSnippet/XMLSnippetCompiler.py
View file @
f9ea4738
...
...
@@ -19,7 +19,7 @@ try:
except
ImportError
:
import
re
from
DHParser
import
logging
,
is_filename
,
load_if_file
,
Grammar
,
Compiler
,
nil_preprocessor
,
\
PreprocessorToken
,
Whitespace
,
DropWhitespace
,
DropToken
,
reduce
_anonymous_nodes
,
\
PreprocessorToken
,
Whitespace
,
DropWhitespace
,
DropToken
,
flatten
_anonymous_nodes
,
\
Lookbehind
,
Lookahead
,
Alternative
,
Pop
,
Token
,
Synonym
,
AllOf
,
SomeOf
,
Unordered
,
\
Option
,
NegativeLookbehind
,
OneOrMore
,
RegExp
,
Retrieve
,
Series
,
Capture
,
\
ZeroOrMore
,
Forward
,
NegativeLookahead
,
Required
,
mixin_comment
,
compile_source
,
\
...
...
@@ -140,7 +140,7 @@ def get_grammar() -> XMLSnippetGrammar:
XMLSnippet_AST_transformation_table
=
{
# AST Transformations for the XMLSnippet-grammar
"<"
:
reduce
_anonymous_nodes
,
"<"
:
flatten
_anonymous_nodes
,
"document"
:
[],
"prolog"
:
[],
"XMLDecl"
:
[],
...
...
test/test_dsl.py
View file @
f9ea4738
...
...
@@ -52,7 +52,7 @@ class TestCompileFunctions:
assert
callable
(
factory
)
parser
=
factory
()
result
=
parser
(
"5 + 3 * 4"
)
assert
not
result
.
error_flag
,
str
(
result
.
errors_sorted
)
assert
not
is_error
(
result
.
error_flag
)
,
str
(
result
.
errors_sorted
)
result
=
parser
(
"5A + 4B ** 4C"
)
assert
is_error
(
result
.
error_flag
)
...
...
test/test_parse.py
View file @
f9ea4738
...
...
@@ -24,9 +24,9 @@ from functools import partial
sys
.
path
.
extend
([
'../'
,
'./'
])
from
DHParser.toolkit
import
compile_python_object
from
DHParser.toolkit
import
compile_python_object
,
get_config_value
,
set_config_value
from
DHParser.log
import
logging
,
is_logging
,
log_ST
,
log_parsing_history
from
DHParser.error
import
Error
from
DHParser.error
import
Error
,
is_error
from
DHParser.parse
import
Parser
,
Grammar
,
Forward
,
TKN
,
ZeroOrMore
,
RE
,
\
RegExp
,
Lookbehind
,
NegativeLookahead
,
OneOrMore
,
Series
,
Alternative
,
AllOf
,
SomeOf
,
\
UnknownParserError
,
MetaParser
,
EMPTY_NODE
...
...
@@ -70,8 +70,8 @@ class TestInfiLoopsAndRecursion:
parser
=
grammar_provider
(
minilang
)()
assert
parser
syntax_tree
=
parser
(
snippet
)
assert
not
syntax_tree
.
error_flag
,
str
(
syntax_tree
.
errors_sorted
)
assert
snippet
==
str
(
syntax_tree
)
assert
not
is_error
(
syntax_tree
.
error_flag
)
,
str
(
syntax_tree
.
errors_sorted
)
assert
snippet
==
syntax_tree
.
content
,
str
(
syntax_tree
)
if
is_logging
():
log_ST
(
syntax_tree
,
"test_LeftRecursion_direct.cst"
)
log_parsing_history
(
parser
,
"test_LeftRecursion_direct"
)
...
...
@@ -87,8 +87,8 @@ class TestInfiLoopsAndRecursion:
parser
=
grammar_provider
(
minilang
)()
assert
parser
syntax_tree
=
parser
(
snippet
)
assert
not
syntax_tree
.
error_flag
,
syntax_tree
.
errors_sorted
assert
snippet
==
str
(
syntax_tree
)
assert
not
is_error
(
syntax_tree
.
error_flag
)
,
syntax_tree
.
errors_sorted
assert
snippet
==
syntax_tree
.
content
def
test_indirect_left_recursion1
(
self
):
minilang
=
"""
...
...
@@ -101,14 +101,14 @@ class TestInfiLoopsAndRecursion:
assert
parser
snippet
=
"8 * 4"
syntax_tree
=
parser
(
snippet
)
assert
not
syntax_tree
.
error_flag
,
syntax_tree
.
errors_sorted
assert
not
is_error
(
syntax_tree
.
error_flag
)
,
syntax_tree
.
errors_sorted
snippet
=
"7 + 8 * 4"
syntax_tree
=
parser
(
snippet
)
assert
not
syntax_tree
.
error_flag
,
syntax_tree
.
errors_sorted
assert
not
is_error
(
syntax_tree
.
error_flag
)
,
syntax_tree
.
errors_sorted
snippet
=
"9 + 8 * (4 + 3)"
syntax_tree
=
parser
(
snippet
)
assert
not
syntax_tree
.
error_flag
,
syntax_tree
.
errors_sorted
assert
snippet
==
str
(
syntax_tree
)
assert
not
is_error
(
syntax_tree
.
error_flag
)
,
syntax_tree
.
errors_sorted
assert
snippet
==
syntax_tree
.
content
if
is_logging
():
log_ST
(
syntax_tree
,
"test_LeftRecursion_indirect.cst"
)
log_parsing_history
(
parser
,
"test_LeftRecursion_indirect"
)
...
...
@@ -788,7 +788,10 @@ class TestEarlyTokenWhitespaceDrop:
class
TestMetaParser
:
def
test_meta_parser
(
self
):
save
=
get_config_value
(
'flatten_tree_while_parsing'
)
set_config_value
(
'flatten_tree_while_parsing'
,
True
)
mp
=
MetaParser
()
mp
.
grammar
=
Grammar
()
# override placeholder warning
mp
.
pname
=
"named"
mp
.
tag_name
=
mp
.
pname
nd
=
mp
.
_return_value
(
Node
(
'tagged'
,
'non-empty'
))
...
...
@@ -828,6 +831,7 @@ class TestMetaParser:
assert
not
nd
.
children
assert
not
nd
.
content
assert
mp
.
_return_value
(
None
)
==
EMPTY_NODE
set_config_value
(
'flatten_tree_while_parsing'
,
save
)
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment