Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
badw-it
DHParser
Commits
739242bd
Commit
739242bd
authored
Jun 20, 2017
by
Eckhart Arnold
Browse files
synonymhandling solved + reporting of unconnected rules in EBNFCompiler from ebnf.py
parent
72799c38
Changes
4
Hide whitespace changes
Inline
Side-by-side
DHParser/dsl.py
View file @
739242bd
...
...
@@ -73,7 +73,7 @@ except ImportError:
import re
from DHParser.toolkit import logging, is_filename, load_if_file
from DHParser.parsers import Grammar, Compiler, nil_scanner,
\\
Lookbehind, Lookahead, Alternative, Pop, Required, Token,
\\
Lookbehind, Lookahead, Alternative, Pop, Required, Token,
Synonym,
\\
Optional, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Sequence, RE, Capture,
\\
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source,
\\
nop_filter, counterpart_filter, accumulating_filter, ScannerFunc
...
...
@@ -128,11 +128,12 @@ class CompilationError(Exception):
contains errors.
"""
def
__init__
(
self
,
error_messages
,
dsl_text
,
dsl_grammar
,
AST
):
def __init__(self, error_messages, dsl_text, dsl_grammar, AST, result):
    """Keep everything that is known about a failed compilation.

    Every argument is stored unchanged on the instance under an
    attribute of the same name: ``error_messages``, ``dsl_text``,
    ``dsl_grammar``, ``AST`` and ``result``.
    """
    (self.error_messages,
     self.dsl_text,
     self.dsl_grammar,
     self.AST,
     self.result) = (error_messages, dsl_text, dsl_grammar, AST, result)
def __str__(self):
    """Return all stored error messages as one text, one message per line."""
    line_break = '\n'
    return line_break.join(self.error_messages)
...
...
@@ -163,7 +164,6 @@ def grammar_instance(grammar_representation) -> Tuple[Grammar, str]:
parser_root
=
grammar_representation
else
:
# assume ``grammar_representation`` is a grammar class and get the root object
# TODO: further case: grammar_representation is a method
parser_root
=
grammar_representation
()
return
parser_root
,
grammar_src
...
...
@@ -188,7 +188,7 @@ def compileDSL(text_or_file: str,
ast_transformation
,
compiler
)
if
errors
:
src
=
load_if_file
(
text_or_file
)
raise
CompilationError
(
errors
,
src
,
grammar_src
,
AST
)
raise
CompilationError
(
errors
,
src
,
grammar_src
,
AST
,
result
)
return
result
...
...
DHParser/ebnf.py
View file @
739242bd
...
...
@@ -16,13 +16,13 @@ implied. See the License for the specific language governing
permissions and limitations under the License.
"""
from
collections
import
OrderedDict
import
keyword
try
:
import
regex
as
re
except
ImportError
:
import
re
from
typing
import
Callable
,
List
,
Set
,
Tuple
from
typing
import
Callable
,
Dict
,
List
,
Set
,
Tuple
from
DHParser.toolkit
import
load_if_file
,
escape_re
,
md5
,
sane_parser_name
from
DHParser.parsers
import
Grammar
,
mixin_comment
,
nil_scanner
,
Forward
,
RE
,
NegativeLookahead
,
\
...
...
@@ -291,7 +291,7 @@ def get_compiler(grammar_name="{NAME}", grammar_source="") -> {NAME}Compiler:
class
EBNFCompilerError
(
Exception
):
"""Error raised by `EBNFCompiler` class. (Not compilation errors
in the strict sense, see `CompilationError`
below
)"""
in the strict sense, see `CompilationError`
in module ``dsl.py``
)"""
pass
...
...
@@ -318,10 +318,11 @@ class EBNFCompiler(Compiler):
def
_reset
(
self
):
self
.
_result
=
''
# type: str
self
.
rules
=
set
()
# type: Set[str]
self
.
rules
=
OrderedDict
()
# type: OrderedDict[str, List[Node]]
self
.
current_symbols
=
[]
# type: List[Node]
self
.
symbols
=
{}
# type: Dict[str, Node]
self
.
variables
=
set
()
# type: Set[str]
self
.
symbol_nodes
=
[]
# type: List[Node]
self
.
definition_names
=
[]
# type: List[str]
# self.definition_names = [] # type: List[str]
self
.
recursive
=
set
()
# type: Set[str]
self
.
root
=
""
# type: str
self
.
directives
=
{
'whitespace'
:
self
.
WHITESPACE
[
'horizontal'
],
...
...
@@ -340,7 +341,7 @@ class EBNFCompiler(Compiler):
+
SCANNER_FACTORY
.
format
(
NAME
=
self
.
grammar_name
)
def
gen_transformer_skeleton
(
self
)
->
str
:
if
not
self
.
definition_nam
es
:
if
not
self
.
rul
es
:
raise
EBNFCompilerError
(
'Compiler must be run before calling '
'"gen_transformer_Skeleton()"!'
)
tt_name
=
self
.
grammar_name
+
'_AST_transformation_table'
...
...
@@ -348,7 +349,7 @@ class EBNFCompiler(Compiler):
transtable
=
[
tt_name
+
' = {'
,
' # AST Transformations for the '
+
self
.
grammar_name
+
'-grammar'
]
for
name
in
self
.
definition_nam
es
:
for
name
in
self
.
rul
es
:
transtable
.
append
(
' "'
+
name
+
'": no_transformation,'
)
transtable
+=
[
' "*": no_transformation'
,
'}'
,
''
,
tf_name
+
' = partial(traverse, processing_table=%s)'
%
tt_name
,
''
]
...
...
@@ -356,7 +357,7 @@ class EBNFCompiler(Compiler):
return
'
\n
'
.
join
(
transtable
)
def
gen_compiler_skeleton
(
self
)
->
str
:
if
not
self
.
definition_nam
es
:
if
not
self
.
rul
es
:
raise
EBNFCompilerError
(
'Compiler has not been run before calling '
'"gen_Compiler_Skeleton()"!'
)
compiler
=
[
'class '
+
self
.
grammar_name
+
'Compiler(Compiler):'
,
...
...
@@ -368,7 +369,7 @@ class EBNFCompiler(Compiler):
' super('
+
self
.
grammar_name
+
'Compiler, self).__init__(grammar_name, grammar_source)'
,
" assert re.match('\w+\Z', grammar_name)"
,
''
]
for
name
in
self
.
definition_nam
es
:
for
name
in
self
.
rul
es
:
method_name
=
Compiler
.
derive_method_name
(
name
)
if
name
==
self
.
root
:
compiler
+=
[
' def '
+
method_name
+
'(self, node):'
,
...
...
@@ -387,7 +388,6 @@ class EBNFCompiler(Compiler):
if
definitions
[
i
][
0
]
in
self
.
variables
:
definitions
[
i
]
=
(
definitions
[
i
][
0
],
'Capture(%s)'
%
definitions
[
i
][
1
])
self
.
definition_names
=
[
defn
[
0
]
for
defn
in
definitions
]
definitions
.
append
((
'wspR__'
,
self
.
WHITESPACE_KEYWORD
if
'right'
in
self
.
directives
[
'literalws'
]
else
"''"
))
definitions
.
append
((
'wspL__'
,
self
.
WHITESPACE_KEYWORD
...
...
@@ -417,12 +417,6 @@ class EBNFCompiler(Compiler):
declarations
=
declarations
[:
-
1
]
declarations
.
append
(
'"""'
)
# add default functions for filter filters of pop or retrieve operators
# for symbol, fun in self.directives['filter']:
# declarations.append(symbol + '_filter = lambda value: value.replace("(", ")")'
# '.replace("[", "]").replace("{", "}").replace(">", "<")')
# turn definitions into declarations in reverse order
self
.
root
=
definitions
[
0
][
0
]
if
definitions
else
""
...
...
@@ -434,11 +428,31 @@ class EBNFCompiler(Compiler):
declarations
+=
[
symbol
+
'.set('
+
statement
+
')'
]
else
:
declarations
+=
[
symbol
+
' = '
+
statement
]
known_symbols
=
self
.
rules
|
self
.
RESERVED_SYMBOLS
for
nd
in
self
.
symbol_nodes
:
if
nd
.
result
not
in
known_symbols
:
nd
.
add_error
(
"Missing production for symbol '%s'"
%
nd
.
result
)
# check for symbols used but never defined
defined_symbols
=
set
(
self
.
rules
.
keys
())
|
self
.
RESERVED_SYMBOLS
for
symbol
in
self
.
symbols
:
if
symbol
not
in
defined_symbols
:
self
.
symbols
[
symbol
].
add_error
(
"Missing definition for symbol '%s'"
%
symbol
)
root_node
.
error_flag
=
True
# check for unconnected rules
defined_symbols
.
difference_update
(
self
.
RESERVED_SYMBOLS
)
def
remove_connections
(
symbol
):
if
symbol
in
defined_symbols
:
defined_symbols
.
remove
(
symbol
)
for
related
in
self
.
rules
[
symbol
][
1
:]:
remove_connections
(
str
(
related
))
remove_connections
(
self
.
root
)
for
leftover
in
defined_symbols
:
self
.
rules
[
leftover
][
0
].
add_error
((
'Rule "%s" is not connected to parser '
'root "%s"'
)
%
(
leftover
,
self
.
root
))
# set root parser and assemble python grammar definition
if
self
.
root
and
'root__'
not
in
self
.
rules
:
declarations
.
append
(
'root__ = '
+
self
.
root
)
declarations
.
append
(
''
)
...
...
@@ -466,7 +480,7 @@ class EBNFCompiler(Compiler):
return
self
.
assemble_parser
(
definitions
,
node
)
def
on_definition
(
self
,
node
:
Node
)
->
Tuple
[
str
,
str
]:
rule
=
str
(
node
.
children
[
0
])
# cast(str, node.children[0].result)
rule
=
str
(
node
.
children
[
0
])
if
rule
in
self
.
rules
:
node
.
add_error
(
'A rule with name "%s" has already been defined.'
%
rule
)
elif
rule
in
EBNFCompiler
.
RESERVED_SYMBOLS
:
...
...
@@ -479,13 +493,17 @@ class EBNFCompiler(Compiler):
'a scanner token.'
%
rule
)
elif
keyword
.
iskeyword
(
rule
):
node
.
add_error
(
'Python keyword "%s" may not be used as a symbol. '
%
rule
+
'(This may change in the fu
rut
e.)'
)
%
rule
+
'(This may change in the fu
tur
e.)'
)
try
:
self
.
rules
.
add
(
rule
)
self
.
current_symbols
=
[
node
]
self
.
rules
[
rule
]
=
self
.
current_symbols
defn
=
self
.
_compile
(
node
.
children
[
1
])
if
rule
in
self
.
variables
:
defn
=
'Capture(%s)'
%
defn
self
.
variables
.
remove
(
rule
)
elif
defn
.
find
(
"("
)
<
0
:
# assume it's a synonym, like 'page = REGEX_PAGE_NR'
defn
=
'Synonym(%s)'
%
defn
except
TypeError
as
error
:
errmsg
=
EBNFCompiler
.
AST_ERROR
+
" ("
+
str
(
error
)
+
")
\n
"
+
node
.
as_sexpr
()
node
.
add_error
(
errmsg
)
...
...
@@ -622,21 +640,23 @@ class EBNFCompiler(Compiler):
raise
EBNFCompilerError
(
"Group nodes should have been eliminated by "
"AST transformation!"
)
def
on_symbol
(
self
,
node
:
Node
)
->
str
:
result
=
str
(
node
)
# ; assert result == cast(str, node.result)
if
result
in
self
.
directives
[
'tokens'
]:
return
'ScannerToken("'
+
result
+
'")'
def
on_symbol
(
self
,
node
:
Node
)
->
str
:
# called only for symbols on the right hand side!
symbol
=
str
(
node
)
# ; assert result == cast(str, node.result)
if
symbol
in
self
.
directives
[
'tokens'
]:
return
'ScannerToken("'
+
symbol
+
'")'
else
:
self
.
symbol_nodes
.
append
(
node
)
if
result
in
self
.
rules
:
self
.
recursive
.
add
(
result
)
return
result
self
.
current_symbols
.
append
(
node
)
if
symbol
not
in
self
.
symbols
:
self
.
symbols
[
symbol
]
=
node
if
symbol
in
self
.
rules
:
self
.
recursive
.
add
(
symbol
)
return
symbol
def on_literal(self, node) -> str:
    """Turn a literal node into the source text of a ``Token(...)`` call.

    The string form of ``node`` becomes the call's argument, with every
    backslash doubled.
    """
    escaped = str(node).replace('\\', r'\\')
    # Open question left by the original author:
    # return 'Token(' + ', '.join([node.result]) + ')' ?
    return ''.join(('Token(', escaped, ')'))
def
on_regexp
(
self
,
node
:
Node
)
->
str
:
rx
=
str
(
node
)
# ; assert rx == cast(str, node.result)
rx
=
str
(
node
)
name
=
[]
# type: List[str]
if
rx
[:
2
]
==
'~/'
:
if
not
'left'
in
self
.
directives
[
'literalws'
]:
...
...
DHParser/parsers.py
View file @
739242bd
...
...
@@ -78,8 +78,9 @@ __all__ = ['ScannerFunc',
'RE'
,
'Token'
,
'mixin_comment'
,
'UnaryOperator'
,
'NaryOperator'
,
# 'UnaryOperator',
# 'NaryOperator',
'Synonym'
,
'Optional'
,
'ZeroOrMore'
,
'OneOrMore'
,
...
...
@@ -737,6 +738,24 @@ class NaryOperator(Parser):
parser
.
apply
(
func
)
class Synonym(UnaryOperator):
    r"""Simply calls another parser and encapsulates the result in
    another node if that parser matches.

    This parser is needed to support synonyms in EBNF, e.g.

        jahr       = JAHRESZAHL
        JAHRESZAHL = /\d\d\d\d/

    Otherwise the first line could not be represented by any parser
    class, in which case it would be unclear whether the parser
    RE('\d\d\d\d') carries the name 'JAHRESZAHL' or 'jahr'.
    """
    # The docstring is a raw string because it contains ``\d``; in a
    # normal string literal that is an invalid escape sequence.

    def __call__(self, text: str) -> Tuple[Node, str]:
        """Run the wrapped parser on ``text`` and re-wrap a match.

        Returns a pair ``(node, rest)``. If the wrapped parser yields a
        truthy node, that node is enclosed in a new ``Node`` owned by
        this parser; otherwise the first element is ``None``. The second
        element is always the text handed back by the wrapped parser.
        """
        node, text = self.parser(text)
        # NOTE(review): this is a truthiness test, not an ``is not None``
        # test. If ``Node`` defines ``__bool__``/``__len__`` such that an
        # empty match is falsy, an empty match would be reported as a
        # failure here - confirm against the ``Node`` class.
        if node:
            return Node(self, node), text
        return None, text
class
Optional
(
UnaryOperator
):
def
__init__
(
self
,
parser
:
Parser
,
name
:
str
=
''
)
->
None
:
super
(
Optional
,
self
).
__init__
(
parser
,
name
)
...
...
@@ -1141,8 +1160,7 @@ class Compiler:
else
:
compiler
=
self
.
__getattribute__
(
self
.
derive_method_name
(
elem
))
result
=
compiler
(
node
)
for
child
in
node
.
children
:
node
.
error_flag
=
node
.
error_flag
or
child
.
error_flag
node
.
propagate_error_flags
()
return
result
...
...
test/test_ebnf.py
View file @
739242bd
...
...
@@ -28,7 +28,7 @@ sys.path.extend(['../', './'])
from
DHParser.toolkit
import
is_logging
from
DHParser.parsers
import
compile_source
,
Retrieve
,
WHITESPACE_PTYPE
,
nil_scanner
from
DHParser.ebnf
import
get_ebnf_grammar
,
get_ebnf_transformer
,
EBNFTransformer
,
get_ebnf_compiler
from
DHParser.dsl
import
c
ompil
eEBNF
,
compileDSL
,
parser_factory
from
DHParser.dsl
import
C
ompil
ationError
,
compileDSL
,
parser_factory
class
TestDirectives
:
...
...
@@ -345,6 +345,29 @@ class TestBoundaryCases:
r
=
self
.
cp
(
t
)
assert
r
def test_unconnected_symbols(self):
    """A rule that the root rule never refers to must be reported.

    Compiling a grammar with such a rule is expected to raise
    ``CompilationError``. The object carried in the error's ``result``
    attribute must nevertheless contain entries for both rules.
    """
    ebnf = "root = /.*/\nunconnected = /.*/\n"
    try:
        # Only the call that is expected to raise lives in the ``try``.
        parser_factory(ebnf)()
    except CompilationError as err:
        grammar = err.result
    else:
        # Raised explicitly instead of ``assert False`` so the check is
        # not stripped when Python runs with ``-O``.
        raise AssertionError(
            "EBNF compiler should complain about unconnected rules.")
    assert grammar.__dict__['root']
    assert grammar.__dict__['unconnected']
class TestSynonymDetection:
    """Tests for rules whose right-hand side is just another symbol."""

    def test_synonym_detection(self):
        """Both the synonym rule and its target keep their own names.

        Also checks that the s-expression obtained by parsing ``'b'``
        contains the character ``b`` exactly twice.
        """
        ebnf = "a = b\nb = /b/\n"
        grammar = parser_factory(ebnf)()
        for rule_name in ('a', 'b'):
            actual_name = grammar[rule_name].name
            assert actual_name == rule_name, actual_name
        sexpr = grammar('b').as_sexpr()
        assert sexpr.count('b') == 2
if
__name__
==
"__main__"
:
from
DHParser.testing
import
runner
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment