Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
badw-it
DHParser
Commits
cc02687e
Commit
cc02687e
authored
Feb 08, 2019
by
Eckhart Arnold
Browse files
- bug fixes
parent
a21adc6a
Changes
10
Hide whitespace changes
Inline
Side-by-side
DHParser/compile.py
View file @
cc02687e
...
...
@@ -225,8 +225,9 @@ def compile_source(source: str,
log_ST
(
syntax_tree
,
log_file_name
+
'.cst'
)
log_parsing_history
(
parser
,
log_file_name
)
assert
is_error
(
syntax_tree
.
error_flag
)
or
str
(
syntax_tree
)
==
strip_tokens
(
source_text
),
\
str
(
syntax_tree
)
# TODO: Only valid if neither tokens nor whitespace are dropped early!
# assert is_error(syntax_tree.error_flag) or str(syntax_tree) == strip_tokens(source_text), \
# str(syntax_tree) # Only valid if neither tokens nor whitespace are dropped early
# only compile if there were no syntax errors, for otherwise it is
# likely that error list gets littered with compile error messages
result
=
None
...
...
DHParser/parse.py
View file @
cc02687e
...
...
@@ -46,6 +46,7 @@ from typing import Callable, cast, List, Tuple, Set, Dict, DefaultDict, Union, O
__all__
=
(
'Parser'
,
'UnknownParserError'
,
'Grammar'
,
'EMPTY_NODE'
,
'PreprocessorToken'
,
'Token'
,
'DropToken'
,
...
...
@@ -866,7 +867,7 @@ class Grammar:
if
not
rest
:
result
,
_
=
parser
(
rest
)
if
result
is
None
:
result
=
Node
(
ZOMBIE_TAG
,
''
).
in
it_pos
(
0
)
result
=
Node
(
ZOMBIE_TAG
,
''
).
w
it
h
_pos
(
0
)
self
.
tree__
.
new_error
(
result
,
'Parser "%s" did not match empty document.'
%
str
(
parser
),
Error
.
PARSER_DID_NOT_MATCH
)
...
...
@@ -905,7 +906,7 @@ class Grammar:
if
len
(
stitches
)
<
MAX_DROPOUTS
else
" too often! Terminating parser."
)
error_code
=
Error
.
PARSER_STOPPED_BEFORE_END
stitches
.
append
(
Node
(
ZOMBIE_TAG
,
skip
).
in
it_pos
(
tail_pos
(
stitches
)))
stitches
.
append
(
Node
(
ZOMBIE_TAG
,
skip
).
w
it
h
_pos
(
tail_pos
(
stitches
)))
self
.
tree__
.
new_error
(
stitches
[
-
1
],
error_msg
,
error_code
)
if
self
.
history_tracking__
:
# # some parsers may have matched and left history records with nodes != None.
...
...
@@ -914,7 +915,7 @@ class Grammar:
# # to zero. Therefore, their pos properties need to be initialized here
# for record in self.history__:
# if record.node and record.node._pos < 0:
# record.node.
in
it_pos(0)
# record.node.
w
it
h
_pos(0)
record
=
HistoryRecord
(
self
.
call_stack__
.
copy
(),
stitches
[
-
1
],
rest
,
self
.
line_col__
(
rest
))
self
.
history__
.
append
(
record
)
...
...
@@ -924,7 +925,7 @@ class Grammar:
if
rest
:
stitches
.
append
(
Node
(
ZOMBIE_TAG
,
rest
))
#try:
result
=
Node
(
ZOMBIE_TAG
,
tuple
(
stitches
)).
in
it_pos
(
0
)
result
=
Node
(
ZOMBIE_TAG
,
tuple
(
stitches
)).
w
it
h
_pos
(
0
)
# except AssertionError as error:
# # some debugging output
# print(Node(ZOMBIE_TAG, tuple(stitches)).as_sxpr())
...
...
@@ -938,7 +939,7 @@ class Grammar:
# add another child node at the end to ensure that the position
# of the error will be the end of the text. Otherwise, the error
# message above ("...after end of parsing") would appear illogical.
error_node
=
Node
(
ZOMBIE_TAG
,
''
).
in
it_pos
(
tail_pos
(
result
.
children
))
error_node
=
Node
(
ZOMBIE_TAG
,
''
).
w
it
h
_pos
(
tail_pos
(
result
.
children
))
self
.
tree__
.
new_error
(
error_node
,
error_msg
,
error_code
)
result
.
result
=
result
.
children
+
(
error_node
,)
else
:
...
...
@@ -1251,17 +1252,12 @@ class MetaParser(Parser):
# Node(self.tag_name, node) # unoptimized code
assert
node
is
None
or
isinstance
(
node
,
Node
)
if
node
:
if
node
.
_result
:
return
Node
(
self
.
tag_name
,
node
)
if
self
.
pname
else
node
elif
self
.
pname
:
nd1
=
Node
(
self
.
tag_name
,
())
# type: Node
# nd1.errors = node.errors
return
nd1
# elif node.errors:
# nd2 = Node(self.tag_name, ()) # type: Node
# nd2.errors = node.errors
# return nd2
elif
self
.
pname
:
if
self
.
pname
:
if
node
.
tag_name
[
0
]
==
':'
:
# faster than node.is_anonymous()
return
Node
(
self
.
tag_name
,
node
.
_result
)
return
Node
(
self
.
tag_name
,
node
)
return
node
if
self
.
pname
:
return
Node
(
self
.
tag_name
,
())
# type: Node
return
EMPTY_NODE
# avoid creation of a node object for anonymous empty nodes
...
...
@@ -1486,7 +1482,7 @@ def mandatory_violation(grammar: Grammar,
reloc
:
int
)
->
Tuple
[
Error
,
Node
,
StringView
]:
i
=
reloc
if
reloc
>=
0
else
0
location
=
grammar
.
document_length__
-
len
(
text_
)
err_node
=
Node
(
ZOMBIE_TAG
,
text_
[:
i
]).
in
it_pos
(
location
)
err_node
=
Node
(
ZOMBIE_TAG
,
text_
[:
i
]).
w
it
h
_pos
(
location
)
found
=
text_
[:
10
].
replace
(
'
\n
'
,
'
\\
n '
)
for
search
,
message
in
err_msgs
:
rxs
=
not
isinstance
(
search
,
str
)
...
...
DHParser/syntaxtree.pxd
View file @
cc02687e
...
...
@@ -15,7 +15,7 @@ cdef class Node:
cpdef
get
(
self
,
index_or_tagname
,
surrogate
)
cpdef
is_anonymous
(
self
)
cpdef
in
it_pos
(
self
,
pos
)
cpdef
w
it
h
_pos
(
self
,
pos
)
cpdef
attr_active
(
self
)
# cpdef compare_attr(self, other)
# cpdef _tree_repr(self, tab, open_fn, close_fn, data_fn, density, inline, inline_fn)
...
...
@@ -28,7 +28,7 @@ cdef class Node:
cdef
class
FrozenNode
(
Node
):
cpdef
in
it_pos
(
self
,
pos
)
cpdef
w
it
h
_pos
(
self
,
pos
)
cdef
class
RootNode
(
Node
):
...
...
DHParser/syntaxtree.py
View file @
cc02687e
...
...
@@ -175,7 +175,6 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
self
.
_len
=
-
1
# type: int # lazy evaluation
else
:
self
.
result
=
result
# assert tag_name is not None
self
.
tag_name
=
tag_name
# type: str
def
__deepcopy__
(
self
,
memo
):
...
...
@@ -298,7 +297,7 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
def
is_anonymous
(
self
):
return
self
.
tag_name
[
0
]
==
':'
return
not
self
.
tag_name
or
self
.
tag_name
[
0
]
==
':'
@
property
...
...
@@ -368,30 +367,34 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
def
pos
(
self
)
->
int
:
"""Returns the position of the Node's content in the source text."""
if
self
.
_pos
<
0
:
raise
AssertionError
(
"Position value not initialized!"
)
raise
AssertionError
(
"Position value not initialized!
Use Node.with_pos()
"
)
return
self
.
_pos
def
in
it_pos
(
self
,
pos
:
int
)
->
'Node'
:
def
w
it
h
_pos
(
self
,
pos
:
int
)
->
'Node'
:
"""
(Re-)i
nitialize position value. Usually, the parser guard
I
nitialize position value. Usually, the parser guard
(`parsers.add_parser_guard()`) takes care of assigning the
position in the document to newly created nodes. However,
whe
re
Nodes are created outside the reach of the parser
whe
n
Nodes are created outside the reach of the parser
guard, their document-position must be assigned manually.
This function recursively
reassign
s the position values
of the child nodes, too
.
Position values of the child nodes a
re
assign
ed recursively, too.
Returns the node itself for convenience
.
"""
# condition self.pos == pos cannot be assumed when tokens or whitespace
# are dropped early!
# assert self._pos < 0 or self.pos == pos, ("pos mismatch %i != %i at Node: %s"
# % (self._pos, pos, repr(self)))
self
.
_pos
=
pos
# recursively adjust pos-values of all children
offset
=
self
.
pos
for
child
in
self
.
children
:
child
.
init_pos
(
offset
)
offset
=
child
.
pos
+
len
(
child
)
if
pos
!=
self
.
_pos
>=
0
:
raise
AssertionError
(
"Position value cannot be reassigned to a different value!"
)
if
self
.
_pos
<
0
:
self
.
_pos
=
pos
# recursively adjust pos-values of all children
offset
=
self
.
pos
for
child
in
self
.
children
:
if
child
.
_pos
<
0
:
child
.
with_pos
(
offset
)
offset
=
child
.
pos
+
len
(
child
)
return
self
...
...
@@ -736,7 +739,7 @@ class FrozenNode(Node):
# if errors:
# raise AssertionError('Cannot assign error list to frozen node')
def
in
it_pos
(
self
,
pos
:
int
)
->
'Node'
:
def
w
it
h
_pos
(
self
,
pos
:
int
)
->
'Node'
:
pass
...
...
DHParser/testing.py
View file @
cc02687e
...
...
@@ -401,7 +401,7 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
cst
=
parser
(
test_code
,
parser_name
,
track_history
=
has_lookahead
(
parser_name
))
except
UnknownParserError
as
upe
:
cst
=
RootNode
()
cst
=
cst
.
new_error
(
Node
(
ZOMBIE_TAG
,
""
).
in
it_pos
(
0
),
str
(
upe
))
cst
=
cst
.
new_error
(
Node
(
ZOMBIE_TAG
,
""
).
w
it
h
_pos
(
0
),
str
(
upe
))
clean_test_name
=
str
(
test_name
).
replace
(
'*'
,
''
)
# log_ST(cst, "match_%s_%s.cst" % (parser_name, clean_test_name))
tests
.
setdefault
(
'__cst__'
,
{})[
test_name
]
=
cst
...
...
@@ -450,7 +450,7 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
try
:
cst
=
parser
(
test_code
,
parser_name
,
track_history
=
has_lookahead
(
parser_name
))
except
UnknownParserError
as
upe
:
node
=
Node
(
ZOMBIE_TAG
,
""
).
in
it_pos
(
0
)
node
=
Node
(
ZOMBIE_TAG
,
""
).
w
it
h
_pos
(
0
)
cst
=
RootNode
(
node
).
new_error
(
node
,
str
(
upe
))
errata
.
append
(
'Unknown parser "{}" in fail test "{}"!'
.
format
(
parser_name
,
test_name
))
tests
.
setdefault
(
'__err__'
,
{})[
test_name
]
=
errata
[
-
1
]
...
...
examples/Tutorial/LyrikCompiler_example.py
View file @
cc02687e
...
...
@@ -146,9 +146,9 @@ Lyrik_AST_transformation_table = {
"jahr"
:
[
reduce_single_child
,
remove_whitespace
,
reduce_single_child
],
"wortfolge"
:
[
flatten
(
is_one_of
(
'WORT'
),
recursive
=
False
),
peek
,
rstrip
,
collapse
],
[
flatten
(
is_one_of
(
'WORT'
),
recursive
=
False
),
rstrip
,
collapse
],
"namenfolge"
:
[
flatten
(
is_one_of
(
'NAME'
),
recursive
=
False
),
peek
,
rstrip
,
collapse
],
[
flatten
(
is_one_of
(
'NAME'
),
recursive
=
False
),
rstrip
,
collapse
],
"verknüpfung"
:
[
flatten
,
remove_tokens
(
'<'
,
'>'
),
remove_whitespace
,
reduce_single_child
],
"ziel"
:
...
...
examples/Tutorial/Lyrik
.ebnf_alternative
→
examples/Tutorial/Lyrik
_explicit_whitespace.ebnf
View file @
cc02687e
@ whitespace = horizontal
@ drop = whitespace
@ drop = whitespace
, token
gedicht = bibliographisches { LEERZEILE }+ [serie] §titel text /\s*/ ENDE
...
...
examples/Tutorial/Lyrik_explicit_whitespaceCompiler.py
0 → 100755
View file @
cc02687e
#!/usr/bin/python
#######################################################################
#
# SYMBOLS SECTION - Can be edited. Changes will be preserved.
#
#######################################################################
import
collections
from
functools
import
partial
import
os
import
sys
sys
.
path
.
append
(
r
'/home/eckhart/Entwicklung/DHParser'
)
try
:
import
regex
as
re
except
ImportError
:
import
re
from
DHParser
import
logging
,
is_filename
,
load_if_file
,
\
Grammar
,
Compiler
,
nil_preprocessor
,
PreprocessorToken
,
Whitespace
,
DropWhitespace
,
\
Lookbehind
,
Lookahead
,
Alternative
,
Pop
,
Token
,
DropToken
,
Synonym
,
AllOf
,
SomeOf
,
\
Unordered
,
Option
,
NegativeLookbehind
,
OneOrMore
,
RegExp
,
Retrieve
,
Series
,
Capture
,
\
ZeroOrMore
,
Forward
,
NegativeLookahead
,
Required
,
mixin_comment
,
compile_source
,
\
grammar_changed
,
last_value
,
counterpart
,
accumulate
,
PreprocessorFunc
,
\
Node
,
TransformationFunc
,
TransformationDict
,
transformation_factory
,
traverse
,
\
remove_children_if
,
move_whitespace
,
normalize_whitespace
,
is_anonymous
,
matches_re
,
\
reduce_single_child
,
replace_by_single_child
,
replace_or_reduce
,
remove_whitespace
,
\
remove_expendables
,
remove_empty
,
remove_tokens
,
flatten
,
is_whitespace
,
is_empty
,
\
is_expendable
,
collapse
,
collapse_if
,
replace_content
,
WHITESPACE_PTYPE
,
TOKEN_PTYPE
,
\
remove_nodes
,
remove_content
,
remove_brackets
,
replace_parser
,
remove_anonymous_tokens
,
\
keep_children
,
is_one_of
,
not_one_of
,
has_content
,
apply_if
,
remove_first
,
remove_last
,
\
remove_anonymous_empty
,
keep_nodes
,
traverse_locally
,
strip
,
lstrip
,
rstrip
,
\
replace_content
,
replace_content_by
,
forbid
,
assert_content
,
remove_infix_operator
,
\
error_on
,
recompile_grammar
,
GLOBALS
#######################################################################
#
# PREPROCESSOR SECTION - Can be edited. Changes will be preserved.
#
#######################################################################
def Lyrik_explicit_whitespacePreprocessor(text):
    """Pass-through preprocessor: hand the source text back unchanged.

    Returns a 2-tuple ``(text, mapping)`` where ``text`` is the argument
    itself and ``mapping`` is an identity function (``mapping(i) == i``).
    """
    # presumably the second element maps positions in the preprocessed text
    # back to positions in the original text -- confirm against DHParser docs
    return text, lambda i: i
def get_preprocessor() -> PreprocessorFunc:
    """Return the preprocessor function used for this grammar."""
    preprocessor = Lyrik_explicit_whitespacePreprocessor
    return preprocessor
#######################################################################
#
# PARSER SECTION - Don't edit! CHANGES WILL BE OVERWRITTEN!
#
#######################################################################
class
Lyrik_explicit_whitespaceGrammar
(
Grammar
):
r
"""Parser for a Lyrik_explicit_whitespace source file.
"""
source_hash__
=
"824c3970f8997489b9a0faa53f2dff51"
parser_initialization__
=
[
"upon instantiation"
]
resume_rules__
=
{}
COMMENT__
=
r
''
WHITESPACE__
=
r
'[\t ]*'
WSP_RE__
=
mixin_comment
(
whitespace
=
WHITESPACE__
,
comment
=
COMMENT__
)
dwsp__
=
DropWhitespace
(
WSP_RE__
)
wsp__
=
Whitespace
(
WSP_RE__
)
L
=
Series
(
RegExp
(
'[
\\
t]+'
),
dwsp__
)
ENDE
=
NegativeLookahead
(
RegExp
(
'.'
))
JAHRESZAHL
=
RegExp
(
'
\\
d
\\
d
\\
d
\\
d'
)
LEERZEILE
=
Series
(
RegExp
(
'
\\
n[
\\
t]*(?=
\\
n)'
),
dwsp__
)
NZ
=
RegExp
(
'
\\
n'
)
ZEICHENFOLGE
=
RegExp
(
'[^
\\
n<>]+'
)
NAME
=
RegExp
(
'
\\
w+
\\
.?'
)
WORT
=
RegExp
(
'
\\
w+'
)
vers
=
OneOrMore
(
Series
(
ZEICHENFOLGE
,
Option
(
L
)))
strophe
=
OneOrMore
(
Series
(
NZ
,
vers
))
text
=
OneOrMore
(
Series
(
strophe
,
ZeroOrMore
(
LEERZEILE
)))
zeile
=
OneOrMore
(
Series
(
ZEICHENFOLGE
,
Option
(
L
)))
titel
=
OneOrMore
(
Series
(
NZ
,
Option
(
L
),
zeile
,
OneOrMore
(
LEERZEILE
)))
serie
=
Series
(
NegativeLookahead
(
Series
(
titel
,
vers
,
NZ
,
vers
)),
OneOrMore
(
Series
(
NZ
,
zeile
)),
OneOrMore
(
LEERZEILE
))
ziel
=
Series
(
ZEICHENFOLGE
,
dwsp__
)
verknüpfung
=
Series
(
Series
(
DropToken
(
"<"
),
dwsp__
),
ziel
,
Series
(
DropToken
(
">"
),
dwsp__
))
namenfolge
=
OneOrMore
(
Series
(
NAME
,
Option
(
L
)))
wortfolge
=
OneOrMore
(
Series
(
WORT
,
Option
(
L
)))
jahr
=
Series
(
JAHRESZAHL
,
dwsp__
)
ort
=
Series
(
wortfolge
,
Option
(
verknüpfung
))
untertitel
=
Series
(
wortfolge
,
Option
(
verknüpfung
))
werk
=
Series
(
wortfolge
,
Option
(
Series
(
Series
(
DropToken
(
"."
),
dwsp__
),
untertitel
,
mandatory
=
1
)),
Option
(
verknüpfung
))
autor
=
Series
(
namenfolge
,
Option
(
verknüpfung
))
bibliographisches
=
Series
(
autor
,
Series
(
DropToken
(
","
),
dwsp__
),
Option
(
Series
(
NZ
,
dwsp__
)),
werk
,
Series
(
DropToken
(
","
),
dwsp__
),
Option
(
Series
(
NZ
,
dwsp__
)),
ort
,
Series
(
DropToken
(
","
),
dwsp__
),
Option
(
Series
(
NZ
,
dwsp__
)),
jahr
,
Series
(
DropToken
(
"."
),
dwsp__
),
mandatory
=
1
)
gedicht
=
Series
(
bibliographisches
,
OneOrMore
(
LEERZEILE
),
Option
(
serie
),
titel
,
text
,
RegExp
(
'
\\
s*'
),
ENDE
,
mandatory
=
3
)
root__
=
gedicht
def get_grammar() -> Lyrik_explicit_whitespaceGrammar:
    """Return the grammar object cached on ``GLOBALS``, creating it on first use.

    If the attribute ``python_src__`` has been attached to this function, it
    is copied onto the newly created grammar object.
    """
    # No ``global`` statement is needed: GLOBALS is only read and has
    # attributes set on it; the name itself is never rebound.
    try:
        grammar = GLOBALS.Lyrik_explicit_whitespace_00000002_grammar_singleton
    except AttributeError:
        # NOTE(review): the source was flattened; the nesting of this branch
        # follows the usual DHParser template -- confirm.
        grammar = Lyrik_explicit_whitespaceGrammar()
        GLOBALS.Lyrik_explicit_whitespace_00000002_grammar_singleton = grammar
        if hasattr(get_grammar, 'python_src__'):
            grammar.python_src__ = get_grammar.python_src__
    return grammar
#######################################################################
#
# AST SECTION - Can be edited. Changes will be preserved.
#
#######################################################################
# AST transformations for the Lyrik_explicit_whitespace grammar.
# Keys are node tag names (plus the special keys "<", ":Token" and "*");
# values are a transformation or a list of transformations.
Lyrik_explicit_whitespace_AST_transformation_table = {
    "<": remove_empty,
    # grammar symbols -- no transformations assigned yet
    "gedicht": [],
    "bibliographisches": [],
    "autor": [],
    "werk": [],
    "untertitel": [],
    "ort": [],
    "jahr": [],
    "wortfolge": [],
    "namenfolge": [],
    "verknüpfung": [],
    "ziel": [],
    "serie": [],
    "titel": [],
    "zeile": [],
    "text": [],
    "strophe": [],
    "vers": [],
    "WORT": [],
    "NAME": [],
    "ZEICHENFOLGE": [],
    "NZ": [],
    "LEERZEILE": [],
    "JAHRESZAHL": [],
    "ENDE": [],
    "L": [],
    ":Token": reduce_single_child,
    "*": replace_by_single_child,
}
def Lyrik_explicit_whitespaceTransform() -> TransformationFunc:
    """Build the AST transformation callable for this grammar.

    Returns ``traverse`` with ``processing_table`` pre-bound to a shallow
    copy of ``Lyrik_explicit_whitespace_AST_transformation_table``. The
    result is a callable, not a dictionary, hence the ``TransformationFunc``
    return annotation.
    """
    # copy() is shallow: the table itself is not shared with the returned
    # callable, but the value objects inside it are.
    table = Lyrik_explicit_whitespace_AST_transformation_table.copy()
    return partial(traverse, processing_table=table)
def get_transformer() -> TransformationFunc:
    """Return the transformer cached on ``GLOBALS``, creating it on first use."""
    try:
        return GLOBALS.Lyrik_explicit_whitespace_00000002_transformer_singleton
    except AttributeError:
        # first call: build the transformer and remember it
        GLOBALS.Lyrik_explicit_whitespace_00000002_transformer_singleton = \
            Lyrik_explicit_whitespaceTransform()
        return GLOBALS.Lyrik_explicit_whitespace_00000002_transformer_singleton
#######################################################################
#
# COMPILER SECTION - Can be edited. Changes will be preserved.
#
#######################################################################
class Lyrik_explicit_whitespaceCompiler(Compiler):
    """Compiler for the abstract syntax tree of a Lyrik_explicit_whitespace
    source file.

    Only ``on_gedicht`` is implemented; it delegates to
    ``fallback_compiler``. The commented-out ``on_...`` stubs below are
    templates for further node handlers.
    """

    def __init__(self):
        super().__init__()

    def _reset(self):
        super()._reset()
        # initialize your variables here, not in the constructor!

    def on_gedicht(self, node):
        compiled = self.fallback_compiler(node)
        return compiled

    # def on_bibliographisches(self, node):
    #     return node

    # def on_autor(self, node):
    #     return node

    # def on_werk(self, node):
    #     return node

    # def on_untertitel(self, node):
    #     return node

    # def on_ort(self, node):
    #     return node

    # def on_jahr(self, node):
    #     return node

    # def on_wortfolge(self, node):
    #     return node

    # def on_namenfolge(self, node):
    #     return node

    # def on_verknüpfung(self, node):
    #     return node

    # def on_ziel(self, node):
    #     return node

    # def on_serie(self, node):
    #     return node

    # def on_titel(self, node):
    #     return node

    # def on_zeile(self, node):
    #     return node

    # def on_text(self, node):
    #     return node

    # def on_strophe(self, node):
    #     return node

    # def on_vers(self, node):
    #     return node

    # def on_WORT(self, node):
    #     return node

    # def on_NAME(self, node):
    #     return node

    # def on_ZEICHENFOLGE(self, node):
    #     return node

    # def on_NZ(self, node):
    #     return node

    # def on_LEERZEILE(self, node):
    #     return node

    # def on_JAHRESZAHL(self, node):
    #     return node

    # def on_ENDE(self, node):
    #     return node

    # def on_L(self, node):
    #     return node
def get_compiler() -> Lyrik_explicit_whitespaceCompiler:
    """Return the compiler cached on ``GLOBALS``, creating it on first use."""
    try:
        return GLOBALS.Lyrik_explicit_whitespace_00000002_compiler_singleton
    except AttributeError:
        # first call: instantiate the compiler and remember it
        GLOBALS.Lyrik_explicit_whitespace_00000002_compiler_singleton = \
            Lyrik_explicit_whitespaceCompiler()
        return GLOBALS.Lyrik_explicit_whitespace_00000002_compiler_singleton
#######################################################################
#
# END OF DHPARSER-SECTIONS
#
#######################################################################
def compile_src(source, log_dir=''):
    """Compile ``source`` and return ``(result, errors, ast)``.

    The compilation runs inside the ``logging(log_dir)`` context and chains
    the preprocessor, grammar, transformer and compiler obtained from the
    ``get_*`` factory functions of this module.

    :param source: passed through to ``compile_source`` as its first argument.
    :param log_dir: passed to ``logging``; defaults to the empty string.
    :returns: whatever ``compile_source`` returns (documented by the
        original docstring as the tuple ``(result, errors, ast)``).
    """
    with logging(log_dir):
        result_tuple = compile_source(
            source,
            get_preprocessor(),
            get_grammar(),
            get_transformer(),
            get_compiler(),
        )
    return result_tuple
if __name__ == "__main__":
    # Recompile the grammar if needed. The .ebnf file is expected next to
    # this script, under the same base name.
    grammar_path = os.path.abspath(__file__).replace('Compiler.py', '.ebnf')
    if os.path.exists(grammar_path):
        if not recompile_grammar(grammar_path, force=False,
                                 notify=lambda: print('recompiling ' + grammar_path)):
            # recompilation failed: show the error report and give up
            error_file = os.path.basename(__file__).replace('Compiler.py', '_ebnf_ERRORS.txt')
            with open(error_file, encoding="utf-8") as f:
                print(f.read())
            sys.exit(1)
    else:
        print('Could not check whether grammar requires recompiling, '
              'because grammar was not found at: ' + grammar_path)

    if len(sys.argv) > 1:
        # compile file
        file_name, log_dir = sys.argv[1], ''
        if file_name in ['-d', '--debug'] and len(sys.argv) > 2:
            # debug flag given: the file name is the next argument, logs go to 'LOGS'
            file_name, log_dir = sys.argv[2], 'LOGS'
        result, errors, ast = compile_src(file_name, log_dir)
        if errors:
            cwd = os.getcwd()
            # Use a real relative path instead of slicing off the prefix:
            # slicing left a leading path separator and produced nonsense for
            # sibling directories that merely share a string prefix with cwd.
            if file_name.startswith(cwd):
                rel_path = os.path.relpath(file_name, cwd)
            else:
                rel_path = file_name
            for error in errors:
                print(rel_path + ':' + str(error))
            sys.exit(1)
        else:
            print(result.as_xml() if isinstance(result, Node) else result)
    else:
        print("Usage: Lyrik_explicit_whitespaceCompiler.py [FILENAME]")
test/test_parse.py
View file @
cc02687e
...
...
@@ -27,12 +27,13 @@ sys.path.extend(['../', './'])
from
DHParser.toolkit
import
compile_python_object
from
DHParser.log
import
logging
,
is_logging
,
log_ST
,
log_parsing_history
from
DHParser.error
import
Error
from
DHParser.parse
import
Retrieve
,
Parser
,
Grammar
,
Forward
,
TKN
,
ZeroOrMore
,
RE
,
\
from
DHParser.parse
import
Parser
,
Grammar
,
Forward
,
TKN
,
ZeroOrMore
,
RE
,
\
RegExp
,
Lookbehind
,
NegativeLookahead
,
OneOrMore
,
Series
,
Alternative
,
AllOf
,
SomeOf
,
\
UnknownParserError
UnknownParserError
,
MetaParser
,
EMPTY_NODE
from
DHParser
import
compile_source
from
DHParser.ebnf
import
get_ebnf_grammar
,
get_ebnf_transformer
,
get_ebnf_compiler
from
DHParser.dsl
import
grammar_provider
,
DHPARSER_IMPORTS
from
DHParser.syntaxtree
import
Node
class
TestParserClass
:
...
...
@@ -785,6 +786,51 @@ class TestEarlyTokenWhitespaceDrop:
assert
next
(
cst
.
select
(
lambda
node
:
node
.
content
==
'X'
))
class TestMetaParser:
    """Exercises ``MetaParser._return_value`` for named and unnamed parsers."""

    def test_meta_parser(self):
        meta = MetaParser()

        # --- named parser: the returned node always carries the parser's name
        meta.pname = "named"
        meta.tag_name = meta.pname

        # a named child node is kept as the single child
        out = meta._return_value(Node('tagged', 'non-empty'))
        assert out.tag_name == 'named', out.as_sxpr()
        assert len(out.children) == 1
        assert out.children[0].tag_name == 'tagged'
        assert out.children[0].result == "non-empty"

        # ... also when the named child is empty
        out = meta._return_value(Node('tagged', ''))
        assert out.tag_name == 'named', out.as_sxpr()
        assert len(out.children) == 1
        assert out.children[0].tag_name == 'tagged'
        assert not out.children[0].result

        # an anonymous node is replaced; only its result is taken over
        out = meta._return_value(Node(':anonymous', 'content'))
        assert out.tag_name == 'named', out.as_sxpr()
        assert not out.children
        assert out.result == 'content'

        out = meta._return_value(Node(':anonymous', ''))
        assert out.tag_name == 'named', out.as_sxpr()
        assert not out.children
        assert not out.content

        # --- unnamed parser: the passed node comes back with its own tag name
        meta.pname = ''
        meta.tag_name = ':unnamed'

        out = meta._return_value(Node('tagged', 'non-empty'))
        assert out.tag_name == 'tagged', out.as_sxpr()
        assert len(out.children) == 0
        assert out.content == 'non-empty'

        out = meta._return_value(Node('tagged', ''))
        assert out.tag_name == 'tagged', out.as_sxpr()
        assert len(out.children) == 0
        assert not out.content

        out = meta._return_value(Node(':anonymous', 'content'))
        assert out.tag_name == ':anonymous', out.as_sxpr()
        assert not out.children
        assert out.result == 'content'

        out = meta._return_value(Node('', ''))
        assert out.tag_name == '', out.as_sxpr()
        assert not out.children
        assert not out.content

        # no node at all yields the shared empty node
        assert meta._return_value(None) == EMPTY_NODE