badw-it / DHParser / Commits

Commit 60800f1c
Authored Jul 01, 2017 by Eckhart Arnold

- parsers.py: support for rolling back of discarded capture and pop operations

Parent: 2721ad5f
13 changed files
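What this commit does: Capture parsers push values onto a variable stack and
Pop parsers remove them, but both are side effects that survive even when a
backtracking parser later discards the branch in which they happened. The
Grammar object therefore now keeps a rollback__ list of (location, callback)
pairs, where location is the length of the text still to be parsed. Whenever
parsing resumes after a backtrack, all callbacks registered at or beyond the
resumption point are executed, undoing the stale captures and pops. A minimal
self-contained sketch of the mechanism (simplified names, not DHParser's
actual API):

    # Sketch only: positions are identified by the length of the remaining
    # text, so a *smaller* number means a position *further into* the input.
    rollback = []                 # list of (location, undo-callback) pairs
    variables = {'sym': []}       # capture-retrieve stacks, keyed by symbol

    def capture(value, remaining_len):
        """Push a value and register how to undo the push."""
        stack = variables['sym']
        stack.append(value)
        rollback.append((remaining_len, stack.pop))

    def resume_at(location):
        """Undo the side effects of a discarded branch beyond 'location'."""
        while rollback and rollback[-1][0] <= location:
            rollback[-1][1]()     # run the undo-callback
            rollback.pop()

    capture('a', 90)              # captured with 90 characters left
    capture('b', 80)              # captured deeper, in the failed branch
    resume_at(85)                 # backtrack: retry with 85 characters left
    assert variables['sym'] == ['a']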
DHParser/dsl.py
@@ -24,7 +24,10 @@ try:
     import regex as re
 except ImportError:
     import re
-from .typing import Any, cast, Tuple, Union
+try:
+    from typing import Any, cast, Tuple, Union
+except ImportError:
+    from .typing34 import Any, cast, Tuple, Union

 from DHParser.ebnf import EBNFTransformer, EBNFCompiler, grammar_changed, \
     get_ebnf_scanner, get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler, \
@@ -74,7 +77,7 @@ except ImportError:
 from DHParser.toolkit import logging, is_filename, load_if_file
 from DHParser.parsers import Grammar, Compiler, nil_scanner, \
     Lookbehind, Lookahead, Alternative, Pop, Required, Token, Synonym, \
-    Optional, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Sequence, RE, Capture, \
+    Optional, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, RE, Capture, \
     ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \
     last_value, counterpart, accumulate, ScannerFunc
 from DHParser.syntaxtree import Node, traverse, remove_enclosing_delimiters, \
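A note on the import pattern above, which recurs in every module touched by
this commit: the bundled typing module is renamed to typing34 (see the file
moves at the bottom of this commit), and imports now prefer the standard
library's typing module, falling back to the bundled copy only where the
standard library lacks it (typing entered the standard library with
Python 3.5; the new name presumably marks the fallback as targeting
Python 3.4). The general shape:

    try:
        from typing import Any, cast, Tuple, Union      # standard library
    except ImportError:
        from .typing34 import Any, cast, Tuple, Union   # bundled fallback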
DHParser/ebnf.py
@@ -23,11 +23,14 @@ try:
     import regex as re
 except ImportError:
     import re
-from .typing import Callable, Dict, List, Set, Tuple
+try:
+    from typing import Callable, Dict, List, Set, Tuple
+except ImportError:
+    from .typing34 import Callable, Dict, List, Set, Tuple

 from DHParser.toolkit import load_if_file, escape_re, md5, sane_parser_name
 from DHParser.parsers import Grammar, mixin_comment, nil_scanner, Forward, RE, NegativeLookahead, \
-    Alternative, Sequence, Optional, Required, OneOrMore, ZeroOrMore, Token, Compiler, \
+    Alternative, Series, Optional, Required, OneOrMore, ZeroOrMore, Token, Compiler, \
     ScannerFunc
 from DHParser.syntaxtree import Node, traverse, remove_enclosing_delimiters, reduce_single_child, \
     replace_by_single_child, TOKEN_PTYPE, remove_expendables, remove_tokens, flatten, \
@@ -121,26 +124,26 @@ class EBNFGrammar(Grammar):
     wspL__ = ''
     wspR__ = WSP__
     EOF = NegativeLookahead(RE('.', wR=''))
-    list_ = Sequence(RE('\\w+'), ZeroOrMore(Sequence(Token(","), RE('\\w+'))))
+    list_ = Series(RE('\\w+'), ZeroOrMore(Series(Token(","), RE('\\w+'))))
     regexp = RE('~?/(?:[^/]|(?<=\\\\)/)*/~?')
     literal = Alternative(RE('"(?:[^"]|\\\\")*?"'), RE("'(?:[^']|\\\\')*?'"))
     symbol = RE('(?!\\d)\\w+')
-    option = Sequence(Token("["), expression, Required(Token("]")))
-    repetition = Sequence(Token("{"), expression, Required(Token("}")))
-    oneormore = Sequence(Token("{"), expression, Token("}+"))
-    regexchain = Sequence(Token("<"), expression, Required(Token(">")))
-    group = Sequence(Token("("), expression, Required(Token(")")))
+    option = Series(Token("["), expression, Required(Token("]")))
+    repetition = Series(Token("{"), expression, Required(Token("}")))
+    oneormore = Series(Token("{"), expression, Token("}+"))
+    regexchain = Series(Token("<"), expression, Required(Token(">")))
+    group = Series(Token("("), expression, Required(Token(")")))
     retrieveop = Alternative(Token("::"), Token(":"))
     flowmarker = Alternative(Token("!"), Token("&"), Token("§"), Token("-!"), Token("-&"))
-    factor = Alternative(Sequence(Optional(flowmarker), Optional(retrieveop), symbol, NegativeLookahead(Token("="))), Sequence(Optional(flowmarker), literal), Sequence(Optional(flowmarker), regexp), Sequence(Optional(flowmarker), group), Sequence(Optional(flowmarker), regexchain), Sequence(Optional(flowmarker), oneormore), repetition, option)
+    factor = Alternative(Series(Optional(flowmarker), Optional(retrieveop), symbol, NegativeLookahead(Token("="))), Series(Optional(flowmarker), literal), Series(Optional(flowmarker), regexp), Series(Optional(flowmarker), group), Series(Optional(flowmarker), regexchain), Series(Optional(flowmarker), oneormore), repetition, option)
     term = OneOrMore(factor)
-    expression.set(Sequence(term, ZeroOrMore(Sequence(Token("|"), term))))
-    directive = Sequence(Token("@"), Required(symbol), Required(Token("=")), Alternative(regexp, literal, list_))
-    definition = Sequence(symbol, Required(Token("=")), expression)
-    syntax = Sequence(Optional(RE('', wR='', wL=WSP__)), ZeroOrMore(Alternative(definition, directive)), Required(EOF))
+    expression.set(Series(term, ZeroOrMore(Series(Token("|"), term))))
+    directive = Series(Token("@"), Required(symbol), Required(Token("=")), Alternative(regexp, literal, list_))
+    definition = Series(symbol, Required(Token("=")), expression)
+    syntax = Series(Optional(RE('', wR='', wL=WSP__)), ZeroOrMore(Alternative(definition, directive)), Required(EOF))
     root__ = syntax
@@ -601,7 +604,7 @@ class EBNFCompiler(Compiler):
         return self.non_terminal(node, 'Alternative')

     def on_term(self, node) -> str:
-        return self.non_terminal(node, 'Sequence')
+        return self.non_terminal(node, 'Series')

     def on_factor(self, node: Node) -> str:
         assert node.children
DHParser/parsers.py
@@ -57,7 +57,10 @@ try:
     import regex as re
 except ImportError:
     import re
-from .typing import Any, Callable, Dict, Iterator, List, Set, Tuple, Union
+try:
+    from typing import Any, Callable, Collection, Dict, Iterator, List, Set, Tuple, Union
+except ImportError:
+    from .typing34 import Any, Callable, Dict, Iterator, List, Set, Tuple, Union

 from DHParser.toolkit import is_logging, log_dir, logfile_basename, escape_re, sane_parser_name
 from DHParser.syntaxtree import WHITESPACE_PTYPE, TOKEN_PTYPE, ZOMBIE_PARSER, ParserBase, \
@@ -84,7 +87,7 @@ __all__ = ['ScannerFunc',
            'Optional',
            'ZeroOrMore',
            'OneOrMore',
-           'Sequence',
+           'Series',
            'Alternative',
            'FlowOperator',
            'Required',
@@ -163,9 +166,15 @@ def add_parser_guard(parser_func):
             location = len(text)
             grammar = parser.grammar  # grammar may be 'None' for unconnected parsers!
-            if grammar.history_tracking:
-                grammar.call_stack.append(parser)
-                grammar.moving_forward = True
+            if not grammar.moving_forward__:
+                # rollback variable changes for discarded branch of parsing tree
+                while grammar.rollback__ and grammar.rollback__[-1][0] <= location:
+                    grammar.rollback__[-1][1]()
+                    grammar.rollback__.pop()
+            grammar.moving_forward__ = True
+
+            if grammar.history_tracking__:
+                grammar.call_stack__.append(parser)

             # if location has already been visited by the current parser,
             # return saved result
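This guard wraps every parser call. Since location is the length of the still
unparsed text, it shrinks while the parser moves forward; the first call after
a backtrack (moving_forward__ is False, set at the end of every returning
call, see the hunk below) therefore sees a location greater than or equal to
the locations recorded for the captures and pops of the discarded branch, and
the while loop runs and discards exactly those undo-callbacks. The sketch
after the commit message above traces this with concrete numbers.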
@@ -184,7 +193,7 @@ def add_parser_guard(parser_func):
                 # in case of a recursive call saves the result of the first
                 # (or left-most) call that matches
                 parser.visited[location] = (node, rest)
-                grammar.last_node = node   # store last node for Lookbehind operator
+                grammar.last_node__ = node   # store last node for Lookbehind operator
             elif location in parser.visited:
                 # if parser did not match but a saved result exists, assume
                 # left recursion and use the saved result
@@ -192,14 +201,14 @@ def add_parser_guard(parser_func):
                 parser.recursion_counter[location] -= 1

-            if grammar.history_tracking:
+            if grammar.history_tracking__:
                 # don't track returning parsers except in case an error has occurred
-                if grammar.moving_forward or (node and node._errors):
-                    grammar.moving_forward = False
-                    record = HistoryRecord(grammar.call_stack.copy(), node, len(rest))
-                    grammar.history.append(record)
+                if grammar.moving_forward__ or (node and node._errors):
+                    record = HistoryRecord(grammar.call_stack__.copy(), node, len(rest))
+                    grammar.history__.append(record)
                     # print(record.stack, record.status, rest[:20].replace('\n', '|'))
-                grammar.call_stack.pop()
+                grammar.call_stack__.pop()
+            grammar.moving_forward__ = False

         except RecursionError:
             node = Node(None, text[:min(10, max(1, text.find("\n")))] + " ...")
@@ -248,7 +257,7 @@ class Parser(ParserBase, metaclass=ParserMetaClass):
         return self.name or self.ptype

     def __add__(self, other):
-        return Sequence(self, other)
+        return Series(self, other)

     def __or__(self, other):
         return Alternative(self, other)
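The Sequence-to-Series renaming runs through the whole commit; the overloaded
operators keep their meaning. A hedged usage sketch (a, b, c stand for
arbitrary Parser instances of this revision; the variable names are made up):

    seq = a + b + c    # Parser.__add__, then Series.__add__: Series(a, b, c)
    alt = a | b        # Parser.__or__: Alternative(a, b)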
@@ -324,9 +333,9 @@ class Grammar:
             self.wspL__ = ''
         if not hasattr(self.__class__, 'wspR__'):
             self.wspR__ = ''
-        self.all_parsers = set()  # type: Set[Parser]
-        self.dirty_flag = False
-        self.history_tracking = False
+        self.all_parsers__ = set()  # type: Set[Parser]
+        self.dirty_flag__ = False
+        self.history_tracking__ = False
         self._reset()
         # prepare parsers in the class, first
         self._assign_parser_names()
@@ -338,13 +347,13 @@ class Grammar:
         if self.wspL__:
             self.wsp_left_parser__ = Whitespace(self.wspL__)  # type: ParserBase
             self.wsp_left_parser__.grammar = self
-            self.all_parsers.add(self.wsp_left_parser__)  # don't you forget about me...
+            self.all_parsers__.add(self.wsp_left_parser__)  # don't you forget about me...
         else:
             self.wsp_left_parser__ = ZOMBIE_PARSER
         if self.wspR__:
             self.wsp_right_parser__ = Whitespace(self.wspR__)  # type: ParserBase
             self.wsp_right_parser__.grammar = self
-            self.all_parsers.add(self.wsp_right_parser__)  # don't you forget about me...
+            self.all_parsers__.add(self.wsp_right_parser__)  # don't you forget about me...
         else:
             self.wsp_right_parser__ = ZOMBIE_PARSER
         self.root__.apply(self._add_parser)
@@ -353,35 +362,39 @@ class Grammar:
         try:
             return self.__dict__[key]
         except KeyError:
-            parser = getattr(self, key, None)
-            if parser:
+            parser_template = getattr(self, key, None)
+            if parser_template:
                 # add parser to grammar object on the fly...
-                setattr(self, key, copy.deepcopy(parser))
-                self[key].apply(self._add_parser)
+                parser = copy.deepcopy(parser_template)
+                parser.apply(self._add_parser)
+                # assert self[key] == parser
                 return self[key]
             raise KeyError('Unknown parser "%s" !' % key)

     def _reset(self):
+        self.document__ = ""  # type: str
         # variables stored and recalled by Capture and Retrieve parsers
-        self.variables = dict()  # type: Dict[str, List[str]]
-        self.document = ""  # type: str
+        self.variables__ = dict()  # type: Dict[str, List[str]]
+        self.rollback__ = []  # type: List[Tuple[int, Callable]]
         # previously parsed node, needed by Lookbehind parser
-        self.last_node = None  # type: Node
+        self.last_node__ = None  # type: Node
         # support for call stack tracing
-        self.call_stack = []  # type: List[Parser]
+        self.call_stack__ = []  # type: List[Parser]
         # snapshots of call stacks
-        self.history = []  # type: List[HistoryRecord]
+        self.history__ = []  # type: List[HistoryRecord]
         # also needed for call stack tracing
-        self.moving_forward = True
+        self.moving_forward__ = True  # type: bool

     # TODO: Either make sure not to miss out unconnected parsers or raise an error! Actually, the EBNF-Compiler should keep track of this!
     def _add_parser(self, parser: Parser) -> None:
         """Adds the particular copy of the parser object to this
         particular instance of Grammar.
         """
         if parser.name:
             assert parser.name not in self.__dict__, \
                 ('Cannot add parser "%s" because a field with the same name '
                  'already exists in grammar object!' % parser.name)
             setattr(self, parser.name, parser)
-        self.all_parsers.add(parser)
+        self.all_parsers__.add(parser)
         parser.grammar = self

     def __call__(self, document: str, start_parser="root__") -> Node:
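The __getitem__ rework above keeps its old purpose but makes it explicit: the
parsers defined on a Grammar class are templates, and each Grammar instance
deep-copies a template on first access, so that per-instance state (the
visited caches, and now the rollback callbacks) cannot leak between grammar
objects. A minimal sketch of this copy-on-first-access pattern (illustrative
names, not DHParser's code):

    import copy

    class Registry:
        template = ['class-level', 'state']   # shared by all instances

        def __getitem__(self, key):
            try:
                return self.__dict__[key]     # instance copy already made?
            except KeyError:
                tmpl = getattr(self, key)     # falls back to class attribute
                setattr(self, key, copy.deepcopy(tmpl))   # private copy
                return self.__dict__[key]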
@@ -398,14 +411,14 @@ class Grammar:
         # assert isinstance(document, str), type(document)
         if self.root__ is None:
             raise NotImplementedError()
-        if self.dirty_flag:
+        if self.dirty_flag__:
             self._reset()
-            for parser in self.all_parsers:
+            for parser in self.all_parsers__:
                 parser.reset()
         else:
-            self.dirty_flag = True
-        self.history_tracking = is_logging()
-        self.document = document
+            self.dirty_flag__ = True
+        self.history_tracking__ = is_logging()
+        self.document__ = document
         parser = self[start_parser] if isinstance(start_parser, str) else start_parser
         assert parser.grammar == self, "Cannot run parsers from a different grammar object!" \
                                        " %s vs. %s" % (str(self), str(parser.grammar))
@@ -425,29 +438,29 @@ class Grammar:
                 error_msg = "Parser stopped before end" + \
                     (("! trying to recover" +
                       (" but stopping history recording at this point."
-                       if self.history_tracking else "..."))
+                       if self.history_tracking__ else "..."))
                      if len(stitches) < MAX_DROPOUTS
                      else " too often! Terminating parser.")
                 stitches.append(Node(None, skip))
                 stitches[-1].add_error(error_msg)
-                if self.history_tracking:
+                if self.history_tracking__:
                     # some parsers may have matched and left history records with nodes != None.
                     # Because these are not connected to the stitched root node, their pos
                     # properties will not be initialized by setting the root node's pos property
                     # to zero. Therefore, their pos properties need to be initialized here
-                    for record in self.history:
+                    for record in self.history__:
                         if record.node and record.node._pos < 0:
                             record.node.pos = 0
-                    record = HistoryRecord(self.call_stack.copy(), stitches[-1], len(rest))
-                    self.history.append(record)
-                    self.history_tracking = False
+                    record = HistoryRecord(self.call_stack__.copy(), stitches[-1], len(rest))
+                    self.history__.append(record)
+                    self.history_tracking__ = False
         if stitches:
             if rest:
                 stitches.append(Node(None, rest))
             result = Node(None, tuple(stitches))
-        if any(self.variables.values()):
+        if any(self.variables__.values()):
             result.add_error("Capture-retrieve-stack not empty after end of parsing: "
-                             + str(self.variables))
+                             + str(self.variables__))
         result.pos = 0  # calculate all positions
         return result
@@ -456,7 +469,7 @@ class Grammar:
         document.
         """
         def prepare_line(record):
-            excerpt = self.document.__getitem__(slice(*record.extent))[:25].replace('\n', '\\n')
+            excerpt = self.document__.__getitem__(slice(*record.extent))[:25].replace('\n', '\\n')
             excerpt = "'%s'" % excerpt if len(excerpt) < 25 else "'%s...'" % excerpt
             return record.stack, record.status, excerpt
@@ -472,7 +485,7 @@ class Grammar:
         name = self.__class__.__name__
         log_file_name = name[:-7] if name.lower().endswith('grammar') else name
         full_history, match_history, errors_only = [], [], []
-        for record in self.history:
+        for record in self.history__:
             line = "; ".join(prepare_line(record))
             full_history.append(line)
             if record.node and record.node.parser.ptype != WHITESPACE_PTYPE:
@@ -498,8 +511,8 @@ def dsl_error_msg(parser: Parser, error_str: str) -> str:
     tracking has been turned on in the grammar object.
     """
     msg = ["DSL parser specification error:", error_str,
            'Caught by parser "%s".' % str(parser)]
-    if parser.grammar.history:
-        msg.extend(["\nCall stack:", parser.grammar.history[-1].stack])
+    if parser.grammar.history__:
+        msg.extend(["\nCall stack:", parser.grammar.history__[-1].stack])
     else:
         msg.extend(["\nEnable history tracking in Grammar object to display call stack."])
     return " ".join(msg)
@@ -743,7 +756,7 @@ class NaryOperator(Parser):
     def __init__(self, *parsers: Parser, name: str = '') -> None:
         super(NaryOperator, self).__init__(name)
         # assert all([isinstance(parser, Parser) for parser in parsers]), str(parsers)
-        self.parsers = parsers  # type: Container ## [Parser]
+        self.parsers = parsers  # type: Collection

     def __deepcopy__(self, memo):
         parsers = copy.deepcopy(self.parsers, memo)
@@ -830,9 +843,9 @@ class OneOrMore(UnaryOperator):
         return Node(self, results), text_

-class Sequence(NaryOperator):
+class Series(NaryOperator):
     def __init__(self, *parsers: Parser, name: str = '') -> None:
-        super(Sequence, self).__init__(*parsers, name=name)
+        super(Series, self).__init__(*parsers, name=name)
         assert len(self.parsers) >= 1

     def __call__(self, text: str) -> Tuple[Node, str]:
@@ -848,14 +861,14 @@ class Sequence(NaryOperator):
         assert len(results) <= len(self.parsers)
         return Node(self, results), text_

-    def __add__(self, other: 'Sequence') -> 'Sequence':
-        return Sequence(*(self.parsers + (other,)))
+    def __add__(self, other: 'Series') -> 'Series':
+        return Series(*(tuple(self.parsers) + (other,)))

-    def __radd__(self, other: 'Sequence') -> 'Sequence':
-        return Sequence(other, *self.parsers)
+    def __radd__(self, other: 'Series') -> 'Series':
+        return Series(other, *self.parsers)

     # def __iadd__(self, other):
-    #     if isinstance(other, Sequence):
+    #     if isinstance(other, Series):
     #         self.parsers = self.parsers + other.parsers
     #     else:
     #         self.parsers = self.parsers + (other,)
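The tuple(...) call in the new __add__ is needed because self.parsers is now
only annotated as a Collection (see the NaryOperator change above), so it is
normalized to a tuple before being concatenated with (other,). The effect is
that chained + on a Series stays flat: Series(a, b) + c yields Series(a, b, c)
rather than a nested series.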
@@ -900,7 +913,7 @@ class Alternative(NaryOperator):
         return Alternative(other, *self.parsers)

     # def __ior__(self, other):
-    #     if isinstance(other, Sequence):
+    #     if isinstance(other, Series):
     #         self.parsers = self.parsers + other.parsers
     #     else:
     #         self.parsers = self.parsers + (other,)
@@ -972,7 +985,7 @@ class Lookbehind(FlowOperator):
         print("WARNING: Lookbehind Operator is experimental!")

     def __call__(self, text: str) -> Tuple[Node, str]:
-        if isinstance(self.grammar.last_node, Lookahead):
+        if isinstance(self.grammar.last_node__, Lookahead):
             return Node(self, '').add_error('Lookbehind right after Lookahead '
                                             'does not make sense!'), text
         if self.sign(self.condition()):
@@ -985,7 +998,7 @@ class Lookbehind(FlowOperator):
     def condition(self):
         node = None
-        for node in iter_right_branch(self.grammar.last_node):
+        for node in iter_right_branch(self.grammar.last_node__):
             if node.parser.name == self.parser.name:
                 return True
         if node and isinstance(self.parser, RegExp) and \
@@ -1013,11 +1026,12 @@ class Capture(UnaryOperator):
         super(Capture, self).__init__(parser, name)

     def __call__(self, text: str) -> Tuple[Node, str]:
-        node, text = self.parser(text)
+        node, text_ = self.parser(text)
         if node:
-            stack = self.grammar.variables.setdefault(self.name, [])
+            stack = self.grammar.variables__.setdefault(self.name, [])
             stack.append(str(node))
-            return Node(self, node), text
+            self.grammar.rollback__.append((len(text), lambda: stack.pop()))
+            return Node(self, node), text_
         else:
             return None, text
@@ -1053,7 +1067,7 @@ class Retrieve(Parser):
     def call(self, text: str) -> Tuple[Node, str]:
         try:
-            stack = self.grammar.variables[self.symbol.name]
+            stack = self.grammar.variables__[self.symbol.name]
             value = self.filter(stack)
         except (KeyError, IndexError):
             return Node(self, '').add_error(dsl_error_msg(self,
@@ -1070,8 +1084,9 @@ class Pop(Retrieve):
     def __call__(self, text: str) -> Tuple[Node, str]:
         nd, txt = super(Pop, self).call(text)  # call() instead of __call__() to avoid parser guard
         if nd and not nd.error_flag:
-            stack = self.grammar.variables[self.symbol.name]
-            stack.pop()
+            stack = self.grammar.variables__[self.symbol.name]
+            value = stack.pop()
+            self.grammar.rollback__.append((len(text), lambda: stack.append(value)))
         return nd, txt
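Capture and Pop are now symmetric with respect to rollback: a successful
Capture registers a callback that pops the value it pushed, and a successful
Pop registers one that pushes back the value it removed. Both record
len(text), the position before their match, so the guard in add_parser_guard
undoes them as soon as parsing backtracks to or behind that position. A hedged
illustration with a made-up grammar fragment ('::' is assumed to map to Pop
and ':' to Retrieve, as suggested by the retrieveop rule in the EBNFGrammar
above; the rule itself is invented):

    # fence  = /`+/              capture an opening fence of backticks
    # fenced = fence text ::fence   require the identical closing fence
    #
    # If the 'fenced' branch fails after the Capture of 'fence' succeeded,
    # the registered callback removes the stale entry from variables__;
    # if it fails after the Pop, the popped value is pushed back.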
DHParser/syntaxtree.py
@@ -27,8 +27,12 @@ try:
     import regex as re
 except ImportError:
     import re
-from .typing import AbstractSet, Any, ByteString, Callable, cast, Container, Iterator, List, \
-    NamedTuple, Sequence, Union, Text, Tuple
+try:
+    from typing import AbstractSet, Any, ByteString, Callable, cast, Container, Dict, \
+        Iterator, List, NamedTuple, Sequence, Union, Text, Tuple
+except ImportError:
+    from .typing34 import AbstractSet, Any, ByteString, Callable, cast, Container, Dict, \
+        Iterator, List, NamedTuple, Sequence, Union, Text, Tuple

 from DHParser.toolkit import log_dir, expand_table, line_col, smart_list
@@ -273,9 +277,9 @@ class Node:
     def show(self) -> str:
         """Returns content as string, inserting error messages where
-        errors ocurred.
+        errors occurred.
         """
-        s = "".join(child.show_errors() for child in self.children) if self.children \
+        s = "".join(child.show() for child in self.children) if self.children \
             else str(self.result)
         return (' <<< Error on "%s" | %s >>> ' % (s, '; '.join(self._errors))) if self._errors else s
@@ -389,7 +393,7 @@ class Node:
         """
         for child in self.children:
             child.propagate_error_flags()
-            self.error_flag |= child.error_flag
+            self.error_flag = self.error_flag or child.error_flag

     def collect_errors(self, clear_errors=False) -> List[Error]:
         """
@@ -605,7 +609,7 @@ def traverse(root_node, processing_table, key_func=key_tag_name) -> None:
     # with a single value
     table = {name: smart_list(call) for name, call in list(processing_table.items())}
     table = expand_table(table)
-    cache = {}
+    cache = {}  # type: Dict[str, List[Callable]]

     def traverse_recursive(node):
         if node.children:
DHParser/toolkit.py
@@ -38,7 +38,10 @@ try:
     import regex as re
 except ImportError:
     import re
-from .typing import List, Tuple
+try:
+    from typing import List, Tuple
+except ImportError:
+    from .typing34 import List, Tuple

 __all__ = ['logging',
DHParser/typing.LICENSE → DHParser/typing34.LICENSE
File moved

DHParser/typing.py → DHParser/typing34.py
File moved
OLDSTUFF/test_ParserCombinators.py
@@ -50,8 +50,8 @@ class ArithmeticGrammar(ParserRoot):
     constant.set(Sequence("constant", digit, ZeroOrMore(None, digit)))
     variable.set(Alternative("variable", Token("x", wspcR=wspc__), Token("y", wspcR=wspc__), Token("z", wspcR=wspc__)))
     factor = Alternative("factor", constant, variable, Sequence(None, Token("(", wspcR=wspc__), expression, Token(")", wspcR=wspc__)))
-    term = Sequence("term", factor, ZeroOrMore(None, Sequence(None, Alternative(None, Token("*", wspcR=wspc__), Token("/", wspcR=wspc__)), factor)))
-    expression.set(Sequence("expression", term, ZeroOrMore(None, Sequence(None, Alternative(None, Token("+", wspcR=wspc__), Token("-", wspcR=wspc__)), term))))
+    term = Sequence("term", factor, ZeroOrMore(None, Series(None, Alternative(None, Token("*", wspcR=wspc__), Token("/", wspcR=wspc__)), factor)))
+    expression.set(Sequence("expression", term, ZeroOrMore(None, Series(None, Alternative(None, Token("+", wspcR=wspc__), Token("-", wspcR=wspc__)), term))))
     root__ = expression
     """
@@ -96,7 +96,7 @@ class EBNFGrammar(ParserRoot):
     option = Sequence("option", Token("[", wspcR=wspc__), expression, Token("]", wspcR=wspc__))
     factor = Alternative("factor", symbol, literal, regexp, option, repetition, group)
     term = Sequence("term", factor, ZeroOrMore(None, factor))
-    expression.set(Sequence("expression", term, ZeroOrMore(None, Sequence(None, Token("|", wspcR=wspc__), term))))
+    expression.set(Sequence("expression", term, ZeroOrMore(None, Series(None, Token("|", wspcR=wspc__), term))))
     production = Sequence("production", symbol, Token("=", wspcR=wspc__), expression, Token(".", wspcR=wspc__))
     syntax = ZeroOrMore("syntax", production)
     root__ = syntax
bin/dhparser.py → dhparser.py
File moved
examples/MLW/OLDSTUFF/MLW_compiler.py
@@ -15,7 +15,7 @@ try:
 except ImportError:
     import re
 from DHParser.parsers import Grammar, Compiler, Alternative, Required, Token, \
-    Optional, OneOrMore, Sequence, RE, ZeroOrMore, NegativeLookahead, mixin_comment, compile_source
+    Optional, OneOrMore, Series, RE, ZeroOrMore, NegativeLookahead, mixin_comment, compile_source
 from DHParser.syntaxtree import traverse, reduce_single_child, replace_by_single_child, no_transformation, \
     remove_expendables, remove_tokens, flatten, \
     WHITESPACE_KEYWORD, TOKEN_KEYWORD
@@ -157,39 +157,39 @@ class MLWGrammar(Grammar):
     WORT_GROSS = RE('[A-ZÄÖÜ][a-zäöüß]+', wL='')
     WORT = RE('[A-ZÄÖÜ]?[a-zäöüß]+', wL='')
     NAMENS_ABKÜRZUNG = RE('[A-ZÄÖÜÁÀ]\\.', wR='', wL='')
-    Name = Sequence(WORT, ZeroOrMore(Alternative(WORT, NAMENS_ABKÜRZUNG)))
-    Autorinfo = Sequence(Alternative(Token("AUTORIN"), Token("AUTOR")), Name)
-    Zusatz = Sequence(Token("ZUSATZ"), RE('\\s*.*', wR='', wL=''), TRENNER)
-    EinBeleg = Sequence(OneOrMore(Sequence(NegativeLookahead(Sequence(Optional(LEER), Alternative(Token("*"), Token("BEDEUTUNG"), Token("AUTOR"), Token("NAME"), Token("ZUSATZ")))), RE('\\s*.*\\s*', wR='', wL=''))), Optional(Zusatz))
-    Belege = Sequence(Token("BELEGE"), Optional(LEER), ZeroOrMore(Sequence(Token("*"), EinBeleg)))
-    DeutscheBedeutung = Sequence(Token("DEU"), RE('(?:(?![A-ZÄÖÜ][A-ZÄÖÜ]).)+', wL=''))
-    LateinischeBedeutung = Sequence(Token("LAT"), RE('(?:(?![A-ZÄÖÜ][A-ZÄÖÜ]).)+', wL=''))
-    Interpretamente = Sequence(LateinischeBedeutung, Optional(LEER), Required(DeutscheBedeutung), Optional(LEER))
-    Bedeutungskategorie = Sequence(RE('(?:(?![A-ZÄÖÜ][A-ZÄÖÜ]).)+', wL=''), Optional(LEER))
-    Bedeutung = Sequence(Alternative(Interpretamente, Bedeutungskategorie), Optional(Belege))
-    BedeutungsPosition = OneOrMore(Sequence(Token("BEDEUTUNG"), Optional(LEER), Required(Bedeutung)))
+    Name = Series(WORT, ZeroOrMore(Alternative(WORT, NAMENS_ABKÜRZUNG)))
+    Autorinfo = Series(Alternative(Token("AUTORIN"), Token("AUTOR")), Name)
+    Zusatz = Series(Token("ZUSATZ"), RE('\\s*.*', wR='', wL=''), TRENNER)
+    EinBeleg = Series(OneOrMore(Series(NegativeLookahead(Series(Optional(LEER), Alternative(Token("*"), Token("BEDEUTUNG"), Token("AUTOR"), Token("NAME"), Token("ZUSATZ")))), RE('\\s*.*\\s*', wR='', wL=''))), Optional(Zusatz))
+    Belege = Series(Token("BELEGE"), Optional(LEER), ZeroOrMore(Series(Token("*"), EinBeleg)))
+    DeutscheBedeutung = Series(Token("DEU"), RE('(?:(?![A-ZÄÖÜ][A-ZÄÖÜ]).)+', wL=''))
+    LateinischeBedeutung = Series(Token("LAT"), RE('(?:(?![A-ZÄÖÜ][A-ZÄÖÜ]).)+', wL=''))
+    Interpretamente = Series(LateinischeBedeutung, Optional(LEER), Required(DeutscheBedeutung), Optional(LEER))
+    Bedeutungskategorie = Series(RE('(?:(?![A-ZÄÖÜ][A-ZÄÖÜ]).)+', wL=''), Optional(LEER))
+    Bedeutung = Series(Alternative(Interpretamente, Bedeutungskategorie), Optional(Belege))
+    BedeutungsPosition = OneOrMore(Series(Token("BEDEUTUNG"), Optional(LEER), Required(Bedeutung)))
     VerweisZiel = RE('<\\w+>')
     Verweis = RE('\\w+')
     Beleg = Verweis
     Schreibweise = Alternative(Token("vizreg-"), Token("festregel(a)"), Token("fezdregl(a)"), Token("fat-"))
-    SWVariante = Sequence(Schreibweise, Token(":"), Beleg)
+    SWVariante = Series(Schreibweise, Token(":"), Beleg)
     SWTyp = Alternative(Token("script."), Token("script. fat-"))
-    SchreibweisenPosition = Sequence(Token("SCHREIBWEISE"), Optional(LEER), Required(SWTyp), Token(":"), Optional(LEER), Required(SWVariante), ZeroOrMore(Sequence(TRENNER, SWVariante)), Optional(LEER))
+    SchreibweisenPosition = Series(Token("SCHREIBWEISE"), Optional(LEER), Required(SWTyp), Token(":"), Optional(LEER), Required(SWVariante), ZeroOrMore(Series(TRENNER, SWVariante)), Optional(LEER))
     ArtikelKopf = SchreibweisenPosition
     _genus = Alternative(Token("maskulinum"), Token("m."), Token("femininum"), Token("f."), Token("neutrum"), Token("n."))
     Flexion = RE('-?[a-z]+', wL='')
-    Flexionen = Sequence(Flexion, ZeroOrMore(Sequence(Token(","), Required(Flexion))))
-    GVariante = Sequence(Flexionen, Optional(_genus), Token(":"), Beleg)
-    GrammatikVarianten = Sequence(TRENNER, GVariante)
+    Flexionen = Series(Flexion, ZeroOrMore(Series(Token(","), Required(Flexion))))
+    GVariante = Series(Flexionen, Optional(_genus), Token(":"), Beleg)
+    GrammatikVarianten = Series(TRENNER, GVariante)
     _wortart = Alternative(Token("nomen"), Token("n."), Token("verb"), Token("v."), Token("adverb"), Token("adv."), Token("adjektiv"), Token("adj."))
-    GrammatikPosition = Sequence(Token("GRAMMATIK"), Optional(LEER), Required(_wortart), Required(TRENNER), Required(Flexionen), Optional(_genus), ZeroOrMore(GrammatikVarianten), Optional(TRENNER))
-    LVZusatz = Sequence(Token("ZUSATZ"), Token("sim."))
+    GrammatikPosition = Series(Token("GRAMMATIK"), Optional(LEER), Required(_wortart), Required(TRENNER), Required(Flexionen), Optional(_genus), ZeroOrMore(GrammatikVarianten), Optional(TRENNER))
+    LVZusatz = Series(Token("ZUSATZ"), Token("sim."))
     LVariante = RE('(?:[a-z]|-)+')
-    LemmaVarianten = Sequence(Token("VARIANTEN"), Optional(LEER), Required(LVariante), ZeroOrMore(Sequence(TRENNER, LVariante)), Optional(Sequence(TRENNER, LVZusatz)), Optional(TRENNER))
+    LemmaVarianten = Series(Token("VARIANTEN"), Optional(LEER), Required(LVariante), ZeroOrMore(Series(TRENNER, LVariante)), Optional(Series(TRENNER, LVZusatz)), Optional(TRENNER))
     _tll = Token("*")
-    Lemma = Sequence(Optional(_tll), WORT_KLEIN, Optional(LEER))
-    LemmaPosition = Sequence(Token("LEMMA"), Required(Lemma), Optional(LemmaVarianten), Required(GrammatikPosition))
-    Artikel = Sequence(Optional(LEER), Required(LemmaPosition), Optional(ArtikelKopf), Required(BedeutungsPosition), Required(Autorinfo), Optional(LEER), DATEI_ENDE)
+    Lemma = Series(Optional(_tll), WORT_KLEIN, Optional(LEER))
+    LemmaPosition = Series(Token("LEMMA"), Required(Lemma), Optional(LemmaVarianten), Required(GrammatikPosition))
+    Artikel = Series(Optional(LEER), Required(LemmaPosition), Optional(ArtikelKopf), Required(BedeutungsPosition), Required(Autorinfo), Optional(LEER), DATEI_ENDE)
     root__ = Artikel