Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
badw-it
DHParser
Commits
028797e1
Commit
028797e1
authored
Nov 27, 2018
by
di68kap
Browse files
Merge remote-tracking branch 'origin/development' into development
parents
1beb283d
49f47560
Changes
16
Hide whitespace changes
Inline
Side-by-side
DHParser/compile.py
View file @
028797e1
...
...
@@ -39,7 +39,7 @@ import os
import
re
from
DHParser.preprocess
import
strip_tokens
,
with_source_mapping
,
PreprocessorFunc
from
DHParser.syntaxtree
import
Node
,
RootNode
,
StrictResultType
from
DHParser.syntaxtree
import
Node
,
RootNode
,
ZOMBIE_ROOTNODE
,
StrictResultType
from
DHParser.transform
import
TransformationFunc
from
DHParser.parse
import
Grammar
from
DHParser.error
import
adjust_error_locations
,
is_error
,
Error
...
...
@@ -97,7 +97,7 @@ class Compiler:
self
.
_reset
()
def
_reset
(
self
):
self
.
tree
=
None
# type:
Optional[
RootNode
]
self
.
tree
=
ZOMBIE_ROOTNODE
# type: RootNode
self
.
context
=
[]
# type: List[Node]
self
.
_dirty_flag
=
False
...
...
@@ -116,7 +116,7 @@ class Compiler:
result
=
self
.
compile
(
root
)
return
result
def
set_grammar_name
(
self
,
grammar_name
:
str
=
""
,
grammar_source
:
str
=
""
):
def
set_grammar_name
(
self
,
grammar_name
:
str
=
""
,
grammar_source
:
str
=
""
):
"""
Changes the grammar's name and the grammar's source.
...
...
@@ -190,9 +190,9 @@ class Compiler:
"""
elem
=
node
.
parser
.
name
or
node
.
parser
.
ptype
[
1
:]
if
not
sane_parser_name
(
elem
):
node
.
add
_error
(
"Reserved name '%s' not allowed as parser "
"name! "
%
elem
+
"(Any name starting with "
"'_' or '__' or ending with '__' is reserved.)"
)
self
.
tree
.
new
_error
(
node
,
"Reserved name '%s' not allowed as parser "
"name! "
%
elem
+
"(Any name starting with "
"'_' or '__' or ending with '__' is reserved.)"
)
return
None
else
:
try
:
...
...
@@ -217,9 +217,9 @@ class Compiler:
def
compile_source
(
source
:
str
,
preprocessor
:
Optional
[
PreprocessorFunc
],
# str -> str
parser
:
Grammar
,
# str -> Node (concrete syntax tree (CST))
transformer
:
TransformationFunc
,
# Node (CST) -> Node (abstract
syntax tree
(AST))
transformer
:
TransformationFunc
,
# Node (CST) -> Node (abstract
ST
(AST))
compiler
:
Compiler
,
# Node (AST) -> Any
preserve_ast
:
bool
=
False
)
->
Tuple
[
Any
,
List
[
Error
],
Node
]:
preserve_ast
:
bool
=
False
)
->
Tuple
[
Optional
[
Any
]
,
List
[
Error
],
Optional
[
Node
]
]
:
"""
Compiles a source in four stages:
1. Pre-Processing (if needed)
...
...
@@ -259,7 +259,7 @@ def compile_source(source: str,
source_mapping
=
lambda
i
:
i
else
:
source_text
,
source_mapping
=
with_source_mapping
(
preprocessor
(
original_text
))
syntax_tree
=
parser
(
source_text
)
syntax_tree
=
parser
(
source_text
)
# type: RootNode
if
is_logging
():
log_ST
(
syntax_tree
,
log_file_name
+
'.cst'
)
log_parsing_history
(
parser
,
log_file_name
)
...
...
@@ -287,3 +287,6 @@ def compile_source(source: str,
messages
=
syntax_tree
.
collect_errors
()
adjust_error_locations
(
messages
,
original_text
,
source_mapping
)
return
result
,
messages
,
ast
# TODO: Verify compiler against grammar, i.e. make sure that for all on_X()-methods, `X` is the name of a parser
DHParser/dsl.py
View file @
028797e1
...
...
@@ -101,7 +101,7 @@ from DHParser import logging, is_filename, load_if_file, MockParser, \\
remove_nodes, remove_content, remove_brackets, replace_parser, remove_anonymous_tokens,
\\
keep_children, is_one_of, not_one_of, has_content, apply_if, remove_first, remove_last,
\\
remove_anonymous_empty, keep_nodes, traverse_locally, strip, lstrip, rstrip,
\\
replace_content, replace_content_by, recompile_grammar
replace_content, replace_content_by,
error_on,
recompile_grammar
'''
.
format
(
dhparserdir
=
dhparserdir
)
...
...
@@ -133,7 +133,7 @@ if __name__ == "__main__":
'because grammar was not found at: ' + grammar_path)
if len(sys.argv) > 1:
# compile file
# compile file
file_name, log_dir = sys.argv[1], ''
if file_name in ['-d', '--debug'] and len(sys.argv) > 2:
file_name, log_dir = sys.argv[2], 'LOGS'
...
...
@@ -157,7 +157,7 @@ class DSLException(Exception):
"""
def
__init__
(
self
,
errors
):
assert
isinstance
(
errors
,
Iterator
)
or
isinstance
(
errors
,
list
)
\
or
isinstance
(
errors
,
tuple
)
or
isinstance
(
errors
,
tuple
)
self
.
errors
=
errors
def
__str__
(
self
):
...
...
@@ -209,9 +209,10 @@ def grammar_instance(grammar_representation) -> Tuple[Grammar, str]:
parser_py
,
messages
=
grammar_src
,
[]
# type: str, List[Error]
else
:
with
logging
(
False
):
parser_py
,
messages
,
_
=
compile_source
(
result
,
messages
,
_
=
compile_source
(
grammar_src
,
None
,
get_ebnf_grammar
(),
get_ebnf_transformer
(),
get_ebnf_compiler
())
parser_py
=
cast
(
str
,
result
)
if
has_errors
(
messages
):
raise
GrammarError
(
only_errors
(
messages
),
grammar_src
)
parser_root
=
compile_python_object
(
DHPARSER_IMPORTS
+
parser_py
,
r
'\w+Grammar$'
)()
...
...
@@ -227,7 +228,7 @@ def grammar_instance(grammar_representation) -> Tuple[Grammar, str]:
def
compileDSL
(
text_or_file
:
str
,
preprocessor
:
PreprocessorFunc
,
preprocessor
:
Optional
[
PreprocessorFunc
]
,
dsl_grammar
:
Union
[
str
,
Grammar
],
ast_transformation
:
TransformationFunc
,
compiler
:
Compiler
)
->
Any
:
...
...
@@ -461,7 +462,8 @@ def compile_on_disk(source_file: str, compiler_suite="", extension=".xml") -> It
cfactory
=
get_ebnf_compiler
compiler1
=
cfactory
()
compiler1
.
set_grammar_name
(
compiler_name
,
source_file
)
result
,
messages
,
AST
=
compile_source
(
source
,
sfactory
(),
pfactory
(),
tfactory
(),
compiler1
)
result
,
messages
,
_
=
compile_source
(
source
,
sfactory
(),
pfactory
(),
tfactory
(),
compiler1
)
if
has_errors
(
messages
):
return
messages
...
...
@@ -477,17 +479,17 @@ def compile_on_disk(source_file: str, compiler_suite="", extension=".xml") -> It
f
=
open
(
rootname
+
'Compiler.py'
,
'r'
,
encoding
=
"utf-8"
)
source
=
f
.
read
()
sections
=
RX_SECTION_MARKER
.
split
(
source
)
intro
,
imports
,
preprocessor
,
parser
,
ast
,
compiler
,
outro
=
sections
# TODO: Verify transformation table
intro
,
imports
,
preprocessor
,
_
,
ast
,
compiler
,
outro
=
sections
ast_trans_table
=
compile_python_object
(
DHPARSER_IMPORTS
+
ast
,
r
'(?:\w+_)?AST_transformation_table$'
)
messages
.
extend
(
ebnf_compiler
.
verify_transformation_table
(
ast_trans_table
))
except
(
PermissionError
,
FileNotFoundError
,
IOError
)
as
error
:
intro
,
imports
,
preprocessor
,
parser
,
ast
,
compiler
,
outro
=
''
,
''
,
''
,
''
,
''
,
''
,
''
except
ValueError
as
error
:
# TODO: Verify compiler
except
(
PermissionError
,
FileNotFoundError
,
IOError
):
intro
,
imports
,
preprocessor
,
_
,
ast
,
compiler
,
outro
=
''
,
''
,
''
,
''
,
''
,
''
,
''
except
ValueError
:
name
=
'"'
+
rootname
+
'Compiler.py"'
raise
ValueError
(
'Could not identify all required sections in '
+
name
+
'. Please delete or repair '
+
name
+
' manually!'
)
raise
ValueError
(
'Could not identify all required sections in '
+
name
+
'. Please delete or repair '
+
name
+
' manually!'
)
finally
:
if
f
:
f
.
close
()
...
...
@@ -515,7 +517,7 @@ def compile_on_disk(source_file: str, compiler_suite="", extension=".xml") -> It
f
.
write
(
SECTION_MARKER
.
format
(
marker
=
PREPROCESSOR_SECTION
))
f
.
write
(
preprocessor
)
f
.
write
(
SECTION_MARKER
.
format
(
marker
=
PARSER_SECTION
))
f
.
write
(
result
)
f
.
write
(
cast
(
str
,
result
)
)
f
.
write
(
SECTION_MARKER
.
format
(
marker
=
AST_SECTION
))
f
.
write
(
ast
)
f
.
write
(
SECTION_MARKER
.
format
(
marker
=
COMPILER_SECTION
))
...
...
@@ -558,7 +560,7 @@ def compile_on_disk(source_file: str, compiler_suite="", extension=".xml") -> It
def
recompile_grammar
(
ebnf_filename
,
force
=
False
,
notify
:
Callable
=
lambda
:
None
)
->
bool
:
notify
:
Callable
=
lambda
:
None
)
->
bool
:
"""
Re-compiles an EBNF-grammar if necessary, that is, if either no
corresponding 'XXXXCompiler.py'-file exists or if that file is
...
...
@@ -581,7 +583,7 @@ def recompile_grammar(ebnf_filename, force=False,
success
=
success
and
recompile_grammar
(
entry
,
force
)
return
success
base
,
ext
=
os
.
path
.
splitext
(
ebnf_filename
)
base
,
_
=
os
.
path
.
splitext
(
ebnf_filename
)
compiler_name
=
base
+
'Compiler.py'
error_file_name
=
base
+
'_ebnf_ERRORS.txt'
messages
=
[]
# type: Iterable[Error]
...
...
DHParser/ebnf.py
View file @
028797e1
...
...
@@ -183,7 +183,7 @@ def grammar_changed(grammar_class, grammar_source: str) -> bool:
# grammar_class = load_compiler_suite(grammar_class)[1]
with
open
(
grammar_class
,
'r'
,
encoding
=
'utf8'
)
as
f
:
pycode
=
f
.
read
()
m
=
re
.
search
(
'class \w*\(Grammar\)'
,
pycode
)
m
=
re
.
search
(
r
'class \w*\(Grammar\)'
,
pycode
)
if
m
:
m
=
re
.
search
(
' source_hash__ *= *"([a-z0-9]*)"'
,
pycode
[
m
.
span
()[
1
]:])
...
...
@@ -247,6 +247,7 @@ EBNF_AST_transformation_table = {
def
EBNFTransform
()
->
TransformationFunc
:
return
partial
(
traverse
,
processing_table
=
EBNF_AST_transformation_table
.
copy
())
def
get_ebnf_transformer
()
->
TransformationFunc
:
global
thread_local_EBNF_transformer_singleton
try
:
...
...
@@ -475,13 +476,13 @@ class EBNFCompiler(Compiler):
raise
EBNFCompilerError
(
'Compiler has not been run before calling '
'"gen_Compiler_Skeleton()"!'
)
compiler
=
[
'class '
+
self
.
grammar_name
+
'Compiler(Compiler):'
,
' """Compiler for the abstract-syntax-tree of a '
+
self
.
grammar_name
+
' source file.'
,
' """Compiler for the abstract-syntax-tree of a '
+
self
.
grammar_name
+
' source file.'
,
' """'
,
''
,
' def __init__(self, grammar_name="'
+
self
.
grammar_name
+
'", grammar_source=""):'
,
' super('
+
self
.
grammar_name
+
'Compiler, self).__init__(grammar_name, grammar_source)'
,
' def __init__(self, grammar_name="'
+
self
.
grammar_name
+
'", grammar_source=""):'
,
' super('
+
self
.
grammar_name
+
'Compiler, self).__init__(grammar_name, grammar_source)'
,
r
" assert re.match('\w+\Z', grammar_name)"
,
''
,
' def _reset(self):'
,
' super()._reset()'
,
...
...
@@ -515,6 +516,13 @@ class EBNFCompiler(Compiler):
0
,
Error
.
UNDEFINED_SYMBOL_IN_TRANSFORMATION_TABLE
))
return
messages
def
verify_compiler
(
self
,
compiler
):
"""
Checks for on_XXXX()-methods that occur in the compiler, although XXXX
has never been defined in the grammar. Usually, this kind of
inconsistency results from an error like a typo in the compiler-code.
"""
pass
# TODO: add verification code here
def
assemble_parser
(
self
,
definitions
:
List
[
Tuple
[
str
,
str
]],
root_node
:
Node
)
->
str
:
"""
...
...
@@ -541,8 +549,8 @@ class EBNFCompiler(Compiler):
definitions
.
append
((
self
.
WHITESPACE_PARSER_KEYWORD
,
'Whitespace(%s)'
%
self
.
WHITESPACE_KEYWORD
))
definitions
.
append
((
self
.
WHITESPACE_KEYWORD
,
(
"mixin_comment(whitespace="
+
self
.
RAW_WS_KEYWORD
+
", comment="
+
self
.
COMMENT_KEYWORD
+
")"
)))
(
"mixin_comment(whitespace="
+
self
.
RAW_WS_KEYWORD
+
", comment="
+
self
.
COMMENT_KEYWORD
+
")"
)))
definitions
.
append
((
self
.
RAW_WS_KEYWORD
,
"r'{whitespace}'"
.
format
(
**
self
.
directives
)))
definitions
.
append
((
self
.
COMMENT_KEYWORD
,
"r'{comment}'"
.
format
(
**
self
.
directives
)))
...
...
@@ -550,11 +558,11 @@ class EBNFCompiler(Compiler):
# add EBNF grammar to the doc string of the parser class
article
=
'an '
if
self
.
grammar_name
[
0
:
1
]
in
"AaEeIiOoUu"
else
'a '
# what about 'hour', 'universe' etc.?
declarations
=
[
'class '
+
self
.
grammar_name
+
'Grammar(Grammar):'
,
'r"""Parser for '
+
article
+
self
.
grammar_name
+
' source file'
+
(
', with this grammar:'
if
self
.
grammar_source
else
'.'
)]
declarations
=
[
'class '
+
self
.
grammar_name
+
'Grammar(Grammar):'
,
'r"""Parser for '
+
article
+
self
.
grammar_name
+
' source file'
+
(
', with this grammar:'
if
self
.
grammar_source
else
'.'
)]
definitions
.
append
((
'parser_initialization__'
,
'"upon instantiation"'
))
if
self
.
grammar_source
:
definitions
.
append
((
'source_hash__'
,
...
...
@@ -583,7 +591,7 @@ class EBNFCompiler(Compiler):
for
symbol
in
self
.
symbols
:
if
symbol
not
in
defined_symbols
:
self
.
tree
.
new_error
(
self
.
symbols
[
symbol
],
"Missing definition for symbol '%s'"
%
symbol
)
"Missing definition for symbol '%s'"
%
symbol
)
# root_node.error_flag = True
# check for unconnected rules
...
...
@@ -642,7 +650,7 @@ class EBNFCompiler(Compiler):
first
=
self
.
rules
[
rule
][
0
]
if
not
first
.
errors
:
self
.
tree
.
new_error
(
first
,
'First definition of rule "%s" '
'followed by illegal redefinitions.'
%
rule
)
'followed by illegal redefinitions.'
%
rule
)
self
.
tree
.
new_error
(
node
,
'A rule "%s" has already been defined earlier.'
%
rule
)
elif
rule
in
EBNFCompiler
.
RESERVED_SYMBOLS
:
self
.
tree
.
new_error
(
node
,
'Symbol "%s" is a reserved symbol.'
%
rule
)
...
...
@@ -682,7 +690,8 @@ class EBNFCompiler(Compiler):
prepended by the multiline-flag. Returns the regular expression string.
"""
flags
=
self
.
re_flags
|
{
'x'
}
if
rx
.
find
(
'
\n
'
)
>=
0
else
self
.
re_flags
if
flags
:
rx
=
"(?%s)%s"
%
(
""
.
join
(
flags
),
rx
)
if
flags
:
rx
=
"(?%s)%s"
%
(
""
.
join
(
flags
),
rx
)
try
:
re
.
compile
(
rx
)
except
Exception
as
re_error
:
...
...
@@ -769,7 +778,7 @@ class EBNFCompiler(Compiler):
return
""
def
non_terminal
(
self
,
node
:
Node
,
parser_class
:
str
,
custom_args
:
List
[
str
]
=
[])
->
str
:
def
non_terminal
(
self
,
node
:
Node
,
parser_class
:
str
,
custom_args
:
List
[
str
]
=
[])
->
str
:
"""
Compiles any non-terminal, where `parser_class` indicates the Parser class
name for the particular non-terminal.
...
...
@@ -833,7 +842,7 @@ class EBNFCompiler(Compiler):
# shift = (Node(node.parser, node.result[1].result),)
# node.result[1].result = shift + node.result[2:]
node
.
children
[
1
].
result
=
(
Node
(
node
.
children
[
1
].
parser
,
node
.
children
[
1
].
result
),)
\
+
node
.
children
[
2
:]
+
node
.
children
[
2
:]
node
.
children
[
1
].
parser
=
node
.
parser
node
.
result
=
(
node
.
children
[
0
],
node
.
children
[
1
])
...
...
@@ -943,7 +952,7 @@ class EBNFCompiler(Compiler):
else
:
parser
=
'_RE('
if
rx
[:
2
]
==
'~/'
:
if
not
'left'
in
self
.
directives
[
'literalws'
]:
if
'left'
not
in
self
.
directives
[
'literalws'
]:
name
=
[
'wL='
+
self
.
WHITESPACE_KEYWORD
]
+
name
rx
=
rx
[
1
:]
elif
'left'
in
self
.
directives
[
'literalws'
]:
...
...
DHParser/error.py
View file @
028797e1
...
...
@@ -41,7 +41,7 @@ import bisect
from
DHParser.preprocess
import
SourceMapFunc
from
DHParser.stringview
import
StringView
from
DHParser.toolkit
import
typing
from
typing
import
Iterable
,
Iterator
,
Union
,
Tuple
,
List
,
NewType
from
typing
import
Iterable
,
Iterator
,
Union
,
Tuple
,
List
__all__
=
(
'ErrorCode'
,
'Error'
,
...
...
@@ -200,7 +200,7 @@ def line_col(lbreaks: List[int], pos: int) -> Tuple[int, int]:
def
adjust_error_locations
(
errors
:
List
[
Error
],
original_text
:
Union
[
StringView
,
str
],
source_mapping
:
SourceMapFunc
=
lambda
i
:
i
)
->
List
[
Error
]:
source_mapping
:
SourceMapFunc
=
lambda
i
:
i
)
->
List
[
Error
]:
"""Adds (or adjusts) line and column numbers of error messages in place.
Args:
...
...
DHParser/log.py
View file @
028797e1
...
...
@@ -55,7 +55,7 @@ import os
from
DHParser.error
import
line_col
from
DHParser.stringview
import
StringView
from
DHParser.syntaxtree
import
Node
,
WHITESPACE_PTYPE
from
DHParser.syntaxtree
import
Node
from
DHParser.toolkit
import
is_filename
,
escape_control_characters
,
typing
from
typing
import
List
,
Tuple
,
Union
...
...
@@ -206,12 +206,13 @@ class HistoryRecord:
COLGROUP
=
'<colgroup>
\n
<col style="width:2%"/><col style="width:2%"/><col '
\
'style="width:75%"/><col style="width:6%"/><col style="width:15%"/>
\n
</colgroup>'
HEADINGS
=
(
'<tr><th>L</th><th>C</th><th>parser call sequence</th>'
'<th>success</th><th>text matched or failed</th></tr>'
)
HTML_LEAD_IN
=
(
'<!DOCTYPE html>
\n
'
'<th>success</th><th>text matched or failed</th></tr>'
)
HTML_LEAD_IN
=
(
'<!DOCTYPE html>
\n
'
'<html>
\n
<head>
\n
<meta charset="utf-8"/>
\n
<style>
\n
'
'td,th {font-family:monospace; '
'border-right: thin solid grey; border-bottom: thin solid grey}
\n
'
'td.line, td.column {color:darkgrey}
\n
'
# 'td.stack {}\n'
'border-right: thin solid grey; border-bottom: thin solid grey}
\n
'
'td.line, td.column {color:darkgrey}
\n
'
# 'td.stack {}\n'
'td.status {font-weight:bold}
\n
'
'td.text {color:darkblue}
\n
'
'table {border-spacing: 0px; border: thin solid darkgrey; width:100%}
\n
'
...
...
@@ -236,7 +237,7 @@ class HistoryRecord:
def
__str__
(
self
):
return
'%4i, %2i: %s; %s; "%s"'
%
self
.
as_tuple
()
def
as_tuple
(
self
)
->
Snapshot
:
def
as_tuple
(
self
)
->
'
Snapshot
'
:
"""
Returns history record formatted as a snapshot tuple.
"""
...
...
@@ -260,10 +261,10 @@ class HistoryRecord:
if
status
==
self
.
MATCH
:
status
=
'<span class="match">'
+
status
+
'</span>'
i
=
stack
.
rfind
(
'->'
)
chr
=
stack
[
i
+
12
:
i
+
13
]
chr
=
stack
[
i
+
12
:
i
+
13
]
while
not
chr
.
isidentifier
()
and
i
>=
0
:
i
=
stack
.
rfind
(
'->'
,
0
,
i
)
chr
=
stack
[
i
+
12
:
i
+
13
]
chr
=
stack
[
i
+
12
:
i
+
13
]
if
i
>=
0
:
i
+=
12
k
=
stack
.
find
(
'<'
,
i
)
...
...
@@ -294,7 +295,6 @@ class HistoryRecord:
def
status
(
self
)
->
str
:
return
self
.
FAIL
if
self
.
node
is
None
else
\
(
'"%s"'
%
self
.
err_msg
())
if
self
.
node
.
errors
else
self
.
MATCH
# has_errors(self.node._errors)
@
property
def
excerpt
(
self
):
...
...
@@ -344,8 +344,8 @@ class HistoryRecord:
remaining
=
-
1
result
=
None
for
record
in
history
:
if
(
record
.
status
==
HistoryRecord
.
MATCH
and
(
record
.
remaining
<
remaining
or
remaining
<
0
)):
if
(
record
.
status
==
HistoryRecord
.
MATCH
and
(
record
.
remaining
<
remaining
or
remaining
<
0
)):
result
=
record
remaining
=
record
.
remaining
return
result
...
...
@@ -376,7 +376,7 @@ LOG_SIZE_THRESHOLD = 10000 # maximum number of history records to log
LOG_TAIL_THRESHOLD
=
500
# maximum number of history records for "tail log"
def
log_parsing_history
(
grammar
,
log_file_name
:
str
=
''
,
html
:
bool
=
True
)
->
None
:
def
log_parsing_history
(
grammar
,
log_file_name
:
str
=
''
,
html
:
bool
=
True
)
->
None
:
"""
Writes a log of the parsing history of the most recently parsed document.
...
...
@@ -415,8 +415,7 @@ def log_parsing_history(grammar, log_file_name: str = '', html: bool=True) -> No
if
not
is_logging
():
raise
AssertionError
(
"Cannot log history when logging is turned off!"
)
# assert self.history__, \
# "Parser did not yet run or logging was turned off when running parser!"
if
not
log_file_name
:
name
=
grammar
.
__class__
.
__name__
log_file_name
=
name
[:
-
7
]
if
name
.
lower
().
endswith
(
'grammar'
)
else
name
...
...
@@ -424,35 +423,22 @@ def log_parsing_history(grammar, log_file_name: str = '', html: bool=True) -> No
log_file_name
=
log_file_name
[:
-
4
]
full_history
=
[
'<h1>Full parsing history of "%s"</h1>'
%
log_file_name
]
# type: List[str]
# match_history = ['<h1>Match history of parsing "%s"</h1>' % log_file_name] # type: List[str]
# errors_only = ['<h1>Errors when parsing "%s"</h1>' % log_file_name] # type: List[str]
if
len
(
grammar
.
history__
)
>
LOG_SIZE_THRESHOLD
:
warning
=
(
'Sorry, man, %iK history records is just too many! '
'Only looking at the last %iK records.'
%
(
len
(
grammar
.
history__
)
//
1000
,
LOG_SIZE_THRESHOLD
//
1000
))
warning
=
(
'Sorry, man, %iK history records is just too many! '
'Only looking at the last %iK records.'
%
(
len
(
grammar
.
history__
)
//
1000
,
LOG_SIZE_THRESHOLD
//
1000
))
html_warning
=
'<p><strong>'
+
warning
+
'</strong></p>'
full_history
.
append
(
html_warning
)
# match_history.append(html_warning)
# errors_only.append(html_warning)
lead_in
=
'
\n
'
.
join
([
'<table>'
,
HistoryRecord
.
COLGROUP
,
HistoryRecord
.
HEADINGS
])
full_history
.
append
(
lead_in
)
# match_history.append(lead_in)
# errors_only.append(lead_in)
for
record
in
grammar
.
history__
[
-
LOG_SIZE_THRESHOLD
:]:
line
=
record
.
as_html_tr
()
if
html
else
str
(
record
)
append_line
(
full_history
,
line
)
# if record.node and record.node.parser.ptype != WHITESPACE_PTYPE:
# append_line(match_history, line)
# if record.node.errors:
# append_line(errors_only, line)
write_log
(
full_history
,
log_file_name
+
'_full'
)
if
len
(
full_history
)
>
LOG_TAIL_THRESHOLD
+
10
:
heading
=
'<h1>Last 500 records of parsing history of "%s"</h1>'
%
log_file_name
+
lead_in
write_log
([
heading
]
+
full_history
[
-
LOG_TAIL_THRESHOLD
:],
log_file_name
+
'_full.tail'
)
# write_log(match_history, log_file_name + '_match')
# if (len(errors_only) > 3 or (len(grammar.history__) <= LOG_SIZE_THRESHOLD
# and len(errors_only) > 2)):
# write_log(errors_only, log_file_name + '_errors')
DHParser/parse.py
View file @
028797e1
...
...
@@ -40,7 +40,7 @@ from DHParser.stringview import StringView, EMPTY_STRING_VIEW
from
DHParser.syntaxtree
import
Node
,
RootNode
,
ParserBase
,
WHITESPACE_PTYPE
,
\
TOKEN_PTYPE
,
ZOMBIE_PARSER
from
DHParser.toolkit
import
sane_parser_name
,
escape_control_characters
,
re
,
typing
from
typing
import
Callable
,
cast
,
Dict
,
DefaultDict
,
List
,
Set
,
Tuple
,
Union
,
Optional
from
typing
import
Callable
,
cast
,
List
,
Tuple
,
Set
,
Dict
,
DefaultDict
,
Union
,
Optional
__all__
=
(
'Parser'
,
...
...
@@ -263,7 +263,7 @@ class Parser(ParserBase):
"""
duplicate
=
self
.
__class__
()
duplicate
.
name
=
self
.
name
duplicate
.
ptype
=
self
.
ptype
duplicate
.
ptype
=
self
.
ptype
return
duplicate
def
reset
(
self
):
...
...
@@ -271,7 +271,7 @@ class Parser(ParserBase):
the `reset()`-method of the parent class must be called from the
`reset()`-method of the derived class."""
self
.
visited
=
dict
()
# type: Dict[int, Tuple[Optional[Node], StringView]]
self
.
recursion_counter
=
defaultdict
(
lambda
:
0
)
# type: DefaultDict[int, int]
self
.
recursion_counter
=
defaultdict
(
lambda
:
0
)
# type: DefaultDict[int, int]
self
.
cycle_detection
=
set
()
# type: Set[Callable]
def
__call__
(
self
,
text
:
StringView
)
->
Tuple
[
Optional
[
Node
],
StringView
]:
...
...
@@ -293,7 +293,10 @@ class Parser(ParserBase):
@
property
def
grammar
(
self
)
->
'Grammar'
:
return
self
.
_grammar
if
self
.
_grammar
:
return
self
.
_grammar
else
:
raise
AssertionError
(
'Grammar has not yet been set!'
)
@
grammar
.
setter
def
grammar
(
self
,
grammar
:
'Grammar'
):
...
...
@@ -301,8 +304,9 @@ class Parser(ParserBase):
self
.
_grammar
=
grammar
self
.
_grammar_assigned_notifier
()
else
:
assert
self
.
_grammar
==
grammar
,
\
"Parser has already been assigned to a different Grammar object!"
if
self
.
_grammar
!=
grammar
:
raise
AssertionError
(
"Parser has already been assigned"
"to a different Grammar object!"
)
def
_grammar_assigned_notifier
(
self
):
"""A function that notifies the parser object that it has been
...
...
@@ -564,12 +568,6 @@ class Grammar:
def
__init__
(
self
,
root
:
Parser
=
None
)
->
None
:
# if not hasattr(self.__class__, 'parser_initialization__'):
# self.__class__.parser_initialization__ = "pending"
# if not hasattr(self.__class__, 'wspL__'):
# self.wspL__ = ''
# if not hasattr(self.__class__, 'wspR__'):
# self.wspR__ = ''
self
.
all_parsers__
=
set
()
# type: Set[ParserBase]
self
.
_dirty_flag__
=
False
# type: bool
self
.
history_tracking__
=
False
# type: bool
...
...
@@ -609,7 +607,7 @@ class Grammar:
self
.
document_length__
=
0
# type: int
self
.
document_lbreaks__
=
[]
# type: List[int]
# variables stored and recalled by Capture and Retrieve parsers
self
.
variables__
=
defaultdict
(
lambda
:[])
# type: DefaultDict[str, List[str]]
self
.
variables__
=
defaultdict
(
lambda
:
[])
# type: DefaultDict[str, List[str]]
self
.
rollback__
=
[]
# type: List[Tuple[int, Callable]]
self
.
last_rb__loc__
=
-
1
# type: int
# support for call stack tracing
...
...
@@ -650,7 +648,7 @@ class Grammar:
parser
.
grammar
=
self
def
__call__
(
self
,
document
:
str
,
start_parser
=
"root__"
,
track_history
=
False
)
->
Node
:
def
__call__
(
self
,
document
:
str
,
start_parser
=
"root__"
,
track_history
=
False
)
->
Root
Node
:
"""
Parses a document with parser-combinators.
...
...
@@ -668,7 +666,7 @@ class Grammar:
Node: The root node to the parse tree.
"""
def
tail_pos
(
predecessors
:
Union
[
List
[
Node
],
Tuple
[
Node
,
...]])
->
int
:
def
tail_pos
(
predecessors
:
Union
[
List
[
Node
],
Tuple
[
Node
,
...]
,
None
])
->
int
:
"""Returns the position after the last node in the list of
predecessors, or 0 if the list is empty."""
return
predecessors
[
-
1
].
pos
+
len
(
predecessors
[
-
1
])
if
predecessors
else
0
...
...
@@ -715,10 +713,11 @@ class Grammar:
str
(
HistoryRecord
.
last_match
(
self
.
history__
)))
# Check if a Lookahead-Parser did match. Needed for testing, because
# in a test case this is not necessarily an error.
last_record
=
self
.
history__
[
-
2
]
if
len
(
self
.
history__
)
>
1
else
[
]
last_record
=
self
.
history__
[
-
2
]
if
len
(
self
.
history__
)
>
1
else
None
# type: Optional[HistoryRecord
]
if
last_record
and
parser
!=
self
.
root__
\
and
last_record
.
status
==
HistoryRecord
.
MATCH
\
and
last_record
.
node
.
pos
+
len
(
last_record
.
node
)
>=
len
(
self
.
document__
)
\
and
last_record
.
node
.
pos
\
+
len
(
last_record
.
node
)
>=
len
(
self
.
document__
)
\
and
any
(
isinstance
(
parser
,
Lookahead
)
for
parser
in
last_record
.
call_stack
):
error_msg
=
'Parser did not match except for lookahead! '
+
err_info
...
...
@@ -728,12 +727,12 @@ class Grammar:
error_code
=
Error
.
PARSER_DID_NOT_MATCH
else
:
stitches
.
append
(
result
)
error_msg
=
"Parser stopped before end"
+
\
((
"! trying to recover"
+
(
" but stopping history recording at this point."
if
self
.
history_tracking__
else
"..."
))
if
len
(
stitches
)
<
MAX_DROPOUTS
else
" too often! Terminating parser."
)
error_msg
=
"Parser stopped before end"
\
+
((
"! trying to recover"
+
(
" but stopping history recording at this point."
if
self
.
history_tracking__
else
"..."
))
if
len
(
stitches
)
<
MAX_DROPOUTS
else
" too often! Terminating parser."
)
error_code
=
Error
.
PARSER_STOPPED_BEFORE_END
stitches
.
append
(
Node
(
None
,
skip
).
init_pos
(
tail_pos
(
stitches
)))
self
.
tree__
.
new_error
(
stitches
[
-
1
],
error_msg
,
error_code
)
...
...
@@ -769,7 +768,8 @@ class Grammar:
self
.
tree__
.
new_error
(
result
,
error_msg
,
error_code
)
# result.pos = 0 # calculate all positions
# result.collect_errors(self.document__)
self
.
tree__
.
swallow
(
result
)
if
result
:
self
.
tree__
.
swallow
(
result
)
return
self
.
tree__
...
...
@@ -849,7 +849,7 @@ class PreprocessorToken(Parser):
def
__deepcopy__
(
self
,
memo
):
duplicate
=
self
.
__class__
(
self
.
name
)
duplicate
.
name
=
self
.
name
duplicate
.
ptype
=
self
.
ptype
duplicate
.
ptype
=
self
.
ptype
return
duplicate
def
__call__
(
self
,
text
:
StringView
)
->
Tuple
[
Optional
[
Node
],
StringView
]:
...
...
@@ -857,19 +857,22 @@ class PreprocessorToken(Parser):
end
=
text
.
find
(
END_TOKEN
,
1
)
if
end
<
0
:
node
=
Node
(
self
,
''
)
self
.
grammar
.
tree__
.
new_error
(
node
,
self
.
grammar
.
tree__
.
new_error
(
node
,
'END_TOKEN delimiter missing from preprocessor token. '
'(Most likely due to a preprocessor bug!)'
)
# type: Node
return
node
,
text
[
1
:]
elif
end
==
0
:
node
=
Node
(
self
,
''
)
self
.
grammar
.
tree__
.
new_error
(
node
,
self
.
grammar
.
tree__
.
new_error
(
node
,
'Preprocessor-token cannot have zero length. '
'(Most likely due to a preprocessor bug!)'
)
return
node
,
text
[
2
:]
elif
text
.
find
(
BEGIN_TOKEN
,
1
,
end
)
>=
0
:
node
=
Node
(
self
,
text
[
len
(
self
.
name
)
+
1
:
end
])
self
.
grammar
.
tree__
.
new_error
(
node
,
self
.
grammar
.
tree__
.
new_error
(
node
,
'Preprocessor-tokens must not be nested or contain '
'BEGIN_TOKEN delimiter as part of their argument. '
'(Most likely due to a preprocessor bug!)'
)
...
...
@@ -943,7 +946,7 @@ class RegExp(Parser):
regexp
=
self
.
regexp
.
pattern
duplicate
=
self
.
__class__
(
regexp
)
duplicate
.
name
=
self
.
name
duplicate
.
ptype
=
self
.
ptype
duplicate
.
ptype
=
self
.
ptype
return
duplicate
def
__call__
(
self
,
text
:
StringView
)
->
Tuple
[
Optional
[
Node
],
StringView
]:
...
...
@@ -964,7 +967,7 @@ class RegExp(Parser):