Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
7
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Open sidebar
badw-it
DHParser
Commits
71dca11d
Commit
71dca11d
authored
Sep 21, 2017
by
Eckhart Arnold
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
- Mandatory operator finished and tested
parent
89b64a8c
Changes
14
Hide whitespace changes
Inline
Side-by-side
Showing
14 changed files
with
326 additions
and
182 deletions
+326
-182
DHParser/dsl.py
DHParser/dsl.py
+3
-3
DHParser/ebnf.py
DHParser/ebnf.py
+109
-30
DHParser/error.py
DHParser/error.py
+27
-24
DHParser/parser.py
DHParser/parser.py
+11
-10
DHParser/syntaxtree.py
DHParser/syntaxtree.py
+33
-33
DHParser/testing.py
DHParser/testing.py
+4
-4
DHParser/transform.py
DHParser/transform.py
+7
-5
examples/EBNF/EBNF.ebnf
examples/EBNF/EBNF.ebnf
+1
-1
examples/EBNF/EBNF_old.ebnf
examples/EBNF/EBNF_old.ebnf
+2
-2
examples/LaTeX/LaTeX.ebnf
examples/LaTeX/LaTeX.ebnf
+1
-1
examples/LaTeX/LaTeXCompiler.py
examples/LaTeX/LaTeXCompiler.py
+94
-57
examples/LaTeX/tst_LaTeX_docs.py
examples/LaTeX/tst_LaTeX_docs.py
+3
-4
examples/LaTeX/tst_LaTeX_grammar.py
examples/LaTeX/tst_LaTeX_grammar.py
+8
-5
test/test_ebnf.py
test/test_ebnf.py
+23
-3
No files found.
DHParser/dsl.py
View file @
71dca11d
...
...
@@ -409,8 +409,8 @@ def compile_on_disk(source_file: str, compiler_suite="", extension=".xml"):
A (potentially empty) list of error or warning messages.
"""
filepath
=
os
.
path
.
normpath
(
source_file
)
#
with open(source_file, encoding="utf-8") as f:
#
source = f.read()
with
open
(
source_file
,
encoding
=
"utf-8"
)
as
f
:
source
=
f
.
read
()
rootname
=
os
.
path
.
splitext
(
filepath
)[
0
]
compiler_name
=
os
.
path
.
basename
(
rootname
)
if
compiler_suite
:
...
...
@@ -422,7 +422,7 @@ def compile_on_disk(source_file: str, compiler_suite="", extension=".xml"):
cfactory
=
get_ebnf_compiler
compiler1
=
cfactory
()
compiler1
.
set_grammar_name
(
compiler_name
,
source_file
)
result
,
messages
,
ast
=
compile_source
(
source
_file
,
sfactory
(),
pfactory
(),
tfactory
(),
compiler1
)
result
,
messages
,
ast
=
compile_source
(
source
,
sfactory
(),
pfactory
(),
tfactory
(),
compiler1
)
if
has_errors
(
messages
):
return
messages
...
...
DHParser/ebnf.py
View file @
71dca11d
...
...
@@ -30,9 +30,9 @@ except ImportError:
from
.typing34
import
Callable
,
Dict
,
List
,
Set
,
Tuple
,
Union
from
DHParser.toolkit
import
load_if_file
,
escape_re
,
md5
,
sane_parser_name
from
DHParser.parser
import
Grammar
,
mixin_comment
,
nil_preprocessor
,
Forward
,
R
E
,
NegativeLookahead
,
\
Alternative
,
Series
,
Option
,
Required
,
OneOrMore
,
ZeroOrMore
,
Token
,
Compiler
,
\
PreprocessorFunc
from
DHParser.parser
import
Grammar
,
mixin_comment
,
nil_preprocessor
,
Forward
,
R
egExp
,
RE
,
\
NegativeLookahead
,
Alternative
,
Series
,
Option
,
OneOrMore
,
ZeroOrMore
,
Token
,
\
Compiler
,
PreprocessorFunc
from
DHParser.syntaxtree
import
Node
,
TransformationFunc
,
WHITESPACE_PTYPE
,
TOKEN_PTYPE
from
DHParser.error
import
Error
from
DHParser.transform
import
traverse
,
remove_brackets
,
\
...
...
@@ -73,6 +73,77 @@ def get_ebnf_preprocessor() -> PreprocessorFunc:
########################################################################
# class EBNFGrammar(Grammar):
# r"""Parser for an EBNF source file, with this grammar:
#
# # EBNF-Grammar in EBNF
#
# @ comment = /#.*(?:\n|$)/ # comments start with '#' and eat all chars up to and including '\n'
# @ whitespace = /\s*/ # whitespace includes linefeed
# @ literalws = right # trailing whitespace of literals will be ignored tacitly
#
# syntax = [~//] { definition | directive } §EOF
# definition = symbol §"=" expression
# directive = "@" §symbol §"=" ( regexp | literal | list_ )
#
# expression = term { "|" term }
# term = { factor }+
# factor = [flowmarker] [retrieveop] symbol !"=" # negative lookahead to be sure it's not a definition
# | [flowmarker] literal
# | [flowmarker] regexp
# | [flowmarker] group
# | [flowmarker] oneormore
# | repetition
# | option
#
# flowmarker = "!" | "&" | "§" | # '!' negative lookahead, '&' positive lookahead, '§' required
# "-!" | "-&" # '-' negative lookbehind, '-&' positive lookbehind
# retrieveop = "::" | ":" # '::' pop, ':' retrieve
#
# group = "(" expression §")"
# oneormore = "{" expression "}+"
# repetition = "{" expression §"}"
# option = "[" expression §"]"
#
# symbol = /(?!\d)\w+/~ # e.g. expression, factor, parameter_list
# literal = /"(?:[^"]|\\")*?"/~ # e.g. "(", '+', 'while'
# | /'(?:[^']|\\')*?'/~ # whitespace following literals will be ignored tacitly.
# regexp = /~?\/(?:[^\/]|(?<=\\)\/)*\/~?/~ # e.g. /\w+/, ~/#.*(?:\n|$)/~
# # '~' is a whitespace-marker, if present leading or trailing
# # whitespace of a regular expression will be ignored tacitly.
# list_ = /\w+/~ { "," /\w+/~ } # comma separated list of symbols, e.g. BEGIN_LIST, END_LIST,
# # BEGIN_QUOTE, END_QUOTE ; see CommonMark/markdown.py for an example
# EOF = !/./
# """
# expression = Forward()
# source_hash__ = "a410e1727fb7575e98ff8451dbf8f3bd"
# parser_initialization__ = "upon instantiation"
# COMMENT__ = r'#.*(?:\n|$)'
# WSP__ = mixin_comment(whitespace=r'\s*', comment=r'#.*(?:\n|$)')
# wspL__ = ''
# wspR__ = WSP__
# EOF = NegativeLookahead(RE('.', wR=''))
# list_ = Series(RE('\\w+'), ZeroOrMore(Series(Token(","), RE('\\w+'))))
# regexp = RE(r'~?/(?:\\/|[^/])*?/~?') # RE('~?/(?:[^/]|(?<=\\\\)/)*/~?')
# literal = Alternative(RE('"(?:[^"]|\\\\")*?"'), RE("'(?:[^']|\\\\')*?'"))
# symbol = RE('(?!\\d)\\w+')
# option = Series(Token("["), expression, Required(Token("]")))
# repetition = Series(Token("{"), expression, Required(Token("}")))
# oneormore = Series(Token("{"), expression, Token("}+"))
# group = Series(Token("("), expression, Required(Token(")")))
# retrieveop = Alternative(Token("::"), Token(":"))
# flowmarker = Alternative(Token("!"), Token("&"), Token("§"), Token("-!"), Token("-&"))
# factor = Alternative(Series(Option(flowmarker), Option(retrieveop), symbol, NegativeLookahead(Token("="))),
# Series(Option(flowmarker), literal), Series(Option(flowmarker), regexp),
# Series(Option(flowmarker), group), Series(Option(flowmarker), oneormore),
# repetition, option)
# term = OneOrMore(factor)
# expression.set(Series(term, ZeroOrMore(Series(Token("|"), term))))
# directive = Series(Token("@"), Required(symbol), Required(Token("=")), Alternative(regexp, literal, list_))
# definition = Series(symbol, Required(Token("=")), expression)
# syntax = Series(Option(RE('', wR='', wL=WSP__)), ZeroOrMore(Alternative(definition, directive)), Required(EOF))
# root__ = syntax
class
EBNFGrammar
(
Grammar
):
r
"""Parser for an EBNF source file, with this grammar:
...
...
@@ -84,10 +155,10 @@ class EBNFGrammar(Grammar):
syntax = [~//] { definition | directive } §EOF
definition = symbol §"=" expression
directive = "@" §symbol
§
"=" ( regexp | literal | list_ )
directive = "@" §symbol "=" ( regexp | literal | list_ )
expression = term { "|" term }
term = { factor }+
term = {
["§"]
factor }+
# "§" means all following factors mandatory
factor = [flowmarker] [retrieveop] symbol !"=" # negative lookahead to be sure it's not a definition
| [flowmarker] literal
| [flowmarker] regexp
...
...
@@ -96,8 +167,8 @@ class EBNFGrammar(Grammar):
| repetition
| option
flowmarker = "!" | "&"
| "§" |
# '!' negative lookahead, '&' positive lookahead
, '§' required
"-!" | "-&" # '-' negative lookbehind, '-&' positive lookbehind
flowmarker = "!" | "&"
# '!' negative lookahead, '&' positive lookahead
|
"-!" | "-&" # '-' negative lookbehind, '-&' positive lookbehind
retrieveop = "::" | ":" # '::' pop, ':' retrieve
group = "(" expression §")"
...
...
@@ -108,7 +179,7 @@ class EBNFGrammar(Grammar):
symbol = /(?!\d)\w+/~ # e.g. expression, factor, parameter_list
literal = /"(?:[^"]|\\")*?"/~ # e.g. "(", '+', 'while'
| /'(?:[^']|\\')*?'/~ # whitespace following literals will be ignored tacitly.
regexp = /~?\/(?:
[^\/]|(?<=\\)
\/)*\/~?/~ # e.g. /\w+/, ~/#.*(?:\n|$)/~
regexp = /~?\/(?:
\\\/|[^
\/
]
)*
?
\/~?/~
# e.g. /\w+/, ~/#.*(?:\n|$)/~
# '~' is a whitespace-marker, if present leading or trailing
# whitespace of a regular expression will be ignored tacitly.
list_ = /\w+/~ { "," /\w+/~ } # comma separated list of symbols, e.g. BEGIN_LIST, END_LIST,
...
...
@@ -116,32 +187,39 @@ class EBNFGrammar(Grammar):
EOF = !/./
"""
expression
=
Forward
()
source_hash__
=
"a
410e1727fb7575e98ff8451dbf8f3bd
"
source_hash__
=
"a
131abc5259738631000cda90d2fc65b
"
parser_initialization__
=
"upon instantiation"
COMMENT__
=
r
'#.*(?:\n|$)'
WSP__
=
mixin_comment
(
whitespace
=
r
'\s*'
,
comment
=
r
'#.*(?:\n|$)'
)
WHITESPACE__
=
r
'\s*'
WSP__
=
mixin_comment
(
whitespace
=
WHITESPACE__
,
comment
=
COMMENT__
)
wspL__
=
''
wspR__
=
WSP__
EOF
=
NegativeLookahead
(
RE
(
'.'
,
wR
=
''
))
list_
=
Series
(
RE
(
'
\\
w+'
),
ZeroOrMore
(
Series
(
Token
(
","
),
RE
(
'
\\
w+'
))))
regexp
=
RE
(
r
'~?/(?:\\/|[^/])*?/~?'
)
# RE('~?/(?:[^/]|(?<=\\\\)/)*/~?')
EOF
=
NegativeLookahead
(
RegExp
(
'.'
))
list_
=
Series
(
RE
(
'
\\
w+'
),
ZeroOrMore
(
Series
(
Token
(
","
),
RE
(
'
\\
w+'
),
mandatory
=
1000
)),
mandatory
=
1000
)
regexp
=
RE
(
'~?/(?:
\\\\
/|[^/])*?/~?'
)
literal
=
Alternative
(
RE
(
'"(?:[^"]|
\\\\
")*?"'
),
RE
(
"'(?:[^']|
\\\\
')*?'"
))
symbol
=
RE
(
'(?!
\\
d)
\\
w+'
)
option
=
Series
(
Token
(
"["
),
expression
,
Required
(
Token
(
"]"
)
)
)
repetition
=
Series
(
Token
(
"{"
),
expression
,
Required
(
Token
(
"}"
)
)
)
oneormore
=
Series
(
Token
(
"{"
),
expression
,
Token
(
"}+"
))
group
=
Series
(
Token
(
"("
),
expression
,
Required
(
Token
(
")"
)
)
)
option
=
Series
(
Token
(
"["
),
expression
,
Token
(
"]"
)
,
mandatory
=
2
)
repetition
=
Series
(
Token
(
"{"
),
expression
,
Token
(
"}"
)
,
mandatory
=
2
)
oneormore
=
Series
(
Token
(
"{"
),
expression
,
Token
(
"}+"
)
,
mandatory
=
1000
)
group
=
Series
(
Token
(
"("
),
expression
,
Token
(
")"
)
,
mandatory
=
2
)
retrieveop
=
Alternative
(
Token
(
"::"
),
Token
(
":"
))
flowmarker
=
Alternative
(
Token
(
"!"
),
Token
(
"&"
),
Token
(
"§"
),
Token
(
"-!"
),
Token
(
"-&"
))
factor
=
Alternative
(
Series
(
Option
(
flowmarker
),
Option
(
retrieveop
),
symbol
,
NegativeLookahead
(
Token
(
"="
))),
Series
(
Option
(
flowmarker
),
literal
),
Series
(
Option
(
flowmarker
),
regexp
),
Series
(
Option
(
flowmarker
),
group
),
Series
(
Option
(
flowmarker
),
oneormore
),
repetition
,
option
)
term
=
OneOrMore
(
factor
)
expression
.
set
(
Series
(
term
,
ZeroOrMore
(
Series
(
Token
(
"|"
),
term
))))
directive
=
Series
(
Token
(
"@"
),
Required
(
symbol
),
Required
(
Token
(
"="
)),
Alternative
(
regexp
,
literal
,
list_
))
definition
=
Series
(
symbol
,
Required
(
Token
(
"="
)),
expression
)
syntax
=
Series
(
Option
(
RE
(
''
,
wR
=
''
,
wL
=
WSP__
)),
ZeroOrMore
(
Alternative
(
definition
,
directive
)),
Required
(
EOF
))
flowmarker
=
Alternative
(
Token
(
"!"
),
Token
(
"&"
),
Token
(
"-!"
),
Token
(
"-&"
))
factor
=
Alternative
(
Series
(
Option
(
flowmarker
),
Option
(
retrieveop
),
symbol
,
NegativeLookahead
(
Token
(
"="
)),
mandatory
=
1000
),
Series
(
Option
(
flowmarker
),
literal
,
mandatory
=
1000
),
Series
(
Option
(
flowmarker
),
regexp
,
mandatory
=
1000
),
Series
(
Option
(
flowmarker
),
group
,
mandatory
=
1000
),
Series
(
Option
(
flowmarker
),
oneormore
,
mandatory
=
1000
),
repetition
,
option
)
term
=
OneOrMore
(
Series
(
Option
(
Token
(
"§"
)),
factor
,
mandatory
=
1000
))
expression
.
set
(
Series
(
term
,
ZeroOrMore
(
Series
(
Token
(
"|"
),
term
,
mandatory
=
1000
)),
mandatory
=
1000
))
directive
=
Series
(
Token
(
"@"
),
symbol
,
Token
(
"="
),
Alternative
(
regexp
,
literal
,
list_
),
mandatory
=
1
)
definition
=
Series
(
symbol
,
Token
(
"="
),
expression
,
mandatory
=
1
)
syntax
=
Series
(
Option
(
RE
(
''
,
wR
=
''
,
wL
=
WSP__
)),
ZeroOrMore
(
Alternative
(
definition
,
directive
)),
EOF
,
mandatory
=
2
)
root__
=
syntax
...
...
@@ -583,7 +661,7 @@ class EBNFCompiler(Compiler):
else
:
assert
nd
.
parser
.
name
==
"directive"
,
nd
.
as_sxpr
()
self
.
compile
(
nd
)
node
.
error_flag
=
max
(
node
.
error_flag
,
nd
.
error_flag
)
node
.
error_flag
=
max
(
node
.
error_flag
,
nd
.
error_flag
)
self
.
definitions
.
update
(
definitions
)
return
self
.
assemble_parser
(
definitions
,
node
)
...
...
@@ -715,6 +793,7 @@ class EBNFCompiler(Compiler):
name for the particular non-terminal.
"""
arguments
=
[
self
.
compile
(
r
)
for
r
in
node
.
children
]
+
custom_args
node
.
error_flag
=
max
(
node
.
error_flag
,
max
(
t
.
error_flag
for
t
in
node
.
children
))
return
parser_class
+
'('
+
', '
.
join
(
arguments
)
+
')'
...
...
@@ -731,10 +810,10 @@ class EBNFCompiler(Compiler):
mandatory_marker
.
append
(
i
)
if
i
==
0
:
nd
.
add_error
(
'First item of a series should not be mandatory.'
,
code
=
Error
.
WARNING
)
Error
.
WARNING
)
elif
len
(
mandatory_marker
)
>
1
:
nd
.
add_error
(
'One mandatory marker (§) sufficient to declare the '
'rest of the series as mandatory.'
,
code
=
Error
.
WARNING
)
'rest of the series as mandatory.'
,
Error
.
WARNING
)
else
:
filtered_children
.
append
(
nd
)
i
+=
1
...
...
DHParser/error.py
View file @
71dca11d
...
...
@@ -27,8 +27,7 @@ __all__ = ('Error',
'has_errors'
,
'only_errors'
,
'linebreaks'
,
'line_col'
,
'error_messages'
)
'line_col'
)
class
Error
:
...
...
@@ -44,14 +43,15 @@ class Error:
MANDATORY_CONTINUATION
=
1001
def
__init__
(
self
,
message
:
str
,
level
:
int
=
ERROR
,
code
:
Hashable
=
0
):
def
__init__
(
self
,
message
:
str
,
level
:
int
=
ERROR
,
code
:
Hashable
=
0
,
pos
:
int
=
-
1
,
line
:
int
=
-
1
,
column
:
int
=
-
1
):
self
.
message
=
message
assert
level
>=
0
self
.
level
=
level
or
Error
.
ERROR
self
.
code
=
code
self
.
pos
=
-
1
self
.
line
=
-
1
self
.
column
=
-
1
self
.
pos
=
pos
self
.
line
=
line
self
.
column
=
column
def
__str__
(
self
):
prefix
=
''
...
...
@@ -59,6 +59,10 @@ class Error:
prefix
=
"line: %3i, column: %2i, "
%
(
self
.
line
,
self
.
column
)
return
prefix
+
"%s: %s"
%
(
self
.
level_str
,
self
.
message
)
def
__repr__
(
self
):
return
'Error("%s", %i, %s, %i, %i, %i)'
\
%
(
self
.
message
,
self
.
level
,
repr
(
self
.
code
),
self
.
pos
,
self
.
line
,
self
.
column
)
@property
def level_str(self):
    """Severity label of this error: "Warning" if ``is_warning()`` accepts
    the error's level, "Error" otherwise."""
    if is_warning(self.level):
        return "Warning"
    return "Error"
...
...
@@ -124,21 +128,20 @@ def _line_col(lbreaks: List[int], pos: int) -> Tuple[int, int]:
column
=
pos
-
lbreaks
[
line
-
1
]
return
line
,
column
def error_messages(source_text, errors) -> List[str]:
    """Return the given error objects as a list of error messages,
    ordered by their position in the source text.

    Errors that carry a position (``pos >= 0``) but no line number yet
    (``line <= 0``) get their ``line`` and ``column`` attributes filled
    in from ``source_text`` before they are converted to strings.

    Args:
        source_text (str):  The source text on which the errors occurred.
            (Needed in order to determine the line and column numbers.)
        errors (list):  A sequence or iterator of error objects, e.g. the
            list of errors as returned by the method ``collect_errors()``
            of a Node object.
    Returns:
        a list that contains all error messages in string form (the
        result of ``str(err)``), sorted by ``err.pos``.
    """
    # Materialize the input once: a one-shot iterator would otherwise be
    # exhausted by the loop below and ``sorted()`` would see nothing.
    errors = list(errors)
    for err in errors:
        if err.pos >= 0 and err.line <= 0:
            err.line, err.column = line_col(source_text, err.pos)
    return [str(err) for err in sorted(errors, key=lambda err: err.pos)]
# def error_messages(source_text:str, errors: List[Error]) -> List[str]:
# """Adds line, column information for error messages, if the position
# is given.
#
# Args:
# source_text (str): The source text on which the errors occurred.
# (Needed in order to determine the line and column numbers.)
# errors (list): The list of errors as returned by the method
# ``collect_errors()`` of a Node object
# Returns:
# The same list of error messages, which now contain line and
# column numbers.
# """
# for err in errors:
# if err.pos >= 0 and err.line <= 0:
# err.line, err.column = line_col(source_text, err.pos)
# return errors
DHParser/parser.py
View file @
71dca11d
...
...
@@ -690,9 +690,9 @@ class Grammar:
for
entry
,
parser
in
cdict
.
items
():
if
isinstance
(
parser
,
Parser
)
and
sane_parser_name
(
entry
):
if
not
parser
.
name
:
parser
.
name
=
entry
if
(
isinstance
(
parser
,
Forward
)
and
(
not
parser
.
parser
.
name
)):
parser
.
parser
.
name
=
entry
parser
.
_
name
=
entry
if
(
isinstance
(
parser
,
Forward
)
and
(
not
parser
.
parser
.
_
name
)):
parser
.
parser
.
_
name
=
entry
cls
.
parser_initialization__
=
"done"
...
...
@@ -843,7 +843,7 @@ class Grammar:
stitches
[
-
1
].
add_error
(
error_msg
)
if
self
.
history_tracking__
:
# some parsers may have matched and left history records with nodes != None.
# Because these are not connected to the stiched root node, their pos
# Because these are not connected to the sti
t
ched root node, their pos
-
# properties will not be initialized by setting the root node's pos property
# to zero. Therefore, their pos properties need to be initialized here
for
record
in
self
.
history__
:
...
...
@@ -869,7 +869,7 @@ class Grammar:
else
:
result
.
add_error
(
error_str
)
result
.
pos
=
0
# calculate all positions
result
.
finalize
_errors
(
self
.
document__
)
#
result.
collect
_errors(self.document__)
return
result
...
...
@@ -1426,9 +1426,10 @@ class Series(NaryOperator):
text_
=
text
[
i
:]
node
.
add_error
(
'%s expected; "%s" found!'
%
(
str
(
parser
),
text
[:
10
]),
code
=
Error
.
MANDATORY_CONTINUATION
)
return
node
,
text_
results
+=
(
node
,)
if
node
.
error_flag
:
break
#
if node.error_flag:
#
break
pos
+=
1
assert
len
(
results
)
<=
len
(
self
.
parsers
)
return
Node
(
self
,
results
),
text_
...
...
@@ -1976,14 +1977,14 @@ def compile_source(source: str,
# likely that error list gets littered with compile error messages
result
=
None
ef
=
syntax_tree
.
error_flag
messages
=
syntax_tree
.
collect_errors
(
clear_errors
=
True
)
messages
=
syntax_tree
.
collect_errors
(
source_text
,
clear_errors
=
True
)
if
not
is_error
(
ef
):
transformer
(
syntax_tree
)
ef
=
max
(
ef
,
syntax_tree
.
error_flag
)
messages
.
extend
(
syntax_tree
.
collect_errors
(
clear_errors
=
True
))
messages
.
extend
(
syntax_tree
.
collect_errors
(
source_text
,
clear_errors
=
True
))
if
is_logging
():
syntax_tree
.
log
(
log_file_name
+
'.ast'
)
if
not
is_error
(
syntax_tree
.
error_flag
):
result
=
compiler
(
syntax_tree
)
messages
.
extend
(
syntax_tree
.
collect_errors
())
messages
.
extend
(
syntax_tree
.
collect_errors
(
source_text
))
syntax_tree
.
error_flag
=
max
(
syntax_tree
.
error_flag
,
ef
)
return
result
,
messages
,
syntax_tree
DHParser/syntaxtree.py
View file @
71dca11d
...
...
@@ -60,7 +60,7 @@ class ParserBase:
for instantiation.
"""
def
__init__
(
self
,
name
=
''
):
# , pbases=frozenset()):
self
.
name
=
name
# type: str
self
.
_
name
=
name
# type: str
self
.
_ptype
=
':'
+
self
.
__class__
.
__name__
# type: str
def
__repr__
(
self
):
...
...
@@ -69,6 +69,10 @@ class ParserBase:
def
__str__
(
self
):
return
self
.
name
+
(
' = '
if
self
.
name
else
''
)
+
repr
(
self
)
@property
def name(self):
    """Return the parser's name as stored in ``self._name``."""
    return self._name
@property
def ptype(self) -> str:
    """Return the parser type string as stored in ``self._ptype``."""
    return self._ptype
...
...
@@ -94,8 +98,7 @@ class MockParser(ParserBase):
"""
def
__init__
(
self
,
name
=
''
,
ptype
=
''
):
# , pbases=frozenset()):
assert
not
ptype
or
ptype
[
0
]
==
':'
super
(
MockParser
,
self
).
__init__
(
name
)
self
.
name
=
name
super
().
__init__
(
name
)
self
.
_ptype
=
ptype
or
':'
+
self
.
__class__
.
__name__
...
...
@@ -303,44 +306,41 @@ class Node(collections.abc.Sized):
def
errors
(
self
)
->
List
[
Error
]:
return
self
.
_errors
.
copy
()
def
add_error
(
self
,
message
:
str
,
level
:
int
=
Error
.
ERROR
,
code
:
Hashable
=
0
)
->
'Node'
:
def
add_error
(
self
,
message
:
str
,
level
:
int
=
Error
.
ERROR
,
code
:
Hashable
=
0
)
->
'Node'
:
self
.
_errors
.
append
(
Error
(
message
,
level
,
code
))
self
.
error_flag
=
max
(
self
.
error_flag
,
self
.
_errors
[
-
1
].
level
)
return
self
def _finalize_errors(self, lbreaks: List[int]):
    """Assign line and column numbers to the errors of this node and,
    recursively, to those of its children.

    Nodes whose ``error_flag`` is not set are left untouched. Every error
    must already have a non-negative ``pos``.

    Args:
        lbreaks:  The line-break positions that are handed on to
            ``line_col()`` together with each error's position.
    """
    if not self.error_flag:
        return
    for error in self._errors:
        assert error.pos >= 0
        error.line, error.column = line_col(lbreaks, error.pos)
    for node in self.children:
        node._finalize_errors(lbreaks)
def finalize_errors(self, source_text: Union[StringView, str]):
    """Recursively adds line- and column-numbers to all error objects.

    The line breaks of ``source_text`` are determined once and then passed
    down the tree. Nothing happens if this node's ``error_flag`` is not set.
    """
    if not self.error_flag:
        return
    self._finalize_errors(linebreaks(source_text))
def
collect_errors
(
self
,
clear_errors
=
False
)
->
List
[
Error
]:
def
collect_errors
(
self
,
document
:
Union
[
StringView
,
str
]
=
''
,
clear_errors
=
False
)
->
List
[
Error
]:
"""
Recursively adds line- and column-numbers to all error objects.
Returns all errors of this node or any child node in the form
of a set of tuples (position, error_message), where position
is always relative to this node.
"""
errors
=
self
.
errors
if
clear_errors
:
self
.
_errors
=
[]
self
.
error_flag
=
0
if
self
.
children
:
for
child
in
self
.
children
:
errors
.
extend
(
child
.
collect_errors
(
clear_errors
))
return
errors
if
self
.
error_flag
:
lbreaks
=
linebreaks
(
document
)
if
document
else
[]
return
self
.
_collect_errors
(
lbreaks
,
clear_errors
)
else
:
return
[]
def _collect_errors(self, lbreaks: Union[List[int], None] = None,
                    clear_errors=False) -> List[Error]:
    """Gather the errors of this node and, recursively, of its children.

    Args:
        lbreaks:  Line-break positions of the source document. If given
            and non-empty, every error of this node gets its ``pos`` set
            to the node's ``pos`` and its ``line`` and ``column`` computed
            with ``line_col()``. If omitted or empty, the errors are
            collected without touching their position data.
        clear_errors:  If True, the node's own error list and its
            ``error_flag`` are reset after the errors have been picked up.
    Returns:
        The errors of this node followed by the errors of its children
        (in child order). An empty list if ``error_flag`` is not set; in
        that case the children are not visited at all.
    """
    if not self.error_flag:
        return []
    errors = self.errors
    if lbreaks:
        for err in errors:
            err.pos = self.pos
            err.line, err.column = line_col(lbreaks, err.pos)
    if clear_errors:
        self._errors = []
        self.error_flag = 0
    if self.children:
        for child in self.children:
            errors.extend(child._collect_errors(lbreaks, clear_errors))
    return errors
def
_tree_repr
(
self
,
tab
,
openF
,
closeF
,
dataF
=
identity
,
density
=
0
)
->
str
:
...
...
@@ -408,7 +408,7 @@ class Node(collections.abc.Sized):
s
=
lB
+
node
.
tag_name
# s += " '(pos %i)" % node.pos
if
src
:
s
+=
" '(pos %i "
%
node
.
pos
+
" %i %i)"
%
line_col
(
src
,
node
.
pos
)
s
+=
" '(pos %i "
%
node
.
pos
#
+ " %i %i)" % line_col(src, node.pos)
if
node
.
errors
:
s
+=
" '(err '(%s))"
%
' '
.
join
(
str
(
err
).
replace
(
'"'
,
r
'\"'
)
for
err
in
node
.
errors
)
...
...
DHParser/testing.py
View file @
71dca11d
...
...
@@ -29,7 +29,7 @@ except ImportError:
from
DHParser.toolkit
import
is_logging
,
clear_logs
from
DHParser.syntaxtree
import
mock_syntax_tree
,
flatten_sxpr
from
DHParser.error
import
is_error
,
error_messages
from
DHParser.error
import
is_error
__all__
=
(
'unit_from_configfile'
,
'unit_from_json'
,
...
...
@@ -78,7 +78,7 @@ def unit_from_json(json_filename):
for
symbol
in
unit
:
for
stage
in
unit
[
symbol
]:
if
stage
not
in
UNIT_STAGES
:
raise
ValueError
(
'Test stage %s not in: '
%
(
stage
,
str
(
UNIT_STAGES
)))
raise
ValueError
(
'Test stage %s not in:
%s
'
%
(
stage
,
str
(
UNIT_STAGES
)))
return
unit
# TODO: add support for yaml, cson, toml
...
...
@@ -163,8 +163,8 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
if
is_error
(
cst
.
error_flag
):
errata
.
append
(
'Match test "%s" for parser "%s" failed:
\n\t
Expr.: %s
\n\n\t
%s
\n\n
'
%
(
test_name
,
parser_name
,
'
\n\t
'
.
join
(
test_code
.
split
(
'
\n
'
)),
'
\n\t
'
.
join
(
m
.
replace
(
'
\n
'
,
'
\n\t\t
'
)
for
m
in
error_messages
(
test_code
,
cst
.
collect_errors
(
)
))))
'
\n\t
'
.
join
(
str
(
m
)
.
replace
(
'
\n
'
,
'
\n\t\t
'
)
for
m
in
cst
.
collect_errors
(
test_code
))))
tests
.
setdefault
(
'__err__'
,
{})[
test_name
]
=
errata
[
-
1
]
# write parsing-history log only in case of failure!
parser
.
log_parsing_history__
(
"match_%s_%s.log"
%
(
parser_name
,
test_name
))
...
...
DHParser/transform.py
View file @
71dca11d
...
...
@@ -275,11 +275,13 @@ def TRUE_CONDITION(context: List[Node]) -> bool:
def
replace_child
(
node
:
Node
):
assert
len
(
node
.
children
)
==
1
if
not
node
.
children
[
0
].
parser
.
name
:
node
.
children
[
0
].
parser
.
name
=
node
.
parser
.
name
node
.
parser
=
node
.
children
[
0
].
parser
node
.
_errors
.
extend
(
node
.
children
[
0
].
_errors
)
node
.
result
=
node
.
children
[
0
].
result
child
=
node
.
children
[
0
]
if
not
child
.
parser
.
name
:
child
.
parser
=
MockParser
(
node
.
parser
.
name
,
child
.
parser
.
ptype
)
# parser names must not be overwritten, else: child.parser.name = node.parser.name
node
.
parser
=
child
.
parser
node
.
_errors
.
extend
(
child
.
_errors
)
node
.
result
=
child
.
result
def
reduce_child
(
node
:
Node
):
...
...
examples/EBNF/EBNF.ebnf
View file @
71dca11d
...
...
@@ -19,7 +19,7 @@ factor = [flowmarker] [retrieveop] symbol !"=" # negative lookahead to be
| option
flowmarker = "!" | "&" # '!' negative lookahead, '&' positive lookahead
"-!" | "-&" # '-' negative lookbehind, '-&' positive lookbehind
|
"-!" | "-&" # '-' negative lookbehind, '-&' positive lookbehind
retrieveop = "::" | ":" # '::' pop, ':' retrieve
group = "(" expression §")"
...
...
examples/EBNF/EBNF_old.ebnf
View file @
71dca11d
...
...
@@ -18,8 +18,8 @@ factor = [flowmarker] [retrieveop] symbol !"=" # negative lookahead to be
| repetition
| option
flowmarker = "!" | "&" | "§"
|
# '!' negative lookahead, '&' positive lookahead, '§' required
"-!" | "-&" # '-' negative lookbehind, '-&' positive lookbehind
flowmarker = "!" | "&" | "§"
# '!' negative lookahead, '&' positive lookahead, '§' required
|
"-!" | "-&" # '-' negative lookbehind, '-&' positive lookbehind
retrieveop = "::" | ":" # '::' pop, ':' retrieve
group = "(" expression §")"
...
...
examples/LaTeX/LaTeX.ebnf
View file @
71dca11d
# LaTeX-Grammar for DHParser
# preamble
@ whitespace = /[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?/ # optional whitespace, including at most one linefeed
@ comment = /%.*/
########################################################################
#
# outer document structure
...
...
examples/LaTeX/LaTeXCompiler.py
View file @
71dca11d
...
...
@@ -16,7 +16,7 @@ try:
except
ImportError
:
import
re
from
DHParser
import
logging
,
is_filename
,
Grammar
,
Compiler
,
Lookbehind
,
Alternative
,
Pop
,
\
Required
,
Token
,
Synonym
,
\
Token
,
Synonym
,
\
Option
,
NegativeLookbehind
,
OneOrMore
,
RegExp
,
Series
,
RE
,
Capture
,
\
ZeroOrMore
,
Forward
,
NegativeLookahead
,
mixin_comment
,
compile_source
,
\
PreprocessorFunc
,
TransformationDict
,
\
...
...
@@ -49,10 +49,10 @@ class LaTeXGrammar(Grammar):
# LaTeX-Grammar for DHParser
# preamble
@ whitespace = /[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?/ # optional whitespace, including at most one linefeed
@ comment = /%.*/
########################################################################
#
# outer document structure
...
...
@@ -228,7 +228,7 @@ class LaTeXGrammar(Grammar):
paragraph
=
Forward
()
tabular_config
=
Forward
()
text_element
=
Forward
()
source_hash__
=
"
37585004123d6b80ecf8f67217b43479
"
source_hash__
=
"
6f0e961d68f21a54a6e4b1fb01fe17bf
"
parser_initialization__
=
"upon instantiation"
COMMENT__
=
r
'%.*'
WHITESPACE__
=
r
'[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?'
...
...
@@ -238,12 +238,15 @@ class LaTeXGrammar(Grammar):
EOF
=
RegExp
(
'(?!.)'
)
BACKSLASH
=
RegExp
(
'[
\\\\
]'
)
LB
=
RegExp
(
'
\\
s*?
\\
n|$'
)
NEW_LINE
=
Series
(
RegExp
(
'[
\\
t]*'
),
Option
(
RegExp
(
COMMENT__
)),
RegExp
(
'
\\
n'
))
NEW_LINE
=
Series
(
RegExp
(
'[
\\
t]*'
),
Option
(
RegExp
(
COMMENT__
)),
RegExp
(
'
\\
n'
)
,
mandatory
=
1000
)
GAP
=
RE
(
'[
\\
t]*(?:
\\
n[
\\
t]*)+
\\
n'
)
WSPC
=
OneOrMore
(
Alternative
(
RegExp
(
COMMENT__
),
RegExp
(
'
\\
s+'
)))
PARSEP
=
Series
(
ZeroOrMore
(
Series
(
RegExp
(
WHITESPACE__
),
RegExp
(
COMMENT__
))),
GAP
,
Option
(
WSPC
))
LFF
=
Series
(
NEW_LINE
,
Option
(
WSPC
))
LF
=
Series
(
NEW_LINE
,
ZeroOrMore
(
Series
(
RegExp
(
COMMENT__
),
RegExp
(
WHITESPACE__
))))
PARSEP
=
Series
(
ZeroOrMore
(
Series
(
RegExp
(
WHITESPACE__
),
RegExp
(
COMMENT__
),
mandatory
=
1000
)),
GAP
,
Option
(
WSPC
),
mandatory
=
1000
)
LFF
=
Series
(
NEW_LINE
,
Option
(
WSPC
),
mandatory
=
1000
)
LF
=
Series
(
NEW_LINE
,
ZeroOrMore
(
Series
(
RegExp
(
COMMENT__
),
RegExp
(
WHITESPACE__
),
mandatory
=
1000
)),
mandatory
=
1000
)
TEXTCHUNK
=
RegExp
(
'[^
\\\\
%$&
\\
{
\\
}
\\
[
\\
]
\\
s
\\
n]+'
)