Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
7
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Open sidebar
badw-it
DHParser
Commits
79f5bbdd
Commit
79f5bbdd
authored
Aug 29, 2017
by
Eckhart Arnold
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
LaTeX.ebnf reworked
parent
8e00e0f8
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
169 additions
and
30 deletions
+169
-30
DHParser/ebnf.py
DHParser/ebnf.py
+7
-7
DHParser/testing.py
DHParser/testing.py
+9
-4
dhparser.py
dhparser.py
+1
-1
examples/LaTeX/LaTeX.ebnf
examples/LaTeX/LaTeX.ebnf
+5
-5
examples/LaTeX/LaTeXCompiler.py
examples/LaTeX/LaTeXCompiler.py
+13
-11
examples/LaTeX/grammar_tests/00_test_regexes.ini
examples/LaTeX/grammar_tests/00_test_regexes.ini
+130
-0
examples/LaTeX/tst_LaTeX_grammar.py
examples/LaTeX/tst_LaTeX_grammar.py
+4
-2
No files found.
DHParser/ebnf.py
View file @
79f5bbdd
...
...
@@ -362,7 +362,8 @@ class EBNFCompiler(Compiler):
"""
COMMENT_KEYWORD
=
"COMMENT__"
WHITESPACE_KEYWORD
=
"WSP__"
RESERVED_SYMBOLS
=
{
WHITESPACE_KEYWORD
,
COMMENT_KEYWORD
}
RAW_WS_KEYWORD
=
"WHITESPACE__"
RESERVED_SYMBOLS
=
{
WHITESPACE_KEYWORD
,
RAW_WS_KEYWORD
,
COMMENT_KEYWORD
}
AST_ERROR
=
"Badly structured syntax tree. "
\
"Potentially due to erroneous AST transformation."
PREFIX_TABLE
=
{
'§'
:
'Required'
,
...
...
@@ -425,8 +426,7 @@ class EBNFCompiler(Compiler):
'"gen_transformer_Skeleton()"!'
)
tt_name
=
self
.
grammar_name
+
'_AST_transformation_table'
transtable
=
[
tt_name
+
' = {'
,
' # AST Transformations for the '
+
self
.
grammar_name
+
'-grammar'
]
' # AST Transformations for the '
+
self
.
grammar_name
+
'-grammar'
]
transtable
.
append
(
' "+": remove_empty,'
)
for
name
in
self
.
rules
:
tf
=
'[]'
...
...
@@ -498,9 +498,9 @@ class EBNFCompiler(Compiler):
definitions
.
append
((
'wspL__'
,
self
.
WHITESPACE_KEYWORD
if
'left'
in
self
.
directives
[
'literalws'
]
else
"''"
))
definitions
.
append
((
self
.
WHITESPACE_KEYWORD
,
(
"mixin_comment(whitespace="
"
r'{whitespace}', comment=r'{comment}'
)"
)
.
format
(
**
self
.
directives
)))
(
"mixin_comment(whitespace="
+
self
.
RAW_WS_KEYWORD
+
"
, comment="
+
self
.
COMMENT_KEYWORD
+
"
)"
)
))
definitions
.
append
((
self
.
RAW_WS_KEYWORD
,
"r'{whitespace}'"
.
format
(
**
self
.
directives
)))
definitions
.
append
((
self
.
COMMENT_KEYWORD
,
"r'{comment}'"
.
format
(
**
self
.
directives
)))
# prepare parser class header and docstring and
...
...
@@ -814,7 +814,7 @@ class EBNFCompiler(Compiler):
self
.
symbols
[
symbol
]
=
node
# remember first use of symbol
if
symbol
in
self
.
rules
:
self
.
recursive
.
add
(
symbol
)
if
symbol
in
(
EBNFCompiler
.
WHITESPACE_KEYWORD
,
EBNFCompiler
.
COMMENT_KEYWORD
):
if
symbol
in
EBNFCompiler
.
RESERVED_SYMBOLS
:
#
(EBNFCompiler.WHITESPACE_KEYWORD, EBNFCompiler.COMMENT_KEYWORD):
return
"RegExp(%s)"
%
symbol
return
symbol
...
...
DHParser/testing.py
View file @
79f5bbdd
...
...
@@ -18,6 +18,7 @@ permissions and limitations under the License.
import
collections
import
configparser
import
copy
import
fnmatch
import
inspect
import
json
import
os
...
...
@@ -195,7 +196,7 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
# write parsing-history log only in case of test-failure
parser
.
log_parsing_history__
(
"fail_%s_%s.log"
%
(
parser_name
,
test_name
))
if
verbose
:
print
(
infostr
+
"OK"
if
len
(
errata
)
==
errflag
else
"FAIL"
)
print
(
infostr
+
(
"OK"
if
len
(
errata
)
==
errflag
else
"FAIL"
)
)
# write test-report
if
report
:
...
...
@@ -208,12 +209,16 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
return
errata
def
grammar_suite
(
directory
,
parser_factory
,
transformer_factory
,
ignore_unknown_filetypes
=
False
,
report
=
True
,
verbose
=
False
):
def
grammar_suite
(
directory
,
parser_factory
,
transformer_factory
,
fn_patterns
=
[
'*test*'
],
ignore_unknown_filetypes
=
False
,
report
=
True
,
verbose
=
True
):
"""
Runs all grammar unit tests in a directory. A file is considered a test
unit if its name matches at least one of the glob patterns in `fn_patterns` (by default, any name containing "test").
"""
if
not
isinstance
(
fn_patterns
,
collections
.
abc
.
Collection
):
fn_patterns
=
[
fn_patterns
]
all_errors
=
collections
.
OrderedDict
()
if
verbose
:
print
(
"
\n
Scanning test-directory: "
+
directory
)
...
...
@@ -221,7 +226,7 @@ def grammar_suite(directory, parser_factory, transformer_factory, ignore_unknown
os
.
chdir
(
directory
)
if
is_logging
():
clear_logs
()
for
filename
in
sorted
(
os
.
listdir
()):
if
filename
.
lower
().
find
(
"test"
)
>=
0
:
if
any
(
fnmatch
.
fnmatch
(
filename
,
pattern
)
for
pattern
in
fn_patterns
)
:
try
:
if
verbose
:
print
(
"
\n
Running grammar tests from: "
+
filename
)
...
...
dhparser.py
View file @
79f5bbdd
...
...
@@ -40,7 +40,7 @@ EBNF_TEMPLATE = r"""-grammar
@ testing = True # testing suppresses error messages for unconnected symbols
@ whitespace = vertical # implicit whitespace, includes any number of line feeds
@ literalws = right # literals have implicit whitespace on the right hand side
@ comment = /#.*
(?:\n|$)/
# comments range from a '#'-character to the end of the line
@ comment = /#.*
/
# comments range from a '#'-character to the end of the line
@ ignorecase = False # literals and regular expressions are case-sensitive
...
...
examples/LaTeX/LaTeX.ebnf
View file @
79f5bbdd
...
...
@@ -2,7 +2,7 @@
@ testing = True
@ whitespace = /[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?/ # optional whitespace, including at most one linefeed
@ comment = /%.*
(?:\n|$)
/
@ comment = /%.*/
latexdoc = preamble document
...
...
@@ -151,13 +151,13 @@ INTEGER = /\d+/~
TEXTCHUNK = /[^\\%$&\{\}\[\]\s\n]+/ # some piece of text excluding whitespace,
# linefeed and special characters
LF = !GAP /[ \t]*\n[ \t]*/ # linefeed but not an empty line
LFF = ~/\n?/ -&LB [ WSPC ] # at least one linefeed
LF = NEW_LINE { COMMENT__ WHITESPACE__ } # linefeed but not an empty line
LFF = NEW_LINE [ WSPC ] # at least one linefeed
PARSEP = { WHITESPACE__ COMMENT__ } GAP [WSPC] # paragraph separator
WSPC = { COMMENT__ | /\s+/ }+ # arbitrary horizontal or vertical whitespace
# WSPC = { /\s+/~ | ~/\s+/ }+ # arbitrary horizontal or vertical whitespace
PARSEP = { GAP }+ # paragraph separator
GAP = /[ \t]*(?:\n[ \t]*)+\n/~ # at least one empty line, i.e.
# [whitespace] linefeed [whitespace] linefeed
NEW_LINE = /[ \t]*/ [COMMENT__] /\n/
LB = /\s*?\n|$/ # backwards line break for Lookbehind-Operator
# beginning of text marker '$' added for test code
BACKSLASH = /[\\]/
...
...
examples/LaTeX/LaTeXCompiler.py
View file @
79f5bbdd
...
...
@@ -51,7 +51,7 @@ class LaTeXGrammar(Grammar):
@ testing = True
@ whitespace = /[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?/ # optional whitespace, including at most one linefeed
@ comment = /%.*
(?:\n|$)
/
@ comment = /%.*/
latexdoc = preamble document
...
...
@@ -200,13 +200,13 @@ class LaTeXGrammar(Grammar):
TEXTCHUNK = /[^\\%$&\{\}\[\]\s\n]+/ # some piece of text excluding whitespace,
# linefeed and special characters
LF = !GAP /[ \t]*\n[ \t]*/ # linefeed but not an empty line
LFF = ~/\n?/ -&LB [ WSPC ] # at least one linefeed
LF = NEW_LINE { COMMENT__ WHITESPACE__ } # linefeed but not an empty line
LFF = NEW_LINE [ WSPC ] # at least one linefeed
PARSEP = { WHITESPACE__ COMMENT__ } GAP [WSPC] # paragraph separator
WSPC = { COMMENT__ | /\s+/ }+ # arbitrary horizontal or vertical whitespace
# WSPC = { /\s+/~ | ~/\s+/ }+ # arbitrary horizontal or vertical whitespace
PARSEP = { GAP }+ # paragraph separator
GAP = /[ \t]*(?:\n[ \t]*)+\n/~ # at least one empty line, i.e.
# [whitespace] linefeed [whitespace] linefeed
NEW_LINE = /[ \t]*/ [COMMENT__] /\n/
LB = /\s*?\n|$/ # backwards line break for Lookbehind-Operator
# beginning of text marker '$' added for test code
BACKSLASH = /[\\]/
...
...
@@ -220,20 +220,22 @@ class LaTeXGrammar(Grammar):
paragraph
=
Forward
()
tabular_config
=
Forward
()
text_element
=
Forward
()
source_hash__
=
"
2d33db878d9e5354a05e23f48a756604
"
source_hash__
=
"
ed181ac517b686f843e13d5783527fe3
"
parser_initialization__
=
"upon instantiation"
COMMENT__
=
r
'%.*(?:\n|$)'
WSP__
=
mixin_comment
(
whitespace
=
r
'[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?'
,
comment
=
r
'%.*(?:\n|$)'
)
COMMENT__
=
r
'%.*'
WHITESPACE__
=
r
'[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?'
WSP__
=
mixin_comment
(
whitespace
=
WHITESPACE__
,
comment
=
COMMENT__
)
wspL__
=
''
wspR__
=
WSP__
EOF
=
RegExp
(
'(?!.)'
)
BACKSLASH
=
RegExp
(
'[
\\\\
]'
)
LB
=
RegExp
(
'
\\
s*?
\\
n|$'
)
NEW_LINE
=
Series
(
RegExp
(
'[
\\
t]*'
),
Optional
(
RegExp
(
COMMENT__
)),
RegExp
(
'
\\
n'
))
GAP
=
RE
(
'[
\\
t]*(?:
\\
n[
\\
t]*)+
\\
n'
)
PARSEP
=
OneOrMore
(
GAP
)
WSPC
=
OneOrMore
(
Alternative
(
RegExp
(
COMMENT__
),
RegExp
(
'
\\
s+'
)))
LFF
=
Series
(
RE
(
'
\\
n?'
,
wR
=
''
,
wL
=
WSP__
),
Lookbehind
(
LB
),
Optional
(
WSPC
))
LF
=
Series
(
NegativeLookahead
(
GAP
),
RegExp
(
'[
\\
t]*
\\
n[
\\
t]*'
))
PARSEP
=
Series
(
ZeroOrMore
(
Series
(
RegExp
(
WHITESPACE__
),
RegExp
(
COMMENT__
))),
GAP
,
Optional
(
WSPC
))
LFF
=
Series
(
NEW_LINE
,
Optional
(
WSPC
))
LF
=
Series
(
NEW_LINE
,
ZeroOrMore
(
Series
(
RegExp
(
COMMENT__
),
RegExp
(
WHITESPACE__
))))
TEXTCHUNK
=
RegExp
(
'[^
\\\\
%$&
\\
{
\\
}
\\
[
\\
]
\\
s
\\
n]+'
)
INTEGER
=
RE
(
'
\\
d+'
)
NAME
=
Capture
(
RE
(
'
\\
w+'
))
...
...
examples/LaTeX/grammar_tests/00_test_regexes.ini
0 → 100644
View file @
79f5bbdd
[match:LB]
1:
"""
"""
[match:GAP]
1:
"""
"""
2:
"""
%
Comment
"""
3:
"""
"""
[fail:GAP]
1:
"""
"""
2:
"""
%
Comment
%
Comment
"""
[match:PARSEP]
1:
"""
"""
2:
"""
%
Comment
"""
3:
"""
"""
4:
"""
%
Comment
%
Comment
"""
5:
"""
%
Comment
%
Comment
%
Comment"""
[fail:PARSEP]
1:
"
"
2:
"""
"""
3:
"""
%
Comment"""
4:
"""
%
Comment
%
Comment
%
Comment"""
[match:WSPC]
1:
"
"
2:
"
%
Comment"
3:
"
"
4:
"%
Comment"
5:
"""%
Comment
"""
6:
"""
%
Comment
%
Comment
"""
7:
"""
"""
[fail:WSPC]
1:
"X"
[match:LFF]
1:
"""
"""
2:
"""
%
Comment"""
3:
"""
%
Comment
"""
4:
"""
"""
[fail:LFF]
1:
"
"
[match:LF]
1:
"""
"""
2:
"""
%
Comment"""
3:
"""
%
Comment
%
Comment
"""
[fail:LF]
1:
"""
"""
examples/LaTeX/tst_LaTeX_grammar.py
View file @
79f5bbdd
...
...
@@ -25,6 +25,7 @@ sys.path.extend(['../../', '../', './'])
import
DHParser.dsl
from
DHParser
import
testing
from
DHParser
import
toolkit
if
not
DHParser
.
dsl
.
recompile_grammar
(
'LaTeX.ebnf'
,
force
=
False
):
# recompiles Grammar only if it has changed
print
(
'
\n
Errors while recompiling "LaTeX.ebnf":
\n
--------------------------------------
\n\n
'
)
...
...
@@ -35,8 +36,9 @@ if not DHParser.dsl.recompile_grammar('LaTeX.ebnf', force=False): # recompiles
from
LaTeXCompiler
import
get_grammar
,
get_transformer
with
toolkit
.
logging
(
True
):
error_report
=
testing
.
grammar_suite
(
'grammar_tests'
,
get_grammar
,
get_transformer
,
report
=
True
,
verbose
=
True
)
error_report
=
testing
.
grammar_suite
(
'grammar_tests'
,
get_grammar
,
get_transformer
,
fn_patterns
=
[
'*_test_*.ini'
],
report
=
True
,
verbose
=
True
)
if
error_report
:
print
(
'
\n
'
)
print
(
error_report
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment