Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
9.2.2023: Due to updates, GitLab will be unavailable for a few minutes between 9:00 and 11:00.
Open sidebar
badw-it
DHParser
Commits
3a8fefd2
Commit
3a8fefd2
authored
Aug 24, 2017
by
di68kap
Browse files
- added ignorecase flag; Example BibTeX added
parent
56211924
Changes
5
Hide whitespace changes
Inline
Side-by-side
DHParser/ebnf.py
View file @
3a8fefd2
...
...
@@ -356,6 +356,9 @@ class EBNFCompiler(Compiler):
directives: A dictionary of all directives and their default
values.
re_flags: A set of regular expression flags to be added to all
regular expressions found in the current parsing process
"""
COMMENT_KEYWORD
=
"COMMENT__"
WHITESPACE_KEYWORD
=
"WSP__"
...
...
@@ -379,6 +382,7 @@ class EBNFCompiler(Compiler):
def
_reset
(
self
):
super
(
EBNFCompiler
,
self
).
_reset
()
self
.
_result
=
''
# type: str
self
.
re_flags
=
set
()
# type: Set[str]
self
.
rules
=
OrderedDict
()
# type: OrderedDict[str, List[Node]]
self
.
current_symbols
=
[]
# type: List[Node]
self
.
symbols
=
{}
# type: Dict[str, Node]
...
...
@@ -392,6 +396,7 @@ class EBNFCompiler(Compiler):
'literalws'
:
[
'right'
],
'tokens'
:
set
(),
# alt. 'preprocessor_tokens'
'filter'
:
dict
(),
# alt. 'filter'
'ignorecase'
:
False
,
'testing'
:
False
}
@
property
...
...
@@ -624,14 +629,14 @@ class EBNFCompiler(Compiler):
return
rule
,
defn
@
staticmethod
def
_check_rx
(
node
:
Node
,
rx
:
str
)
->
str
:
def
_check_rx
(
self
,
node
:
Node
,
rx
:
str
)
->
str
:
"""
Checks whether the string `rx` represents a valid regular
expression. Makes sure that multiline regular expressions are
prepended by the multiline-flag. Returns the regular expression string.
"""
rx
=
rx
if
rx
.
find
(
'
\n
'
)
<
0
or
rx
[
0
:
4
]
==
'(?x)'
else
'(?x)'
+
rx
flags
=
self
.
re_flags
|
{
'x'
}
if
rx
.
find
(
'
\n
'
)
>=
0
else
self
.
re_flags
rx
=
"(?%s)%s"
%
(
""
.
join
(
flags
),
rx
)
try
:
re
.
compile
(
rx
)
except
Exception
as
re_error
:
...
...
@@ -668,6 +673,12 @@ class EBNFCompiler(Compiler):
"/%s/ does not."
%
value
)
self
.
directives
[
key
]
=
value
elif
key
==
'ignorecase'
:
value
=
str
(
node
.
children
[
1
]).
lower
()
not
in
{
"off"
,
"false"
,
"no"
}
self
.
directives
[
'ignorecase'
]
==
value
if
value
:
self
.
re_flags
.
add
(
'i'
)
elif
key
==
'testing'
:
value
=
str
(
node
.
children
[
1
])
self
.
directives
[
'testing'
]
=
value
.
lower
()
not
in
{
"off"
,
"false"
,
"no"
}
...
...
examples/BibTeX/BibTeX.ebnf
0 → 100644
View file @
3a8fefd2
# BibTeX-Grammar
#
# A bibliography is a sequence of "@Preamble{...}" blocks, "@Comment{...}"
# blocks and entries of the form "@type{key, name = content, ...}".

@ testing = True
@ whitespace = /\s*/
# adds the "i" flag to the regular expressions of this grammar, i.e. they
# are matched case-insensitively
@ ignorecase = True
@ comment = /%.*(?:\n|$)/

bibliography = { preamble | comment | entry }

preamble = "@Preamble{" /"/ PREAMBLE /"/~ §"}"
comment = "@Comment{" COMMENT §"}"
entry = /@/ entry_type "{" KEY { "," NAME §"=" field_content } §"}"
# The word between "@" and "{". This symbol is referenced by `entry` and
# therefore needs a definition of its own.
entry_type = /\w+/~
field_content = /\{/ content §"}" | PLAIN_CONTENT
# Braced content is any mix of plain text runs and nested brace groups.
# The text run uses "+" rather than "*" so that no iteration of the
# repetition can succeed without consuming input; text and brace groups are
# alternatives, so content without nested braces and text following a
# nested group are both accepted.
content = { /(?:\\.|[^\\{}])+/ | /\{/ content /\}/ }

PREAMBLE = /[^"]*/
COMMENT = /[^}]*/
KEY = /[^,}]*/~
NAME = /\w+/~
PLAIN_CONTENT = /[^,}]*/
\ No newline at end of file
examples/BibTeX/tst_BibTeX_grammar.py
0 → 100644
View file @
3a8fefd2
#!/usr/bin/python3
"""tst_BibTeX_grammar.py - runs the unit tests for the BibTeX grammar
Author: Eckhart Arnold <arnold@badw.de>
Copyright 2017 Bavarian Academy of Sciences and Humanities
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import sys

# Extend the module search path with the current directory and its two
# parent directories (presumably so that the DHParser package is found when
# the script is started from within the examples tree).
sys.path.extend(['../../', '../', './'])

import DHParser.dsl
from DHParser import testing
from DHParser import toolkit

# recompiles Grammar only if it has changed
grammar_ok = DHParser.dsl.recompile_grammar('BibTeX.ebnf', force=False)
if not grammar_ok:
    print('\nErrors while recompiling "BibTeX.ebnf":\n--------------------------------------\n\n')
    with open('BibTeX_ebnf_ERRORS.txt') as error_file:
        error_text = error_file.read()
    print(error_text)
    sys.exit(1)

# Imported only after the recompilation step above has succeeded.
from BibTeXCompiler import get_grammar, get_transformer

# Run the grammar test suite found in 'grammar_tests' with logging enabled.
with toolkit.logging(True):
    error_report = testing.grammar_suite('grammar_tests', get_grammar,
                                         get_transformer,
                                         report=True, verbose=True)

# A non-empty report means at least one test failed: print it and signal
# the failure through the exit status.
if not error_report:
    print('\nSUCCESS! All tests passed :-)')
else:
    print('\n')
    print(error_report)
    sys.exit(1)
examples/LaTeX/tst_LaTeX_docs.py
View file @
3a8fefd2
...
...
@@ -56,7 +56,7 @@ with toolkit.logging(False):
pr
.
enable
()
for
file
in
files
:
if
file
.
lower
().
endswith
(
'.tex'
)
and
file
.
lower
().
find
(
'error'
)
<
0
:
with
open
(
os
.
path
.
join
(
'testdata'
,
file
),
'r'
)
as
f
:
with
open
(
os
.
path
.
join
(
'testdata'
,
file
),
'r'
,
encoding
=
"utf-8"
)
as
f
:
doc
=
f
.
read
()
print
(
'
\n\n
Parsing document: "%s"
\n
'
%
file
)
result
=
parser
(
doc
)
...
...
test/test_parser.py
View file @
3a8fefd2
...
...
@@ -150,13 +150,43 @@ class TestRegex:
result
,
messages
,
syntax_tree
=
compile_source
(
mlregex
,
None
,
get_ebnf_grammar
(),
get_ebnf_transformer
(),
get_ebnf_compiler
(
'MultilineRegexTest'
))
assert
result
assert
not
messages
assert
not
messages
,
str
(
messages
)
parser
=
compile_python_object
(
DHPARSER_IMPORTS
+
result
,
'\w+Grammar$'
)()
node
,
rest
=
parser
.
regex
(
'abc+def'
)
assert
rest
==
''
assert
node
.
parser
.
name
==
"regex"
assert
str
(
node
)
==
'abc+def'
def test_ignore_case(self):
    """Checks that the `@ ignorecase` directive switches the regular
    expressions of a grammar between case-insensitive and case-sensitive
    matching.

    The method name must start with `test_`; otherwise it is not collected
    by the test runner and the checks below never run.
    """
    def compile_parser(ebnf_source):
        # Compiles the EBNF source into Python code and returns an instance
        # of the generated grammar class. Compilation must succeed without
        # any messages; the messages are shown if it does not.
        result, messages, _ = compile_source(
            ebnf_source, None, get_ebnf_grammar(), get_ebnf_transformer(),
            get_ebnf_compiler('MultilineRegexTest'))
        assert result
        assert not messages, str(messages)
        # raw string: "\w" is not a valid escape in an ordinary string literal
        return compile_python_object(DHPARSER_IMPORTS + result, r'\w+Grammar$')()

    # With ignorecase switched on, /alpha/ must match 'Alpha' completely.
    parser = compile_parser(r"""
        @ ignorecase = True
        regex = /alpha/
        """)
    node, rest = parser.regex('Alpha')
    assert node
    assert not node.error_flag
    assert rest == ''
    assert node.parser.name == "regex"
    assert str(node) == 'Alpha'

    # With ignorecase switched off, /alpha/ must not match 'Alpha'.
    parser = compile_parser(r"""
        @ ignorecase = False
        regex = /alpha/
        """)
    node, rest = parser.regex('Alpha')
    # NOTE(review): a failed match may be reported as a missing node rather
    # than as a node carrying an error flag - confirm against the parser
    # implementation. Both outcomes count as "no match" here.
    assert node is None or node.error_flag
def
test_token
(
self
):
tokenlang
=
r
"""
@whitespace = linefeed
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment