Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
badw-it
DHParser
Commits
fab4160e
Commit
fab4160e
authored
Dec 28, 2017
by
eckhart
Browse files
- sync commit
parent
f09ba4fd
Changes
9
Expand all
Hide whitespace changes
Inline
Side-by-side
DHParser/parse.py
View file @
fab4160e
...
...
@@ -1068,7 +1068,7 @@ class PreprocessorToken(Parser):
def
__init__
(
self
,
token
:
str
)
->
None
:
assert
token
and
token
.
isupper
()
assert
RX_TOKEN_NAME
.
match
(
token
)
super
(
PreprocessorToken
,
self
).
__init__
(
token
)
super
().
__init__
(
token
)
def
__call__
(
self
,
text
:
StringView
)
->
Tuple
[
Optional
[
Node
],
StringView
]:
if
text
[
0
:
1
]
==
BEGIN_TOKEN
:
...
...
@@ -1095,6 +1095,30 @@ class PreprocessorToken(Parser):
return
None
,
text
class
PlainText
(
Parser
):
"""
Parses plain text strings.
Example:
>>> while_token = PlainText("while")
>>> Grammar(while_token)("while").content
'while'
"""
def
__init__
(
self
,
text
:
str
,
name
:
str
=
''
)
->
None
:
super
().
__init__
(
name
)
self
.
text
=
text
self
.
textlen
=
len
(
text
)
def
__deepcopy__
(
self
,
memo
):
return
self
.
__class__
(
self
.
text
,
self
.
name
)
def
__call__
(
self
,
text
:
StringView
)
->
Tuple
[
Optional
[
Node
],
StringView
]:
if
text
.
startswith
(
self
.
text
):
return
Node
(
self
,
self
.
text
,
True
),
text
[
self
.
textlen
:]
return
None
,
text
class
RegExp
(
Parser
):
r
"""
Regular expression parser.
...
...
@@ -1114,7 +1138,7 @@ class RegExp(Parser):
"""
def
__init__
(
self
,
regexp
,
name
:
str
=
''
)
->
None
:
super
(
RegExp
,
self
).
__init__
(
name
)
super
().
__init__
(
name
)
self
.
regexp
=
re
.
compile
(
regexp
)
if
isinstance
(
regexp
,
str
)
else
regexp
def
__deepcopy__
(
self
,
memo
):
...
...
@@ -1123,7 +1147,7 @@ class RegExp(Parser):
regexp
=
copy
.
deepcopy
(
self
.
regexp
,
memo
)
except
TypeError
:
regexp
=
self
.
regexp
.
pattern
return
RegExp
(
regexp
,
self
.
name
)
return
self
.
__class__
(
regexp
,
self
.
name
)
def
__call__
(
self
,
text
:
StringView
)
->
Tuple
[
Optional
[
Node
],
StringView
]:
match
=
text
.
match
(
self
.
regexp
)
...
...
@@ -1179,7 +1203,7 @@ class RE(Parser):
EBNF-Example: `word = /\w+/~`
"""
def
__init__
(
self
,
regexp
,
wL
=
None
,
wR
=
None
,
name
=
''
)
:
def
__init__
(
self
,
regexp
,
wL
=
None
,
wR
=
None
,
name
:
str
=
''
)
->
None
:
r
"""Constructor for class RE.
Args:
...
...
@@ -1195,12 +1219,12 @@ class RE(Parser):
See above.
name: The optional name of the parser.
"""
super
(
RE
,
self
).
__init__
(
name
)
super
().
__init__
(
name
)
self
.
rx_wsl
=
wL
self
.
rx_wsr
=
wR
self
.
wsp_left
=
Whitespace
(
wL
)
if
wL
else
ZOMBIE_PARSER
self
.
wsp_right
=
Whitespace
(
wR
)
if
wR
else
ZOMBIE_PARSER
self
.
main
=
RegExp
(
regexp
)
self
.
main
=
self
.
create_main_parser
(
regexp
)
def
__deepcopy__
(
self
,
memo
=
{}):
try
:
...
...
@@ -1216,8 +1240,7 @@ class RE(Parser):
main
,
txt
=
self
.
main
(
txt
)
if
main
:
wsr
,
txt
=
self
.
wsp_right
(
txt
)
result
=
tuple
(
nd
for
nd
in
(
wsl
,
main
,
wsr
)
if
nd
and
nd
.
result
!=
''
)
result
=
tuple
(
nd
for
nd
in
(
wsl
,
main
,
wsr
)
if
nd
)
return
Node
(
self
,
result
),
txt
return
None
,
text
...
...
@@ -1244,6 +1267,10 @@ class RE(Parser):
return
True
return
False
def
create_main_parser
(
self
,
arg
)
->
Parser
:
"""Creates the main parser of this compound parser. Can be overridden."""
return
RegExp
(
arg
)
class
Token
(
RE
):
"""
...
...
@@ -1259,7 +1286,7 @@ class Token(RE):
def
__init__
(
self
,
token
:
str
,
wL
=
None
,
wR
=
None
,
name
:
str
=
''
)
->
None
:
self
.
token
=
token
super
(
Token
,
self
).
__init__
(
escape_re
(
token
)
,
wL
,
wR
,
name
)
super
().
__init__
(
token
,
wL
,
wR
,
name
)
def
__deepcopy__
(
self
,
memo
=
{}):
return
self
.
__class__
(
self
.
token
,
self
.
rx_wsl
,
self
.
rx_wsr
,
self
.
name
)
...
...
@@ -1267,6 +1294,9 @@ class Token(RE):
def
__repr__
(
self
):
return
'"%s"'
%
self
.
token
if
self
.
token
.
find
(
'"'
)
<
0
else
"'%s'"
%
self
.
token
def
create_main_parser
(
self
,
arg
)
->
Parser
:
return
PlainText
(
arg
)
########################################################################
#
...
...
@@ -1316,7 +1346,7 @@ class NaryOperator(Parser):
"""
def
__init__
(
self
,
*
parsers
:
Parser
,
name
:
str
=
''
)
->
None
:
super
(
NaryOperator
,
self
).
__init__
(
name
)
super
().
__init__
(
name
)
assert
all
([
isinstance
(
parser
,
Parser
)
for
parser
in
parsers
]),
str
(
parsers
)
self
.
parsers
=
parsers
# type: Tuple[Parser, ...]
...
...
@@ -1359,7 +1389,7 @@ class Option(UnaryOperator):
"""
def
__init__
(
self
,
parser
:
Parser
,
name
:
str
=
''
)
->
None
:
super
(
Option
,
self
).
__init__
(
parser
,
name
)
super
().
__init__
(
parser
,
name
)
# assert isinstance(parser, Parser)
assert
not
isinstance
(
parser
,
Option
),
\
"Redundant nesting of options: %s(%s)"
%
(
str
(
name
),
str
(
parser
.
name
))
...
...
@@ -1431,7 +1461,7 @@ class OneOrMore(UnaryOperator):
"""
def
__init__
(
self
,
parser
:
Parser
,
name
:
str
=
''
)
->
None
:
super
(
OneOrMore
,
self
).
__init__
(
parser
,
name
)
super
().
__init__
(
parser
,
name
)
assert
not
isinstance
(
parser
,
Option
),
\
"Use ZeroOrMore instead of nesting OneOrMore and Option: "
\
"%s(%s)"
%
(
str
(
name
),
str
(
parser
.
name
))
...
...
@@ -1476,7 +1506,7 @@ class Series(NaryOperator):
NOPE
=
1000
def
__init__
(
self
,
*
parsers
:
Parser
,
mandatory
:
int
=
NOPE
,
name
:
str
=
''
)
->
None
:
super
(
Series
,
self
).
__init__
(
*
parsers
,
name
=
name
)
super
().
__init__
(
*
parsers
,
name
=
name
)
length
=
len
(
self
.
parsers
)
assert
1
<=
length
<
Series
.
NOPE
,
\
'Length %i of series exceeds maximum length of %i'
%
(
length
,
Series
.
NOPE
)
...
...
@@ -1581,7 +1611,7 @@ class Alternative(NaryOperator):
"""
def
__init__
(
self
,
*
parsers
:
Parser
,
name
:
str
=
''
)
->
None
:
super
(
Alternative
,
self
).
__init__
(
*
parsers
,
name
=
name
)
super
().
__init__
(
*
parsers
,
name
=
name
)
assert
len
(
self
.
parsers
)
>=
1
# only the last alternative may be optional. Could this be checked at compile time?
assert
all
(
not
isinstance
(
p
,
Option
)
for
p
in
self
.
parsers
[:
-
1
]),
\
...
...
TODO.md
View file @
fab4160e
General TODO-List
-----------------
-
Position Handling:
`Node._pos`
and
`Node._len`
should be set by
parser guard to allow for early dropping of nodes. (Should speed
up tree-traversal later)
-
Position handling should provide for position shifts during preprocessing
examples/LaTeX/grammar_tests/REPORT/00_test_regexes.md
View file @
fab4160e
...
...
@@ -36,13 +36,16 @@ Match-test "1"
###
C
ST
###
A
ST
(GAP
(:RegExp
""
""
""
)
(:Whitespace
" "
)
)
Match-test "2"
...
...
@@ -76,7 +79,7 @@ Match-test "3"
###
C
ST
###
A
ST
(GAP
(:RegExp
""
...
...
@@ -84,6 +87,9 @@ Match-test "3"
""
""
)
(:Whitespace
" "
)
)
Fail-test "10"
...
...
examples/LaTeX/grammar_tests/REPORT/01_test_text.md
View file @
fab4160e
...
...
@@ -82,6 +82,9 @@ Match-test "3"
(text
"footnote"
)
(:Whitespace
" "
)
)
)
...
...
@@ -160,12 +163,21 @@ Match-test "7"
### AST
(block
(:Whitespace
" "
)
(generic_command
(CMDNAME
"
\e
m"
)
)
(:Whitespace
" "
)
(text
"block"
)
(:Whitespace
" "
)
)
\ No newline at end of file
examples/LaTeX/grammar_tests/REPORT/02_test_paragraph.md
View file @
fab4160e
...
...
@@ -19,6 +19,9 @@ Match-test "1"
"Professoren, Philister und Vieh; welche vier Stände doch nichts weniger"
"als streng geschieden sind. Der Viehstand ist der bedeutendste."
)
(:Whitespace
" "
)
)
Match-test "2"
...
...
@@ -37,14 +40,23 @@ Match-test "2"
" "
)
(block
(:Whitespace
" "
)
(generic_command
(CMDNAME
"
\e
m"
)
)
(:Whitespace
" "
)
(text
"inline blocks"
)
(:Whitespace
" "
)
)
(:Whitespace
" "
...
...
@@ -59,8 +71,19 @@ Match-test "2"
(CMDNAME
"
\e
mph"
)
(text
"inline commands"
(:Whitespace
" "
)
(block
(:Whitespace
" "
)
(text
"inline commands"
)
(:Whitespace
" "
)
)
)
(:Whitespace
...
...
@@ -84,6 +107,9 @@ Match-test "2"
(text
"characters."
)
(:Whitespace
" "
)
)
Match-test "3"
...
...
@@ -101,6 +127,9 @@ Match-test "3"
"Therefore,"
"this line still belongs to the same paragraph."
)
(:Whitespace
" "
)
)
Match-test "4"
...
...
@@ -123,6 +152,9 @@ Match-test "4"
"Comment lines do not break paragraphs."
"in sequence."
)
(:Whitespace
" "
)
)
Match-test "5"
...
...
@@ -142,14 +174,23 @@ Match-test "5"
" "
)
(block
(:Whitespace
" "
)
(generic_command
(CMDNAME
"
\e
m"
)
)
(:Whitespace
" "
)
(text
"emphasized"
)
(:Whitespace
" "
)
)
(:Whitespace
" "
...
...
@@ -161,14 +202,23 @@ Match-test "5"
" "
)
(block
(:Whitespace
" "
)
(generic_command
(CMDNAME
"
\b
f"
)
)
(:Whitespace
" "
)
(text
"bold"
)
(:Whitespace
" "
)
)
(:Whitespace
" "
...
...
@@ -210,18 +260,33 @@ Match-test "5"
" "
)
(block
(:Whitespace
" "
)
(generic_command
(CMDNAME
"
\l
arge"
)
)
(:Whitespace
" "
)
(text
"large"
)
(:Whitespace
" "
)
)
(:Whitespace
" "
)
(text
"."
)
(:Whitespace
" "
)
)
Match-test "6"
...
...
@@ -239,11 +304,17 @@ Match-test "6"
" "
)
(block
(:Whitespace
" "
)
(generic_command
(CMDNAME
"
\x
y"
)
)
(:Whitespace
" "
)
(text
"unknown blocks"
)
...
...
@@ -251,9 +322,15 @@ Match-test "6"
" "
)
)
(:Whitespace
" "
)
(text
"."
)
(:Whitespace
" "
)
)
Match-test "7"
...
...
@@ -274,10 +351,24 @@ Match-test "7"
(CMDNAME
"
\x
y"
)
(:Whitespace
" "
)
(config
"xycgf"
(:Whitespace
" "
)
(text
"xycgf"
)
)
(:Whitespace
" "
)
(block
(:Whitespace
" "
)
(text
"some"
)
...
...
@@ -285,14 +376,26 @@ Match-test "7"
" "
)
(block
(:Whitespace
" "
)
(generic_command
(CMDNAME
"
\e
m"
)
)
(:Whitespace
" "
)
(text
"unbknown"
)
(:Whitespace
" "
)
)
(:Whitespace
" "
)
)
)
...
...
@@ -302,6 +405,9 @@ Match-test "7"
(text
"commands."
)
(:Whitespace
" "
)
)
Match-test "8"
...
...
@@ -324,6 +430,9 @@ Match-test "8"
"
\x
y"
)
)
(:Whitespace
" "
)
(text
"commands within paragraphs may be simple"
"or"
...
...
@@ -335,13 +444,30 @@ Match-test "8"
(CMDNAME
"
\x
y"
)
(text
"complex"
(:Whitespace
" "
)
(block
(:Whitespace
" "
)
(text
"complex"
)
(:Whitespace
" "
)
)
)
(:Whitespace
" "
)
(text
"."
)
(:Whitespace
" "
)
)
Match-test "9"
...
...
@@ -391,14 +517,26 @@ Match-test "9"
""
)
(block
(:Whitespace
" "
)
(generic_command
(CMDNAME
"
\b
f"
)
)
(:Whitespace
" "
)
(text
"blocks"
)
(:Whitespace
" "
)
)
(:Whitespace
" "
)
(text
","
...
...
@@ -425,6 +563,9 @@ Match-test "9"
"]"
)
)
(:Whitespace
" "
)
(text
","
)
...
...
@@ -480,8 +621,14 @@ Match-test "9"
(text
"footnote"
)
(:Whitespace
" "
)
)
)
(:Whitespace
" "
)
)
Match-test "10"
...
...
@@ -497,10 +644,16 @@ Match-test "10"
(begin_environment
"generic"
)
(:Whitespace
" "
)
(paragraph
(text
"inline environment"
)
(:Whitespace
" "
)
)
(end_environment
"generic"
...
...
@@ -526,6 +679,9 @@ Match-test "11"
(begin_environment
"generic"
)
(:Whitespace
" "
)
(paragraph
(text
"inline environment"
...
...
@@ -600,11 +756,17 @@ Match-test "1"
(text
"Paragraphs are separated by gaps."
)
(:Whitespace
" "
)
)
(paragraph
(text
"Like this one."
)
(:Whitespace
" "
)
)
)
...
...
@@ -623,11 +785,17 @@ Match-test "2"
(text
"The second paragraph follows after a long gap."
)