Project: badw-it / DHParser

Commit ed754af9
Authored Sep 02, 2017 by Eckhart Arnold
Parent: 2ce46062

    - mypy-test and some type errors corrected

Showing 9 changed files with 75 additions and 73 deletions:

    DHParser/dsl.py                              +1  -1
    DHParser/ebnf.py                             +6  -6
    DHParser/parser.py                           +12 -12
    DHParser/toolkit.py                          +4  -2
    examples/CommonMark/markdown.py              +1  -1
    examples/CommonMark/markdown_old.py          +1  -1
    examples/LaTeX/LaTeXCompiler.py              +28 -28
    examples/MLW/VERALTET/MLW_compiler.py        +14 -14
    examples/Tutorial/LyrikCompiler_example.py   +8  -8
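Most of the changed lines below are one mechanical rename: the parser class `Optional` becomes `Option`. The commit message only mentions mypy testing, so the motivation is an inference, but the rename plausibly keeps the parser class from being confused with `typing.Optional`, which the same code base uses in annotations (see the `StringView.__init__` signature in DHParser/toolkit.py further down). A minimal sketch of the naming issue, with invented function names:

    # Hypothetical sketch, not taken from the commit: once the parser class is
    # called Option, the name Optional can refer unambiguously to typing.Optional.
    from typing import Optional

    class Option:
        """Stand-in for DHParser's Option parser class (renamed from Optional)."""

    def clip_indices(begin: Optional[int], end: Optional[int], length: int) -> tuple:
        # typing.Optional[int] means "int or None"; no clash with the parser class.
        return (begin or 0, length if end is None else end)

    print(clip_indices(None, None, 10))   # (0, 10)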
DHParser/dsl.py  (+1 -1)

@@ -78,7 +78,7 @@ except ImportError:
 from DHParser import logging, is_filename, load_if_file, \
     Grammar, Compiler, nil_preprocessor, \
     Lookbehind, Lookahead, Alternative, Pop, Required, Token, Synonym, \
-    Optional, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, RE, Capture, \
+    Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, RE, Capture, \
     ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \
     last_value, counterpart, accumulate, PreprocessorFunc, \
     Node, TransformationFunc, TransformationDict, TRUE_CONDITION, \
DHParser/ebnf.py  (+6 -6)

@@ -31,7 +31,7 @@ except ImportError:
 from DHParser.toolkit import load_if_file, escape_re, md5, sane_parser_name
 from DHParser.parser import Grammar, mixin_comment, nil_preprocessor, Forward, RE, NegativeLookahead, \
-    Alternative, Series, Optional, Required, OneOrMore, ZeroOrMore, Token, Compiler, \
+    Alternative, Series, Option, Required, OneOrMore, ZeroOrMore, Token, Compiler, \
     PreprocessorFunc
 from DHParser.syntaxtree import WHITESPACE_PTYPE, TOKEN_PTYPE, Node, TransformationFunc
 from DHParser.transform import TransformationDict, traverse, remove_brackets, \

@@ -132,15 +132,15 @@ class EBNFGrammar(Grammar):
     group      = Series(Token("("), expression, Required(Token(")")))
     retrieveop = Alternative(Token("::"), Token(":"))
     flowmarker = Alternative(Token("!"), Token("&"), Token("§"), Token("-!"), Token("-&"))
-    factor     = Alternative(Series(Optional(flowmarker), Optional(retrieveop), symbol, NegativeLookahead(Token("="))), Series(Optional(flowmarker), literal), Series(Optional(flowmarker), regexp), Series(Optional(flowmarker), group), Series(Optional(flowmarker), oneormore),
+    factor     = Alternative(Series(Option(flowmarker), Option(retrieveop), symbol, NegativeLookahead(Token("="))), Series(Option(flowmarker), literal), Series(Option(flowmarker), regexp), Series(Option(flowmarker), group), Series(Option(flowmarker), oneormore),
                              repetition, option)
     term       = OneOrMore(factor)
     expression.set(Series(term, ZeroOrMore(Series(Token("|"), term))))
     directive  = Series(Token("@"), Required(symbol), Required(Token("=")), Alternative(regexp, literal, list_))
     definition = Series(symbol, Required(Token("=")), expression)
-    syntax     = Series(Optional(RE('', wR='', wL=WSP__)), ZeroOrMore(Alternative(definition, directive)), Required(EOF))
+    syntax     = Series(Option(RE('', wR='', wL=WSP__)), ZeroOrMore(Alternative(definition, directive)), Required(EOF))
     root__ = syntax

@@ -784,7 +784,7 @@ class EBNFCompiler(Compiler):
     def on_option(self, node) -> str:
-        return self.non_terminal(node, 'Optional')
+        return self.non_terminal(node, 'Option')

     def on_repetition(self, node) -> str:
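The last ebnf.py hunk is the one that affects generated parsers: `EBNFCompiler.on_option` supplies the class name that the compiler writes into generated Python code, so an EBNF option `[ ... ]` is now emitted as an `Option(...)` call rather than `Optional(...)`. Roughly, and only as an illustration (the exact emitted form depends on the compiler):

    # EBNF rule (the example used in the Option docstring in DHParser/parser.py below):
    #     number = ["-"] /\d+/ [ /\.\d+/ ]
    # Before this commit the generated expression read, roughly:
    #     number = Series(Optional(Token("-")), RE('\\d+'), Optional(RE('\\.\\d+')))
    # After it:
    #     number = Series(Option(Token("-")), RE('\\d+'), Option(RE('\\.\\d+')))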
DHParser/parser.py  (+12 -12)

@@ -97,7 +97,7 @@ __all__ = ('PreprocessorFunc',
            # 'UnaryOperator',
            # 'NaryOperator',
            'Synonym',
-           'Optional',
+           'Option',
            'ZeroOrMore',
            'OneOrMore',
            'Series',

@@ -355,7 +355,7 @@ class Parser(ParserBase, metaclass=ParserMetaClass):
         representing the root of the concrete syntax tree resulting from the
         match as well as the substring `text[i:]` where i is the length of
         matched text (which can be zero in the case of parsers like
-        `ZeroOrMore` or `Optional`). If `i > 0` then the parser has "moved
+        `ZeroOrMore` or `Option`). If `i > 0` then the parser has "moved
         forward".

         If the parser does not match it returns `(None, text). **Note** that

@@ -1229,7 +1229,7 @@ class NaryOperator(Parser):
             parser.apply(func)


-class Optional(UnaryOperator):
+class Option(UnaryOperator):
     """
     Parser `Optional` always matches, even if its child-parser
     did not match.

@@ -1243,7 +1243,7 @@ class Optional(UnaryOperator):
     left it.

     Examples:
-    >>> number = Optional(Token('-')) + RegExp(r'\d+') + Optional(RegExp(r'\.\d+'))
+    >>> number = Option(Token('-')) + RegExp(r'\d+') + Option(RegExp(r'\.\d+'))
     >>> Grammar(number)('3.14159').content()
     '3.14159'
     >>> Grammar(number)('3.14159').structure()

@@ -1255,9 +1255,9 @@ class Optional(UnaryOperator):
     EBNF-Example: `number = ["-"] /\d+/ [ /\.\d+/ ]
     """

     def __init__(self, parser: Parser, name: str = '') -> None:
-        super(Optional, self).__init__(parser, name)
+        super(Option, self).__init__(parser, name)
         # assert isinstance(parser, Parser)
-        assert not isinstance(parser, Optional), \
+        assert not isinstance(parser, Option), \
             "Redundant nesting of options: %s(%s)" % \
             (str(name), str(parser.name))
         assert not isinstance(parser, Required), \

@@ -1275,10 +1275,10 @@ class Optional(UnaryOperator):
                 and not self.parser.name else self.parser.repr) + ']'


-class ZeroOrMore(Optional):
+class ZeroOrMore(Option):
     """
     `ZeroOrMore` applies a parser repeatedly as long as this parser
-    matches. Like `Optional` the `ZeroOrMore` parser always matches. In
+    matches. Like `Option` the `ZeroOrMore` parser always matches. In
     case of zero repetitions, the empty match `((), text)` is returned.

     Examples:

@@ -1310,8 +1310,8 @@ class ZeroOrMore(Optional):
 class OneOrMore(UnaryOperator):
     def __init__(self, parser: Parser, name: str = '') -> None:
         super(OneOrMore, self).__init__(parser, name)
-        assert not isinstance(parser, Optional), \
-            "Use ZeroOrMore instead of nesting OneOrMore and Optional: " \
+        assert not isinstance(parser, Option), \
+            "Use ZeroOrMore instead of nesting OneOrMore and Option: " \
             "%s(%s)" % (str(name), str(parser.name))

     def __call__(self, text: StringView) -> Tuple[Node, StringView]:

@@ -1397,7 +1397,7 @@ class Alternative(NaryOperator):
         super(Alternative, self).__init__(*parsers, name=name)
         assert len(self.parsers) >= 1
         # only the last alternative may be optional. Could this be checked at compile time?
-        assert all(not isinstance(p, Optional) for p in self.parsers[:-1])
+        assert all(not isinstance(p, Option) for p in self.parsers[:-1])
         self.been_here = dict()  # type: Dict[int, int]

     def __call__(self, text: StringView) -> Tuple[Node, StringView]:

@@ -1446,7 +1446,7 @@ class FlowOperator(UnaryOperator):
 class Required(FlowOperator):
-    # Add constructor that checks for logical errors, like `Required(Optional(...))` constructs ?
+    # Add constructor that checks for logical errors, like `Required(Option(...))` constructs ?
     RX_ARGUMENT = re.compile(r'\s(\S)')

     def __call__(self, text: StringView) -> Tuple[Node, StringView]:
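The doctest in the renamed class's docstring doubles as a usage example. Assuming DHParser is importable with the post-commit names, it can be run as an ordinary script (a sketch based on the doctest above, not additional code from the commit):

    # Build a small grammar from combined parsers and match a string with it.
    from DHParser.parser import Grammar, Option, RegExp, Token

    number = Option(Token('-')) + RegExp(r'\d+') + Option(RegExp(r'\.\d+'))
    print(Grammar(number)('3.14159').content())   # '3.14159'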
DHParser/toolkit.py  (+4 -2)

@@ -169,6 +169,8 @@ class StringView(collections.abc.Sized):
     def __init__(self, text: str, begin: Optional[int] = 0, end: Optional[int] = None) -> None:
         self.text = text  # type: str
+        self.begin = 0  # type: int
+        self.end = 0  # type: int
         self.begin, self.end = StringView.real_indices(begin, end, len(text))
         self.len = max(self.end - self.begin, 0)

@@ -218,7 +220,7 @@ def sv_match(regex, sv: StringView):
     return regex.match(sv.text, pos=sv.begin, endpos=sv.end)


-def sv_index(absolute_index: Union[int, Iterable], sv: StringView) -> int:
+def sv_index(absolute_index: int, sv: StringView) -> int:
     """
     Converts the an index into string watched by a StringView object
     to an index relativ to the string view object, e.g.:

@@ -232,7 +234,7 @@ def sv_index(absolute_index: Union[int, Iterable], sv: StringView) -> int:
     return absolute_index - sv.begin


-def sv_indices(absolute_indices: Iterable[int], sv: StringView) -> Tuple[int]:
+def sv_indices(absolute_indices: Iterable[int], sv: StringView) -> Tuple[int, ...]:
     """Converts the an index into string watched by a StringView object
     to an index relativ to the string view object. See also: `sv_index()`
     """
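The toolkit.py hunks are the "type errors corrected" part of the commit message. Assigning `self.begin = 0  # type: int` before the tuple unpacking gives the attributes an explicit type that mypy can check, and `Tuple[int, ...]` is the correct annotation for a homogeneous tuple of arbitrary length, whereas `Tuple[int]` means a tuple of exactly one element. A standalone sketch of both idioms (class and function names here are invented for illustration):

    from typing import Iterable, Tuple

    class Span:
        def __init__(self, begin: int, end: int) -> None:
            self.begin = 0  # type: int   # declare the attribute types up front
            self.end = 0    # type: int
            # the unpacked values keep the declared int type under mypy
            self.begin, self.end = begin, end

    def shifted(indices: Iterable[int], offset: int) -> Tuple[int, ...]:
        # Tuple[int, ...]: any number of ints; Tuple[int] would mean exactly one.
        return tuple(i + offset for i in indices)

    print(shifted([3, 5, 8], -3))   # (0, 2, 5)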
examples/CommonMark/markdown.py  (+1 -1)

@@ -191,7 +191,7 @@ code = compile(parser_py, '<string>', 'exec')
 module_vars = globals()
-name_space = {k: module_vars[k] for k in {'RegExp', 'RE', 'Token', 'Required', 'Optional', 'mixin_comment',
+name_space = {k: module_vars[k] for k in {'RegExp', 'RE', 'Token', 'Required', 'Option', 'mixin_comment',
                                           'ZeroOrMore', 'OneOrMore', 'Sequence', 'Alternative', 'Forward',
                                           'NegativeLookahead', 'PositiveLookahead', 'PreprocessorToken', 'Grammar'}}
 exec(code, name_space)
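For context, both markdown examples compile generated parser source with `compile()` and `exec()` it into a dictionary that whitelists the DHParser names the generated code may use; the changed line merely renames one entry of that whitelist. The pattern itself, in miniature (with a trivial stand-in for the generated source):

    # Miniature of the compile()/exec() pattern used in the markdown examples.
    parser_py = "result = factor * 6\n"      # stand-in for generated parser code

    code = compile(parser_py, '<string>', 'exec')
    name_space = {'factor': 7}               # names the generated code is allowed to see
    exec(code, name_space)
    print(name_space['result'])              # 42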
examples/CommonMark/markdown_old.py  (+1 -1)

@@ -269,7 +269,7 @@ code = compile(parser_py, '<string>', 'exec')
 module_vars = globals()
-name_space = {k: module_vars[k] for k in {'RegExp', 'RE', 'Token', 'Required', 'Optional', 'mixin_comment',
+name_space = {k: module_vars[k] for k in {'RegExp', 'RE', 'Token', 'Required', 'Option', 'mixin_comment',
                                           'ZeroOrMore', 'Sequence', 'Alternative', 'Forward'}}
 exec(code, name_space)
 parser = name_space['Grammar']
examples/LaTeX/LaTeXCompiler.py  (+28 -28)

@@ -17,7 +17,7 @@ except ImportError:
     import re

 from DHParser import logging, is_filename, Grammar, Compiler, Lookbehind, Alternative, Pop, \
     Required, Token, Synonym, \
-    Optional, NegativeLookbehind, OneOrMore, RegExp, Series, RE, Capture, \
+    Option, NegativeLookbehind, OneOrMore, RegExp, Series, RE, Capture, \
     ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \
     PreprocessorFunc, TransformationDict, \
     Node, TransformationFunc, traverse, remove_children_if, is_anonymous, \

@@ -230,11 +230,11 @@ class LaTeXGrammar(Grammar):
     EOF = RegExp('(?!.)')
     BACKSLASH = RegExp('[\\\\]')
     LB = RegExp('\\s*?\\n|$')
-    NEW_LINE = Series(RegExp('[\\t]*'), Optional(RegExp(COMMENT__)), RegExp('\\n'))
+    NEW_LINE = Series(RegExp('[\\t]*'), Option(RegExp(COMMENT__)), RegExp('\\n'))
     GAP = RE('[\\t]*(?:\\n[\\t]*)+\\n')
     WSPC = OneOrMore(Alternative(RegExp(COMMENT__), RegExp('\\s+')))
-    PARSEP = Series(ZeroOrMore(Series(RegExp(WHITESPACE__), RegExp(COMMENT__))), GAP, Optional(WSPC))
-    LFF = Series(NEW_LINE, Optional(WSPC))
+    PARSEP = Series(ZeroOrMore(Series(RegExp(WHITESPACE__), RegExp(COMMENT__))), GAP, Option(WSPC))
+    LFF = Series(NEW_LINE, Option(WSPC))
     LF = Series(NEW_LINE, ZeroOrMore(Series(RegExp(COMMENT__), RegExp(WHITESPACE__))))
     TEXTCHUNK = RegExp('[^\\\\%$&\\{\\}\\[\\]\\s\\n]+')
     INTEGER = RE('\\d+')

@@ -250,15 +250,15 @@ class LaTeXGrammar(Grammar):
     no_command = Alternative(Token("\\begin{"), Token("\\end"), Series(BACKSLASH, structural))
     text = Series(TEXTCHUNK, ZeroOrMore(Series(RE(''), TEXTCHUNK)))
     block = Series(RegExp('{'), RE(''), ZeroOrMore(Series(NegativeLookahead(blockcmd), text_element, RE(''))), Required(RegExp('}')))
-    cfg_text = ZeroOrMore(Alternative(Series(Optional(RE('')), text), CMDNAME, SPECIAL))
+    cfg_text = ZeroOrMore(Alternative(Series(Option(RE('')), text), CMDNAME, SPECIAL))
     config = Series(Token("["), cfg_text, Required(Token("]")))
     cline = Series(Token("\\cline{"), INTEGER, Token("-"), INTEGER, Token("}"))
     hline = Token("\\hline")
     multicolumn = Series(Token("\\multicolumn"), Token("{"), INTEGER, Token("}"), tabular_config, block_of_paragraphs)
     caption = Series(Token("\\caption"), block)
-    includegraphics = Series(Token("\\includegraphics"), Optional(config), block)
+    includegraphics = Series(Token("\\includegraphics"), Option(config), block)
     footnote = Series(Token("\\footnote"), block_of_paragraphs)
-    generic_command = Series(NegativeLookahead(no_command), CMDNAME, Optional(Series(Optional(Series(RE(''), config)), RE(''), block)))
+    generic_command = Series(NegativeLookahead(no_command), CMDNAME, Option(Series(Option(Series(RE(''), config)), RE(''), block)))
     text_command = Alternative(TXTCOMMAND, ESCAPED, BRACKETS)
     known_command = Alternative(footnote, includegraphics, caption, multicolumn, hline, cline)
     command = Alternative(known_command, text_command, generic_command)

@@ -273,8 +273,8 @@ class LaTeXGrammar(Grammar):
     line_element = Alternative(text, block, inline_environment, command)
     text_element.set(Alternative(line_element, LINEFEED))
     paragraph.set(OneOrMore(Series(NegativeLookahead(blockcmd), text_element, RE(''))))
-    sequence = OneOrMore(Series(Alternative(paragraph, block_environment), Optional(PARSEP)))
-    block_of_paragraphs.set(Series(Token("{"), Optional(sequence), Required(Token("}"))))
+    sequence = OneOrMore(Series(Alternative(paragraph, block_environment), Option(PARSEP)))
+    block_of_paragraphs.set(Series(Token("{"), Option(sequence), Required(Token("}"))))
     tabular_config.set(Series(Token("{"), RE('[lcr|]+'), Required(Token("}"))))
     tabular_cell = ZeroOrMore(Series(line_element, RE('')))
     tabular_row = Series(Alternative(multicolumn, tabular_cell), ZeroOrMore(Series(Token("&"), Alternative(multicolumn, tabular_cell))), Token("\\\\"), Alternative(hline, ZeroOrMore(cline)))

@@ -282,31 +282,31 @@ class LaTeXGrammar(Grammar):
     verbatim = Series(Token("\\begin{verbatim}"), sequence, Required(Token("\\end{verbatim}")))
     quotation = Alternative(Series(Token("\\begin{quotation}"), sequence, Required(Token("\\end{quotation}"))), Series(Token("\\begin{quote}"), sequence, Required(Token("\\end{quote}"))))
     figure = Series(Token("\\begin{figure}"), sequence, Required(Token("\\end{figure}")))
-    item = Series(Token("\\item"), Optional(WSPC), sequence)
-    enumerate = Series(Token("\\begin{enumerate}"), Optional(WSPC), ZeroOrMore(item), Required(Token("\\end{enumerate}")))
-    itemize = Series(Token("\\begin{itemize}"), Optional(WSPC), ZeroOrMore(item), Required(Token("\\end{itemize}")))
+    item = Series(Token("\\item"), Option(WSPC), sequence)
+    enumerate = Series(Token("\\begin{enumerate}"), Option(WSPC), ZeroOrMore(item), Required(Token("\\end{enumerate}")))
+    itemize = Series(Token("\\begin{itemize}"), Option(WSPC), ZeroOrMore(item), Required(Token("\\end{itemize}")))
     end_generic_block.set(Series(Lookbehind(LB), end_environment, LFF))
     begin_generic_block.set(Series(Lookbehind(LB), begin_environment, LFF))
     generic_block = Series(begin_generic_block, sequence, Required(end_generic_block))
     known_environment = Alternative(itemize, enumerate, figure, tabular, quotation, verbatim)
     block_environment.set(Alternative(known_environment, generic_block))
-    Index = Series(Token("\\printindex"), Optional(WSPC))
-    Bibliography = Series(Token("\\bibliography"), block, Optional(WSPC))
-    SubParagraph = Series(Token("\\subparagraph"), block, Optional(WSPC), Optional(sequence))
-    SubParagraphs = OneOrMore(Series(SubParagraph, Optional(WSPC)))
-    Paragraph = Series(Token("\\paragraph"), block, Optional(WSPC), ZeroOrMore(Alternative(sequence, SubParagraphs)))
-    Paragraphs = OneOrMore(Series(Paragraph, Optional(WSPC)))
-    SubSubSection = Series(Token("\\subsubsection"), block, Optional(WSPC), ZeroOrMore(Alternative(sequence, Paragraphs)))
-    SubSubSections = OneOrMore(Series(SubSubSection, Optional(WSPC)))
-    SubSection = Series(Token("\\subsection"), block, Optional(WSPC), ZeroOrMore(Alternative(sequence, SubSubSections)))
-    SubSections = OneOrMore(Series(SubSection, Optional(WSPC)))
-    Section = Series(Token("\\section"), block, Optional(WSPC), ZeroOrMore(Alternative(sequence, SubSections)))
-    Sections = OneOrMore(Series(Section, Optional(WSPC)))
-    Chapter = Series(Token("\\chapter"), block, Optional(WSPC), ZeroOrMore(Alternative(sequence, Sections)))
-    Chapters = OneOrMore(Series(Chapter, Optional(WSPC)))
+    Index = Series(Token("\\printindex"), Option(WSPC))
+    Bibliography = Series(Token("\\bibliography"), block, Option(WSPC))
+    SubParagraph = Series(Token("\\subparagraph"), block, Option(WSPC), Option(sequence))
+    SubParagraphs = OneOrMore(Series(SubParagraph, Option(WSPC)))
+    Paragraph = Series(Token("\\paragraph"), block, Option(WSPC), ZeroOrMore(Alternative(sequence, SubParagraphs)))
+    Paragraphs = OneOrMore(Series(Paragraph, Option(WSPC)))
+    SubSubSection = Series(Token("\\subsubsection"), block, Option(WSPC), ZeroOrMore(Alternative(sequence, Paragraphs)))
+    SubSubSections = OneOrMore(Series(SubSubSection, Option(WSPC)))
+    SubSection = Series(Token("\\subsection"), block, Option(WSPC), ZeroOrMore(Alternative(sequence, SubSubSections)))
+    SubSections = OneOrMore(Series(SubSection, Option(WSPC)))
+    Section = Series(Token("\\section"), block, Option(WSPC), ZeroOrMore(Alternative(sequence, SubSections)))
+    Sections = OneOrMore(Series(Section, Option(WSPC)))
+    Chapter = Series(Token("\\chapter"), block, Option(WSPC), ZeroOrMore(Alternative(sequence, Sections)))
+    Chapters = OneOrMore(Series(Chapter, Option(WSPC)))
     frontpages = Synonym(sequence)
-    document = Series(Optional(WSPC), Token("\\begin{document}"), Optional(WSPC), frontpages, Optional(WSPC), Alternative(Chapters, Sections), Optional(WSPC), Optional(Bibliography), Optional(Index), Optional(WSPC), Token("\\end{document}"), Optional(WSPC), Required(EOF))
-    preamble = OneOrMore(Series(Optional(WSPC), command))
+    document = Series(Option(WSPC), Token("\\begin{document}"), Option(WSPC), frontpages, Option(WSPC), Alternative(Chapters, Sections), Option(WSPC), Option(Bibliography), Option(Index), Option(WSPC), Token("\\end{document}"), Option(WSPC), Required(EOF))
+    preamble = OneOrMore(Series(Option(WSPC), command))
     latexdoc = Series(preamble, document)
     root__ = latexdoc
examples/MLW/VERALTET/MLW_compiler.py  (+14 -14)

@@ -15,7 +15,7 @@ try:
 except ImportError:
     import re

 from DHParser.parser import Grammar, Compiler, Alternative, Required, Token, \
-    Optional, OneOrMore, Series, RE, ZeroOrMore, NegativeLookahead, mixin_comment, compile_source
+    Option, OneOrMore, Series, RE, ZeroOrMore, NegativeLookahead, mixin_comment, compile_source
 from DHParser.syntaxtree import traverse, reduce_single_child, replace_by_single_child, no_transformation, \
     remove_expendables, remove_tokens, flatten, \
     WHITESPACE_KEYWORD, TOKEN_KEYWORD

@@ -160,36 +160,36 @@ class MLWGrammar(Grammar):
     Name = Series(WORT, ZeroOrMore(Alternative(WORT, NAMENS_ABKÜRZUNG)))
     Autorinfo = Series(Alternative(Token("AUTORIN"), Token("AUTOR")), Name)
     Zusatz = Series(Token("ZUSATZ"), RE('\\s*.*', wR='', wL=''), TRENNER)
-    EinBeleg = Series(OneOrMore(Series(NegativeLookahead(Series(Optional(LEER), Alternative(Token("*"), Token("BEDEUTUNG"), Token("AUTOR"), Token("NAME"), Token("ZUSATZ")))), RE('\\s*.*\\s*', wR='', wL=''))), Optional(Zusatz))
-    Belege = Series(Token("BELEGE"), Optional(LEER), ZeroOrMore(Series(Token("*"), EinBeleg)))
+    EinBeleg = Series(OneOrMore(Series(NegativeLookahead(Series(Option(LEER), Alternative(Token("*"), Token("BEDEUTUNG"), Token("AUTOR"), Token("NAME"), Token("ZUSATZ")))), RE('\\s*.*\\s*', wR='', wL=''))), Option(Zusatz))
+    Belege = Series(Token("BELEGE"), Option(LEER), ZeroOrMore(Series(Token("*"), EinBeleg)))
     DeutscheBedeutung = Series(Token("DEU"), RE('(?:(?![A-ZÄÖÜ][A-ZÄÖÜ]).)+', wL=''))
     LateinischeBedeutung = Series(Token("LAT"), RE('(?:(?![A-ZÄÖÜ][A-ZÄÖÜ]).)+', wL=''))
-    Interpretamente = Series(LateinischeBedeutung, Optional(LEER), Required(DeutscheBedeutung), Optional(LEER))
-    Bedeutungskategorie = Series(RE('(?:(?![A-ZÄÖÜ][A-ZÄÖÜ]).)+', wL=''), Optional(LEER))
-    Bedeutung = Series(Alternative(Interpretamente, Bedeutungskategorie), Optional(Belege))
-    BedeutungsPosition = OneOrMore(Series(Token("BEDEUTUNG"), Optional(LEER), Required(Bedeutung)))
+    Interpretamente = Series(LateinischeBedeutung, Option(LEER), Required(DeutscheBedeutung), Option(LEER))
+    Bedeutungskategorie = Series(RE('(?:(?![A-ZÄÖÜ][A-ZÄÖÜ]).)+', wL=''), Option(LEER))
+    Bedeutung = Series(Alternative(Interpretamente, Bedeutungskategorie), Option(Belege))
+    BedeutungsPosition = OneOrMore(Series(Token("BEDEUTUNG"), Option(LEER), Required(Bedeutung)))
     VerweisZiel = RE('<\\w+>')
     Verweis = RE('\\w+')
     Beleg = Verweis
     Schreibweise = Alternative(Token("vizreg-"), Token("festregel(a)"), Token("fezdregl(a)"), Token("fat-"))
     SWVariante = Series(Schreibweise, Token(":"), Beleg)
     SWTyp = Alternative(Token("script."), Token("script. fat-"))
-    SchreibweisenPosition = Series(Token("SCHREIBWEISE"), Optional(LEER), Required(SWTyp), Token(":"), Optional(LEER), Required(SWVariante), ZeroOrMore(Series(TRENNER, SWVariante)), Optional(LEER))
+    SchreibweisenPosition = Series(Token("SCHREIBWEISE"), Option(LEER), Required(SWTyp), Token(":"), Option(LEER), Required(SWVariante), ZeroOrMore(Series(TRENNER, SWVariante)), Option(LEER))
     ArtikelKopf = SchreibweisenPosition
     _genus = Alternative(Token("maskulinum"), Token("m."), Token("femininum"), Token("f."), Token("neutrum"), Token("n."))
     Flexion = RE('-?[a-z]+', wL='')
     Flexionen = Series(Flexion, ZeroOrMore(Series(Token(","), Required(Flexion))))
-    GVariante = Series(Flexionen, Optional(_genus), Token(":"), Beleg)
+    GVariante = Series(Flexionen, Option(_genus), Token(":"), Beleg)
     GrammatikVarianten = Series(TRENNER, GVariante)
     _wortart = Alternative(Token("nomen"), Token("n."), Token("verb"), Token("v."), Token("adverb"), Token("adv."), Token("adjektiv"), Token("adj."))
-    GrammatikPosition = Series(Token("GRAMMATIK"), Optional(LEER), Required(_wortart), Required(TRENNER), Required(Flexionen), Optional(_genus), ZeroOrMore(GrammatikVarianten), Optional(TRENNER))
+    GrammatikPosition = Series(Token("GRAMMATIK"), Option(LEER), Required(_wortart), Required(TRENNER), Required(Flexionen), Option(_genus), ZeroOrMore(GrammatikVarianten), Option(TRENNER))
     LVZusatz = Series(Token("ZUSATZ"), Token("sim."))
     LVariante = RE('(?:[a-z]|-)+')
-    LemmaVarianten = Series(Token("VARIANTEN"), Optional(LEER), Required(LVariante), ZeroOrMore(Series(TRENNER, LVariante)), Optional(Series(TRENNER, LVZusatz)), Optional(TRENNER))
+    LemmaVarianten = Series(Token("VARIANTEN"), Option(LEER), Required(LVariante), ZeroOrMore(Series(TRENNER, LVariante)), Option(Series(TRENNER, LVZusatz)), Option(TRENNER))
     _tll = Token("*")
-    Lemma = Series(Optional(_tll), WORT_KLEIN, Optional(LEER))
-    LemmaPosition = Series(Token("LEMMA"), Required(Lemma), Optional(LemmaVarianten), Required(GrammatikPosition))
-    Artikel = Series(Optional(LEER), Required(LemmaPosition), Optional(ArtikelKopf), Required(BedeutungsPosition), Required(Autorinfo), Optional(LEER), DATEI_ENDE)
+    Lemma = Series(Option(_tll), WORT_KLEIN, Option(LEER))
+    LemmaPosition = Series(Token("LEMMA"), Required(Lemma), Option(LemmaVarianten), Required(GrammatikPosition))
+    Artikel = Series(Option(LEER), Required(LemmaPosition), Option(ArtikelKopf), Required(BedeutungsPosition), Required(Autorinfo), Option(LEER), DATEI_ENDE)
     root__ = Artikel
examples/Tutorial/LyrikCompiler_example.py  (+8 -8)

@@ -19,7 +19,7 @@ except ImportError:
     import re

 from DHParser.toolkit import logging, is_filename
 from DHParser.parser import Grammar, Compiler, Required, Token, \
-    Optional, OneOrMore, Series, RE, ZeroOrMore, NegativeLookahead, mixin_comment, compile_source, \
+    Option, OneOrMore, Series, RE, ZeroOrMore, NegativeLookahead, mixin_comment, compile_source, \
     PreprocessorFunc, Synonym
 from DHParser.syntaxtree import Node, traverse, remove_last, \
     reduce_single_child, replace_by_single_child, remove_tokens, flatten, is_whitespace, collapse, replace_content, \

@@ -105,13 +105,13 @@ class LyrikGrammar(Grammar):
     namenfolge = OneOrMore(NAME)
     wortfolge = OneOrMore(WORT)
     jahr = Synonym(JAHRESZAHL)
-    ort = Series(wortfolge, Optional(verknüpfung))
-    untertitel = Series(wortfolge, Optional(verknüpfung))
-    werk = Series(wortfolge, Optional(Series(Token("."), Required(untertitel))), Optional(verknüpfung))
-    autor = Series(namenfolge, Optional(verknüpfung))
-    bibliographisches = Series(autor, Required(Token(",")), Optional(NZ), werk, Required(Token(",")), Optional(NZ), ort, Required(Token(",")), Optional(NZ), jahr, Required(Token(".")))
-    gedicht = Series(bibliographisches, OneOrMore(LEERZEILE), Optional(serie), Required(titel), Required(text),
+    ort = Series(wortfolge, Option(verknüpfung))
+    untertitel = Series(wortfolge, Option(verknüpfung))
+    werk = Series(wortfolge, Option(Series(Token("."), Required(untertitel))), Option(verknüpfung))
+    autor = Series(namenfolge, Option(verknüpfung))
+    bibliographisches = Series(autor, Required(Token(",")), Option(NZ), werk, Required(Token(",")), Option(NZ), ort, Required(Token(",")), Option(NZ), jahr, Required(Token(".")))
+    gedicht = Series(bibliographisches, OneOrMore(LEERZEILE), Option(serie), Required(titel), Required(text),
                      RE('\\s*', wR=''), Required(ENDE))
     root__ = gedicht