Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
badw-it
DHParser
Commits
968f1acc
Commit
968f1acc
authored
Jul 08, 2017
by
Eckhart Arnold
Browse files
- LaTeX ebnf erweitert
parent
7cba2407
Changes
11
Hide whitespace changes
Inline
Side-by-side
DHParser/parsers.py
View file @
968f1acc
...
...
@@ -51,9 +51,10 @@ https://bitbucket.org/apalala/grako
import
abc
import
copy
from
functools
import
partial
import
os
import
platform
from
functools
import
partial
try
:
import
regex
as
re
except
ImportError
:
...
...
@@ -206,7 +207,6 @@ def add_parser_guard(parser_func):
node
,
rest
=
parser
.
visited
.
get
(
location
,
(
None
,
rest
))
# don't overwrite any positive match (i.e. node not None) in the cache
# and don't add empty entries for parsers returning from left recursive calls!
# TODO: Measure at some point whether this is actually worthwhile...
if
node
is
None
and
not
grammar
.
left_recursion_encountered__
:
# otherwise also cache None-results
parser
.
visited
[
location
]
=
None
,
rest
...
...
DHParser/syntaxtree.py
View file @
968f1acc
...
...
@@ -32,7 +32,7 @@ except ImportError:
from
.typing34
import
AbstractSet
,
Any
,
ByteString
,
Callable
,
cast
,
Container
,
Dict
,
\
Iterator
,
List
,
NamedTuple
,
Sequence
,
Union
,
Text
,
Tuple
from
DHParser.toolkit
import
log_dir
,
repr_call
,
expand_table
,
line_col
,
smart_list
from
DHParser.toolkit
import
log_dir
,
expand_table
,
line_col
,
smart_list
__all__
=
[
'WHITESPACE_PTYPE'
,
...
...
@@ -203,17 +203,17 @@ class Node:
"""Initializes the ``Node``-object with the ``Parser``-Instance
that generated the node and the parser's result.
"""
self
.
_result
=
''
# type: StrictResultType
# self._result = '' # type: StrictResultType
# self._children = () # type: ChildrenType
self
.
_errors
=
[]
# type: List[str]
self
.
_children
=
()
# type: ChildrenType
self
.
result
=
result
self
.
_len
=
len
(
self
.
result
)
if
not
self
.
children
else
\
sum
(
child
.
_len
for
child
in
self
.
children
)
# type: int
# self.pos: int = 0 # continuous updating of pos values
self
.
_len
=
len
(
result
)
if
not
self
.
_
children
else
\
sum
(
child
.
_len
for
child
in
self
.
_
children
)
# type: int
# self.pos: int = 0 # continuous updating of pos values
wastes a lot of time
self
.
_pos
=
-
1
# type: int
self
.
parser
=
parser
or
ZOMBIE_PARSER
self
.
error_flag
=
any
(
r
.
error_flag
for
r
in
self
.
children
)
\
if
self
.
children
else
False
# type: bool
self
.
error_flag
=
any
(
r
.
error_flag
for
r
in
self
.
_
children
)
\
if
self
.
_
children
else
False
# type: bool
def
__str__
(
self
):
if
self
.
children
:
...
...
@@ -254,9 +254,9 @@ class Node:
# assert ((isinstance(result, tuple) and all(isinstance(child, Node) for child in result))
# or isinstance(result, Node)
# or isinstance(result, str)), str(result)
self
.
_result
=
(
result
,)
if
isinstance
(
result
,
Node
)
else
result
or
''
self
.
_result
=
(
result
,)
if
isinstance
(
result
,
Node
)
else
result
or
''
# type: StrictResultType
self
.
_children
=
cast
(
ChildrenType
,
self
.
_result
)
\
if
isinstance
(
self
.
_result
,
tuple
)
else
cast
(
ChildrenType
,
())
if
isinstance
(
self
.
_result
,
tuple
)
else
cast
(
ChildrenType
,
())
# type: ChildrenType
@
property
def
children
(
self
)
->
ChildrenType
:
...
...
dhparser.py
View file @
968f1acc
...
...
@@ -52,7 +52,7 @@ def selftest(file_name):
else
:
# compile the grammar again using the result of the previous
# compilation as parser
for
i
in
range
(
1
):
for
i
in
range
(
1
00
):
result
=
compileDSL
(
grammar
,
nil_scanner
,
result
,
transformer
,
compiler
)
print
(
result
)
return
result
...
...
examples/LaTeX/LaTeX.ebnf
View file @
968f1acc
#
latex
Grammar
#
LaTeX-
Grammar
for DHParser
@ testing = True
@ whitespace = /[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?/ # optional whitespace, including at most one linefeed
@ comment = /%.*(?:\n|$)/
latexdoc = preamble document
preamble = { command }+
document = [PARSEP] "\begin{document}" [PARSEP] frontpages [PARSEP] (chapters | sections)
[bibliography] [index] "\end{document}" §EOF
frontpages = sequence
latexdoc = preamble document
preamble = { command }+
document = [PARSEP] "\begin{document}" [PARSEP]
frontpages [PARSEP]
(chapters | sections) [PARSEP]
[bibliography] [index] [PARSEP]
"\end{document}" [PARSEP] §EOF
frontpages = sequence
#######################################################################
#
...
...
@@ -17,26 +22,26 @@ frontpages = sequence
#
#######################################################################
Chapters = { Chapter [PARSEP] }+
Chapter = "\Chapter" block [PARSEP] { sequence | Sections }
Chapters
= { Chapter [PARSEP] }+
Chapter
= "\Chapter" block [PARSEP] { sequence | Sections }
Sections = { Section [PARSEP] }+
Section = "\Section" block [PARSEP] { sequence | SubSections }
Sections
= { Section [PARSEP] }+
Section
= "\Section" block [PARSEP] { sequence | SubSections }
SubSections = { SubSection [PARSEP] }+
SubSection = "\SubSection" block [PARSEP] { sequence | SubSubSections }
SubSections
= { SubSection [PARSEP] }+
SubSection
= "\SubSection" block [PARSEP] { sequence | SubSubSections }
SubSubsections = { SubSubSection [PARSEP] }+
SubSubSection = "\SubSubSection" block [PARSEP] { sequence | Paragraphs }
Paragraphs = { Paragraph [PARSEP] }+
Paragraph = "\paragraph" block [PARSEP] { sequence | SubParagraphs }
Paragraphs
= { Paragraph [PARSEP] }+
Paragraph
= "\paragraph" block [PARSEP] { sequence | SubParagraphs }
SubParagraphs = { SubParagraph [PARSEP] }+
SubParagraph = "\subparagpaph" block [PARSEP] { sequence }
SubParagraphs
= { SubParagraph [PARSEP] }+
SubParagraph
= "\subparagpaph" block [PARSEP] { sequence }
bibliography = "\bibliography" block [PARSEP]
index = "\printindex" [PARSEP
bibliography
= "\bibliography" block [PARSEP]
index
= "\printindex" [PARSEP
#######################################################################
...
...
@@ -45,21 +50,53 @@ index = "\printindex" [PARSEP
#
#######################################################################
blockenv = beginenv sequence §endenv
parblock = /{/ sequence §/}/
sequence = { (paragraph | itemize | enumeration | figure | table | blockenv ) [PARSEP] }+
#### block environments ####
block_enrivonment = known_enrivonment | generic_enrivonment
known_enrivonment = itemize | enumerate | figure | table
generic_enrivonment = begin_enrivonment sequence §end_enrivonment
itemize = "\begin{itemize}" [PARSEP] { item } §"\end{itemize}"
enumerate = "\begin{enumerate}" [PARSEP] {item } §"end{enumerate}"
item = "\item" [PARSEP] sequence
#### paragraphs and sequences of paragraphs ####
block_of_paragraphs = /{/ sequence §/}/
sequence = { (paragraph | block_enrivonment ) [PARSEP] }+
paragraph = { !blockcmd textelements //~ }+
textelemts = command | text | block | inline_enrivonment
#### inline environments ####
inline_enrivonment = known_inline_env | generic_inline_env
known_inline_env = inline_math
generic_inline_env = begin_enrivonment { textelements }+ §end_enrivonment
begin_enrivonment = "\begin{" §NAME §"}"
end_enrivonment = "\end{" §::NAME §"}"
inline_math = "$" math_Text "$"
#### commands ####
paragraph = { !blockcmd textelements //~ }+
textelemts = command | text | block | inlinemath | inlineenv
command = knownd_command | generic_command
known_command = footnote
generic_command = CMDNAME [[ //~ config ] //~ block ]
inlineenv = beginenv { textelements }+ endenv
beginenv = "\begin{" §NAME §"}"
endenv = "\end{" §::NAME §"}"
footnote = "\footnote" block_of_paragraphs
#######################################################################
#
# low-level text and character sequences
#
#######################################################################
command = specialcmd | plaincmd
specialcmd = "\footnote" parblock
plaincmd = CMDNAME [[ //~ config ] //~ block ]
config = "[" cfgtext §"]"
block = /{/ { textelements } §/}/
...
...
@@ -71,6 +108,13 @@ blockcmd = "\subsection" | "\section" | "\chapter" | "\subsubsection"
| "\paragraph" | "\subparagraph" | "\begin{enumerate}"
| "\begin{itemize}" | "\item" | "\begin{figure}"
#######################################################################
#
# Primitives
#
#######################################################################
CMDNAME = /\\(?:(?!_)\w)+/~
NAME = /\w+/~
...
...
@@ -82,5 +126,4 @@ WSPC = /[ \t]+/ # (horizontal) whitespace
LF = !PARSEP /[ \t]*\n[ \t]*/ # LF but not an empty line
PARSEP = /[ \t]*(?:\n[ \t]*)+\n[ \t]*/ # at least one empty line, i.e.
# [whitespace] linefeed [whitespace] linefeed
EOF = !/./
examples/MLW/
samp
le
s
/compile_MLW-entry.py
→
examples/MLW/
Beispie
le/compile_MLW-entry.py
View file @
968f1acc
File moved
examples/MLW/
samp
le
s
/facitergula.pdf
→
examples/MLW/
Beispie
le/facitergula.pdf
View file @
968f1acc
File moved
examples/MLW/
samp
le
s
/fascitergula.mlw
→
examples/MLW/
Beispie
le/fascitergula.mlw
View file @
968f1acc
File moved
examples/MLW/
samp
le
s
/fascitergula.xml
→
examples/MLW/
Beispie
le/fascitergula.xml
View file @
968f1acc
File moved
examples/MLW/samples/MLW.cst
deleted
100644 → 0
View file @
7cba2407
(Artikel
(Optional
(LEER)
)
(LemmaPosition
(token__
(RegExp
"LEMMA"
)
(wsp__
" "
)
)
(Lemma
(Optional
(_tll
(RegExp
"*"
)
)
)
(WORT_KLEIN
(RegExp
"facitergula"
)
(wsp__
""
""
""
)
)
)
(Optional
(LemmaVarianten
(token__
(RegExp
"VARIANTEN"
)
(wsp__
""
" "
)
)
(LVariante
(RegExp
"fasc-itergula"
)
)
(ZeroOrMore
(Sequence
(token__
(RegExp
","
)
(wsp__
""
" "
)
)
(LVariante
(RegExp
"fac-iet-ergula"
)
)
)
(Sequence
(token__
(RegExp
","
)
(wsp__
""
" "
)
)
(LVariante
(RegExp
"fac-ist-ergula"
)
)
)
(Sequence
(token__
(RegExp
","
)
(wsp__
""
" "
)
)
(LVariante
(RegExp
"fa-rcu-tergula"
)
)
)
)
(Optional
(Sequence
(token__
(RegExp
";"
)
(wsp__
""
""
" "
)
)
(LVZusatz
(RegExp
"sim."
)
(wsp__
""
""
""
""
)
)
)
)
)
)
(GrammatikPosition
(token__
(RegExp
"GRAMMATIK"
)
(wsp__
""
" "
)
)
(_wortart
(token__
(RegExp
"nomen"
)
)
)
(token__
(RegExp
";"
)
(wsp__
" "
)
)
(Flexionen
(Flexion
(RegExp
"-ar"
)
(wsp__
" "
)
)
)
(Optional
(_genus
(token__
(RegExp
"f."
)
)
)
)
(ZeroOrMore
(GrammatikVarianten
(token__
(RegExp
";"
)
(wsp__
""
""
" "
)
)
(GVariante
(Flexionen
(Flexion
(RegExp
"-us"
)
)
(ZeroOrMore
(Sequence
(token__
(RegExp
","
)
(wsp__
" "
)
)
(Flexion
(RegExp
"-i"
)
(wsp__
" "
)
)
)
)
)
(Optional
(_genus
(token__
(RegExp
"m."
)
)
)
)
(token__
(RegExp
":"
)
(wsp__
" "
)
)
(Verweis
(RegExp
">>beleg_id_1"
)
)
)
)
(GrammatikVarianten
(token__
(RegExp
";"
)
(wsp__
""
" "
)
)
(GVariante
(Flexionen
(Flexion
(RegExp
"-um"
)
)
(ZeroOrMore
(Sequence
(token__
(RegExp
","
)
(wsp__
" "
)
)
(Flexion
(RegExp
"-i"
)
(wsp__
" "
)
)
)
)
)
(Optional
(_genus
(token__
(RegExp
"n."
)
)
)
)
(token__
(RegExp
":"
)
(wsp__
" "
)
)
(Verweis
(RegExp
">>beleg_id_2"
)
(wsp__
""
""
""
""
)
)
)
)
)
)
)
(Optional
(SchreibweisenPosition
(token__
(RegExp
"SCHREIBWEISE"
)
(wsp__
""
" "
)
)
(SWTyp
(token__
(RegExp
"script."
)
)
)
(token__
(RegExp
":"
)
(wsp__
""
" "
)
)
(SWVariante
(Schreibweise
(token__
(RegExp
"vizreg-"
)
)
)
(token__
(RegExp
":"
)
(wsp__
" "
)
)
(Verweis
(RegExp
">>beleg_id_3"
)
)
)
(ZeroOrMore
(Sequence
(token__
(RegExp
","
)
(wsp__
""
" "
)
)
(SWVariante
(Schreibweise
(token__
(RegExp
"festregel(a)"
)
)
)
(token__
(RegExp
":"
)
(wsp__
" "
)
)
(Verweis
(RegExp
">>beleg_id_4"
)
)
)
)
(Sequence
(token__
(RegExp
","
)
(wsp__
""
" "
)
)
(SWVariante
(Schreibweise
(token__
(RegExp
"fezdregl(a)"
)
)
)
(token__
(RegExp
":"
)
(wsp__
" "
)
)
(Verweis
(RegExp