Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
9.2.2023: Due to updates GitLab will be unavailable for some minutes between 9:00 and 11:00.
Open sidebar
badw-it
DHParser
Commits
fc3aa0d1
Commit
fc3aa0d1
authored
Nov 16, 2017
by
di68kap
Browse files
- MLW ergänzt
parent
56729685
Changes
5
Hide whitespace changes
Inline
Side-by-side
DHParser/syntaxtree.py
View file @
fc3aa0d1
...
...
@@ -486,8 +486,8 @@ class Node(collections.abc.Sized):
# s += " '(pos %i)" % node.pos
if
src
:
txt
+=
" '(pos %i "
%
node
.
pos
# + " %i %i)" % line_col(src, node.pos)
if
node
.
error_flag
:
txt
+=
" HAS ERRORS"
#
if node.error_flag:
# just for debugging error collecting
#
txt += " HAS ERRORS"
if
node
.
errors
:
txt
+=
" '(err '(%s))"
%
' '
.
join
(
str
(
err
).
replace
(
'"'
,
r
'\"'
)
for
err
in
node
.
errors
)
...
...
@@ -556,8 +556,8 @@ class Node(collections.abc.Sized):
"""
Finds nodes in the tree that match a specific criterion.
``find`` is a generator that yields all nodes for which the
given ``match_function`` evaluates to True. The tree is
`find` is a generator that yields all nodes for which the
given `match_function` evaluates to True. The tree is
traversed pre-order.
Args:
...
...
@@ -596,7 +596,8 @@ class Node(collections.abc.Sized):
def
mock_syntax_tree
(
sxpr
):
"""
Generates a tree of nodes from an S-expression.
Generates a tree of nodes from an S-expression. The main purpose of this is
to generate test data.
Example:
>>> mock_syntax_tree("(a (b c))").as_sxpr()
...
...
examples/MLW/Beispiele/imperium/imperium.mlw
View file @
fc3aa0d1
...
...
@@ -11,7 +11,8 @@ SCHREIBWEISE
em-: Chron. Fred. 2,35sqq. capit. p. 43. 2,36 p. 60,10.
ym-: Chart. Sangall. A 194.
impir-: v. ibi.
STRUKTUR
form. sing.:
gen.:
-ri: v. ibi. adde Annal. Plac. a. 1266 p. 516,21.
...
...
@@ -19,6 +20,7 @@ SCHREIBWEISE
abl.:
-um: Chron. Fred. 2,15. 2,35sqq. capit. p. 43.
VERWECHSELBAR
confunditur c.:
imperitus: v. ibi.
...
...
examples/MLW/MLW.ebnf
View file @
fc3aa0d1
...
...
@@ -32,6 +32,8 @@ Lemma = [< klassisch | gesichert >] LemmaWort
klassisch = "*"
gesichert = "$" # TODO: Noch fragen: Welches Zeichen?
LemmaWort = LAT_WORT
LemmaVarianten = [LZ]
{ LemmaVariante §TR }+
[Zusatz]
...
...
@@ -76,7 +78,7 @@ Etymologie = FREITEXT
#### ARTIKEL-KOPF ############################################################
ArtikelKopf = < SchreibweisenPosition | StrukturPosition | GebrauchPosition
| MetrikPosition | VerwechselungPosition >
| MetrikPosition | VerwechselungsPosition >
## Schreibweisen-Position ##
...
...
@@ -84,35 +86,63 @@ ArtikelKopf = < SchreibweisenPosition | StrukturPosition | GebrauchPositio
SchreibweisenPosition = "SCHREIBWEISE" [LZ] { SWKategorie }+
SWKategorie = SWTyp DPP [LZ] §{SWUnterkategorie | SWVariante { ABS SWVariante }}+ [LZ]
SWUnterkategorie = SWUnterTyp DPP [LZ] §SWVariante { ABS SWVariante }+ [LZ]
SWTyp = scriptfat | scriptform | script | form | OFFEN
SWVariante = !KATEGORIENZEILE Schreibweise DPP Beleg
Schreibweise = ZEICHENFOLGE
SWKategorie = SWTyp DPP [LZ] §(Varianten | { SWKategorie }+) [LZ]
SWTyp = scriptfat | scriptform | script | form | gen | abl | OFFEN
scriptfat = "script." "fat-"
scriptform = "script. " "form"
script = "srcipt."
form = "form"
gen = "gen."
abl = "abl."
OFFEN = FREITEXT
#### STRUKTUR-POSITION #######################################################
StrukturPosition = "STRUKTUR" [LZ] { STVariante }+
StrukturPosition = "STRUKTUR" [LZ] { STKategorie }+
STKategorie = STTyp DPP [LZ] §(Varianten | { STKategorie }+) [LZ]
STTyp = "form." ZEICHENFOLGE
#### GEBRAUCH-POSITION #######################################################
GebrauchPosition = "GEBRAUCH"
#### METRIK-POSITION #########################################################
STVariante = "TODO"
MetrikPosition = "METRIK"
#### VERWECHSLUNGS-POSITION ##################################################
VerwechslungsPosition = "VERWECHSELBAR" [LZ] [Confunditur]
VerwechselungsVariante { ABS VerwechselungsVariante }+ [LZ]
Confunditur = "confunditur c." DPP [LZ]
VerwechselungsVariante = !KATEGORIENZEILE Verwechselung DPP Beleg
Verwechselung = ZEICHENFOLGE
#### ARTIKELKOPF POSITIONEN VARIANTEN ########################################
Varianten = Variante { ABS Variante }
Variante = !KATEGORIENZEILE Gegenstand DPP Beleg
Gegenstand = ZEICHENFOLGE
#### BEDEUTUNGS-POSITION #####################################################
##############################################################################
BedeutungsPosition = { "BEDEUTUNG" [LZ] §Bedeutung }+
Bedeutung = (Interpretamente | Bedeutungskategorie) [Belege]
Bedeutungskategorie = /(?:(?![A-ZÄÖÜ][A-ZÄÖÜ]).)+/~ [LZ]
Interpretamente = LateinischeBedeutung [LZ] §DeutscheBedeutung [LZ]
LateinischeBedeutung = SW_LAT /(?:(?![A-ZÄÖÜ][A-ZÄÖÜ]).)+/~
DeutscheBedeutung = SW_DEU /(?:(?![A-ZÄÖÜ][A-ZÄÖÜ]).)+/~
LateinischeBedeutung = LAT /(?:(?![A-ZÄÖÜ][A-ZÄÖÜ]).)+/~
DeutscheBedeutung = DEU /(?:(?![A-ZÄÖÜ][A-ZÄÖÜ]).)+/~
Belege = "BELEGE" [LZ] (EinBeleg | { "*" EinBeleg }) ABS
EinBeleg = { !([LZ] "*" | SCHLUESSELWORT) /\s*[^\n]*/~ [ZW] }+ [Zusatz]
...
...
@@ -137,9 +167,9 @@ Name = { NAME | NAMENS_ABKÜRZUNG }+
#### Schlüsselwörter #########################################################
SW_LAT = "LATEINISCH" | "LAT"
SW_DEU = "DEUTSCH" | "DEU"
SW_GRIECH = "GRIECHISCH" | "GRIECH" | "GRIE" | "GRI"
LAT = "LATEINISCH" | "LAT"
DEU = "DEUTSCH" | "DEU"
GRI = "GRIECHISCH" | "GRIECH" | "GRIE" | "GRI"
SCHLUESSELWORT = { //~ /\n/ }+ !ROEMISCHE_ZAHL /[A-ZÄÖÜ]{3,}\s+/
...
...
@@ -162,6 +192,11 @@ Verweis = "->" ZielName
VerweisZiel = "{" ZielName "}"
ZielName = BUCHSTABENFOLGE
Autor = FREITEXT
Werk = FREITEXT
Stelle = FREITEXT
Datierung = FREITEXT
Edition = FREITEXT
#### GENERISCHE UND ATOMARE AUSDRÜCKE ########################################
...
...
@@ -177,9 +212,12 @@ LAT_WORT_TEIL = /[a-z]+/
GROSSSCHRIFT = /[A-ZÄÖÜ]+/~
ZAHL = /\d+/~
ROEMISCHE_ZAHL = /(?=[MDCLXVI])M*(C[MD]|D?C*)(X[CL]|L?X*)(I[XV]|V?I*)/~
SATZZEICHEN = /(?:,(?!,))|(?:;(?!;))|(?::(?!:))|[.()-]+/~ # div. Satzzeichen, aber keine doppelten ,, ;; oder ::
FREITEXT = { DEU_WORT | ZAHL | ROEMISCHE_ZAHL | SATZZEICHEN | /s*/ }+
SATZZEICHEN = /(?:,(?!,))|(?:;(?!;))|(?::(?!:))|[.()\-]+/~ # div. Satzzeichen, aber keine doppelten ,, ;; oder ::
TEXTELEMENT = DEU_WORT | ZAHL | ROEMISCHE_ZAHL
FREITEXT = { TEXTELEMENT | /[.()\-\s]+/ | /,(?!,)\s*/ }+
ERW_FREITEXT = { TEXTELEMENT | SATZZEICHEN | /\s+/ }+
BUCHSTABENFOLGE = /\w+/~
ZEICHENFOLGE = /[\w()-]+/~
...
...
examples/MLW/MLWCompiler.py
View file @
fc3aa0d1
...
...
@@ -16,7 +16,7 @@ except ImportError:
import
re
from
DHParser
import
logging
,
is_filename
,
load_if_file
,
\
Grammar
,
Compiler
,
nil_preprocessor
,
\
Lookbehind
,
Lookahead
,
Alternative
,
Pop
,
Required
,
Token
,
Synonym
,
\
Lookbehind
,
Lookahead
,
Alternative
,
Pop
,
Token
,
Synonym
,
\
Option
,
NegativeLookbehind
,
OneOrMore
,
RegExp
,
Retrieve
,
Series
,
RE
,
Capture
,
\
ZeroOrMore
,
Forward
,
NegativeLookahead
,
mixin_comment
,
compile_source
,
\
last_value
,
counterpart
,
accumulate
,
PreprocessorFunc
,
\
...
...
@@ -26,7 +26,6 @@ from DHParser import logging, is_filename, load_if_file, \
remove_expendables
,
remove_empty
,
remove_tokens
,
flatten
,
is_whitespace
,
\
is_empty
,
is_expendable
,
collapse
,
replace_content
,
remove_parser
,
remove_content
,
remove_brackets
,
replace_parser
,
\
keep_children
,
is_one_of
,
has_content
,
apply_if
,
remove_first
,
remove_last
from
DHParser.base
import
WHITESPACE_PTYPE
,
TOKEN_PTYPE
#######################################################################
...
...
examples/MLW/MLW_ebnf_ERRORS.txt
View file @
fc3aa0d1
line: 31, column: 49, Error: Missing definition for symbol 'LemmaWort'
line: 70, column: 22, Error: Missing definition for symbol 'LAT'
line: 70, column: 28, Error: Missing definition for symbol 'GRI'
line: 78, column: 66, Error: Missing definition for symbol 'GebrauchPosition'
line: 79, column: 23, Error: Missing definition for symbol 'MetrikPosition'
line: 79, column: 40, Error: Missing definition for symbol 'VerwechselungPosition'
line: 88, column: 20, Error: Missing definition for symbol 'SWUnterTyp'
line: 89, column: 61, Error: Missing definition for symbol 'OFFEN'
line: 142, column: 1, Warning: Rule "SW_GRIECH" is not connected to parser root "Artikel" !
line: 158, column: 20, Error: Missing definition for symbol 'Autor'
line: 158, column: 30, Error: Missing definition for symbol 'Werk'
line: 158, column: 39, Error: Missing definition for symbol 'Stelle'
line: 158, column: 51, Error: Missing definition for symbol 'Datierung'
line: 158, column: 67, Error: Missing definition for symbol 'Edition'
line: 162, column: 1, Warning: Rule "VerweisZiel" is not connected to parser root "Artikel" !
line: 173, column: 1, Warning: Rule "DEU_GROSS" is not connected to parser root "Artikel" !
line: 174, column: 1, Warning: Rule "DEU_KLEIN" is not connected to parser root "Artikel" !
line: 175, column: 1, Warning: Rule "LAT_WORT" is not connected to parser root "Artikel" !
line: 177, column: 1, Warning: Rule "GROSSSCHRIFT" is not connected to parser root "Artikel" !
line: 199, column: 1, Warning: Rule "RZS" is not connected to parser root "Artikel" !
line: 206, column: 1, Warning: Rule "NIEMALS" is not connected to parser root "Artikel" !
line: 208, column: 1, Warning: Rule "DUMMY" is not connected to parser root "Artikel" !
line: 81, column: 40, Error: Missing definition for symbol 'VerwechselungsPosition'
line: 126, column: 1, Warning: Rule "VerwechslungsPosition" is not connected to parser root "Artikel" !
line: 128, column: 1, Warning: Rule "Confunditur" is not connected to parser root "Artikel" !
line: 129, column: 1, Warning: Rule "VerwechselungsVariante" is not connected to parser root "Artikel" !
line: 130, column: 1, Warning: Rule "Verwechselung" is not connected to parser root "Artikel" !
line: 188, column: 1, Warning: Rule "VerweisZiel" is not connected to parser root "Artikel" !
line: 204, column: 1, Warning: Rule "DEU_GROSS" is not connected to parser root "Artikel" !
line: 205, column: 1, Warning: Rule "DEU_KLEIN" is not connected to parser root "Artikel" !
line: 208, column: 1, Warning: Rule "GROSSSCHRIFT" is not connected to parser root "Artikel" !
line: 212, column: 1, Warning: Rule "SATZZEICHEN" is not connected to parser root "Artikel" !
line: 216, column: 1, Warning: Rule "ERW_FREITEXT" is not connected to parser root "Artikel" !
line: 233, column: 1, Warning: Rule "RZS" is not connected to parser root "Artikel" !
line: 240, column: 1, Warning: Rule "NIEMALS" is not connected to parser root "Artikel" !
line: 242, column: 1, Warning: Rule "DUMMY" is not connected to parser root "Artikel" !
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment