Commit fda0225c authored by di68kap's avatar di68kap
Browse files

- toolkit.py: added detections for fenced code blocks (for future common mark support)

parent 93a58ddc
...@@ -90,8 +90,8 @@ def log_dir() -> str: ...@@ -90,8 +90,8 @@ def log_dir() -> str:
if not dirname: if not dirname:
raise NameError # raise a name error if LOGGING evaluates to False raise NameError # raise a name error if LOGGING evaluates to False
except NameError: except NameError:
raise NameError("No access to log directory before logging has been turned " raise NameError("No access to log directory before logging has been "
"on within the same thread/process.") "turned on within the same thread/process.")
if os.path.exists(dirname) and not os.path.isdir(dirname): if os.path.exists(dirname) and not os.path.isdir(dirname):
raise IOError('"' + dirname + '" cannot be used as log directory, ' raise IOError('"' + dirname + '" cannot be used as log directory, '
'because it is not a directory!') 'because it is not a directory!')
...@@ -216,7 +216,7 @@ def load_if_file(text_or_file) -> str: ...@@ -216,7 +216,7 @@ def load_if_file(text_or_file) -> str:
return text_or_file return text_or_file
def is_python_code(text_or_file) -> bool: def is_python_code(text_or_file: str) -> bool:
"""Checks whether 'text_or_file' is python code or the name of a file that """Checks whether 'text_or_file' is python code or the name of a file that
contains python code. contains python code.
""" """
...@@ -230,6 +230,34 @@ def is_python_code(text_or_file) -> bool: ...@@ -230,6 +230,34 @@ def is_python_code(text_or_file) -> bool:
return False return False
def has_fenced_code(text_or_file: str, info_strings=('ebnf', 'test')) -> bool:
    """Checks whether `text_or_file` contains a fenced code block that is
    marked by one of the given info strings.

    See http://spec.commonmark.org/0.28/#fenced-code-blocks for more
    information on fenced code blocks in common mark documents.

    :param text_or_file: either the markdown text itself or the name of a
        file (as decided by `is_filename`) from which the text is read
        as UTF-8.
    :param info_strings: a single info string or a collection of info
        strings. They are inserted into a regular expression unescaped and
        are matched case-insensitively.
    :returns: True, if the first opening fence carrying one of the info
        strings is followed by a fence made of the same characters; False
        otherwise. Only fences that are preceded by a line break are
        recognized, i.e. a fence on the very first line is not detected.
    """
    if is_filename(text_or_file):
        with open(text_or_file, 'r', encoding='utf-8') as f:
            markdown = f.read()
    else:
        markdown = text_or_file

    # Cheap pre-check that avoids compiling a regular expression for
    # documents that cannot contain any fence at all.
    if '\n~~~' not in markdown and '\n```' not in markdown:
        return False

    if isinstance(info_strings, str):
        info_strings = (info_strings,)

    # An opening fence consists of at least three backticks or tildes,
    # optional blanks and the info string, which must be followed by a
    # delimiter. The rest of the line is arbitrary, except that the info
    # line of a backtick fence must not contain further backticks.
    fence_tmpl = (r'\n(?:(?:```[`]*[ ]*(?:%s)(?=[ .\-:\n])[^`\n]*\n)'
                  r'|(?:~~~[~]*[ ]*(?:%s)(?=[ .\-:\n])[^\n]*\n))')
    label_re = '|'.join('(?:%s)' % s for s in info_strings)
    rx_fence = re.compile(fence_tmpl % (label_re, label_re), flags=re.IGNORECASE)

    opening = rx_fence.search(markdown)
    if opening is None:
        return False

    # The line break plus the run of fence characters of the opening fence;
    # a closing fence must start with the very same sequence.
    fence = re.match(r'\n(?:`+|~+)', opening.group(0)).group(0)
    # The match ends with the line break that terminates the opening line.
    # This line break may at the same time precede the closing fence (empty
    # code block), therefore the search starts one character earlier.
    return markdown.find(fence, opening.end() - 1) >= 0
def md5(*txt): def md5(*txt):
"""Returns the md5-checksum for `txt`. This can be used to test if """Returns the md5-checksum for `txt`. This can be used to test if
some piece of text, for example a grammar source file, has changed. some piece of text, for example a grammar source file, has changed.
......
...@@ -82,12 +82,12 @@ KeyFunc = Callable[[Node], str] ...@@ -82,12 +82,12 @@ KeyFunc = Callable[[Node], str]
def transformation_factory(t=None): def transformation_factory(t=None):
"""Creates factory functions from transformation-functions that """Creates factory functions from transformation-functions that
dispatch on the first parameter after the node parameter. dispatch on the first parameter after the context parameter.
Decorating a transformation-function that has more than merely the Decorating a transformation-function that has more than merely the
``node``-parameter with ``transformation_factory`` creates a ``node``-parameter with ``transformation_factory`` creates a
function with the same name, which returns a partial-function that function with the same name, which returns a partial-function that
takes just the node-parameter. takes just the context-parameter.
Additionally, there is some syntactic sugar for Additionally, there is some syntactic sugar for
transformation-functions that receive a collection as their second transformation-functions that receive a collection as their second
......
...@@ -22,6 +22,7 @@ Artikel = [LZ] ...@@ -22,6 +22,7 @@ Artikel = [LZ]
[LZ] DATEI_ENDE [LZ] DATEI_ENDE
#### LEMMA-POSITION ########################################################## #### LEMMA-POSITION ##########################################################
LemmaPosition = "LEMMA" [LZ] §Lemma TR [LemmaVarianten] LemmaPosition = "LEMMA" [LZ] §Lemma TR [LemmaVarianten]
...@@ -37,6 +38,7 @@ LemmaVarianten = [LZ] ...@@ -37,6 +38,7 @@ LemmaVarianten = [LZ]
LemmaVariante = LAT_WORT_TEIL { "-" LAT_WORT_TEIL } LemmaVariante = LAT_WORT_TEIL { "-" LAT_WORT_TEIL }
## GRAMMATIK-POSITION ## ## GRAMMATIK-POSITION ##
GrammatikPosition = "GRAMMATIK" [LZ] Grammatik §ABS { GrammatikVariante §ABS } GrammatikPosition = "GRAMMATIK" [LZ] Grammatik §ABS { GrammatikVariante §ABS }
...@@ -59,6 +61,7 @@ genus = "maskulinum" | "m." ...@@ -59,6 +61,7 @@ genus = "maskulinum" | "m."
GrammatikVariante = [wortart ABS] flexion [genus] DPP { Beleg }+ GrammatikVariante = [wortart ABS] flexion [genus] DPP { Beleg }+
#### ETYMOLOGIE-POSITION ##################################################### #### ETYMOLOGIE-POSITION #####################################################
EtymologiePosition = "ETYMOLOGIE" [LZ] { EtymologieVariante }+ EtymologiePosition = "ETYMOLOGIE" [LZ] { EtymologieVariante }+
...@@ -67,6 +70,7 @@ EtymologieBesonderheit = FREITEXT ...@@ -67,6 +70,7 @@ EtymologieBesonderheit = FREITEXT
Etymologie = FREITEXT Etymologie = FREITEXT
#### ARTIKEL-KOPF ############################################################ #### ARTIKEL-KOPF ############################################################
ArtikelKopf = { SchreibweisenPosition | StrukturPosition ArtikelKopf = { SchreibweisenPosition | StrukturPosition
...@@ -101,6 +105,7 @@ ArtikelVerfasser = ("AUTORIN" | "AUTOR") Name ...@@ -101,6 +105,7 @@ ArtikelVerfasser = ("AUTORIN" | "AUTOR") Name
Name = { NAME | NAMENS_ABKÜRZUNG }+ Name = { NAME | NAMENS_ABKÜRZUNG }+
#### Schlüsselwörter ######################################################### #### Schlüsselwörter #########################################################
SW_LAT = "LATEINISCH" | "LAT" SW_LAT = "LATEINISCH" | "LAT"
...@@ -110,12 +115,14 @@ SW_GRIECH = "GRIECHISCH" | "GRIECH" | "GRIE" | "GRI" ...@@ -110,12 +115,14 @@ SW_GRIECH = "GRIECHISCH" | "GRIECH" | "GRIE" | "GRI"
SCHLUESSELWORT = { //~ /\n/ }+ !ROEMISCHE_ZAHL /[A-ZÄÖÜ]{3,}\s+/ SCHLUESSELWORT = { //~ /\n/ }+ !ROEMISCHE_ZAHL /[A-ZÄÖÜ]{3,}\s+/
#### ZUSATZ an verschiedenen Stellen der Struktur ############################ #### ZUSATZ an verschiedenen Stellen der Struktur ############################
Zusatz = "ZUSATZ" §{ zusatz_typ [TR] }+ Zusatz = "ZUSATZ" §{ zusatz_typ [TR] }+
zusatz_typ = "adde" | "al" | "sim." | "saepe" | "vel-rarius" | "vel" | FREITEXT zusatz_typ = "adde" | "al" | "sim." | "saepe" | "vel-rarius" | "vel" | FREITEXT
#### BELEGE ################################################################## #### BELEGE ##################################################################
Beleg = (BelegQuelle BelegText) | BelegText | Verweis Beleg = (BelegQuelle BelegText) | BelegText | Verweis
...@@ -127,6 +134,7 @@ VerweisZiel = "{" ZielName "}" ...@@ -127,6 +134,7 @@ VerweisZiel = "{" ZielName "}"
ZielName = BUCHSTABENFOLGE ZielName = BUCHSTABENFOLGE
#### GENERISCHE UND ATOMARE AUSDRÜCKE ######################################## #### GENERISCHE UND ATOMARE AUSDRÜCKE ########################################
NAMENS_ABKÜRZUNG = /[A-ZÄÖÜÁÀÂÓÒÔÚÙÛ]\./~ NAMENS_ABKÜRZUNG = /[A-ZÄÖÜÁÀÂÓÒÔÚÙÛ]\./~
......
...@@ -26,7 +26,7 @@ import sys ...@@ -26,7 +26,7 @@ import sys
sys.path.extend(['../', './']) sys.path.extend(['../', './'])
from DHParser.toolkit import load_if_file, logging, log_dir, is_logging, re from DHParser.toolkit import has_fenced_code, load_if_file, logging, log_dir, is_logging, re
class TestLoggingAndLoading: class TestLoggingAndLoading:
...@@ -78,6 +78,15 @@ class TestLoggingAndLoading: ...@@ -78,6 +78,15 @@ class TestLoggingAndLoading:
# file correctly loaded # file correctly loaded
assert self.code2 == load_if_file(self.filename) assert self.code2 == load_if_file(self.filename)
def test_has_fenced_code(self):
    """Checks `has_fenced_code` with three samples: a properly fenced
    block, a fence marker that does not start its line, and a block with
    the info string 'ebnd' whose last line consists of two tildes only.
    """
    fenced = "has fenced code block\n~~~ ebnf\nstart = 'start'\n~~~\n"
    marker_inside_line = "no fenced code block ~~~ ebnf\nstart = 'start'\n~~~\n"
    other_info_string = "\n~~~ ebnd\nstart = 'start'\n~~"
    # only the first sample is expected to be recognized
    assert has_fenced_code(fenced)
    for sample in (marker_inside_line, other_info_string):
        assert not has_fenced_code(sample)
def test_logging(self): def test_logging(self):
try: try:
log_dir() log_dir()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment