Commit fda0225c authored by di68kap's avatar di68kap
Browse files

- toolkit.py: added detection of fenced code blocks (for future CommonMark support)

parent 93a58ddc
......@@ -90,8 +90,8 @@ def log_dir() -> str:
if not dirname:
raise NameError # raise a name error if LOGGING evaluates to False
except NameError:
raise NameError("No access to log directory before logging has been turned "
"on within the same thread/process.")
raise NameError("No access to log directory before logging has been "
"turned on within the same thread/process.")
if os.path.exists(dirname) and not os.path.isdir(dirname):
raise IOError('"' + dirname + '" cannot be used as log directory, '
'because it is not a directory!')
......@@ -216,7 +216,7 @@ def load_if_file(text_or_file) -> str:
return text_or_file
def is_python_code(text_or_file) -> bool:
def is_python_code(text_or_file: str) -> bool:
"""Checks whether 'text_or_file' is python code or the name of a file that
contains python code.
"""
......@@ -230,6 +230,34 @@ def is_python_code(text_or_file) -> bool:
return False
def has_fenced_code(text_or_file: str, info_strings=('ebnf', 'test')) -> bool:
    """Checks whether `text_or_file` contains fenced code blocks, which are
    marked by one of the given info strings.
    See http://spec.commonmark.org/0.28/#fenced-code-blocks for more
    information on fenced code blocks in common mark documents.

    `text_or_file` is either the markdown text itself or the name of a
    file containing it; `is_filename` decides which of the two it is.

    `info_strings` is a single info string or a collection of info
    strings. They are inserted into a regular expression as they are
    (not escaped) and are matched case-insensitively.

    Returns True if an opening fence (at least three backticks or
    tildes at the beginning of a line) carrying one of the info strings
    is followed by a closing fence of the same kind that is at least as
    long. Returns False in all other cases. An opening fence on the very
    first line of the text is not detected, because the pattern requires
    a preceding line break.
    """
    if is_filename(text_or_file):
        with open(text_or_file, 'r', encoding='utf-8') as f:
            markdown = f.read()
    else:
        markdown = text_or_file

    # Cheap pre-check that avoids compiling the regular expression for
    # texts that cannot contain any fence at the beginning of a line.
    if '\n~~~' not in markdown and '\n```' not in markdown:
        return False

    if isinstance(info_strings, str):
        info_strings = (info_strings,)
    label_re = '|'.join('(?:%s)' % s for s in info_strings)
    # Raw strings keep `\-` from being an invalid string escape. The rest
    # of a backtick fence line must not contain backticks, whereas the
    # rest of a tilde fence line may contain anything up to the line end.
    fence_tmpl = (r'\n(?:(?:`{3,}[ ]*(?:%s)(?=[ .\-:\n])[^`\n]*\n)'
                  r'|(?:~{3,}[ ]*(?:%s)(?=[ .\-:\n])[^\n]*\n))')
    rx_fence = re.compile(fence_tmpl % (label_re, label_re),
                          flags=re.IGNORECASE)

    # Only the first matching opening fence is examined: according to the
    # CommonMark spec an unclosed fence extends to the end of the
    # document, so any later fence would merely be part of its content.
    opening = rx_fence.search(markdown)
    if opening is None:
        return False
    fence = re.match(r'\n(?:`+|~+)', opening.group(0)).group(0)
    # The match ends with the line break that terminates the opening
    # line. The search for the closing fence starts at this very line
    # break, so that an empty code block (closing fence on the next
    # line) is found as well.
    return markdown.find(fence, opening.end() - 1) >= 0
def md5(*txt):
"""Returns the md5-checksum for `txt`. This can be used to test if
some piece of text, for example a grammar source file, has changed.
......
......@@ -82,12 +82,12 @@ KeyFunc = Callable[[Node], str]
def transformation_factory(t=None):
"""Creates factory functions from transformation-functions that
dispatch on the first parameter after the node parameter.
dispatch on the first parameter after the context parameter.
Decorating a transformation-function that has more than merely the
``node``-parameter with ``transformation_factory`` creates a
function with the same name, which returns a partial-function that
takes just the node-parameter.
takes just the context-parameter.
Additionally, there is some syntactic sugar for
transformation-functions that receive a collection as their second
......
......@@ -22,6 +22,7 @@ Artikel = [LZ]
[LZ] DATEI_ENDE
#### LEMMA-POSITION ##########################################################
LemmaPosition = "LEMMA" [LZ] §Lemma TR [LemmaVarianten]
......@@ -37,6 +38,7 @@ LemmaVarianten = [LZ]
LemmaVariante = LAT_WORT_TEIL { "-" LAT_WORT_TEIL }
## GRAMMATIK-POSITION ##
GrammatikPosition = "GRAMMATIK" [LZ] Grammatik §ABS { GrammatikVariante §ABS }
......@@ -59,6 +61,7 @@ genus = "maskulinum" | "m."
GrammatikVariante = [wortart ABS] flexion [genus] DPP { Beleg }+
#### ETYMOLOGIE-POSITION #####################################################
EtymologiePosition = "ETYMOLOGIE" [LZ] { EtymologieVariante }+
......@@ -67,6 +70,7 @@ EtymologieBesonderheit = FREITEXT
Etymologie = FREITEXT
#### ARTIKEL-KOPF ############################################################
ArtikelKopf = { SchreibweisenPosition | StrukturPosition
......@@ -101,6 +105,7 @@ ArtikelVerfasser = ("AUTORIN" | "AUTOR") Name
Name = { NAME | NAMENS_ABKÜRZUNG }+
#### Schlüsselwörter #########################################################
SW_LAT = "LATEINISCH" | "LAT"
......@@ -110,12 +115,14 @@ SW_GRIECH = "GRIECHISCH" | "GRIECH" | "GRIE" | "GRI"
SCHLUESSELWORT = { //~ /\n/ }+ !ROEMISCHE_ZAHL /[A-ZÄÖÜ]{3,}\s+/
#### ZUSATZ an verschiedenen Stellen der Struktur ############################
Zusatz = "ZUSATZ" §{ zusatz_typ [TR] }+
zusatz_typ = "adde" | "al" | "sim." | "saepe" | "vel-rarius" | "vel" | FREITEXT
#### BELEGE ##################################################################
Beleg = (BelegQuelle BelegText) | BelegText | Verweis
......@@ -127,6 +134,7 @@ VerweisZiel = "{" ZielName "}"
ZielName = BUCHSTABENFOLGE
#### GENERISCHE UND ATOMARE AUSDRÜCKE ########################################
NAMENS_ABKÜRZUNG = /[A-ZÄÖÜÁÀÂÓÒÔÚÙÛ]\./~
......
......@@ -142,4 +142,4 @@ class TestCompilerGeneration:
if __name__ == "__main__":
from DHParser.testing import runner
runner("", globals())
\ No newline at end of file
runner("", globals())
......@@ -26,7 +26,7 @@ import sys
sys.path.extend(['../', './'])
from DHParser.toolkit import load_if_file, logging, log_dir, is_logging, re
from DHParser.toolkit import has_fenced_code, load_if_file, logging, log_dir, is_logging, re
class TestLoggingAndLoading:
......@@ -78,6 +78,15 @@ class TestLoggingAndLoading:
# file correctly loaded
assert self.code2 == load_if_file(self.filename)
def test_has_fenced_code(self):
    """Checks the detection of fenced code blocks by `has_fenced_code`
    for tilde and backtick fences, for fences that do not start a line,
    for non-matching info strings and for unclosed fences.
    """
    code1 = "has fenced code block\n~~~ ebnf\nstart = 'start'\n~~~\n"
    code2 = "no fenced code block ~~~ ebnf\nstart = 'start'\n~~~\n"
    code3 = "\n~~~ ebnd\nstart = 'start'\n~~"
    # matching info string, but the closing fence is too short
    code4 = "\n~~~ ebnf\nstart = 'start'\n~~"
    # same as code1, but fenced with backticks instead of tildes
    code5 = "has fenced code block\n``` ebnf\nstart = 'start'\n```\n"

    assert has_fenced_code(code1)
    assert not has_fenced_code(code2)
    assert not has_fenced_code(code3)
    assert not has_fenced_code(code4)
    assert has_fenced_code(code5)

    # info strings can be passed as a single string or as a collection
    assert has_fenced_code(code1, 'ebnf')
    assert has_fenced_code(code1, ('ebnf',))
    assert not has_fenced_code(code1, 'test')
def test_logging(self):
try:
log_dir()
......@@ -124,4 +133,4 @@ class TestLoggingAndLoading:
if __name__ == "__main__":
from DHParser.testing import runner
runner("", globals())
\ No newline at end of file
runner("", globals())
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment