Commit c082a5f2 authored by Stefan Müller

Multum.

parent bb5a55c6
......@@ -126,7 +126,7 @@ Attribution notice: by Stefan Müller in 2013 ff.
box-shadow:0 1px 3px 0 #b0c0f4;
height:auto;
justify-content:center;
max-width:1000px;
max-width:50em;
padding:8% 8%;
width:99%;
}
......@@ -788,7 +788,7 @@ body {
background-color:#f0f0f0;
box-sizing:border-box;
font-family:"linbiolinum", "Linux Biolinum", "Linux Biolinum G", "Linux Biolinum O", sans-serif;
font-size:1.3rem;
font-size:20px;
height:100%;
line-height:1.4;
padding-top:0.1px;
......@@ -1098,7 +1098,7 @@ ul.list ul.list ul.list {list-style-type:square;}
content:" (" attr(href) ")";
}
body {
font-size:1em;
font-size:18px;
margin-left:-3%;
width:74%;
}
......
......@@ -120,7 +120,7 @@ Attribution notice: by Stefan Müller in 2013 ff.
box-shadow:0 1px 3px 0 #b0c0f4;
height:auto;
justify-content:center;
max-width:1100px;
max-width:50em;
padding:8% 8%;
width:99%;
}
......@@ -798,7 +798,7 @@ body {
background-color:#f0f0f0;
box-sizing:border-box;
font-family:"Calibri", "Noto Sans", "DejaVu Sans", sans-serif;
font-size:1.25rem;
font-size:20px;
height:100%;
line-height:1.45;
padding-top:0.1px;
......@@ -1045,7 +1045,7 @@ ul {
content:" (" attr(href) ")";
}
body {
font-size:1em;
font-size:18px;
margin-left:-3%;
width:74%;
}
......
......@@ -152,7 +152,7 @@ Attribution notice: by Stefan Müller in 2013 ff.
box-shadow:0 1px 3px 0 #b0c0f4;
height:auto;
justify-content:center;
max-width:900px;
max-width:50em;
padding:8% 8%;
width:99%;
}
......@@ -287,12 +287,16 @@ Attribution notice: by Stefan Müller in 2013 ff.
1px 1px 0 #fff,
1px 0 0 #fff;
}
.desk > header .subtitle {
display:block;
font-size:0.66em;
}
.desk > header .utilities {
font-size:0.8em;
margin-top:1em;
}
.desk > header h1,
.desk > header h2 {
.desk > header h1 {
margin-top:3%;
text-align:right;
}
.desk > header nav a:focus,
......@@ -719,6 +723,7 @@ p {
}
.para {
display:block;
font-size:0.8em;
margin-top:0.4em;
}
.para_in_note {
......@@ -924,7 +929,7 @@ p {
margin-top:0;
}
.transcription header > p {
margin-top:1.5em;
margin:1em 0 1em 0;
}
.txt {
color:#000;
......@@ -1069,7 +1074,7 @@ body {
background-color:#f0f0f0;
box-sizing:border-box;
font-family:"LinLibertine", "Palatino Linotype", serif;
font-size:1.25rem;
font-size:20px;
height:100%;
line-height:1.35;
padding-top:0.1px;
......@@ -1164,6 +1169,15 @@ main {
display:block;
z-index:2;
}
pb-:before {
color:#aaaaaa;
content:"/" attr(id) "/";
}
rp,
rt {
font-size:0.75em;
margin-bottom:-4px;
}
s, s:link, s:visited, s:focus, s:hover, s:active {
text-decoration:line-through;
}
......@@ -1231,6 +1245,10 @@ table small.note:hover {
display:block;
position:absolute;
}
tablestart-:after {
color:#aaaaaa;
content:"⟦TABLE⟧";
}
td, th {
padding:8px 10px;
vertical-align:top;
......@@ -1361,7 +1379,7 @@ ol.list li {
content:"" !important;
}
body {
font-size:1em;
font-size:18px;
margin-left:-3%;
width:74%;
}
......
......@@ -552,7 +552,7 @@ body {
background-color:#f0f0f0;
box-sizing:border-box;
font-family:"linbiolinum", "Linux Biolinum", "Linux Biolinum G", "Linux Biolinum O", sans-serif;
font-size:1.25rem;
font-size:20px;
height:100%;
line-height:1.4;
padding-top:0.1px;
......@@ -824,7 +824,7 @@ ul.list ul.list ul.list {list-style-type:square;}
content:" (" attr(href) ")";
}
body {
font-size:1em;
font-size:18px;
margin-left:-3%;
width:74%;
}
......
......@@ -94,7 +94,7 @@ Attribution notice: by Stefan Müller in 2013 ff.
display:flex;
height:auto;
justify-content:center;
max-width:900px;
max-width:50em;
padding:8% 8%;
width:99%;
}
......@@ -569,7 +569,7 @@ body {
background-color:#f0f0f0;
box-sizing:border-box;
font-family:"Palatino nova", "Palatino Linotype", "Palatino", "URW Palladio L", "URW Palladio", "Georgia", "Century Schoolbook", serif;
font-size:1.25rem;
font-size:20px;
height:100%;
line-height:1.45;
padding-top:0.1px;
......@@ -861,7 +861,7 @@ ul.list ul.list ul.list {list-style-type:square;}
content:" (" attr(href) ")";
}
body {
font-size:1em;
font-size:18px;
margin-left:-3%;
width:74%;
}
......
No preview for this file type
# import strutils, sequtils
import strutils
# var doc = readFile(r"C:\Users\M\Eigene\bestand\Mhd\PP\P_K\I-0-p1.Wil\WILL.GR")
# echo doc
proc unexpected(): int =
var result = 5
result += 5
var names = ["Ähndel", "Rum", "Liri"]
echo unexpected() # Prints 0, not 10
\ No newline at end of file
var kinds = {'a', 'b', 'c'}
echo card(kinds)
for name in names.mitems:
name = name.replace('r', 'l')
echo names
......@@ -563,7 +563,7 @@ class DB(auth_io.Auth, web_io.IO):
on {version} between person_von and person_bis
and person_id = werk2person_person_id
where _art = %s
order by werk_haupttitel ASC
order by werk_haupttitel
""", kind):
eigen_id = row['_werk_id']
term = row['werk_haupttitel']
......@@ -937,7 +937,7 @@ class DB(auth_io.Auth, web_io.IO):
IFNULL((
select group_concat(
person_hauptname
order by person2lit_rang ASC
order by person2lit_rang
SEPARATOR ' – ')
from person2lit
left join person
......@@ -1102,7 +1102,7 @@ class DB(auth_io.Auth, web_io.IO):
IFNULL((
select group_concat(
person_hauptname
order by person2lit_rang ASC
order by person2lit_rang
SEPARATOR ' – ')
from person2lit
left join person
......@@ -1273,6 +1273,7 @@ class DB(auth_io.Auth, web_io.IO):
and werk_id = werk2person_werk_id
where {version} between werk2person_von and werk2person_bis
and werk2person_person_id = %s
order by werk_haupttitel COLLATE utf8mb4_german2_ci
""", item_id)
item['person_lit'] = sorted(
(
......@@ -1383,8 +1384,7 @@ class DB(auth_io.Auth, web_io.IO):
):
item['werk_lit'][lit_art] = sorted(
(
self.get_lit(lit)
for lit in con.geteach(rf"""
self.get_lit(lit) for lit in con.geteach(rf"""
{lit_cols},
werk2lit_art,
werk2lit_anmerkung,
......@@ -1897,6 +1897,7 @@ class DB(auth_io.Auth, web_io.IO):
where {version} between person2lit_von and person2lit_bis
and person2lit_art = '{case}'
and person2lit_lit_id = %s
order by person2lit_rang
""", lit_id) ]
if case == 'autor':
autor_sort = ' '.join(terms)
......
......@@ -18,7 +18,6 @@ executes the module on a server, naming a different second file:
:shell:`python /local/app/code/python/exemplar/geist.py "/local/app/config.ini" "/local/app/config_server_80.ini"`
'''
from gevent import monkey; monkey.patch_all()
import json
try: import regex as re
except ImportError: import re
......
......@@ -93,8 +93,9 @@ def main(
table = result['table_name']
if table not in {
't_eintraege',
't_untergruppen',
't_lit',
't_stoffgruppen',
't_untergruppen',
}:
continue
cur.execute("describe " + table)
......
......@@ -229,14 +229,19 @@ class DB(auth_io.Auth, web_io.IO):
def get_hsc_urls(
self,
data_urls_prefix_1: str,
link_re = re.compile(
link_re: 'Pattern[str]' = re.compile(
r'''\shref\s*=\s*['"](http://www.handschriftencensus.de/.*?)['"]'''),
) -> 'Generator[Tuple[str, str]]':
'''
Yield all URL tails of all pages linking to the handschriftencensus
and for each one the belonging handschriftencensus-URL-tail.
'''
) -> 'Tuple[str, str, Tuple[Tuple[str, str]]]':
def get_common_prefix(terms: 'Sequence[str]') -> str:
    '''
    Return the longest string with which every item of :param:`terms`
    starts, compared character by character.

    The result is ``''`` if :param:`terms` is empty or if its items share
    no leading character; a single item is returned as a whole.
    '''
    # Imported locally so that the helper does not rely on the imports of
    # the surrounding module.
    import os.path
    # ``commonprefix`` works character-wise (not per path component), which
    # is what the former hand-written loop did, but it does not rebuild the
    # prefix string once per character.
    return os.path.commonprefix(list(terms))
urls = deque()
hsc_urls = deque()
with DBCon(self.connect()) as (con, cur):
for item in con.geteach("""
f_stoffgr_nr,
......@@ -253,7 +258,20 @@ class DB(auth_io.Auth, web_io.IO):
ein = item['f_hs_position_nr']
url = f"{kind}/{stoff}/{unter}/{ein}"
for match in link_re.finditer(item['f_externe_daten']):
yield (url, match.group(1).split(data_urls_prefix_1, 1)[-1])
urls.append(url)
hsc_urls.append(match.group(1))
prefix_0 = get_common_prefix(urls)
prefix_1 = get_common_prefix(hsc_urls)
return (
self.config['ids']['url'].rstrip('/') + '/' + prefix_0,
prefix_1,
tuple(zip(
( url.split(prefix_0, 1)[-1] for url in urls )
if prefix_0 else urls,
( url.split(prefix_1, 1)[-1] for url in hsc_urls)
if prefix_1 else hsc_urls,
)),
)
def get_index(
self,
......@@ -1016,7 +1034,8 @@ def replace_tag(
continue
elif key == 'class':
values = set(value.split())
values.discard('MsoNormal')
for part in ('key', 'MsoNormal', 'ref'):
values.discard(part)
if css_small_caps and not 'small-caps' in values:
values.add('small-caps')
if not values:
......
......@@ -93,6 +93,7 @@ class Geist(bottle.Bottle):
handschriftencensus and the belonging handschriftencensus-URLs.
'''
response.content_type = 'application/json'
prefix_0, prefix_1, urls = db.get_hsc_urls()
return json.dumps(
{
'name': db.config['ids']['name'],
......@@ -101,13 +102,9 @@ class Geist(bottle.Bottle):
'email': db.config['ids']['email'],
'data_desc': db.config['api_hsc']['data_desc'],
'data_url': db.config['api_hsc']['data_url'],
'data_urls_prefix_0':
db.config['ids']['url'].rstrip('/') + '/',
'data_urls_prefix_1':
db.config['api_hsc']['data_urls_prefix_1'],
'data': tuple(
db.get_hsc_urls(
db.config['api_hsc']['data_urls_prefix_1'])),
'data_urls_prefix_0': prefix_0,
'data_urls_prefix_1': prefix_1,
'data': urls,
},
separators = (',', ':'),
)
......@@ -445,8 +442,17 @@ class Geist(bottle.Bottle):
except: # Sic.
time.sleep(1)
def template(*args, **kwargs):
def template(
*args,
alternatives_re = re.compile(r'\{\{([^}]*)\}\{([^}]*)\}\}'),
**kwargs,
) -> str:
doc = bottle.template(*args, **kwargs)
if '{{' in doc:
docparts = doc.split('</head>', 1)
if len(docparts) == 2:
docparts[1] = alternatives_re.sub('\g<1>', docparts[1])
doc = f'{docparts[0]}</head>{docparts[1]}'
if 'request' in kwargs:
mark = request.query.mark
if mark:
......
......@@ -47,8 +47,8 @@
<strong style="color:#a01030;font-size:larger">test version.</strong>
% end
<img class="shadow" src="/icons/main.png" alt="Project icon: lavishly furnished initial letter with a painting of Ptolemy using an astrolab."/>
<h1 lang="la" style="margin:0.25em 0 0 0">Ptolemaeus</h1>
<h2 lang="la" style="font-size:1.22em;margin:0 0 0.8em 0">Arabus et Latinus</h2>
<h1 lang="la">Ptolemaeus
<span class="subtitle">Arabus et Latinus</span></h1>
<form action="/filter" method="get">
<input type="search" name="text" value="{{request.query.text}}" aria-label="{{db.glosses['search_term'][lang_id]}}" placeholder="Fulltext search"/>
<div class="droparea shadow">
......
......@@ -160,4 +160,6 @@ def main(path: str):
split(transcriptiondirpath)
if __name__ == '__main__':
main(r'Z:\pal\7. Website\repros\ms\Tehran, Malik Library, 5924 #756\#1135\transcription')
main(r'Z:\pal\7. Website\repros\print\Venice, Erhardus Ratdolt, 1484 #24\#21\transcription')
## main(r'Z:\pal\7. Website\repros\print\Venice, Erhardus Ratdolt, 1484 #24\#28\transcription')
## main(r'Z:\pal\7. Website\repros\print\Venice, Erhardus Ratdolt, 1484 #24\#48\transcription')
......@@ -498,6 +498,7 @@ def main(
for nth, (timestamp, name) in enumerate(items):
if not sys_io.pid_exists(pid):
sys.exit(0)
time.sleep(0.05)
if current_cycle < refresh_cycle and name in filenames_in_db:
continue
path = repros_path + name
......@@ -520,8 +521,8 @@ def main(
full_item, persons, pub_item = get_item(
config, catalog, pub_id, pub_catid)
if not (full_item and pub_item):
log.write('{}: The catalog makes no reply to: {}\n'.format(
pub_id, pub_catid))
#### log.write('{}: The catalog makes no reply to: {}\n'.format(
#### pub_id, pub_catid))
continue
# We do not overwrite an existing item if ``pub_item`` is
# empty, which can happen when the catalog is unavailable.
......
......@@ -15,6 +15,7 @@ import tempfile
import zipfile
from datetime import datetime as DT
from html import unescape
from unicodedata import normalize
from urllib.parse import quote, unquote
import __init__
......@@ -474,6 +475,68 @@ url = "{PERMA_DOMAIN}{infix}{pub_id}",
kwargs['text_lang_id'] = self.default_lang_id
return text, kwargs
def get_urdata_items(
        self,
        lang_id: str,
        pub_id: str,
        form: str,
        path: str,
        num_re: 'Pattern[str]' = re.compile(r'\d+'),
        is_public_bwb: 'Callable[[int, int], bool]' =
            lambda vol, odd_col: vol < 3 or odd_col < 1317,
        ) -> 'Generator':
    '''
    Yield items built from the JSON file found at :param:`path` within
    the folder ``self.repros_path``.

    The second element of the JSON document is read as a sequence of
    entries ``(num, lem, sem, vol, col)``. Nothing is yielded if the
    resolved path does not start with ``self.repros_path``, if
    :param:`pub_id` is not ``'bwb'`` or if :param:`form` is none of the
    forms described below.

    :param lang_id: not used by this method.
    :param pub_id: only ``'bwb'`` is served.
    :param form: selects what is yielded:

        - ``'index'``: one dictionary per entry, with the keys ``'0'``
          and ``'DT_RowId'``.
        - ``'bdo-xml'``: each entry as a tuple.
        - ``'tr_'`` followed by a letter: one HTML table row per entry
          whose lemma begins with that letter (compared in lower case
          and after NFKD normalization).
    :param path: path of the JSON file, relative to ``self.repros_path``.
    :param num_re: finds the first number in the column reference.
    :param is_public_bwb: gets the volume and the odd column number.
        Entries for which it returns false are skipped in the forms
        ``'index'`` and ``'bdo-xml'`` and get no link in the table rows.
    '''
    def get_odd_col(col: str) -> int:
        # The first number found in ``col``, lowered by one if it is even.
        number = int(num_re.search(col).group())
        return number if number % 2 else number - 1

    fullpath = os.path.abspath(os.path.join(self.repros_path, path))
    # NOTE(review): a plain prefix test confines the path only if
    # ``self.repros_path`` ends with a path separator – to be confirmed.
    if not fullpath.startswith(self.repros_path):
        return
    is_rows = form.startswith('tr_')
    if pub_id != 'bwb' or not (form in ('index', 'bdo-xml') or is_rows):
        return
    with open(fullpath, 'rb') as file:
        items = json.load(file)[1]
    if form == 'index':
        # ``pos`` counts all entries, also the ones skipped below.
        for pos, (num, lem, sem, vol, col) in enumerate(items):
            odd_col = get_odd_col(col)
            if not is_public_bwb(vol, odd_col):
                continue
            if sem:
                sem = f' ‘{sem}’'
            yield {
                    '0': {
                        '_': f'''{lem}{sem} <a onclick=\"rI(event,'-/{vol}.{odd_col}.pdf',1,2,1330)\">{vol}:{col}</a>''',
                        'f': lem,
                        's': pos,
                    },
                    'DT_RowId': num,
                    }
    elif form == 'bdo-xml':
        for num, lem, sem, vol, col in items:
            if is_public_bwb(vol, get_odd_col(col)):
                yield (num, lem, sem, vol, col)
    else:
        letter = form.split('_', 1)[1].lower()
        for num, lem, sem, vol, col in items:
            # ``lem[:1]`` rather than ``lem[0]``: an empty lemma must not
            # abort the stream with an ``IndexError``.
            if not normalize('NFKD', lem[:1].lower()).startswith(letter):
                continue
            odd_col = get_odd_col(col)
            if sem:
                sem = f' ‘{sem}’'
            ref = f' <a href="https://publikationen.badw.de/de/{pub_id}/index#{num}">↗</a>' \
                    if is_public_bwb(vol, odd_col) else ''
            vol = parse.arabic_to_roman(vol)
            yield f'<tr><td>{lem}{sem}{ref}</td><td>{vol},{col}</td></tr>\n'
def make_new_empty_db_if_absent(
meta_path: str,
wal_mode: bool,
......
......@@ -19,10 +19,12 @@ executes the module on a server, naming a different second file:
:shell:`python /local/app/code/python/exemplar/geist.py "/local/app/config.ini" "/local/app/config_server_80.ini"`
'''
from gevent import monkey; monkey.patch_all()
import json
try: import regex as re
except ImportError: import re
import sys
import time
from collections import deque
from datetime import datetime
from urllib.parse import quote, unquote
......@@ -149,6 +151,41 @@ class Geist(bottle.Bottle):
'''
redirect('/{}/{}'.format(db.default_lang_id, pub_id))
@self.route('/<lang_id>/api/<pub_id>/<form>/<path:path>')
def return_urdata_items(lang_id, pub_id, form, path):
    '''
    Stream what ``db.get_urdata_items`` makes of the urdata file found at
    :param:`path`, in the shape selected by :param:`form`. Example URLs:

    - ``/de/api/bwb/index/bwb/urdata.json``: one JSON object whose key
      ``data`` holds all items.
    - ``/de/api/bwb/bdo-xml/bwb/urdata.json``: an XML document with one
      rendering of ``bdo-xml.tpl`` per item.
    - ``/de/api/bwb/tr_a/bwb/urdata.json``: the items as they come.

    A :param:`pub_id` containing ``#`` is answered with HTTP error 423.
    '''
    if '#' in pub_id:
        raise HTTPError(423, db.glosses['httperror423'][lang_id])
    if form.startswith('tr_'):
        yield from db.get_urdata_items(lang_id, pub_id, form, path)
    elif form == 'index':
        response.content_type = 'application/json'
        rows = tuple(db.get_urdata_items(lang_id, pub_id, form, path))
        yield json.dumps({'data': rows}, separators = (',', ':'))
    elif form == 'bdo-xml':
        response.content_type = 'application/xml'
        yield '<?xml version = "1.0" encoding = "UTF-8"?>\n<bdo>\n'
        for item in db.get_urdata_items(lang_id, pub_id, form, path):
            context = {
                    'pub_id': pub_id,
                    'path': path,
                    'item': item,
                    }
            yield template(
                    'bdo-xml.tpl', request = request, db = db,
                    kwargs = context)
        yield '\n</bdo>'
    # Deliberately no final ``else: raise HTTPError``: in a generator it
    # would only be raised after the stream.
@self.route('/<_>/api/oai')
@self.route('/<_>/api/oai', method = 'POST')
def return_oai_filedata(
......@@ -401,18 +438,22 @@ class Geist(bottle.Bottle):
Such a subfolder may e.g. contain a file “index.json” (containing
data for an overview of images) and belonging images.
If the file specified by :param:`path` is named ``'urdata.json'``,
this file is preprocessed to generate a file of the structure apt
for a datatable presentation.
:param lang_id: See :func:`.return_page`.
:param form: See :func:`return_pub`.
:param path: the relative path to the static file, seen from said
subfolder.
'''
match = db.filename_re.search(pub_id)
if '#' in pub_id:
raise HTTPError(423, db.glosses['httperror423'][lang_id])
return static_file(
path,
db.repros_path + pub_id,
download = bool(request.query.download))
else:
return static_file(
path,
db.repros_path + pub_id,
download = bool(request.query.download))
@self.route('/<lang_id>/<pub_id>/<sub_id>')
@self.route('/<lang_id>/<pub_id>/<form>/<lic>')
......
% # Licensed under http://www.apache.org/licenses/LICENSE-2.0
% # Attribution notice: by Stefan Müller in 2015 ff. (© http://badw.de)
% # Renders one ``<artikel>`` element from ``kwargs['pub_id']`` and the
% # five-part ``kwargs['item']``, which is unpacked in the next line.
% num, lem, sem, vol, col = kwargs['item']
<artikel id="{{kwargs['pub_id']}}_{{num}}" wb="{{kwargs['pub_id']}}" handverlesen="nein" dummy="ja">
<lemma-position>
<such-lemma>{{lem}}</such-lemma>
<lemma>{{lem}}</lemma>
</lemma-position>
<bedeutung-position>
% # The ``<bedeutung>`` element is left out if ``sem`` is empty.
% if sem:
<bedeutung>{{sem}}</bedeutung>
% end
<verweis ziel-extern="https://publikationen.badw.de/de/{{kwargs['pub_id']}}/index#{{num}}">Band {{vol}}, Spalte {{col}}</verweis>
</bedeutung-position>
</artikel>
\ No newline at end of file
......@@ -54,6 +54,7 @@ def main(
for nth, name in enumerate(os.listdir(repros_path)):
if not sys_io.pid_exists(pid):
sys.exit(0)
time.sleep(0.5)
match = filename_re.search(name)
if not match:
continue
......
......@@ -496,7 +496,7 @@ def format_title_sorttitle(
if __name__ == '__main__':
bvs = r'''
BV046754224
BV003389937
'''
bvs = [ bv.strip() for bv in bvs.split() if bv.strip() ]
catalog = Catalog('http://bvbr.bib-bvb.de:5661/bvb01sru')
......
......@@ -26,7 +26,7 @@ from datetime import datetime
from itertools import zip_longest
from json import loads
DIFF_CHAR = '+'
DIFF_CHAR = '-'
class ConfigParser(configparser.ConfigParser):
def __init__(self):
......
......@@ -702,14 +702,12 @@ def disambiguate_segmentation(line: str) -> str:
line[field_nth] = instance[0] + ' ' + instance[1] + instance[2]
return '\t'.join(line)
def even_out(ms_form, del_whitespace = True, del_all_diacritics = True):
def even_out(ms_form, del_all_diacritics = True, del_whitespace = True):
'''
:param del_all_diacritics: If true, delete also consonant and vowel letters
which are superscribed or written in ligature, but not the Nasalstrich
nor the r-abbreviation.
'''
if del_whitespace:
ms_form = sub(r'\s', '', ms_form)
ms_form = compare.even_out(
ms_form,
erase_re = re.compile(r'\\[^-]') if del_all_diacritics else None,
......@@ -724,6 +722,8 @@ def even_out(ms_form, del_whitespace = True, del_all_diacritics = True):
),
)
ms_form = re.sub('[0-9]', '', ms_form)
if del_whitespace:
ms_form = sub(r'\s', '', ms_form)
return ms_form
def even_out_a_ja_lemma(lemma: str) -> str:
......@@ -1276,29 +1276,49 @@ def search(inpath, outpath, size, results):
for n in range(len(lines)):
(lem, lex, art, mor, norm, form, ordn, stat, anm, gra, *_) = [ cell[1:] for cell in lines[n].split('\t') ] + ['', '', '']
#>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
if lem.split()[0] != 'kristen': continue
if art != 'adj':
print(art)
else:
if gra.startswith('│'):
continue
sign = art
else:
gra = '│' + gra
gra = sub(r'│([uvw])=w│=u(o|\\o)│', '│\g<1>=w\g<2>│', gra)
if (
s(r'·[0123]·', ordn) and
'*' not in lex and
'N' not in art and
len(gra) > 2 and
gra[0] == '│' and
gra[-1] == '│' and
not (set(gra) & {'!', '*', '[', '('})
):