Notice to GitKraken users: A vulnerability has been found in the SSH key generation in GitKraken versions 7.6.0 to 8.0.0 (https://www.gitkraken.com/blog/weak-ssh-key-fix). If you use GitKraken and have generated an SSH key using one of these versions, please remove it both from your local workstation and from your LRZ GitLab profile.

21.10.2021, 09:00 - 11:00: Due to updates, GitLab may be unavailable for a few minutes between 09:00 and 11:00.

Commit 90a003a3 authored by eckhart
Browse files

- examples adjusted

parent a8bf0742
......@@ -129,6 +129,8 @@ def logging(dirname="LOGS"):
except AttributeError:
save = ""
GLOBALS.LOGGING = dirname or ""
if dirname and not os.path.exists(dirname):
os.mkdir(dirname)
yield
GLOBALS.LOGGING = save
......
......@@ -22,7 +22,7 @@ from DHParser import is_filename, Grammar, Compiler, Lookbehind, Alternative, Po
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \
PreprocessorFunc, TransformationDict, \
Node, TransformationFunc, traverse, remove_children_if, is_anonymous, \
reduce_single_child, replace_by_single_child, remove_whitespace, \
reduce_single_child, replace_by_single_child, remove_whitespace, reduce_anonymous_nodes, \
flatten, is_empty, collapse, replace_content, replace_content_by, remove_brackets, \
is_one_of, traverse_locally, remove_tokens, remove_nodes, TOKEN_PTYPE, Error, GLOBALS
from DHParser.log import logging
......@@ -191,7 +191,7 @@ def streamline_whitespace(context):
def watch(node):
print(node.as_sxpr())
flatten_structure = flatten(lambda context: is_anonymous(context) or is_one_of(
flatten_structure = flatten(lambda context: is_one_of(
context, {"Chapters", "Sections", "SubSections", "SubSubSections", "Paragraphs",
"SubParagraphs", "sequence"}), recursive=True)
......@@ -219,7 +219,7 @@ drop_expendables = remove_children_if(lambda context: is_empty(context) or
LaTeX_AST_transformation_table = {
# AST Transformations for the LaTeX-grammar
"<": [drop_expendables, flatten_structure],
"<": [reduce_anonymous_nodes, flatten_structure],
"latexdoc": [],
"preamble": [traverse_locally({'<': remove_whitespace, 'block': replace_by_single_child})],
"document": [flatten_structure],
......
......@@ -64,8 +64,6 @@ def count_nodes(tree, condition=lambda n: True):
def tst_func():
with DHParser.log.logging(LOGGING):
if not os.path.exists('REPORT'):
os.mkdir('REPORT')
files = os.listdir('testdata')
files.sort()
for file in files:
......@@ -80,7 +78,7 @@ def tst_func():
# lambda n: not bool(n.result))))
if DHParser.log.is_logging():
print('Saving CST')
with open('REPORT/' + file[:-4] + '.cst', 'w', encoding='utf-8') as f:
with open('LOGS/' + file[:-4] + '.cst', 'w', encoding='utf-8') as f:
f.write(result.as_sxpr(compact=True))
print('Saving parsing history')
log_parsing_history(parser, os.path.basename(file), html=True)
......
......@@ -10,6 +10,7 @@
@ literalws = none # literals have no implicit whitespace
@ comment = // # no implicit comments
@ ignorecase = False # literals and regular expressions are case-sensitive
@ drop = token, whitespace # drop anonymous tokens and whitespace
#######################################################################
......
......@@ -25,8 +25,8 @@ from DHParser import logging, is_filename, load_if_file, \
Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, Capture, \
ZeroOrMore, Forward, NegativeLookahead, Required, mixin_comment, compile_source, \
grammar_changed, last_value, counterpart, accumulate, PreprocessorFunc, \
Node, TransformationFunc, TransformationDict, Token, \
traverse, remove_children_if, is_anonymous, GLOBALS, \
Node, TransformationFunc, TransformationDict, Token, DropToken, DropWhitespace, \
traverse, remove_children_if, is_anonymous, GLOBALS, reduce_anonymous_nodes, \
reduce_single_child, replace_by_single_child, replace_or_reduce, remove_whitespace, \
remove_expendables, remove_empty, remove_tokens, flatten, is_whitespace, \
is_empty, is_expendable, collapse, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE, \
......@@ -67,18 +67,19 @@ class XMLGrammar(Grammar):
extSubsetDecl = Forward()
ignoreSectContents = Forward()
markupdecl = Forward()
source_hash__ = "1c64c8f613952c5ab8e851da15f65ec3"
source_hash__ = "afe79281456bb2625a0c90c58a699d32"
parser_initialization__ = ["upon instantiation"]
resume_rules__ = {}
COMMENT__ = r''
WHITESPACE__ = r'\s*'
WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
dwsp__ = DropWhitespace(WSP_RE__)
wsp__ = Whitespace(WSP_RE__)
EOF = NegativeLookahead(RegExp('.'))
S = RegExp('\\s+')
Char = RegExp('\\x09|\\x0A|\\x0D|[\\u0020-\\uD7FF]|[\\uE000-\\uFFFD]|[\\U00010000-\\U0010FFFF]')
Chars = RegExp('(?:\\x09|\\x0A|\\x0D|[\\u0020-\\uD7FF]|[\\uE000-\\uFFFD]|[\\U00010000-\\U0010FFFF])+')
CharRef = Alternative(Series(Token('&#'), RegExp('[0-9]+'), Token(';')), Series(Token('&#x'), RegExp('[0-9a-fA-F]+'), Token(';')))
CharRef = Alternative(Series(DropToken('&#'), RegExp('[0-9]+'), DropToken(';')), Series(DropToken('&#x'), RegExp('[0-9a-fA-F]+'), DropToken(';')))
CommentChars = RegExp('(?:(?!-)(?:\\x09|\\x0A|\\x0D|[\\u0020-\\uD7FF]|[\\uE000-\\uFFFD]|[\\U00010000-\\U0010FFFF]))+')
PIChars = RegExp('(?:(?!\\?>)(?:\\x09|\\x0A|\\x0D|[\\u0020-\\uD7FF]|[\\uE000-\\uFFFD]|[\\U00010000-\\U0010FFFF]))+')
IgnoreChars = RegExp('(?:(?!(?:<!\\[)|(?:\\]\\]>))(?:\\x09|\\x0A|\\x0D|[\\u0020-\\uD7FF]|[\\uE000-\\uFFFD]|[\\U00010000-\\U0010FFFF]))+')
......@@ -86,10 +87,10 @@ class XMLGrammar(Grammar):
CharData = RegExp('(?:(?!\\]\\]>)[^<&])+')
PubidChars = RegExp("(?:\\x20|\\x0D|\\x0A|[a-zA-Z0-9]|[-'()+,./:=?;!*#@$_%])+")
PubidCharsSingleQuoted = RegExp('(?:\\x20|\\x0D|\\x0A|[a-zA-Z0-9]|[-()+,./:=?;!*#@$_%])+')
CDSect = Series(Token('<![CDATA['), CData, Token(']]>'))
CDSect = Series(DropToken('<![CDATA['), CData, DropToken(']]>'))
PITarget = Series(NegativeLookahead(RegExp('X|xM|mL|l')), Name)
PI = Series(Token('<?'), PITarget, Option(Series(wsp__, PIChars)), Token('?>'))
Comment = Series(Token('<!--'), ZeroOrMore(Alternative(CommentChars, RegExp('-(?!-)'))), Token('-->'))
PI = Series(DropToken('<?'), PITarget, Option(Series(dwsp__, PIChars)), DropToken('?>'))
Comment = Series(DropToken('<!--'), ZeroOrMore(Alternative(CommentChars, RegExp('-(?!-)'))), DropToken('-->'))
Misc = OneOrMore(Alternative(Comment, PI, S))
NameChars = RegExp('(?x)(?:_|:|-|\\.|[A-Z]|[a-z]|[0-9]\n |\\u00B7|[\\u0300-\\u036F]|[\\u203F-\\u2040]\n |[\\u00C0-\\u00D6]|[\\u00D8-\\u00F6]|[\\u00F8-\\u02FF]\n |[\\u0370-\\u037D]|[\\u037F-\\u1FFF]|[\\u200C-\\u200D]\n |[\\u2070-\\u218F]|[\\u2C00-\\u2FEF]|[\\u3001-\\uD7FF]\n |[\\uF900-\\uFDCF]|[\\uFDF0-\\uFFFD]\n |[\\U00010000-\\U000EFFFF])+')
NameStartChar = RegExp('(?x)_|:|[A-Z]|[a-z]\n |[\\u00C0-\\u00D6]|[\\u00D8-\\u00F6]|[\\u00F8-\\u02FF]\n |[\\u0370-\\u037D]|[\\u037F-\\u1FFF]|[\\u200C-\\u200D]\n |[\\u2070-\\u218F]|[\\u2C00-\\u2FEF]|[\\u3001-\\uD7FF]\n |[\\uF900-\\uFDCF]|[\\uFDF0-\\uFFFD]\n |[\\U00010000-\\U000EFFFF]')
......@@ -97,35 +98,35 @@ class XMLGrammar(Grammar):
Names = Series(Name, ZeroOrMore(Series(RegExp(' '), Name)))
Nmtoken = Synonym(NameChars)
Nmtokens = Series(Nmtoken, ZeroOrMore(Series(RegExp(' '), Nmtoken)))
PEReference = Series(Token('%'), Name, Token(';'))
EntityRef = Series(Token('&'), Name, Token(';'))
PEReference = Series(DropToken('%'), Name, DropToken(';'))
EntityRef = Series(DropToken('&'), Name, DropToken(';'))
Reference = Alternative(EntityRef, CharRef)
PubidLiteral = Alternative(Series(Token('"'), Option(PubidChars), Token('"')), Series(Token("'"), Option(PubidCharsSingleQuoted), Token("'")))
SystemLiteral = Alternative(Series(Token('"'), RegExp('[^"]*'), Token('"')), Series(Token("'"), RegExp("[^']*"), Token("'")))
AttValue = Alternative(Series(Token('"'), ZeroOrMore(Alternative(RegExp('[^<&"]+'), Reference)), Token('"')), Series(Token("'"), ZeroOrMore(Alternative(RegExp("[^<&']+"), Reference)), Token("'")))
EntityValue = Alternative(Series(Token('"'), ZeroOrMore(Alternative(RegExp('[^%&"]+'), PEReference, Reference)), Token('"')), Series(Token("'"), ZeroOrMore(Alternative(RegExp("[^%&']+"), PEReference, Reference)), Token("'")))
PubidLiteral = Alternative(Series(DropToken('"'), Option(PubidChars), DropToken('"')), Series(DropToken("'"), Option(PubidCharsSingleQuoted), DropToken("'")))
SystemLiteral = Alternative(Series(DropToken('"'), RegExp('[^"]*'), DropToken('"')), Series(DropToken("'"), RegExp("[^']*"), DropToken("'")))
AttValue = Alternative(Series(DropToken('"'), ZeroOrMore(Alternative(RegExp('[^<&"]+'), Reference)), DropToken('"')), Series(DropToken("'"), ZeroOrMore(Alternative(RegExp("[^<&']+"), Reference)), DropToken("'")))
EntityValue = Alternative(Series(DropToken('"'), ZeroOrMore(Alternative(RegExp('[^%&"]+'), PEReference, Reference)), DropToken('"')), Series(DropToken("'"), ZeroOrMore(Alternative(RegExp("[^%&']+"), PEReference, Reference)), DropToken("'")))
content = Series(Option(CharData), ZeroOrMore(Series(Alternative(element, Reference, CDSect, PI, Comment), Option(CharData))))
Attribute = Series(Name, wsp__, Token('='), wsp__, AttValue, mandatory=2)
Attribute = Series(Name, dwsp__, DropToken('='), dwsp__, AttValue, mandatory=2)
TagName = Capture(Name)
emptyElement = Series(Token('<'), Name, ZeroOrMore(Series(wsp__, Attribute)), wsp__, Token('/>'))
ETag = Series(Token('</'), Pop(TagName), wsp__, Token('>'), mandatory=1)
STag = Series(Token('<'), TagName, ZeroOrMore(Series(wsp__, Attribute)), wsp__, Token('>'))
emptyElement = Series(DropToken('<'), Name, ZeroOrMore(Series(dwsp__, Attribute)), dwsp__, DropToken('/>'))
ETag = Series(DropToken('</'), Pop(TagName), dwsp__, DropToken('>'), mandatory=1)
STag = Series(DropToken('<'), TagName, ZeroOrMore(Series(dwsp__, Attribute)), dwsp__, DropToken('>'))
element.set(Alternative(emptyElement, Series(STag, content, ETag, mandatory=1)))
NDataDecl = Series(Token('NData'), S, Name, mandatory=1)
PublicID = Series(Token('PUBLIC'), S, PubidLiteral, mandatory=1)
ExternalID = Series(Token('SYSTEM'), S, SystemLiteral, mandatory=1)
NotationDecl = Series(Token('<!NOTATION'), S, Name, wsp__, Alternative(ExternalID, PublicID), wsp__, Token('>'), mandatory=1)
NDataDecl = Series(DropToken('NData'), S, Name, mandatory=1)
PublicID = Series(DropToken('PUBLIC'), S, PubidLiteral, mandatory=1)
ExternalID = Series(DropToken('SYSTEM'), S, SystemLiteral, mandatory=1)
NotationDecl = Series(DropToken('<!NOTATION'), S, Name, dwsp__, Alternative(ExternalID, PublicID), dwsp__, DropToken('>'), mandatory=1)
PEDef = Alternative(EntityValue, ExternalID)
EntityDef = Alternative(EntityValue, Series(ExternalID, Option(NDataDecl)))
PEDecl = Series(Token('<!ENTITY'), S, Token('%'), S, Name, S, PEDef, wsp__, Token('>'), mandatory=3)
GEDecl = Series(Token('<!ENTITY'), S, Name, S, EntityDef, wsp__, Token('>'), mandatory=3)
PEDecl = Series(DropToken('<!ENTITY'), S, DropToken('%'), S, Name, S, PEDef, dwsp__, DropToken('>'), mandatory=3)
GEDecl = Series(DropToken('<!ENTITY'), S, Name, S, EntityDef, dwsp__, DropToken('>'), mandatory=3)
EntityDecl = Alternative(GEDecl, PEDecl)
FIXED = Series(Option(Series(Token('#FIXED'), S)), AttValue)
FIXED = Series(Option(Series(DropToken('#FIXED'), S)), AttValue)
IMPLIED = Token('#IMPLIED')
REQUIRED = Token('#REQUIRED')
DefaultDecl = Alternative(REQUIRED, IMPLIED, FIXED)
Enumeration = Series(Token('('), wsp__, Nmtoken, ZeroOrMore(Series(wsp__, Token('|'), wsp__, Nmtoken)), wsp__, Token(')'))
NotationType = Series(Token('NOTATION'), S, Token('('), wsp__, Name, ZeroOrMore(Series(wsp__, Token('|'), wsp__, Name)), wsp__, Token(')'))
Enumeration = Series(DropToken('('), dwsp__, Nmtoken, ZeroOrMore(Series(dwsp__, DropToken('|'), dwsp__, Nmtoken)), dwsp__, DropToken(')'))
NotationType = Series(DropToken('NOTATION'), S, DropToken('('), dwsp__, Name, ZeroOrMore(Series(dwsp__, DropToken('|'), dwsp__, Name)), dwsp__, DropToken(')'))
EnumeratedType = Alternative(NotationType, Enumeration)
NMTOKENS = Token('NMTOKENS')
NMTOKEN = Token('NMTOKEN')
......@@ -137,50 +138,50 @@ class XMLGrammar(Grammar):
TokenizedType = Alternative(ID, IDREF, IDREFS, ENTITY, ENTITIES, NMTOKEN, NMTOKENS)
StringType = Token('CDATA')
AttType = Alternative(StringType, TokenizedType, EnumeratedType)
AttDef = Series(Name, wsp__, AttType, S, DefaultDecl, mandatory=2)
AttlistDecl = Series(Token('<!ATTLIST'), S, Name, ZeroOrMore(Series(wsp__, AttDef)), wsp__, Token('>'), mandatory=1)
seq = Series(Token('('), wsp__, cp, ZeroOrMore(Series(wsp__, Token(','), wsp__, cp)), wsp__, Token(')'))
cp.set(Series(Alternative(Name, choice, seq), Option(Alternative(Token('?'), Token('*'), Token('+')))))
choice.set(Series(Token('('), wsp__, OneOrMore(Series(wsp__, Token('|'), wsp__, cp)), wsp__, Token(')')))
children = Series(Alternative(choice, seq), Option(Alternative(Token('?'), Token('*'), Token('+'))))
Mixed = Alternative(Series(Token('('), wsp__, Token('#PCDATA'), ZeroOrMore(Series(wsp__, Token('|'), wsp__, Name)), wsp__, Token(')*')), Series(Token('('), wsp__, Token('#PCDATA'), wsp__, Token(')')))
AttDef = Series(Name, dwsp__, AttType, S, DefaultDecl, mandatory=2)
AttlistDecl = Series(DropToken('<!ATTLIST'), S, Name, ZeroOrMore(Series(dwsp__, AttDef)), dwsp__, DropToken('>'), mandatory=1)
seq = Series(DropToken('('), dwsp__, cp, ZeroOrMore(Series(dwsp__, DropToken(','), dwsp__, cp)), dwsp__, DropToken(')'))
cp.set(Series(Alternative(Name, choice, seq), Option(Alternative(DropToken('?'), DropToken('*'), DropToken('+')))))
choice.set(Series(DropToken('('), dwsp__, OneOrMore(Series(dwsp__, DropToken('|'), dwsp__, cp)), dwsp__, DropToken(')')))
children = Series(Alternative(choice, seq), Option(Alternative(DropToken('?'), DropToken('*'), DropToken('+'))))
Mixed = Alternative(Series(DropToken('('), dwsp__, DropToken('#PCDATA'), ZeroOrMore(Series(dwsp__, DropToken('|'), dwsp__, Name)), dwsp__, DropToken(')*')), Series(DropToken('('), dwsp__, DropToken('#PCDATA'), dwsp__, DropToken(')')))
ANY = Token('ANY')
EMPTY = Token('EMPTY')
contentspec = Alternative(EMPTY, ANY, Mixed, children)
elementdecl = Series(Token('<!ELEMENT'), S, Name, wsp__, contentspec, wsp__, Token('>'), mandatory=1)
TextDecl = Series(Token('<?xml'), Option(VersionInfo), EncodingDecl, wsp__, Token('?>'))
elementdecl = Series(DropToken('<!ELEMENT'), S, Name, dwsp__, contentspec, dwsp__, DropToken('>'), mandatory=1)
TextDecl = Series(DropToken('<?xml'), Option(VersionInfo), EncodingDecl, dwsp__, DropToken('?>'))
extParsedEnt = Series(Option(TextDecl), content)
ignoreSectContents.set(Series(IgnoreChars, ZeroOrMore(Series(Token('<!['), ignoreSectContents, Token(']]>'), IgnoreChars))))
ignoreSect = Series(Token('<!['), wsp__, Token('IGNORE'), wsp__, Token('['), ignoreSectContents, Token(']]>'))
includeSect = Series(Token('<!['), wsp__, Token('INCLUDE'), wsp__, Token('['), extSubsetDecl, Token(']]>'))
ignoreSectContents.set(Series(IgnoreChars, ZeroOrMore(Series(DropToken('<!['), ignoreSectContents, DropToken(']]>'), IgnoreChars))))
ignoreSect = Series(DropToken('<!['), dwsp__, DropToken('IGNORE'), dwsp__, DropToken('['), ignoreSectContents, DropToken(']]>'))
includeSect = Series(DropToken('<!['), dwsp__, DropToken('INCLUDE'), dwsp__, DropToken('['), extSubsetDecl, DropToken(']]>'))
conditionalSect = Alternative(includeSect, ignoreSect)
extSubsetDecl.set(ZeroOrMore(Alternative(markupdecl, conditionalSect, DeclSep)))
extSubset = Series(Option(TextDecl), extSubsetDecl)
markupdecl.set(Alternative(elementdecl, AttlistDecl, EntityDecl, NotationDecl, PI, Comment))
DeclSep.set(Alternative(PEReference, S))
intSubset = ZeroOrMore(Alternative(markupdecl, DeclSep))
doctypedecl = Series(Token('<!DOCTYPE'), wsp__, Name, Option(Series(wsp__, ExternalID)), wsp__, Option(Series(Token('['), intSubset, Token(']'), wsp__)), Token('>'))
doctypedecl = Series(DropToken('<!DOCTYPE'), dwsp__, Name, Option(Series(dwsp__, ExternalID)), dwsp__, Option(Series(DropToken('['), intSubset, DropToken(']'), dwsp__)), DropToken('>'))
No = Token('no')
Yes = Token('yes')
SDDecl = Series(wsp__, Token('standalone'), wsp__, Token('='), wsp__, Alternative(Alternative(Series(Token("'"), Yes), Series(No, Token("'"))), Alternative(Series(Token('"'), Yes), Series(No, Token('"')))))
SDDecl = Series(dwsp__, DropToken('standalone'), dwsp__, DropToken('='), dwsp__, Alternative(Alternative(Series(DropToken("'"), Yes), Series(No, DropToken("'"))), Alternative(Series(DropToken('"'), Yes), Series(No, DropToken('"')))))
EncName = RegExp('[A-Za-z][A-Za-z0-9._\\-]*')
EncodingDecl.set(Series(wsp__, Token('encoding'), wsp__, Token('='), wsp__, Alternative(Series(Token("'"), EncName, Token("'")), Series(Token('"'), EncName, Token('"')))))
EncodingDecl.set(Series(dwsp__, DropToken('encoding'), dwsp__, DropToken('='), dwsp__, Alternative(Series(DropToken("'"), EncName, DropToken("'")), Series(DropToken('"'), EncName, DropToken('"')))))
VersionNum = RegExp('[0-9]+\\.[0-9]+')
VersionInfo.set(Series(wsp__, Token('version'), wsp__, Token('='), wsp__, Alternative(Series(Token("'"), VersionNum, Token("'")), Series(Token('"'), VersionNum, Token('"')))))
XMLDecl = Series(Token('<?xml'), VersionInfo, Option(EncodingDecl), Option(SDDecl), wsp__, Token('?>'))
prolog = Series(Option(Series(wsp__, XMLDecl)), Option(Misc), Option(Series(doctypedecl, Option(Misc))))
VersionInfo.set(Series(dwsp__, DropToken('version'), dwsp__, DropToken('='), dwsp__, Alternative(Series(DropToken("'"), VersionNum, DropToken("'")), Series(DropToken('"'), VersionNum, DropToken('"')))))
XMLDecl = Series(DropToken('<?xml'), VersionInfo, Option(EncodingDecl), Option(SDDecl), dwsp__, DropToken('?>'))
prolog = Series(Option(Series(dwsp__, XMLDecl)), Option(Misc), Option(Series(doctypedecl, Option(Misc))))
document = Series(prolog, element, Option(Misc), EOF)
root__ = document
def get_grammar() -> XMLGrammar:
global GLOBALS
try:
grammar = GLOBALS.XML_1_grammar_singleton
grammar = GLOBALS.XML_00000001_grammar_singleton
except AttributeError:
GLOBALS.XML_1_grammar_singleton = XMLGrammar()
GLOBALS.XML_00000001_grammar_singleton = XMLGrammar()
if hasattr(get_grammar, 'python_src__'):
GLOBALS.XML_1_grammar_singleton.python_src__ = get_grammar.python_src__
grammar = GLOBALS.XML_1_grammar_singleton
GLOBALS.XML_00000001_grammar_singleton.python_src__ = get_grammar.python_src__
grammar = GLOBALS.XML_00000001_grammar_singleton
return grammar
......@@ -193,7 +194,7 @@ def get_grammar() -> XMLGrammar:
XML_AST_transformation_table = {
# AST Transformations for the XML-grammar
"<": [remove_empty, remove_anonymous_tokens, remove_whitespace, remove_nodes("S")],
"<": [reduce_anonymous_nodes, remove_empty, remove_anonymous_tokens, remove_whitespace, remove_nodes("S")],
"document": [flatten(lambda context: context[-1].tag_name == 'prolog', recursive=False)],
"prolog": [],
"XMLDecl": [],
......
......@@ -11,6 +11,7 @@
@ literalws = none # literals have no implicit whitespace
@ comment = // # no implicit comments
@ ignorecase = False # literals and regular expressions are case-sensitive
@ drop = Token, Whitespace
#######################################################################
......
......@@ -18,7 +18,8 @@ try:
import regex as re
except ImportError:
import re
from DHParser import logging, is_filename, load_if_file, Grammar, Compiler, nil_preprocessor, PreprocessorToken, Whitespace, \
from DHParser import logging, is_filename, load_if_file, Grammar, Compiler, nil_preprocessor, \
PreprocessorToken, Whitespace, DropWhitespace, DropToken, reduce_anonymous_nodes, \
Lookbehind, Lookahead, Alternative, Pop, Token, Synonym, AllOf, SomeOf, Unordered, \
Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, Capture, \
ZeroOrMore, Forward, NegativeLookahead, Required, mixin_comment, compile_source, \
......@@ -59,18 +60,19 @@ class XMLSnippetGrammar(Grammar):
"""
Name = Forward()
element = Forward()
source_hash__ = "2efb839574bee3f63b5b9d1ea5c96386"
source_hash__ = "49e51a7b2ad79e95ba239427830ba02f"
parser_initialization__ = ["upon instantiation"]
resume_rules__ = {}
COMMENT__ = r''
WHITESPACE__ = r'\s*'
WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
dwsp__ = DropWhitespace(WSP_RE__)
wsp__ = Whitespace(WSP_RE__)
EOF = NegativeLookahead(RegExp('.'))
S = RegExp('\\s+')
Char = RegExp('\\x09|\\x0A|\\x0D|[\\u0020-\\uD7FF]|[\\uE000-\\uFFFD]|[\\U00010000-\\U0010FFFF]')
Chars = RegExp('(?:\\x09|\\x0A|\\x0D|[\\u0020-\\uD7FF]|[\\uE000-\\uFFFD]|[\\U00010000-\\U0010FFFF])+')
CharRef = Alternative(Series(Token('&#'), RegExp('[0-9]+'), Token(';')), Series(Token('&#x'), RegExp('[0-9a-fA-F]+'), Token(';')))
CharRef = Alternative(Series(DropToken('&#'), RegExp('[0-9]+'), DropToken(';')), Series(DropToken('&#x'), RegExp('[0-9a-fA-F]+'), DropToken(';')))
CommentChars = RegExp('(?:(?!-)(?:\\x09|\\x0A|\\x0D|[\\u0020-\\uD7FF]|[\\uE000-\\uFFFD]|[\\U00010000-\\U0010FFFF]))+')
PIChars = RegExp('(?:(?!\\?>)(?:\\x09|\\x0A|\\x0D|[\\u0020-\\uD7FF]|[\\uE000-\\uFFFD]|[\\U00010000-\\U0010FFFF]))+')
IgnoreChars = RegExp('(?:(?!(?:<!\\[)|(?:\\]\\]>))(?:\\x09|\\x0A|\\x0D|[\\u0020-\\uD7FF]|[\\uE000-\\uFFFD]|[\\U00010000-\\U0010FFFF]))+')
......@@ -78,10 +80,10 @@ class XMLSnippetGrammar(Grammar):
CharData = RegExp('(?:(?!\\]\\]>)[^<&])+')
PubidChars = RegExp("(?:\\x20|\\x0D|\\x0A|[a-zA-Z0-9]|[-'()+,./:=?;!*#@$_%])+")
PubidCharsSingleQuoted = RegExp('(?:\\x20|\\x0D|\\x0A|[a-zA-Z0-9]|[-()+,./:=?;!*#@$_%])+')
CDSect = Series(Token('<![CDATA['), CData, Token(']]>'))
CDSect = Series(DropToken('<![CDATA['), CData, DropToken(']]>'))
PITarget = Series(NegativeLookahead(RegExp('X|xM|mL|l')), Name)
PI = Series(Token('<?'), PITarget, Option(Series(wsp__, PIChars)), Token('?>'))
Comment = Series(Token('<!--'), ZeroOrMore(Alternative(CommentChars, RegExp('-(?!-)'))), Token('-->'))
PI = Series(DropToken('<?'), PITarget, Option(Series(dwsp__, PIChars)), DropToken('?>'))
Comment = Series(DropToken('<!--'), ZeroOrMore(Alternative(CommentChars, RegExp('-(?!-)'))), DropToken('-->'))
Misc = OneOrMore(Alternative(Comment, PI, S))
NameChars = RegExp('(?x)(?:_|:|-|\\.|[A-Z]|[a-z]|[0-9]\n |\\u00B7|[\\u0300-\\u036F]|[\\u203F-\\u2040]\n |[\\u00C0-\\u00D6]|[\\u00D8-\\u00F6]|[\\u00F8-\\u02FF]\n |[\\u0370-\\u037D]|[\\u037F-\\u1FFF]|[\\u200C-\\u200D]\n |[\\u2070-\\u218F]|[\\u2C00-\\u2FEF]|[\\u3001-\\uD7FF]\n |[\\uF900-\\uFDCF]|[\\uFDF0-\\uFFFD]\n |[\\U00010000-\\U000EFFFF])+')
NameStartChar = RegExp('(?x)_|:|[A-Z]|[a-z]\n |[\\u00C0-\\u00D6]|[\\u00D8-\\u00F6]|[\\u00F8-\\u02FF]\n |[\\u0370-\\u037D]|[\\u037F-\\u1FFF]|[\\u200C-\\u200D]\n |[\\u2070-\\u218F]|[\\u2C00-\\u2FEF]|[\\u3001-\\uD7FF]\n |[\\uF900-\\uFDCF]|[\\uFDF0-\\uFFFD]\n |[\\U00010000-\\U000EFFFF]')
......@@ -89,44 +91,44 @@ class XMLSnippetGrammar(Grammar):
Names = Series(Name, ZeroOrMore(Series(RegExp(' '), Name)))
Nmtoken = Synonym(NameChars)
Nmtokens = Series(Nmtoken, ZeroOrMore(Series(RegExp(' '), Nmtoken)))
PEReference = Series(Token('%'), Name, Token(';'))
EntityRef = Series(Token('&'), Name, Token(';'))
PEReference = Series(DropToken('%'), Name, DropToken(';'))
EntityRef = Series(DropToken('&'), Name, DropToken(';'))
Reference = Alternative(EntityRef, CharRef)
PubidLiteral = Alternative(Series(Token('"'), Option(PubidChars), Token('"')), Series(Token("'"), Option(PubidCharsSingleQuoted), Token("'")))
SystemLiteral = Alternative(Series(Token('"'), RegExp('[^"]*'), Token('"')), Series(Token("'"), RegExp("[^']*"), Token("'")))
AttValue = Alternative(Series(Token('"'), ZeroOrMore(Alternative(RegExp('[^<&"]+'), Reference)), Token('"')), Series(Token("'"), ZeroOrMore(Alternative(RegExp("[^<&']+"), Reference)), Token("'")))
EntityValue = Alternative(Series(Token('"'), ZeroOrMore(Alternative(RegExp('[^%&"]+'), PEReference, Reference)), Token('"')), Series(Token("'"), ZeroOrMore(Alternative(RegExp("[^%&']+"), PEReference, Reference)), Token("'")))
PubidLiteral = Alternative(Series(DropToken('"'), Option(PubidChars), DropToken('"')), Series(DropToken("'"), Option(PubidCharsSingleQuoted), DropToken("'")))
SystemLiteral = Alternative(Series(DropToken('"'), RegExp('[^"]*'), DropToken('"')), Series(DropToken("'"), RegExp("[^']*"), DropToken("'")))
AttValue = Alternative(Series(DropToken('"'), ZeroOrMore(Alternative(RegExp('[^<&"]+'), Reference)), DropToken('"')), Series(DropToken("'"), ZeroOrMore(Alternative(RegExp("[^<&']+"), Reference)), DropToken("'")))
EntityValue = Alternative(Series(DropToken('"'), ZeroOrMore(Alternative(RegExp('[^%&"]+'), PEReference, Reference)), DropToken('"')), Series(DropToken("'"), ZeroOrMore(Alternative(RegExp("[^%&']+"), PEReference, Reference)), DropToken("'")))
content = Series(Option(CharData), ZeroOrMore(Series(Alternative(element, Reference, CDSect, PI, Comment), Option(CharData))))
Attribute = Series(Name, wsp__, Token('='), wsp__, AttValue, mandatory=2)
Attribute = Series(Name, dwsp__, DropToken('='), dwsp__, AttValue, mandatory=2)
TagName = Capture(Name)
emptyElement = Series(Token('<'), Name, ZeroOrMore(Series(wsp__, Attribute)), wsp__, Token('/>'))
ETag = Series(Token('</'), Pop(TagName), wsp__, Token('>'), mandatory=1)
STag = Series(Token('<'), TagName, ZeroOrMore(Series(wsp__, Attribute)), wsp__, Token('>'))
emptyElement = Series(DropToken('<'), Name, ZeroOrMore(Series(dwsp__, Attribute)), dwsp__, DropToken('/>'))
ETag = Series(DropToken('</'), Pop(TagName), dwsp__, DropToken('>'), mandatory=1)
STag = Series(DropToken('<'), TagName, ZeroOrMore(Series(dwsp__, Attribute)), dwsp__, DropToken('>'))
element.set(Alternative(emptyElement, Series(STag, content, ETag, mandatory=1)))
intSubset = RegExp('(?:(?!\\][^\\]])[^<&])+')
ExternalID = Series(Token('SYSTEM'), S, SystemLiteral, mandatory=1)
doctypedecl = Series(Token('<!DOCTYPE'), wsp__, Name, Option(Series(wsp__, ExternalID)), wsp__, Option(Series(Token('['), intSubset, Token(']'), wsp__)), Token('>'))
ExternalID = Series(DropToken('SYSTEM'), S, SystemLiteral, mandatory=1)
doctypedecl = Series(DropToken('<!DOCTYPE'), dwsp__, Name, Option(Series(dwsp__, ExternalID)), dwsp__, Option(Series(DropToken('['), intSubset, DropToken(']'), dwsp__)), DropToken('>'))
No = Token('no')
Yes = Token('yes')
SDDecl = Series(wsp__, Token('standalone'), wsp__, Token('='), wsp__, Alternative(Alternative(Series(Token("'"), Yes), Series(No, Token("'"))), Alternative(Series(Token('"'), Yes), Series(No, Token('"')))))
SDDecl = Series(dwsp__, DropToken('standalone'), dwsp__, DropToken('='), dwsp__, Alternative(Alternative(Series(DropToken("'"), Yes), Series(No, DropToken("'"))), Alternative(Series(DropToken('"'), Yes), Series(No, DropToken('"')))))
EncName = RegExp('[A-Za-z][A-Za-z0-9._\\-]*')
EncodingDecl = Series(wsp__, Token('encoding'), wsp__, Token('='), wsp__, Alternative(Series(Token("'"), EncName, Token("'")), Series(Token('"'), EncName, Token('"'))))
EncodingDecl = Series(dwsp__, DropToken('encoding'), dwsp__, DropToken('='), dwsp__, Alternative(Series(DropToken("'"), EncName, DropToken("'")), Series(DropToken('"'), EncName, DropToken('"'))))
VersionNum = RegExp('[0-9]+\\.[0-9]+')
VersionInfo = Series(wsp__, Token('version'), wsp__, Token('='), wsp__, Alternative(Series(Token("'"), VersionNum, Token("'")), Series(Token('"'), VersionNum, Token('"'))))
XMLDecl = Series(Token('<?xml'), VersionInfo, Option(EncodingDecl), Option(SDDecl), wsp__, Token('?>'))
prolog = Series(Option(Series(wsp__, XMLDecl)), Option(Misc), Option(Series(doctypedecl, Option(Misc))))
VersionInfo = Series(dwsp__, DropToken('version'), dwsp__, DropToken('='), dwsp__, Alternative(Series(DropToken("'"), VersionNum, DropToken("'")), Series(DropToken('"'), VersionNum, DropToken('"'))))
XMLDecl = Series(DropToken('<?xml'), VersionInfo, Option(EncodingDecl), Option(SDDecl), dwsp__, DropToken('?>'))
prolog = Series(Option(Series(dwsp__, XMLDecl)), Option(Misc), Option(Series(doctypedecl, Option(Misc))))
document = Series(prolog, element, Option(Misc), EOF)
root__ = document
def get_grammar() -> XMLSnippetGrammar:
global GLOBALS
try:
grammar = GLOBALS.XMLSnippet_1_grammar_singleton
grammar = GLOBALS.XMLSnippet_00000001_grammar_singleton
except AttributeError:
GLOBALS.XMLSnippet_1_grammar_singleton = XMLSnippetGrammar()
GLOBALS.XMLSnippet_00000001_grammar_singleton = XMLSnippetGrammar()
if hasattr(get_grammar, 'python_src__'):
GLOBALS.XMLSnippet_1_grammar_singleton.python_src__ = get_grammar.python_src__
grammar = GLOBALS.XMLSnippet_1_grammar_singleton
GLOBALS.XMLSnippet_00000001_grammar_singleton.python_src__ = get_grammar.python_src__
grammar = GLOBALS.XMLSnippet_00000001_grammar_singleton
return grammar
......@@ -138,7 +140,7 @@ def get_grammar() -> XMLSnippetGrammar:
XMLSnippet_AST_transformation_table = {
# AST Transformations for the XMLSnippet-grammar
"<": remove_empty,
"<": reduce_anonymous_nodes,
"document": [],
"prolog": [],
"XMLDecl": [],
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment