Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
The container registry cleanup task is now completed and the registry can be used normally.
Open sidebar
badw-it
DHParser
Commits
a77b86b4
Commit
a77b86b4
authored
Nov 21, 2019
by
di68kap
Browse files
- cython compatibility enhanced
parent
e49b99c8
Changes
8
Hide whitespace changes
Inline
Side-by-side
DHParser/ebnf.py
View file @
a77b86b4
...
...
@@ -1304,8 +1304,9 @@ class EBNFCompiler(Compiler):
'and not a %s.'
)
%
(
prefix
,
arg
.
tag_name
))
return
arg
.
content
elif
self
.
anonymous_regexp
.
match
(
arg
.
content
):
self
.
tree
.
new_error
(
node
,
(
'Retrie does not work with anonymous parsers like %s'
)
%
(
prefix
,
arg
.
content
))
self
.
tree
.
new_error
(
node
,
(
'Retrive operator "%s" does not work with anonymous parsers like %s'
)
%
(
prefix
,
arg
.
content
))
return
arg
.
content
if
arg
.
content
in
self
.
directives
.
filter
:
custom_args
=
[
'rfilter=%s'
%
self
.
directives
.
filter
[
arg
.
content
]]
...
...
DHParser/parse.pxd
View file @
a77b86b4
...
...
@@ -17,7 +17,7 @@ cdef class Parser:
cpdef
_parse
(
self
,
text
)
cpdef
reset
(
self
)
# def __call__(self, text)
# def __call__(self,
StringView
text)
# def __add__(self, other)
# def __or__(self, other)
cpdef
_parse
(
self
,
text
)
...
...
DHParser/parse.py
View file @
a77b86b4
...
...
@@ -355,7 +355,7 @@ class Parser:
error_node_id
=
0
grammar
=
self
.
_grammar
location
=
grammar
.
document_length__
-
text
.
_
_
len
__
()
# faster than len(text)?
location
=
grammar
.
document_length__
-
text
.
_len
# faster than len(text)?
try
:
# rollback variable changing operation if parser backtracks
...
...
@@ -1545,7 +1545,7 @@ class MetaParser(Parser):
"""
assert
node
is
None
or
isinstance
(
node
,
Node
)
if
self
.
_grammar
.
flatten_tree__
:
if
node
:
if
node
is
not
None
:
if
self
.
anonymous
:
if
self
.
drop_content
:
return
EMPTY_NODE
...
...
@@ -1726,7 +1726,7 @@ class ZeroOrMore(Option):
n
=
len
node
,
text
=
self
.
parser
(
text
)
len
=
text
.
__len__
()
if
no
t
nod
e
:
if
no
de
is
Non
e
:
break
if
node
.
_result
or
not
node
.
tag_name
.
startswith
(
':'
):
# drop anonymous empty nodes
results
+=
(
node
,)
...
...
@@ -1778,7 +1778,7 @@ class OneOrMore(UnaryParser):
n
=
len
node
,
text_
=
self
.
parser
(
text_
)
len
=
text_
.
__len__
()
if
no
t
nod
e
:
if
no
de
is
Non
e
:
break
match_flag
=
True
if
node
.
_result
or
not
node
.
tag_name
.
startswith
(
':'
):
# drop anonymous empty nodes
...
...
@@ -1929,7 +1929,7 @@ class Series(NaryParser):
error
=
None
# type: Optional[Error]
for
pos
,
parser
in
enumerate
(
self
.
parsers
):
node
,
text_
=
parser
(
text_
)
if
no
t
nod
e
:
if
no
de
is
Non
e
:
if
pos
<
self
.
mandatory
:
return
None
,
text
else
:
...
...
@@ -1942,7 +1942,7 @@ class Series(NaryParser):
# check if parsing of the series can be resumed somewhere
if
reloc
>=
0
:
nd
,
text_
=
parser
(
text_
)
# try current parser again
if
nd
:
if
nd
is
not
None
:
results
+=
(
node
,)
node
=
nd
else
:
...
...
@@ -2035,7 +2035,7 @@ class Alternative(NaryParser):
def
_parse
(
self
,
text
:
StringView
)
->
Tuple
[
Optional
[
Node
],
StringView
]:
for
parser
in
self
.
parsers
:
node
,
text_
=
parser
(
text
)
if
node
:
if
node
is
not
None
:
return
self
.
_return_value
(
node
),
text_
# return self._return_value(node if node._result or parser.pname else None), text_
# return Node(self.tag_name,
...
...
@@ -2148,7 +2148,7 @@ class AllOf(NaryParser):
while
parsers
:
for
i
,
parser
in
enumerate
(
parsers
):
node
,
text__
=
parser
(
text_
)
if
node
:
if
node
is
not
None
:
if
node
.
_result
or
not
node
.
tag_name
.
startswith
(
':'
):
# drop anonymous empty nodes
results
+=
(
node
,)
text_
=
text__
...
...
@@ -2217,7 +2217,7 @@ class SomeOf(NaryParser):
while
parsers
:
for
i
,
parser
in
enumerate
(
parsers
):
node
,
text__
=
parser
(
text_
)
if
node
:
if
node
is
not
None
:
if
node
.
_result
or
not
node
.
tag_name
.
startswith
(
':'
):
# drop anonymous empty nodes
results
+=
(
node
,)
text_
=
text__
...
...
@@ -2389,7 +2389,7 @@ class Capture(UnaryParser):
def
_parse
(
self
,
text
:
StringView
)
->
Tuple
[
Optional
[
Node
],
StringView
]:
node
,
text_
=
self
.
parser
(
text
)
if
node
:
if
node
is
not
None
:
assert
self
.
pname
,
"""Tried to apply an unnamed capture-parser!"""
assert
not
self
.
parser
.
drop_content
,
\
"Cannot capture content of returned by parser, the content of which will be dropped!"
...
...
@@ -2517,7 +2517,7 @@ class Pop(Retrieve):
def
_parse
(
self
,
text
:
StringView
)
->
Tuple
[
Optional
[
Node
],
StringView
]:
node
,
txt
=
self
.
retrieve_and_match
(
text
)
if
node
and
not
id
(
node
)
in
self
.
grammar
.
tree__
.
error_nodes
:
if
node
is
not
None
and
not
id
(
node
)
in
self
.
grammar
.
tree__
.
error_nodes
:
self
.
values
.
append
(
self
.
grammar
.
variables__
[
self
.
symbol
.
pname
].
pop
())
location
=
self
.
grammar
.
document_length__
-
text
.
__len__
()
self
.
grammar
.
push_rollback__
(
location
,
self
.
_rollback
)
# lambda: stack.append(value))
...
...
@@ -2605,7 +2605,7 @@ class Synonym(UnaryParser):
def
_parse
(
self
,
text
:
StringView
)
->
Tuple
[
Optional
[
Node
],
StringView
]:
node
,
text
=
self
.
parser
.
_parse
(
text
)
# circumvent Parser.__call__ as an optimization (dangerous?)
if
node
:
if
node
is
not
None
:
if
self
.
drop_content
:
return
EMPTY_NODE
,
text
# if self.anonymous:
...
...
DHParser/stringview.pxd
View file @
a77b86b4
...
...
@@ -16,11 +16,12 @@ cdef int last_char(str text, int begin, int end, str chars)
cdef
int
pack_index
(
int
index
,
int
length
)
@
cython
.
locals
(
cbegin
=
cython
.
int
,
cend
=
cython
.
int
)
@
cython
.
locals
(
cbegin
=
cython
.
int
,
cend
=
cython
.
int
,
length
=
cython
.
int
)
cpdef
real_indices
(
begin
,
end
,
int
length
)
cdef
class
StringView
:
cdef
str
_text
cdef
int
_begin
,
_end
,
_len
cdef
int
_begin
,
_end
cdef
readonly
int
_len
cdef
str
_fullstring
DHParser/stringview.py
View file @
a77b86b4
...
...
@@ -43,10 +43,13 @@ except ImportError:
import
DHParser.shadow_cython
as
cython
__all__
=
(
'StringView'
,
'EMPTY_STRING_VIEW'
)
__all__
=
(
'StringView'
,
'real_indices'
,
'EMPTY_STRING_VIEW'
)
def
first_char
(
text
,
begin
:
int
,
end
:
int
,
chars
)
->
int
:
@
cython
.
cfunc
@
cython
.
returns
(
cython
.
int
)
@
cython
.
locals
(
begin
=
cython
.
int
,
end
=
cython
.
int
)
def
first_char
(
text
:
str
,
begin
:
int
,
end
:
int
,
chars
:
str
)
->
int
:
"""Returns the index of the first non-whitespace character in string
`text` within the bounds [begin, end].
"""
...
...
@@ -55,7 +58,10 @@ def first_char(text, begin: int, end: int, chars) -> int:
return
begin
def
last_char
(
text
,
begin
:
int
,
end
:
int
,
chars
)
->
int
:
@
cython
.
cfunc
@
cython
.
returns
(
cython
.
int
)
@
cython
.
locals
(
begin
=
cython
.
int
,
end
=
cython
.
int
)
def
last_char
(
text
:
str
,
begin
:
int
,
end
:
int
,
chars
:
str
)
->
int
:
"""Returns the index of the first non-whitespace character in string
`text` within the bounds [begin, end].
"""
...
...
@@ -64,6 +70,9 @@ def last_char(text, begin: int, end: int, chars) -> int:
return
end
@
cython
.
cfunc
@
cython
.
returns
(
cython
.
int
)
@
cython
.
locals
(
index
=
cython
.
int
,
length
=
cython
.
int
)
def
pack_index
(
index
:
int
,
length
:
int
)
->
int
:
"""Transforms `index` into a positive index counting from the beginning
of the string, capping it at the boundaries [0, len].
...
...
@@ -83,9 +92,10 @@ def pack_index(index: int, length: int) -> int:
return
0
if
index
<
0
else
length
if
index
>
length
else
index
@
cython
.
locals
(
cbegin
=
cython
.
int
,
cend
=
cython
.
int
,
length
=
cython
.
int
)
def
real_indices
(
begin
:
Optional
[
int
],
end
:
Optional
[
int
],
length
)
->
Tuple
[
int
,
int
]:
length
:
int
)
->
Tuple
[
int
,
int
]:
"""Returns the tuple of real (i.e. positive) indices from the slice
indices `begin`, `end`, assuming a string of size `length`.
"""
...
...
@@ -108,7 +118,9 @@ class StringView: # collections.abc.Sized
# assert isinstance(text, str)
self
.
_text
=
text
# type: str
self
.
_begin
,
self
.
_end
=
real_indices
(
begin
,
end
,
len
(
text
))
self
.
_len
=
max
(
self
.
_end
-
self
.
_begin
,
0
)
# type: int
self
.
_len
=
self
.
_end
-
self
.
_begin
# type: int
if
self
.
_len
<
0
:
self
.
_len
=
0
self
.
_fullstring
=
''
# type: str
# if (self._begin == 0 and self._len == len(self._text)):
# self._fullstring = self._text # type: str
...
...
@@ -116,7 +128,7 @@ class StringView: # collections.abc.Sized
# self._fullstring = ''
def
__bool__
(
self
)
->
bool
:
return
self
.
_end
>
self
.
_begin
# and bool(self.text)
return
self
.
_len
!=
0
#
self._end > self._begin # and bool(self.text)
def
__len__
(
self
)
->
int
:
return
self
.
_len
...
...
DHParser/transform.pxd
View file @
a77b86b4
...
...
@@ -27,7 +27,6 @@ cpdef is_named(context: List[Node])
cpdef
is_anonymous
(
context
:
List
[
Node
])
cpdef
is_insignificant_whitespace
(
context
:
List
[
Node
])
cpdef
contains_only_whitespace
(
context
:
List
[
Node
])
cpdef
is_any_kind_of_whitespace
(
context
:
List
[
Node
])
cpdef
is_empty
(
context
:
List
[
Node
])
# cpdef is_token(context: List[Node], tokens: AbstractSet[str] = ?)
# cpdef is_one_of(context: List[Node], tag_name_set: AbstractSet[str])
...
...
@@ -61,9 +60,7 @@ cpdef normalize_whitespace(context)
# cpdef keep_nodes(context: List[Node], tag_names: AbstractSet[str])
# cpdef keep_content(context: List[Node], regexp: str)
# cpdef remove_children_if(context: List[Node], condition: Callable)
cpdef
remove_first
(
context
:
List
[
Node
])
cpdef
remove_last
(
context
:
List
[
Node
])
cpdef
remove_brackets
(
context
:
List
[
Node
])
# cpdef remove_brackets(context: List[Node])
# cpdef remove_tokens(context: List[Node], tokens: AbstractSet[str] = ?)
# cpdef remove_nodes(context: List[Node], tag_names: AbstractSet[str])
# cpdef remove_content(context: List[Node], regexp: str)
...
...
examples/LaTeX/LaTeX.ebnf
View file @
a77b86b4
...
...
@@ -139,7 +139,9 @@ pdfinfo = "\pdfinfo" block
config = "[" cfg_text §"]"
cfg_text = { (~ text) | CMDNAME | SPECIAL }
block = /{/ ~ { !blockcmd text_element [S] } §/}/
text = TEXT { S TEXT } # LETTERS { S LETTERS }
# text = LETTERS { S LETTERS }
# text = LINE { S LINE }
text = TEXT { S TEXT }
no_command = "\begin{" | "\end" | BACKSLASH structural
blockcmd = BACKSLASH ( ( "begin{" | "end{" )
...
...
examples/LaTeX/LaTeXCompiler.py
View file @
a77b86b4
...
...
@@ -61,7 +61,7 @@ class LaTeXGrammar(Grammar):
paragraph
=
Forward
()
tabular_config
=
Forward
()
text_element
=
Forward
()
source_hash__
=
"
0bb1db2c52e06989cb6d1b87a5476d14
"
source_hash__
=
"
e3f453cc7a08e4faefd2b76302e34a65
"
anonymous__
=
re
.
compile
(
'_WSPC$|_GAP$|_LB$|_PARSEP$|block_environment$|known_environment$|text_element$|inline_element$|inline_environment$|known_inline_env$|begin_inline_env$|end_inline_env$|command$|known_command$'
)
static_analysis_pending__
=
[
True
]
parser_initialization__
=
[
"upon instantiation"
]
...
...
@@ -340,8 +340,8 @@ class LaTeXCompiler(Compiler):
def
__call__
(
self
,
root
):
result
=
super
().
__call__
(
root
)
self
.
tree
.
inline_tags
=
{}
# {'paragraph'}
self
.
tree
.
empty_tags
=
{}
self
.
tree
.
inline_tags
=
set
()
# {'paragraph'}
self
.
tree
.
empty_tags
=
set
()
self
.
tree
.
omit_tags
=
{
'S'
,
'PARSEP'
}
return
result
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment