Commit a77b86b4 authored by di68kap

- cython compatibility enhanced
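Two patterns recur throughout the diff below: truth tests on parser results (`if node:`) become explicit `if node is not None:` checks, which only ask whether the parser matched at all and which Cython can compile down to a plain pointer comparison instead of a `__bool__`/`__len__` call; and the string-scanning helpers in stringview.py gain `@cython.cfunc` / `@cython.returns` / `@cython.locals` decorators that are no-ops under plain Python (falling back to DHParser.shadow_cython when Cython is absent) but yield typed C functions when compiled. The snippet below is an illustrative sketch of these two patterns only; the names are made-up stand-ins, not code from this commit:

    # Illustrative sketch only -- stand-in names, not DHParser's actual API.
    try:
        import cython
    except ImportError:
        class cython:  # minimal stand-in playing the role of DHParser.shadow_cython
            int = int

            @staticmethod
            def cfunc(func):
                return func

            @staticmethod
            def returns(_type):
                return lambda func: func

            @staticmethod
            def locals(**_types):
                return lambda func: func


    @cython.cfunc
    @cython.returns(cython.int)
    @cython.locals(begin=cython.int, end=cython.int)
    def first_nonspace(text: str, begin: int, end: int) -> int:
        # Decorator pattern as in stringview.py: no-ops in pure Python,
        # a typed C function when the module is compiled with Cython.
        while begin < end and text[begin] in ' \t':
            begin += 1
        return begin


    def demo(node):
        # `if node is not None:` instead of `if node:` -- the explicit None test
        # only distinguishes "no match" from "match" and maps onto a cheap
        # pointer comparison under Cython, whereas truth-testing goes through
        # Python's __bool__/__len__ protocol on the (possibly empty) result.
        return 'match' if node is not None else 'fail'


    if __name__ == '__main__':
        print(first_nonspace('   abc', 0, 6))  # -> 3
        print(demo(object()), demo(None))      # -> match fail

Related to the same goal, StringView._len becomes a readonly cdef attribute, so the hot path in parse.py can read text._len directly instead of calling text.__len__().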

parent e49b99c8
@@ -1304,8 +1304,9 @@ class EBNFCompiler(Compiler):
                                 'and not a %s.') % (prefix, arg.tag_name))
             return arg.content
         elif self.anonymous_regexp.match(arg.content):
-            self.tree.new_error(node, ('Retrie does not work with anonymous parsers like %s')
-                                % (prefix, arg.content))
+            self.tree.new_error(
+                node, ('Retrive operator "%s" does not work with anonymous parsers like %s')
+                % (prefix, arg.content))
             return arg.content
         if arg.content in self.directives.filter:
             custom_args = ['rfilter=%s' % self.directives.filter[arg.content]]
@@ -17,7 +17,7 @@ cdef class Parser:
     cpdef _parse(self, text)
     cpdef reset(self)
-    # def __call__(self, text)
+    # def __call__(self, StringView text)
     # def __add__(self, other)
     # def __or__(self, other)
     cpdef _parse(self, text)
@@ -355,7 +355,7 @@ class Parser:
         error_node_id = 0
         grammar = self._grammar
-        location = grammar.document_length__ - text.__len__()   # faster then len(text)?
+        location = grammar.document_length__ - text._len        # faster then len(text)?
         try:
             # rollback variable changing operation if parser backtracks
@@ -1545,7 +1545,7 @@ class MetaParser(Parser):
         """
         assert node is None or isinstance(node, Node)
         if self._grammar.flatten_tree__:
-            if node:
+            if node is not None:
                 if self.anonymous:
                     if self.drop_content:
                         return EMPTY_NODE
@@ -1726,7 +1726,7 @@ class ZeroOrMore(Option):
             n = len
             node, text = self.parser(text)
             len = text.__len__()
-            if not node:
+            if node is None:
                 break
             if node._result or not node.tag_name.startswith(':'):  # drop anonymous empty nodes
                 results += (node,)
@@ -1778,7 +1778,7 @@ class OneOrMore(UnaryParser):
             n = len
             node, text_ = self.parser(text_)
             len = text_.__len__()
-            if not node:
+            if node is None:
                 break
             match_flag = True
             if node._result or not node.tag_name.startswith(':'):  # drop anonymous empty nodes
@@ -1929,7 +1929,7 @@ class Series(NaryParser):
         error = None  # type: Optional[Error]
         for pos, parser in enumerate(self.parsers):
             node, text_ = parser(text_)
-            if not node:
+            if node is None:
                 if pos < self.mandatory:
                     return None, text
                 else:
@@ -1942,7 +1942,7 @@ class Series(NaryParser):
                     # check if parsing of the series can be resumed somewhere
                     if reloc >= 0:
                         nd, text_ = parser(text_)  # try current parser again
-                        if nd:
+                        if nd is not None:
                             results += (node,)
                             node = nd
                         else:
@@ -2035,7 +2035,7 @@ class Alternative(NaryParser):
     def _parse(self, text: StringView) -> Tuple[Optional[Node], StringView]:
         for parser in self.parsers:
             node, text_ = parser(text)
-            if node:
+            if node is not None:
                 return self._return_value(node), text_
         # return self._return_value(node if node._result or parser.pname else None), text_
         # return Node(self.tag_name,
@@ -2148,7 +2148,7 @@ class AllOf(NaryParser):
         while parsers:
             for i, parser in enumerate(parsers):
                 node, text__ = parser(text_)
-                if node:
+                if node is not None:
                     if node._result or not node.tag_name.startswith(':'):  # drop anonymous empty nodes
                         results += (node,)
                     text_ = text__
@@ -2217,7 +2217,7 @@ class SomeOf(NaryParser):
         while parsers:
             for i, parser in enumerate(parsers):
                 node, text__ = parser(text_)
-                if node:
+                if node is not None:
                     if node._result or not node.tag_name.startswith(':'):  # drop anonymous empty nodes
                         results += (node,)
                     text_ = text__
@@ -2389,7 +2389,7 @@ class Capture(UnaryParser):
     def _parse(self, text: StringView) -> Tuple[Optional[Node], StringView]:
         node, text_ = self.parser(text)
-        if node:
+        if node is not None:
            assert self.pname, """Tried to apply an unnamed capture-parser!"""
            assert not self.parser.drop_content, \
                "Cannot capture content of returned by parser, the content of which will be dropped!"
@@ -2517,7 +2517,7 @@ class Pop(Retrieve):
     def _parse(self, text: StringView) -> Tuple[Optional[Node], StringView]:
         node, txt = self.retrieve_and_match(text)
-        if node and not id(node) in self.grammar.tree__.error_nodes:
+        if node is not None and not id(node) in self.grammar.tree__.error_nodes:
             self.values.append(self.grammar.variables__[self.symbol.pname].pop())
             location = self.grammar.document_length__ - text.__len__()
             self.grammar.push_rollback__(location, self._rollback)  # lambda: stack.append(value))
@@ -2605,7 +2605,7 @@ class Synonym(UnaryParser):
     def _parse(self, text: StringView) -> Tuple[Optional[Node], StringView]:
         node, text = self.parser._parse(text)  # circumvent Parser.__call__ as an optimization (dangerous?)
-        if node:
+        if node is not None:
             if self.drop_content:
                 return EMPTY_NODE, text
             # if self.anonymous:
@@ -16,11 +16,12 @@ cdef int last_char(str text, int begin, int end, str chars)
 cdef int pack_index(int index, int length)
-@cython.locals(cbegin=cython.int, cend=cython.int)
+@cython.locals(cbegin=cython.int, cend=cython.int, length=cython.int)
 cpdef real_indices(begin, end, int length)
 cdef class StringView:
     cdef str _text
-    cdef int _begin, _end, _len
+    cdef int _begin, _end
+    cdef readonly int _len
     cdef str _fullstring
@@ -43,10 +43,13 @@ except ImportError:
     import DHParser.shadow_cython as cython
-__all__ = ('StringView', 'EMPTY_STRING_VIEW')
+__all__ = ('StringView', 'real_indices', 'EMPTY_STRING_VIEW')
-def first_char(text, begin: int, end: int, chars) -> int:
+@cython.cfunc
+@cython.returns(cython.int)
+@cython.locals(begin=cython.int, end=cython.int)
+def first_char(text: str, begin: int, end: int, chars: str) -> int:
     """Returns the index of the first non-whitespace character in string
     `text` within the bounds [begin, end].
     """
@@ -55,7 +58,10 @@ def first_char(text, begin: int, end: int, chars) -> int:
     return begin
-def last_char(text, begin: int, end: int, chars) -> int:
+@cython.cfunc
+@cython.returns(cython.int)
+@cython.locals(begin=cython.int, end=cython.int)
+def last_char(text: str, begin: int, end: int, chars: str) -> int:
     """Returns the index of the first non-whitespace character in string
     `text` within the bounds [begin, end].
     """
@@ -64,6 +70,9 @@ def last_char(text, begin: int, end: int, chars) -> int:
     return end
+@cython.cfunc
+@cython.returns(cython.int)
+@cython.locals(index=cython.int, length=cython.int)
 def pack_index(index: int, length: int) -> int:
     """Transforms `index` into a positive index counting from the beginning
     of the string, capping it at the boundaries [0, len].
@@ -83,9 +92,10 @@ def pack_index(index: int, length: int) -> int:
     return 0 if index < 0 else length if index > length else index
+@cython.locals(cbegin=cython.int, cend=cython.int, length=cython.int)
 def real_indices(begin: Optional[int],
                  end: Optional[int],
-                 length) -> Tuple[int, int]:
+                 length: int) -> Tuple[int, int]:
     """Returns the tuple of real (i.e. positive) indices from the slice
     indices `begin`, `end`, assuming a string of size `length`.
     """
@@ -108,7 +118,9 @@ class StringView:  # collections.abc.Sized
         # assert isinstance(text, str)
         self._text = text  # type: str
         self._begin, self._end = real_indices(begin, end, len(text))
-        self._len = max(self._end - self._begin, 0)  # type: int
+        self._len = self._end - self._begin  # type: int
+        if self._len < 0:
+            self._len = 0
         self._fullstring = ''  # type: str
         # if (self._begin == 0 and self._len == len(self._text)):
         #     self._fullstring = self._text  # type: str
@@ -116,7 +128,7 @@ class StringView:  # collections.abc.Sized
         #     self._fullstring = ''
     def __bool__(self) -> bool:
-        return self._end > self._begin  # and bool(self.text)
+        return self._len != 0  # self._end > self._begin  # and bool(self.text)
     def __len__(self) -> int:
         return self._len
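A hypothetical sanity check (not part of the commit) that the StringView changes above preserve behaviour when running uncompiled. It relies only on what is visible in this diff (the real_indices signature, __len__ and __bool__) plus the assumptions that the module lives at DHParser.stringview and that StringView takes (text, begin, end):

    from DHParser.stringview import StringView, real_indices

    assert real_indices(-5, None, 11) == (6, 11)     # negative begin counted from the end
    sv = StringView('hello world', 6, None)          # view of 'world'
    assert len(sv) == 5 and bool(sv)                 # _len == _end - _begin
    assert not StringView('hello', 3, 2)             # inverted span is capped at length 0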
@@ -27,7 +27,6 @@ cpdef is_named(context: List[Node])
 cpdef is_anonymous(context: List[Node])
 cpdef is_insignificant_whitespace(context: List[Node])
 cpdef contains_only_whitespace(context: List[Node])
-cpdef is_any_kind_of_whitespace(context: List[Node])
 cpdef is_empty(context: List[Node])
 # cpdef is_token(context: List[Node], tokens: AbstractSet[str] = ?)
 # cpdef is_one_of(context: List[Node], tag_name_set: AbstractSet[str])
@@ -61,9 +60,7 @@ cpdef normalize_whitespace(context)
 # cpdef keep_nodes(context: List[Node], tag_names: AbstractSet[str])
 # cpdef keep_content(context: List[Node], regexp: str)
 # cpdef remove_children_if(context: List[Node], condition: Callable)
-cpdef remove_first(context: List[Node])
-cpdef remove_last(context: List[Node])
-cpdef remove_brackets(context: List[Node])
+# cpdef remove_brackets(context: List[Node])
 # cpdef remove_tokens(context: List[Node], tokens: AbstractSet[str] = ?)
 # cpdef remove_nodes(context: List[Node], tag_names: AbstractSet[str])
 # cpdef remove_content(context: List[Node], regexp: str)
@@ -139,7 +139,9 @@ pdfinfo = "\pdfinfo" block
 config = "[" cfg_text §"]"
 cfg_text = { (~ text) | CMDNAME | SPECIAL }
 block = /{/ ~ { !blockcmd text_element [S] } §/}/
-text = TEXT { S TEXT }  # LETTERS { S LETTERS }
+# text = LETTERS { S LETTERS }
+# text = LINE { S LINE }
+text = TEXT { S TEXT }
 no_command = "\begin{" | "\end" | BACKSLASH structural
 blockcmd = BACKSLASH ( ( "begin{" | "end{" )
@@ -61,7 +61,7 @@ class LaTeXGrammar(Grammar):
     paragraph = Forward()
     tabular_config = Forward()
     text_element = Forward()
-    source_hash__ = "0bb1db2c52e06989cb6d1b87a5476d14"
+    source_hash__ = "e3f453cc7a08e4faefd2b76302e34a65"
     anonymous__ = re.compile('_WSPC$|_GAP$|_LB$|_PARSEP$|block_environment$|known_environment$|text_element$|inline_element$|inline_environment$|known_inline_env$|begin_inline_env$|end_inline_env$|command$|known_command$')
     static_analysis_pending__ = [True]
     parser_initialization__ = ["upon instantiation"]
@@ -340,8 +340,8 @@ class LaTeXCompiler(Compiler):
     def __call__(self, root):
         result = super().__call__(root)
-        self.tree.inline_tags = {}  # {'paragraph'}
-        self.tree.empty_tags = {}
+        self.tree.inline_tags = set()  # {'paragraph'}
+        self.tree.empty_tags = set()
         self.tree.omit_tags = {'S', 'PARSEP'}
         return result
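The change in this last hunk is, at bottom, a type correction: `{}` is an empty dict, not an empty set, so inline_tags and empty_tags are now initialised with set(), matching omit_tags, which already uses a genuine set literal. In short:

    assert type({}) is dict and type(set()) is set   # {} was the wrong literal for an empty set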