summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Dave Andreoli <dave@gurumeditation.it> 2018-01-02 13:57:22 +0100
committer Dave Andreoli <dave@gurumeditation.it> 2018-01-02 13:58:08 +0100
commit 3388077bc35b9d0be902051aee3d9977562dda6c (patch)
tree 1d9189be699d4f4ae054c2492c5291e33507d507
parent 9bedda14b3a3edb972bb231dd1e727c86355d6da (diff)
download efl-3388077bc35b9d0be902051aee3d9977562dda6c.tar.gz
Pyolian: implemented doc tokenizer
-rw-r--r-- src/scripts/pyolian/eolian.py 69
-rw-r--r-- src/scripts/pyolian/eolian_lib.py 2
-rwxr-xr-x src/scripts/pyolian/generator.py 1
3 files changed, 68 insertions, 4 deletions
diff --git a/src/scripts/pyolian/eolian.py b/src/scripts/pyolian/eolian.py
index a5653a5120..75a98ff0fe 100644
--- a/src/scripts/pyolian/eolian.py
+++ b/src/scripts/pyolian/eolian.py
@@ -24,13 +24,13 @@ a way that this folder will be available on PYTHON_PATH, fe:
"""
from enum import IntEnum
-from ctypes import cast, byref, c_char_p, c_void_p
+from ctypes import cast, byref, c_char_p, c_void_p, c_int
+import ctypes
try:
from .eolian_lib import lib
except ImportError:
from eolian_lib import lib
-
### Eolian Enums ############################################################
@@ -1235,11 +1235,16 @@ class Declaration(EolianBaseObject):
return Variable(c_var) if c_var else None
+class _Eolian_Doc_Token_Struct(ctypes.Structure):
+    """ ctypes layout of the token state handed by reference to the tokenizer.
+
+    NOTE(review): presumably mirrors the C Eolian_Doc_Token struct; confirm
+    the field order and types against the Eolian header.
+    """
+    _fields_ = [
+        ("type", c_int),
+        ("text", c_char_p),
+        ("text_end", c_char_p),
+    ]
+
class Documentation(EolianBaseObject):
# def __repr__(self):
# return "<eolian.Documentation '{0.name}'>".format(self)
- # this is too much for py, just use string.split('\n\n')
+ # this is too much for py, just use string.split('\n\n') instead
# def string_split(self, string):
# c_list = lib.eolian_documentation_string_split
@@ -1255,6 +1260,64 @@ class Documentation(EolianBaseObject):
def since(self):
return _str_to_py(lib.eolian_documentation_since_get(self._obj))
+ @property
+ def summary_tokens(self):
+     """ The summary, tokenized: a list of paragraphs, each paragraph being
+     a list of tokens (see _tokenize) """
+     summary_text = self.summary
+     return self._tokenize(summary_text)
+
+ @property
+ def description_tokens(self):
+     """ The description, tokenized: a list of paragraphs, each paragraph
+     being a list of tokens (see _tokenize) """
+     description_text = self.description
+     return self._tokenize(description_text)
+
+ @classmethod
+ def _tokenize(cls, full_text):
+     """ Split a documentation string into paragraphs of tokens.
+
+     The text is split on double newlines and every paragraph is fed to
+     the C tokenizer until it stops returning a next chunk.
+
+     Args:
+         full_text: the documentation text; a falsy value (None or an
+             empty string) gives an empty list.
+
+     Returns:
+         A list of paragraphs, each one being a list of
+         Documentation_Token.
+     """
+     paragraphs = []
+     if not full_text:
+         return paragraphs
+
+     # one token struct is reused; it is re-initialised with
+     # eolian_doc_token_init() at the start of every paragraph
+     tok = _Eolian_Doc_Token_Struct()
+     for paragraph in full_text.split('\n\n'):
+         tokens = []
+         # keep c_paragraph alive ! The chunk addresses returned by the
+         # tokenizer are passed back to it on the next call.
+         c_paragraph = _str_to_bytes(paragraph)
+
+         lib.eolian_doc_token_init(byref(tok))
+         next_chunk = lib.eolian_documentation_tokenize(c_paragraph, byref(tok))
+         while next_chunk:
+             typ = lib.eolian_doc_token_type_get(byref(tok))
+             txt = lib.eolian_doc_token_text_get(byref(tok))
+             try:
+                 # TODO: also fetch the token ref (blocked by the awkward
+                 # '*unit' parameter)
+                 tokens.append(Documentation_Token(typ, txt))
+             finally:
+                 # release the C string even when building the token
+                 # raised, otherwise the buffer would leak
+                 lib.free(c_void_p(txt))
+             next_chunk = lib.eolian_documentation_tokenize(c_char_p(next_chunk), byref(tok))
+         paragraphs.append(tokens)
+
+     return paragraphs
+
+
+class Documentation_Token(object):
+    """ One token of a tokenized documentation paragraph.
+
+    Built from the raw values returned by the C token getters: the type is
+    wrapped in Eolian_Doc_Token_Type and the text is converted with
+    _str_to_py(). The reference is not resolved yet and is always None.
+    """
+    def __init__(self, c_token_type, c_text):
+        self._ref = None # TODO
+        self._type = Eolian_Doc_Token_Type(c_token_type)
+        self._text = _str_to_py(c_text)
+
+    def __repr__(self):
+        # show at most the first 40 characters of the text
+        full = self.text
+        if len(full) < 40:
+            shown = full
+        else:
+            shown = full[:40] + '...'
+        template = "<eolian.Doc_Token ({}), text='{}', len={}>"
+        return template.format(self.type.name, shown, len(full))
+
+    @property
+    def type(self):
+        """ The token type, as an Eolian_Doc_Token_Type """
+        return self._type
+
+    @property
+    def text(self):
+        """ The token text, as converted by _str_to_py() """
+        return self._text
+
+    @property
+    def ref(self):
+        """ The token reference (not implemented yet, always None) """
+        return self._ref
+
### internal string encode/decode ###########################################
diff --git a/src/scripts/pyolian/eolian_lib.py b/src/scripts/pyolian/eolian_lib.py
index a330853dbd..66b1556b5b 100644
--- a/src/scripts/pyolian/eolian_lib.py
+++ b/src/scripts/pyolian/eolian_lib.py
@@ -737,7 +737,7 @@ lib.eolian_documentation_since_get.restype = c_char_p
# # EAPI const char *eolian_documentation_tokenize(const char *doc, Eolian_Doc_Token *ret);
lib.eolian_documentation_tokenize.argtypes = [c_char_p, c_void_p]
-lib.eolian_documentation_tokenize.restype = c_char_p
+lib.eolian_documentation_tokenize.restype = c_void_p # keep the raw address (c_char_p would be converted to bytes): it needs to be passed back as char*
# EAPI void eolian_doc_token_init(Eolian_Doc_Token *tok);
lib.eolian_doc_token_init.argtypes = [c_void_p,]
diff --git a/src/scripts/pyolian/generator.py b/src/scripts/pyolian/generator.py
index 84290b6bef..2640bfaf40 100755
--- a/src/scripts/pyolian/generator.py
+++ b/src/scripts/pyolian/generator.py
@@ -127,6 +127,7 @@ class Template(pyratemp.Template):
'Variable': eolian.Variable,
'Declaration': eolian.Declaration,
'Documentation': eolian.Documentation,
+ 'Documentation_Token': eolian.Documentation_Token,
# Eolian Enums
'Eolian_Function_Type': eolian.Eolian_Function_Type,
'Eolian_Parameter_Dir': eolian.Eolian_Parameter_Dir,