Skip to content

Commit d3cd6e2

Browse files
authored
Merge pull request #106 from bigdata-ustc/dev
[FEATURE] version upgrade
2 parents 1a7fe0c + 9071cca commit d3cd6e2

File tree

99 files changed

+17838
-1537
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

99 files changed

+17838
-1537
lines changed

AUTHORS.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,5 +16,7 @@
1616

1717
[Meikai Bao](https://github.com/BAOOOOOM)
1818

19+
[Yuting Ning](https://github.com/nnnyt)
20+
1921

2022
The starred contributors are the corresponding authors.

CHANGE.txt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,12 @@
1+
v0.0.7:
2+
1. add BERT and pretrained model (luna_bert)
3+
2. speed up the process in sif
4+
3. handling OOV in word2vec
5+
4. add English tutorials
6+
5. add api docs and prettify tutorials
7+
6. fix the np.error in gensim_vec.W2V.infer_vector
8+
7. fix the parameters lost in tokenization
9+
110
v0.0.6:
211
1. dev: add half-pretrained rnn model
312
2. important!!!: rename TextTokenizer to PureTextTokenizer, and add a new tokenizer named TextTokenizer (the two have similar but not the same behaviours).

EduNLP/Formula/Formula.py

Lines changed: 75 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,18 @@
1515

1616
class Formula(object):
1717
"""
18+
Transforms a formula into its parsed abstract syntax tree.
19+
20+
Parameters
21+
----------
22+
formula: str or List[Dict]
23+
LaTeX formula string or the parsed abstract syntax tree
24+
variable_standardization
25+
const_mathord
26+
init
27+
args
28+
kwargs
29+
1830
Examples
1931
--------
2032
>>> f = Formula("x")
@@ -29,22 +41,21 @@ class Formula(object):
2941
<Formula: x>
3042
>>> f.elements
3143
[{'id': 0, 'type': 'mathord', 'text': 'x', 'role': None, 'var': 0}]
32-
"""
3344
45+
Attributes
46+
------------
47+
ast
48+
show all ast details
49+
elements
50+
just show elements' id, type, text and role
51+
ast_graph
52+
draw an AST graph
53+
to_str
54+
resetable
55+
return bool
56+
"""
3457
def __init__(self, formula: (str, List[Dict]), variable_standardization=False, const_mathord=None,
3558
init=True, *args, **kwargs):
36-
"""
37-
38-
Parameters
39-
----------
40-
formula: str or List[Dict]
41-
latex formula string or the parsed abstracted syntax tree
42-
variable_standardization
43-
const_mathord
44-
init
45-
args
46-
kwargs
47-
"""
4859
self._formula = formula
4960
self._ast = None
5061
if init is True:
@@ -55,6 +66,15 @@ def __init__(self, formula: (str, List[Dict]), variable_standardization=False, c
5566
)
5667

5768
def variable_standardization(self, inplace=False, const_mathord=None, variable_connect_dict=None):
69+
"""
70+
It makes the same parameters have the same number.
71+
72+
Parameters
73+
----------
74+
inplace
75+
const_mathord
76+
variable_connect_dict
77+
"""
5878
const_mathord = const_mathord if const_mathord is not None else CONST_MATHORD
5979
ast_tree = self._ast if inplace else deepcopy(self._ast)
6080
var_code = variable_connect_dict["var_code"] if variable_connect_dict is not None else {}
@@ -118,6 +138,26 @@ def resetable(self):
118138

119139
class FormulaGroup(object):
120140
"""
141+
Transforms a group of formulas into the parsed abstract syntax forest.
142+
143+
Attributes
144+
------------
145+
to_str
146+
ast
147+
show all ast details
148+
elements
149+
just show elements' id, type, text and role
150+
ast_graph
151+
draw an AST graph
152+
153+
Parameters
154+
----------
155+
formula: str or List[Dict] or List[Formula]
156+
LaTeX formula string, a parsed abstract syntax tree, or a group of parsed abstract syntax trees
157+
variable_standardization
158+
const_mathord
159+
detach
160+
121161
Examples
122162
---------
123163
>>> fg = FormulaGroup(["x + y", "y + x", "z + x"])
@@ -128,15 +168,16 @@ class FormulaGroup(object):
128168
<FormulaGroup: <Formula: x + y>;<Formula: y + x>;<Formula: z + x>>
129169
>>> fg = FormulaGroup(["x", Formula("y"), "x"])
130170
>>> fg.elements
131-
[{'id': 0, 'type': 'mathord', 'text': 'x', 'role': None}, {'id': 1, 'type': 'mathord', 'text': 'y', 'role': None},\
132-
{'id': 2, 'type': 'mathord', 'text': 'x', 'role': None}]
171+
[{'id': 0, 'type': 'mathord', 'text': 'x', 'role': None}, \
172+
{'id': 1, 'type': 'mathord', 'text': 'y', 'role': None}, \
173+
{'id': 2, 'type': 'mathord', 'text': 'x', 'role': None}]
133174
>>> fg = FormulaGroup(["x", Formula("y"), "x"], variable_standardization=True)
134175
>>> fg.elements
135176
[{'id': 0, 'type': 'mathord', 'text': 'x', 'role': None, 'var': 0}, \
136177
{'id': 1, 'type': 'mathord', 'text': 'y', 'role': None, 'var': 1}, \
137178
{'id': 2, 'type': 'mathord', 'text': 'x', 'role': None, 'var': 0}]
138-
"""
139179
180+
"""
140181
def __init__(self,
141182
formula_list: (list, List[str], List[Formula]),
142183
variable_standardization=False,
@@ -186,6 +227,15 @@ def __contains__(self, item) -> bool:
186227
return item in self._formulas
187228

188229
def variable_standardization(self, inplace=False, const_mathord=None, variable_connect_dict=None):
230+
"""
231+
It makes the same parameters have the same number.
232+
233+
Parameters
234+
----------
235+
inplace
236+
const_mathord
237+
variable_connect_dict
238+
"""
189239
ret = []
190240
for formula in self._formulas:
191241
ret.append(formula.variable_standardization(inplace=inplace, const_mathord=const_mathord,
@@ -220,6 +270,15 @@ def ast_graph(self) -> (nx.Graph, nx.DiGraph):
220270

221271

222272
def link_formulas(*formula: Formula, link_vars=True, **kwargs):
273+
"""
274+
275+
Parameters
276+
----------
277+
formula
278+
the parsed abstract syntax tree
279+
link_vars
280+
kwargs
281+
"""
223282
forest = []
224283
for form in formula:
225284
forest += form.reset_ast(

EduNLP/Formula/ast/ast.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,12 @@
88

99

1010
def katex_parse(formula):
11+
"""Pass the formula to katex for syntax parsing."""
1112
return katex.katex.__parse(formula,{'displayMode':True,'trust': True}).to_list()
1213

1314

1415
def str2ast(formula: str, *args, **kwargs):
16+
"""Interface for a formula given as a string."""
1517
return ast(formula, is_str=True, *args, **kwargs)
1618

1719

EduNLP/Formula/viz/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,5 @@
22
# 2021/3/8 @ tongshiwei
33

44
import warnings
5-
warnings.warn("Do not use this package")
5+
# warnings.warn("Do not use this package")
66
from .tree_viz import TreePlotter, ForestPlotter

EduNLP/I2V/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,4 @@
22
# 2021/8/1 @ tongshiwei
33

44
from .i2v import I2V, get_pretrained_i2v
5-
from .i2v import D2V, W2V
5+
from .i2v import D2V, W2V, Bert

0 commit comments

Comments
 (0)