@@ -16,31 +16,31 @@ def __init__(self, min_cut=0.1, max_cut=0.9):
1616 self .__max_cut = max_cut
1717 self .__stopwords = set (stopwords .words ("thai" ) + list (punctuation ))
1818
19- def __compute_frequencies (self , word_sent ):
20- freq = defaultdict (int )
21- for s in word_sent :
22- for word in s :
19+ def __compute_frequencies (self , word_tokenized_sents ):
20+ word_freqs = defaultdict (int )
21+ for sent in word_tokenized_sents :
22+ for word in sent :
2323 if word not in self .__stopwords :
24- freq [word ] += 1
24+ word_freqs [word ] += 1
2525
26- m = float (max (freq .values ()))
27- for w in list (freq ):
28- freq [w ] = freq [w ] / m
29- if freq [w ] >= self .__max_cut or freq [w ] <= self .__min_cut :
30- del freq [w ]
26+ max_freq = float (max (word_freqs .values ()))
27+ for w in list (word_freqs ):
28+ word_freqs [w ] = word_freqs [w ] / max_freq
29+ if word_freqs [w ] >= self .__max_cut or word_freqs [w ] <= self .__min_cut :
30+ del word_freqs [w ]
3131
32- return freq
32+ return word_freqs
3333
3434 def __rank (self , ranking , n ):
3535 return nlargest (n , ranking , key = ranking .get )
3636
3737 def summarize (self , text , n , tokenizer ):
3838 sents = sent_tokenize (text )
39- word_sent = [word_tokenize (s , tokenizer ) for s in sents ]
40- self .__freq = self .__compute_frequencies (word_sent )
39+ word_tokenized_sents = [word_tokenize (sent , tokenizer ) for sent in sents ]
40+ self .__freq = self .__compute_frequencies (word_tokenized_sents )
4141 ranking = defaultdict (int )
4242
43- for i , sent in enumerate (word_sent ):
43+ for i , sent in enumerate (word_tokenized_sents ):
4444 for w in sent :
4545 if w in self .__freq :
4646 ranking [i ] += self .__freq [w ]
0 commit comments