-
Notifications
You must be signed in to change notification settings - Fork 0
/
features_ryan.py
65 lines (46 loc) · 1.92 KB
/
features_ryan.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
from pprint import pprint
def _subset_feature_funcs(min_lookback=1, max_lookback=3):
    """Build the named tag-group / lookback feature functions.

    One function is created for every (lookback, tag group) pair, iterating
    lookbacks in ascending order and, within each lookback, the groups in the
    order listed below.  Each function takes ``(word, history)`` and returns
    True when any tag of its group occurs among the last ``lookback`` entries
    of ``history``.  The ``word`` argument is accepted but not used.

    Each function's ``__name__`` is
    ``'subsets_specific_<group repr>_lookback_<lookback>'``.
    """
    all_groups = [
        ['JJ', 'JJR', 'JJS'],                          # adjectives
        ['NN', 'NNS', 'NNP', 'NNPS'],                  # nouns
        ['PRP', 'PRP$'],                               # pronouns
        ['RB', 'RBR'],                                 # adverbs
        ['VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ'],     # verbs
        ['WDT', 'WP', 'WRB'],                          # wh_words
        ['<COLON>', '<COMMA>', '<LEFTPAR>', '<PERIOD>', '<RIGHTPAR>'],  # punctuation
        ['CC'],                                        # coordinating conjunction
        ['CD'],                                        # cardinal number
        ['DT'],                                        # determiner
        ['EX'],                                        # existential THERE
        ['IN'],                                        # preposition or subordinating conjunction
        ['MD'],                                        # modal
        ['POS'],                                       # possessive ending
        ['RP'],                                        # particle
        ['TO'],                                        # TO
        ['CC', 'CD', 'DT', 'EX', 'IN', 'MD', 'POS', 'RP', 'TO'],  # other
    ]

    def vocab(group, lookback):
        # Factory so each closure binds its own group/lookback pair.
        def subsets_specific(word, history):
            # Slice once, then test whether any tag of the group appears in
            # the last *lookback* entries of the history.
            recent = history[-lookback:]
            return any(tag in recent for tag in group)
        subsets_specific.__name__ = 'subsets_specific_{0}_lookback_{1}'.format(group, lookback)
        return subsets_specific

    return [vocab(group, lookback)
            for lookback in range(min_lookback, max_lookback + 1)
            for group in all_groups]


def subsets(word, history):
    """Evaluate every tag-group / lookback feature on ``(word, history)``.

    Returns a list of ``(feature_name, fired)`` tuples, one per feature
    function, in the order the functions are built.  ``fired`` is a bool.
    """
    return [(fcn.__name__, fcn(word, history)) for fcn in _subset_feature_funcs()]


def eval(word, history):
    """Return the evaluated features for ``(word, history)``.

    Thin wrapper around ``subsets``.  NOTE: this name shadows the builtin
    ``eval`` inside this module; it is kept because it is the existing
    interface.
    """
    return subsets(word, history)


def get_feature_funcs():
    """Return the list of feature functions, each callable as ``f(word, history)``.

    Previously this called ``subsets()`` without its required arguments and
    therefore always raised TypeError.  It now returns the named feature
    functions themselves, which is what ``subsets`` evaluates.
    """
    return _subset_feature_funcs()
def main():
    """Demo: evaluate the features on a sample history and print those that fired."""
    word = 'NNP'
    history = ['NNP', 'CD', '<COMMA>']
    evaluated = eval(word, history)
    for e in evaluated:
        if e[1]:
            # Single-argument parenthesized print gives the same output under
            # Python 2 and Python 3 (the bare print statement is Python 2 only).
            print(e)


if __name__ == '__main__':
    main()