-
Notifications
You must be signed in to change notification settings - Fork 2
/
sentiment.py
54 lines (42 loc) · 1.51 KB
/
sentiment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import numpy as np
from sklearn import svm
from prepro import *
from msda import *
num_layers = 5
noises = [0.5, 0.6, 0.7, 0.8, 0.9]
domains = ['books', 'dvd', 'electronics', 'kitchen']
X_train, y_train, train_offset = [], [], [0]
X_test, y_test, test_offset = [], [], [0]
for domain in domains:
train_path = "data/processed_stars/{0}/train".format(domain)
test_path = "data/processed_stars/{0}/test".format(domain)
X, y, word_mappings = create_input(train_path)
X_train.append(X)
y_train.extend(y)
train_offset.append(train_offset[-1] + len(X))
X, y = create_input(test_path, word_mappings=word_mappings)
X_test.append(X)
y_test.extend(y)
test_offset.append(test_offset[-1] + len(X))
X_train = np.vstack(X_train)
X_test = np.vstack(X_test)
noise = noises[0]
train_mappings, train_reps = mSDA(X_train, noise, num_layers)
_, test_reps = mSDA(X_test, noise, num_layers, train_mappings)
X_train_reps = np.hstack([X_train, train_reps[-1]])
X_test_reps = np.hstack([X_test, test_reps[-1]])
for i, domain in enumerate(domains):
b, e = train_offset[i], train_offset[i+1]
classifier = svm.SVC().fit(X_train_reps[b:e], y_train[b:e])
b, e = test_offset[i], test_offset[i+1]
preds = classifier.predict(X_test_reps[b:e])
acc = np.mean(preds == y_test[b:e])
print "Accuracy on %s domain" % domain
for j, domain in enumerate(domains):
if i == j:
continue
b, e = test_offset[j], test_offset[j+1]
preds = classifier.predict(X_test_reps[b:e])
acc = np.mean(preds == y_test[b:e])
print "Adapt to %s domain" % domain
print ""