Theory and Code - Logistic Regression - Multi-class Classification - Code

The data is in lr_softmax.csv.
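The exact contents of lr_softmax.csv are not reproduced here. Judging from the loaders in both scripts, each row presumably holds four comma-separated feature values followed by a 1-based integer class label (three classes); the numbers below are placeholders, not actual rows from the file:

5.1,3.5,1.4,0.2,1
6.4,3.2,4.5,1.5,2
6.3,2.8,5.1,1.5,3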
TensorFlow version: softmax_tf.py

#!/usr/bin/python
# coding=utf-8

import numpy as np
import os
import sys
import tensorflow as tf


def load_data(path):
    # each row: comma-separated feature values, last column is the integer class label
    data_mat = []
    label_mat = []
    fd = open(path)
    for line in fd.readlines():
        line = line.strip()
        if not line:
            continue
        line_array = line.split(',')
        temp_data_array = []
        for i in range(len(line_array) - 1):
            temp_data_array.append(float(line_array[i]))
        data_mat.append(temp_data_array)
        label_mat.append(int(line_array[len(line_array) - 1]))

    fd.close()

    return np.array(data_mat), np.array(label_mat)


# define dimensions
label_dim = 3
input_dim = 4  # zi = bi*1 + wi1*x1 + wi2*x2 + wi3*x3 + wi4*x4

# placeholders for the training data
input_features = tf.placeholder(tf.float32, [None, input_dim])
input_label = tf.placeholder(tf.float32, [None, label_dim])

# define variables
W = tf.Variable(tf.random_normal([input_dim, label_dim]), name='weights')
b = tf.Variable(tf.zeros([label_dim]), name='bias')

# define model
output = tf.nn.softmax(tf.matmul(input_features, W) + b)

# loss: mean cross entropy over the batch
cross_entropy = tf.reduce_mean(-tf.reduce_sum(input_label * tf.log(output), reduction_indices=[1]))

# train
#train = tf.train.AdamOptimizer(0.1).minimize(cross_entropy)
train = tf.train.AdamOptimizer(0.05).minimize(cross_entropy)

# evaluation ops, defined once outside the training loop
correct_prediction = tf.equal(tf.argmax(input_label, 1), tf.argmax(output, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

x, y = load_data('lr_softmax.csv')
print np.shape(x)
print np.shape(y)
# convert the 1-based integer labels into one-hot rows
temp_y = np.zeros([np.shape(y)[0], label_dim])
print np.shape(temp_y)[0]
for idx in range(np.shape(temp_y)[0]):
    temp_y[idx][y[idx] - 1] = 1
y = temp_y
print np.shape(y)

saver = tf.train.Saver()
file_path = "./models/softmax_tf"
path = os.path.dirname(os.path.abspath(file_path))
if not os.path.isdir(path):
    os.makedirs(path)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    for i in range(1000):
        sess.run(train, feed_dict={input_features: x, input_label: y})

        if i % 50 == 0:
            print sess.run(W)
            print sess.run(accuracy, feed_dict={input_features: x, input_label: y})

    saver.save(sess, file_path, write_meta_graph=True)
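A minimal sketch of how the checkpoint written above could be restored for inference. It assumes it runs in the same script, so that saver, file_path, output, input_features and x are still in scope; it is not part of softmax_tf.py as listed:

with tf.Session() as sess:
    # reload the trained W and b from ./models/softmax_tf
    saver.restore(sess, file_path)
    predictions = sess.run(tf.argmax(output, 1), feed_dict={input_features: x})
    print predictions + 1  # +1 maps the 0-based argmax back to the 1-based labels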

Pure Python (NumPy) version
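The NumPy implementations below minimize an L2-regularized softmax cross-entropy (the TensorFlow version above uses the same cross-entropy without the regularizer). With $m$ samples $x_i$ (the bias included as a leading 1), one-hot labels $y_{ic}$, weight matrix $W \in \mathbb{R}^{C \times n}$ and softmax probabilities $p_{ic} = \exp(w_c x_i^\top) / \sum_{c'} \exp(w_{c'} x_i^\top)$, the cost and gradient computed in gradient_descent are

$$J(W) = -\frac{1}{m}\sum_{i=1}^{m}\sum_{c=1}^{C} y_{ic}\,\log p_{ic} + \frac{\lambda}{2}\lVert W\rVert^2, \qquad \nabla_W J = -\frac{1}{m}\,(Y - P)^{\top}X + \lambda W,$$

where $X$ is the $m \times n$ data matrix, $Y$ and $P$ are the $m \times C$ matrices of one-hot labels and predicted probabilities, and the code's lamta variable plays the role of $\lambda$.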

import math
import numpy as np
import os
import random
import sys


# a leading 1 is prepended to every sample, so z_i = w_i0 + w_i1*x1 + ... + w_i4*x4
def load_data_set(path):
    data_mat = []
    label_mat = []
    fd = open(path)
    for line in fd.readlines():
        line = line.strip()
        if not line:
            continue
        line_array = line.split(',')
        temp_data_array = []
        temp_data_array.append(1)  # bias term
        for i in range(len(line_array) - 1):
            temp_data_array.append(float(line_array[i]))
        data_mat.append(temp_data_array)
        label_mat.append(int(line_array[len(line_array) - 1]))
    fd.close()
    #print len(data_mat)
    #print len(label_mat)
    return data_mat, label_mat


def gradient_descent(data_mat_in, class_labels, class_num, max_cycles=1000000):
    data_matrix = np.mat(data_mat_in)                 # convert to NumPy matrix, m*n
    label_mat = np.mat(class_labels).transpose() - 1  # column vector of 0-based labels
    m, n = np.shape(data_matrix)
    alpha = 0.00001    # learning rate
    lamta = 0.01       # L2 regularization strength
    threshold = 0.01   # stop when the gradient becomes small enough
    y_mat = np.zeros((m, class_num))                  # one-hot labels, m*c
    #np.random.seed(1)
    #weights = np.random.random((class_num, n)) / m
    weights = np.zeros((class_num, n))
    for i in xrange(m):
        y_mat[i, label_mat[i, 0]] = 1.0

    for k in xrange(max_cycles):
        exp_w_x = np.exp(np.dot(data_matrix, weights.transpose()))  # m*n * n*c --> m*c
        exp_w_x_p = exp_w_x / exp_w_x.sum(axis=1).reshape(m, 1)     # softmax probabilities, m*c
        delta_w = -1.0 / m * np.dot((y_mat - exp_w_x_p).transpose(), data_matrix) + lamta * weights
        cost = -1.0 / m * np.sum(np.multiply(y_mat, np.log(exp_w_x_p))) + lamta / 2 * np.sum(weights ** 2)
        weights -= alpha * delta_w
        if k % 50 == 0:
            #print cost  # decreases as training proceeds
            print np.sum(np.fabs(delta_w))  # decreases as training proceeds
        if np.sum(np.fabs(delta_w)) < threshold:
            break

    return weights


def get_exp(x, weights, idx):
    weights_idx = weights[idx]
    return math.exp(np.dot(weights_idx, x.transpose()))


def get_exp_probability(x, weights, idx, class_num):
    up = get_exp(x, weights, idx)
    down = 0.0
    for i in xrange(class_num):
        down += get_exp(x, weights, i)
    return up * 1.0 / down


def stochastic_gradient_descent(data_mat_in, class_labels, class_num, max_cycles=1000000):
    data_matrix = np.mat(data_mat_in)                 # convert to NumPy matrix
    label_mat = np.mat(class_labels).transpose() - 1  # column vector of 0-based labels
    m, n = np.shape(data_matrix)
    alpha = 0.00001
    lamta = 0.01
    threshold = 0.01
    weights = np.zeros((class_num, n))
    for k in xrange(max_cycles):
        expect = 0.0
        cost_p1 = 0.0
        cost_p2 = 0.0
        for i in xrange(class_num):
            delta_w_i = np.zeros((1, n))
            for j in xrange(m):
                x = data_matrix[j]
                y = label_mat[j]
                probability = get_exp_probability(x, weights, i, class_num)
                delta_w_i += x * (int(y == i) - probability)
                cost_p1 += int(y == i) * np.log(probability)

            cost_p2 += np.sum(weights[i] ** 2)
            delta_w_i = -1.0 / m * delta_w_i + lamta * weights[i]
            weights[i] = weights[i] - alpha * delta_w_i
            expect += np.sum(np.fabs(delta_w_i))

        cost = -1.0 / m * cost_p1 + lamta / 2 * cost_p2
        if k % 50 == 0:
            #print expect
            print cost
        if expect < threshold:
            break
    return weights


def stochastic_gradient_descentEnhance(data_mat_in, class_labels, class_num, max_cycles=1000000):
    data_matrix = np.mat(data_mat_in)                 # convert to NumPy matrix
    label_mat = np.mat(class_labels).transpose() - 1  # column vector of 0-based labels
    m, n = np.shape(data_matrix)
    alpha = 0.00001
    lamta = 0.01
    threshold = 0.01
    weights = np.zeros((class_num, n))
    for k in xrange(max_cycles):
        expect = 0.0
        cost_p1 = 0.0
        cost_p2 = 0.0
        for i in xrange(class_num):
            delta_w_i = np.zeros((1, n))
            dataIdxArray = range(m)  # [0, 1, ..., m - 1], sampled without replacement below
            for j in xrange(m):
                alpha = 4 / (1.0 + j + k) + 0.000001  # alpha decreases with the iterations
                randIdx = int(random.uniform(0, len(dataIdxArray)))
                x = data_matrix[dataIdxArray[randIdx]]
                y = label_mat[dataIdxArray[randIdx]]
                probability = get_exp_probability(x, weights, i, class_num)
                delta_w_i += x * (int(y == i) - probability)
                cost_p1 += int(y == i) * np.log(probability)
                del(dataIdxArray[randIdx])

            cost_p2 += np.sum(weights[i] ** 2)
            delta_w_i = -1.0 / m * delta_w_i + lamta * weights[i]
            weights[i] = weights[i] - alpha * delta_w_i
            expect += np.sum(np.fabs(delta_w_i))

        cost = -1.0 / m * cost_p1 + lamta / 2 * cost_p2
        if k % 50 == 0:
            #print expect
            print cost
        if expect < threshold:
            break
    return weights


if __name__ == '__main__':
    data_mat, label_mat = load_data_set(sys.argv[1])
    label_set = set()
    for label in label_mat:
        label_set.add(label)

    weights = gradient_descent(data_mat, label_mat, len(label_set), 1000000)  # works ok
    #weights = stochastic_gradient_descent(data_mat, label_mat, len(label_set), 100000)
    #weights = stochastic_gradient_descentEnhance(data_mat, label_mat, len(label_set), 10000)
    #np.savetxt('%s_weights' % (sys.argv[2]), weights)
    np.savetxt('weights.txt', weights)
    # evaluate on the training set: predicted class = argmax of the class scores
    data_mat_array = np.array(data_mat)
    calc_mat = np.dot(data_mat_array, weights.transpose())
    rec_array = np.argmax(calc_mat, axis=1)
    print rec_array
    print len(label_mat)
    ok_count = 0
    for i in range(len(label_mat)):
        if rec_array[i] + 1 == label_mat[i]:
            ok_count += 1

    print ok_count  # number of correctly classified training samples
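The script reads the data path from sys.argv[1], so it is run as, e.g., python softmax_np.py lr_softmax.csv (the filename softmax_np.py is an assumption; the post does not name this file). Below is a minimal sketch of using the saved weights.txt to classify a new sample; the feature values are placeholders:

import numpy as np

# hypothetical follow-up snippet, not part of the training script above
weights = np.loadtxt('weights.txt')            # shape (class_num, n); n = 1 bias + 4 features
sample = np.array([1.0, 5.1, 3.5, 1.4, 0.2])   # leading 1.0 is the bias term added by load_data_set
scores = np.dot(weights, sample)               # one unnormalized score per class
print scores.argmax() + 1                      # +1 because the labels in lr_softmax.csv are 1-based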
