This post is the implementation companion to "理论和代码-逻辑回归-理论". The code does not follow the theory exactly; it also takes things such as L2 regularization into account. The training data is in lr_train.txt.
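For reference, load_data_set below expects each line of lr_train.txt to contain whitespace-separated feature values followed by a 0/1 label in the last column. The numbers below are made-up placeholders, not actual rows from the file:

0.5    1.2   0
-1.3   0.7   1
2.0   -0.4   0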
TensorFlow version, lr_tf.py:
#!/usr/bin/python
# coding=utf-8
import os
import numpy as np
import tensorflow as tf

def load_data_set(path):
    """Read whitespace-separated rows: feature columns followed by a 0/1 label."""
    data_mat = []
    label_mat = []
    fd = open(path)
    for line in fd.readlines():
        line = line.strip()
        if not line:
            continue
        #lineArray = line.split('\t')
        lineArray = line.split()
        temp_data_array = []
        for i in range(len(lineArray) - 1):
            temp_data_array.append(float(lineArray[i]))
        data_mat.append(temp_data_array)
        label_mat.append(int(lineArray[len(lineArray) - 1]))
    fd.close()
    return np.array(data_mat), np.array(label_mat)

# define dimensions
label_dim = 1  # 0 or 1
input_dim = 2  # z = w1*x1 + w2*x2 + b

# train data
input_features = tf.placeholder(tf.float32, [None, input_dim])
input_label = tf.placeholder(tf.float32, [None, label_dim])

# define variables
W = tf.Variable(tf.random_normal([input_dim, label_dim]), name="weights")
b = tf.Variable(tf.zeros([label_dim]), name="bias")

# define model: sigmoid of the linear score
output = tf.nn.sigmoid(tf.matmul(input_features, W) + b)

# loss: binary cross entropy
cross_entropy = -(input_label * tf.log(output) + (1 - input_label) * tf.log(1 - output))
#loss = tf.reduce_sum(cross_entropy)
loss = tf.reduce_mean(cross_entropy)

# err: mean squared difference, reported only for monitoring
diff = tf.square(input_label - output)
err = tf.reduce_mean(diff)

# optimizer
optimizer = tf.train.AdamOptimizer(0.01)
train = optimizer.minimize(loss)

#x, y = load_data_set(sys.argv[1])
x, y = load_data_set("lr_train.txt")
y = y.reshape((np.shape(x)[0], label_dim))
#print(np.shape(x))
#print(np.shape(y))

saver = tf.train.Saver()
ckpt_file_path = "./models/lr_tf"
path = os.path.dirname(os.path.abspath(ckpt_file_path))
if not os.path.isdir(path):
    os.makedirs(path)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for idx in range(5000):
        _, lossval, errval, outputval = sess.run(
            [train, loss, err, output],
            feed_dict={input_features: x, input_label: y})
        if idx % 100 == 0:
            #print(sess.run(b))
            #print(sess.run(W))
            print(lossval)
    saver.save(sess, ckpt_file_path, write_meta_graph=True)
Run:
python lr_tf.py
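The listing above does not actually include the L2 regularization mentioned at the top. A minimal sketch of how it could be added to the same graph, assuming a hand-picked penalty strength l2_lambda (hypothetical, not part of the original script):

# Hypothetical L2 extension of the loss defined above (not in lr_tf.py).
l2_lambda = 0.01                            # assumed regularization strength
l2_penalty = l2_lambda * tf.nn.l2_loss(W)   # 0.5 * sum(W^2); the bias is usually left unregularized
loss_with_l2 = loss + l2_penalty
train = optimizer.minimize(loss_with_l2)    # train on the penalized loss instead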
Raw Python version, lr.py:
import sys
import os
#from numpy import *
import numpy as np
import random

eclipse = 0.01  # convergence threshold: stop once the cost drops below this value

# z = w0 + w1*x1 + w2*x2
def load_data_set(path):
    data_mat = []
    label_mat = []
    fd = open(path)
    for line in fd.readlines():
        line = line.strip()
        if not line:
            continue
        #lineArray = line.split('\t')
        lineArray = line.split()
        temp_data_array = []
        temp_data_array.append(1)  # constant 1 column for the bias term w0
        for i in range(len(lineArray) - 1):
            temp_data_array.append(float(lineArray[i]))
        data_mat.append(temp_data_array)
        label_mat.append(int(lineArray[len(lineArray) - 1]))
    fd.close()
    return data_mat, label_mat

def sigmoid(x):
    return 1.0 / (1 + np.exp(-x))

def gradient_descent(data_mat_in, class_labels, max_cycles=500):
    data_matrix = np.mat(data_mat_in)             # convert to NumPy matrix, m*n
    label_mat = np.mat(class_labels).transpose()  # convert to NumPy matrix, m*1
    m, n = np.shape(data_matrix)
    alpha = 0.001
    weights = np.ones((n, 1))
    for k in range(max_cycles):
        h = sigmoid(data_matrix * weights)        # matrix mult m*n * n*1 --> m*1
        error = label_mat - h                     # vector subtraction, m*1
        weights = weights + alpha * data_matrix.transpose() * error  # full-batch update
        cost = -1.0 / m * (label_mat.transpose() * np.log(h) + (1 - label_mat).transpose() * np.log(1 - h))
        if k % 50 == 0:
            print(cost)
        if cost < eclipse:
            break
    return weights

def stochastic_gradient_descent(data_mat_in, class_labels, max_cycles=500):
    data_matrix = np.array(data_mat_in)
    m, n = np.shape(data_matrix)
    alpha = 0.001
    weights = np.ones(n)  # np.array([ 1., 1., 1.])
    for k in range(max_cycles):
        cost = 0.0
        for i in range(m):                        # update once per sample, in order
            h = sigmoid(np.sum(data_matrix[i] * weights))
            error = class_labels[i] - h
            weights = weights + alpha * error * data_matrix[i]
            cost += class_labels[i] * np.log(h) + (1 - class_labels[i]) * np.log(1 - h)
        cost = -1.0 / m * cost
        if k % 50 == 0:
            print(cost)
        if cost < eclipse:
            break
    return weights

def stochastic_gradient_descent_enhance(data_mat_in, class_labels, max_cycles=500):
    data_matrix = np.array(data_mat_in)
    m, n = np.shape(data_matrix)
    weights = np.ones(n)  # np.array([ 1., 1., 1.])
    for k in range(max_cycles):
        dataIdxArray = list(range(m))             # [0, 1, ..., m - 1]
        cost = 0.0
        for j in range(m):
            alpha = 4 / (1.0 + j + k) + 0.0001    # alpha decreases with iteration
            randIdx = int(random.uniform(0, len(dataIdxArray)))
            sampleIdx = dataIdxArray[randIdx]     # pick a sample not yet used in this pass
            h = sigmoid(np.sum(data_matrix[sampleIdx] * weights))
            error = class_labels[sampleIdx] - h
            weights = weights + alpha * error * data_matrix[sampleIdx]
            cost += class_labels[sampleIdx] * np.log(h) + (1 - class_labels[sampleIdx]) * np.log(1 - h)
            del(dataIdxArray[randIdx])            # sample without replacement within one pass
        cost = -1.0 / m * cost
        if k % 50 == 0:
            print(cost)
        if cost < eclipse:
            break
    return weights

def classify_vector(x, weights):
    value = sigmoid(np.sum(x * weights))
    if value > 0.5:
        return 1
    else:
        return 0

if __name__ == '__main__':
    data_mat, label_mat = load_data_set(sys.argv[1])
    #weights = stochastic_gradient_descent_enhance(data_mat, label_mat, 500)  # 1000 also works ok
    #weights = stochastic_gradient_descent(data_mat, label_mat, 5000)  # works ok
    weights = gradient_descent(data_mat, label_mat, 5000)  # works ok
    weights = np.asarray(weights).ravel()  # flatten to [w0, w1, w2] regardless of which solver was used
    #print(weights)
    np.savetxt('%s_weights' % (sys.argv[2]), weights)
    #fd_weights = open('%s_weights' % (sys.argv[1]), 'w')
    #fd_weights.write('%s\n' % (weights))
    #fd_weights.close()
    print('b, w1, w2', weights)
    data_mat_array = np.array(data_mat)
    allCount = 0
    errorCount = 0
    for i in range(len(data_mat_array)):
        value = classify_vector(data_mat_array[i], weights)
        allCount += 1
        #print(value, '\t', label_mat[i])
        if value != label_mat[i]:
            errorCount += 1
    print(allCount, errorCount)
Run:
python lr.py lr_train.txt lr_train.txt
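As a rough usage sketch (not part of lr.py), the weight file written by np.savetxt above can be loaded back to score a new point. The feature values below are made up; the leading 1.0 matches the bias column that load_data_set prepends to every sample:

import numpy as np

def sigmoid(x):
    return 1.0 / (1 + np.exp(-x))

# lr_train.txt_weights is the file written by lr.py when argv[2] = lr_train.txt
w = np.loadtxt('lr_train.txt_weights')   # [w0, w1, w2]
x_new = np.array([1.0, 0.5, -1.2])       # hypothetical point: bias term, x1, x2
prob = sigmoid(np.dot(x_new, w))
print(1 if prob > 0.5 else 0)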