This post is the implementation companion to "理论和代码-逻辑回归-理论". The code does not follow the theory exactly; it also takes things such as L2 regularization into account. The training data is in lr_train.txt.
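For reference, load_data_set below expects each line of lr_train.txt to contain whitespace-separated feature values followed by a 0/1 label in the last column. The numbers below are made-up placeholders, not actual rows from the file:

0.5    1.2   0
-1.3   0.7   1
2.0   -0.4   0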
TensorFlow version, lr_tf.py:
#!/usr/bin/python
# coding=utf-8
import os
import numpy as np
import tensorflow as tf

def load_data_set(path):
    """Read whitespace-separated rows: feature columns followed by a 0/1 label."""
    data_mat = []
    label_mat = []
    fd = open(path)
    for line in fd.readlines():
        line = line.strip()
        if not line:
            continue
        #lineArray = line.split('\t')
        lineArray = line.split()
        temp_data_array = []
        for i in range(len(lineArray) - 1):
            temp_data_array.append(float(lineArray[i]))
        data_mat.append(temp_data_array)
        label_mat.append(int(lineArray[len(lineArray) - 1]))
    fd.close()
    return np.array(data_mat), np.array(label_mat)

# define dimensions
label_dim = 1  # 0 or 1
input_dim = 2  # z = w1*x1 + w2*x2 + b

# train data
input_features = tf.placeholder(tf.float32, [None, input_dim])
input_label = tf.placeholder(tf.float32, [None, label_dim])

# define variables
W = tf.Variable(tf.random_normal([input_dim, label_dim]), name="weights")
b = tf.Variable(tf.zeros([label_dim]), name="bias")

# define model: sigmoid of the linear score
output = tf.nn.sigmoid(tf.matmul(input_features, W) + b)

# loss: binary cross entropy
cross_entropy = -(input_label * tf.log(output) + (1 - input_label) * tf.log(1 - output))
#loss = tf.reduce_sum(cross_entropy)
loss = tf.reduce_mean(cross_entropy)

# err: mean squared difference, reported only for monitoring
diff = tf.square(input_label - output)
err = tf.reduce_mean(diff)

# optimizer
optimizer = tf.train.AdamOptimizer(0.01)
train = optimizer.minimize(loss)

#x, y = load_data_set(sys.argv[1])
x, y = load_data_set("lr_train.txt")
y = y.reshape((np.shape(x)[0], label_dim))
#print(np.shape(x))
#print(np.shape(y))

saver = tf.train.Saver()
ckpt_file_path = "./models/lr_tf"
path = os.path.dirname(os.path.abspath(ckpt_file_path))
if not os.path.isdir(path):
    os.makedirs(path)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for idx in range(5000):
        _, lossval, errval, outputval = sess.run(
            [train, loss, err, output],
            feed_dict={input_features: x, input_label: y})
        if idx % 100 == 0:
            #print(sess.run(b))
            #print(sess.run(W))
            print(lossval)
    saver.save(sess, ckpt_file_path, write_meta_graph=True)
Run:
python lr_tf.py
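The listing above does not actually include the L2 regularization mentioned at the top. A minimal sketch of how it could be added to the same graph, assuming a hand-picked penalty strength l2_lambda (hypothetical, not part of the original script):

# Hypothetical L2 extension of the loss defined above (not in lr_tf.py).
l2_lambda = 0.01                            # assumed regularization strength
l2_penalty = l2_lambda * tf.nn.l2_loss(W)   # 0.5 * sum(W^2); the bias is usually left unregularized
loss_with_l2 = loss + l2_penalty
train = optimizer.minimize(loss_with_l2)    # train on the penalized loss instead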
Raw Python version, lr.py:
import sys
import os
#from numpy import *
import numpy as np
import random

eclipse = 0.01  # convergence threshold: stop once the cost drops below this value

# z = w0 + w1*x1 + w2*x2
def load_data_set(path):
    data_mat = []
    label_mat = []
    fd = open(path)
    for line in fd.readlines():
        line = line.strip()
        if not line:
            continue
        #lineArray = line.split('\t')
        lineArray = line.split()
        temp_data_array = []
        temp_data_array.append(1)  # constant 1 column for the bias term w0
        for i in range(len(lineArray) - 1):
            temp_data_array.append(float(lineArray[i]))
        data_mat.append(temp_data_array)
        label_mat.append(int(lineArray[len(lineArray) - 1]))
    fd.close()
    return data_mat, label_mat

def sigmoid(x):
    return 1.0 / (1 + np.exp(-x))

def gradient_descent(data_mat_in, class_labels, max_cycles=500):
    data_matrix = np.mat(data_mat_in)             # convert to NumPy matrix, m*n
    label_mat = np.mat(class_labels).transpose()  # convert to NumPy matrix, m*1
    m, n = np.shape(data_matrix)
    alpha = 0.001
    weights = np.ones((n, 1))
    for k in range(max_cycles):
        h = sigmoid(data_matrix * weights)        # matrix mult m*n * n*1 --> m*1
        error = label_mat - h                     # vector subtraction, m*1
        weights = weights + alpha * data_matrix.transpose() * error  # full-batch update
        cost = -1.0 / m * (label_mat.transpose() * np.log(h) + (1 - label_mat).transpose() * np.log(1 - h))
        if k % 50 == 0:
            print(cost)
        if cost < eclipse:
            break
    return weights

def stochastic_gradient_descent(data_mat_in, class_labels, max_cycles=500):
    data_matrix = np.array(data_mat_in)
    m, n = np.shape(data_matrix)
    alpha = 0.001
    weights = np.ones(n)  # np.array([ 1., 1., 1.])
    for k in range(max_cycles):
        cost = 0.0
        for i in range(m):                        # update once per sample, in order
            h = sigmoid(np.sum(data_matrix[i] * weights))
            error = class_labels[i] - h
            weights = weights + alpha * error * data_matrix[i]
            cost += class_labels[i] * np.log(h) + (1 - class_labels[i]) * np.log(1 - h)
        cost = -1.0 / m * cost
        if k % 50 == 0:
            print(cost)
        if cost < eclipse:
            break
    return weights

def stochastic_gradient_descent_enhance(data_mat_in, class_labels, max_cycles=500):
    data_matrix = np.array(data_mat_in)
    m, n = np.shape(data_matrix)
    weights = np.ones(n)  # np.array([ 1., 1., 1.])
    for k in range(max_cycles):
        dataIdxArray = list(range(m))             # [0, 1, ..., m - 1]
        cost = 0.0
        for j in range(m):
            alpha = 4 / (1.0 + j + k) + 0.0001    # alpha decreases with iteration
            randIdx = int(random.uniform(0, len(dataIdxArray)))
            sampleIdx = dataIdxArray[randIdx]     # pick a sample not yet used in this pass
            h = sigmoid(np.sum(data_matrix[sampleIdx] * weights))
            error = class_labels[sampleIdx] - h
            weights = weights + alpha * error * data_matrix[sampleIdx]
            cost += class_labels[sampleIdx] * np.log(h) + (1 - class_labels[sampleIdx]) * np.log(1 - h)
            del(dataIdxArray[randIdx])            # sample without replacement within one pass
        cost = -1.0 / m * cost
        if k % 50 == 0:
            print(cost)
        if cost < eclipse:
            break
    return weights

def classify_vector(x, weights):
    value = sigmoid(np.sum(x * weights))
    if value > 0.5:
        return 1
    else:
        return 0

if __name__ == '__main__':
    data_mat, label_mat = load_data_set(sys.argv[1])
    #weights = stochastic_gradient_descent_enhance(data_mat, label_mat, 500)  # 1000 also works ok
    #weights = stochastic_gradient_descent(data_mat, label_mat, 5000)  # works ok
    weights = gradient_descent(data_mat, label_mat, 5000)  # works ok
    weights = np.asarray(weights).ravel()  # flatten to [w0, w1, w2] regardless of which solver was used
    #print(weights)
    np.savetxt('%s_weights' % (sys.argv[2]), weights)
    #fd_weights = open('%s_weights' % (sys.argv[1]), 'w')
    #fd_weights.write('%s\n' % (weights))
    #fd_weights.close()
    print('b, w1, w2', weights)
    data_mat_array = np.array(data_mat)
    allCount = 0
    errorCount = 0
    for i in range(len(data_mat_array)):
        value = classify_vector(data_mat_array[i], weights)
        allCount += 1
        #print(value, '\t', label_mat[i])
        if value != label_mat[i]:
            errorCount += 1
    print(allCount, errorCount)
Run:
python lr.py lr_train.txt lr_train.txt
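As a rough usage sketch (not part of lr.py), the weight file written by np.savetxt above can be loaded back to score a new point. The feature values below are made up; the leading 1.0 matches the bias column that load_data_set prepends to every sample:

import numpy as np

def sigmoid(x):
    return 1.0 / (1 + np.exp(-x))

# lr_train.txt_weights is the file written by lr.py when argv[2] = lr_train.txt
w = np.loadtxt('lr_train.txt_weights')   # [w0, w1, w2]
x_new = np.array([1.0, 0.5, -1.2])       # hypothetical point: bias term, x1, x2
prob = sigmoid(np.dot(x_new, w))
print(1 if prob > 0.5 else 0)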