Kaggle數字識別基線模型(use tensorflow)

來自專欄機器學習隨筆記

# import datas
import numpy as np
import pandas as pd
from PIL import Image
from subprocess import check_output

import tensorflow as tf

# print(check_output(["ls", "../input"]).decode("utf-8"))

# Hyper-parameters of the baseline run.
LEARNING_RATE = 0.05
TRAIN_STEPS = 2000
BATCH_SIZE = 100

# Read competition data files.  Raw strings keep the Windows backslashes
# literal; in a normal string "\t" and "\a" would be parsed as escapes.
train = pd.read_csv(r".\others\kaggle2_mnist\all\train.csv")
test = pd.read_csv(r".\others\kaggle2_mnist\all\test.csv")
print("Training set has {0[0]} rows and {0[1]} columns".format(train.shape))
print("Test set has {0[0]} rows and {0[1]} columns".format(test.shape))


class Dataset(object):
    """In-memory training set that serves shuffled mini-batches.

    Column 0 of ``data`` is read as the digit label and every remaining
    column as a pixel value.  Pixels are converted to float32 and multiplied
    by 1/255; labels are expanded to 10-wide one-hot rows.
    """

    def __init__(self, data):
        """Build the image and label arrays from a pandas DataFrame."""
        self.rows = len(data.values)
        pixels = data.iloc[:, 1:].values.astype(np.float32)
        self.images = np.multiply(pixels, 1.0 / 255.0)
        # one-hot: compare every label against 0..9 in one vectorised step.
        # A label outside 0..9 yields an all-zero row.
        digits = data.iloc[:, 0].values
        self.labels = (digits[:, None] == np.arange(10)).astype(int)
        self.index_in_epoch = 0
        self.epoch = 0

    def next_batch(self, batch_size):
        """Return the next ``(images, labels)`` slice of ``batch_size`` rows.

        When the requested slice would run past the end of the data, the
        epoch counter is incremented, images and labels are reshuffled with
        one shared permutation, and the batch is taken from the start of the
        reshuffled data (the unread tail of the previous epoch is skipped).
        If ``batch_size`` exceeds the number of rows, the returned slice is
        simply shorter than requested.
        """
        start = self.index_in_epoch
        self.index_in_epoch += batch_size
        if self.index_in_epoch > self.rows:
            self.epoch += 1
            perm = np.arange(self.rows)
            np.random.shuffle(perm)
            self.images = self.images[perm]
            self.labels = self.labels[perm]
            # next epoch
            start = 0
            self.index_in_epoch = batch_size
        end = self.index_in_epoch
        return self.images[start:end], self.labels[start:end]


# Softmax-regression graph (single linear layer), written against the
# TensorFlow 1.x graph/session API.
x = tf.placeholder(np.float32, [None, 784])
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))
logits = tf.matmul(x, W) + b
y = tf.nn.softmax(logits)
y_ = tf.placeholder(np.float32, [None, 10])

# log_softmax(logits) equals log(softmax(logits)) mathematically, but it does
# not produce -inf/NaN when a predicted probability underflows to zero.
cross_entropy = tf.reduce_mean(
    -tf.reduce_sum(y_ * tf.nn.log_softmax(logits), reduction_indices=[1])
)
train_step = tf.train.GradientDescentOptimizer(LEARNING_RATE).minimize(
    cross_entropy
)

# iloc slicing clamps to the rows that exist, so an upper bound larger than
# the table simply selects every row.
train_data = Dataset(train.iloc[0:420000])

test_images = test.values.astype(np.float32)
test_images = np.multiply(test_images, 1.0 / 255.0)

init = tf.global_variables_initializer()
# The context manager closes the session (and frees its resources) even if
# training raises.
with tf.Session() as sess:
    sess.run(init)
    for _ in range(TRAIN_STEPS):
        batch_xs, batch_ys = train_data.next_batch(BATCH_SIZE)
        sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})
    predictions = sess.run(y, feed_dict={x: test_images})

# Most probable class per test image.
predictions = np.argmax(predictions, axis=1)
result = pd.DataFrame({
    "ImageId": range(1, len(predictions) + 1),
    "Label": predictions,
})
result.to_csv(
    r".\others\kaggle2_mnist\all\result_2000_學習率0.05.csv",
    index=False,
    encoding="utf-8",
)

最終提交結果:微調過學習率與訓練樣本數後,這個很粗略的基線模型成績為 0.918。

細節待學習,待解釋補充。


推薦閱讀:
相關文章