From ee1d371b9b82c4053e4b00866cbca177240e80a4 Mon Sep 17 00:00:00 2001 From: dmclark53 Date: Wed, 27 Jul 2016 12:07:37 -0700 Subject: [PATCH 1/4] corrected merge conflict --- main.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/main.py b/main.py index 0e38412..30461ef 100644 --- a/main.py +++ b/main.py @@ -6,10 +6,7 @@ import tensorflow as tf import sys -<<<<<<< HEAD sys.path.append('wine_quality') -======= ->>>>>>> a5fa9c9b8f29d00dbdb3d7a6027c75d0160cd26f import wine_quality.model as model import json import os From d668409d4df3029c96e58153088fdc3a34bc1032 Mon Sep 17 00:00:00 2001 From: dmclark53 Date: Wed, 27 Jul 2016 12:07:56 -0700 Subject: [PATCH 2/4] added more comments --- wine_quality/softmax_regression.py | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/wine_quality/softmax_regression.py b/wine_quality/softmax_regression.py index 5e0da12..3f03f23 100644 --- a/wine_quality/softmax_regression.py +++ b/wine_quality/softmax_regression.py @@ -3,7 +3,7 @@ import pandas as pd import numpy as np from sklearn.cross_validation import train_test_split -import model +import wine_quality.model as model # Load the data red_wine = pd.read_csv('data/winequality-red.csv', sep=';') @@ -17,6 +17,7 @@ def _outliers(df, threshold, columns): df.loc[mask == True,col] = mean_property return df +# Convert labels into one-hot vector format def _dense_to_one_hot(labels_dense, num_classes=2): # Convert class labels from scalars to one-hot vectors num_labels = len(labels_dense) @@ -25,15 +26,17 @@ def _dense_to_one_hot(labels_dense, num_classes=2): labels_one_hot.flat[index_offset + labels_dense] = 1 return labels_one_hot +# Function to train the model def train_model(): column_list = red_wine.columns.tolist() - threshold = 5 + threshold = 5 # Set threshold to 5 standard deviations + # Remove outliers red_wine_cleaned = red_wine.copy() red_wine_cleaned = _outliers(red_wine_cleaned, threshold, column_list[0:-1]) - # Bin the data + # Bin the data into three separate categories bins = [3, 5, 6, 8] red_wine_cleaned['category'] = pd.cut(red_wine_cleaned.quality, bins, labels=['Bad', 'Average', 'Good'], include_lowest=True) @@ -44,48 +47,59 @@ def train_model(): bins = [3, 5, 8] red_wine_newcats['category'] = pd.cut(red_wine_newcats.quality, bins, labels=['Bad', 'Good'], include_lowest=True) - + # Extract categories column and save in an array for the labels y_red_wine = red_wine_newcats[['category']].get_values() - # Removing fixed_acidity and quality + + # Extract features and save to an array. Removing fixed_acidity and quality X_red_wine = red_wine_newcats.iloc[:,1:-2].get_values() + # Convert string categories into integers that can be used to make one-hot vectors y_red_wine_raveled = y_red_wine.ravel() y_red_wine_integers = [y.replace('Bad', '1') for y in y_red_wine_raveled] y_red_wine_integers = [y.replace('Good', '0') for y in y_red_wine_integers] y_red_wine_integers = [np.int(y) for y in y_red_wine_integers] - + # Create one-hot vector array for labels y_one_hot = _dense_to_one_hot(y_red_wine_integers, num_classes=2) + # Split data into training and test sets X_train, X_test, y_train, y_test = train_test_split(X_red_wine, y_one_hot, test_size=0.2, random_state=42) - # model + + # Define model parameters learning_rate = 0.001 batch_size = 126 + # Create placeholders and variables for input data and model with tf.variable_scope("softmax_regression"): X = tf.placeholder("float", [None, 10]) y, variables = model.softmax_regression(X) - # train y_ = tf.placeholder("float", [None, 2]) + + # Define the cost and optimization functions cost = -tf.reduce_mean(y_*tf.log(y)) optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost) correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1)) accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float")) + # Start the TensorFlow session and train the model saver = tf.train.Saver(variables) sess = tf.Session() init = tf.initialize_all_variables() sess.run(init) + # Loop through each epoch for i in range(100): average_cost = 0 number_of_batches = int(len(X_train) / batch_size) + + # Loop through each batch for start, end in zip(range(0, len(X_train), batch_size), range(batch_size, len(X_train), batch_size)): sess.run(optimizer, feed_dict={X: X_train[start:end], y_: y_train[start:end]}) # Compute average loss average_cost += sess.run(cost, feed_dict={X: X_train[start:end], y_: y_train[start:end]}) / number_of_batches print(sess.run(accuracy, feed_dict={X: X_test, y_: y_test})) + # Save the model path = saver.save(sess, os.path.join(os.path.dirname(__file__), "data/softmax_regression.ckpt")) print("Saved:", path) From 7d5cba1cf6f10037c7adb486383b669a48602508 Mon Sep 17 00:00:00 2001 From: dmclark53 Date: Wed, 27 Jul 2016 14:27:19 -0700 Subject: [PATCH 3/4] added _make_integer_labels() function --- wine_quality/softmax_regression.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/wine_quality/softmax_regression.py b/wine_quality/softmax_regression.py index 3f03f23..dc0c5d6 100644 --- a/wine_quality/softmax_regression.py +++ b/wine_quality/softmax_regression.py @@ -17,6 +17,7 @@ def _outliers(df, threshold, columns): df.loc[mask == True,col] = mean_property return df + # Convert labels into one-hot vector format def _dense_to_one_hot(labels_dense, num_classes=2): # Convert class labels from scalars to one-hot vectors @@ -26,6 +27,17 @@ def _dense_to_one_hot(labels_dense, num_classes=2): labels_one_hot.flat[index_offset + labels_dense] = 1 return labels_one_hot + +# Function to convert string categories into integers for making one-hot vectors +def _make_integer_labels(label_series): + label_series_raveled = label_series.ravel().copy() + labels_to_replace = np.sort(label_series.unique()) + replace_with = np.arange(len(labels_to_replace)).tolist() + integer_labels = pd.Series(label_series_raveled).replace(to_replace=labels_to_replace, value=replace_with).tolist() + + return integer_labels + + # Function to train the model def train_model(): @@ -54,10 +66,7 @@ def train_model(): X_red_wine = red_wine_newcats.iloc[:,1:-2].get_values() # Convert string categories into integers that can be used to make one-hot vectors - y_red_wine_raveled = y_red_wine.ravel() - y_red_wine_integers = [y.replace('Bad', '1') for y in y_red_wine_raveled] - y_red_wine_integers = [y.replace('Good', '0') for y in y_red_wine_integers] - y_red_wine_integers = [np.int(y) for y in y_red_wine_integers] + y_red_wine_integers = _make_integer_labels(y_red_wine) # Create one-hot vector array for labels y_one_hot = _dense_to_one_hot(y_red_wine_integers, num_classes=2) From 4ba1387098567728904806dcf1adf1cc7c5c90e9 Mon Sep 17 00:00:00 2001 From: dmclark53 Date: Sat, 30 Jul 2016 14:53:38 -0700 Subject: [PATCH 4/4] fixed error in _make_integer_labels function --- wine_quality/softmax_regression.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/wine_quality/softmax_regression.py b/wine_quality/softmax_regression.py index dc0c5d6..dc74938 100644 --- a/wine_quality/softmax_regression.py +++ b/wine_quality/softmax_regression.py @@ -29,11 +29,13 @@ def _dense_to_one_hot(labels_dense, num_classes=2): # Function to convert string categories into integers for making one-hot vectors -def _make_integer_labels(label_series): - label_series_raveled = label_series.ravel().copy() - labels_to_replace = np.sort(label_series.unique()) +def _make_integer_labels(label_array): + labels_raveled = label_array.ravel() + label_series = pd.Series(labels_raveled) + unique_labels = label_series.unique() + labels_to_replace = np.sort(unique_labels) replace_with = np.arange(len(labels_to_replace)).tolist() - integer_labels = pd.Series(label_series_raveled).replace(to_replace=labels_to_replace, value=replace_with).tolist() + integer_labels = label_series.replace(to_replace=labels_to_replace, value=replace_with).tolist() return integer_labels @@ -61,6 +63,7 @@ def train_model(): # Extract categories column and save in an array for the labels y_red_wine = red_wine_newcats[['category']].get_values() + print(pd.Series(y_red_wine.ravel()).unique()) # Extract features and save to an array. Removing fixed_acidity and quality X_red_wine = red_wine_newcats.iloc[:,1:-2].get_values()