CNTK - Simple linear regression
The following is sample code that creates a simple linear regression model for working with car data.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
import cntk as C | |
# Read the training and evaluation tables from CSV files in the working directory.
trainData = pd.read_csv("cardata.csv")
testData = pd.read_csv("testCarData.csv")

# Sizes handed to the CNTK input variables further down the script.
input_dim = 2
num_output_classes = 2

# Two boolean masks over the 'IsRaceCar' column: rows equal to 0, then rows equal to 1.
class_ind = [trainData['IsRaceCar'] == 0, trainData['IsRaceCar'] == 1]

# Both masks are concatenated end to end and converted to float32.
plotLabel = np.hstack(class_ind).astype(np.float32)
# We expect our data features to be in [[1,2], [2,1]] ... format | |
def getData(targetData):
    """Split a car table into feature rows and two-element label rows.

    Args:
        targetData: Table (for example a pandas DataFrame) that provides the
            'CarCC', 'Turbo' and 'IsRaceCar' columns.

    Returns:
        A ``(features, labels)`` tuple. ``features`` holds one
        ``[CarCC, Turbo]`` pair per row. ``labels`` holds one
        ``[IsRaceCar, complement]`` pair per row, where ``complement`` is 0
        when ``IsRaceCar`` equals 1 and 1 otherwise.
    """
    engine_sizes = targetData['CarCC'].values
    turbo_flags = targetData['Turbo'].values
    features = [[cc, turbo] for cc, turbo in zip(engine_sizes, turbo_flags)]

    # Labels are read from the table being converted rather than from the
    # module-level training table; otherwise evaluation features would be
    # paired with training labels.
    labels = [
        [class_number, 0 if class_number == 1 else 1]
        for class_number in targetData['IsRaceCar']
    ]
    return features, labels
# Convert the training table into feature rows and label rows, then show them.
dataFeatures, labels = getData(trainData)
print('train data samples')
print(dataFeatures)
print(labels)

import matplotlib.pyplot as plt

# plotLabel concatenates the IsRaceCar == 0 mask and the IsRaceCar == 1 mask,
# so it is twice as long as the table. Only the first len(trainData) entries
# line up with the plotted rows; scatter() needs exactly one colour per point.
# In that slice a value of 0 marks a row whose IsRaceCar is not 0 (drawn red).
colors = ['r' if label == 0 else 'b' for label in plotLabel[:len(trainData)]]
plt.scatter(trainData['CarCC'], trainData['Turbo'], c=colors)
plt.xlabel("Engine size")
plt.ylabel("With Turbo")
plt.show()

## starting off with a not so complicated linear model
# Filled by linear_layer() with the weight ('w') and bias ('b') parameters.
mydict = {}
def linear_layer(input_var, output_dim):
    """Build the affine expression ``input_var * W + b``.

    The created weight and bias parameters are also stored in the
    module-level ``mydict`` under the keys 'w' and 'b'.

    Args:
        input_var: CNTK variable; the first entry of its ``shape`` is used as
            the number of weight rows.
        output_dim: Number of weight columns and size of the bias.

    Returns:
        The CNTK expression ``C.times(input_var, W) + b``.
    """
    n_inputs = input_var.shape[0]
    weights = C.parameter(shape=(n_inputs, output_dim))
    bias = C.parameter(shape=(output_dim))

    # Expose the parameters to the rest of the script.
    mydict['w'] = weights
    mydict['b'] = bias

    projection = C.times(input_var, weights)
    return projection + bias
# Symbolic input for one feature row, and the linear model built on top of it.
feature = C.input_variable(input_dim, np.float32)
output_dim = num_output_classes
z = linear_layer(feature, output_dim)

## pretty standard stuff ###
progress_printer = C.logging.ProgressPrinter(tag='Training', num_epochs=25)
C.logging.log_number_of_parameters(z)

# Symbolic input for one label row, plus the loss and the error metric.
label = C.input_variable(num_output_classes, np.float32)
loss = C.cross_entropy_with_softmax(z, label)
eval_error = C.classification_error(z, label)

# Plain SGD over the model parameters with a fixed per-minibatch rate.
learning_rate = 0.5
lr_schedule = C.learning_rate_schedule(learning_rate, C.UnitType.minibatch)
learner = C.sgd(z.parameters, lr_schedule)
trainer = C.Trainer(z, (loss, eval_error), [learner], progress_printer)

minibatch_size = 25
# NOTE(review): the source indentation was lost; this assumes only the
# training call belongs to the loop body - confirm against the original.
for i in range(5):
    # Feed the full training set to the input variables on every pass.
    trainer.train_minibatch({feature: dataFeatures, label: labels})

# Rebind the data variables to the evaluation table and run one test pass.
dataFeatures, labels = getData(testData)
trainer.test_minibatch({feature: dataFeatures, label: labels})

# Run the softmax of the model output on two hand-written feature rows.
predDataFeature = [[2500, 1], [300, 0]]
out = C.softmax(z)
result = out.eval({feature: predDataFeature})
print('predication results:')
print(result)
print("Predicted:", [np.argmax(row) for row in result])
CarData.csv + TestCarData.csv | |
CarCC,Turbo,IsRaceCar | |
3000,1,1 | |
2500,1,1 | |
1300,0,0 | |
1200,0,0 | |
1600,0,0 | |
2000,0,0 | |
1800,0,0 | |
1500,0,0 | |
900,0,0 | |
700,0,0 | |
800,0,0 | |
700,0,0 | |
1100,0,0 | |
1300,0,0 | |
1200,0,0 | |
1600,0,0 | |
2000,0,0 | |
1800,0,0 | |
1500,0,0 | |
900,0,0 | |
700,0,0 | |
800,0,0 | |
700,0,0 | |
1100,0,0 | |
1300,0,0 | |
1200,0,0 | |
1600,0,0 | |
2000,0,0 | |
1800,0,0 | |
1500,0,0 | |
900,0,0 | |
700,0,0 | |
800,0,0 | |
700,0,0 | |
1100,0,0 | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
import cntk as C | |
# Read the training and evaluation tables from CSV files in the working directory.
trainData = pd.read_csv("cardata.csv")
testData = pd.read_csv("testCarData.csv")

# Sizes handed to the CNTK input variables further down the script.
input_dim = 2
num_output_classes = 2

# Two boolean masks over the 'IsRaceCar' column: rows equal to 0, then rows equal to 1.
class_ind = [trainData['IsRaceCar'] == 0, trainData['IsRaceCar'] == 1]

# Both masks are concatenated end to end and converted to float32.
plotLabel = np.hstack(class_ind).astype(np.float32)
# We expect our data features to be in [[1,2], [2,1]] ... format | |
def getData(targetData):
    """Split a car table into feature rows and two-element label rows.

    Args:
        targetData: Table (for example a pandas DataFrame) that provides the
            'CarCC', 'Turbo' and 'IsRaceCar' columns.

    Returns:
        A ``(features, labels)`` tuple. ``features`` holds one
        ``[CarCC, Turbo]`` pair per row. ``labels`` holds one
        ``[IsRaceCar, complement]`` pair per row, where ``complement`` is 0
        when ``IsRaceCar`` equals 1 and 1 otherwise.
    """
    engine_sizes = targetData['CarCC'].values
    turbo_flags = targetData['Turbo'].values
    features = [[cc, turbo] for cc, turbo in zip(engine_sizes, turbo_flags)]

    # Labels are read from the table being converted rather than from the
    # module-level training table; otherwise evaluation features would be
    # paired with training labels.
    labels = [
        [class_number, 0 if class_number == 1 else 1]
        for class_number in targetData['IsRaceCar']
    ]
    return features, labels
# Convert the training table into feature rows and label rows, then show them.
dataFeatures, labels = getData(trainData)
print('train data samples')
print(dataFeatures)
print(labels)

import matplotlib.pyplot as plt

# plotLabel concatenates the IsRaceCar == 0 mask and the IsRaceCar == 1 mask,
# so it is twice as long as the table. Only the first len(trainData) entries
# line up with the plotted rows; scatter() needs exactly one colour per point.
# In that slice a value of 0 marks a row whose IsRaceCar is not 0 (drawn red).
colors = ['r' if label == 0 else 'b' for label in plotLabel[:len(trainData)]]
plt.scatter(trainData['CarCC'], trainData['Turbo'], c=colors)
plt.xlabel("Engine size")
plt.ylabel("With Turbo")
plt.show()

## starting off with a not so complicated linear model
# Filled by linear_layer() with the weight ('w') and bias ('b') parameters.
mydict = {}
def linear_layer(input_var, output_dim):
    """Build the affine expression ``input_var * W + b``.

    The created weight and bias parameters are also stored in the
    module-level ``mydict`` under the keys 'w' and 'b'.

    Args:
        input_var: CNTK variable; the first entry of its ``shape`` is used as
            the number of weight rows.
        output_dim: Number of weight columns and size of the bias.

    Returns:
        The CNTK expression ``C.times(input_var, W) + b``.
    """
    n_inputs = input_var.shape[0]
    weights = C.parameter(shape=(n_inputs, output_dim))
    bias = C.parameter(shape=(output_dim))

    # Expose the parameters to the rest of the script.
    mydict['w'] = weights
    mydict['b'] = bias

    projection = C.times(input_var, weights)
    return projection + bias
# Symbolic input for one feature row, and the linear model built on top of it.
feature = C.input_variable(input_dim, np.float32)
output_dim = num_output_classes
z = linear_layer(feature, output_dim)

## pretty standard stuff ###
progress_printer = C.logging.ProgressPrinter(tag='Training', num_epochs=25)
C.logging.log_number_of_parameters(z)

# Symbolic input for one label row, plus the loss and the error metric.
label = C.input_variable(num_output_classes, np.float32)
loss = C.cross_entropy_with_softmax(z, label)
eval_error = C.classification_error(z, label)

# Plain SGD over the model parameters with a fixed per-minibatch rate.
learning_rate = 0.5
lr_schedule = C.learning_rate_schedule(learning_rate, C.UnitType.minibatch)
learner = C.sgd(z.parameters, lr_schedule)
trainer = C.Trainer(z, (loss, eval_error), [learner], progress_printer)

minibatch_size = 25
# NOTE(review): the source indentation was lost; this assumes only the
# training call belongs to the loop body - confirm against the original.
for i in range(5):
    # Feed the full training set to the input variables on every pass.
    trainer.train_minibatch({feature: dataFeatures, label: labels})

# Rebind the data variables to the evaluation table and run one test pass.
dataFeatures, labels = getData(testData)
trainer.test_minibatch({feature: dataFeatures, label: labels})

# Run the softmax of the model output on two hand-written feature rows.
predDataFeature = [[2500, 1], [300, 0]]
out = C.softmax(z)
result = out.eval({feature: predDataFeature})
print('predication results:')
print(result)
print("Predicted:", [np.argmax(row) for row in result])
CarData.csv + TestCarData.csv | |
CarCC,Turbo,IsRaceCar | |
3000,1,1 | |
2500,1,1 | |
1300,0,0 | |
1200,0,0 | |
1600,0,0 | |
2000,0,0 | |
1800,0,0 | |
1500,0,0 | |
900,0,0 | |
700,0,0 | |
800,0,0 | |
700,0,0 | |
1100,0,0 | |
1300,0,0 | |
1200,0,0 | |
1600,0,0 | |
2000,0,0 | |
1800,0,0 | |
1500,0,0 | |
900,0,0 | |
700,0,0 | |
800,0,0 | |
700,0,0 | |
1100,0,0 | |
1300,0,0 | |
1200,0,0 | |
1600,0,0 | |
2000,0,0 | |
1800,0,0 | |
1500,0,0 | |
900,0,0 | |
700,0,0 | |
800,0,0 | |
700,0,0 | |
1100,0,0 | |
Comments