CNTK - Simple linear regression

The following is sample code that uses CNTK to build a simple linear model for car data: the engine size (CarCC) and turbo flag are fed through a single linear layer, trained with softmax cross-entropy, to predict whether a car is a race car.

import pandas as pd
import numpy as np
import cntk as C

trainData = pd.read_csv("cardata.csv")
testData = pd.read_csv("testCarData.csv")
#print(trainData)
#print(testData)

input_dim = 2
num_output_classes = 2

# colour each training sample by its IsRaceCar label (0 or 1) in the scatter plot below
plotLabel = np.asarray(trainData['IsRaceCar'].values, dtype=np.float32)
#print(plotLabel)

# CNTK expects the features as rows of [CarCC, Turbo] (e.g. [[3000, 1], [1300, 0], ...])
# and the labels one-hot encoded: [1, 0] for a race car, [0, 1] otherwise.
# (A vectorised alternative for this step is sketched after the sample data below.)
def getData(targetData):
    alist = []
    for idx in range(0, targetData['CarCC'].size):
        idxVal = [targetData['CarCC'].values[idx], targetData['Turbo'].values[idx]]
        alist.append(idxVal)
    labels = []
    # read the labels from targetData (not trainData) so the function also works for the test set
    for class_number in targetData['IsRaceCar']:
        if class_number == 1:
            a = [1, 0]   # race car
        else:
            a = [0, 1]   # not a race car
        labels.append(a)
    # CNTK wants float32 numpy arrays rather than plain Python lists
    return np.asarray(alist, dtype=np.float32), np.asarray(labels, dtype=np.float32)

dataFeatures, labels = getData(trainData)
print('train data samples')
print(dataFeatures)
print(labels)

import matplotlib.pyplot as plt
colors = ['r' if label == 0 else 'b' for label in plotLabel]
plt.scatter(trainData['CarCC'], trainData['Turbo'], c=colors)
plt.xlabel("Engine size")
plt.ylabel("With Turbo")
plt.show()

## starting off with a not so complicated linear model
mydict = {}
def linear_layer(input_var, output_dim):
    input_dim = input_var.shape[0]
    weight_param = C.parameter(shape=(input_dim, output_dim))
    bias_param = C.parameter(shape=(output_dim))
    mydict['w'], mydict['b'] = weight_param, bias_param
    return C.times(input_var, weight_param) + bias_param

feature = C.input_variable(input_dim, np.float32)
#print(feature)
output_dim = num_output_classes
z = linear_layer(feature, output_dim)

## pretty standard training setup: softmax cross-entropy loss, classification error metric and plain SGD
progress_printer = C.logging.ProgressPrinter(tag='Training', num_epochs=25)
C.logging.log_number_of_parameters(z)
label = C.input_variable(num_output_classes, np.float32)
loss = C.cross_entropy_with_softmax(z, label)
eval_error = C.classification_error(z, label)
learning_rate = 0.5
lr_schedule = C.learning_rate_schedule(learning_rate, C.UnitType.minibatch)
learner = C.sgd(z.parameters, lr_schedule)
trainer = C.Trainer(z, (loss, eval_error), [learner], progress_printer)

# The data set is tiny, so the whole training set is passed as a single minibatch, five times over
for i in range(0, 5):
    # Assign the minibatch data to the input variables and train the model on the minibatch
    trainer.train_minibatch({feature : dataFeatures, label : labels})

# evaluate on the held-out test file
dataFeatures, labels = getData(testData)
trainer.test_minibatch({feature : dataFeatures, label : labels})

# predict two unseen cars: a 2500cc car with a turbo and a 300cc car without one
predDataFeature = np.asarray([[2500, 1], [300, 0]], dtype=np.float32)
out = C.softmax(z)
result = out.eval({feature : predDataFeature})
print('prediction results:')
print(result)
print("Predicted:", [np.argmax(x) for x in result])
cardata.csv and testCarData.csv (the same sample data is used for both files in this example):
CarCC,Turbo,IsRaceCar
3000,1,1
2500,1,1
1300,0,0
1200,0,0
1600,0,0
2000,0,0
1800,0,0
1500,0,0
900,0,0
700,0,0
800,0,0
700,0,0
1100,0,0
1300,0,0
1200,0,0
1600,0,0
2000,0,0
1800,0,0
1500,0,0
900,0,0
700,0,0
800,0,0
700,0,0
1100,0,0
1300,0,0
1200,0,0
1600,0,0
2000,0,0
1800,0,0
1500,0,0
900,0,0
700,0,0
800,0,0
700,0,0
1100,0,0
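
As an aside, the hand-rolled loops in getData can be replaced with a vectorised version built directly from the DataFrame columns shown above. This is only a sketch under the assumption that the CSV columns are named exactly CarCC, Turbo and IsRaceCar; getDataVectorised is my own helper name and not part of the original script:

import numpy as np

def getDataVectorised(targetData):
    # features: one [CarCC, Turbo] row per car, as float32 for CNTK
    features = targetData[['CarCC', 'Turbo']].values.astype(np.float32)
    # labels: one-hot rows, [1, 0] for a race car and [0, 1] otherwise,
    # matching the encoding used by getData above
    is_race = targetData['IsRaceCar'].values
    labels = np.stack([is_race == 1, is_race == 0], axis=1).astype(np.float32)
    return features, labels

# drop-in replacement for getData:
# dataFeatures, labels = getDataVectorised(trainData)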



