lightgbm - simple race car example
Description of sample data
The sample data is deliberately straightforward. It has three main columns:-
1. CarCC - the engine capacity of the car (in cc)
2. Turbo - whether the car has a turbo (1 = yes, 0 = no)
3. IsRaceCar - this is the label, which conclusively tells us whether the car is a race car (1) or not (0)
For example :-
CarCC,Turbo,IsRaceCar
3000,1,1 (race car)
2500,1,1
1300,0,0
1200,0,0
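To try the example end to end you need a cardata.csv and a testcardata.csv on disk. A minimal sketch for writing them out with pandas is shown below; the training rows are the ones above, while the two test rows are made-up values added purely for illustration:-

# sketch: create the two CSV files used by the training script
# (the test rows below are hypothetical, added only for illustration)
import pandas as pd

train_rows = pd.DataFrame({'CarCC': [3000, 2500, 1300, 1200],
                           'Turbo': [1, 1, 0, 0],
                           'IsRaceCar': [1, 1, 0, 0]})
test_rows = pd.DataFrame({'CarCC': [2800, 1100],
                          'Turbo': [1, 0],
                          'IsRaceCar': [1, 0]})
train_rows.to_csv('cardata.csv', index=False)
test_rows.to_csv('testcardata.csv', index=False)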
The sample code for learning and predicting from this dataset is shown below :-
# coding: utf-8
# pylint: disable = invalid-name, C0111
import lightgbm as lgb
import pandas as pd

# load the dataset (both CSV files start with the CarCC,Turbo,IsRaceCar header row)
print('Load data...')
df_train = pd.read_csv('cardata.csv', sep=',')
df_test = pd.read_csv('testcardata.csv', sep=',')

# IsRaceCar is the label, the remaining columns are the features
y_train = df_train['IsRaceCar'].values
print(y_train)
y_test = df_test['IsRaceCar'].values
X_train = df_train.drop('IsRaceCar', axis=1).values
X_test = df_test.drop('IsRaceCar', axis=1).values

# create dataset for lightgbm
lgb_train = lgb.Dataset(X_train, y_train)
lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)

# specify your configurations as a dict
params = {
    'task': 'train',
    'boosting_type': 'gbdt',
    'objective': 'binary',
    'metric': {'l2'},
    'num_leaves': 31,
    'learning_rate': 0.05,
    # 'feature_fraction': 0.9,
    # 'bagging_fraction': 0.8,
    'bagging_freq': 5,
    'min_data': 1,  # the sample dataset is tiny, so allow leaves with a single record
    'verbose': 0
}

print('Start training...')
# train, stopping early if the validation score does not improve for 5 rounds
gbm = lgb.train(params,
                lgb_train,
                num_boost_round=20,
                valid_sets=lgb_eval,
                early_stopping_rounds=5)

print('Save model...')
# save model to file
gbm.save_model('model.txt')

print('Start predicting...')
# predict returns the probability that each test car is a race car
y_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration)
# eval
print(y_pred)
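Because the objective is 'binary', predict() returns the probability that each car is a race car rather than a 0/1 label. A small follow-on sketch (my own addition, assuming a 0.5 cut-off) turns those probabilities into labels and reloads the saved model:-

# sketch: threshold the probabilities at 0.5 (an assumed cut-off) to get 0/1 labels
labels = [1 if p > 0.5 else 0 for p in y_pred]
print(labels)  # 1 = predicted race car, 0 = predicted not a race car

# the model saved to model.txt can be loaded back later without retraining
bst = lgb.Booster(model_file='model.txt')
print(bst.predict(X_test))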