Simple Linear Regression

GitHub link: Code URL


In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib notebook
In [3]:
# Read the salary data from the CSV file located in the working directory.
dataset = pd.read_csv(filepath_or_buffer="Salary_data.csv")
# Display the first five rows as a quick check of the loaded columns.
dataset.head(n=5)
Out[3]:
YearsExperience Salary
0 1.1 39343.0
1 1.3 46205.0
2 1.5 37731.0
3 2.0 43525.0
4 2.2 39891.0
In [4]:
# Select columns by position: column 0 is the feature (YearsExperience),
# column 1 is the target (Salary).
x, y = dataset.iloc[:, 0], dataset.iloc[:, 1]
In [5]:
# `sklearn.cross_validation` was removed from scikit-learn; `train_test_split`
# now lives in `sklearn.model_selection`.
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing. A fixed `random_state` makes the
# split (and everything computed from it) reproducible across re-runs.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=0
)
In [6]:
from sklearn.linear_model import LinearRegression

# The estimator expects a 2-D feature matrix, so turn the 1-D training data
# into single-column arrays. `-1` lets NumPy infer the row count instead of
# hard-coding the training-set size, which would break as soon as the data
# set or `test_size` changes.
x_train = np.array(x_train).reshape(-1, 1)
y_train = np.array(y_train).reshape(-1, 1)

# Fit an ordinary least-squares line to the training data.
reg = LinearRegression()
reg.fit(x_train, y_train)
Out[6]:
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)
In [7]:
# Predict on the held-out inputs. They are reshaped into a single-column 2-D
# array; `-1` infers the number of rows rather than hard-coding the test size.
pred = reg.predict(np.array(x_test).reshape(-1, 1))
In [8]:
# Observed data: training points in green, held-out test points in red.
plt.scatter(x_train, y_train, color='g', label='Training data')
plt.scatter(x_test, y_test, color='r', label='Test data')
# Model output evaluated at the training inputs and at the test inputs.
plt.plot(x_train, reg.predict(x_train), label='Fit (training inputs)')
plt.plot(x_test, pred, label='Fit (test inputs)')
# Label the figure so it can be read on its own.
plt.xlabel('YearsExperience')
plt.ylabel('Salary')
plt.title('Simple linear regression: Salary vs. YearsExperience')
plt.legend()
plt.show()
In [ ]:
 

Comments

Post a Comment