In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib notebook
In [3]:
# Read the salary CSV (path is relative to the notebook's working directory)
# into a DataFrame, then preview the first five rows as the cell output.
salary_csv = "Salary_data.csv"
dataset = pd.read_csv(salary_csv)
dataset.head(5)
Out[3]:
In [4]:
# Positional column selection: the first column becomes the model input (x)
# and the second column becomes the value to predict (y). Both are pandas Series.
x, y = dataset.iloc[:, 0], dataset.iloc[:, 1]
In [5]:
# sklearn.cross_validation was deprecated in scikit-learn 0.18 and removed in 0.20;
# train_test_split now lives in sklearn.model_selection.
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing. A fixed random_state makes the shuffle
# (and therefore the fit and the plot that depend on it) reproducible across
# kernel restarts.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=42
)
In [6]:
from sklearn.linear_model import LinearRegression

reg = LinearRegression()

# scikit-learn estimators require a 2-D feature matrix of shape
# (n_samples, n_features). reshape(-1, 1) lets NumPy infer the row count, so the
# cell no longer breaks when the dataset size or test_size changes (the previous
# hard-coded 21 only matched one specific split).
x_train = np.array(x_train).reshape(-1, 1)
# The target is kept as a column vector as well, so predictions come back
# with shape (n_samples, 1).
y_train = np.array(y_train).reshape(-1, 1)

# Fit ordinary least squares on the training split; the fitted estimator is the
# cell's displayed output.
reg.fit(x_train, y_train)
Out[6]:
In [7]:
# Predict on the held-out inputs. The Series is converted to a single-column 2-D
# array because the estimator expects (n_samples, n_features); -1 infers the row
# count instead of hard-coding the size of the test split.
pred = reg.predict(np.array(x_test).reshape(-1, 1))
In [8]:
# Draw on an explicitly created figure/axes. With the interactive `notebook`
# backend, implicit pyplot calls target whichever figure is currently active,
# so re-running the cell would otherwise paint over an earlier figure.
fig, ax = plt.subplots()

# Observed points: training split in green, held-out split in red.
ax.scatter(x_train, y_train, color='g', label='train data')
ax.scatter(x_test, y_test, color='r', label='test data')

# Model output evaluated on the training inputs and on the held-out inputs.
ax.plot(x_train, reg.predict(x_train), label='fit on train inputs')
ax.plot(x_test, pred, label='predictions on test inputs')

# Label the axes with the source column names so the figure stands alone.
ax.set_xlabel(str(x.name))
ax.set_ylabel(str(y.name))
ax.set_title('Linear regression: observed vs. predicted')
ax.legend()
plt.show()
In [ ]:
good
ReplyDelete