In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
In [3]:
# Read the startups CSV (expected in the notebook's working directory) into a
# DataFrame, then preview its first two rows as the cell output.
dataset = pd.read_csv(filepath_or_buffer="50_Startups.csv")
dataset.head(n=2)
Out[3]:
In [4]:
# Every column except the last is used as a predictor (x); the last column is
# the regression target (y).
x, y = dataset.iloc[:, :-1], dataset.iloc[:, -1]
In [5]:
# One-hot encode any categorical predictor columns; numeric columns pass
# through unchanged and keep their position.
x = pd.get_dummies(x)
# Convert to a plain float ndarray. The explicit dtype matters: pandas >= 2.0
# emits boolean dummy columns, and np.array() on a mixed float/bool frame would
# otherwise produce dtype=object, which the later OLS fits cannot work with.
# NOTE(review): no dummy level is dropped here, so the dummy columns sum to one
# per row; the positional column indices used in later cells rely on this
# layout, so it is left as-is.
x = np.array(x, dtype=float)
In [6]:
# `sklearn.cross_validation` was removed in scikit-learn 0.20;
# train_test_split now lives in `sklearn.model_selection`.
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing. The fixed random_state makes the split
# (and therefore every downstream prediction) reproducible across kernel restarts.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=42
)
In [7]:
# Create an ordinary least-squares linear regression estimator with
# scikit-learn's default settings; it is fitted in a later cell.
from sklearn.linear_model import LinearRegression
reg = LinearRegression()
In [8]:
# Fit the regression on the training split; fit() returns the estimator, so
# its repr is shown as the cell output.
reg.fit(x_train,y_train)
Out[8]:
In [9]:
# Predict the target for the held-out test rows.
pred = reg.predict(x_test)
In [10]:
# Display the predicted values for the test split.
pred
Out[10]:
In [11]:
# Display the actual target values of the test split for comparison with pred.
y_test
Out[11]:
In [12]:
# Backward elimination: prepend an explicit intercept column of ones to x,
# because statsmodels' OLS does not add a constant term on its own.
# The row count is taken from x itself instead of a hard-coded 50, so the cell
# keeps working if the dataset size changes, and x is coerced to float so the
# resulting design matrix is numeric.
# NOTE: this cell is not idempotent -- re-running it without re-running the
# encoding cell prepends another column of ones and shifts every column index.
x = np.append(
    arr=np.ones((x.shape[0], 1)),
    values=np.asarray(x, dtype=float),
    axis=1,
)
In [13]:
# The array-based `OLS` class is exposed by `statsmodels.api`;
# `statsmodels.formula.api` only provides the lowercase, formula-based `ols`
# in current releases, so `sm.OLS` would raise AttributeError with that import.
import statsmodels.api as sm
In [14]:
# Backward elimination, step 1: fit OLS on columns 0-6 of x (column 0 is the
# prepended column of ones) and show the summary table.
# NOTE(review): x carries the ones column plus every dummy column produced by
# get_dummies (no level dropped), so this design is presumably perfectly
# collinear -- confirm before interpreting the coefficients.
x_opt = np.take(x, [0, 1, 2, 3, 4, 5, 6], axis=1)
regressor = sm.OLS(y, x_opt).fit()
regressor.summary()
Out[14]:
In [15]:
# Backward elimination, step 2: refit OLS with column 2 of x left out and show
# the new summary table.
# NOTE(review): indices are positional in x (ones column first) -- verify that
# column 2 is the predictor meant to be removed.
x_opt = np.take(x, [0, 1, 3, 4, 5, 6], axis=1)
regressor = sm.OLS(y, x_opt).fit()
regressor.summary()
Out[15]:
In [16]:
# Backward elimination, step 3: refit OLS with columns 2 and 3 of x left out
# and show the new summary table.
# NOTE(review): indices are positional in x (ones column first) -- verify that
# columns 2 and 3 are the predictors meant to be removed.
x_opt = np.take(x, [0, 1, 4, 5, 6], axis=1)
regressor = sm.OLS(y, x_opt).fit()
regressor.summary()
Out[16]:
In [ ]:
multiple
Comments
Post a Comment