from sklearn.datasets import load_boston
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
boston = load_boston()
X = boston.data
y = boston.target
#print(X.shape)
#print(boston.DESCR)
df = pd.DataFrame(X)
df.columns= boston.feature_names
#Normalisation
u = np.mean(X,axis=0)
std = np.mean(X,axis=0)
X = (X-u)/std
ones = np.ones((X.shape[0],1))
X = np.hstack((ones,X))
print(X.shape)
X - Matrix ( m x n)
x - Vector (Single Example with n features)
def hypothesis(x,theta):
y_ = 0.0
n = x.shape[0]
for i in range(n):
y_ += (theta[i]*x[i])
return y_
def error(X,y,theta):
e = 0.0
m = X.shape[0]
for i in range(m):
y_ = hypothesis(X[i],theta)
e += (y[i] - y_)**2
return e/m
def gradient(X,y,theta):
m,n = X.shape
grad = np.zeros((n,))
# for all values of j
for j in range(n):
#sum over all examples
for i in range(m):
y_ = hypothesis(X[i],theta)
grad[j] += (y_ - y[i])*X[i][j]
# Out of the loops
return grad/m
def gradient_descent(X,y,learning_rate=0.1,max_epochs=300):
m,n = X.shape
theta = np.zeros((n,))
error_list = []
for i in range(max_epochs):
e = error(X,y,theta)
error_list.append(e)
# Gradient Descent
grad = gradient(X,y,theta)
for j in range(n):
theta[j] = theta[j] - learning_rate*grad[j]
return theta,error_list
import time
start = time.time()
theta,error_list = gradient_descent(X,y)
end = time.time()
print("Time taken is ", end-start)
print(theta)
plt.plot(error_list)
plt.show()
#r2 score
m = X.shape[0]
y_ = []
for i in range(m):
pred = hypothesis(X[i],theta)
y_.append(pred)
y_ = np.array(y_)
def r2_score(Y,Y_):
num = np.sum((Y-Y_)**2)
denom = np.sum((Y - Y.mean()))
score = 1 - num/denom
return score*100
print(r2_score(y,y_))
Using this, i am not able to get a good R2 score. My r2 score is 5.591421040245647e+17.
Please advice.