import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
#Loading the data
X = pd.read_csv('Linear_X_Train.csv')
y = pd.read_csv('Linear_Y_Train.csv')
#Converting the dataframes to NumPy arrays
X = X.values
y = y.values
#Normalisation (standardising X to zero mean and unit variance)
u = X.mean()
std = X.std()
X = (X - u)/std
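#Quick sanity check (my addition, not part of the assignment): after
#standardising, the features should have mean ~0 and std ~1
print(X.mean(), X.std())   #expect roughly 0.0 and 1.0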
#Visualisation
plt.style.use('seaborn')
plt.scatter(X, y, color="orange")
plt.title("Hardwork vs Performance")
plt.xlabel("Hardwork")
plt.ylabel("Performance")
plt.show()
#Regression
def hypothesis(x, theta):
    y_ = theta[0] + theta[1]*x
    return y_
def gradient(X, Y, theta):
    m = X.shape[0]
    grad = np.zeros((2,))
    for i in range(m):
        x = X[i]
        y_ = hypothesis(x, theta)
        y = Y[i]
        grad[0] += (y_ - y)
        grad[1] += (y_ - y)*x
    return grad/m
def error(X, Y, theta):
    m = X.shape[0]
    total_error = 0.0
    for i in range(m):
        y_ = hypothesis(X[i], theta)
        total_error += (y_ - Y[i])**2
    return total_error/m
def gradientDescent(X, Y, max_steps=100, learning_rate=0.1):
    theta = np.zeros((2,))
    error_list = []
    theta_list = []
    for i in range(max_steps):
        grad = gradient(X, Y, theta)
        e = error(X, Y, theta)
        error_list.append(e)
        #Updating theta
        theta[0] = theta[0] - learning_rate*grad[0]
        theta[1] = theta[1] - learning_rate*grad[1]
        theta_list.append((theta[0], theta[1]))
    return theta, error_list, theta_list
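#For reference (my own sketch, not part of the assignment): the same batch
#update can be written without the per-sample Python loop by vectorising
#with NumPy. Assumes X and Y are (m,1) or 1-D arrays of equal length.
def gradientDescentVec(X, Y, max_steps=100, learning_rate=0.1):
    x = X.ravel()                                   #flatten to 1-D
    yv = Y.ravel()
    theta = np.zeros((2,))
    errors = []
    for _ in range(max_steps):
        residual = (theta[0] + theta[1]*x) - yv     #predictions minus targets
        grad = np.array([residual.mean(), (residual*x).mean()])
        theta = theta - learning_rate*grad          #simultaneous update of both parameters
        errors.append((residual**2).mean())         #mean squared error this step
    return theta, errors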
theta, error_list, theta_list = gradientDescent(X, y)
plt.plot(error_list)
plt.title("Reduction of error over time")
plt.show()
#Predictions on the training data
y_ = hypothesis(X, theta)
plt.scatter(X, y)
plt.plot(X, y_, color="red", label="Prediction")
plt.legend()
plt.show()
#Loading the test data and writing out the predictions
x_test = pd.read_csv('Linear_X_Test.csv').values
y_test = hypothesis(x_test, theta)
df = pd.DataFrame(data=y_test, columns=["y"])
df.to_csv('ChallengeHardworkPays.csv', index=False)
I am able to run the code successfully and get the desired CSV file.
However, the checker shows that my answer's accuracy is -93%.
How can the accuracy be negative?
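From what I understand (this is my guess, not something the checker documents), the "accuracy" reported for a regression task is probably the R² (coefficient of determination) score rather than a fraction of correct answers. R² = 1 - SS_res/SS_tot, so it goes negative whenever the predictions fit worse than simply predicting the mean of y. A small illustration with made-up numbers:

import numpy as np

y_true = np.array([1.0, 2.0, 3.0, 4.0])
y_pred = np.array([4.0, 3.0, 2.0, 1.0])        #predictions worse than guessing the mean

ss_res = np.sum((y_true - y_pred)**2)          #residual sum of squares = 20.0
ss_tot = np.sum((y_true - y_true.mean())**2)   #total sum of squares = 5.0
r2 = 1 - ss_res/ss_tot                         #R^2 = 1 - SS_res/SS_tot
print(r2)                                      #-3.0, i.e. "-300% accuracy"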