With this code, the score comes out at only 48 percent.
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
# Load the mushroom dataset; the CSV is read from the current working directory.
# (The file name must be wrapped in plain ASCII quotes -- typographic quotes
# are a syntax error in Python.)
df = pd.read_csv("mushroom.csv")
df.head()  # bare expression: only displays something in an interactive session
print(df.shape)

# Encode every column as integer codes.
le = LabelEncoder()
# DataFrame.apply calls le.fit_transform once per column, so the encoder is
# re-fitted for each column in turn.
ds = df.apply(le.fit_transform)
df.head()  # NOTE(review): shows the raw frame; ds.head() may have been intended
data = ds.values
print(data.shape)

# Column 0 is used as the target, the remaining columns as the features.
data_x = data[:, 1:]
data_y = data[:, 0]

# Break the data into train and test sets (test_size=0.2 -> 20% held out).
x_train, x_test, y_train, y_test = train_test_split(data_x, data_y, test_size=0.2)
print(x_train.shape, y_train.shape)
print(x_test.shape, y_test.shape)
np.unique(y_train)  # bare expression: lists the class codes when run interactively
#INVERSE TRANSPORT
def prior_prob(y_train, label):
    """Return the prior probability of ``label`` in ``y_train``.

    The prior is the fraction of training examples whose target equals
    ``label``.

    Args:
        y_train: 1-D array-like of training labels.
        label: The class value whose prior is wanted.

    Returns:
        The fraction ``count(y_train == label) / len(y_train)`` as a float,
        or ``0.0`` when ``y_train`` is empty.
    """
    # Accept plain lists as well as ndarrays.
    y_train = np.asarray(y_train)
    total_example = y_train.shape[0]
    if total_example == 0:
        # No data at all: avoid a division by zero.
        return 0.0
    class_example = np.sum(y_train == label)
    return class_example / float(total_example)
#y = np.array([0,5,5,1,1,1,0,1,0,0])
#prior_prob(y,1)
def cond_prob(x_train, y_train, feature_col, feature_val, label):
    """Estimate P(feature == feature_val | class == label) from training data.

    Only the training rows whose target equals ``label`` are considered; the
    result is the fraction of those rows in which column ``feature_col`` has
    the value ``feature_val``.

    Args:
        x_train: 2-D array-like of training features (rows are examples).
        y_train: 1-D array-like of training labels, aligned with ``x_train``.
        feature_col: Index of the feature column to inspect.
        feature_val: Value of that feature to count.
        label: Class value to condition on.

    Returns:
        The conditional relative frequency as a float in ``[0, 1]``, or
        ``0.0`` when no training example carries ``label``.
    """
    x_train = np.asarray(x_train)
    y_train = np.asarray(y_train)

    # Keep only the rows that belong to the requested class.
    x_filtered = x_train[y_train == label]
    denominator = x_filtered.shape[0]
    if denominator == 0:
        # Class never seen in training: avoid a division by zero.
        return 0.0

    # Count inside the class-filtered rows. Counting over all of x_train
    # would not be a conditional probability and could exceed 1.
    numerator = np.sum(x_filtered[:, feature_col] == feature_val)
    return numerator / float(denominator)
def predict(x_train, y_train, xtest):
    """Classify one sample with a categorical Naive Bayes rule.

    For every class found in ``y_train`` the unnormalised posterior
    ``prior * product(conditional probabilities of each feature value)`` is
    computed, and the class with the largest value is returned.

    Args:
        x_train: 2-D array of training features (rows are examples).
        y_train: 1-D array of training labels, aligned with ``x_train``.
        xtest: 1-D sequence holding one sample's feature values, indexed by
            feature column.

    Returns:
        The class label (an element of ``np.unique(y_train)``) with the
        highest posterior score.
    """
    classes = np.unique(y_train)
    n_features = x_train.shape[1]
    post_probs = []

    for label in classes:
        # Likelihood under the independence assumption: product over features.
        likelihood = 1.0
        for f in range(n_features):
            likelihood *= cond_prob(x_train, y_train, f, xtest[f], label)

        prior = prior_prob(y_train, label)
        post_probs.append(prior * likelihood)

    # argmax yields a position in `classes`; map it back to the actual label
    # so the result is right even when labels are not exactly 0..k-1.
    return classes[np.argmax(post_probs)]
# Sanity check: classify a single row of the test split (index 1) using the
# training data.
output = predict(x_train,y_train,x_test[1])
# NOTE(review): the trailing comma turns this bare expression into a
# one-element tuple; it only displays a value in an interactive session.
# Confirm whether the comma is intentional.
output,
def score(x_train, y_train, x_test, y_test):
    """Return the accuracy of ``predict`` on a test set.

    Each row of ``x_test`` is classified with ``predict`` using the given
    training data, and the predictions are compared with ``y_test``.

    Args:
        x_train: 2-D array of training features.
        y_train: 1-D array of training labels, aligned with ``x_train``.
        x_test: 2-D array of test features (rows are examples).
        y_test: 1-D array of true test labels, aligned with ``x_test``.

    Returns:
        The fraction of test rows whose predicted label equals the true one.
    """
    # One prediction per test row, in row order.
    pred = np.array([predict(x_train, y_train, sample) for sample in x_test])
    return np.sum(pred == y_test) / y_test.shape[0]
# Evaluate the classifier on the held-out test split. As a bare expression the
# returned value is only displayed in an interactive session; it is not
# printed when the file runs as a script.
score(x_train,y_train,x_test,y_test)