Mashroom problem

Sweta · May 14, 2020, 11:43am

In this it is giving 48 percent score

import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

df= pd.read_csv(“mushroom.csv”)
df.head()
print(df.shape)

#Breaking ini train and test

le = LabelEncoder()
#applies transfromation on each column
ds = df.apply(le.fit_transform)

df.head()

data = ds.values
print(data.shape)

data_x=data[:,1:]
data_y=data[:,0]

#Breaking the data into train and test

x_train,x_test,y_train,y_test = train_test_split(data_x,data_y,test_size=0.2)

print(x_train.shape,y_train.shape)
print(x_test.shape,y_test.shape)

np.unique(y_train)

#INVERSE TRANSPORT

def prior_prob(y_train,label):
total_example=y_train.shape[0]
class_example=np.sum(y_train==label)

return (class_example)/float(total_example)

#y = np.array([0,5,5,1,1,1,0,1,0,0])
#prior_prob(y,1)

def cond_prob(x_train,y_train,feature_col,feature_val,label):

x_filtered = x_train[y_train==label]
numerator=np.sum(x_train[:,feature_col]==feature_val) 
denominator = np.sum(y_train==label)
return numerator/float(denominator)

def predict(x_train,y_train,xtest):

classes = np.unique(y_train)
n_features=x_train.shape[1]
post_probs=[]

for label in classes:
    likelihood = 1.0
    for f in range(n_features):
        cond = cond_prob(x_train,y_train,f,xtest[f],label)
        likelihood*=cond
    prior = prior_prob(y_train,label)
    
    post=prior*likelihood
    post_probs.append(post)
        
        
pred=np.argmax(post_probs)   
return pred

output = predict(x_train,y_train,x_test[1])

output,

def score(x_train,y_train,x_test,y_test):
pred =[]
for i in range(x_test.shape[0]):
pred_label = predict(x_train,y_train,x_test[i])
pred.append(pred_label)
pred = np.array(pred)

accurancy = np.sum(pred==y_test)/y_test.shape[0]

return accurancy

score(x_train,y_train,x_test,y_test)

Manu-Pillai-1566551720093198 · May 14, 2020, 11:14pm

Hello @Sweta,

Share your code through CODING BLOCKS IDE.

Sweta · May 18, 2020, 2:03pm

Manu-Pillai-1566551720093198 · May 18, 2020, 4:09pm

Hello @Sweta,

It is fine to not get 90%+ accuracy in the first go that too with your custom code. I suggest you do challenges like these with libraries like sklearn. Assuming your code is logically correct, increasing the accuracy of a model is a big conceptual procedure on its own. Try different variants of naive bayes, different hyperparameters and may be different algorithms altogether. Increasing accuracy of a model is far more conceptual than implementing a model. For now, you just need to be sure that your code actually implements the algorithm correctly and leave the accuracy for now. For challenges, you can sure use sklearn.

If you have any further doubt feel free to continue here.

Happy Learning

Sweta · May 19, 2020, 7:53am

Yep! I will do , But the same problem already solved in class , I just cannot understand what is wrong with mine as I have done in similar way.
.