import numpy as np


class DecisionTree:
    def __init__(self, depth=0, max_depth=5):
        self.left = None
        self.right = None
        self.fkey = None      # feature to split on at this node
        self.fval = None      # threshold value for the split
        self.max_depth = max_depth
        self.depth = depth
        self.target = None    # predicted class, set for leaf nodes
    def train(self, x_train, y_train):
        if x_train.shape[0] == 0:
            return
        # Evaluate every feature: split at its mean and measure information gain
        features = list(x_train.columns)
        info_gains = []
        for test_fkey in features:
            threshold = x_train[test_fkey].mean()
            ig = informationGain(x_train, y_train, test_fkey, threshold)
            info_gains.append(ig)
        # Split on the feature with the highest information gain
        self.fkey = features[np.argmax(info_gains)]
        self.fval = x_train[self.fkey].mean()
        print('Splitting on feature:', self.fkey)
        x_left, y_left, x_right, y_right = divide_data(x_train, y_train, self.fkey, self.fval)
        x_left = x_left.reset_index(drop=True)
        y_left = y_left.reset_index(drop=True)
        x_right = x_right.reset_index(drop=True)
        y_right = y_right.reset_index(drop=True)
        ycolname = y_train.columns[0]
        if x_left.shape[0] == 0 or x_right.shape[0] == 0:
            # The best split puts every sample on one side, so this is a leaf node.
            # Set its target to the majority class of the labels that reached it.
            if y_train[ycolname].mean() >= 0.5:
                self.target = 1
            else:
                self.target = 0
            # Returning here ensures we don't grow the decision tree further
            return
        if self.depth >= self.max_depth:
            # Depth limit reached: stop and make this node a leaf with the majority class
            if y_train[ycolname].mean() >= 0.5:
                self.target = 1
            else:
                self.target = 0
            return
        # Reaching this point means the node is not a leaf,
        # so recursively grow the left and right subtrees
        self.left = DecisionTree(depth=self.depth + 1, max_depth=self.max_depth)
        self.left.train(x_left, y_left)
        self.right = DecisionTree(depth=self.depth + 1, max_depth=self.max_depth)
        self.right.train(x_right, y_right)
        # Optionally set a target on internal nodes as well; it is not essential,
        # since prediction only reads the target at leaf nodes.
        # (Use the label column here: calling .mean() on the whole DataFrame
        # returns a Series, which cannot be compared in an if-statement.)
        if y_train[ycolname].mean() >= 0.5:
            self.target = 1
        else:
            self.target = 0
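

# train() above calls two helpers, informationGain and divide_data, that are
# not defined in this section. The sketch below is a minimal implementation
# consistent with how they are called (mean-threshold split, entropy-based
# gain); it is an assumption about their behavior, not the original code.


def entropy(col):
    # Shannon entropy of a label column (a pandas Series)
    counts = np.unique(col, return_counts=True)[1]
    probs = counts / col.shape[0]
    return -np.sum(probs * np.log2(probs))


def divide_data(x_data, y_data, fkey, fval):
    # Partition rows by whether feature fkey falls below the threshold fval
    left_mask = x_data[fkey] < fval
    return x_data[left_mask], y_data[left_mask], x_data[~left_mask], y_data[~left_mask]


def informationGain(x_data, y_data, fkey, threshold):
    # Entropy reduction achieved by splitting on (fkey, threshold)
    _, y_left, _, y_right = divide_data(x_data, y_data, fkey, threshold)
    n = y_data.shape[0]
    if y_left.shape[0] == 0 or y_right.shape[0] == 0:
        return -float('inf')  # degenerate split; train() will treat the node as a leaf
    ycol = y_data.columns[0]
    weighted = (y_left.shape[0] / n) * entropy(y_left[ycol]) \
               + (y_right.shape[0] / n) * entropy(y_right[ycol])
    return entropy(y_data[ycol]) - weighted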
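

# The comments in train() mention predicting from leaf nodes, but no predict
# method appears in this section. One possible traversal, written as a free
# function (a hypothetical addition, not part of the original class):


def predict_one(node, row):
    # Descend until a leaf (no children), then return its stored target
    if node.left is None and node.right is None:
        return node.target
    if row[node.fkey] < node.fval:
        return predict_one(node.left, row)
    return predict_one(node.right, row)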
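

# A quick smoke test on synthetic data (hypothetical, for illustration only):
# the label depends on the sum of two features, so a shallow tree should fit it.

if __name__ == '__main__':
    import pandas as pd

    rng = np.random.default_rng(0)
    x_demo = pd.DataFrame({'f1': rng.normal(size=200), 'f2': rng.normal(size=200)})
    y_demo = pd.DataFrame({'label': (x_demo['f1'] + x_demo['f2'] > 0).astype(int)})

    tree = DecisionTree(max_depth=3)
    tree.train(x_demo, y_demo)
    preds = [predict_one(tree, x_demo.iloc[i]) for i in range(x_demo.shape[0])]
    accuracy = np.mean(np.array(preds) == y_demo['label'].to_numpy())
    print('training accuracy:', accuracy)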