概述
7.3 试编程实现拉普拉斯修正的朴素贝叶斯分类器,并以西瓜数据集3.0为训练集,对p.151 “测1”样本进行判别。
解答:
朴素贝叶斯的训练过程就是估计类别先验概率以及类条件概率的过程;测试阶段根据训练得到的概率值计算出类别的后验概率并取概率最大的类别作为样本分类。根据数据集3.0得到的拉普拉斯朴素贝叶斯分类器把测试样例预测为正类,即好瓜。
代码:
# -*- coding:gbk -*-
"""
@Author: Victoria
@Date: 2017.10.17 10:30
"""
import xlrd
import math
class LaplacianNB():
    """Naive Bayes classifier with Laplacian correction for binary labels.

    Each instance is a flat sequence whose first ``n_discrete`` entries are
    discrete attribute values and whose next ``n_continuous`` entries are
    numeric attributes modelled with one Gaussian per class.
    Labels are 0 (bad) and 1 (good).
    """

    def __init__(self, n_discrete=6, n_continuous=2):
        """Store the attribute layout.

        Input:
            n_discrete: number of leading discrete attributes per instance.
            n_continuous: number of continuous attributes that follow them.
        """
        self.n_discrete = n_discrete
        self.n_continuous = n_continuous

    def train(self, X, y):
        """Estimate class priors and class-conditional probabilities.

        Input:
            X: list of instances. In the original data set each instance is
               (color, root, knock sound, texture, navel, touch, density,
               sugar content).
            y: list of labels. 0 represents bad, 1 represents good.

        Raises:
            KeyError: if label 0 or label 1 does not occur in ``y``.
        """
        N = len(y)
        self.classes = self.count_list(y)
        self.class_num = len(self.classes)
        # Laplacian-corrected prior: (count + 1) / (N + number of classes).
        self.classes_p = {}
        for c, n in self.classes.items():
            self.classes_p[c] = float(n + 1) / (N + self.class_num)

        self.discrete_attris_with_good_p = []
        self.discrete_attris_with_bad_p = []
        for i in range(self.n_discrete):
            good_values = [X[j][i] for j in range(N) if y[j] == 1]
            bad_values = [X[j][i] for j in range(N) if y[j] != 1]
            # The smoothing denominator must use every value the attribute
            # takes in the whole training set, not only the values seen in
            # one class; values missing from a class get an explicit 0 count.
            domain = set(good_values) | set(bad_values)
            good_counts = self._counts_over_domain(good_values, domain)
            bad_counts = self._counts_over_domain(bad_values, domain)
            self.discrete_attris_with_good_p.append(
                self.discrete_p(good_counts, self.classes[1]))
            self.discrete_attris_with_bad_p.append(
                self.discrete_p(bad_counts, self.classes[0]))

        self.good_mus = []
        self.good_vars = []
        self.bad_mus = []
        self.bad_vars = []
        for i in range(self.n_continuous):
            col = i + self.n_discrete
            good_values = [X[j][col] for j in range(N) if y[j] == 1]
            bad_values = [X[j][col] for j in range(N) if y[j] != 1]
            good_mu, good_var = self.mu_var_of_list(good_values)
            bad_mu, bad_var = self.mu_var_of_list(bad_values)
            self.good_mus.append(good_mu)
            self.good_vars.append(good_var)
            self.bad_mus.append(bad_mu)
            self.bad_vars.append(bad_var)

    def predict(self, x):
        """Score one instance against both classes.

        Input:
            x: a single instance laid out like the rows passed to train().

        Returns:
            (p_good, p_bad, label). The two scores are the class prior
            multiplied by every attribute's conditional probability; they
            are not normalised. label is 1 when p_good >= p_bad, else 0.

        Raises:
            KeyError: if a discrete value never occurred in that attribute's
                training column.
        """
        p_good = self.classes_p[1]
        p_bad = self.classes_p[0]
        for i in range(self.n_discrete):
            p_good *= self.discrete_attris_with_good_p[i][x[i]]
            p_bad *= self.discrete_attris_with_bad_p[i][x[i]]
        for i in range(self.n_continuous):
            value = x[i + self.n_discrete]
            p_good *= self.continuous_p(value, self.good_mus[i], self.good_vars[i])
            p_bad *= self.continuous_p(value, self.bad_mus[i], self.bad_vars[i])
        if p_good >= p_bad:
            return p_good, p_bad, 1
        else:
            return p_good, p_bad, 0

    def count_list(self, l):
        """Return a dict mapping each distinct element of l to its count."""
        unique_dict = {}
        # Single pass instead of calling l.count() once per distinct element.
        for e in l:
            unique_dict[e] = unique_dict.get(e, 0) + 1
        return unique_dict

    def _counts_over_domain(self, values, domain):
        """Count values, reporting 0 for members of domain that never occur."""
        counts = dict.fromkeys(domain, 0)
        counts.update(self.count_list(values))
        return counts

    def discrete_p(self, d, N_class):
        """Turn value counts into Laplacian-corrected probabilities.

        Input:
            d: dict mapping attribute value -> count within one class. It
               should contain every possible value of the attribute (with a
               0 count where needed), because len(d) is used as the number
               of possible values in the denominator.
            N_class: number of training instances in that class.

        Returns:
            dict mapping each value to (count + 1) / (N_class + len(d)).
        """
        new_d = {}
        for a, n in d.items():
            new_d[a] = float(n + 1) / (N_class + len(d))
        return new_d

    def continuous_p(self, x, mu, var):
        """Return the Gaussian density of x for mean mu and variance var.

        Raises ZeroDivisionError when var is 0.
        """
        p = 1.0 / (math.sqrt(2 * math.pi) * math.sqrt(var)) * math.exp(- (x - mu) ** 2 / (2 * var))
        return p

    def mu_var_of_list(self, l):
        """Return (mean, variance) of the numbers in l.

        The variance divides by len(l) (population variance), not
        len(l) - 1. Raises ZeroDivisionError when l is empty.
        """
        mu = sum(l) / float(len(l))
        var = sum((v - mu) ** 2 for v in l) / float(len(l))
        return mu, var
# Script entry point: load the training table from the workbook, train the
# classifier, and classify one hand-written test instance.
if __name__ == "__main__":
    lnb = LaplacianNB()
    # NOTE(review): xlrd 2.0+ reportedly reads only .xls files; opening this
    # .xlsx workbook may require xlrd < 2.0 -- confirm the installed version.
    workbook = xlrd.open_workbook("../../数据/3.0.xlsx")
    sheet = workbook.sheet_by_name("Sheet1")
    X = []
    # One instance per sheet column; the first 17 columns are read.
    for i in range(17):
        x = sheet.col_values(i)
        # The first six cells of a column are cast to int (discrete codes).
        for j in range(6):
            x[j] = int(x[j])
        # Drop the last cell of the column -- presumably the label, which is
        # read separately from row 8 below; verify against the workbook.
        x.pop()
        X.append(x)
    y = sheet.row_values(8)
    y = [int(v) for v in y]
    lnb.train(X, y)
    label = lnb.predict([1, 1, 1, 1, 1, 1, 0.697, 0.460])
    # The original Python 2 print statement is a SyntaxError on Python 3;
    # this single-argument form prints the same text on both versions.
    print("predict ressult:  %s" % (label,))
结果:
predict ressult: (0.03191920486294201, 4.9158340214165893e-05, 1)#分别为正类概率,负类概率以及分类结果
最后
以上就是无心外套为你收集整理的西瓜书《机器学习》课后答案——chapter7_7.3的全部内容,希望文章能够帮你解决西瓜书《机器学习》课后答案——chapter7_7.3所遇到的程序开发问题。
如果觉得靠谱客网站的内容还不错,欢迎将靠谱客网站推荐给程序员好友。
本图文内容来源于网友提供,作为学习参考使用,或来自网络收集整理,版权属于原作者所有。
发表评论 取消回复