概述
import csv
from sklearn.feature_extraction import DictVectorizer
from sklearn import preprocessing
from sklearn import tree
import pydotplus
'''
数据集 play.csv
RID age income student credit_rating Class_buys_computer
1 youth high no fair no
2 youth high no excellent no
3 middle_aged high no fair yes
4 senior medium no fair yes
5 senior low yes fair yes
6 senior low yes excellent yes
7 middle_aged low yes excellent no
8 youth medium no fair yes
9 youth low yes fair no
10 senior medium yes fair yes
11 youth medium yes excellent yes
12 middle_aged medium no excellent yes
13 middle_aged high yes fair yes
14 senior medium no excellent no
'''
# Load the training table and encode it for scikit-learn.
#
# The raw-string path keeps the Windows backslash literal (a plain "\p" is an
# invalid escape sequence), newline="" is what the csv module asks for on its
# input file, and the `with` block guarantees the handle is closed.
with open(r"E:\play.csv", "rt", encoding="utf-8", newline="") as file:
    reader = csv.reader(file)
    # Python 3 removed reader.next(); the built-in next() replaces it.
    headers = next(reader)
    print("表头信息\n" + str(headers))

    feature_list, result_list = [], []
    for row in reader:
        # csv.reader yields [] for blank lines; skip them instead of
        # failing on row[-1].
        if not row:
            continue
        # Last column is the class label.
        result_list.append(row[-1])
        # First column (RID) and last column (label) are not features.
        feature_list.append(dict(zip(headers[1:-1], row[1:-1])))

print("结果\n" + str(result_list), "\n特征值\n" + str(feature_list))

# Convert the list of feature dicts into a numpy array.
vec = DictVectorizer()
DummyX = vec.fit_transform(feature_list).toarray()
# Binarize the class labels.
DummyY = preprocessing.LabelBinarizer().fit_transform(result_list)
# Note: the columns of DummyX are ordered alphabetically by feature name.
print("DummyX\n" + str(DummyX), "\nDummyY\n" + str(DummyY))
# Train a decision tree that uses entropy as its split criterion; the fixed
# random_state keeps the fitted tree reproducible between runs.
clf = tree.DecisionTreeClassifier(criterion="entropy", random_state=0)
clf = clf.fit(DummyX, DummyY)
print("clf\n" + str(clf))

# DictVectorizer.get_feature_names() was removed in scikit-learn 1.2 in favour
# of get_feature_names_out(); use whichever the installed version provides and
# normalise the result to a plain list of str.
if hasattr(vec, "get_feature_names_out"):
    feature_names = [str(name) for name in vec.get_feature_names_out()]
else:
    feature_names = [str(name) for name in vec.get_feature_names()]

# Write the tree to a dot file. export_graphviz returns None when given a file
# object, so its result is not assigned.
with open(r"E:\play.dot", "w") as f:
    tree.export_graphviz(clf, out_file=f)
print('特征向量\n', feature_names)

# With out_file=None the dot source is returned as a string instead.
dot_data = tree.export_graphviz(
    clf,
    feature_names=feature_names,
    special_characters=True,
    filled=True,
    rounded=True,
    out_file=None,
)
print("dot_data\n" + str(dot_data))
# Rendering notes (pydotplus + GraphViz):
# - `pip install pydotplus` failed to install; this worked instead:
#       pip install --upgrade --ignore-installed pydotplus
# - "pydotplus.graphviz.InvocationException: GraphViz's executables not found"
#   means the GraphViz binaries are missing from PATH. Either run
#   `conda install graphviz` and restart the IDE, or install GraphViz first,
#   add its directory to the PATH environment variable, and only then
#   pip-install pydotplus. If it still fails, restart the IDE or the machine.
graph = pydotplus.graph_from_dot_data(dot_data)
graph.write_pdf(r"E:\play.pdf")

# Going by the feature names, 0.0.1.|0.1.|1.0.0.|1.0. encodes
# youth, fair, high, no.
# The array is named DummyX; the original `dummyX` raised NameError.
oneRowX = DummyX[0]
twoRowX = DummyX[1]
print("oneRowX:\n", str(oneRowX), "\ntwoRowX\n", str(twoRowX))

# Predict two hand-built samples; B differs from A only in the age columns.
A = [[0, 0, 1, 0, 1, 1, 0, 0, 1, 0]]
B = [[1, 0, 0, 0, 1, 1, 0, 0, 1, 0]]
predict_A = clf.predict(A)
predict_B = clf.predict(B)
print("predict_A", str(predict_A), "predict_B", str(predict_B))
最后
以上就是风趣机器猫为你收集整理的机器学习:sklearn&pydotplus实现Decision Tree的全部内容,希望文章能够帮你解决机器学习:sklearn&pydotplus实现Decision Tree所遇到的程序开发问题。
如果觉得靠谱客网站的内容还不错,欢迎将靠谱客网站推荐给程序员好友。
本图文内容来源于网友提供,作为学习参考使用,或来自网络收集整理,版权属于原作者所有。
发表评论 取消回复