概述
Hust第三次作业解析:
1.白葡萄酒读取数据
略(案例有)
2.数据处理
C
3、4、5.回归模型(之前作业有)
6.241二分类问题正确率
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score

# Exercise placeholder; overwritten below with the computed score.
accuracy_rate = None

# Accuracy of the predictions against the true labels, rounded to two decimals.
# Both inputs are flattened to 1-D before scoring.
# NOTE(review): y_true / y_pred are not defined in this snippet; presumably
# pandas objects supplied by the exercise environment -- confirm.
accuracy_rate = round(
    accuracy_score(y_true.values.ravel(), y_pred.values.ravel()),
    2,
)
7.268二分类问题召回率
from sklearn import metrics
from sklearn.metrics import recall_score

# Exercise placeholder; overwritten below with the computed score.
rec_rate = None

# Recall with recall_score's default positive label, rounded to two decimals.
# NOTE(review): y_true / y_pred come from outside this snippet -- confirm they
# are pandas objects exposing `.values`.
rec_rate = round(
    recall_score(y_true.values.ravel(), y_pred.values.ravel()),
    2,
)
8.271二分类问题特异度
from sklearn import metrics
from sklearn.metrics import recall_score

# Exercise placeholder; overwritten below with the computed score.
spe_value = None

# Specificity is computed as the recall of class 0 (pos_label=0), rounded to
# two decimals.
# NOTE(review): y_true / y_pred come from outside this snippet -- confirm they
# are pandas objects exposing `.values`.
spe_value = round(
    recall_score(
        y_true.values.ravel(),
        y_pred.values.ravel(),
        pos_label=0,
    ),
    2,
)
9.273计算分类问题kappa值
from sklearn import metrics
from sklearn.metrics import cohen_kappa_score

# Exercise placeholder; overwritten below with the computed score.
kappa_score = None

# Cohen's kappa between the true and predicted labels, rounded to two decimals.
# NOTE(review): y_true / y_pred come from outside this snippet -- confirm they
# are pandas objects exposing `.values`.
kappa_score = round(
    cohen_kappa_score(y_true.values.ravel(), y_pred.values.ravel()),
    2,
)
10.293二分类问题AUC
from sklearn import metrics
from sklearn.metrics import roc_auc_score

# Exercise placeholder; overwritten below with the computed score.
auc_value = None

# ROC AUC of the scores in y_prob against the true labels, rounded to two
# decimals.
# NOTE(review): y_true / y_prob come from outside this snippet -- confirm they
# are pandas objects exposing `.values`.
auc_value = round(
    roc_auc_score(y_true.values.ravel(), y_prob.values.ravel()),
    2,
)
11.394回归问题的平均绝对误差
from sklearn.metrics import mean_absolute_error as mae

# Exercise placeholder; overwritten below with the computed error.
mae_value = None

# Mean absolute error, rounded to two decimals. Arguments follow the
# (y_true, y_pred) order of the scikit-learn signature.
# NOTE(review): y_true / y_pred come from outside this snippet -- confirm they
# are pandas objects exposing `.values`.
mae_value = round(mae(y_true.values.flatten(), y_pred.values.flatten()), 2)

# Report the result only after it has been computed (printing earlier would
# always show None).
print(mae_value)
12.393回归问题的均方误差
from sklearn.metrics import mean_squared_error as mse

# Exercise placeholder; overwritten below with the computed error.
mse_value = None

# Mean squared error, rounded to two decimals. Arguments follow the
# (y_true, y_pred) order of the scikit-learn signature; the metric is
# symmetric, so the value is the same either way.
# NOTE(review): y_true / y_pred come from outside this snippet -- confirm they
# are pandas objects exposing `.values`.
mse_value = round(mse(y_true.values.flatten(), y_pred.values.flatten()), 2)
13.395回归问题的均方根误差
import math
from sklearn import metrics
from sklearn.metrics import mean_squared_error

# Exercise placeholder; overwritten below with the computed error.
rmse_value = None

# Root mean squared error: square root of the MSE, rounded to two decimals.
# Arguments follow the (y_true, y_pred) order of the scikit-learn signature.
# NOTE(review): y_true / y_pred come from outside this snippet -- confirm they
# are pandas objects exposing `.values`.
rmse_value = round(
    math.sqrt(
        mean_squared_error(y_true.values.flatten(), y_pred.values.flatten())
    ),
    2,
)
14.295回归问题的R^2系数
from sklearn import metrics
from sklearn.metrics import r2_score

# Exercise placeholder; overwritten below with the computed score.
r_square_value = None

# Flatten both inputs to 1-D before scoring.
# NOTE(review): y_true / y_pred come from outside this snippet -- confirm they
# are pandas objects exposing `.values`.
true_value, pred_value = y_true.values.ravel(), y_pred.values.ravel()

# Coefficient of determination (R^2), rounded to two decimals.
r_square_value = round(r2_score(true_value, pred_value), 2)
15.280计算信息熵
import pandas as pd
# `scipy.log2` was a deprecated NumPy alias and is no longer importable from
# current SciPy; the standard-library function is sufficient for scalars.
from math import log2

# Exercise placeholder; overwritten below with the computed entropy.
entropy_value = None

# Occurrences of each distinct label in the 'y_true' column.
# NOTE(review): `y` comes from outside this snippet; presumably a DataFrame
# with a 'y_true' column -- confirm.
result = y['y_true'].value_counts()
total = y.shape[0]

# Shannon entropy in bits: sum of -p * log2(p) over the observed labels, where
# p is the label's count divided by the number of rows in `y`.
entropy_value = sum(
    -(float(count) / total) * log2(float(count) / total)
    for count in result
)
entropy_value = round(entropy_value, 2)
16.279计算欧几里得距离
import numpy as np
from sklearn.metrics.pairwise import euclidean_distances

# euclidean_distances works on 2-D inputs, so view each vector as one row.
# NOTE(review): x1 / x2 come from outside this snippet; `.reshape` implies
# NumPy arrays -- confirm.
x1 = x1.reshape((1, -1))
x2 = x2.reshape((1, -1))

# The result is a 1x1 distance matrix. Pull out the single entry before
# rounding: the built-in round() is not defined for ndarrays on Python 3.
dis = euclidean_distances(x1, x2)
dis = round(float(dis[0, 0]), 2)
17.281计算余弦相似度
import numpy as np
from numpy.linalg import norm
from sklearn.metrics.pairwise import cosine_similarity

# Exercise placeholder; overwritten below with the computed similarity.
cos_value = None

# cosine_similarity works on 2-D inputs and returns a 1x1 matrix here. Take its
# single entry as a plain float before rounding: the built-in round() is not
# defined for ndarrays on Python 3.
# NOTE(review): x1 / x2 come from outside this snippet; `.reshape` implies
# NumPy arrays -- confirm.
cos_value = cosine_similarity(x1.reshape((1, -1)), x2.reshape((1, -1)))[0, 0]
cos_value = round(float(cos_value), 2)
18.283计算JACCARD距离
# Exercise placeholder; overwritten below with the computed distance.
jaccard_dis = None

# Compare the inputs as sets of distinct elements.
# NOTE(review): x1 / x2 come from outside this snippet; any iterable of
# hashable items works with set() -- confirm the expected input type.
set1, set2 = set(x1), set(x2)

if set1 or set2:
    # Jaccard distance = 1 - |intersection| / |union|.
    jaccard_dis = 1 - float(len(set1 & set2)) / float(len(set1 | set2))
else:
    # Both inputs empty: the union is empty, so avoid dividing by zero and
    # treat the two identical (empty) sets as having distance 0.
    jaccard_dis = 0.0
jaccard_dis = round(jaccard_dis, 2)
19.284计算hamming距离
# Exercise placeholder; overwritten below with the computed distance.
hamming_dis = None

# Hamming distance: number of positions at which the paired elements differ.
# zip() stops at the shorter input, so any trailing elements of the longer one
# are not counted.
# NOTE(review): x1 / x2 come from outside this snippet -- presumably
# equal-length sequences; confirm.
hamming_dis = sum(1 for a, b in zip(x1, x2) if a != b)
20.367计算曼哈顿距离
from sklearn.metrics.pairwise import manhattan_distances

# Exercise placeholder; overwritten below with the computed distance.
manhattan_dis = None

# manhattan_distances works on 2-D inputs, so view each vector as one row.
# NOTE(review): x1 / x2 come from outside this snippet; `.reshape` implies
# NumPy arrays -- confirm.
x1, x2 = x1.reshape(1, -1), x2.reshape(1, -1)

# The result is left as the 1x1 distance matrix returned by scikit-learn.
manhattan_dis = manhattan_distances(x1, x2)
最后
以上就是长情小伙为你收集整理的数据科学导引的全部内容,希望文章能够帮你解决数据科学导引所遇到的程序开发问题。
如果觉得靠谱客网站的内容还不错,欢迎将靠谱客网站推荐给程序员好友。
发表评论 取消回复