复制代码
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
"""Gaussian-distribution anomaly detection on server data.

Fits a multivariate Gaussian to the training set, picks the density threshold
that maximises the F1 score on the cross-validation set, and plots the
training points whose density falls below that threshold.
"""
import csv

import matplotlib.pyplot as plt
import numpy as np
from numpy import genfromtxt
from scipy.stats import multivariate_normal
from sklearn.metrics import f1_score

# Plot settings.
plt.style.use('ggplot')
plt.rcParams.update({
    'font.family': 'serif',
    'font.serif': 'Ubuntu',
    'font.monospace': 'Ubuntu Mono',
    'font.size': 12,
    'axes.labelsize': 11,
    'axes.labelweight': 'bold',
    'axes.titlesize': 12,
    'xtick.labelsize': 9,
    'ytick.labelsize': 9,
    'legend.fontsize': 11,
    'figure.titlesize': 13,
})


def _read_rows(path):
    """Return every row of the comma-separated file at *path* as a list of lists of str."""
    # The context manager closes the handle; newline="" is what the csv module
    # expects so that it can do its own newline handling.
    with open(path, "r", newline="") as handle:
        return list(csv.reader(handle, delimiter=","))


# Read the files as lists of rows.
tr = _read_rows("train_server_data.csv")
cv = _read_rows("crossval_server_data.csv")
ts = _read_rows("test_server_data.csv")

# Training, cross-validation and test data as float arrays.
train_data = np.array(tr).astype("float")
crossval_data = np.array(cv).astype("float")
test_data = np.array(ts).astype("float")

# NOTE: alternative helpers that the original author left disabled (this is
# the only place the `genfromtxt` import is referenced):
#
# def read_dataset(filePath, delimiter=','):
#     return genfromtxt(filePath, delimiter=delimiter)
#
# # Feature normalisation.
# def feature_normalize(dataset):
#     mu = np.mean(dataset, axis=0)
#     sigma = np.std(dataset, axis=0)
#     return (dataset - mu) / sigma


def estimate_gaussian(dataset):
    """Estimate the Gaussian parameters of *dataset*.

    Returns:
        (mu, sigma): the mean taken along axis 0 and the covariance computed
        by ``np.cov`` on the transposed dataset.
    """
    mu = np.mean(dataset, axis=0)
    # np.cov treats each row of its input as one variable, hence the transpose.
    sigma = np.cov(dataset.T)
    return mu, sigma


def multivariate_gaussian(dataset, mu, sigma):
    """Return the multivariate normal density N(mu, sigma) evaluated at *dataset*."""
    distribution = multivariate_normal(mean=mu, cov=sigma)
    return distribution.pdf(dataset)


def select_threshold(probs, test_data):
    """Pick the density threshold that gives the best F1 score.

    Candidate thresholds are 1000 evenly spaced steps from ``min(probs)`` up
    to (but excluding) ``max(probs)``. A point is predicted anomalous when its
    probability is strictly below the candidate.

    Args:
        probs: probability densities, one per sample.
        test_data: labels passed to ``f1_score`` as ``y_true``, aligned with
            *probs*.

    Returns:
        (best_f1, best_epsilon). Both are 0 when no candidate scores above 0,
        including when every probability is identical.
    """
    probs = np.asarray(probs)
    best_epsilon = 0
    best_f1 = 0

    lowest = np.min(probs)
    highest = np.max(probs)
    stepsize = (highest - lowest) / 1000
    if stepsize == 0:
        # All probabilities are equal: there is nothing to scan, and
        # np.arange would fail with a zero step.
        return best_f1, best_epsilon

    for epsilon in np.arange(lowest, highest, stepsize):
        predictions = probs < epsilon
        if not predictions.any():
            # No sample is flagged, so F1 is 0 by definition and can never
            # beat the current best; skipping avoids sklearn's
            # UndefinedMetricWarning for this case.
            continue
        score = f1_score(test_data, predictions, average='binary')
        if score > best_f1:
            best_f1 = score
            best_epsilon = epsilon
    return best_f1, best_epsilon


mu, sigma = estimate_gaussian(train_data)
p = multivariate_gaussian(train_data, mu, sigma)

# Select the optimal value of epsilon using the cross-validation set.
p_cv = multivariate_gaussian(crossval_data, mu, sigma)
fscore, ep = select_threshold(p_cv, test_data)
print(fscore, ep)

# Indices of the training points whose density is below the threshold.
outliers = np.asarray(np.where(p < ep))

plt.figure(1)
plt.xlabel('motor1')
plt.ylabel('motor2')
plt.title('Datapoints of distribution')
plt.plot(train_data[:, 0], train_data[:, 1], 'b+')
plt.show()

plt.figure(2)
plt.xlabel('motor1')
plt.ylabel('motor2')
plt.title('Detection of Outliers')
plt.plot(train_data[:, 0], train_data[:, 1], 'bx')
# Mark the outliers in red.
plt.plot(train_data[outliers, 0], train_data[outliers, 1], 'ro')
plt.show()

# Output recorded by the original author. The UndefinedMetricWarning came from
# candidate thresholds that flag no samples, which select_threshold now skips.
#
# C:Usersz003tesjAppDataLocalProgramsPythonPython35libsite-packagessklearnmetricsclassification.py:1135:
#   UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.
#   'precision', 'predicted', average, warn_for)
# 0.875 9.036201327981212e-05
# C:Usersz003tesjAppDataLocalProgramsPythonPython35libsite-packagesmatplotlibfont_manager.py:1320:
#   UserWarning: findfont: Font family ['serif'] not found. Falling back to DejaVu Sans
#   (prop.get_family(), self.defaultFamily[fontext]))
# [Finished in 14.4s]
最后
以上就是过时自行车最近收集整理的关于基于高斯分布的异常检测代码实现的全部内容,更多相关基于高斯分布内容请搜索靠谱客的其他文章。
本图文内容来源于网友提供,作为学习参考使用,或来自网络收集整理,版权属于原作者所有。
发表评论 取消回复