Overview
KNN regression on simulated data & KNN-based preprocessing and classification of the horse colic dataset
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsRegressor
np.random.seed(0)
# generate a 40x1 matrix of uniform random values in [0, 5); axis=0 sorts each column
X = np.sort(5 * np.random.rand(40, 1), axis = 0)
# 500 evenly spaced test points on [0, 5], reshaped into a column vector
T = np.linspace(0, 5, 500)[:, np.newaxis]
# the target is sin(x), flattened to a 1-D array
y = np.sin(X).ravel()
# add uniform noise in [-0.5, 0.5) to every 5th target value (40/5 = 8 values)
y[::5] += 1 * (0.5 - np.random.rand(8))
n_neighbors = [1, 3, 5, 8, 10, 13]
# fit and plot a KNN regression curve for each k
plt.figure(figsize=(10, 20))
for i, k in enumerate(n_neighbors):
    clf = KNeighborsRegressor(n_neighbors=k, p=2, metric='minkowski')
    clf.fit(X, y)
    y_ = clf.predict(T)
    plt.subplot(6, 1, i + 1)
    plt.scatter(X, y, color='red', label='data')
    plt.plot(T, y_, color='navy', label='prediction')
    plt.axis('tight')
    plt.legend()
    plt.title('KNeighborsRegressor (k = %i)' % k)
plt.tight_layout()
plt.show()
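KNeighborsRegressor averages the targets of the k nearest neighbors with uniform weights by default. As a side-by-side illustration that is not part of the original walkthrough, the sketch below refits the same simulated data with weights='distance', which weights each neighbor by the inverse of its distance; distance weighting typically tracks the noisy points more closely.
# sketch (my addition, not in the original): uniform vs distance weighting at k = 5
for weights in ['uniform', 'distance']:
    reg = KNeighborsRegressor(n_neighbors=5, weights=weights)
    y_w = reg.fit(X, y).predict(T)
    plt.plot(T, y_w, label='weights = %s' % weights)
plt.scatter(X, y, color='red', label='data')
plt.legend()
plt.title('KNeighborsRegressor: uniform vs distance weights (k = 5)')
plt.show()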
import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
# KNNImputer fills missing values from the nearest neighbors
from sklearn.impute import KNNImputer
# compute pairwise Euclidean distances while ignoring NaN entries
from sklearn.metrics.pairwise import nan_euclidean_distances
# Cross validation
from sklearn.model_selection import cross_val_score
# repeated stratified k-fold cross-validator
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.pipeline import Pipeline
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
X = [[1, 2, np.nan], [3, 4, 3], [np.nan, 6, 5], [8, 7, 9]]
imputer = KNNImputer(n_neighbors = 2, metric = 'nan_euclidean')
p = imputer.fit_transform(X)
print(p)
p2 = nan_euclidean_distances(X)
print(p2)
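The distances printed above can be checked by hand: nan_euclidean_distances scales each squared distance by (total coordinates) / (coordinates present in both rows), so for rows 0 and 1, where 2 of 3 coordinates are present in both, the distance is sqrt(3/2 * ((1-3)^2 + (2-4)^2)) = sqrt(12) ≈ 3.4641. A minimal sketch of that check (not in the original):
# sketch: manually verify the nan-Euclidean distance between rows 0 and 1
a, b = np.array([1, 2, np.nan]), np.array([3, 4, 3])
mask = ~(np.isnan(a) | np.isnan(b))  # coordinates present in both rows
weight = a.size / mask.sum()         # 3 total coordinates / 2 present ones
print(np.sqrt(weight * np.sum((a[mask] - b[mask]) ** 2)))  # ~3.4641, matches p2[0, 1]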
# read the CSV, converting '?' entries to NaN
input_file = 'horse-colic.csv' # C:/Users/haijun/Desktop/aliyun/
df_data = pd.read_csv(input_file, header = None, na_values = '?')
print('df_data: ', df_data.shape[0], df_data.shape[1])
# extract the label (column 23) and the features (all other columns)
data = df_data.values
ix = [i for i in range(data.shape[1]) if i != 23]
X, y = data[:, ix], data[:, 23]
# report the missing-value count and ratio for each column
for i in range(df_data.shape[1]):
    n_miss = df_data[i].isnull().sum()
    perc = n_miss / df_data.shape[0] * 100
    if n_miss > 0:
        print('> feat: %d, missing: %d, missing ratio: (%.2f%%)' % (i, n_miss, perc))
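The same per-column summary can also be produced without an explicit loop; a one-line alternative (my addition, not in the original):
# vectorized missing-ratio summary per column
miss_ratio = df_data.isnull().mean() * 100
print(miss_ratio[miss_ratio > 0])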
# count missing values in X before imputation
print('Missing values before imputation: %d' % sum(np.isnan(X).flatten()))
# fill the missing values with KNNImputer
imputer = KNNImputer()
imputer.fit(X)
Xtrans = imputer.transform(X)
print('Missing values after imputation: %d' % sum(np.isnan(Xtrans).flatten()))
# evaluate the pipeline for a range of imputer neighbor counts
results = list()
strategies = [str(i) for i in [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 16, 18, 20, 21]]
for s in strategies:
    # pipeline: KNN imputation followed by KNN classification
    pipe = Pipeline(steps=[('imputer', KNNImputer(n_neighbors=int(s))), ('model', KNeighborsClassifier())])
    scores = []
    for k in range(20):
        # split the raw data (with NaNs) so the pipeline's imputer is actually exercised
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
        pipe.fit(X_train, y_train)
        score = pipe.score(X_test, y_test)
        scores.append(score)
    results.append(np.array(scores))
    print('> k: %s, Acc Mean: %.3f, Std: %.3f' % (s, np.mean(scores), np.std(scores)))
plt.boxplot(results, labels = strategies, showmeans = True)
plt.show()
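cross_val_score and RepeatedStratifiedKFold are imported above but never used. Repeating random train/test splits by hand can be replaced with repeated stratified k-fold cross-validation; a minimal sketch under that assumption (not part of the original run):
# sketch: evaluate one imputer/classifier pipeline with repeated stratified k-fold CV
pipe = Pipeline(steps=[('imputer', KNNImputer(n_neighbors=5)), ('model', KNeighborsClassifier())])
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
cv_scores = cross_val_score(pipe, X, y, scoring='accuracy', cv=cv, n_jobs=-1)
print('CV Acc Mean: %.3f, Std: %.3f' % (np.mean(cv_scores), np.std(cv_scores)))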