概述
前几天在清理电脑时,发现以前在项目中使用SVM进行跌倒检测判别的资料,Mark一下:
对SVM的认识,想必学习机器学习的人都知道,主要作为一个二分类器使用,并且经常与逻辑斯特回归进行比较,其思想就是对正负样本划分超平面,离超平面最近的正负样本(支持向量)的间隔最大, 以达到分类的目的,下面对其做一些简要的介绍:
SVM的性质:
1. 监督学习,判别模型,凸二次规划;
2. 损失函数自带L2正则,在梯度下降时不需要另外添加正则函数;
3. 非线性分类时,需要使用核函数,将低维空间中不可分的样本映射到高维空间以达到可分类的目的,常用的核函数:线性核,高斯核,sigmoid等;使用哪种核函数,需要根据经验以及测试来验证。
4. 引入松弛变量以及惩罚因子,以达到软间隔分类,其实就是解决噪声不可分的问题;
5. 训练出的模型仅与支持向量有关,改变其他样本点(非支持向量)不影响模型的分类效果。
上面对SVM做了一些简单的总结,里面涉及到很多概念,以及原理,在这里就不一一说明了。下面就项目的基本思路以及Python实现做一些简单的介绍
1. 数据采集
通过智能手表进行跌倒检测,数据(X,Y,Z)来自于三轴重力加速度传感器,每次采集6秒的数据作为一个样本,其中包含了1200个数据,也就是1200个(X,Y,Z),当然在采集样本时,需要大量的各种动作支持,下蹲,上跳,走路,跑步,弯腰等等,最闹心的是跌倒,还要不同的姿势,画面自行脑补。
2. 特征提取
这一步也是关键的一步,至于特征提取的原理就不说了,在我觉得主要是对数据进行了压缩,我们使用不同的方法对采集到的1200个数据压缩到59个特征值,其中包含均值,中值,偏差,偏度,峰度,最大值,最小值等等。这样一来,大大压缩了数据,更加有利于计算。
3. SVM分类
上面对SVM的性质,以及主要思想已经做出介绍,在这里就不说明了,在项目中也就是调用接口。通过实际测试,其检测的准确率达到98%。
Python代码
#!/usr/bin/env python
# coding: utf-8
# In[4]:
import sys
from statistics import median
from statistics import stdev
from scipy.stats import kurtosis,skew
import math
import numpy as np
import os
import tensorflow as tf
from tensorflow import keras
import numpy as np
import pandas as pd
import csv
from sklearn import svm
from random import randint
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import f1_score
from sklearn.metrics import auc
from sklearn import datasets
from joblib import dump, load
# In[42]:
#### Feature extraction
def feature(FOLDER, label):
    """Extract a 59-element feature row from every accelerometer recording.

    Reads each CSV under ``PATH/FOLDER``, trims it to the rows starting at
    the first occurrence of ``FOLDER`` in the 'label' column, caches the
    trimmed file under ``OUTPUT_PATH/FOLDER``, then computes statistical
    features (mean, median, std, skew, kurtosis, min, max, slope, tilt
    angle and magnitude statistics) over at most the first FALL_SIZE
    samples of the acc_x/acc_y/acc_z columns.

    Parameters
    ----------
    FOLDER : str
        Activity-class folder name; also the value searched in each file's
        'label' column.
    label : int
        Numeric class label appended as the last element of every row
        (e.g. 0 = activity of daily living, 1 = fall).

    Returns
    -------
    list[list[float]]
        One row of 58 features + label per non-empty input file.
    """
    FALL_SIZE = 1200  # nominal window: 6 s of samples per recording
    df_list = []
    PATH = '/home/nsh/share/ML/MobiAct_Dataset_v2.0/Annotated Data/'
    OUTPUT_PATH = '/home/nsh/share/ML/MobiAct_Dataset_v2.0/train_data_trainsform_lstm/'
    FILE_PATH = PATH + FOLDER
    final = []
    for file in os.listdir(FILE_PATH):
        df = pd.read_csv(os.path.join(FILE_PATH, file))
        # Keep rows from the first occurrence of this activity's label onward.
        df = df[(df['label'] == FOLDER).idxmax():]
        df = df.reset_index(drop=True)
        if df.empty:
            continue
        df_list.append(df)
        print(file)
        df["acc_x"] = df["acc_x"].astype('float64')
        df["acc_y"] = df["acc_y"].astype('float64')
        df["acc_z"] = df["acc_z"].astype('float64')
        # Squared magnitude of the acceleration vector.
        df['mag'] = df['acc_x'] * df['acc_x'] + df['acc_y'] * df['acc_y'] + df['acc_z'] * df['acc_z']
        # Cache the trimmed per-file CSV so reruns can skip files already done.
        OUTPUT_FILE_PATH = OUTPUT_PATH + FOLDER + '/' + file
        OUTPUT_FOLDER_PATH = OUTPUT_PATH + FOLDER
        # BUGFIX: the original also called os.mkdir() right after
        # os.makedirs(), which raises FileExistsError; makedirs alone suffices.
        if not os.path.exists(OUTPUT_FOLDER_PATH):
            os.makedirs(OUTPUT_FOLDER_PATH)
        if os.path.isfile(OUTPUT_FILE_PATH):
            print(OUTPUT_FILE_PATH + " exist , skip...")
        else:
            df.to_csv(OUTPUT_FILE_PATH, index=False)
        X = []
        Y = []
        Z = []
        MAG = []
        ymag = []
        df_count = df.shape[0]
        print(df_count)
        # BUGFIX: the original shrank FALL_SIZE itself when a short file was
        # seen, permanently truncating the window for every later file; use a
        # per-file window length instead.
        n = min(FALL_SIZE, df_count)
        for i in range(n):
            X.append(df.iloc[i, 2])   # acc_x column
            Y.append(df.iloc[i, 3])   # acc_y column
            Z.append(df.iloc[i, 4])   # acc_z column
            MAG.append(df.iloc[i, 12])  # 'mag' column appended above
            # y component normalised by the acceleration norm (in [-1, 1]).
            ymag.append(float(Y[i]) / float(math.sqrt(MAG[i])))
        # Tilt-angle series derived from the normalised y component.
        TA = [math.asin(v) for v in ymag]
        avgX = sum(X) / len(X)
        avgY = sum(Y) / len(Y)
        avgZ = sum(Z) / len(Z)
        medianX = median(X)
        medianY = median(Y)
        medianZ = median(Z)
        stdX = stdev(X)
        stdY = stdev(Y)
        stdZ = stdev(Z)
        skewX = skew(X)
        skewY = skew(Y)
        skewZ = skew(Z)
        kurtosisX = kurtosis(X)
        kurtosisY = kurtosis(Y)
        kurtosisZ = kurtosis(Z)
        minX = min(X)
        minY = min(Y)
        minZ = min(Z)
        maxX = max(X)
        maxY = max(Y)
        maxZ = max(Z)
        # Diagonal of the per-axis range box.
        slope = math.sqrt((maxX - minX) ** 2 + (maxY - minY) ** 2 + (maxZ - minZ) ** 2)
        meanTA = sum(TA) / len(TA)
        stdTA = stdev(TA)
        skewTA = skew(TA)
        kurtosisTA = kurtosis(TA)
        # Mean absolute deviation per axis.
        absX = sum(abs(x - avgX) for x in X) / len(X)
        absY = sum(abs(y - avgY) for y in Y) / len(Y)
        absZ = sum(abs(z - avgZ) for z in Z) / len(Z)
        # Same statistics over the absolute values of each axis (computed
        # once per axis instead of re-building the list for every feature).
        absXs = [abs(x) for x in X]
        absYs = [abs(y) for y in Y]
        absZs = [abs(z) for z in Z]
        abs_meanX = sum(absXs) / len(absXs)
        abs_meanY = sum(absYs) / len(absYs)
        abs_meanZ = sum(absZs) / len(absZs)
        abs_medianX = median(absXs)
        abs_medianY = median(absYs)
        abs_medianZ = median(absZs)
        abs_stdX = stdev(absXs)
        abs_stdY = stdev(absYs)
        abs_stdZ = stdev(absZs)
        abs_skewX = skew(absXs)
        abs_skewY = skew(absYs)
        abs_skewZ = skew(absZs)
        abs_kurtosisX = kurtosis(absXs)
        abs_kurtosisY = kurtosis(absYs)
        abs_kurtosisZ = kurtosis(absZs)
        abs_minX = min(absXs)
        abs_minY = min(absYs)
        abs_minZ = min(absZs)
        abs_maxX = max(absXs)
        abs_maxY = max(absYs)
        abs_maxZ = max(absZs)
        abs_slope = math.sqrt((abs_maxX - abs_minX) ** 2 + (abs_maxY - abs_minY) ** 2 + (abs_maxZ - abs_minZ) ** 2)
        meanMag = sum(MAG) / len(MAG)
        stdMag = stdev(MAG)
        minMag = min(MAG)
        maxMag = max(MAG)
        DiffMinMaxMag = maxMag - minMag
        ZCR_Mag = 0  # zero-crossing rate placeholder (never computed in the original)
        AvgResAcc = (1 / len(MAG)) * sum(MAG)
        test = [avgX, avgY, avgZ, medianX, medianY, medianZ, stdX, stdY, stdZ, skewX, skewY, skewZ, kurtosisX, kurtosisY, kurtosisZ,
                minX, minY, minZ, maxX, maxY, maxZ, slope, meanTA, stdTA, skewTA, kurtosisTA, absX,
                absY, absZ, abs_meanX, abs_meanY, abs_meanZ, abs_medianX, abs_medianY, abs_medianZ,
                abs_stdX, abs_stdY, abs_stdZ, abs_skewX, abs_skewY, abs_skewZ, abs_kurtosisX,
                abs_kurtosisY, abs_kurtosisZ, abs_minX, abs_minY, abs_minZ, abs_maxX, abs_maxY,
                abs_maxZ, abs_slope, meanMag, stdMag, minMag, maxMag, DiffMinMaxMag, ZCR_Mag, AvgResAcc, label]
        final.append(test)
        # NOTE: the original ended with a stray `break` (a debug leftover next
        # to commented-out count limiting) that stopped after the first file;
        # removed so every recording in the folder is processed.
    return final
# In[59]:
### Write the feature header plus one row per recording to <OUTPUT_PATH>/<FOLDER>.csv.
OUTPUT_PATH = '/home/nsh/share/ML/MobiAct_Dataset_v2.0/featured/'
FOLDER = 'WAL'
label = 0  # 0 = activity of daily living (walking); fall classes use 1
OUTPUT_FILE_PATH = OUTPUT_PATH + FOLDER + '.csv'
# Start from a clean file so reruns do not duplicate rows.
if os.path.isfile(OUTPUT_FILE_PATH):
    os.remove(OUTPUT_FILE_PATH)
# newline='' lets the csv module control row endings, as the csv docs require.
with open(OUTPUT_FILE_PATH, 'a', newline='') as f1:
    # BUGFIX: the original passed lineterminator='n' (the backslash was lost
    # in the paste), which joined every row with a literal 'n' instead of a
    # newline, producing a one-line, unparseable CSV.
    writer = csv.writer(f1, delimiter=',', lineterminator='\n')
    writer.writerow(['AvgX','AvgY','AvgZ','MedianX','MedianY','MedianZ','StdX',
    'StdY','StdZ','SkewX','SkewY','SkewZ','KurtosisX','KurtosisY','KurtosisZ','MinX','MinY',
    'MinZ','MaxX','MaxY','MaxZ','Slope','MeanTA','StdTA','SkewTA','KurtosisTA',
    'AbsX','AbsY','AbsZ','AbsMeanX','AbsMeanY','AbsMeanZ','AbsMedianX','AbsMedianY','AbsMedianZ',
    'AbsStdX','AbsStdY','AbsStdZ','AbsSkewX','AbsSkewY','AbsSkewZ',
    'AbsKurtosisX','AbsKurtosisY','AbsKurtosisZ','AbsMinX','AbsMinY','AbsMinZ',
    'AbsMaxX','AbsMaxY','AbsMaxZ','AbsSlope','MeanMag',
    'StdMag','MinMag','MaxMag','DiffMinMaxMag','ZCR_Mag','AverageResultantAcceleration','label'])
    lala = feature(FOLDER, label)
    data_len = len(lala)
    for p in range(0, data_len):
        writer.writerow(lala[p])
print("total ", data_len, " records process done")
# In[2]:
### Data loading
def get_all_data():
    """Load every per-class feature CSV and return one shuffled DataFrame.

    Scans ``PATH`` for ``*.csv`` files (one per activity class, produced by
    the feature-extraction step), keeps the first 59 columns (58 features +
    label), concatenates them, and shuffles the rows.

    Returns
    -------
    pandas.DataFrame
        Shuffled rows from all class CSVs; empty DataFrame if none found.
    """
    PATH = '/home/nsh/share/ML/MobiAct_Dataset_v2.0/featured/'
    frames = []
    for f in os.listdir(PATH):
        if 'csv' not in f:
            continue
        file_path = os.path.join(PATH, f)
        data = pd.read_csv(file_path, index_col=False, low_memory=False)
        # Keep only the 58 feature columns plus the label column.
        data = data.iloc[0:, 0:59]
        frames.append(data)
        # NOTE: the original had a stray `break` here (debug leftover), so
        # only the first CSV — a single class — was ever loaded; removed.
    # BUGFIX: DataFrame.append was deprecated and removed in pandas 2.0;
    # collect the frames and concatenate once instead.
    all_data = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    # BUGFIX: np.random.shuffle(all_data.values) shuffles a (possibly copied)
    # ndarray, not the DataFrame itself; sample(frac=1) reliably shuffles rows.
    all_data = all_data.sample(frac=1).reset_index(drop=True)
    return all_data
# In[16]:
### Model training
all_data = get_all_data()
_all_data_x = []
_all_data_y = []
count = all_data.shape[0]
# Input _all_data_x: the 58 feature columns; output _all_data_y: the 0/1 label.
for i in range(0, count):
    _all_data_x.append(all_data.iloc[i, 0:58])
    # BUGFIX: the original appended length-1 Series slices (iloc[i, 58:59]);
    # scalar labels avoid the column-vector warning in fit() and the metrics.
    _all_data_y.append(all_data.iloc[i, 58])
# Random 80/20 train/test split.
X_train, X_test, y_train, y_test = train_test_split(_all_data_x, _all_data_y, test_size=0.2, random_state=42)
# SVM classification: RBF kernel, gamma scaled from n_features and X variance.
clf = svm.SVC(gamma='scale')
clf.fit(X_train, y_train)
test_count = len(X_test)
print(test_count)
### Performance metrics
y_predict = clf.predict(X_test)
score = roc_auc_score(y_test, y_predict)
# NOTE: a stray `print(... clf.predict(X_test[i]) ...)` from a commented-out
# loop was removed here — it referenced a loop variable that no longer
# existed and called predict() on a single 1-D sample, which raises.
print(score)
precision, recall, thresholds = precision_recall_curve(y_test, y_predict)
f1 = f1_score(y_test, y_predict)
# BUGFIX: the original wrote `auc = auc(recall, precision)`, shadowing the
# imported sklearn.metrics.auc function with a float.
pr_auc = auc(recall, precision)
print("precision is ", precision, "recall is ", recall, "thresholds is ", thresholds)
print("f1 is ", f1, "auc is ", pr_auc)
print("done...")
### Persist the trained model
dump(clf, 'fall_detect_svm.joblib')
#this is how to load the model
#clf_load = load('fall_detect_svm.joblib')
# In[5]:
### Evaluate the saved model
all_data = get_all_data()
_all_data_x = []
_all_data_y = []
count = all_data.shape[0]
for i in range(0, count):
    _all_data_x.append(all_data.iloc[i, 0:58])
    _all_data_y.append(all_data.iloc[i, 58])  # scalar label, not a 1-row slice
clf_load = load('fall_detect_svm.joblib')
X_train, X_test, y_train, y_test = train_test_split(_all_data_x, _all_data_y, test_size=0.1, random_state=42)
# BUGFIX: the original scored clf_load.predict(X_train) — 90% of the data,
# largely overlapping what the model was trained on — which inflates the
# reported AUC; evaluate on the held-out split instead.
y_predict = clf_load.predict(X_test)
score = roc_auc_score(y_test, y_predict)
print(score)
# In[ ]:
最后
以上就是迷人皮卡丘为你收集整理的SVM(支持向量机)--跌倒检测的全部内容,希望文章能够帮你解决SVM(支持向量机)--跌倒检测所遇到的程序开发问题。
如果觉得靠谱客网站的内容还不错,欢迎将靠谱客网站推荐给程序员好友。
发表评论 取消回复