概述
### --------------------------------------------------- ###
# ------ 特征提取与处理 ------
# ------ Categorical feature extraction ------
from sklearn.feature_extraction import DictVectorizer

# DictVectorizer one-hot encodes string-valued entries: every distinct
# "city" value gets its own 0/1 column in the output matrix.
one_hot_encoder = DictVectorizer()
instances = [{"city": "New York"}, {"city": "San Francisco"}, {"city": "Chapel Hill"}]
# A parenthesized single-argument print runs on both Python 2 and Python 3.
print(one_hot_encoder.fit_transform(instances).toarray())
# ------ Bag-of-words representation ------
from sklearn.feature_extraction.text import CountVectorizer

vectorizer = CountVectorizer()
corpus = [
    "UNC played Duke in basketball",
    "Duke lost the basketball game",
    "I ate a sandwich",
]
# Document-term count matrix: one row per document, one column per word.
print(vectorizer.fit_transform(corpus).todense())
# Mapping from each vocabulary word to its column index.
print(vectorizer.vocabulary_)
# The vocabulary holds 10 words. "I" and "a" are missing because the default
# token_pattern of CountVectorizer only keeps tokens made of two or more
# alphanumeric characters.
from sklearn.metrics.pairwise import euclidean_distances

# toarray() yields a plain ndarray; recent scikit-learn releases reject the
# np.matrix object that todense() returns.
counts = vectorizer.fit_transform(corpus).toarray()
print("counts: \n{}".format(counts))
# Compare every pair of the three documents.
for x, y in [[0, 1], [0, 2], [1, 2]]:
    # euclidean_distances expects 2-D inputs, so take one-row slices
    # instead of 1-D rows.
    dist = euclidean_distances(counts[x:x + 1], counts[y:y + 1])
    print('文档{}与文档{}的距离{}'.format(x, y, dist))
# ------ Image feature extraction ------
# Use the raw pixel intensities as the feature vector.
from sklearn import datasets

digits = datasets.load_digits()
# Reshape the first image into rows of 64 values (a single row when the
# image holds 64 pixels, as the 8x8 digits images do).
print('Feature vector:\n{}'.format(digits.images[0].reshape(-1, 64)))
# 对感兴趣的点进行特征提取
import numpy as np
from skimage.feature import corner_harris, corner_peaks
from skimage.color import rgb2gray
import matplotlib.pyplot as plt
import skimage.io as io
from skimage.exposure import equalize_hist
def show_corners(corners, image):
    """Draw ``image`` in grayscale with ``corners`` marked as red dots.

    Args:
        corners: Sequence of ``(row, col)`` pairs. May be empty, in which
            case only the image is drawn.
        image: Image array with at least two dimensions; ``shape[0]`` and
            ``shape[1]`` set the y and x axis limits.

    The figure is created and enlarged by a factor of 1.5 but not
    displayed; call ``plt.show()`` afterwards to view it.
    """
    fig = plt.figure()
    plt.gray()
    plt.imshow(image)
    # zip(*corners) cannot be unpacked into two names when there are no
    # corners, so only plot markers when at least one is present.
    if len(corners) > 0:
        y_corner, x_corner = zip(*corners)
        plt.plot(x_corner, y_corner, 'or')
    plt.xlim(0, image.shape[1])
    # Descending y-limits keep row 0 at the top, matching image coordinates.
    plt.ylim(image.shape[0], 0)
    fig.set_size_inches(np.array(fig.get_size_inches()) * 1.5)
# Raw string keeps the Windows path separators literal.
mandrill = io.imread(r'C:\Users\admin\Desktop\test.jpg')
# Convert to grayscale and equalize the histogram before corner detection.
mandrill = equalize_hist(rgb2gray(mandrill))
# Harris response map -> peak coordinates kept at least 2 pixels apart.
corners = corner_peaks(corner_harris(mandrill), min_distance=2)
show_corners(corners, mandrill)
"""
图片的像素高低是会影响兴趣点的数量的
"""
import mahotas as mh
from mahotas.features import surf

# Raw string keeps the Windows path separators literal; the image is loaded
# as grayscale.
image = mh.imread(r'C:\Users\admin\Desktop\test.jpg', as_grey=True)
# Compute the SURF descriptors once and reuse them for both reports.
descriptors = surf.surf(image)
# The format string needs a placeholder, otherwise the descriptor passed to
# format() is silently dropped.
print('第一个SURF描述符:\n{}'.format(descriptors[0]))
print('抽取了%s个SURF描述符' % len(descriptors))
最后
以上就是朴实御姐为你收集整理的『sklearn学习』《sklearn》第三章:特征提取与处理的全部内容,希望本文能够帮你解决学习这一章时所遇到的程序开发问题。
如果觉得靠谱客网站的内容还不错,欢迎将靠谱客网站推荐给程序员好友。
本图文内容来源于网友提供,作为学习参考使用,或来自网络收集整理,版权属于原作者所有。
发表评论 取消回复