概述
import pandas as pd
pd.__version__
# 就是pandas模块,对一些汉字,处理不好,版本会继续升级的
输出
‘0.22.0’
import numpy as np
from pandas import Series, DataFrame
## 1、删除重复元素
### 使用duplicated()函数检测重复的行,返回元素为布尔类型的Series对象,每个元素对应一行,如果该行不是第一次出现,则元素为True
# 在mysql中有属性表,城市,省份 市 市的名字只能是唯一,唯一键的效率低
# 如果有属性数据重复,那么pandas拿到的就是一个带有数据冗余的表
df = DataFrame({'color': ['white', 'red', 'white'], 'size': [10, 20, 10]})
df
color | size | |
---|---|---|
0 | white | 10 |
1 | red | 20 |
2 | white | 10 |
#
df.duplicated()
输出
0 False
1 False
2 True
dtype: bool
df.drop_duplicates()
color | size | |
---|---|---|
0 | white | 10 |
1 | red | 20 |
df2 = pd.concat([df, df], axis=1)
df2
color | size | color | size | |
---|---|---|---|---|
0 | white | 10 | white | 10 |
1 | red | 20 | red | 20 |
2 | white | 10 | white | 10 |
df2.duplicated()
输出
0 False
1 False
2 True
dtype: bool
df2.drop_duplicates()
color | size | color | size | |
---|---|---|---|---|
0 | white | 10 | white | 10 |
1 | red | 20 | red | 20 |
# 如果真的出现了两列完全相同,那么我们该怎么删除列
# drop 会删除所有的
df2.T.drop_duplicates().T
color | size | |
---|---|---|
0 | white | 10 |
1 | red | 20 |
2 | white | 10 |
df
color | size | |
---|---|---|
0 | white | 10 |
1 | red | 20 |
2 | white | 10 |
df.mean()
输出
size 13.333333
dtype: float64
d = {'white': 255, 'red': 128}
df.replace(d)
color | size | |
---|---|---|
0 | 255 | 10 |
1 | 128 | 20 |
2 | 255 | 10 |
d = {'white': 255, 'gray': 128}
df.replace(d)
# map也可以用来检索文章中敏感词
color | size | |
---|---|---|
0 | 255 | 10 |
1 | red | 20 |
2 | 255 | 10 |
df
color | size | |
---|---|---|
0 | white | 10 |
1 | red | 20 |
2 | white | 10 |
# Assign through one .loc call. The chained form df['size'][0] = ... may
# write to a temporary copy and raises SettingWithCopyWarning.
df.loc[0, 'size'] = np.nan
C:UsersAdministratorAnaconda3libsite-packagesipykernel_launcher.py:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy “”“Entry point for launching an IPython kernel. C:UsersAdministratorAnaconda3libsite-packagespandascoreindexing.py:194: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy self._setitem_with_indexer(indexer, value)
# 如果表中有NaN,想用0填充,除了replace也可以使用fillna(0)
d = {np.nan: 0}
df.replace(d)
color | size | |
---|---|---|
0 | white | 0.0 |
1 | red | 20.0 |
2 | white | 10.0 |
df2 = DataFrame(np.random.randint(0, 150, size=(4, 2)),
columns=['Python', 'Java'],
index=list('abcd'))
df2
Python | Java | |
---|---|---|
a | 58 | 1 |
b | 99 | 71 |
c | 66 | 7 |
d | 104 | 143 |
#使用map函数新建一列
# 生成一个Math的列
df2['Math'] = df2['Python'].map(lambda x: x + 20)
df2
Python | Java | Math | |
---|---|---|---|
a | 58 | 1 | 78 |
b | 99 | 71 | 119 |
c | 66 | 7 | 86 |
d | 104 | 143 | 124 |
def level(x):
    """Map a numeric score to a grade label.

    Bands, checked from the top down:
      x > 100        -> '完美'
      80 <= x <= 100 -> '优秀'
      60 <= x < 80   -> '及格'
      otherwise      -> '不及格'
    """
    if x > 100:
        return '完美'
    # The original test here was `x > 100 and x >= 80`, which can never be
    # true once the branch above has failed, so '优秀' was unreachable and
    # scores of 80..100 fell through to '不及格'.
    if x >= 80:
        return '优秀'
    if x >= 60:
        return '及格'
    return '不及格'
#用数学成绩判断这个人的等级,然后将判断得到的等级添加为一列
df2['level'] = df2['Math'].map(level)
df2
Python | Java | Math | level | |
---|---|---|---|---|
a | 58 | 1 | 78 | 不及格 |
b | 99 | 71 | 119 | 完美 |
c | 66 | 7 | 86 | 不及格 |
d | 104 | 143 | 124 | 完美 |
df2['Math'] = df2['Math'].transform(lambda x: x + np.random.randint(0, 50, size=1)[0])
df2
Python | Java | Math | level | |
---|---|---|---|---|
a | 58 | 1 | 121 | 不及格 |
b | 99 | 71 | 161 | 完美 |
c | 66 | 7 | 134 | 不及格 |
d | 104 | 143 | 158 | 完美 |
使用map()函数新建一个新列
df2.drop('level', axis=1, inplace=True)
df2
# 在pandas中,map与transform的最大区别:map可以传入dict作为映射,transform不可以
df2['level'] = df2['Java'].transform(level)
df2
Python | Java | Math | level | |
---|---|---|---|---|
a | 58 | 1 | 121 | 不及格 |
b | 99 | 71 | 161 | 及格 |
c | 66 | 7 | 134 | 不及格 |
d | 104 | 143 | 158 | 完美 |
df2
Python | Java | Math | level | |
---|---|---|---|---|
a | 58 | 1 | 121 | 不及格 |
b | 99 | 71 | 161 | 及格 |
c | 66 | 7 | 134 | 不及格 |
d | 104 | 143 | 158 | 完美 |
#这个是不行的
df2.index[0] = '张三'
# rename 要替换的索引存在则替换,不存在就不管
df2.rename({'a':'张三', 'Python': '大蟒蛇'},axis=1)
大蟒蛇 | Java | Math | level | |
---|---|---|---|---|
a | 58 | 1 | 121 | 不及格 |
b | 99 | 71 | 161 | 及格 |
c | 66 | 7 | 134 | 不及格 |
d | 104 | 143 | 158 | 完美 |
# rename中的参数值可以是一个func
def index_rename(item):
    """Return the new label for one index entry.

    The label 'a' becomes '张三'; every other label becomes '李四'.
    """
    return '张三' if item == 'a' else '李四'
df2.rename(index_rename)
Python | Java | Math | level | |
---|---|---|---|---|
张三 | 58 | 1 | 121 | 不及格 |
李四 | 99 | 71 | 161 | 及格 |
李四 | 66 | 7 | 134 | 不及格 |
李四 | 104 | 143 | 158 | 完美 |
# 数据中比较大或特别小的都可以认为是异常值
# 大头儿子和小头爸爸都算是人类中的异常值
# NaN也是异常值
df3 = DataFrame(np.random.randint(0, 150, size=(10, 4)),
columns=['Python', 'Java', 'PHP', 'VR'])
df3
Python | Java | PHP | VR | |
---|---|---|---|---|
0 | 122 | 19 | 146 | 88 |
1 | 82 | 105 | 120 | 146 |
2 | 134 | 19 | 81 | 127 |
3 | 0 | 78 | 75 | 39 |
4 | 3 | 115 | 19 | 8 |
5 | 16 | 147 | 71 | 44 |
6 | 16 | 92 | 124 | 131 |
7 | 42 | 141 | 64 | 114 |
8 | 31 | 103 | 64 | 13 |
9 | 103 | 83 | 128 | 108 |
df3.describe()
Python | Java | PHP | VR | |
---|---|---|---|---|
count | 10.000000 | 10.000000 | 10.000000 | 10.000000 |
mean | 54.900000 | 90.200000 | 89.200000 | 81.800000 |
std | 50.858081 | 43.649615 | 39.000855 | 51.410764 |
min | 0.000000 | 19.000000 | 19.000000 | 8.000000 |
25% | 16.000000 | 79.250000 | 65.750000 | 40.250000 |
50% | 36.500000 | 97.500000 | 78.000000 | 98.000000 |
75% | 97.750000 | 112.500000 | 123.000000 | 123.750000 |
max | 134.000000 | 147.000000 | 146.000000 | 146.000000 |
使用std()函数可以求得DataFrame对象每一列的标准差
df3.std()
输出
Python 50.858081
Java 43.649615
PHP 39.000855
VR 51.410764
dtype: float64
根据每一列的标准差,对DataFrame元素进行过滤。
借助any()函数, 测试是否有True,有一个或以上返回True,反之返回False
对每一列应用筛选条件,去除标准差太大的数据
df3
Python | Java | PHP | VR | |
---|---|---|---|---|
0 | 122 | 19 | 146 | 88 |
1 | 82 | 105 | 120 | 146 |
2 | 134 | 19 | 81 | 127 |
3 | 0 | 78 | 75 | 39 |
4 | 3 | 115 | 19 | 8 |
5 | 16 | 147 | 71 | 44 |
6 | 16 | 92 | 124 | 131 |
7 | 42 | 141 | 64 | 114 |
8 | 31 | 103 | 64 | 13 |
9 | 103 | 83 | 128 | 108 |
# 大于等于70的数据我们认定为异常值,下面筛选出每一列都满足该条件的行
cond1 = df3 >= 70
cond2 = cond1.all(axis=1)
df3[cond2]
Python | Java | PHP | VR | |
---|---|---|---|---|
1 | 82 | 105 | 120 | 146 |
9 | 103 | 83 | 128 | 108 |
删除特定索引df.drop(labels,inplace = True)
## 4. 排序
使用.take()函数排序,可以借助np.random.permutation()函数随机排序
# 生成指定大小的矩阵
np.random.permutation([4, 2])
输出
array([4, 2])
df2
Python | Java | Math | level | |
---|---|---|---|---|
a | 58 | 1 | 121 | 不及格 |
b | 99 | 71 | 161 | 及格 |
c | 66 | 7 | 134 | 不及格 |
d | 104 | 143 | 158 | 完美 |
df2.iloc[::-1]
Python | Java | Math | level | |
---|---|---|---|---|
d | 104 | 143 | 158 | 完美 |
c | 66 | 7 | 134 | 不及格 |
b | 99 | 71 | 161 | 及格 |
a | 58 | 1 | 121 | 不及格 |
# 通过手写的方式将矩阵进行颠倒
df2.take([3, 2, 1, 0])
Python | Java | Math | level | |
---|---|---|---|---|
d | 104 | 143 | 158 | 完美 |
c | 66 | 7 | 134 | 不及格 |
b | 99 | 71 | 161 | 及格 |
a | 58 | 1 | 121 | 不及格 |
# 怎么进行一个随机的排序
df2.take(np.random.permutation(4), axis=1)
Python | level | Java | Math | |
---|---|---|---|---|
a | 58 | 不及格 | 1 | 121 |
b | 99 | 及格 | 71 | 161 |
c | 66 | 不及格 | 7 | 134 |
d | 104 | 完美 | 143 | 158 |
df4 = DataFrame(np.random.randint(0, 150, size=(1000, 4)),
columns=['Python', 'Java', 'Math', 'China'])
df4
Python | Java | Math | China | |
---|---|---|---|---|
0 | 0 | 94 | 45 | 52 |
1 | 130 | 44 | 120 | 60 |
2 | 45 | 134 | 149 | 14 |
3 | 18 | 62 | 4 | 55 |
4 | 73 | 66 | 112 | 27 |
5 | 17 | 90 | 61 | 136 |
6 | 106 | 101 | 29 | 100 |
7 | 60 | 58 | 142 | 60 |
8 | 47 | 94 | 70 | 24 |
9 | 9 | 115 | 101 | 80 |
10 | 92 | 2 | 135 | 35 |
11 | 149 | 57 | 39 | 92 |
12 | 131 | 143 | 149 | 142 |
13 | 35 | 68 | 12 | 119 |
14 | 137 | 62 | 47 | 68 |
15 | 67 | 74 | 140 | 34 |
16 | 85 | 139 | 88 | 104 |
17 | 32 | 122 | 31 | 145 |
18 | 65 | 24 | 33 | 17 |
19 | 68 | 41 | 60 | 99 |
20 | 76 | 104 | 29 | 78 |
21 | 61 | 9 | 26 | 58 |
22 | 65 | 23 | 12 | 22 |
23 | 27 | 90 | 122 | 93 |
24 | 55 | 119 | 49 | 27 |
25 | 92 | 95 | 116 | 127 |
26 | 127 | 96 | 39 | 60 |
27 | 116 | 15 | 103 | 130 |
28 | 86 | 53 | 11 | 128 |
29 | 120 | 91 | 138 | 124 |
… | … | … | … | … |
970 | 100 | 60 | 17 | 66 |
971 | 122 | 148 | 30 | 22 |
972 | 33 | 133 | 121 | 110 |
973 | 34 | 66 | 62 | 133 |
974 | 88 | 14 | 93 | 131 |
975 | 109 | 86 | 143 | 28 |
976 | 69 | 45 | 86 | 31 |
977 | 55 | 92 | 57 | 138 |
978 | 12 | 63 | 72 | 23 |
979 | 100 | 40 | 116 | 142 |
980 | 113 | 87 | 74 | 80 |
981 | 44 | 62 | 93 | 39 |
982 | 144 | 63 | 130 | 111 |
983 | 38 | 57 | 105 | 55 |
984 | 10 | 18 | 94 | 20 |
985 | 86 | 144 | 12 | 35 |
986 | 96 | 6 | 8 | 54 |
987 | 10 | 93 | 61 | 127 |
988 | 7 | 61 | 79 | 110 |
989 | 28 | 141 | 128 | 76 |
990 | 136 | 136 | 13 | 66 |
991 | 80 | 129 | 61 | 101 |
992 | 141 | 143 | 51 | 38 |
993 | 29 | 46 | 118 | 129 |
994 | 40 | 64 | 69 | 100 |
995 | 92 | 70 | 142 | 54 |
996 | 115 | 71 | 62 | 55 |
997 | 28 | 10 | 93 | 107 |
998 | 31 | 137 | 88 | 75 |
999 | 76 | 36 | 44 | 119 |
1000 rows × 4 columns
# 抽取1000行中50个
#公司年会可以做一个抽奖系统
# Draw 50 distinct row positions. randint() samples with replacement, so
# the same row could be drawn more than once; the first 50 entries of a
# random permutation cannot repeat.
df4.take(np.random.permutation(len(df4))[:50])
Python | Java | Math | China | |
---|---|---|---|---|
110 | 11 | 145 | 121 | 64 |
799 | 9 | 4 | 42 | 21 |
333 | 36 | 3 | 134 | 145 |
998 | 31 | 137 | 88 | 75 |
143 | 6 | 146 | 87 | 23 |
602 | 115 | 109 | 59 | 53 |
903 | 132 | 32 | 147 | 81 |
74 | 102 | 57 | 146 | 84 |
500 | 124 | 116 | 84 | 17 |
218 | 4 | 99 | 91 | 126 |
490 | 74 | 56 | 39 | 134 |
771 | 1 | 75 | 57 | 6 |
632 | 39 | 10 | 145 | 106 |
615 | 75 | 113 | 82 | 37 |
369 | 46 | 17 | 139 | 106 |
227 | 50 | 144 | 134 | 67 |
518 | 27 | 99 | 42 | 15 |
866 | 68 | 30 | 79 | 43 |
626 | 47 | 15 | 27 | 111 |
748 | 91 | 111 | 70 | 89 |
135 | 84 | 46 | 131 | 110 |
926 | 1 | 56 | 129 | 148 |
905 | 31 | 136 | 25 | 32 |
387 | 136 | 24 | 103 | 131 |
837 | 145 | 50 | 10 | 18 |
804 | 60 | 75 | 70 | 139 |
727 | 6 | 57 | 98 | 14 |
907 | 142 | 63 | 88 | 25 |
538 | 145 | 41 | 89 | 18 |
81 | 51 | 11 | 33 | 69 |
646 | 130 | 7 | 43 | 15 |
131 | 148 | 117 | 103 | 17 |
846 | 141 | 84 | 99 | 48 |
923 | 111 | 148 | 40 | 78 |
96 | 108 | 128 | 41 | 108 |
275 | 4 | 41 | 93 | 41 |
612 | 15 | 7 | 16 | 81 |
757 | 103 | 79 | 48 | 70 |
581 | 13 | 10 | 73 | 37 |
709 | 53 | 40 | 117 | 121 |
641 | 146 | 17 | 127 | 23 |
724 | 22 | 68 | 16 | 32 |
671 | 134 | 34 | 140 | 80 |
445 | 108 | 45 | 134 | 55 |
883 | 137 | 114 | 112 | 90 |
300 | 139 | 104 | 121 | 9 |
451 | 66 | 134 | 138 | 127 |
711 | 39 | 92 | 147 | 74 |
466 | 14 | 99 | 34 | 70 |
322 | 108 | 86 | 122 | 62 |
df5 = DataFrame({'item': ['萝卜', '白菜', '西红柿', '辣椒', '冬瓜','萝卜', '西红柿', '白菜',
'西红柿', '辣椒', '冬瓜'],
'seller': ['李大妈', '李大妈', '李大妈', '王大妈', '王大妈', '王大妈', '王大妈', '赵大妈', '赵大妈', '赵大妈', '赵大妈'],
'price': np.random.randint(3, 10, size=11)},
columns = ['item', 'seller', 'price'])
df5
item | seller | price | |
---|---|---|---|
0 | 萝卜 | 李大妈 | 9 |
1 | 白菜 | 李大妈 | 7 |
2 | 西红柿 | 李大妈 | 8 |
3 | 辣椒 | 王大妈 | 6 |
4 | 冬瓜 | 王大妈 | 8 |
5 | 萝卜 | 王大妈 | 5 |
6 | 西红柿 | 王大妈 | 4 |
7 | 白菜 | 赵大妈 | 8 |
8 | 西红柿 | 赵大妈 | 4 |
9 | 辣椒 | 赵大妈 | 6 |
10 | 冬瓜 | 赵大妈 | 6 |
# 首先是分组,然后是找出一个最小值
# 找出各蔬菜中最便宜的
df5.groupby(['item']).min()
seller | price | |
---|---|---|
item | ||
冬瓜 | 王大妈 | 6 |
白菜 | 李大妈 | 7 |
萝卜 | 李大妈 | 5 |
西红柿 | 李大妈 | 4 |
辣椒 | 王大妈 | 6 |
df5.groupby(['item']).max()
seller | price | |
---|---|---|
item | ||
冬瓜 | 赵大妈 | 8 |
白菜 | 赵大妈 | 8 |
萝卜 | 王大妈 | 9 |
西红柿 | 赵大妈 | 8 |
辣椒 | 赵大妈 | 6 |
# 求平均值,并且添加前缀
# Average only the numeric 'price' column. The string column 'seller'
# cannot be averaged, and current pandas raises on it instead of
# silently dropping it.
mean_price = df5.groupby(['item'])[['price']].mean().add_prefix('mean_')
mean_price
# 类似SQL: select avg(price) as mean_price from df5 group by item
mean_price | |
---|---|
item | |
冬瓜 | 7.000000 |
白菜 | 7.500000 |
萝卜 | 7.000000 |
西红柿 | 5.333333 |
辣椒 | 6.000000 |
# 把得到的平均值融合到原表中
# left_on 设定左表的关联列,right_index=True 表示右表用行索引对齐,这里是多对一合并
df6 = pd.merge(df5, mean_price, left_on='item', right_index=True)
df6
item | seller | price | mean_price | |
---|---|---|---|---|
0 | 萝卜 | 李大妈 | 9 | 7.000000 |
5 | 萝卜 | 王大妈 | 5 | 7.000000 |
1 | 白菜 | 李大妈 | 7 | 7.500000 |
7 | 白菜 | 赵大妈 | 8 | 7.500000 |
2 | 西红柿 | 李大妈 | 8 | 5.333333 |
6 | 西红柿 | 王大妈 | 4 | 5.333333 |
8 | 西红柿 | 赵大妈 | 4 | 5.333333 |
3 | 辣椒 | 王大妈 | 6 | 6.000000 |
9 | 辣椒 | 赵大妈 | 6 | 6.000000 |
4 | 冬瓜 | 王大妈 | 8 | 7.000000 |
10 | 冬瓜 | 赵大妈 | 6 | 7.000000 |
# 先求标准差,衡量各种蔬菜的价格波动
# Take the standard deviation of the numeric 'price' column only. The
# string column 'seller' has no standard deviation, and current pandas
# raises on it instead of silently dropping it.
price_std = df5.groupby(['item'])[['price']].std().add_prefix('std_')
price_std
std_price | |
---|---|
item | |
冬瓜 | 1.414214 |
白菜 | 0.707107 |
萝卜 | 2.828427 |
西红柿 | 2.309401 |
辣椒 | 0.000000 |
# 将蔬菜的波动值融合到原数据中
df7 = pd.merge(df6, price_std, left_on='item', right_index=True)
df7
item | seller | price | mean_price | std_price | |
---|---|---|---|---|---|
0 | 萝卜 | 李大妈 | 9 | 7.000000 | 2.828427 |
5 | 萝卜 | 王大妈 | 5 | 7.000000 | 2.828427 |
1 | 白菜 | 李大妈 | 7 | 7.500000 | 0.707107 |
7 | 白菜 | 赵大妈 | 8 | 7.500000 | 0.707107 |
2 | 西红柿 | 李大妈 | 8 | 5.333333 | 2.309401 |
6 | 西红柿 | 王大妈 | 4 | 5.333333 | 2.309401 |
8 | 西红柿 | 赵大妈 | 4 | 5.333333 | 2.309401 |
3 | 辣椒 | 王大妈 | 6 | 6.000000 | 0.000000 |
9 | 辣椒 | 赵大妈 | 6 | 6.000000 | 0.000000 |
4 | 冬瓜 | 王大妈 | 8 | 7.000000 | 1.414214 |
10 | 冬瓜 | 赵大妈 | 6 | 7.000000 | 1.414214 |
# std()标准差,看数据的波动,描述情况
def std_price(std_p):
    """Classify a price standard deviation into a descriptive label.

    Values above 2.5 give '价格很坑', values above 1 (up to 2.5) give
    '价格稳定', and everything else gives '良心菜价'.
    """
    # Thresholds are checked from highest to lowest; the first strict
    # match wins.
    bands = ((2.5, '价格很坑'), (1, '价格稳定'))
    for threshold, label in bands:
        if std_p > threshold:
            return label
    return '良心菜价'
# map
# 将各个蔬菜的价格情况添加为一列
df7['std_p'] = df7['std_price'].map(std_price)
df7
item | seller | price | mean_price | std_price | std_p | |
---|---|---|---|---|---|---|
0 | 萝卜 | 李大妈 | 9 | 7.000000 | 2.828427 | 价格很坑 |
5 | 萝卜 | 王大妈 | 5 | 7.000000 | 2.828427 | 价格很坑 |
1 | 白菜 | 李大妈 | 7 | 7.500000 | 0.707107 | 良心菜价 |
7 | 白菜 | 赵大妈 | 8 | 7.500000 | 0.707107 | 良心菜价 |
2 | 西红柿 | 李大妈 | 8 | 5.333333 | 2.309401 | 价格稳定 |
6 | 西红柿 | 王大妈 | 4 | 5.333333 | 2.309401 | 价格稳定 |
8 | 西红柿 | 赵大妈 | 4 | 5.333333 | 2.309401 | 价格稳定 |
3 | 辣椒 | 王大妈 | 6 | 6.000000 | 0.000000 | 良心菜价 |
9 | 辣椒 | 赵大妈 | 6 | 6.000000 | 0.000000 | 良心菜价 |
4 | 冬瓜 | 王大妈 | 8 | 7.000000 | 1.414214 | 价格稳定 |
10 | 冬瓜 | 赵大妈 | 6 | 7.000000 | 1.414214 | 价格稳定 |
df7
item | seller | price | mean_price | std_price | std_p | |
---|---|---|---|---|---|---|
0 | 萝卜 | 李大妈 | 9 | 7.000000 | 2.828427 | 价格很坑 |
5 | 萝卜 | 王大妈 | 5 | 7.000000 | 2.828427 | 价格很坑 |
1 | 白菜 | 李大妈 | 7 | 7.500000 | 0.707107 | 良心菜价 |
7 | 白菜 | 赵大妈 | 8 | 7.500000 | 0.707107 | 良心菜价 |
2 | 西红柿 | 李大妈 | 8 | 5.333333 | 2.309401 | 价格稳定 |
6 | 西红柿 | 王大妈 | 4 | 5.333333 | 2.309401 | 价格稳定 |
8 | 西红柿 | 赵大妈 | 4 | 5.333333 | 2.309401 | 价格稳定 |
3 | 辣椒 | 王大妈 | 6 | 6.000000 | 0.000000 | 良心菜价 |
9 | 辣椒 | 赵大妈 | 6 | 6.000000 | 0.000000 | 良心菜价 |
4 | 冬瓜 | 王大妈 | 8 | 7.000000 | 1.414214 | 价格稳定 |
10 | 冬瓜 | 赵大妈 | 6 | 7.000000 | 1.414214 | 价格稳定 |
# 将各个蔬菜的价格求和
df7.groupby(['item'])['price'].sum()
输出
item
冬瓜 14
白菜 15
萝卜 14
西红柿 16
辣椒 12
Name: price, dtype: int32
df7.groupby(['item'])['price'].transform(sum)
输出
0 14
5 14
1 15
7 15
2 16
6 16
8 16
3 12
9 12
4 14
10 14
Name: price, dtype: int32
transform()与apply()函数还能传入一个函数或者lambda
df = DataFrame({'color':['white','black','white','white','black','black'],
'status':['up','up','down','down','down','up'],
'value1':[12.33,14.55,22.34,27.84,23.40,18.33],
'value2':[11.23,31.80,29.99,31.18,18.25,22.44]})
apply的操作对象,也就是传给lambda的参数是整列的数组
# np.mean()
# apply 与 transform的最大区别:transform把每组的计算结果广播回原数据的每一行(结果与原表索引对齐、长度相同),apply则直接返回每组一个聚合结果
df7.groupby(['item'])['price'].apply(sum)
输出
item
冬瓜 14
白菜 15
萝卜 14
西红柿 16
辣椒 12
Name: price, dtype: int64
df7.groupby(['item'])['price'].apply(np.mean)
输出
item
冬瓜 7.000000
白菜 7.500000
萝卜 7.000000
西红柿 5.333333
辣椒 6.000000
Name: price, dtype: float64
Series和DataFrame都有一个用于生成各类图表的plot方法,默认情况下所生成的都是线形图
import numpy as np
import pandas as pd
from pandas import Series, DataFrame
# matplotlib不引入,在调用plot()的时候不会报错,但是图片也显示不出
# pandas和matplotlib进行了深度合作
import matplotlib.pyplot as plt
# 线形图 简单示例Series图例表示.plot()
s = Series(np.random.randint(0 ,20, size=10))
s
输出
0 14
1 6
2 19
3 5
4 7
5 16
6 1
7 10
8 11
9 17
dtype: int32
s.plot()
<matplotlib.axes._subplots.AxesSubplot at 0x9fe0240>
简单的DataFrame图表实例.plot()
df = DataFrame(np.random.randint(0, 150, size=(5, 4)),columns = ['Python', 'Java', 'PHP', 'Ruby'])
df.plot()
<matplotlib.axes._subplots.AxesSubplot at 0xa106390>
### 柱状图 DataFrame柱状图例
df.plot(kind='bar')
<matplotlib.axes._subplots.AxesSubplot at 0xa4c5240>
df.plot(kind='barh')
<matplotlib.axes._subplots.AxesSubplot at 0xa430be0>
读取tips.csv,查看每天聚会人数,每天各种聚会规模的比例
求和用df.sum(),注意灵活使用axis参数
tip = pd.read_csv('./tips.csv')
tip
day | 1 | 2 | 3 | 4 | 5 | 6 | |
---|---|---|---|---|---|---|---|
0 | Fri | 1 | 16 | 1 | 1 | 0 | 0 |
1 | Stat | 2 | 53 | 18 | 13 | 1 | 0 |
2 | Sun | 0 | 39 | 15 | 18 | 3 | 1 |
3 | Thur | 1 | 48 | 4 | 5 | 1 | 3 |
将第一列day数据变成行索引set_index
tip.set_index('day',inplace=True)
tip
# 行 代表是星期几
# 列 代表的是几个人一同去吃饭
1 | 2 | 3 | 4 | 5 | 6 | |
---|---|---|---|---|---|---|
day | ||||||
Fri | 1 | 16 | 1 | 1 | 0 | 0 |
Stat | 2 | 53 | 18 | 13 | 1 | 0 |
Sun | 0 | 39 | 15 | 18 | 3 | 1 |
Thur | 1 | 48 | 4 | 5 | 1 | 3 |
tip.plot(kind='bar')
<matplotlib.axes._subplots.AxesSubplot at 0xc1ded68>
#将不重要的数据给去除掉
tip.drop(axis=1,labels=['1', '6'], inplace=True)
# 还能用什么方法保留2-5的数据
# Select party sizes 2-5 by column label. A positional slice
# (iloc[:, 1:-1]) only works on the original six columns; after the
# in-place drop of '1' and '6' it would also cut '2' and '5'.
tip.loc[:, '2':'5'].plot(kind='bar')
# 下课自己去尝试一下
<matplotlib.axes._subplots.AxesSubplot at 0xc481128>
tip.plot(kind='bar')
<matplotlib.axes._subplots.AxesSubplot at 0xc7a9668>
## 直方图 random生成随机直方图,调用hist()方法
nd = np.random.randint(0, 10, size=10)
nd
输出
array([2, 3, 5, 4, 7, 0, 4, 4, 0, 5])
s = Series(nd)
# bins 默认值是10 值越大条越细
s.hist(bins=50)
<matplotlib.axes._subplots.AxesSubplot at 0xc9296d8>
随机百分比密度图
# density 密度
s.plot(kind='kde')
<matplotlib.axes._subplots.AxesSubplot at 0xca69710>
这两张图经常会被画在一起:直方图以规格化形式给出(以便画出密度图),然后再在其上绘制核密度估计。
接下来看看一个由两个不同的正态分布组成的双峰分布。
np.random.normal()正态分布函数
直方图hist,函数中必须添加参数normed=True(较新版本的matplotlib中该参数已改名为density=True)
nd1 = np.random.normal(loc=15, scale=5, size=1000)
nd1
输出
array([16.50002167, 21.95871306, 22.75958416, 17.31162324, 21.11362045,
18.26581647, 14.3677174 , 14.04193695, 24.47250718, 12.15495935,
9.27291491, 12.31342298, 10.2092411 , 26.50106733, 15.73813638,
15.96716855, 16.69108033, 17.47009465, 23.25585844, 3.75013434,
25.12165759, 9.2131128 , 20.80536815, 15.5412481 , 28.90305594,
20.53246725, 11.80165575, 21.60802209, 11.17585822, 16.32740781,
15.18755747, 18.45527818, 20.69591603, 21.49051133, 17.71510416,
18.80558295, 24.17867724, 20.85117481, 20.24591299, 10.10623473,
14.24666052, 17.52448709, 19.22814077, 15.72357144, 20.43921318,
8.0917059 , 22.00146547, 23.50631652, 9.19318319, 9.66151146,
15.75132368, 15.32207848, 15.48579172, 25.26377042, 9.0799947 ,
7.36254758, 2.42731825, 18.10300724, 10.88015804, 13.39732585,
8.91052017, 24.09794363, 18.02868005, 9.901521 , 13.21712227,
19.5068725 , 10.50911924, 14.89803695, 15.50261478, 17.15580065,
14.98757024, 26.86473339, 12.68019989, 15.02038962, 17.42206335,
11.8994741 , 14.31309029, 13.75152275, 6.47324371, 14.73115004,
21.72693826, 20.54387112, 21.36295281, 25.51458225, 7.25840778,
13.76897525, 19.77348404, 15.09407833, 18.76318749, 5.43239314,
15.45835862, 15.40969353, 16.28667652, 13.07865591, 8.28808502,
13.42694347, 18.38122404, 17.6814759 , 19.28344426, 9.53901169,
14.46634917, 11.84352671, 11.09474199, 4.75771378, 15.53622797,
13.13240292, 11.15249509, 8.30862199, 17.21214064, 17.02251594,
16.28650651, 8.21461943, 14.76498248, 15.98369367, 11.98718219,
17.02526983, 19.97020523, 16.44969423, 13.71055375, 22.77569864,
8.51357972, 6.61518919, 14.84898364, 17.58370265, 17.49575552,
12.93721125, 14.53553582, 18.22937262, 15.59990324, 10.94411136,
15.15079863, 7.75436718, 19.64155892, 8.05933151, 14.14798446,
25.93909358, 9.48265882, 16.06401139, 17.17927089, 16.88383318,
19.95283715, 12.02420859, 18.16494132, 14.31856533, 17.48454134,
19.70450889, 16.15431485, 14.97920007, 12.75705767, 21.13982655,
23.76846867, 20.43796706, 17.5593216 , 13.81091458, 19.4130765 ,
22.30718067, 18.30679605, 18.73435888, 21.1085248 , 16.17991617,
17.66907561, 9.3648496 , 12.53151174, 18.74196939, 14.72622698,
21.91666941, 10.68175022, 28.06536912, 16.92750333, 10.48199841,
16.95819675, 11.6035696 , 17.37937365, 12.65895274, 11.54480806,
18.3616206 , 18.02565635, 15.90571496, 6.96905143, 23.91371845,
18.72841836, 2.36636948, 15.66448126, 19.13919071, 14.37065233,
14.82159192, 13.72377931, 16.78926541, 20.13959017, 16.77379021,
14.84332182, 17.34148733, 18.78528991, 7.46478532, 21.50181241,
7.9833104 , 17.80479141, 16.70124722, 13.64582571, 13.577898 ,
16.37998434, 19.03837668, 16.89253034, 20.42677538, 11.88847021,
11.38631853, 15.50082332, 16.7030249 , 16.088621 , 12.51561343,
15.73055771, 19.48992889, 13.06485701, 8.0678333 , 18.05531125,
5.43515846, 19.04760736, 7.879961 , 20.12691794, 8.43035117,
17.22251733, 8.03678616, 16.0078554 , 16.15014473, 8.21224197,
19.38089222, 9.01029769, 11.26062042, 14.76521656, 17.35078069,
22.80520652, 14.46913352, 17.42879149, 13.37647894, 17.25338735,
18.73637238, 2.88421749, 20.40011706, 10.39826822, 15.05082296,
19.06963941, 16.91706731, 13.82272106, 7.27754931, 16.11283426,
15.5246287 , 13.81157399, 6.09658837, 15.6001777 , 18.92162163,
13.38838482, 12.3894269 , 25.61593369, 14.51006631, 24.07157909,
16.8478554 , 19.06337918, 6.89606982, 15.89648191, 9.94890574,
4.1741199 , 19.96662183, 22.11552452, 20.07915682, 11.81127282,
10.01155602, 16.06106413, 13.72389102, 13.58895892, 6.38356799,
15.51871295, 18.04894363, 6.91148171, 15.76145194, 11.06726095,
15.0972117 , 13.89722476, 17.2514969 , 13.71947839, 15.46657072,
9.57084451, 8.69629316, 18.11964659, 13.78259735, 25.01368229,
9.83800798, 9.04644566, 9.01788547, 13.94342442, 19.78553559,
14.88336783, 7.98179993, 12.39448036, 17.1408828 , 19.33147132,
14.05125452, 17.23008172, 13.65371068, 16.17567752, 12.23122378,
12.10680811, 2.95619838, 6.89529116, 15.51517282, 15.19482832,
12.01639161, 18.7831425 , 17.0239132 , 1.38190082, 13.69724912,
13.06217886, 26.94699552, 22.59459457, 11.97867369, 13.93633583,
17.10552488, 22.58992482, 19.40934193, 19.53641763, 15.88442231,
7.9295681 , 21.77780904, 12.47072318, 13.81039032, 17.39736673,
13.15453916, 21.36911849, 10.70201259, 10.59387373, 12.65426619,
11.54376933, 13.14155316, 16.87773098, 11.76657509, 14.89503279,
11.12283823, 21.28503492, 19.51000396, 16.66250962, 12.52950223,
20.49618371, 16.8304768 , 13.58145978, 5.67632775, 12.51501766,
16.40642439, 17.349106 , 7.89876048, 23.19479679, 10.2073971 ,
14.0566871 , 14.78658926, 20.44095559, 15.54838402, 14.89340323,
12.12392313, 8.92467267, 12.74334742, 17.47857181, 19.19866611,
2.45721744, 8.89357759, 8.14078952, 20.12140264, 13.08791183,
15.15808184, 11.52595765, 9.67495873, 8.53996197, 14.04442005,
22.79638213, 13.1821865 , 12.72650507, 9.90846988, 11.41972578,
17.93019223, 17.29881884, 12.55815741, 14.25266947, 10.33093622,
7.142058 , 21.69019735, 17.25592536, 13.08063266, 8.6764044 ,
10.24371983, 15.41889379, 4.28811294, 11.39411962, 26.56646754,
16.96888119, 18.83606664, 15.01419964, 15.34971026, 22.02537937,
4.36119692, 25.28447139, 10.48726479, 13.59826026, 14.06848502,
12.99063847, 16.78391206, 9.74175223, 19.30650068, 22.54410113,
24.98914811, 9.94762802, 21.40109447, 13.6695364 , 7.72764777,
9.64404477, 11.17475083, 12.60055258, 20.97721358, 23.34476622,
18.61854549, 15.52419453, 18.91436655, 9.62666948, 10.61228808,
18.06891452, 19.51675254, 13.23077434, 19.00453996, 13.32899195,
12.26702829, 15.6820842 , 8.11843937, 21.44353167, 15.52251699,
11.4997531 , 24.72088535, 16.28930122, 8.0863221 , 14.2955109 ,
7.17201489, 13.87547583, 17.67027528, 19.13974024, 22.68693614,
24.18678422, 6.06103468, 24.76111537, 10.97781017, 15.81552381,
10.84972314, 21.14140714, 13.76751962, 6.04277574, 24.58010336,
8.68660216, 16.87910212, 26.79462555, 16.53919855, 12.56473841,
12.48541333, 14.29182684, 10.48183018, 17.41734179, 10.97468455,
11.04737238, 11.98624708, 6.25940958, 14.55275986, 10.96969175,
14.44198921, 12.22024554, 21.06729722, 18.63578895, 15.01203526,
9.78234677, 12.69823694, 18.53826221, 18.33718908, 8.67841207,
12.40316973, 6.25257103, 13.12125998, 23.65467315, 8.48692567,
16.48741407, 22.88008283, 9.74399847, 24.23586658, 20.85856245,
6.93278193, 9.39064606, 11.74417199, 17.26023393, 17.4899952 ,
15.60605781, 17.23917663, 16.29546289, 10.05711149, 7.27520158,
13.98169832, 14.95431197, 15.24916955, 14.78272085, 21.01423717,
11.47976526, 16.05894889, 16.30783588, 11.30069306, 21.44509951,
21.12234178, 13.63871927, 8.67404016, 19.55244501, 10.47571836,
21.03142778, 19.22024405, 17.56288251, 11.91033324, 11.17856874,
11.01553264, 14.59588468, 8.70018864, 12.13660946, 24.77501267,
10.5752547 , 8.6157514 , 6.26227458, 14.93877643, 21.75858592,
17.27632436, 13.10800654, 21.44729017, 11.76678708, 21.22406953,
16.94976341, 5.29558133, 21.19612368, 9.19780541, 18.17468954,
16.2252154 , 19.59557946, 11.66578002, 19.06531132, 21.36638746,
20.6541794 , 15.53407109, 14.31871485, 8.88234657, 19.95083641,
15.33807678, 23.35312598, -0.41594355, 12.99684309, 17.81847784,
8.65620742, 11.11906853, 14.72213943, 10.96787938, 19.0607508 ,
15.06548159, 10.73352375, 7.80835499, 18.45434514, 10.94886255,
17.02581022, 15.74533205, 16.83597387, 15.6272432 , 15.07400084,
21.43340505, 19.8596281 , 15.10743665, 9.8524759 , 18.4157017 ,
8.71576747, 8.81978059, 18.74270385, 20.19611499, 7.78305738,
11.12512739, 11.0320895 , 4.08279734, 19.50883865, 18.3420021 ,
16.38436628, 15.41935401, 14.88358696, 21.28094706, 8.07498229,
5.44891281, 22.96052191, 4.82706329, 18.64935207, 6.81372616,
21.48792701, 10.81966699, 16.51777675, 13.66587892, 5.15167454,
19.6746364 , 18.92921522, 11.80793426, 15.99329862, 13.5428769 ,
17.09127653, 18.0650194 , 16.9966228 , 15.88075782, 10.01499427,
15.53643766, 14.82116696, 19.64805793, 13.38260529, 17.55275748,
12.79123029, 1.60388552, 9.64273317, 14.10328556, 12.80516354,
16.13765219, 24.82846188, 12.40167473, 14.37984687, 7.17152562,
3.57487493, 18.68028442, 22.02231231, 13.08430578, 15.36424363,
12.58209423, 19.16279834, 15.5310864 , 6.74056615, 18.95227059,
9.62725175, 24.77371694, 18.72404572, 22.33711919, 13.33348445,
10.82508722, 9.09203216, 2.25354049, 11.87985931, 9.93989723,
11.64604036, 13.04827512, 20.30953976, 12.18749003, 13.07841278,
16.33515174, 18.03769259, 16.79215097, 25.2966099 , 25.94685352,
5.69546044, 11.92172349, 19.8342691 , 12.6440724 , 20.28437939,
16.35062192, 14.67498508, 18.03873384, 11.98190638, 12.48304321,
17.18353178, 11.6280484 , 14.99820737, 18.62490662, 21.36806158,
10.33361186, 20.65887684, 18.14897646, 13.90046949, 11.97019342,
12.86450733, 14.79502629, 7.12492396, 14.02432214, 7.74633751,
20.42791153, 4.51691996, 12.45499258, 26.46562207, 25.43928357,
16.02156583, 15.77301938, 11.19124392, 20.2017068 , 11.38777201,
15.18199219, 13.06922751, 10.84513727, 16.72115341, 18.10468922,
3.27337482, 15.77864477, 11.69318446, 16.11939922, 5.75250131,
11.70259621, 9.96164663, 14.04692601, 16.66432923, 13.02730275,
22.14321073, 9.97324488, 20.89977249, 16.61452685, 14.98716636,
24.34831533, 12.64388541, 24.29123835, 18.62064405, 14.92635478,
20.6628145 , 12.35438856, 16.62651241, 15.23094509, 15.17337696,
6.55103708, 19.01998175, 23.25536204, 17.41528444, 21.16583531,
18.9232849 , 19.15163022, 7.24348554, 10.57811529, 19.75374939,
18.20943943, 22.28176618, 18.12091144, 12.23462022, 25.14515739,
13.50928872, 8.66583537, 15.73187863, 15.96399654, 12.59117191,
23.55583777, 17.89899462, 18.94571775, 11.52067712, 13.39007944,
12.59774377, 11.15893922, 10.95135673, 14.42674691, 17.96761055,
16.49122727, 12.35296522, 15.12034443, 15.74986747, 12.51710365,
9.03581021, 13.25291267, 10.15981987, 25.01072793, 18.97541121,
19.75442295, 15.72658347, 15.0584492 , 17.99054063, 5.09522099,
12.27876186, 16.19625502, 11.07278001, 20.34912201, 13.00536634,
16.99162583, 13.84421155, 14.38223744, 14.4279466 , 20.38561836,
11.8408168 , 13.61657858, 20.71889558, 13.34126163, 14.15019112,
25.77991025, 22.55302383, 19.96159611, 13.83428248, 10.01061803,
12.17573564, 12.90200549, 13.5309241 , 19.83578067, 9.19966323,
15.90839431, 10.42371807, 13.24996161, 11.84023659, 2.7949252 ,
18.58428929, 14.08771667, 11.87739049, 12.94936314, 5.72137785,
9.00511033, 24.56904689, 12.63663553, 17.76632913, 23.15421102,
17.48732613, 15.18508386, 16.55137541, 21.34679622, 16.29028048,
11.45359196, 20.79345061, 2.00181134, 15.79546808, 20.83970512,
22.25406941, 10.11771432, 8.67460536, 22.40106521, 8.87171 ,
15.65385265, 11.95693078, 17.83327425, 9.60459978, 8.27386886,
13.12833899, 11.16273179, 19.53131749, 23.54517063, 24.42752656,
23.96310917, 18.4784533 , 7.23297108, 10.54061606, 20.86058868,
11.84239435, 18.31885798, 11.17861973, 10.97931156, 16.58560191,
9.07837253, 15.33024139, 23.10263136, 14.38242525, 20.74808168,
14.23372606, 15.70755858, 21.23669843, 1.35217317, 12.37976423,
7.14600676, 18.39477579, 17.74507314, 18.47339341, 15.85955812,
12.03810285, 19.7152867 , 15.52989343, 9.58677829, 12.8273546 ,
13.12251889, 11.9852327 , 9.64979431, 4.67254736, 5.83485027,
25.58650984, 22.28051372, 10.58979922, 18.30552752, 13.38469023,
15.49887756, 18.9906719 , 15.28172592, 13.98672976, 22.84162786,
11.16412929, 28.13020788, 14.53176387, 10.58195681, 13.62227215,
19.39882903, 16.4404991 , 16.22472679, 21.69188583, 9.39591894,
13.51732061, 12.43309456, 24.88460179, 18.21701532, 12.22188839,
17.66401711, 18.73947878, 19.96433349, 12.44923092, 16.2503667 ,
13.93141598, 20.68562248, 13.22824968, 14.49476981, 14.65557226,
13.50932288, 18.78651139, 13.11609163, 13.72922394, 5.48391225,
21.74546083, 4.54749793, 9.61640779, 12.76025987, 16.6814269 ,
7.54157478, 14.12360955, 20.48599741, 7.25144564, 18.90874243,
18.4448861 , 9.92069343, 16.25574177, 6.89835745, 13.93865526,
21.31573413, 13.36223344, 15.66061316, 19.88795267, 17.3822145 ,
18.74051049, 18.76796461, 4.8938116 , 15.55699742, 13.40461 ,
15.95881726, 18.13730597, 24.84545955, -0.18746347, 15.51857936,
14.85838324, 9.53991868, 8.24792235, 14.38302382, 16.59971307,
19.10383007, 16.70314952, 14.83957597, 19.67174747, 21.01421162,
16.47244826, 11.66594006, 17.90942654, 12.45942559, 11.75413215,
15.8393784 , 16.96650233, 21.34297276, 16.89814547, 17.18400988,
23.06562578, 9.31042315, 17.07230418, 14.9523615 , 15.95823228,
21.05921299, 16.08593307, 19.09498751, 10.39794857, 25.96392485,
14.69617158, 17.66141256, 10.20026414, 6.2382827 , 16.86058806,
19.25205008, 13.70010653, 2.27012525, 16.92507514, 24.39321811,
10.85770341, 23.02441749, 19.2417343 , 19.05590996, 22.87654077,
20.41296773, 23.48198919, 16.00712785, 7.06656844, 15.13328042,
12.70216947, 7.8267746 , 23.43791089, 28.6641215 , 14.95792843,
10.89927121, 8.22974538, 6.62927352, 10.62186059, 12.56804649,
13.18951132, 14.61382172, 23.37917646, 20.56671153, 16.86438089,
18.11011443, 20.30088321, 17.27765895, 7.96486177, 9.15479929])
nd2 = np.random.normal(loc=5, scale=1, size=1000)
nd2
输出
array([4.16357325, 5.62772375, 3.9552129 , 5.01722184, 5.94728292,
3.87611474, 6.40455754, 7.46665515, 3.38515994, 5.34430971,
3.92116982, 5.26760858, 3.87753152, 4.91594112, 2.28170611,
5.22466842, 4.24448323, 5.5262917 , 5.51089455, 6.09550044,
6.8681638 , 4.88041697, 4.89141158, 4.26124332, 5.4022748 ,
4.61356982, 4.62460457, 7.67978794, 4.57731853, 6.67464937,
5.10280295, 4.33677994, 4.19373832, 5.36394475, 4.27075858,
5.39031978, 4.30670864, 3.93891792, 6.19342666, 4.07486768,
6.27780597, 4.55468073, 6.73625441, 5.86779949, 5.49083393,
4.58853995, 5.6148867 , 3.48363662, 6.63894092, 5.22175345,
4.39575909, 4.6296257 , 5.13293428, 4.63102278, 5.83887074,
5.23785433, 5.8753402 , 3.73502303, 5.15459362, 4.57097034,
5.67559993, 4.69832925, 5.65150331, 4.33270153, 3.92274046,
5.70642851, 5.60828203, 4.59928909, 4.16696389, 5.27554203,
6.56415018, 5.11148937, 5.33306229, 4.79096096, 7.14275288,
3.24127949, 5.42305971, 4.8503601 , 4.73999867, 3.30017711,
5.48859061, 8.6720081 , 4.61849386, 5.82255506, 4.94013434,
4.29599778, 3.64307498, 5.91702747, 4.53273311, 2.86494732,
4.83081915, 4.66762299, 5.51913047, 4.86923194, 5.64247402,
6.92941377, 5.80016722, 4.71514671, 4.36175335, 5.29760305,
4.03054667, 5.77336301, 4.27461778, 5.39664654, 2.95356186,
5.69516542, 5.65799556, 5.59469922, 6.1820571 , 3.33644612,
5.01553752, 5.61859069, 5.76889055, 5.33857628, 4.44932627,
5.44414466, 6.69054537, 4.95445522, 3.93424904, 6.14749563,
5.1360424 , 4.9205245 , 6.27270956, 3.402828 , 6.63289999,
4.53090779, 4.06289069, 5.68322406, 4.82075541, 4.18470219,
4.23731251, 5.784845 , 4.69988419, 5.60999033, 3.89867943,
4.91213246, 6.32055125, 3.32742804, 4.387534 , 4.22955403,
5.10986226, 5.31619754, 5.26842713, 4.30329285, 4.02172716,
5.12122162, 5.74783566, 4.89342586, 4.14319473, 4.89207417,
5.17976012, 4.92222856, 4.41448012, 6.11759185, 6.20346597,
6.30937222, 4.45736507, 4.19464601, 5.95110871, 5.39087317,
4.56342672, 5.41105951, 5.19021551, 4.44773853, 6.43024933,
4.2240709 , 3.10760838, 4.10843011, 3.49803467, 4.13008625,
4.93954846, 5.36686046, 4.94802037, 5.92916376, 3.6722152 ,
5.59956637, 2.26681606, 4.98058069, 5.52226543, 4.3678841 ,
5.79127271, 7.01911131, 5.54222329, 5.1736914 , 5.12307703,
3.64086435, 4.49529974, 3.09617049, 5.23362009, 4.83003444,
5.97855068, 4.90465821, 3.3488062 , 5.68806677, 3.10608318,
3.59533022, 4.1582079 , 5.42337516, 5.18032938, 3.80619133,
6.16036382, 7.10779899, 5.79528339, 4.20739267, 5.7849504 ,
6.35741234, 4.87889289, 4.67202464, 5.59772812, 3.89601316,
3.81930584, 3.86334162, 2.12820156, 6.03205986, 3.93786607,
4.69154114, 5.01000487, 5.48795451, 5.64960885, 3.28107906,
6.44071533, 5.84515172, 3.77795634, 4.1543535 , 4.76779398,
6.46011012, 4.20959715, 6.12791645, 3.13111379, 6.32404882,
4.01307035, 7.04022176, 5.88297238, 4.77726627, 5.81734169,
4.5834276 , 4.32792513, 4.78572659, 6.2394519 , 4.95323565,
3.76792869, 5.04695662, 4.6782468 , 4.26210285, 2.68574935,
4.87784877, 4.16379962, 4.89492512, 5.29077423, 6.63540688,
5.91601612, 5.79836639, 4.46454219, 5.59057577, 5.64159953,
3.73283891, 5.76728448, 5.59155539, 4.62458597, 4.59258478,
4.26568473, 4.13624814, 5.45664658, 3.20651961, 4.408289 ,
5.91191617, 6.42389566, 5.3310918 , 5.16260576, 5.08216533,
4.23972288, 5.4333913 , 5.52618567, 3.53352888, 6.18801528,
5.09545601, 5.47251721, 4.52145652, 4.48497876, 6.13442928,
5.46427115, 2.29920696, 5.96166798, 6.86041016, 4.64935911,
4.05211169, 5.22005598, 5.03828418, 3.11720228, 6.5067688 ,
4.16690752, 4.31972409, 7.66002522, 6.29075211, 6.54033656,
4.73315998, 4.48450546, 6.9592562 , 3.47862276, 5.30881486,
4.82692032, 5.57301637, 4.19444764, 5.20859468, 4.50189408,
4.34951492, 4.63316205, 5.55343187, 3.70853791, 6.23107709,
4.27639353, 3.43623509, 3.70621964, 4.20023314, 4.38700297,
6.0247284 , 4.10301279, 1.73386839, 3.8811813 , 5.95340252,
3.32468732, 5.33303337, 5.91283534, 6.00924919, 3.6032262 ,
4.59126025, 4.29693335, 6.78247321, 7.06582257, 3.79111525,
3.78370384, 5.66027207, 4.96055824, 5.70154465, 4.65007777,
4.5035654 , 5.61986658, 4.58564207, 6.36564777, 5.77875425,
4.70467287, 5.17829573, 5.82797022, 3.48804111, 5.03259832,
3.43797394, 5.4791678 , 4.29187316, 5.43345946, 2.97895257,
4.17941581, 6.46108478, 4.16631229, 3.82299511, 4.88063746,
5.99597437, 4.2566243 , 5.61937518, 4.45414788, 5.92053089,
3.49520851, 7.42316601, 5.29250923, 4.28792588, 7.36984588,
4.04040733, 4.26886281, 3.77000458, 4.56072738, 4.63605669,
4.95127483, 3.53601143, 3.72015063, 4.80868551, 3.67868907,
4.44574129, 5.95172132, 5.8080427 , 5.30838712, 5.24493577,
5.19000903, 4.74896707, 5.44316688, 3.96915616, 4.68391686,
7.5726147 , 4.91730717, 5.45240095, 5.9629974 , 2.47171197,
3.69924953, 4.68509439, 4.43821199, 4.99523136, 4.40460386,
4.96303014, 4.71679413, 4.57039619, 5.33357121, 5.15079823,
6.49139364, 3.86845536, 4.98871996, 3.7839628 , 4.273599 ,
5.95315578, 3.74863635, 4.11147589, 5.53989483, 4.80278295,
4.90408198, 6.95778251, 5.19522392, 5.5683528 , 5.98498926,
5.37911824, 6.28442773, 6.06750223, 6.05986868, 4.71704863,
3.99302845, 4.95290817, 2.49207559, 4.87651285, 5.15237529,
3.49276428, 4.99630332, 4.53049613, 4.93972966, 5.14459539,
6.43671059, 3.6443922 , 3.62322561, 4.93096589, 4.03285254,
5.12044196, 4.7470407 , 5.05114539, 4.98548726, 3.84715856,
4.38895987, 5.02571047, 4.27669533, 5.80634551, 3.74243646,
5.74672054, 4.30977187, 5.07860367, 5.36591131, 6.49938734,
4.5655998 , 6.72658129, 5.24367282, 4.48870661, 4.71499478,
6.42220582, 6.1114622 , 5.48564748, 5.57308475, 6.15891602,
5.9758781 , 5.85915079, 3.91862602, 2.12821654, 3.87280873,
3.66024374, 4.50284895, 5.21899411, 5.38545771, 7.16073212,
6.59658302, 5.00053088, 6.18178777, 3.63806579, 5.01496147,
5.90991082, 6.97094723, 4.83097336, 5.30165196, 4.661253 ,
5.86529545, 5.02720143, 4.84598703, 4.33055532, 3.21912353,
4.45419335, 4.77518495, 5.14216553, 6.36418059, 6.26262535,
4.59296046, 5.03454922, 6.00619901, 4.18621875, 4.91093635,
3.29685616, 3.79522521, 6.9291816 , 6.4750611 , 4.89884067,
5.78038694, 4.02603052, 5.70340682, 3.66391833, 6.78237457,
5.45288145, 4.96589685, 4.6344795 , 3.73365996, 4.93605937,
6.30098803, 6.15180329, 3.62245617, 4.31080067, 4.89444726,
4.46041046, 6.46807424, 6.69713398, 5.37660044, 3.82659646,
5.74103172, 4.49821583, 5.42852421, 2.79327574, 4.5965607 ,
4.89687084, 5.06394347, 3.76460552, 5.573225 , 3.72458122,
3.05024013, 5.17225729, 4.97343936, 4.62345663, 6.6316699 ,
3.89960346, 4.6849749 , 5.38701991, 5.70404233, 4.69017532,
5.876532 , 4.74405497, 5.44952709, 4.89016766, 6.21227894,
5.01569238, 6.63113838, 6.08459047, 4.42572288, 4.23403788,
4.58841822, 5.13660595, 3.78069157, 4.95872333, 5.94666079,
6.99971432, 5.57830349, 5.19189896, 3.35896715, 5.36527688,
5.72545598, 3.56843588, 4.57611497, 5.73088564, 6.5592119 ,
5.78983926, 4.76242211, 5.51854665, 5.37074764, 4.80702089,
5.68180577, 6.68828184, 3.97815013, 3.86441813, 5.50323984,
6.2954246 , 6.81212302, 4.34765753, 5.38604839, 3.57545063,
4.2153493 , 6.40788719, 5.04704768, 5.71393966, 5.46348939,
4.36487098, 5.72428558, 3.64913714, 5.09075194, 5.10808268,
4.53924537, 3.95084051, 5.80156225, 3.95139318, 6.40185987,
5.10152297, 5.9083863 , 6.77868656, 4.93491808, 6.87341423,
5.00020607, 5.37300145, 4.94257872, 4.05335049, 5.65874513,
7.05585192, 3.07188578, 6.47767778, 5.15548659, 6.61809565,
4.63849661, 4.00552794, 7.4990428 , 5.55064212, 3.95348289,
6.77047746, 5.17235527, 4.08495527, 5.69626916, 5.27694599,
6.5455655 , 5.03440539, 3.79535853, 3.88216299, 5.66687272,
5.42595946, 3.98806174, 5.38019678, 4.85600623, 6.60249457,
4.43772189, 7.31528327, 5.45132995, 6.46431198, 5.05253691,
5.30463532, 5.59352554, 4.46884691, 4.4371996 , 5.15883358,
5.31511306, 6.16704347, 6.77732211, 6.10656385, 5.15621815,
5.46977566, 4.51159292, 3.94227825, 5.31667496, 5.67738107,
5.28083731, 6.15320115, 3.42747691, 5.72918766, 5.91724691,
4.83514468, 4.19425714, 4.59213095, 4.23491379, 4.44307898,
4.85002062, 3.68601655, 3.34430793, 4.44669012, 5.21651207,
4.39399604, 6.1542975 , 5.18053303, 4.56594122, 5.82571705,
5.27565334, 5.06206244, 3.30770505, 3.91693396, 6.98565476,
3.4926006 , 5.1485241 , 6.21774637, 4.28257422, 6.52841365,
4.62857032, 4.23136567, 3.82420662, 3.74394523, 5.0192126 ,
5.16862418, 5.14095923, 6.43276907, 5.84756387, 5.30127721,
5.2689874 , 3.70364905, 6.55048716, 6.30709324, 3.52150307,
5.95410199, 5.04133322, 4.25300762, 4.66354312, 5.79064765,
4.40433573, 4.45537903, 5.89482175, 5.87593757, 5.68857767,
4.86160107, 4.19193785, 5.17753232, 3.9977945 , 4.9442369 ,
6.07835905, 5.02438589, 6.22749511, 5.24584004, 3.63975901,
4.68905444, 4.78441954, 6.4812497 , 3.69047389, 4.88141576,
5.79649154, 5.04203019, 5.83245323, 3.9658845 , 3.81575922,
4.46942339, 3.53117414, 4.055562 , 5.39131185, 4.68754333,
4.53087311, 4.20410031, 6.27197988, 6.85397581, 4.85151187,
5.02835855, 5.27851599, 5.17836394, 4.06129462, 4.4375603 ,
7.08798937, 5.5102644 , 3.81629454, 5.56699322, 5.10566699,
4.86234081, 3.8165508 , 4.40874826, 3.41589827, 4.29403851,
2.93109546, 4.71142032, 4.21331257, 3.94113757, 4.79319043,
4.51598912, 5.84379645, 5.3311838 , 5.62069354, 4.56616148,
3.78008828, 4.12170311, 3.47164841, 6.45149467, 7.07439161,
5.15775446, 4.99605674, 3.68084454, 4.81140656, 5.31432729,
5.44268601, 4.28726986, 7.01091418, 3.61892038, 4.94733244,
4.32424272, 4.0247057 , 5.80500818, 5.6972797 , 5.65320886,
6.39972657, 3.3030268 , 5.06080049, 2.94627967, 5.61769648,
4.40227402, 3.50383268, 5.70211839, 4.11933258, 5.08979386,
4.2676073 , 4.36874 , 5.60255516, 4.0650777 , 3.69051336,
6.11521605, 5.13445683, 5.62023694, 5.67238026, 4.62919193,
6.29114967, 5.76445703, 5.33025919, 3.04761616, 5.74038603,
6.90571771, 4.88536199, 4.37548828, 4.03133291, 3.98986754,
5.18311004, 3.60657778, 4.68029741, 6.77247176, 4.05498816,
5.81458552, 5.68999805, 4.49644808, 7.23364798, 4.78154564,
4.30177813, 4.15465457, 4.53973133, 5.27878305, 4.19131211,
3.85860472, 6.69376269, 6.05901324, 5.08240134, 4.48290616,
4.74503704, 4.56161557, 4.76352048, 5.58432118, 6.72418505,
4.58035456, 6.77123094, 5.92590072, 5.92009114, 3.80774064,
7.94398176, 4.16758862, 4.72331011, 5.78362765, 3.21055947,
5.76164498, 6.4397337 , 4.44481261, 3.33294915, 6.01488334,
6.81204549, 2.42044915, 4.4403678 , 5.47945654, 4.07128056,
5.30664905, 6.41306649, 6.01389555, 4.4295987 , 7.10258643,
4.65877207, 4.96947419, 5.99469993, 6.2051869 , 5.70905434,
4.61033172, 5.73426407, 4.35125811, 4.53512318, 3.95150925,
6.78339022, 4.30245366, 5.76650037, 5.15131781, 5.52825882,
6.8477806 , 5.18687599, 4.53926858, 7.22687264, 5.63437343,
5.86334083, 5.63117323, 3.69713011, 6.21429763, 4.22379533,
4.58421616, 4.20044833, 4.88802996, 6.11973732, 4.91362631,
5.99532401, 5.23794912, 4.71085432, 5.65410814, 5.13094576,
4.77431904, 5.86111277, 5.20227685, 5.20100846, 3.51333093,
5.61254166, 5.17714846, 4.34431496, 4.04362843, 5.32914837,
4.45681308, 5.11813353, 6.10079781, 5.20358212, 3.62758645,
5.52902048, 4.01751996, 5.43673173, 4.99834851, 4.23534757,
4.06770858, 5.3580892 , 4.40710573, 5.43241561, 4.60487781,
6.48997503, 3.70168528, 5.23138227, 6.23080636, 4.74835939,
3.80599272, 3.29946934, 6.34557388, 4.93872346, 6.47946449,
6.41594052, 6.73218324, 3.04289943, 4.15115282, 5.10402299,
4.50899173, 4.5168367 , 4.65360808, 6.01393715, 6.4251474 ,
4.46519491, 5.05383961, 4.87180319, 3.27026279, 5.01396013,
4.34003878, 5.76289112, 5.32605534, 4.25412225, 5.12852744,
3.58240371, 3.23632483, 5.32664125, 4.8313264 , 3.84089868,
6.76103641, 4.67086501, 4.974855 , 5.90858545, 4.11203174,
3.47689329, 4.59854682, 3.79011385, 5.67780065, 4.04810594,
5.67560683, 4.41887216, 5.47302958, 3.9217647 , 4.22237868,
5.03075115, 5.40360709, 5.63968412, 5.95374395, 5.32658004,
5.41877689, 4.8850843 , 4.61497329, 4.62041275, 5.04798966,
5.41226352, 4.7350273 , 2.27956637, 3.28210161, 4.90247233,
5.97151358, 3.03264944, 4.59598116, 5.61863583, 5.10906549,
5.896816 , 5.0875292 , 6.55366492, 6.06902742, 4.61753771,
4.35189183, 4.70411494, 6.3450411 , 4.79457834, 4.91535206])
# Join nd1 (created earlier in the notebook) and nd2 end to end into
# one array, then echo it as the cell output.
nd3 = np.concatenate((nd1, nd2), axis=0)
nd3
输出
array([16.50002167, 21.95871306, 22.75958416, ..., 6.3450411 ,
4.79457834, 4.91535206])
# Wrap the combined samples in a pandas Series (default integer index)
# so the Series plotting helpers can be used on them.
s8 = Series(data=nd3)
s8
输出
0 16.500022
1 21.958713
2 22.759584
3 17.311623
4 21.113620
5 18.265816
6 14.367717
7 14.041937
8 24.472507
9 12.154959
10 9.272915
11 12.313423
12 10.209241
13 26.501067
14 15.738136
15 15.967169
16 16.691080
17 17.470095
18 23.255858
19 3.750134
20 25.121658
21 9.213113
22 20.805368
23 15.541248
24 28.903056
25 20.532467
26 11.801656
27 21.608022
28 11.175858
29 16.327408
...
1970 5.030751
1971 5.403607
1972 5.639684
1973 5.953744
1974 5.326580
1975 5.418777
1976 4.885084
1977 4.614973
1978 4.620413
1979 5.047990
1980 5.412264
1981 4.735027
1982 2.279566
1983 3.282102
1984 4.902472
1985 5.971514
1986 3.032649
1987 4.595981
1988 5.618636
1989 5.109065
1990 5.896816
1991 5.087529
1992 6.553665
1993 6.069027
1994 4.617538
1995 4.351892
1996 4.704115
1997 6.345041
1998 4.794578
1999 4.915352
Length: 2000, dtype: float64
# Density plot: draw the kernel density estimate (KDE) of s8.
s8.plot(kind='kde')
<matplotlib.axes._subplots.AxesSubplot at 0xda06e48>
# Histogram of s8 split into 70 bins (raw counts on the y-axis).
s8.hist(bins=70)
<matplotlib.axes._subplots.AxesSubplot at 0xd8187f0>
# Overlay the density curve and the histogram on the same axes.
s8.plot(kind='kde')
# Problem: next to a raw-count histogram the density curve looks flat
# along the bottom, because the two are on very different y scales.
# Fix: density=True normalizes the histogram so its total area is 1,
# which puts the bars on the same scale as the KDE curve.
# (`normed=True` was the old spelling; it was deprecated in matplotlib 2.1
# and removed in 3.1, so `density=True` is used instead.)
s8.hist(bins=70, density=True)
<matplotlib.axes._subplots.AxesSubplot at 0xd568a58>
## 散布图
# Build a 10-row frame of random integers in [0, 150) with one column
# per language, then echo it as the cell output.
df = DataFrame(
    data=np.random.randint(low=0, high=150, size=(10, 3)),
    columns=['Python', 'Java', 'PHP'],
)
df
Python | Java | PHP | |
---|---|---|---|
0 | 50 | 148 | 144 |
1 | 43 | 104 | 98 |
2 | 16 | 139 | 91 |
3 | 37 | 54 | 79 |
4 | 120 | 111 | 147 |
5 | 2 | 90 | 132 |
6 | 104 | 33 | 130 |
7 | 59 | 32 | 82 |
8 | 39 | 17 | 52 |
9 | 147 | 49 | 29 |
# Derive a 'C++' column as a linear function of 'Python'
# (0.7 * Python + 20), so the two columns are exactly linearly related.
df['C++'] = df['Python'].map(lambda score: score * 0.7 + 20)
df
Python | Java | PHP | C++ | |
---|---|---|---|---|
0 | 50 | 148 | 144 | 55.0 |
1 | 43 | 104 | 98 | 50.1 |
2 | 16 | 139 | 91 | 31.2 |
3 | 37 | 54 | 79 | 45.9 |
4 | 120 | 111 | 147 | 104.0 |
5 | 2 | 90 | 132 | 21.4 |
6 | 104 | 33 | 130 | 92.8 |
7 | 59 | 32 | 82 | 61.3 |
8 | 39 | 17 | 52 | 47.3 |
9 | 147 | 49 | 29 | 122.9 |
散布图:散布图是观察两个一维数据列之间的关系的有效方法。
注意:要使用 kind='scatter',并通过 x、y 参数给出列标签(columns)。
# A scatter plot needs both an x and a y column; it is used to compare
# the two columns against each other.
df.plot(kind='scatter',x='Python', y='Java')
<matplotlib.axes._subplots.AxesSubplot at 0xf094be0>
# Scatter plot of the 'Python' column against the 'C++' column.
df.plot(kind='scatter',x='Python', y='C++')
<matplotlib.axes._subplots.AxesSubplot at 0xfaccda0>
散布图矩阵:当有多个数据列时,用来观察各列两两之间的关系。
函数:pd.plotting.scatter_matrix(),注意参数 diagonal(对角线),可取 'hist' 或 'kde'。
# Scatter-plot matrix of all columns of df on a 12x9 inch figure.
pd.plotting.scatter_matrix(df, figsize=(12, 9))
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x000000000FE00198>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000000000FE38C50>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000000000FE5D860>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000000000FE86550>],
[<matplotlib.axes._subplots.AxesSubplot object at 0x000000000FE4A978>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000000000FE4A7F0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000000000FEF17F0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000000000FF1B470>],
[<matplotlib.axes._subplots.AxesSubplot object at 0x000000000FF4C160>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000000000FF76550>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000000000FFAD0F0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000000000FFD9C50>],
[<matplotlib.axes._subplots.AxesSubplot object at 0x00000000100137F0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000000001002AC18>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000000001007A9B0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000000001008FE80>]],
dtype=object)
# Same scatter-plot matrix, but with a KDE curve on the diagonal.
pd.plotting.scatter_matrix(df, figsize=(12, 9), diagonal='kde')
# Scatter plots help us find the relationship between two columns of data.
# 1. With too little data, two columns may look unrelated; with more data
#    a relationship may emerge.
# 2. More data does not necessarily make the relationship clearer; with a
#    very large amount of data,
#    the relationship may change.
最后
以上就是细心大船为你收集整理的pandas数据映射、聚合和绘图函数的全部内容,希望文章能够帮你解决pandas数据映射、聚合和绘图函数所遇到的程序开发问题。
如果觉得靠谱客网站的内容还不错,欢迎将靠谱客网站推荐给程序员好友。
发表评论 取消回复