我是靠谱客的博主淡然万宝路。最近在开发中收集了这篇文章，主要介绍 pandas 的轴向连接（concat）、合并重叠数据（combine_first）以及重塑层次化索引（stack/unstack），觉得挺不错的，现在分享给大家，希望可以做个参考。

概述


# encoding=utf-8
import numpy as np
import pandas as pd
# Axis-wise concatenation
# 1. NumPy: join two copies of the same 3x4 array side by side.
arr = np.arange(12).reshape((3, 4))
# str.format keeps the call valid on both Python 2 and Python 3.
print('arr:=\n{}'.format(arr))
# axis=1 appends columns, so the result has 3 rows and 8 columns.
arr_concatenate_arr = np.concatenate([arr, arr], axis=1)
# 2. pandas: concatenating Series whose indexes do not overlap.
s1 = pd.Series([0, 1], index=['a', 'b'])
s2 = pd.Series([2, 3, 4], index=['c', 'd', 'e'])
s3 = pd.Series([5, 6], index=['f', 'g'])
# axis=1 puts each Series in its own column; the rows are the union of the
# three indexes, with NaN wherever a Series has no value for a label.
pd_s123 = pd.concat([s1, s2, s3], axis=1)
print('pd_s123:=\n{}'.format(pd_s123))
# The default axis=0 glues the values end to end into one longer Series.
s4 = pd.concat([s1 * 5, s3])
print('s4:=\n{}'.format(s4))
# s1 and s4 share the labels 'a' and 'b', so only 'f' and 'g' get NaN for s1.
s1_concat_s4 = pd.concat([s1, s4], axis=1)
print('s1_concat_s4:=\n{}'.format(s1_concat_s4))
# 3. keys= adds an outer index level recording which input each value
#    came from.
result = pd.concat([s1, s2, s3], keys=['one', 'two', 'three'])
print('result:=\n{}'.format(result))
# unstack() pivots the inner labels into columns: one row per key.
unstackresult = result.unstack()
print('unstackresult:=\n{}'.format(unstackresult))
# 4. The same call with the default axis written out explicitly.
result = pd.concat([s1, s2, s3], axis=0, keys=['one', 'two', 'three'])
print('result:=\n{}'.format(result))
# Two frames with different columns and partially overlapping row labels.
df1 = pd.DataFrame(
    np.arange(6).reshape(3, 2),
    index=['a', 'b', 'c'],
    columns=['one', 'two'],
)
df2 = pd.DataFrame(
    5 + np.arange(4).reshape(2, 2),
    index=['a', 'c'],
    columns=['three', 'four'],
)
print('df1:=\n{}'.format(df1))
print('df2:=\n{}'.format(df2))
# With axis=1, keys= becomes the outer level of the column index and names=
# labels the two column levels. Passing a dict such as
# {'level1': df1, 'level2': df2} instead of a list plus keys= produces the
# same outer level from the dict keys.
df1_concat_df2 = pd.concat(
    [df1, df2],
    axis=1,
    keys=['level1', 'level2'],
    names=['upper', 'lower'],
)
print('df1_concat_df2:=\n{}'.format(df1_concat_df2))
# 5. Frames whose row labels carry no information of their own.
df1 = pd.DataFrame(np.random.randn(3, 4), columns=['a', 'b', 'c', 'd'])
df2 = pd.DataFrame(np.random.randn(2, 3), columns=['b', 'd', 'a'])
print('df1:=\n{}'.format(df1))
print('df2:=\n{}'.format(df2))
# ignore_index=True throws both row indexes away and renumbers the rows.
df1_concat_df2 = pd.concat([df1, df2], ignore_index=True)
print('df1_concat_df2:=\n{}'.format(df1_concat_df2))
# ignore_index=False keeps the input labels, so 0 and 1 appear twice.
# Columns are matched by name either way; df2 has no 'c', so it gets NaN.
df1_concat_df2 = pd.concat([df1, df2], ignore_index=False)
print('df1_concat_df2:=\n{}'.format(df1_concat_df2))
# Combining overlapping data
# 1. Two Series over the same labels, each with gaps of its own.
a = pd.Series(
    [np.nan, 2.5, np.nan, 3.5, 4.5, np.nan],
    index=['f', 'e', 'd', 'c', 'b', 'a'],
)
b = pd.Series(
    np.arange(len(a), dtype=np.float64),
    index=['f', 'e', 'd', 'c', 'b', 'a'],
)
# Blank the last element by position. With string labels a bare b[-1] is
# ambiguous between "label -1" and "last position", so use .iloc explicitly.
b.iloc[-1] = np.nan
print('a:=\n{}'.format(a))
print('b:=\n{}'.format(b))
# np.where(pd.isnull(a), b, a) would pick b wherever a is null, but it works
# on raw positions and returns a plain array without index alignment.
# 2. Patching missing data: combine_first aligns on labels, keeps the
#    caller's values and fills its gaps from the argument.
b_patched_by_a = b.iloc[:-2].combine_first(a.iloc[2:])
print('b[:-2].combine_first(a[2:]):=\n{}'.format(b_patched_by_a))
# 3. The same operation column by column on DataFrames.
df1 = pd.DataFrame(
    {
        'a': [1., np.nan, 5., np.nan],
        'b': [np.nan, 2., np.nan, 6.],
        'c': list(range(2, 18, 4)),
    }
)
df2 = pd.DataFrame(
    {
        'a': [5., 4., np.nan, 3., 7.],
        'b': [np.nan, 3., 4., 6., 8.],
    }
)
# Keep and show the result; as a bare expression in a script it was
# computed and immediately discarded.
df1_combine_df2 = df1.combine_first(df2)
print('df1.combine_first(df2):=\n{}'.format(df1_combine_df2))
# Reshaping with hierarchical indexes
#   stack:   pivots the columns of the data into the rows
#   unstack: pivots the rows of the data into the columns
data = pd.DataFrame(
    np.arange(6).reshape((2, 3)),
    index=pd.Index(['Ohio', 'Colorado'], name='state'),
    columns=pd.Index(['one', 'two', 'three'], name='number'),
)
print('data:=\n{}'.format(data))
# stack() moves the 'number' columns into an inner index level, giving a
# Series indexed by (state, number).
result = data.stack()
print('result:=\n{}'.format(result))
# unstack() pivots the innermost level by default; a level number or a level
# name selects a different one.
print('result.unstack():=\n{}'.format(result.unstack()))
print('result.unstack(0):=\n{}'.format(result.unstack(0)))
print("result.unstack('state') {}".format(result.unstack('state')))
# 2. Unstacking when the groups do not share all inner labels.
s1 = pd.Series([0, 1, 2, 3], index=['a', 'b', 'c', 'd'])
s2 = pd.Series([4, 5, 6], index=['c', 'd', 'e'])
data2 = pd.concat([s1, s2], keys=['one', 'two'])
print('data2:=\n{}'.format(data2))
# Labels missing from a group ('e' for 'one'; 'a' and 'b' for 'two') show up
# as NaN in the unstacked frame.
print('data2.unstack():=\n{}'.format(data2.unstack()))
print('data2.unstack().stack():=\n{}'.format(data2.unstack().stack()))
# Keep the missing values instead of dropping them.
# NOTE: `dropna` belongs to the legacy stack implementation; pandas >= 2.1
# also offers stack(future_stack=True), which does not accept `dropna`.
print('data2.unstack().stack(dropna=False):=\n{}'.format(
    data2.unstack().stack(dropna=False)))
# 3. Unstacking a DataFrame by level name. `result` is the stacked
#    (state, number) Series built earlier in this script.
print('result:=\n{}'.format(result))
df = pd.DataFrame(
    {
        'left': result,
        'right': result + 5,
    },
    columns=pd.Index(['left', 'right'], name='side'),
)
print('df:=\n{}'.format(df))
# Unstacking 'state' moves it under the existing 'side' column level.
print("df.unstack('state'):=\n{}".format(df.unstack('state')))
# Stacking 'side' afterwards moves that column level into the rows, leaving
# 'state' as the only column level.
print("df.unstack('state').stack('side'):=\n{}".format(
    df.unstack('state').stack('side')))
print('test')

最后

以上就是淡然万宝路为你收集整理的 pandas 轴向连接的全部内容，希望本文能够帮你解决使用 pandas 轴向连接时遇到的程序开发问题。

如果觉得靠谱客网站的内容还不错,欢迎将靠谱客网站推荐给程序员好友。

本图文内容来源于网友提供,作为学习参考使用,或来自网络收集整理,版权属于原作者所有。
点赞(53)

评论列表共有 0 条评论

立即
投稿
返回
顶部