pd.wide_to_long()
pd.wide_to_long(df, stubnames(列名前缀,可为字符串或列表;提取以该前缀开头的列), i(用作标识(索引)的列,可为单个列名或列表), j(列名去掉前缀后剩余的后缀会组成新的一列,在此指定该列的列名), sep(前缀与后缀之间的分隔符,默认为空字符串 ''), suffix(用于匹配后缀的正则表达式,默认为 r'\d+',即只匹配数字后缀))
In [34]:
# Wide-format demo frame: the "A" and "B" stub columns carry a year suffix
# (1970, 1980); "X" is an extra column that does not belong to any stub.
np.random.seed(123)  # fixed seed so the random "X" column is reproducible
df = pd.DataFrame(
    {
        "A1970": {0: "a", 1: "b", 2: "c"},
        "A1980": {0: "d", 1: "e", 2: "f"},
        "B1970": {0: 2.5, 1: 1.2, 2: 0.7},
        "B1980": {0: 3.2, 1: 1.3, 2: 0.1},
        "X": dict(zip(range(3), np.random.randn(3))),
    }
)
In [36]:
# Copy the row index into an explicit "id" column so it can be passed as
# the identifier column `i` to pd.wide_to_long.
df["id"] = df.index
# Display the frame, now including the "id" column.
df
Out[36]:
In [39]:
# Reshape wide -> long: columns starting with the stubs "A" and "B" are
# stacked into columns "A" and "B"; the remaining suffix of each column name
# goes into a new "year" level, keyed together with "id".
pd.wide_to_long(df, stubnames=["A", "B"], i="id", j="year")
Out[39]:
In [9]:
# Wide-format frame: one row per (famid, birth) pair, with the "ht"
# measurements spread across the suffixed columns "ht1" and "ht2".
df = pd.DataFrame(
    {
        "famid": [1, 1, 1, 2, 2, 2, 3, 3, 3],
        "birth": [1, 2, 3, 1, 2, 3, 1, 2, 3],
        "ht1": [2.8, 2.9, 2.2, 2, 1.8, 1.9, 2.2, 2.3, 2.1],
        "ht2": [3.4, 3.8, 2.9, 3.2, 2.8, 2.4, 3.3, 3.4, 2.9],
    }
)
In [10]:
# Display the current contents of `df`.
df
Out[10]:
In [11]:
# Reshape wide -> long: columns starting with the stub "ht" are stacked into
# a single "ht" column, identified by ("famid", "birth") plus a new "age"
# level that holds the suffix taken from each column name.
l = pd.wide_to_long(
    df,
    stubnames=["ht"],
    i=["famid", "birth"],
    j="age",
)
# Display the long-format result.
l
Out[11]:
In [45]:
# Go back from long to wide: pivot the innermost index level (the default,
# level=-1) into the columns.
w = l.unstack(level=-1)
# Display the re-widened frame.
w
Out[45]:
In [46]:
# Inspect the column labels of `w`.
w.columns
Out[46]:
In [47]:
# Flatten each column label by concatenating its first two elements
# (label[0] followed by label[1]) into a single string.
w.columns = w.columns.map(lambda label: '{0[0]}{0[1]}'.format(label))
# Inspect the flattened column labels.
w.columns
Out[47]:
In [23]:
# Move the index levels back into ordinary columns (drop=False keeps them
# as data instead of discarding them); returns a new frame, `w` is unchanged.
w.reset_index(drop=False)
Out[23]: