这里主要用到 `numpy.random.choice`。
可以根据需求为数据新增一列，例如以 A、B、C 作为该列的取值，并按指定的概率随机生成。
详见下方代码：
import numpy as np
import pandas as pd


def add_random_column(frame, column, choices, probabilities):
    """Add ``column`` to ``frame`` in place with one random label per row.

    Each row's label is drawn independently from ``choices`` using
    ``numpy.random.choice`` with the weights given in ``probabilities``.

    Args:
        frame: DataFrame to modify in place.
        column: Name of the column to create (or overwrite).
        choices: Sequence of candidate labels.
        probabilities: Weight for each entry of ``choices``; passed as ``p``
            to ``numpy.random.choice``, which requires them to sum to 1.

    Returns:
        None. ``frame`` is mutated.
    """
    # Assign the raw array rather than wrapping it in ``pd.Series``: a Series
    # is aligned on index labels, which would produce NaN whenever the frame's
    # index is not the default 0..n-1 range.
    frame[column] = np.random.choice(choices, size=len(frame), p=probabilities)


if __name__ == "__main__":
    columns_user = ['number', 'gender', 'date']
    columns_order = ['order_id', 'user_id', 'ispaid', 'price', 'paidtime']

    # Column names are supplied via ``names=`` for both CSV files.
    user_info = pd.read_csv('user_info_utf.csv', names=columns_user)
    order_info = pd.read_csv('order_info_utf.csv', names=columns_order)

    # Add a "gender" column: 30% Male, 70% Female.
    add_random_column(order_info, 'gender', ['Male', 'Female'], [0.3, 0.7])
    print(order_info['gender'].value_counts())

    # Add a "shop" column: A 50%, B 30%, C 20%.
    add_random_column(order_info, 'shop', ['A', 'B', 'C'], [0.5, 0.3, 0.2])
    print(order_info['shop'].value_counts())

    print(order_info.head())