Tushare是一个免费、开源的Python财经数据接口包。主要实现对股票等金融数据从数据采集、清洗加工到数据存储的过程,能够为金融分析人员提供快速、整洁和多样的便于分析的数据,极大地减轻他们在数据获取方面的工作量,使他们更加专注于策略和模型的研究与实现。考虑到Python pandas包在金融量化分析中体现出的优势,Tushare返回的绝大部分数据格式都是pandas DataFrame类型。
使用示例:
# Example script: download daily K-line data with tushare, answer a few
# questions about it with pandas, back-test a naive "buy monthly, sell at
# year end" strategy, and locate 5-day / 30-day moving-average crossovers.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tushare as ts

# Number of shares in one lot ("1 手").
LOT_SIZE = 100

# Fetch the daily quotes of one stock with tushare and cache them as CSV.
df = ts.get_k_data('600519', start='2008-01-01')
print(type(df))
df.to_csv('600519.csv')
df = pd.read_csv(
    '600519.csv', index_col='date', parse_dates=['date']
)[['open', 'close', 'high', 'low']]
print(df)

# Dates on which the close was more than 3% above the open.
print(df[(df['close'] - df['open']) / df['open'] > 0.03].index)

# Series.shift(n) moves the values down n rows for positive n and up for
# negative n, so shift(1) lines each row up with the previous day's close.
# Dates on which the open gapped down 2% or more from the previous close.
prev_close = df['close'].shift(1)
print(df[(df['open'] - prev_close) / prev_close <= -0.02].index)

# Strategy: starting 2008-01-01, buy one lot on the first trading day of every
# month and sell everything on the last trading day of every year. What is the
# profit up to today?
# Latest available price, taken before trimming; used to value open positions.
price_last = df['open'].iloc[-1]
df = df.loc['2008-01':'2018-11']  # drop rows outside the back-test window

df_monthly = df.resample("MS").first()  # first trading day of each month
print("df_monthly 2008:")
print(df_monthly)

print("df_yearly:")
# Last trading day of each calendar year, indexed by the year number.
# Grouping on the year avoids the resample alias "A", which newer pandas
# versions deprecate.
yearly_last = df.groupby(df.index.year).last()
# The final year of the window is incomplete, so nothing is sold in it.
df_yearly = yearly_last.iloc[:-1]
print(df_yearly)

cost_money = 0
hold = 0
for year in yearly_last.index:
    bought = df_monthly.loc[df_monthly.index.year == year, 'open']
    cost_money += bought.sum() * LOT_SIZE
    # count() ignores months without any trading day, matching sum() above.
    hold += bought.count() * LOT_SIZE
    if year in df_yearly.index:
        # Sell the whole position at the open of the year's last trading day.
        cost_money -= df_yearly.loc[year, 'open'] * hold
        hold = 0
# Shares bought in the final, partial year are still held: value them at the
# latest price so the result reflects the return "up to today".
cost_money -= hold * price_last
print('cost_money: %s' % (0 - cost_money))

# 5-day and 30-day moving averages.
# NOTE(review): '601318.csv' is not written anywhere in this script; it is
# assumed to exist already (e.g. saved from ts.get_k_data('601318')).
df = pd.read_csv(
    '601318.csv', index_col='date', parse_dates=['date']
)[['open', 'close', 'low', 'high']]
print(df.head())
# rolling(n).mean() averages each row with the n-1 rows before it; the first
# n-1 rows have no full window and therefore stay NaN.
df['ma5'] = df['close'].rolling(5).mean()
df['ma30'] = df['close'].rolling(30).mean()
print(df.head(50))

# Plot the close price together with both moving averages.
df = df.iloc[:800]
df[['close', 'ma5', 'ma30']].plot()
plt.show()

# Golden cross: ma5 moves from below ma30 to at-or-above it.
# Death cross:  ma5 moves from at-or-above ma30 to below it.
# Version 1: explicit loop over consecutive days. Comparisons involving NaN
# are False, so the warm-up rows of the averages never register as a cross.
ma5 = df['ma5'].to_numpy()
ma30 = df['ma30'].to_numpy()
golden_cross = []
death_cross = []
for day, prev5, prev30, cur5, cur30 in zip(
    df.index[1:], ma5[:-1], ma30[:-1], ma5[1:], ma30[1:]
):
    if cur5 >= cur30 and prev5 < prev30:
        golden_cross.append(day.to_pydatetime())
    if cur5 < cur30 and prev5 >= prev30:
        death_cross.append(day.to_pydatetime())
print(golden_cross[:5])

# Version 2: the same test, vectorised. fill_value=False keeps the shifted
# masks boolean and makes the first row (which has no previous day) a non-event.
sr1 = df['ma5'] < df['ma30']
sr2 = df['ma5'] >= df['ma30']
death_cross = df[sr1 & sr2.shift(1, fill_value=False)].index
golden_cross = df[sr2 & sr1.shift(1, fill_value=False)].index
print(death_cross)