# The following is the program code for computing the hidden Markov model (HMM).
from hmmlearn.hmm import GaussianHMM
import datetime
import numpy as np
from matplotlib import cm, pyplot as plt
import matplotlib.dates as dates
import pandas as pd
import seaborn as sns  # import modules

# Sample window, passed unchanged to every DataAPI query below.
beginDate = '20100401'
endDate = '20160401'

# Daily index quotes for ticker '000001'.
# NOTE(review): DataAPI is not imported in this file -- presumably a global
# injected by the hosting research platform; confirm before running elsewhere.
data = DataAPI.MktIdxdGet(
    ticker='000001',
    beginDate=beginDate,
    endDate=endDate,
    field=['tradeDate', 'closeIndex', 'lowestIndex', 'highestIndex',
           'turnoverVol'],
    pandas="1",
)

# Shenzhen Stock Exchange margin-trading data; per the original author's note,
# 'tradeVal' is the margin-trading balance of the day.
data1 = DataAPI.FstTotalGet(
    exchangeCD=u"XSHE",
    beginDate=beginDate,
    endDate=endDate,
    field=['tradeVal'],
    pandas="1",
)

# Shanghai Stock Exchange margin-trading data; per the original author's note,
# 'tradeVal' is the margin-trading balance of the day.
data2 = DataAPI.FstTotalGet(
    exchangeCD=u"XSHG",
    beginDate=beginDate,
    endDate=endDate,
    field=['tradeVal'],
    pandas="1",
)

# Aggregate the two exchanges.
# NOTE(review): DataFrame addition aligns on the row index; this assumes both
# queries return the same trading days in the same order -- confirm.
tradeVal = data1 + data2

# Every series below is trimmed so that it starts at row 5 of `data`: the
# 5-day return needs five rows of look-back, and np.diff already drops one
# element, which is why the diff-based series are sliced with [4:] instead.
tradeDate = pd.to_datetime(data['tradeDate'][5:])
volume = data['turnoverVol'][5:]  # trading volume
closeIndex = data['closeIndex']  # closing price (full length; trimmed below)

# Daily log range: log(high) - log(low).
deltaIndex = (np.log(np.array(data['highestIndex']))
              - np.log(np.array(data['lowestIndex'])))[5:]

# 1-day log return.
logReturn1 = np.array(np.diff(np.log(closeIndex)))[4:]

# 5-day log return.
logReturn5 = (np.log(np.array(closeIndex[5:]))
              - np.log(np.array(closeIndex[:-5])))

# 1-day log change of the aggregated 'tradeVal' column.
# NOTE(review): assumes tradeVal has exactly as many rows as `data`;
# otherwise the features will not line up in the later column_stack -- confirm.
logReturnFst = (np.array(np.diff(np.log(tradeVal['tradeVal']))))[4:]

closeIndex = closeIndex[5:]  # trim the closing price to the common window
from scipy import stats  # To perform box-cox transformation
from sklearn import preprocessing  # import modules

# --- Data processing -------------------------------------------------------
# Box-Cox transform the (rescaled) volume, then standardize it to zero mean
# and unit variance.
boxcox_volume, lmbda = stats.boxcox(volume/100000000000)
# Standardize the observation sequence distribution
rescaled_boxcox_volume = preprocessing.scale(
    boxcox_volume, axis=0, with_mean=True, with_std=True, copy=False)

# Same treatment for the daily log high/low range.
# Note: `lmbda` is overwritten here; neither value is used in this block.
boxcox_deltaIndex, lmbda = stats.boxcox(deltaIndex)
# Standardize the observation sequence distribution
rescaled_boxcox_deltaIndex = preprocessing.scale(
    boxcox_deltaIndex, axis=0, with_mean=True, with_std=True, copy=False)

# Data integration: one row per trading day, one column per feature.
X = np.column_stack([
    logReturn1,
    logReturn5,
    rescaled_boxcox_deltaIndex,
    rescaled_boxcox_volume,
    logReturnFst,
])

# --- Model -----------------------------------------------------------------
# Make an HMM instance and execute fit
# NOTE(review): `fit([X])` is the legacy list-of-sequences calling convention;
# newer hmmlearn releases document `fit(X, lengths=None)` with a 2-D array.
# Left unchanged here -- confirm against the installed hmmlearn version.
model = GaussianHMM(n_components=5, covariance_type="diag",
                    n_iter=10000).fit([X])

# Predict the optimal sequence of internal hidden states (state estimation).
hidden_states = model.predict(X)

# --- Report ----------------------------------------------------------------
print("Transition matrix")
print(model.transmat_)
print()

#print("Means and vars of each hidden state")
for i in range(model.n_components):
    print("{0}th hidden state".format(i))
    print("mean = ", model.means_[i])
    print("var = ", np.diag(model.covars_[i]))

# --- Plot: closing price coloured by decoded hidden state --------------------
plt.figure(figsize=(15, 8))
plt.title('hidden states')
for i in range(model.n_components):
    idx = (hidden_states == i)  # boolean mask of the days decoded as state i
    plt.plot_date(tradeDate[idx], closeIndex[idx], '.',
                  label='%dth hidden state' % i, lw=1)
plt.legend()
plt.grid(True)

# Bare expression kept from the original; presumably there to display the
# matrix as the output of a notebook cell.
model.transmat_
import xlwt
import xlrd

# --- Export the fitted transition matrix to an .xls workbook -----------------
wb3 = xlwt.Workbook()
wb3.add_sheet('first', cell_overwrite_ok=True)
ws_1 = wb3.get_sheet(0)
for r in range(model.transmat_.shape[0]):  # rows
    for c in range(model.transmat_.shape[1]):  # columns
        ws_1.write(r, c, model.transmat_[r, c])
# NOTE(review): the target directory must already exist; the path (including
# the space in it) is reproduced exactly as in the original.
wb3.save('文件夹 0/上证指数转移概率矩阵.xls')

# --- Consolidate the data into one frame indexed by trade date ---------------
res = pd.DataFrame({
    'tradeDate': tradeDate,
    'logReturn1': logReturn1,
    'logReturn5': logReturn5,
    'volume': volume,
    'zstate': hidden_states,
}).set_index('tradeDate')

# --- Per-state backtest: cumulative return of holding only after state i -----
plt.figure(figsize=(15, 8))
plt.xlabel('time')
plt.ylabel('earninigs multiplier')
for i in range(model.n_components):
    idx = (hidden_states == i)
    # Shift the signal by one day: the position is taken on the day after the
    # state is observed, so the first day never holds a position.
    idx = np.append(0, idx[:-1])
    #fast factor backtest
    df = res.logReturn1
    # Daily log return on days with a position, 0 otherwise.
    res['sig_ret%s' % i] = df.multiply(idx, axis=0)
    # exp(cumsum(log returns)) is the cumulative gross return multiple.
    plt.plot(np.exp(res['sig_ret%s' % i].cumsum()),
             label='%dth hidden state' % i)
plt.legend()
plt.grid(1)