【原创】利用Python开发一个配对股票分析的小程序（三）

接着进入主程序的开发。

#coding=utf-8
import os,time,sys,re,datetime
import csv #处理CSV文件
import scipy
import numpy as np
from matplotlib.font_manager import FontProperties
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from matplotlib.dates import YearLocator, MonthLocator,DateFormatter,AutoDateLocator
import MyStatis as MS

# Read all CSV files in the directory
def Open_Csv(Dist):
    """Load the price series of every ``*.csv`` file found directly in *Dist*.

    The first six characters of a file name are used as the stock code.
    The first row of each file is skipped (presumably a header row).  For
    every later row, column 0 is kept as the date string and column 3 is
    converted to ``float`` as the price.  Column 2 of the first data row is
    decoded from GB2312 and stored as the stock name.

    Returns:
        A tuple ``(Array_List, Code_List, Code_Name_List)`` where
        ``Array_List[k]`` is the ``[[date, price], ...]`` list of the k-th
        file after ``Handle_Zero`` has patched zero prices, ``Code_List[k]``
        is its stock code and ``Code_Name_List`` maps code -> name.
    """
    Array_List = []
    Code_List = []
    Code_Name_List = {}
    for item in os.listdir(Dist):  # walk the given directory
        # os.path.join gives the same path as Dist+item when Dist already
        # ends with a separator, and also works when it does not.
        path = os.path.join(Dist, item)
        if not (os.path.isfile(path) and item.endswith('.csv')):
            continue
        code = str(item)[0:6]
        Code_List.append(code)
        Array = []
        # Binary mode is what the Python 2 csv module expects; the with
        # block closes the file even when a row fails to parse.
        with open(path, 'rb') as f:
            for count, line in enumerate(csv.reader(f)):
                if count == 0:
                    continue
                Array.append([line[0], float(line[3])])
                if count == 1:
                    # Encoding/decoding here is delicate and easily yields
                    # mojibake; the files are assumed to be GB2312.
                    Code_Name_List[code] = line[2].decode('gb2312')
        Array_List.append(Handle_Zero(Array))  # patch zero prices
    return (Array_List, Code_List, Code_Name_List)

def Handle_Zero(Array):
    """Replace zero prices in a ``[[date, price], ...]`` list, in place.

    Rules, applied in this order:

    * row 1, if zero, takes the price of row 2;
    * row 0, if zero, takes the price of row 1 (done after row 1 so that a
      leading double zero is filled as well);
    * the last row, if zero, takes the price of the row before it;
    * every remaining interior zero becomes the mean (rounded to 2
      decimals) of its two neighbours, or of the two preceding rows when
      the following price is itself not above 0.001.

    Series with fewer than two rows are returned untouched instead of
    raising ``IndexError``.

    Returns:
        The same list object that was passed in.
    """
    size = len(Array)
    if size < 2:
        # Nothing to borrow a replacement price from.
        return Array

    if size > 2 and Array[1][1] == 0:
        Array[1][1] = Array[2][1]
    if Array[0][1] == 0:
        Array[0][1] = Array[1][1]
    if Array[size - 1][1] == 0:
        Array[size - 1][1] = Array[size - 2][1]

    for i in range(2, size - 1):
        if float(Array[i][1]) == 0:
            if Array[i + 1][1] > 0.001:
                Array[i][1] = round((Array[i - 1][1] + Array[i + 1][1]) / 2, 2)
            else:
                # The next price is missing too: fall back to the two
                # previous (already patched) prices.
                Array[i][1] = round((Array[i - 1][1] + Array[i - 2][1]) / 2, 2)

    return Array

def Generate_Diff(Dist, Array, Code_List, Code_Name_List, Min):
    """Build the pairwise price-difference table for all stocks.

    Only the first ``MinLen`` rows of every series are used, where
    ``MinLen`` is the smaller of *Min* and the shortest series length.  Rows
    are emitted in reverse file order (row ``MinLen-1`` first, row 0 last).

    Args:
        Dist: unused; kept so existing callers keep working.
        Array: list of ``[[date, price], ...]`` series, one per stock.
        Code_List: stock codes, parallel to *Array*.
        Code_Name_List: mapping code -> unicode stock name.
        Min: upper bound on the number of rows taken from each series.

    Returns:
        A tuple ``(Array_Diff, Two_Code_List, Array_Short)``:

        * ``Array_Diff[0]`` is the list of dates; each following row holds
          the differences ``price_i - price_j`` (rounded to 2 decimals) for
          one pair ``i < j``.
        * ``Two_Code_List[p]`` is the GB2312 label
          ``"<code_i> <name_i> - <code_j> <name_j>"`` of pair ``p``.
        * ``Array_Short[0]`` is the same date list; the following rows are
          the truncated price series, one per stock.

    Raises:
        ValueError: if *Array* is empty.
    """
    if not Array:
        raise ValueError('Generate_Diff needs at least one price series')

    lengths = [len(series) for series in Array]
    MinLen = min(min(lengths), Min)
    # Find which stock was listed latest (i.e. has the shortest series).
    MinIndex = MS.Get_MinIndex(lengths)

    # Change this ordering if the chronological direction has to be flipped.
    order = range(MinLen - 1, -1, -1)

    dates = [Array[MinIndex][row][0] for row in order]
    Array_Diff = [dates]   # first row: the dates
    Array_Short = [dates]

    Two_Code_List = []
    stock_count = len(Array)
    for i in range(stock_count - 1):
        for j in range(i + 1, stock_count):
            Array_Diff.append(
                [round(Array[i][row][1] - Array[j][row][1], 2) for row in order])
            # Draw_Trend slices the second code at index('-') + 2, so the
            # separator between the two stocks must be ' - '.
            Two_Code_List.append(
                Code_List[i] + ' ' + Code_Name_List[Code_List[i]].encode('gb2312')
                + ' - '
                + Code_List[j] + ' ' + Code_Name_List[Code_List[j]].encode('gb2312'))

    for series in Array:
        Array_Short.append([series[row][1] for row in order])

    return (Array_Diff, Two_Code_List, Array_Short)

def Statis_Normal(Array_Diff):
    """Compute the basic statistics of every difference series.

    ``Array_Diff[0]`` (the dates) is skipped.  For each following row an
    11-element list is produced:

    1 mean, 2 standard deviation, 3 minimum, 4-6 ``MS.Proportion`` at 0.25,
    0.5 and 0.75 (presumably the quartiles - confirm against MyStatis),
    7 maximum, 8 number of values, 9-11 ``MS.Percent_Sigma`` for 1, 2 and 3
    standard deviations around the mean (presumably the share of values
    inside that band - confirm against MyStatis).

    Mean, standard deviation, minimum and maximum are rounded to 2 decimals.

    Returns:
        A list with one statistics row per difference series.
    """
    Statis_List = []
    for series in Array_Diff[1:]:
        mean = round(np.mean(series), 2)
        std = round(np.std(series), 2)
        Statis_List.append([
            mean,
            std,
            round(np.min(series), 2),
            MS.Proportion(series, 0.25),
            MS.Proportion(series, 0.5),
            MS.Proportion(series, 0.75),
            round(np.max(series), 2),
            len(series),
            MS.Percent_Sigma(series, mean, std),
            MS.Percent_Sigma(series, mean, (2 * std)),
            MS.Percent_Sigma(series, mean, (3 * std)),
        ])
    return Statis_List

# Count the runs where the difference stays above mean+t*std or below
# mean-t*std for at least T consecutive values: how many, start date, length.
def _Run_End(values, start, is_hit):
    """Return the index just past the run of consecutive hits starting at *start*."""
    pos = start
    while pos < len(values) and is_hit(values[pos]):
        pos += 1
    return pos


def Statis_Trend(Array_Diff, Statis_List, Two_Code_List, t, T):
    """Append run statistics for one (t, T) setting to *Statis_List*.

    For every difference series ``Array_Diff[m+1]`` the band
    ``mean +/- round(t*std, 2)`` is built from ``Statis_List[m][0]`` and
    ``Statis_List[m][1]``.  A value counts as "exceed" when it is not more
    than 0.0001 below the upper bound and as "under" when it is not more
    than 0.0001 above the lower bound.  Runs of at least *T* consecutive
    exceed (or under) values are recorded.

    Side effects (the list is modified in place and also returned):

    * ``Statis_List[m]`` gains six columns, in the order announced by the
      output headers: exceed count, exceed total days, exceed share of the
      series, under count, under total days, under share of the series.
    * One extra row per pair is appended at the end of *Statis_List*:
      ``[pair label, start date, ' + exceed ' or ' - under ', run length,
      ...]`` with one triple per recorded run, in chronological scan order.

    Args:
        Array_Diff: dates in row 0, one difference series per following row.
        Statis_List: rows from ``Statis_Normal`` (possibly already extended
            by earlier calls of this function).
        Two_Code_List: pair labels, parallel to the difference series.
        t: multiplier applied to the standard deviation.
        T: minimum run length in values; must be at least 1.

    Raises:
        ValueError: if *T* is smaller than 1.
    """
    if T < 1:
        raise ValueError('T must be at least 1')

    TOLERANCE = 0.0001

    for m in range(len(Array_Diff) - 1):
        series = Array_Diff[m + 1]
        total = len(series)
        mean = Statis_List[m][0]
        std = round(float(t) * Statis_List[m][1], 2)
        Urange = mean + std
        Lrange = mean - std

        def above(value, limit=Urange):
            return (value - limit) >= -TOLERANCE

        def below(value, limit=Lrange):
            return (value - limit) <= TOLERANCE

        Utimes = 0     # number of runs
        Ltimes = 0
        Unum = 0.00    # total days
        Lnum = 0.00
        Temp_Array = [Two_Code_List[m]]

        i = 0
        # "<=" so that the window ending on the very last value is tested too.
        while i <= total - T:
            tail = series[i + T - 1]
            if above(tail):
                is_hit, tag = above, ' + exceed '
            elif below(tail):
                is_hit, tag = below, ' - under '
            else:
                # The window's last value is inside the band, so no
                # qualifying run can start anywhere in this window.
                i += T
                continue

            if not is_hit(series[i]):
                i += 1
                continue

            j = _Run_End(series, i, is_hit)   # first index after the run
            if j - i >= T:
                Temp_Array.append(Array_Diff[0][i])   # start date
                Temp_Array.append(tag)
                Temp_Array.append(j - i)
                if is_hit is above:
                    Utimes += 1
                    Unum += j - i
                else:
                    Ltimes += 1
                    Lnum += j - i
            # Either way, resume at the first value that broke the run.
            i = j

        # Exceed columns first, then under columns, matching the headers
        # written by Output_Txt / Output_Csv.
        Statis_List[m].append(Utimes)
        Statis_List[m].append(Unum)
        Statis_List[m].append(round(Unum / float(total), 2) if total else 0.0)

        Statis_List[m].append(Ltimes)
        Statis_List[m].append(Lnum)
        Statis_List[m].append(round(Lnum / float(total), 2) if total else 0.0)

        Statis_List.append(Temp_Array)

    return Statis_List

def Statis_Diff(Array_Diff):
    """Compute the statistics of the difference table.

    Currently this only delegates to ``Statis_Normal`` (basic statistics and
    proportions) and returns its result unchanged.
    """
    return Statis_Normal(Array_Diff)
# Shut the machine down
def Close_machine():
    """Run the Windows ``shutdown -s`` command through ``os.system``."""
    os.system("c:\\windows\\system32\\shutdown -s")

def Trend_Change():  # reversal probability of the median
    """Placeholder: this statistic has not been written yet.

    Returns:
        ``None``; the function currently does nothing.
    """
    return None

def Draw_Image(Array_Diff, Array_Short, Statis_List, Code_List, Code_Name_List, Two_Code_List, t, T, Dist):
    """Render every chart for one data set into the directory prefix *Dist*.

    Calls, in order, the difference chart, the difference trend chart and
    the two-line price comparison chart, then prints a confirmation line.
    """
    # NOTE(review): Draw_Diff is not defined in the part of the file visible
    # here - confirm it exists elsewhere, otherwise this call raises NameError.
    Draw_Diff(Array_Diff, Statis_List, Two_Code_List, Dist)  # difference chart
    Draw_Trend(Array_Diff, Statis_List, Two_Code_List, t, T, Dist)  # difference trend chart
    Draw_TwoLine(Array_Short, Statis_List, Code_List, Code_Name_List, Two_Code_List, Dist)  # two-line comparison chart
    print('DrawFile Successfully')

def Draw_Trend(Array_Diff, Statis_List, Two_Code_List, t, T, Dist):
    """Plot each difference series with its mean and sigma bands.

    One JPEG per stock pair is written to ``Dist + <pair label> + '.jpg'``.
    Each figure shows the difference series in red, the mean as a solid
    black line and, for every factor in *t*, the dashed lines
    ``mean +/- factor * std``.

    Args:
        Array_Diff: dates in row 0, one difference series per following row.
        Statis_List: statistics rows; columns 0 and 1 (mean, std) are used.
        Two_Code_List: GB2312-encoded pair labels, parallel to the series.
        t: iterable of sigma multipliers to draw as bands.
        T: unused here; kept for interface compatibility.
        Dist: output directory prefix (must end with a path separator).
    """
    Font = FontProperties(fname=r"c:\windows\fonts\simsun.ttc", size=14)
    Dates = matplotlib.dates.datestr2num(Array_Diff[0])
    points = len(Array_Diff[0])

    for i in range(1, len(Array_Diff)):
        pair = Two_Code_List[i - 1]
        mean = Statis_List[i - 1][0]
        std = Statis_List[i - 1][1]

        plt.figure(figsize=(10.24, 7.68), dpi=300)

        # One dashed pair of lines per sigma multiplier.  (The former
        # Urange1..3 / Lrange1..3 plots referenced undefined names and
        # duplicated this loop, so they were removed.)
        for factor in t:
            plt.plot_date(Dates, [mean + factor * std] * points, 'k', linestyle='--')
            plt.plot_date(Dates, [mean - factor * std] * points, 'k', linestyle='--')

        # Legend shows only the two 6-character codes taken from the label.
        second = pair.index('-') + 2
        plt.plot_date(Dates, Array_Diff[i], 'r', linestyle='-',
                      label=(pair[0:6] + '- ' + pair[second:second + 6]))
        plt.plot_date(Dates, [mean] * points, 'k', linestyle='-')

        plt.legend()
        plt.title(pair.decode('gb2312') + u' 价格差波动图', fontproperties=Font)
        plt.xlabel(u'日期', fontproperties=Font)
        plt.ylabel(u'价格差', fontproperties=Font)
        plt.grid(True)
        plt.gca().xaxis.set_major_formatter(DateFormatter('%Y-%m-%d'))
        plt.gcf().autofmt_xdate()
        plt.savefig(Dist + pair.decode('gb2312') + '.jpg')
        plt.close()

def Draw_TwoLine(Array_Short, Statis_List, Code_List, Code_Name_List, Two_Code_List, Dist):
    """Plot the two price series of every stock pair on one chart.

    For pair ``(i, j)`` the first series is shifted down by the pair's mean
    difference (``Statis_List[k][0]``) and drawn in red; the second series
    is drawn unchanged in blue.  One JPEG per pair is written to
    ``Dist + <pair label> + u'双值.jpg'``.

    Args:
        Array_Short: dates in row 0, one price series per following row.
        Statis_List: statistics rows, one per pair in the same pair order.
        Code_List: stock codes, used for the legend.
        Code_Name_List: unused here; kept for interface compatibility.
        Two_Code_List: GB2312-encoded pair labels.
        Dist: output directory prefix (must end with a path separator).
    """
    Font = FontProperties(fname=r"c:\windows\fonts\simsun.ttc", size=14)
    Dates = matplotlib.dates.datestr2num(Array_Short[0])

    k = 0  # running pair index, same enumeration order as Generate_Diff
    for i in range(1, (len(Array_Short) - 1)):
        for j in range((i + 1), len(Array_Short)):
            mean = Statis_List[k][0]
            shifted = (np.array(Array_Short[i]) - mean).tolist()

            plt.figure(figsize=(10.24, 7.68), dpi=300)
            plt.plot_date(Dates, shifted, 'r', linestyle='-',
                          label=(Code_List[i - 1] + '-(' + str(mean) + ')'))
            plt.plot_date(Dates, Array_Short[j], 'b', linestyle='-',
                          label=(Code_List[j - 1]))

            plt.legend()
            plt.title(Two_Code_List[k].decode('gb2312') + u' 价格图', fontproperties=Font)
            plt.xlabel(u'日期', fontproperties=Font)
            plt.ylabel(u'价格', fontproperties=Font)
            plt.grid(True)
            plt.gca().xaxis.set_major_formatter(DateFormatter('%Y-%m-%d'))
            plt.gcf().autofmt_xdate()
            plt.savefig(Dist + Two_Code_List[k].decode('gb2312') + u'双值.jpg')
            plt.close()
            k = k + 1

def Output_File(Array_Diff, Array_Short, Statis_List, Code_List, Code_Name_List, Two_Code_List, Sigma_List, Time_List, Dist):
    """Write both result files (text and CSV) for one data set into *Dist*.

    Delegates to ``Output_Txt`` and ``Output_Csv`` with the same arguments,
    then prints a confirmation line.
    """
    Output_Txt(Array_Diff, Array_Short, Statis_List, Code_List, Code_Name_List, Two_Code_List, Sigma_List, Time_List, Dist)
    Output_Csv(Array_Diff, Array_Short, Statis_List, Code_List, Code_Name_List, Two_Code_List, Sigma_List, Time_List, Dist)
    print('Output File Successfully')

def Output_Txt(Array_Diff, Array_Short, Statis_List, Code_List, Code_Name_List, Two_Code_List, Sigma_List, Time_List, Dist):
    """Dump all results as text to ``Dist + 'output.txt'``.

    The file contains, in order: a timestamp line, the stock codes, the
    truncated price table, the concatenated pair labels, the difference
    table, a GB2312-encoded column header and the statistics list.

    The number of (sigma, days) header groups is derived from the width of
    ``Statis_List[0]``: 11 basic columns plus 6 columns per trend pass.

    Args:
        Sigma_List, Time_List: the sigma multipliers and minimum durations
            that were passed to ``Statis_Trend``, in call order.
        Dist: output directory prefix (must end with a path separator).
        (Remaining arguments are the tables produced by the analysis;
        ``Code_Name_List`` is unused.)
    """
    CURRENTDAY = datetime.date.today().strftime('%Y-%m-%d')
    TIME = time.strftime("%H:%M:%S")

    # Column names are separated by spaces so the header stays readable.
    Str = '均值 标准差 最小值 25% 中位数 75% 最大值 数组长度 1sigma 2sigma 3sigma'.decode('utf-8').encode('gb2312')
    # Floor division keeps the group count an integer on every Python version.
    Len = (len(Statis_List[0]) - 11) // 6
    for i in range(Len):
        Str = Str + (' 波动幅度超过 +' + str(Sigma_List[i]) + '*Sigma 且持续时间超过' + str(Time_List[i]) + '天的次数 天数 所占比例'
                     + ' 波动幅度超过 -' + str(Sigma_List[i]) + '*Sigma 且持续时间超过' + str(Time_List[i]) + '天的次数 天数 所占比例'
                     ).decode('utf-8').encode('gb2312')

    # Write to the local file; the with block closes it even on errors.
    with open(Dist + 'output.txt', 'wb') as fp:
        fp.write('------------------------------\n' + CURRENTDAY + " " + TIME + ' \n')

        fp.write(str(Code_List))
        fp.write('\n')
        fp.write(str(Array_Short))
        fp.write('\n')
        for label in Two_Code_List:
            fp.write(label)
        fp.write('\n')
        fp.write(str(Array_Diff))
        fp.write('\n')

        fp.write(Str)
        fp.write(str(Statis_List))
        fp.write('\n------------------------------------------------------------------------\n')

def Output_Csv(Array_Diff, Array_Short, Statis_List, Code_List, Code_Name_List, Two_Code_List, Sigma_List, Time_List, Dist):
    """Dump all results as CSV to ``Dist + 'output.csv'``.

    Rows written, in order: the stock codes, the pair labels, the column
    header, every row of *Statis_List*, every row of *Array_Short* and
    every row of *Array_Diff*.

    The header has 11 basic columns plus, for each trend pass, three
    "exceed" columns followed by three "under" columns; the number of
    passes is derived from the width of ``Statis_List[0]``.

    Args:
        Sigma_List, Time_List: the sigma multipliers and minimum durations
            that were passed to ``Statis_Trend``, in call order.
        Dist: output directory prefix (must end with a path separator).
        (Remaining arguments are the tables produced by the analysis;
        ``Code_Name_List`` is unused.)
    """
    Str = ['mean', 'std', 'min', '25%', '50%', '75%', 'max', 'length', '1sigma', '2sigma', '3sigma']
    # Floor division keeps the group count an integer on every Python version.
    Len = (len(Statis_List[0]) - 11) // 6
    for i in range(Len):
        Str = Str + [('exceed+' + str(Sigma_List[i]) + '*Sigma ' + str(Time_List[i]) + 'days times'), 'days', 'proportion',
                     ('under-' + str(Sigma_List[i]) + '*Sigma ' + str(Time_List[i]) + 'days times'), 'days', 'proportion']

    # Binary mode is what the Python 2 csv module expects; the with block
    # guarantees the file is flushed and closed (it used to be left open).
    with open(Dist + 'output.csv', 'wb') as Csvfile:
        writer = csv.writer(Csvfile)

        writer.writerow(Code_List)
        writer.writerow(Two_Code_List)
        writer.writerow(Str)
        writer.writerows(Statis_List)

        writer.writerows(Array_Short)
        writer.writerows(Array_Diff)

# Main program --------------------------------------------------
Bank = 'E:\\08 python\\bank\\'
BankOutput = Bank + 'output\\'

# The space in "08 python" is restored to match the Bank path above.
Broker = 'E:\\08 python\\broker\\'
BrokerOutput = Broker + 'output\\'

SigmaList = [0.8, 1, 1.5]   # sigma multipliers, one per trend pass
TimeList = [20, 15, 10]     # minimum run length for the matching pass

# The same pipeline runs for each data set: load the CSV files, build the
# pairwise differences, compute the statistics, then write files and charts.
for SourceDir, OutputDir in ((Bank, BankOutput), (Broker, BrokerOutput)):
    (ArrayList, CodeList, CodeNameList) = Open_Csv(SourceDir)
    # 100 caps every series at its first 100 data rows (10000 was used before).
    (ArrayDiff, TwoCodeList, ArrayShort) = Generate_Diff(SourceDir, ArrayList, CodeList, CodeNameList, 100)
    StatisList = Statis_Diff(ArrayDiff)
    for Sigma, Days in zip(SigmaList, TimeList):
        StatisList = Statis_Trend(ArrayDiff, StatisList, TwoCodeList, Sigma, Days)
    print('Statistics Successfully')
    Output_File(ArrayDiff, ArrayShort, StatisList, CodeList, CodeNameList, TwoCodeList, SigmaList, TimeList, OutputDir)
    Draw_Image(ArrayDiff, ArrayShort, StatisList, CodeList, CodeNameList, TwoCodeList, SigmaList, TimeList, OutputDir)

本站仅提供存储服务，所有内容均由用户发布，如发现有害或侵权内容，请点击举报。