打开APP
userphoto
未登录

开通VIP,畅享免费电子书等14项超值服务

开通VIP
【原创】利用Python开发一个配对股票分析的小程序(三)

接着进入主程序的开发。

 

#coding=utf-8
import os,time,sys,re,datetime
import csv #处理CSV文件
import scipy
import numpy as np
from matplotlib.font_manager import FontProperties
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from matplotlib.dates import YearLocator, MonthLocator,DateFormatter,AutoDateLocator
import MyStatis as MS

 

# Read every CSV file found directly inside a directory.
def Open_Csv(Dist):
    """Load one price series per ``*.csv`` file in the directory ``Dist``.

    ``Dist`` is joined to each file name by plain string concatenation, so
    it must already end with a path separator.

    For every file the first row is skipped (treated as a header) and each
    later row contributes ``[row[0], float(row[3])]``.  The first six
    characters of the file name are used as the stock code, and column 2 of
    the first data row, decoded from GB2312, as the stock name.

    NOTE: the file is opened in binary mode and the name field is decoded
    with ``.decode('gb2312')``; this relies on Python 2 byte-string rows
    from ``csv.reader``.

    Returns:
        ``(Array_List, Code_List, Code_Name_List)`` -- the zero-repaired
        series (one per file), the codes in the same order, and a dict
        mapping code -> decoded name.
    """
    Array_List = []
    Code_List = []
    Code_Name_List = {}
    for item in os.listdir(Dist):  # walk the given directory
        path = Dist + item
        if not (os.path.isfile(path) and item.endswith('.csv')):
            continue
        code = str(item)[0:6]
        Code_List.append(code)
        rows = []
        # ``with`` closes the file even when a row fails to parse.
        with open(path, 'rb') as f:
            for count, line in enumerate(csv.reader(f)):
                if count == 0:
                    continue  # header row
                rows.append([line[0], float(line[3])])
                if count == 1:
                    # Encoding handling is fragile here and easily
                    # produces mojibake (original author's warning).
                    Code_Name_List[code] = line[2].decode('gb2312')
        Array_List.append(Handle_Zero(rows))  # repair zero values
    return (Array_List, Code_List, Code_Name_List)

def Handle_Zero(Array):  # repair zero values
    """Replace zero values in a series with values derived from neighbours.

    ``Array`` is a list of ``[label, value]`` rows and is modified in place.

    * Row 0 takes the value of row 1, row 1 the value of row 2, and the
      last row the value of the row before it, when they are zero.
    * Every other zero row becomes the mean (rounded to 2 decimals) of the
      previous row and the next row; if the next row is itself not greater
      than 0.001, the two previous rows are averaged instead.

    Series with fewer than three rows are handled without indexing past the
    end: empty and single-row input is returned unchanged, and a two-row
    series only copies between its two rows.

    Returns:
        The same list object that was passed in.
    """
    n = len(Array)
    if n < 2:
        return Array  # nothing to borrow a value from

    if Array[0][1] == 0:
        Array[0][1] = Array[1][1]
    if n > 2 and Array[1][1] == 0:
        Array[1][1] = Array[2][1]
    if Array[n - 1][1] == 0:
        Array[n - 1][1] = Array[n - 2][1]

    for i in range(2, n - 1):
        if float(Array[i][1]) != 0:
            continue
        if Array[i + 1][1] > 0.001:
            neighbour = Array[i + 1][1]
        else:
            # The following value is missing too: fall back to the two
            # preceding (already repaired) values.
            neighbour = Array[i - 2][1]
        Array[i][1] = round((Array[i - 1][1] + neighbour) / 2.0, 2)

    return Array


def Generate_Diff(Dist, Array, Code_List, Code_Name_List, Min):  # build the price-difference arrays
    """Build pairwise difference series over a common window.

    The window length ``MinLen`` is the smaller of ``Min`` and the length
    of the shortest series in ``Array``.  Only rows ``0 .. MinLen-1`` of
    each series are used, and they are emitted in reverse order (row
    ``MinLen-1`` first).

    Args:
        Dist: not used by this function; kept so callers need not change.
        Array: list of series, each a list of ``[label, value]`` rows.
        Code_List: codes, parallel to ``Array``.
        Code_Name_List: dict mapping code -> name; each name must support
            ``.encode('gb2312')`` and the result is concatenated with the
            code strings (Python 2 string semantics).
        Min: upper bound for the window length.

    Returns:
        ``(Array_Diff, Two_Code_List, Array_Short)`` where

        * ``Array_Diff[0]`` is the label column of the series selected by
          ``MS.Get_MinIndex`` (presumably the shortest one -- confirm in
          MyStatis), followed by one row per pair ``(i, j)``, ``i < j``,
          holding ``round(value_i - value_j, 2)``;
        * ``Two_Code_List`` holds one label per pair, in the same order,
          formatted ``'<code> <name> - <code> <name>'``;
        * ``Array_Short[0]`` is that same label row (the same list
          object), followed by the windowed values of every series.
    """
    Array_Len = len(Array)  # number of series
    Array_Temp = [len(Series) for Series in Array]
    MinLen = min(np.min(Array_Temp), Min)
    MinIndex = MS.Get_MinIndex(Array_Temp)  # original note: find the most recently listed stock

    # Row indices MinLen-1, ..., 0: the window in reversed order.
    Window = list(range(MinLen - 1, -1, -1))

    Labels = [Array[MinIndex][Row][0] for Row in Window]
    Array_Diff = [Labels]   # first row: the labels (times)
    Array_Short = [Labels]
    Two_Code_List = []

    for i in range(Array_Len - 1):
        for j in range(i + 1, Array_Len):
            Array_Diff.append(
                [round(Array[i][Row][1] - Array[j][Row][1], 2) for Row in Window])
            # The separators are ' ' and ' - '.  The literals had lost their
            # spaces ('' and ' -'); the plotting code slices the second code
            # at index('-')+2, which only lines up when the dash is followed
            # by a space.
            Two_Code_List.append(
                Code_List[i] + ' ' + Code_Name_List[Code_List[i]].encode('gb2312')
                + ' - '
                + Code_List[j] + ' ' + Code_Name_List[Code_List[j]].encode('gb2312'))

    for Series in Array:
        Array_Short.append([Series[Row][1] for Row in Window])

    return (Array_Diff, Two_Code_List, Array_Short)


def Statis_Normal(Array_Diff):  # basic statistics
    """Compute eleven summary values for every data row of ``Array_Diff``.

    Row 0 of ``Array_Diff`` is skipped (the callers in this file store the
    label row there).  For each remaining row the result holds, in order:

    0. mean (rounded to 2 decimals)
    1. standard deviation via ``np.std`` (rounded to 2 decimals)
    2. minimum (rounded)
    3-5. ``MS.Proportion(row, q)`` for q = 0.25, 0.5, 0.75 (presumably the
       quartiles -- confirm in MyStatis)
    6. maximum (rounded)
    7. number of samples
    8-10. ``MS.Percent_Sigma(row, mean, k*std)`` for k = 1, 2, 3
       (presumably the share of samples within k sigma -- confirm)

    Returns:
        A list with one 11-element list per data row.
    """
    Statis_List = []
    for Series in Array_Diff[1:]:
        mean = round(np.mean(Series), 2)
        std = round(np.std(Series), 2)
        Statis_List.append([
            mean,
            std,
            round(np.min(Series), 2),
            MS.Proportion(Series, 0.25),
            MS.Proportion(Series, 0.5),
            MS.Proportion(Series, 0.75),
            round(np.max(Series), 2),
            len(Series),
            MS.Percent_Sigma(Series, mean, std),
            MS.Percent_Sigma(Series, mean, (2 * std)),
            MS.Percent_Sigma(Series, mean, (3 * std)),
        ])
    return Statis_List

               

# Count the runs where the difference stays above mean+t*std or below
# mean-t*std for at least T consecutive samples; record start label and length.
def _Scan_Run(Series, Start, T, Outside):
    """Measure a candidate run beginning at ``Start``.

    The caller has already checked that ``Series[Start]`` and
    ``Series[Start+T-1]`` satisfy ``Outside``.

    Returns ``(Next, Length)``.  ``Length`` is ``None`` when the samples
    between those two do not all satisfy ``Outside``; ``Next`` is then the
    first offending index.  Otherwise the run is extended as far as
    possible, ``Length`` is its total length and ``Next`` the index just
    past it.
    """
    j = Start + 1  # careful when the run length is 2 (original note)
    while j < Start + T - 1 and Outside(Series[j]):
        j += 1
    if j != Start + T - 1:
        return j, None
    # The run lasted T samples; keep looking further along the series.
    j += 1
    while j < len(Series) and Outside(Series[j]):
        j += 1
    return j, j - Start


def Statis_Trend(Array_Diff, Statis_List, Two_Code_List, t, T):
    """Append run statistics for one (t, T) threshold to ``Statis_List``.

    For every data row ``Array_Diff[m+1]`` the band is
    ``mean +/- round(t*std, 2)``, with mean and std read from
    ``Statis_List[m][0]`` and ``Statis_List[m][1]``.  A sample counts as
    above the band when ``value - upper >= -0.0001`` and as below it when
    ``value - lower <= 0.0001``.  A run is counted once it covers at least
    ``T`` consecutive samples on the same side.

    ``Statis_List`` is modified in place and returned:

    * ``Statis_List[m]`` gains six values: number of runs above the band,
      total samples in those runs, that total divided by the series length
      (rounded to 2 decimals), then the same three values for runs below
      the band.  Above-then-below is the order announced by the comment in
      the original block and by the report headers; the code previously
      appended them the other way round.
    * one extra row per series is appended at the end:
      ``[pair label, start label, ' + exceed ' or ' - under ', length, ...]``.

    NOTE(review): the scan stops at ``i < len(series) - T``, so a window
    that occupies exactly the last ``T`` samples is never examined --
    confirm whether that is intended.
    """
    for m in range(len(Array_Diff) - 1):
        Series = Array_Diff[m + 1]
        mean = Statis_List[m][0]
        std = round(float(t) * Statis_List[m][1], 2)
        Urange = mean + std
        Lrange = mean - std

        def Above(Value, Urange=Urange):
            return (Value - Urange) >= -0.0001

        def Below(Value, Lrange=Lrange):
            return (Value - Lrange) <= 0.0001

        def Inside(Value, Urange=Urange, Lrange=Lrange):
            return (Value - Lrange) > 0.0001 and (Value - Urange) < -0.0001

        Utimes = 0   # number of runs
        Ltimes = 0
        Unum = 0.00  # total samples
        Lnum = 0.00
        Temp_Array = [Two_Code_List[m]]

        i = 0
        while i < (len(Series) - T):
            Last = Series[i + T - 1]
            if Inside(Last):
                # Every window that starts in i .. i+T-1 contains this
                # in-band sample, so none of them can qualify.
                i = i + T
            elif Above(Last):
                if Above(Series[i]):
                    Start = i
                    i, Length = _Scan_Run(Series, Start, T, Above)
                    if Length is not None:
                        Temp_Array.append(Array_Diff[0][Start])  # label (date)
                        Temp_Array.append(' + exceed ')
                        Utimes = Utimes + 1
                        Unum = Unum + Length
                        Temp_Array.append(Length)
                else:
                    i = i + 1
            elif Below(Last):
                if Below(Series[i]):
                    Start = i
                    i, Length = _Scan_Run(Series, Start, T, Below)
                    if Length is not None:
                        Temp_Array.append(Array_Diff[0][Start])  # label (date)
                        Temp_Array.append(' - under ')
                        Ltimes = Ltimes + 1
                        Lnum = Lnum + Length
                        Temp_Array.append(Length)
                else:
                    i = i + 1
            else:
                # Only reachable for NaN, which matches no comparison;
                # step on instead of looping forever.
                i = i + 1

        Total = float(len(Series))
        Statis_List[m].append(Utimes)
        Statis_List[m].append(Unum)
        Statis_List[m].append(round(float(Unum) / Total, 2) if Total else 0.0)

        Statis_List[m].append(Ltimes)
        Statis_List[m].append(Lnum)
        Statis_List[m].append(round(float(Lnum) / Total, 2) if Total else 0.0)

        Statis_List.append(Temp_Array)

    return Statis_List

def Statis_Diff(Array_Diff):  # statistics over the difference series
    """Return the basic statistics for ``Array_Diff``.

    Thin wrapper: delegates to ``Statis_Normal`` and returns its result
    unchanged.
    """
    # Basic statistics & proportions.
    return Statis_Normal(Array_Diff)
# Shut the computer down.
def Close_machine():
    """Run the Windows ``shutdown -s`` command through ``os.system``."""
    os.system("c:\\windows\\system32\\shutdown -s")

def Trend_Change():  # reversal probability of the median
    """Placeholder: this statistic was never implemented.

    Always returns ``None``.  The body previously consisted of a comment
    only, which is a syntax error.
    """
    pass

def Draw_Image(Array_Diff, Array_Short, Statis_List, Code_List, Code_Name_List, Two_Code_List, t, T, Dist):
    """Render all three chart families into the directory prefix ``Dist``.

    Calls, in order, ``Draw_Diff`` (difference charts), ``Draw_Trend``
    (difference charts with the ``t`` threshold bands) and ``Draw_TwoLine``
    (two-series comparison charts), then prints a confirmation line.
    ``t`` and ``T`` are forwarded to ``Draw_Trend`` unchanged.
    """
    Draw_Diff(Array_Diff, Statis_List, Two_Code_List, Dist)  # difference charts
    Draw_Trend(Array_Diff, Statis_List, Two_Code_List, t, T, Dist)  # difference trend charts
    Draw_TwoLine(Array_Short, Statis_List, Code_List, Code_Name_List, Two_Code_List, Dist)  # two-line comparison charts
    # Parenthesised single argument prints identically on Python 2 and 3.
    print('DrawFile Successfully')
   
def Draw_Trend(Array_Diff, Statis_List, Two_Code_List, t, T, Dist):  # difference trend charts
    """Save one difference chart per pair, with a dashed band per ``t`` entry.

    Args:
        Array_Diff: row 0 holds date strings (parsed with ``datestr2num``);
            row ``i`` (i >= 1) holds the difference series of pair ``i-1``.
        Statis_List: ``Statis_List[i-1][0]`` / ``[1]`` are read as the mean
            and standard deviation of pair ``i-1``.
        Two_Code_List: pair labels (byte strings decoded as GB2312 for the
            title and file name -- Python 2 string semantics).
        t: iterable of sigma multiples; for each one, dashed lines are drawn
            at ``mean + t*std`` and ``mean - t*std``.
        T: not used by this function.
        Dist: output path prefix; the file is
            ``Dist + <pair label> + u'趋势图.jpg'``.
    """
    # Font file used for the Chinese title and axis labels.
    Font = FontProperties(fname=r"c:\windows\fonts\simsun.ttc", size=14)
    Dates = matplotlib.dates.datestr2num(Array_Diff[0])
    Points = len(Array_Diff[0])

    for i in range(1, len(Array_Diff)):
        Pair = Two_Code_List[i - 1]
        Dash = Pair.index('-')
        mean = Statis_List[i - 1][0]
        std = Statis_List[i - 1][1]

        plt.figure(figsize=(10.24, 7.68), dpi=300)
        for Multiple in t:
            plt.plot_date(Dates, [mean + Multiple * std] * Points, 'k', linestyle='--')
            plt.plot_date(Dates, [mean - Multiple * std] * Points, 'k', linestyle='--')

        # Legend: the first six characters of the label plus the six
        # characters starting two positions after the first '-' (the two
        # codes, given a '<code> <name> - <code> <name>' label).
        plt.plot_date(Dates, Array_Diff[i], 'r', linestyle='-',
                      label=(Pair[0:6] + '- ' + Pair[(Dash + 2):(Dash + 8)]))
        plt.plot_date(Dates, [mean] * Points, 'k', linestyle='-')
        plt.legend()
        plt.title(Pair.decode('gb2312') + u' 价格差波动图', fontproperties=Font)
        plt.xlabel(u'日期', fontproperties=Font)
        plt.ylabel(u'价格差', fontproperties=Font)
        plt.grid(True)
        plt.gca().xaxis.set_major_formatter(DateFormatter('%Y-%m-%d'))
        plt.gcf().autofmt_xdate()

        plt.savefig(Dist + Pair.decode('gb2312') + u'趋势图.jpg')
        plt.close()
   
def Draw_Diff(Array_Diff, Statis_List, Two_Code_List, Dist):  # difference charts
    """Save one difference chart per pair with 1/2/3-sigma bands.

    Args:
        Array_Diff: row 0 holds date strings (parsed with ``datestr2num``);
            row ``i`` (i >= 1) holds the difference series of pair ``i-1``.
        Statis_List: ``Statis_List[i-1][0]`` / ``[1]`` are read as the mean
            and standard deviation of pair ``i-1``.
        Two_Code_List: pair labels (byte strings decoded as GB2312 for the
            title and file name -- Python 2 string semantics).
        Dist: output path prefix; the file is ``Dist + <pair label> + '.jpg'``.
    """
    # Font file used for the Chinese title and axis labels.
    Font = FontProperties(fname=r"c:\windows\fonts\simsun.ttc", size=14)
    Dates = matplotlib.dates.datestr2num(Array_Diff[0])
    Points = len(Array_Diff[0])

    for i in range(1, len(Array_Diff)):
        Pair = Two_Code_List[i - 1]
        Dash = Pair.index('-')
        mean = Statis_List[i - 1][0]
        std = Statis_List[i - 1][1]

        plt.figure(figsize=(10.24, 7.68), dpi=300)
        # Legend: the first six characters of the label plus the six
        # characters starting two positions after the first '-' (the two
        # codes, given a '<code> <name> - <code> <name>' label).
        plt.plot_date(Dates, Array_Diff[i], 'r', linestyle='-',
                      label=(Pair[0:6] + '- ' + Pair[(Dash + 2):(Dash + 8)]))
        # Dashed bands: the upper 1/2/3 sigma lines, then the lower ones.
        for Multiple in (1, 2, 3):
            plt.plot_date(Dates, [mean + Multiple * std] * Points, 'k', linestyle='--')
        for Multiple in (1, 2, 3):
            plt.plot_date(Dates, [mean - Multiple * std] * Points, 'k', linestyle='--')
        plt.plot_date(Dates, [mean] * Points, 'k', linestyle='-')
        plt.legend()
        plt.title(Pair.decode('gb2312') + u' 价格差波动图', fontproperties=Font)
        plt.xlabel(u'日期', fontproperties=Font)
        plt.ylabel(u'价格差', fontproperties=Font)
        plt.grid(True)
        plt.gca().xaxis.set_major_formatter(DateFormatter('%Y-%m-%d'))
        plt.gcf().autofmt_xdate()
        plt.savefig(Dist + Pair.decode('gb2312') + '.jpg')
        plt.close()
   
def Draw_TwoLine(Array_Short, Statis_List, Code_List, Code_Name_List, Two_Code_List, Dist):  # pairwise raw-value comparison charts
    """Save one two-line chart per pair of series.

    For the pair ``(i, j)``, series ``i`` is shifted down by the pair's
    mean (``Statis_List[k][0]``) and plotted in red; series ``j`` is plotted
    unchanged in blue.  Pairs are visited in ``i < j`` order and ``k``
    counts them, so ``Statis_List`` and ``Two_Code_List`` must list the
    pairs in that same order.

    Args:
        Array_Short: row 0 holds date strings (parsed with ``datestr2num``);
            rows 1.. hold one value series each.
        Statis_List: per-pair statistics; only element 0 of each row is read.
        Code_List: codes for the legend; series ``i`` uses ``Code_List[i-1]``.
        Code_Name_List: not used by this function.
        Two_Code_List: pair labels (byte strings decoded as GB2312 for the
            title and file name -- Python 2 string semantics).
        Dist: output path prefix; the file is
            ``Dist + <pair label> + u'双值.jpg'``.
    """
    # Font file used for the Chinese title and axis labels.
    Font = FontProperties(fname=r"c:\windows\fonts\simsun.ttc", size=14)
    Dates = matplotlib.dates.datestr2num(Array_Short[0])
    Points = len(Array_Short[0])
    k = 0
    for i in range(1, (len(Array_Short) - 1)):
        for j in range((i + 1), len(Array_Short)):
            plt.figure(figsize=(10.24, 7.68), dpi=300)
            mean = Statis_List[k][0]
            Shifted = (np.array(Array_Short[i]) - np.array([mean] * Points)).tolist()
            plt.plot_date(Dates, Shifted, 'r', linestyle='-',
                          label=(Code_List[i - 1] + '-(' + str(mean) + ')'))
            plt.plot_date(Dates, Array_Short[j], 'b', linestyle='-',
                          label=(Code_List[j - 1]))

            plt.legend()
            plt.title(Two_Code_List[k].decode('gb2312') + u' 价格图', fontproperties=Font)
            plt.xlabel(u'日期', fontproperties=Font)
            plt.ylabel(u'价格', fontproperties=Font)
            plt.grid(True)
            plt.gca().xaxis.set_major_formatter(DateFormatter('%Y-%m-%d'))
            plt.gcf().autofmt_xdate()
            plt.savefig(Dist + Two_Code_List[k].decode('gb2312') + u'双值.jpg')
            plt.close()
            k = k + 1
   
def Output_File(Array_Diff, Array_Short, Statis_List, Code_List, Code_Name_List, Two_Code_List, Sigma_List, Time_List, Dist):
    """Write both report files into the directory prefix ``Dist``.

    Calls ``Output_Txt`` and then ``Output_Csv`` with the same arguments,
    then prints a confirmation line.
    """
    Output_Txt(Array_Diff, Array_Short, Statis_List, Code_List, Code_Name_List, Two_Code_List, Sigma_List, Time_List, Dist)
    Output_Csv(Array_Diff, Array_Short, Statis_List, Code_List, Code_Name_List, Two_Code_List, Sigma_List, Time_List, Dist)
    # Parenthesised single argument prints identically on Python 2 and 3.
    print('Output File Successfully')

def Output_Txt(Array_Diff, Array_Short, Statis_List, Code_List, Code_Name_List, Two_Code_List, Sigma_List, Time_List, Dist):
    """Dump all results as text to ``Dist + 'output.txt'``.

    The file is overwritten.  It contains, in order: a timestamp line,
    ``str(Code_List)``, ``str(Array_Short)``, the pair labels written back
    to back, ``str(Array_Diff)``, the GB2312-encoded column header and
    ``str(Statis_List)``.

    The number of per-threshold header groups is derived from the width of
    ``Statis_List[0]``: 11 basic columns plus 6 columns per
    ``(Sigma_List[i], Time_List[i])`` pair.

    NOTE: byte strings are written to a binary file and the header is
    built with ``str.decode(...).encode(...)``; this relies on Python 2
    string semantics.
    NOTE(review): no newline is written between the header and
    ``str(Statis_List)`` -- confirm whether that is intended.
    """
    TODAY = datetime.date.today()
    CURRENTDAY = TODAY.strftime('%Y-%m-%d')
    TIME = time.strftime("%H:%M:%S")

    # Column names are separated by single spaces; the literal had lost the
    # spaces between some names (the basic statistics are 11 columns).
    Str = '均值 标准差 最小值 25% 中位数 75% 最大值 数组长度 1sigma 2sigma 3sigma'.decode('utf-8').encode('gb2312')
    # Floor division keeps the count an int on Python 3 as well.
    Len = (len(Statis_List[0]) - 11) // 6 if Statis_List else 0
    for i in range(Len):
        # Both halves carry the same three column names; the second half
        # had lost its separating spaces.
        Str = Str + (' 波动幅度超过 +' + str(Sigma_List[i]) + '*Sigma 且持续时间超过' + str(Time_List[i]) + '天 的次数  天数 所占比例'
                     + ' 波动幅度超过 -' + str(Sigma_List[i]) + '*Sigma 且持续时间超过' + str(Time_List[i]) + '天 的次数  天数 所占比例').decode('utf-8').encode('gb2312')

    # Write to the local file; ``with`` closes it even if a write fails.
    with open(Dist + 'output.txt', 'wb') as fp:
        fp.write('------------------------------\n' + CURRENTDAY + " " + TIME + '  \n')

        fp.write(str(Code_List))
        fp.write('\n')
        fp.write(str(Array_Short))
        fp.write('\n')
        for Label in Two_Code_List:
            fp.write(Label)
        fp.write('\n')
        fp.write(str(Array_Diff))
        fp.write('\n')

        fp.write(Str)
        fp.write(str(Statis_List))
        fp.write('\n------------------------------------------------------------------------\n')

def Output_Csv(Array_Diff, Array_Short, Statis_List, Code_List, Code_Name_List, Two_Code_List, Sigma_List, Time_List, Dist):
    """Write all results to ``Dist + 'output.csv'`` (overwritten).

    Rows, in order: ``Code_List``, ``Two_Code_List``, the column header,
    every row of ``Statis_List``, every row of ``Array_Short`` and every
    row of ``Array_Diff``.

    The header has the 11 basic statistic names followed by six names per
    ``(Sigma_List[i], Time_List[i])`` pair; the number of pairs is derived
    from the width of ``Statis_List[0]``.

    NOTE: the file is opened in binary mode for ``csv.writer``, which is
    the Python 2 convention.
    """
    Str = ['mean', 'std', 'min', '25%', '50%', '75%', 'max', 'length', '1sigma', '2sigma', '3sigma']
    # Floor division keeps the count an int on Python 3 as well.
    Len = (len(Statis_List[0]) - 11) // 6 if Statis_List else 0
    for i in range(Len):
        Sigma = str(Sigma_List[i])
        Days = str(Time_List[i])
        Str = Str + [
            'exceed+' + Sigma + '*Sigma ' + Days + 'days times', 'days', 'proportion',
            # 'days times' matches the 'exceed' column; the literal had
            # lost its space ('daystimes').
            'under-' + Sigma + '*Sigma ' + Days + 'days times', 'days', 'proportion',
        ]

    # ``open`` replaces the Python 2-only ``file`` builtin, and ``with``
    # guarantees the rows are flushed and the handle closed (the handle was
    # never closed before).
    with open(Dist + 'output.csv', 'wb') as Csvfile:
        writer = csv.writer(Csvfile)

        writer.writerow(Code_List)
        writer.writerow(Two_Code_List)
        writer.writerow(Str)
        writer.writerows(Statis_List)

        writer.writerows(Array_Short)
        writer.writerows(Array_Diff)

# Main program --------------------------------------------------
Bank = 'E:\\08 python\\bank\\'
BankOutput = Bank + 'output\\'

# Same parent folder as Bank; the literal had lost the space in '08 python'.
Broker = 'E:\\08 python\\broker\\'
BrokerOutput = Broker + 'output\\'


# Sigma multiples and the matching minimum run lengths, paired by position.
SigmaList = [0.8, 1, 1.5]
TimeList = [20, 15, 10]

# The same pipeline runs once per data set: load the CSVs, build the
# pairwise differences over at most 100 rows, compute the statistics, then
# write the reports and the charts into the data set's output folder.
for (Source, Output) in ((Bank, BankOutput), (Broker, BrokerOutput)):
    (ArrayList, CodeList, CodeNameList) = Open_Csv(Source)
    (ArrayDiff, TwoCodeList, ArrayShort) = Generate_Diff(Source, ArrayList, CodeList, CodeNameList, 100)
    StatisList = Statis_Diff(ArrayDiff)
    for (Sigma, Days) in zip(SigmaList, TimeList):
        StatisList = Statis_Trend(ArrayDiff, StatisList, TwoCodeList, Sigma, Days)
    # Parenthesised single argument prints identically on Python 2 and 3.
    print('Statistics Successfully')
    Output_File(ArrayDiff, ArrayShort, StatisList, CodeList, CodeNameList, TwoCodeList, SigmaList, TimeList, Output)
    Draw_Image(ArrayDiff, ArrayShort, StatisList, CodeList, CodeNameList, TwoCodeList, SigmaList, TimeList, Output)

 

 

 


  

本站仅提供存储服务,所有内容均由用户发布,如发现有害或侵权内容,请点击举报
打开APP,阅读全文并永久保存 查看更多类似文章
猜你喜欢
类似文章
【热】打开小程序,算一算2024你的财运
Python将自己的图片数据集导入h5py,做识别的预处理
python sklearn的k-means聚类易懂实例
python 横坐标旋转
作业:三个排序法时间比较
给定一个字符串 S 和一个字符串 T,计算在 S 的子序列中 T 出现的个数
Python3 列表list合并的4种方法介绍
更多类似文章 >>
生活服务
热点新闻
分享 收藏 导长图 关注 下载文章
绑定账号成功
后续可登录账号畅享VIP特权!
如果VIP功能使用有故障,
可点击这里联系客服!

联系客服