Pair Trading - Exploring The Low Risk Statistical Arbitrage Trading Concepts

VJAY

Well-Known Member
Closed 2 pairs... but BPCL-HINDPETRO is showing a z-score of -1.28 at today's EOD!

1539788287363.png



1539788847846.png
 
Last edited:

VJAY

Well-Known Member
OK, it looks like the pair has diverged. In this case, if the trade has not been closed, one can wait for mean reversion.
OK... How can we judge it? The previous day it was -0.6, and checking it on the open price also did not show it as diverged.
Close done using 15-min lows.

1539827090171.png
 

ncube

Well-Known Member
OK... How can we judge it? The previous day it was -0.6, and checking it on the open price also did not show it as diverged.
Close done using 15-min lows.

View attachment 29677
We will not know beforehand whether the pair will mean-revert or not, so we close the trade based on only two conditions: either the z-score is below 0.5, or we have a decent open profit (about 5-10% on average). In my experience so far, if a pair mean-reverts within 5-7 days we make a quick, good profit, but if it takes longer the chance of making a profit is reduced. The good thing is that so far all pairs have mean-reverted, which gives good confidence, and the success rate is good, about 60-70%. Trading a few pairs simultaneously smooths the returns curve, with very low drawdowns. Trading non-common pairs has higher profit potential.
 

checkmate7

Well-Known Member
Yes, we need to add the stock industries to the header as well, like TATASTEEL_METALS. You can get the industry info from the NSE site. I will send my CSV later along with the code.
This is the updated "stockdata.csv", which also has the industry, separated from the symbol by an underscore. It needs to be kept in the folder "Pair trading" (C://Pair trading//).
I am also attaching the code, which generates the file Pair_Analysis_<Date>.csv in the same folder (C://Pair trading//). It takes less than 1 min to run on 204 stocks.




Python:
# -*- coding: utf-8 -*-
"""

"""

# Importing the required python libraries
import pandas as pd
import matplotlib.pyplot as plt
from pylab import rcParams
import warnings
from statsmodels.tsa.stattools import coint
warnings.filterwarnings('ignore')

import csv
import datetime
import os
# Return the p-value of the statsmodels cointegration test for a pair of series.
# NOTE: the `significance` parameter defaults to 0.01 and is not used inside the
# function; callers compare the returned p-value against their own threshold.
def find_coint_significance(S1,S2,significance=0.01):
    """Return the cointegration-test p-value for the pair (S1, S2).

    Parameters:
        S1, S2: the two series handed to ``statsmodels.tsa.stattools.coint``.
        significance: accepted for call compatibility only; it is never read.

    Returns:
        The second element of the tuple returned by ``coint``, i.e. the p-value.
    """
    # autolag=None: no automatic lag search is requested from coint.
    test_result = coint(S1, S2, autolag=None)
    return test_result[1]
# Compute the most recent z-score of the S1/S2 price ratio over a lookback window.
# Despite its name this function currently draws nothing: the matplotlib code is
# disabled (kept below as comments), so xres and yres are accepted but unused.
# lb is the lookback period, i.e. how many trailing rows are considered.

def plot_pairs(df,S1,S2,lb=20,xres=10,yres=10):
    """Return the pair's names, latest ratio z-score and latest prices.

    Parameters:
        df: frame that holds a column for ``S1.name`` and for ``S2.name``.
        S1, S2: the two named series whose ratio S1/S2 is analysed.
        lb: number of trailing rows used for the mean and standard deviation.
        xres, yres: price divisors used only by the disabled plotting code.

    Returns:
        ``(S1.name, S2.name, z, last_S1, last_S2)`` where ``z`` is the last
        value of ``(ratio - ratio.mean()) / ratio.std()`` rounded to 3 places
        and ``last_*`` are the final rows of the matching ``df`` columns.
    """
    first_name, second_name = S1.name, S2.name

    ratio = S1[-lb:] / S2[-lb:]
    zscore = (ratio - ratio.mean()) / ratio.std()
    latest_z = round(zscore.iloc[-1], 3)

    # Disabled plotting code, retained for reference:
    # plt.style.use('seaborn-white')
    # plt.style.use('ggplot')
    # rcParams['figure.figsize'] = 25,10
    # plt.plot(df[S1.name][-lb:].index, df[S1.name][-lb:].values/xres)
    # plt.plot(df[S2.name][-lb:].index, df[S2.name][-lb:].values/yres)
    # plt.plot(zscore[-lb:].index, zscore[-lb:].values)
    # plt.legend(["Y = " + S1.name, "X = " + S2.name, 'Price Spread Rolling z-Score']);
    # plt.axhline(0, color='black')
    # plt.axhline(1.0, color='red', linestyle='--');
    # plt.axhline(-1.0, color='green', linestyle='--');
    # plt.show()

    last_first = df[first_name].iloc[-1]
    last_second = df[second_name].iloc[-1]
    return first_name, second_name, latest_z, last_first, last_second
# Load the stock price file into pandas frames for processing. The author chose
# daily returns (rather than prices) as the input to the cointegration test.

def load_data(file):
    """Read the price CSV and derive the daily-returns frame.

    Parameters:
        file: path of a CSV whose first column is used as the row index and
            whose remaining columns hold one price series each.

    Returns:
        A tuple ``(returns, df, returns1)``:
        ``returns`` - percentage changes of the last 200 rows, with the first of
        those rows removed and every column that still contains a NaN dropped;
        ``df`` - the full price frame as read from the file;
        ``returns1`` - an empty frame built from the header line only, read as
        whitespace-delimited text (so a comma-separated header ends up as a
        single column label).
    """
    df = pd.read_csv(file, index_col=[0])
    returns = df.pct_change()[-200:]
    returns = returns.iloc[1:, :].dropna(axis=1)
    # sep=r'\s+' is the supported spelling of the deprecated
    # delim_whitespace=True and parses the header identically.
    returns1 = pd.read_csv(file, nrows=0, sep=r'\s+')
    return returns, df, returns1

def update_eod(masterfile,eodfile):
    """Append one end-of-day row to the master price file.

    Parameters:
        masterfile: CSV of historical prices (first column is the row index,
            one column per symbol). The updated table is written back here.
        eodfile: header-less CSV; column 0 supplies the symbol and column 5 the
            value that becomes the new row.

    Symbols that are missing from either file end up with a NaN and their
    column is dropped. The row index is renumbered from 0 before saving.
    """
    master = pd.read_csv(masterfile, index_col=[0])
    eod = pd.read_csv(eodfile, header=None, index_col=[0], usecols=[0, 5])
    # DataFrame.append was removed in pandas 2.0; pd.concat stacks the
    # transposed EOD column under the history in the same way.
    combined = pd.concat([master, eod.T])
    combined = combined.dropna(axis=1).reset_index(drop=True)
    # Write back to the file that was read instead of a hard-coded location.
    combined.to_csv(masterfile)
#update_eod('C://master/stockdata.csv','C://master/eod.txt')
  
# Scan every same-industry pair and report those that are both cointegrated
# (on daily returns) and currently stretched (on the price-ratio z-score).
returns,df,returns1 = load_data('C://Pair Trading//stockdata.csv')

# Reporting thresholds.
P_VALUE_MAX = 0.01   # report only pairs whose cointegration p-value is below this
ZSCORE_MIN = 2.0     # ...and whose |z-score| of the price ratio exceeds this
LOOKBACK = 20        # trailing rows used for the z-score

# Take the symbols from the returns frame instead of re-parsing the raw header
# (returns1): columns dropped by load_data for containing NaNs would otherwise
# raise a KeyError when looked up in `returns`.
lst1 = list(returns.columns)
num = len(lst1)

# Column labels look like "<SYMBOL>_<INDUSTRY>". Labels without an underscore
# carry no industry and cannot be matched, so they are left out of the scan.
labels = {}
for name in lst1:
    parts = name.split('_')
    if len(parts) > 1:
        labels[name] = (parts[0], parts[1])

date1 = datetime.datetime.now().strftime("%Y-%m-%d")
filenm = "C://Pair trading//Pair_Analysis_" + date1 + ".csv"

## delete only if file exists ##
if os.path.exists(filenm):
    os.remove(filenm)

# Both orderings of a pair are evaluated (the S1/S2 ratio depends on the
# direction), every column may take either role, and a stock is never paired
# with itself.
report_rows = []
for first in lst1:
    if first not in labels:
        continue
    ticker1, industry = labels[first]
    for second in lst1:
        if second == first or second not in labels:
            continue
        ticker2, other_industry = labels[second]
        if other_industry != industry:
            continue

        pValue = find_coint_significance(returns[first], returns[second])
        # A p-value of exactly 0.0 is treated as a degenerate result and skipped.
        if not (pValue < P_VALUE_MAX and pValue != 0.0):
            continue

        str1, str2, zScore, price1, price2 = plot_pairs(
            df, df[first], df[second], lb=LOOKBACK, xres=100, yres=300)
        if not abs(zScore) > ZSCORE_MIN:
            continue

        report_rows.extend([
            ["INDUSTRY= " + industry],
            ["Pair is Significant, pValue= " + str(pValue)],
            ["zScore: " + str(zScore)],
            ["Y = " + ticker1 + " Price :" + str(price1)],
            ["X = " + ticker2 + " Price :" + str(price2)],
            [],
        ])

# The report file is created only when at least one pair qualified.
if report_rows:
    with open(filenm, 'w', newline='') as csvFile:
        csv.writer(csvFile).writerows(report_rows)
 

Attachments

VJAY

Well-Known Member
This is the updated "stockdata.csv", which also has the industry, separated from the symbol by an underscore. It needs to be kept in the folder "Pair trading" (C://Pair trading//).
I am also attaching the code, which generates the file Pair_Analysis_<Date>.csv in the same folder (C://Pair trading//). It takes less than 1 min to run on 204 stocks.




Python:
# -*- coding: utf-8 -*-
"""

"""

# Importing the required python libraries
import pandas as pd
import matplotlib.pyplot as plt
from pylab import rcParams
import warnings
from statsmodels.tsa.stattools import coint
warnings.filterwarnings('ignore')

import csv
import datetime
import os
# Return the p-value of the statsmodels cointegration test for a pair of series.
# NOTE: the `significance` parameter defaults to 0.01 and is not used inside the
# function; callers compare the returned p-value against their own threshold.
def find_coint_significance(S1,S2,significance=0.01):
    """Return the cointegration-test p-value for the pair (S1, S2).

    Parameters:
        S1, S2: the two series handed to ``statsmodels.tsa.stattools.coint``.
        significance: accepted for call compatibility only; it is never read.

    Returns:
        The second element of the tuple returned by ``coint``, i.e. the p-value.
    """
    # autolag=None: no automatic lag search is requested from coint.
    test_result = coint(S1, S2, autolag=None)
    return test_result[1]
# Compute the most recent z-score of the S1/S2 price ratio over a lookback window.
# Despite its name this function currently draws nothing: the matplotlib code is
# disabled (kept below as comments), so xres and yres are accepted but unused.
# lb is the lookback period, i.e. how many trailing rows are considered.

def plot_pairs(df,S1,S2,lb=20,xres=10,yres=10):
    """Return the pair's names, latest ratio z-score and latest prices.

    Parameters:
        df: frame that holds a column for ``S1.name`` and for ``S2.name``.
        S1, S2: the two named series whose ratio S1/S2 is analysed.
        lb: number of trailing rows used for the mean and standard deviation.
        xres, yres: price divisors used only by the disabled plotting code.

    Returns:
        ``(S1.name, S2.name, z, last_S1, last_S2)`` where ``z`` is the last
        value of ``(ratio - ratio.mean()) / ratio.std()`` rounded to 3 places
        and ``last_*`` are the final rows of the matching ``df`` columns.
    """
    first_name, second_name = S1.name, S2.name

    ratio = S1[-lb:] / S2[-lb:]
    zscore = (ratio - ratio.mean()) / ratio.std()
    latest_z = round(zscore.iloc[-1], 3)

    # Disabled plotting code, retained for reference:
    # plt.style.use('seaborn-white')
    # plt.style.use('ggplot')
    # rcParams['figure.figsize'] = 25,10
    # plt.plot(df[S1.name][-lb:].index, df[S1.name][-lb:].values/xres)
    # plt.plot(df[S2.name][-lb:].index, df[S2.name][-lb:].values/yres)
    # plt.plot(zscore[-lb:].index, zscore[-lb:].values)
    # plt.legend(["Y = " + S1.name, "X = " + S2.name, 'Price Spread Rolling z-Score']);
    # plt.axhline(0, color='black')
    # plt.axhline(1.0, color='red', linestyle='--');
    # plt.axhline(-1.0, color='green', linestyle='--');
    # plt.show()

    last_first = df[first_name].iloc[-1]
    last_second = df[second_name].iloc[-1]
    return first_name, second_name, latest_z, last_first, last_second
# Load the stock price file into pandas frames for processing. The author chose
# daily returns (rather than prices) as the input to the cointegration test.

def load_data(file):
    """Read the price CSV and derive the daily-returns frame.

    Parameters:
        file: path of a CSV whose first column is used as the row index and
            whose remaining columns hold one price series each.

    Returns:
        A tuple ``(returns, df, returns1)``:
        ``returns`` - percentage changes of the last 200 rows, with the first of
        those rows removed and every column that still contains a NaN dropped;
        ``df`` - the full price frame as read from the file;
        ``returns1`` - an empty frame built from the header line only, read as
        whitespace-delimited text (so a comma-separated header ends up as a
        single column label).
    """
    df = pd.read_csv(file, index_col=[0])
    returns = df.pct_change()[-200:]
    returns = returns.iloc[1:, :].dropna(axis=1)
    # sep=r'\s+' is the supported spelling of the deprecated
    # delim_whitespace=True and parses the header identically.
    returns1 = pd.read_csv(file, nrows=0, sep=r'\s+')
    return returns, df, returns1

def update_eod(masterfile,eodfile):
    """Append one end-of-day row to the master price file.

    Parameters:
        masterfile: CSV of historical prices (first column is the row index,
            one column per symbol). The updated table is written back here.
        eodfile: header-less CSV; column 0 supplies the symbol and column 5 the
            value that becomes the new row.

    Symbols that are missing from either file end up with a NaN and their
    column is dropped. The row index is renumbered from 0 before saving.
    """
    master = pd.read_csv(masterfile, index_col=[0])
    eod = pd.read_csv(eodfile, header=None, index_col=[0], usecols=[0, 5])
    # DataFrame.append was removed in pandas 2.0; pd.concat stacks the
    # transposed EOD column under the history in the same way.
    combined = pd.concat([master, eod.T])
    combined = combined.dropna(axis=1).reset_index(drop=True)
    # Write back to the file that was read instead of a hard-coded location.
    combined.to_csv(masterfile)
#update_eod('C://master/stockdata.csv','C://master/eod.txt')
 
# Scan every same-industry pair and report those that are both cointegrated
# (on daily returns) and currently stretched (on the price-ratio z-score).
returns,df,returns1 = load_data('C://Pair Trading//stockdata.csv')

# Reporting thresholds.
P_VALUE_MAX = 0.01   # report only pairs whose cointegration p-value is below this
ZSCORE_MIN = 2.0     # ...and whose |z-score| of the price ratio exceeds this
LOOKBACK = 20        # trailing rows used for the z-score

# Take the symbols from the returns frame instead of re-parsing the raw header
# (returns1): columns dropped by load_data for containing NaNs would otherwise
# raise a KeyError when looked up in `returns`.
lst1 = list(returns.columns)
num = len(lst1)

# Column labels look like "<SYMBOL>_<INDUSTRY>". Labels without an underscore
# carry no industry and cannot be matched, so they are left out of the scan.
labels = {}
for name in lst1:
    parts = name.split('_')
    if len(parts) > 1:
        labels[name] = (parts[0], parts[1])

date1 = datetime.datetime.now().strftime("%Y-%m-%d")
filenm = "C://Pair trading//Pair_Analysis_" + date1 + ".csv"

## delete only if file exists ##
if os.path.exists(filenm):
    os.remove(filenm)

# Both orderings of a pair are evaluated (the S1/S2 ratio depends on the
# direction), every column may take either role, and a stock is never paired
# with itself.
report_rows = []
for first in lst1:
    if first not in labels:
        continue
    ticker1, industry = labels[first]
    for second in lst1:
        if second == first or second not in labels:
            continue
        ticker2, other_industry = labels[second]
        if other_industry != industry:
            continue

        pValue = find_coint_significance(returns[first], returns[second])
        # A p-value of exactly 0.0 is treated as a degenerate result and skipped.
        if not (pValue < P_VALUE_MAX and pValue != 0.0):
            continue

        str1, str2, zScore, price1, price2 = plot_pairs(
            df, df[first], df[second], lb=LOOKBACK, xres=100, yres=300)
        if not abs(zScore) > ZSCORE_MIN:
            continue

        report_rows.extend([
            ["INDUSTRY= " + industry],
            ["Pair is Significant, pValue= " + str(pValue)],
            ["zScore: " + str(zScore)],
            ["Y = " + ticker1 + " Price :" + str(price1)],
            ["X = " + ticker2 + " Price :" + str(price2)],
            [],
        ])

# The report file is created only when at least one pair qualified.
if report_rows:
    with open(filenm, 'w', newline='') as csvFile:
        csv.writer(csvFile).writerows(report_rows)
Wow!!!!Thanks checkmate...will check it now :)