Pair Trading - Exploring The Low Risk Statistical Arbitrage Trading Concepts

VJAY

Well-Known Member
1539261035698.png

in this trade GDL not triggered
GATI buy triggered 115 @72.40....
Add-today Zscore 2.46
So we Add buy GATI 112 qty
Sell GDL 119qty
 
Last edited:

checkmate7

Well-Known Member
I have written a program that calculates the z-score and p-value for every pairwise combination of the stocks provided, and writes the pairs with pValue < .01 and |zScore| > 2.0 to a CSV. Attaching the result for Nifty 50.
 

Attachments

checkmate7

Well-Known Member
If anyone requires this is the code which has been enhanced from the original Ncube code

Python:
# -*- coding: utf-8 -*-
"""
Created on Thu Oct 11 21:15:34 2018

@author: HP
"""

# Importing the required python libraries
import pandas as pd
import matplotlib.pyplot as plt
from pylab import rcParams
import warnings
from statsmodels.tsa.stattools import coint
warnings.filterwarnings('ignore')

import csv


# Function to get the cointegration p-value for a pair. Note: the 'significance'
# parameter defaults to 0.01 and is currently unused -- callers must compare the
# returned p-value against their own threshold.
def find_coint_significance(S1, S2, significance=0.01):
    """Return the Engle-Granger cointegration p-value for the pair (S1, S2).

    Parameters
    ----------
    S1, S2 : pandas.Series
        The two return series to test.
    significance : float, optional
        Accepted for backward compatibility but currently UNUSED -- the
        function always returns the raw p-value and callers apply their
        own threshold (the scan below uses 0.01).

    Returns
    -------
    float
        p-value from statsmodels' ``coint`` test (automatic lag
        selection disabled via ``autolag=None``).
    """
    pvalue = coint(S1, S2, autolag=None)[1]
    return pvalue
# This function computes the z-score for the pair selected (the actual plotting
# code is commented out). Field lb is the lookback period, i.e. how far back the
# historical daily values are considered. xres & yres only matter for the disabled
# plot: they divide the stock prices so the lines sit next to the z-score line.

def plot_pairs(df, S1, S2, lb=20, xres=10, yres=10):
    """Compute the price-ratio z-score for a pair over the last *lb* bars.

    Despite the name, plotting is disabled; the function only returns the
    latest statistics for the pair.

    Parameters
    ----------
    df : pandas.DataFrame
        Price table; must contain columns named ``S1.name`` and ``S2.name``.
    S1, S2 : pandas.Series
        Price series of the two legs (Y and X respectively).
    lb : int, optional
        Lookback window: number of most recent rows used for the ratio.
    xres, yres : int, optional
        Retained for backward compatibility; they only scaled the price
        lines in the now-removed plotting code and are unused here.

    Returns
    -------
    tuple
        (S1 name, S2 name, latest z-score rounded to 3 decimals,
        latest S1 price, latest S2 price).
    """
    spread = S1[-lb:] / S2[-lb:]  # price ratio of the two legs
    spread.name = 'ratio'
    spread_mean = spread.mean()
    std_dev = spread.std()  # pandas sample std-dev (ddof=1)
    zscore = (spread - spread_mean) / std_dev
    zscore.name = 'zScore'
    # The original disabled matplotlib block (a dead triple-quoted string
    # evaluated on every call) has been removed.
    return S1.name, S2.name, round(zscore.iloc[-1], 3), df[S1.name].iloc[-1], df[S2.name].iloc[-1]
# This function loads the stock data file in to a pandas data frame for processing. I am considering
# the daily stock returns for cointegration testing which I feel is more accurate method.
   
def load_data(file):
    """Load the master price CSV and derive daily returns.

    Returns a 3-tuple:
      returns  -- percentage daily returns restricted to the last 200 rows,
                  with the first row dropped (the NaN row when the file has
                  <= 200 rows; NOTE(review): when longer, a valid row is
                  discarded instead -- confirm intent) and any column that
                  still contains NaN removed entirely.
      df       -- the raw price DataFrame (first CSV column as index).
      returns1 -- a 0-row frame read with whitespace as the delimiter; since
                  the header is comma-separated this typically yields a
                  single column whose name is the whole header string.  The
                  script below splits that name on ',' to recover the ticker
                  list.  NOTE(review): this re-reads the file just for the
                  header -- df.columns would provide the same information.
    """
    df = pd.read_csv(file, index_col=[0])
    returns = df.pct_change()[-200:]
    returns = returns.iloc[1:,:].dropna(axis=1)
    returns1=pd.read_csv(file, nrows=0,delim_whitespace=True)
    return returns,df,returns1

def update_eod(masterfile, eodfile):
    """Append one end-of-day close row to the master price file.

    Reads the master CSV and an EOD text file (column 0 = ticker,
    column 5 = close), transposes the EOD data into a single row,
    appends it to the master table, drops any column left with NaN
    (tickers missing from either side) and rewrites the master file.

    NOTE(review): the output path is hard-coded to
    'C://master/stockdata.csv' (mirroring the original behaviour); the
    *masterfile* argument is ignored when writing back.
    """
    master = pd.read_csv(masterfile, index_col=[0])
    eod = pd.read_csv(eodfile, header=None, index_col=[0], usecols=[0, 5])
    # DataFrame.append was removed in pandas 2.0; pd.concat is the
    # supported equivalent with identical row-stacking semantics here.
    df = pd.concat([master, eod.T]).dropna(axis=1).reset_index(drop=True)
    df.to_csv('C://master/stockdata.csv')
    return
#update_eod('C://master/stockdata.csv','C://master/eod.txt')
   
returns, df, returns1 = load_data('C://stockdata.csv')


# Recover the ticker list from the header frame: the whole comma-separated
# header lands in a single column name, so split it on ',' and drop the
# leading index-column label (e.g. the date column).
lst1 = []
lst1 = returns1.columns.str.split(',').tolist()
lst1 = lst1[0]
del lst1[0]
print("Test ", len(lst1))

# Scan ordered pairs.  Both directions (A/B and B/A) are visited, matching
# the original behaviour; NOTE(review): lst1[0] never appears as the
# second leg because the inner index starts at 1.
num = len(lst1)
for i in range(num):
    for j in range(num - 1):
        if i == j + 1:
            # Same ticker on both legs: cointegration of a series with
            # itself is meaningless and such pairs never passed the
            # filters anyway (z-score is NaN), so skip the wasted work.
            continue
        S1 = returns[lst1[i]]
        S2 = returns[lst1[j + 1]]
        pValue = find_coint_significance(S1, S2, significance=0.05)
        SS1 = df[lst1[i]]
        SS2 = df[lst1[j + 1]]
        str1, str2, zScore, price1, price2 = plot_pairs(df, SS1, SS2, lb=20, xres=100, yres=300)
        # Record only strongly cointegrated pairs currently stretched
        # beyond 2 sigma.
        if pValue < 0.01 and pValue != 0.0 and abs(zScore) > 2.0:
            with open('D://Trade Book//Pair trading//Pair_Analysis.csv', 'a', newline='') as csvFile:
                writer = csv.writer(csvFile)
                writer.writerows([["Pair is Significant, pValue= " + str(pValue)],
                                  ["zScore: " + str(zScore)],
                                  ["Y = " + str1 + " Price :" + str(price1)],
                                  ["X = " + str2 + " Price :" + str(price2)], []])
                # The redundant csvFile.close() was removed: the 'with'
                # statement closes the file automatically.
 
Last edited:

VJAY

Well-Known Member
If anyone requires this is the code which has been enhanced from the original Ncube code

Python:
# -*- coding: utf-8 -*-
"""
Created on Thu Oct 11 21:15:34 2018

@author: HP
"""

# Importing the required python libraries
import pandas as pd
import matplotlib.pyplot as plt
from pylab import rcParams
import warnings
from statsmodels.tsa.stattools import coint
warnings.filterwarnings('ignore')

import csv


# Function to get the cointegration p-value for a pair. Note: the 'significance'
# parameter defaults to 0.01 and is currently unused -- callers must compare the
# returned p-value against their own threshold.
def find_coint_significance(S1, S2, significance=0.01):
    """Return the cointegration-test p-value for the pair (S1, S2).

    The *significance* argument is kept only for API compatibility; it
    is not applied here -- callers compare the returned p-value against
    their own cutoff.
    """
    return coint(S1, S2, autolag=None)[1]
# This function computes the z-score for the pair selected (the actual plotting
# code is commented out). Field lb is the lookback period, i.e. how far back the
# historical daily values are considered. xres & yres only matter for the disabled
# plot: they divide the stock prices so the lines sit next to the z-score line.

def plot_pairs(df, S1, S2, lb=20, xres=10, yres=10):
    """Return the latest pair statistics over the last *lb* bars.

    Standardizes the ratio of the two price legs across the lookback
    window and reports the names of both legs, the latest z-score
    rounded to 3 decimals, and the latest price of each leg.  The
    plotting this function was named for is disabled; xres/yres only
    scaled those plot lines and are unused here.
    """
    ratio = S1[-lb:] / S2[-lb:]
    ratio.name = 'ratio'
    zscore = (ratio - ratio.mean()) / ratio.std()
    zscore.name = 'zScore'
    # Disabled plotting, kept for reference:
    #   plt.style.use('ggplot'); rcParams['figure.figsize'] = 25, 10
    #   plt.plot(df[S1.name][-lb:].index, df[S1.name][-lb:].values / xres)
    #   plt.plot(df[S2.name][-lb:].index, df[S2.name][-lb:].values / yres)
    #   plt.plot(zscore[-lb:].index, zscore[-lb:].values)
    #   plt.show()
    latest_z = round(zscore.iloc[-1], 3)
    return S1.name, S2.name, latest_z, df[S1.name].iloc[-1], df[S2.name].iloc[-1]
# This function loads the stock data file in to a pandas data frame for processing. I am considering
# the daily stock returns for cointegration testing which I feel is more accurate method.
   
def load_data(file):
    """Read the master price CSV and compute daily percentage returns.

    Returns:
      returns  -- pct_change of prices, limited to the last 200 rows, with
                  the first row dropped (the NaN row when the file holds
                  <= 200 rows; NOTE(review): for longer files a real data
                  row is dropped instead -- confirm intent) and every
                  column still containing NaN removed.
      df       -- the raw price DataFrame (first CSV column as index).
      returns1 -- a header-only (0-row) frame parsed with whitespace as
                  the delimiter; a comma-separated header therefore ends
                  up as one column whose name is the full header line,
                  which the script below splits on ',' to get the ticker
                  list.  NOTE(review): this second read is only for the
                  header -- df.columns already contains the same names.
    """
    df = pd.read_csv(file, index_col=[0])
    returns = df.pct_change()[-200:]
    returns = returns.iloc[1:,:].dropna(axis=1)
    returns1=pd.read_csv(file, nrows=0,delim_whitespace=True)
    return returns,df,returns1

def update_eod(masterfile, eodfile):
    """Append one end-of-day close row to the master price file.

    Loads the master CSV plus an EOD text file (column 0 = ticker,
    column 5 = close), turns the EOD column into a single row via
    transpose, stacks it under the master table, drops columns with any
    NaN (tickers missing on either side) and rewrites the master file.

    NOTE(review): the write path is hard-coded to
    'C://master/stockdata.csv' (as in the original); *masterfile* is not
    used for the output.
    """
    master = pd.read_csv(masterfile, index_col=[0])
    eod = pd.read_csv(eodfile, header=None, index_col=[0], usecols=[0, 5])
    # DataFrame.append was removed in pandas 2.0; pd.concat performs the
    # same row-wise stacking.
    df = pd.concat([master, eod.T]).dropna(axis=1).reset_index(drop=True)
    df.to_csv('C://master/stockdata.csv')
    return
#update_eod('C://master/stockdata.csv','C://master/eod.txt')
   
returns, df, returns1 = load_data('C://stockdata.csv')


# Rebuild the ticker list from the header-only frame: the comma-separated
# header arrives as one column name, so split it and discard the leading
# index-column label (e.g. the date column).
lst1 = []
lst1 = returns1.columns.str.split(',').tolist()
lst1 = lst1[0]
del lst1[0]
print("Test ", len(lst1))

# Scan ordered pairs.  Both directions (A/B and B/A) are visited, as in
# the original; NOTE(review): lst1[0] never appears as the second leg
# because the inner index starts at 1.
num = len(lst1)
for i in range(num):
    for j in range(num - 1):
        if i == j + 1:
            # Same ticker on both legs: a series is trivially
            # "cointegrated" with itself and such pairs never passed the
            # filters (z-score is NaN), so skip the wasted coint() call.
            continue
        S1 = returns[lst1[i]]
        S2 = returns[lst1[j + 1]]
        pValue = find_coint_significance(S1, S2, significance=0.05)
        SS1 = df[lst1[i]]
        SS2 = df[lst1[j + 1]]
        str1, str2, zScore, price1, price2 = plot_pairs(df, SS1, SS2, lb=20, xres=100, yres=300)
        # Keep only strongly cointegrated pairs currently beyond 2 sigma.
        if pValue < 0.01 and pValue != 0.0 and abs(zScore) > 2.0:
            with open('D://Trade Book//Pair trading//Pair_Analysis.csv', 'a', newline='') as csvFile:
                writer = csv.writer(csvFile)
                # Bug fix: the second leg was mislabeled "Y = "; it is the
                # X leg (compare the earlier version of this script).
                writer.writerows([["Pair is Significant, pValue= " + str(pValue)],
                                  ["zScore: " + str(zScore)],
                                  ["Y = " + str1 + " Price :" + str(price1)],
                                  ["X = " + str2 + " Price :" + str(price2)], []])
                # The redundant csvFile.close() was removed: 'with'
                # closes the file automatically.
Thanks for sharing. Ncube bro shared code for the same thing earlier, though I haven't used it yet...
 

checkmate7

Well-Known Member
Today I will run it on the Nifty 200 while grouping the stocks by industry type, so the program will only compare stocks within the same industry.

But we need to back test these results....
 

VJAY

Well-Known Member
View attachment 29446
in this trade GDL not triggered
GATI buy triggered 115 @72.40....
Add-today Zscore 2.46
So we Add buy GATI 112 qty
Sell GDL 119qty
GATI Buy add triggered [email protected]
GDL short triggered [email protected] closed 162.10
Current zscore 1.21...so now no adds tomorrow
GDL qty -61
GATI-117
Dear ncube,
Here are we take normal short in GDL tomorrow?then we need to exit adds +excess qty in GATI?
 

VJAY

Well-Known Member

checkmate7

Well-Known Member
Here is analysis on all F&O stocks w.r.t Industry
 

Attachments