

// XLW headers:
#include <xlw/MyContainers.h>
#include <xlw/CellMatrix.h>
#include <xlw/DoubleOrNothing.h>
#include <xlw/ArgList.h>
using namespace xlw;

// Headers for Accumulator:

#include <boost/accumulators/accumulators.hpp>
#include <boost/accumulators/statistics/stats.hpp>
#include <boost/accumulators/statistics/mean.hpp>
#include <boost/accumulators/statistics/sum.hpp>
#include <boost/accumulators/statistics/count.hpp>
#include <boost/accumulators/statistics/variance.hpp>
#include <boost/accumulators/statistics/covariance.hpp>
#include <boost/accumulators/statistics/variates/covariate.hpp>
#include <boost/math/special_functions/binomial.hpp>
#include <boost/math/special_functions/gamma.hpp> // lgamma
using namespace boost::accumulators;

#include <math.h>     // for sqrt (sign and correlation tests) and absolute value (runs test)
#include <algorithm>  // for random_shuffle algorithm

#include <boost/math/distributions/binomial.hpp> // Binomial distribution for sign test
using boost::math::binomial;
 
#include <vector> // used in correlation test

#include "PE_Test_Concrete.h"

//////////////////////////////////////////////////////////////////////////
// Function to clear accumulators, used in correlation test
#include <new>
#include <boost/utility.hpp>

template< typename DEFAULT_INITIALIZABLE >
inline void clear( DEFAULT_INITIALIZABLE& object )
{
  object.DEFAULT_INITIALIZABLE::~DEFAULT_INITIALIZABLE();
  ::new ( boost::addressof(object) ) DEFAULT_INITIALIZABLE() ;
}
///////////////////////////////////////////////////////////////////////////
//////////////// Sign test ////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////

void PE_SignTest::operator()(const ArgumentList& Input, ArgumentList& Output) const
{
	// Extract input data because it's not efficient to query ArgumentList in a loop.
	// Extracting traded quantity, bid price and ask price:
	MyArray TradedQuant = const_cast<ArgumentList&>(Input).GetArrayArgumentValue("traded quantity");
	MyArray Bid = const_cast<ArgumentList&>(Input).GetArrayArgumentValue("bid");
	MyArray Ask = const_cast<ArgumentList&>(Input).GetArrayArgumentValue("ask");
	
	CellMatrix TestResults(20, 3); // test results	
	
	accumulator_set<int, stats<tag::mean, tag::sum, tag::count> > all_periods; 
	// Accumulator object for: success probability, number of successes, number of
	// observations. "all_periods" means that all trading periods are included, 
	// even those with zero market exposure.

	accumulator_set<int, stats<tag::mean, tag::sum, tag::count> > active_periods; 
	// This object is similar to "all_periods", the only difference is that the 
	// periods with zero market exposure are excluded.

	
	int success;              // success indicator
	int num_obs;			  // number of observations
	unsigned long i;		  // loop counter
	double pvalue;			  // p-value for H0: probability of success = 0.5
							  //          vs Ha: probability of success > 0.5	
	double p_success;         // estimated success probability
	double ci_left;           // left boundary for Wilson confidence interval for the probability of success
	double ci_right;          // right boundary for Wilson confidence interval for the probability of success
	double z_975 = 1.960;     // .975 quantile of standard normal distribution
	double a1, a2, a3;        // intermediate variables to compute different parts of Wilson's formula
    
	double curr_exposure;     // current net exposure
	double last_exposure;	  // last period's net exposure
	                          // See http://www.hedgefund.net/glossary.php3 for terminology
	
	double buy_profit;        // profit from being 1 share long
	double sell_profit;       // profit from being 1 share short

	curr_exposure = TradedQuant[0];
	last_exposure = 0;
	buy_profit = sell_profit = 0;
	
	// counting the number of successes:
	for(i = 1; i < TradedQuant.size(); i++)
	{				
		last_exposure = curr_exposure;
		curr_exposure += TradedQuant[i];			
		buy_profit = Bid[i] - Ask[i - 1];
		sell_profit = Bid[i - 1] - Ask[i];
			
		success = 0;
		if (last_exposure > 0)
		{
			if (buy_profit > 0)
			success = 1;
		}
		else if (last_exposure < 0)
		{
			if (sell_profit > 0)
			success = 1;
		}
		else // last_exposure is zero					  
		{
			if (buy_profit <= 0 && sell_profit <= 0)
			success = 1;
		}		
		all_periods(success);
		if (last_exposure != 0) active_periods(success);							
	}
	
	// Computing final results for "all_periods"	
	num_obs = count(all_periods);
	p_success = mean(all_periods);
	// Construct the null binomial distribution with probability of success 0.5:
	binomial null_dist_all(num_obs, 0.5); 
	// Compute p-value:
	if (sum(all_periods) > 0) 
		pvalue = 1. - cdf(null_dist_all, sum(all_periods) - 1);
	else
		pvalue = 1.0;

	// Compute Wilson's interval:		
	// p_success = 0.4; // These are the values I used for testing
	// num_obs = 100;   // the corresponding interval must be [.3094 ; .4980]	
	a1 = num_obs / (num_obs + pow(z_975, 2));
	a2 = p_success + pow(z_975, 2) / 2 / num_obs;
	a3 = z_975 * sqrt( p_success * (1 - p_success) / num_obs + pow(z_975, 2) / 4 / pow(static_cast<double>(num_obs), 2));
	ci_left = a1 * (a2 - a3);  // by construction, can't be negative
	ci_right = a1 * (a2 + a3);
	if (ci_right > 1.0) ci_right = 1.0;

	// Output results:
	TestResults(0, 0) = " Sign test results for all periods (positive, negative, and zero market exposure) ";
	TestResults(2, 0) = " Number of trials ";
	TestResults(2, 1) = static_cast<double>(num_obs); // CellMatrix takes only double numbers
	TestResults(3, 0) = " Successes ";
	TestResults(3, 1) = static_cast<double>(sum(all_periods));
	TestResults(4, 0) = " Failures ";
	TestResults(4, 1) = static_cast<double>(num_obs - sum(all_periods));
	TestResults(5, 0) = " Success probability ";
	TestResults(5, 1) = p_success;
	TestResults(6, 0) = " 95% Wilson's confidence interval ";
	TestResults(6, 1) = ci_left;
	TestResults(6, 2) = ci_right;
	TestResults(7, 0) = " P-value for H0: success probability = 1/2 vs > 1/2 (under Bernoulli process) ";
	TestResults(7, 1) = sum(all_periods) > 0 ? (1. - cdf(null_dist_all, sum(all_periods) - 1)) : 1.0;

	// Computing final results for "active_periods"
	num_obs = count(active_periods);
	p_success = mean(active_periods);
	// Construct the null binomial distribution with probability of success 0.5:
	binomial null_dist_active(num_obs, 0.5); 
	// Compute p-value:
	if (sum(active_periods) > 0) 
		pvalue = 1. - cdf(null_dist_active, sum(active_periods) - 1);
	else
		pvalue = 1.0;
	
	// Compute Wilson's interval:		
	a1 = num_obs / (num_obs + pow(z_975, 2));
	a2 = p_success + pow(z_975, 2) / 2 / num_obs;
	a3 = z_975 * sqrt( p_success * (1 - p_success) / num_obs + pow(z_975, 2) / 4 / pow(static_cast<double>(num_obs), 2));
	ci_left = a1 * (a2 - a3);  // by construction, can't be negative
	ci_right = a1 * (a2 + a3);
	if (ci_right > 1.0) ci_right = 1.0;
	
	// Output results:
	TestResults(10, 0) = " Sign test results for active periods only (non-zero market exposure) ";
	TestResults(12, 0) = " Number of trials ";
	TestResults(12, 1) = num_obs; // CellMatrix takes only double numbers
	TestResults(13, 0) = " Successes ";
	TestResults(13, 1) = static_cast<double>(sum(active_periods));
	TestResults(14, 0) = " Failures ";
	TestResults(14, 1) = static_cast<double>(num_obs - sum(active_periods));
	TestResults(15, 0) = " Success probability ";
	TestResults(15, 1) = p_success;
	TestResults(16, 0) = " 95% Wilson's confidence interval ";
	TestResults(16, 1) = ci_left;
	TestResults(16, 2) = ci_right;
	TestResults(17, 0) = " P-value for H0: success probability = 1/2 vs > 1/2 (under Bernoulli process) ";
	TestResults(17, 1) = sum(active_periods) > 0 ? (1. - cdf(null_dist_active, sum(active_periods) - 1)) : 1.0;
	Output.add("test results", TestResults);
}

// Polymorphic copy: returns a newly allocated PE_SignTest copy-constructed
// from this object (presumably owned by the caller from then on).
PE_Test* PE_SignTest::clone() const
{
	PE_Test* const duplicate = new PE_SignTest(*this);
	return duplicate;
}

///////////////////////////////////////////////////////////////////////////////////////////////
/////////// Correlation test //////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////

// Correlation test.
//
// Reads the arrays "traded quantity", "bid", "ask" and the number
// "number of replications" from Input and adds a 10 x 2 CellMatrix named
// "test results" to Output.
//
// For every period with non-zero carried-over exposure, the exposure is paired
// with the profit per share (buy profit when long, minus sell profit when short).
// The correlation of these pairs is reported together with a randomization
// p-value: the share of replications in which the correlation of randomly
// re-paired exposures and profits is at least as large as the original one.
void PE_CorrelationTest::operator()(const ArgumentList& Input, ArgumentList& Output) const
{
	// Pull the inputs out once; querying ArgumentList inside a loop is not efficient.
	ArgumentList& Args = const_cast<ArgumentList&>(Input);
	MyArray TradedQuant = Args.GetArrayArgumentValue("traded quantity");
	MyArray Bid = Args.GetArrayArgumentValue("bid");
	MyArray Ask = Args.GetArrayArgumentValue("ask");
	double num_replic = Args.GetDoubleArgumentValue("number of replications");

	// Samples kept for reshuffling (periods with zero market exposure are excluded).
	// (For technical reasons, MyArray or valarray can't be used here.)
	std::vector<double> all_exposures; // market exposures
	std::vector<double> all_profits;   // matching profits per share

	// Accumulator object for market exposures:
	accumulator_set<double, stats<tag::variance> > exposures_accum;
	// Accumulator object for profits per share:
	accumulator_set<double, stats<tag::variance> > profits_accum;
	// Accumulator to compute covariance between exposure and profit:
	accumulator_set<double, stats<tag::covariance<double, tag::covariate1> > > covar_accum;
	// Accumulator for randomization p-value:
	accumulator_set<int, stats<tag::mean> > pvalue_accum;

	// Pass 1: build the paired sample and feed the accumulators.
	double running_exposure = TradedQuant[0];
	for (unsigned long t = 1; t < TradedQuant.size(); ++t)
	{
		double held = running_exposure;                  // exposure carried into period t
		running_exposure += TradedQuant[t];
		double long_gain = Bid[t] - Ask[t - 1];          // profit from being 1 share long
		double short_gain = Bid[t - 1] - Ask[t];         // profit from being 1 share short

		if (held != 0)
		{
			exposures_accum(held);
			all_exposures.push_back(held);
		}

		if (held > 0 || held < 0)
		{
			// Long positions earn the buy profit, short positions minus the sell profit.
			double profit = (held > 0) ? long_gain : -short_gain;
			profits_accum(profit);
			all_profits.push_back(profit);
			covar_accum(held, covariate1 = profit);
		}
	}

	// Correlation between exposure and profit for the original sample:
	double corr_orig = covariance(covar_accum) / sqrt(variance(exposures_accum) * variance(profits_accum));

	// Pass 2: randomization p-value.
	for (unsigned long rep = 1; rep <= num_replic; ++rep)
	{
		// Re-pair exposures with profits at random (STL built-in random generator):
		std::random_shuffle(all_exposures.begin(), all_exposures.end());

		// Start each replication from empty accumulators:
		clear(exposures_accum);
		clear(profits_accum);
		clear(covar_accum);

		for (unsigned long k = 0; k < all_exposures.size(); ++k)
		{
			exposures_accum(all_exposures[k]);
			profits_accum(all_profits[k]);
			covar_accum(all_exposures[k], covariate1 = all_profits[k]);
		}

		// Correlation between exposure and profit for the shuffled sample:
		double corr_shuffled = covariance(covar_accum) / sqrt(variance(exposures_accum) * variance(profits_accum));

		int at_least_as_large = 0;
		if (corr_shuffled >= corr_orig)
			at_least_as_large = 1;
		pvalue_accum(at_least_as_large);
	}

	// Output of results:
	CellMatrix TestResults(10, 2);
	TestResults(0, 0) = " Correlation between market exposure and";
	TestResults(1, 0) = " the size and direction of market movements";
	TestResults(3, 0) = " Number of observations: ";
	TestResults(3, 1) = static_cast<double>(all_exposures.size());
	TestResults(4, 0) = " Number of replications for randomization test: ";
	TestResults(4, 1) = num_replic;
	TestResults(5, 0) = " Estimated correlation coefficient: ";
	TestResults(5, 1) = corr_orig;
	TestResults(6, 0) = " P-value for H0: correlation = 0 vs > 0 ";
	TestResults(6, 1) = mean(pvalue_accum);
	Output.add("test results", TestResults);
}

// Polymorphic copy: returns a newly allocated PE_CorrelationTest copy-constructed
// from this object (presumably owned by the caller from then on).
PE_Test* PE_CorrelationTest::clone() const
{
	PE_Test* const duplicate = new PE_CorrelationTest(*this);
	return duplicate;
}

//////////////////////////////////////////////////////////////////////////////
////////////////////// Runs Test /////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////

// Runs test for independence of market moves.
//
// Reads the arrays "bid" and "ask" from Input and adds a 15 x 2 CellMatrix
// named "test results" to Output.
//
// A market move is the change of the bid-ask midpoint between consecutive
// periods; periods where the midpoint doesn't change are discarded. Up moves
// are "successes", down moves are "failures", and a run is a maximal sequence
// of consecutive moves in the same direction. The observed number of runs is
// compared with its distribution under independence (see RunsPMF).
//
// Degenerate samples: if all moves go in one direction (or there are no moves
// at all) the number of runs is not random, so all three probabilities are
// reported as 1. With no moves at all, the observed and expected numbers of
// runs are reported as 0.
//
// NOTE(review): "ask" is indexed up to Bid.size() - 1 without a length check,
// so it is assumed to be at least as long as "bid".
void PE_RunsTest::operator()(const ArgumentList& Input, ArgumentList& Output) const
{
	// Extract input data because it's not efficient to query ArgumentList in a loop.
	// Extract bid and ask prices:
	MyArray Bid = const_cast<ArgumentList&>(Input).GetArrayArgumentValue("bid"); // bid price
	MyArray Ask = const_cast<ArgumentList&>(Input).GetArrayArgumentValue("ask"); // ask price

	CellMatrix TestResults(15, 2);    // Test results
	int num_up = 0;                   // number of up moves or "successes"
	int num_down = 0;                 // number of down moves or "failures"
	int num_runs = 0;                 // number of runs
	int last_direction = 0;           // +1 / -1 for the latest non-zero move, 0 if none seen yet

	// Starting at i = 1 also covers inputs with fewer than two prices (no moves).
	for (unsigned long i = 1; i < Bid.size(); i++)
	{
		const double increm = (Bid[i] + Ask[i]) / 2 - (Bid[i - 1] + Ask[i - 1]) / 2;
		int direction;
		if (increm > 0)
		{
			direction = 1;
			num_up++;
		}
		else if (increm < 0)
		{
			direction = -1;
			num_down++;
		}
		else
			continue; // unchanged midpoint: discarded

		// The first move opens the first run; every change of direction opens a new one.
		if (direction != last_direction) num_runs++;
		last_direction = direction;
	} // for i

	// Expected number of runs, 2 * n * m / (n + m) + 1. The product is formed in
	// double precision because it doesn't fit into an int for large samples.
	const int num_moves = num_up + num_down;
	double exp_runs = 0;
	if (num_moves > 0)
		exp_runs = 2.0 * num_up * num_down / num_moves + 1;

	// compute p-values:
	double pvalue_low = 1;     // Probabilities of obtaining extremely low
	double pvalue_high = 1;    // or high number of runs
	double pvalue_2sided = 1;  // 2-sided p-value of obtaining the number of runs that deviates from
	                           // the theoretical mean at least as much as the observed value
	if (num_up > 0 && num_down > 0)
	{
		pvalue_low = pvalue_high = pvalue_2sided = 0;

		// fabs, not abs: with <math.h> alone, abs may resolve to the integer
		// version and silently truncate the deviation.
		const double observed_dev = fabs(num_runs - exp_runs);
		const int max_runs = std::min(num_up, num_down) * 2 + 1;
		for (int j = 2; j <= max_runs; j++)
		{
			const double pmf = RunsPMF(num_up, num_down, j);
			if (j <= num_runs)
				pvalue_low += pmf;
			if (j >= num_runs)
				pvalue_high += pmf;
			if (fabs(j - exp_runs) >= observed_dev)
				pvalue_2sided += pmf;
		}
	}

	//// Output of results:
	TestResults(0, 0) = " Runs test for independence ";
	TestResults(2, 0) = " Number of observations ";
	TestResults(2, 1) =  static_cast<double>(num_moves);
	TestResults(3, 0) = " Successes ";
	TestResults(3, 1) =  static_cast<double>(num_up);
	TestResults(4, 0) = " Failures ";
	TestResults(4, 1) =  static_cast<double>(num_down);
	TestResults(5, 0) = " Observed number of runs ";
	TestResults(5, 1) =  static_cast<double>(num_runs);
	TestResults(6, 0) = " Expected number of runs ";
	TestResults(6, 1) =  exp_runs;
	TestResults(8, 0) = " Two-sided p-value under independence ";
	TestResults(8, 1) =  pvalue_2sided;
	TestResults(10, 0) = " Probability of getting at least the observed number ";
	TestResults(11, 0) = " of runs under independence ";
	TestResults(10, 1) = pvalue_high;
	TestResults(13, 0) = " Probability of getting at most the observed number ";
	TestResults(14, 0) = " of runs under independence ";
	TestResults(13, 1) = pvalue_low;
	Output.add("test results", TestResults);
}

// Polymorphic copy: returns a newly allocated PE_RunsTest copy-constructed
// from this object (presumably owned by the caller from then on).
PE_Test* PE_RunsTest::clone() const
{
	PE_Test* const duplicate = new PE_RunsTest(*this);
	return duplicate;
}

namespace
{
	// Natural logarithm of the binomial coefficient C(top, bottom).
	// Requires 0 <= bottom <= top.
	double LogBinomialCoefficient(int top, int bottom)
	{
		return boost::math::lgamma(top + 1.0)
		     - boost::math::lgamma(bottom + 1.0)
		     - boost::math::lgamma(top - bottom + 1.0);
	}

	// Returns C(top1, bottom1) * C(top2, bottom2) / exp(log_denominator).
	// A coefficient whose lower index exceeds its upper index is zero, and so is the result.
	// Requires bottom1 >= 0 and bottom2 >= 0.
	double BinomialProductRatio(int top1, int bottom1, int top2, int bottom2, double log_denominator)
	{
		if (bottom1 > top1 || bottom2 > top2)
			return 0.0;
		return exp(LogBinomialCoefficient(top1, bottom1)
		         + LogBinomialCoefficient(top2, bottom2)
		         - log_denominator);
	}
}

double PE_RunsTest::RunsPMF(int n, int m, int r) const
{
	////////////////////////////////////////////////////////////////////////////////////////////
	// This function returns the probability of obtaining r runs of wins and losses
	// given that there are n wins and m losses in total, and the trials are assumed
	// independent. The formula is given in Ross, "A first course in probability", 2003, p 61:
	//
	//   r = 2k     : 2 * C(m-1, k-1) * C(n-1, k-1) / C(n+m, n)
	//   r = 2k + 1 : [C(m-1, k-1) * C(n-1, k) + C(m-1, k) * C(n-1, k-1)] / C(n+m, n)
	//
	// Restrictions on input parameters: n > 0, m > 0, 2 <= r <= (min(m,n) * 2 + 1)
	// For n <= 0, m <= 0 or r < 2 the function returns 0.
	//
	// The coefficients are combined in log space (via lgamma): C(n+m, n) itself
	// exceeds the range of double once n + m is larger than roughly a thousand,
	// although the ratio is an ordinary probability.
	////////////////////////////////////////////////////////////////////////////////////////////

	if (n <= 0 || m <= 0 || r < 2)
		return 0.0;

	const int k = r / 2;                                          // r is equal to either 2*k or 2*k + 1
	const double log_total = LogBinomialCoefficient(n + m, n);    // ln C(n+m, n), the common denominator

	if (r % 2 == 0)
	{
		// k runs of wins and k runs of losses; the factor 2 accounts for
		// which kind of run comes first.
		return 2 * BinomialProductRatio(m - 1, k - 1, n - 1, k - 1, log_total);
	}

	// k runs of one kind and k + 1 runs of the other kind.
	return BinomialProductRatio(m - 1, k - 1, n - 1, k, log_total)
	     + BinomialProductRatio(m - 1, k, n - 1, k - 1, log_total);
}
