import numpy as np
import scipy as sp
from scipy import stats
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt
import pandas as pd

# import data
url = 'http://apmonitor.com/che263/uploads/Main/stats_data.txt'
data = pd.read_csv(url)
x = data['x'].values
y = data['y'].values

def f(x, a, b):
    return a * x + b

# perform regression
popt, pcov = curve_fit(f, x, y)

# retrieve parameter values
a = popt[0]
b = popt[1]

# calculate parameter stdev
s_a = np.sqrt(pcov[0,0])
s_b = np.sqrt(pcov[1,1])

# #####################################
# Single Variable Confidence Interval
# #####################################
# calculate 95% confidence interval
aL = a-1.96*s_a
aU = a+1.96*s_a
bL = b-1.96*s_b
bU = b+1.96*s_b

# calculate 3sigma interval
a3L = a-3.0*s_a
a3U = a+3.0*s_a
b3L = b-3.0*s_b
b3U = b+3.0*s_b

# #####################################
# Linear Joint Confidence Interval
#  thanks to Trent Okeson
# #####################################
eigenval, eigenvec = np.linalg.eig(pcov)
max_eigenval = np.argmax(eigenval)
largest_eigenvec = eigenvec[:, max_eigenval]
largest_eigenval = eigenval[max_eigenval]

if max_eigenval == 1:
    smallest_eigenval = eigenval[0]
    smallest_eigenvec = eigenvec[:, 0]
else:
    smallest_eigenval = eigenval[1]
    smallest_eigenvec = eigenvec[:, 1]

angle = np.arctan2(largest_eigenvec[1], largest_eigenvec[0])
if angle < 0:
    angle += 2*np.pi

chisquare_val = 2.4477
theta_grid = np.linspace(0, 2*np.pi)
phi = angle
a_rot = chisquare_val*np.sqrt(largest_eigenval)
b_rot = chisquare_val*np.sqrt(smallest_eigenval)

ellipse_x_r = a_rot*np.cos(theta_grid)
ellipse_y_r = b_rot*np.sin(theta_grid)

R = [[np.cos(phi), np.sin(phi)], [-np.sin(phi), np.cos(phi)]]

r_ellipse = np.stack((ellipse_x_r, ellipse_y_r)).T.dot(R)
final_ellipse = r_ellipse + popt[None, :]

# #####################################
# Nonlinear Joint Confidence Interval
# #####################################
# Generate contour plot of SSE ratio vs. Parameters
# meshgrid is +/- change in the objective value
m = 200

i1 = np.linspace(a3L,a3U,m)
i2 = np.linspace(b3L,b3U,m)
a_grid, b_grid = np.meshgrid(i1, i2)
n = len(y) # number of data points
p = 2      # number of parameters

# sum of squared errors
sse = np.empty((m,m))
for i in range(m):
    for j in range(m):
        at = a_grid[i,j]
        bt = b_grid[i,j]
        sse[i,j] = np.sum((y-f(x,at,bt))**2)

# normalize to the optimal solution
best_sse = np.sum((y-f(x,a,b))**2)
fsse = (sse - best_sse) / best_sse

# compute f-statistic for the f-test
alpha = 0.05 # alpha, confidence
             # alpha=0.05 is 95% confidence
fstat = sp.stats.f.isf(alpha,p,(n-p))
flim = fstat * p / (n-p)
obj_lim = flim * best_sse + best_sse

# Create a contour plot
plt.figure()
CS = plt.contour(a_grid,b_grid,sse,[1,2,5,10],\
                 colors='gray')
plt.clabel(CS, inline=1, fontsize=10)
plt.xlabel('Slope (a)')
plt.ylabel('Intercept (b)')
# solid line to show confidence region
CS = plt.contour(a_grid,b_grid,sse,[obj_lim],\
                 colors='orange',linewidths=[2.0])
plt.clabel(CS, inline=1, fontsize=10)

# 2 dummy plots (gray,orange) for legend
plt.plot([aL,aL],[bL,bL],c='gray',\
          LineWidth=3,label='Objective')
plt.plot([aL,aU,aU,aL,aL],[bL,bL,bU,bU,bL],c='blue',\
          LineWidth=3,label='Parameter Confidence Region')
plt.plot(final_ellipse[:, 0], final_ellipse[:, 1], 'k--',\
         label='Linear Joint Confidence Region')
plt.plot([aL,aL],[bL,bL],c='orange',\
          LineWidth=4,\
          label='Nonlinear Joint Confidence Region')
plt.scatter(a,b,s=10,c='red',marker='o',\
            label='Optimal Solution')

plt.title('Parameter Statistics (y = a x + b)')
plt.legend(loc=1)

# Save the figure as a PNG
plt.savefig('contour.png')
plt.show()