import numpy as np import scipy as sp from scipy import stats from scipy.optimize import curve_fit import matplotlib.pyplot as plt import pandas as pd # import data url = 'http://apmonitor.com/che263/uploads/Main/stats_data.txt' data = pd.read_csv(url) x = data['x'].values y = data['y'].values def f(x, a, b): return a * x + b # perform regression popt, pcov = curve_fit(f, x, y) # retrieve parameter values a = popt[0] b = popt[1] # calculate parameter stdev s_a = np.sqrt(pcov[0,0]) s_b = np.sqrt(pcov[1,1]) # ##################################### # Single Variable Confidence Interval # ##################################### # calculate 95% confidence interval aL = a-1.96*s_a aU = a+1.96*s_a bL = b-1.96*s_b bU = b+1.96*s_b # calculate 3sigma interval a3L = a-3.0*s_a a3U = a+3.0*s_a b3L = b-3.0*s_b b3U = b+3.0*s_b # ##################################### # Linear Joint Confidence Interval # thanks to Trent Okeson # ##################################### eigenval, eigenvec = np.linalg.eig(pcov) max_eigenval = np.argmax(eigenval) largest_eigenvec = eigenvec[:, max_eigenval] largest_eigenval = eigenval[max_eigenval] if max_eigenval == 1: smallest_eigenval = eigenval[0] smallest_eigenvec = eigenvec[:, 0] else: smallest_eigenval = eigenval[1] smallest_eigenvec = eigenvec[:, 1] angle = np.arctan2(largest_eigenvec[1], largest_eigenvec[0]) if angle < 0: angle += 2*np.pi chisquare_val = 2.4477 theta_grid = np.linspace(0, 2*np.pi) phi = angle a_rot = chisquare_val*np.sqrt(largest_eigenval) b_rot = chisquare_val*np.sqrt(smallest_eigenval) ellipse_x_r = a_rot*np.cos(theta_grid) ellipse_y_r = b_rot*np.sin(theta_grid) R = [[np.cos(phi), np.sin(phi)], [-np.sin(phi), np.cos(phi)]] r_ellipse = np.stack((ellipse_x_r, ellipse_y_r)).T.dot(R) final_ellipse = r_ellipse + popt[None, :] # ##################################### # Nonlinear Joint Confidence Interval # ##################################### # Generate contour plot of SSE ratio vs. Parameters # meshgrid is +/- change in the objective value m = 200 i1 = np.linspace(a3L,a3U,m) i2 = np.linspace(b3L,b3U,m) a_grid, b_grid = np.meshgrid(i1, i2) n = len(y) # number of data points p = 2 # number of parameters # sum of squared errors sse = np.empty((m,m)) for i in range(m): for j in range(m): at = a_grid[i,j] bt = b_grid[i,j] sse[i,j] = np.sum((y-f(x,at,bt))**2) # normalize to the optimal solution best_sse = np.sum((y-f(x,a,b))**2) fsse = (sse - best_sse) / best_sse # compute f-statistic for the f-test alpha = 0.05 # alpha, confidence # alpha=0.05 is 95% confidence fstat = sp.stats.f.isf(alpha,p,(n-p)) flim = fstat * p / (n-p) obj_lim = flim * best_sse + best_sse # Create a contour plot plt.figure() CS = plt.contour(a_grid,b_grid,sse,[1,2,5,10],\ colors='gray') plt.clabel(CS, inline=1, fontsize=10) plt.xlabel('Slope (a)') plt.ylabel('Intercept (b)') # solid line to show confidence region CS = plt.contour(a_grid,b_grid,sse,[obj_lim],\ colors='orange',linewidths=[2.0]) plt.clabel(CS, inline=1, fontsize=10) # 2 dummy plots (gray,orange) for legend plt.plot([aL,aL],[bL,bL],c='gray',\ LineWidth=3,label='Objective') plt.plot([aL,aU,aU,aL,aL],[bL,bL,bU,bU,bL],c='blue',\ LineWidth=3,label='Parameter Confidence Region') plt.plot(final_ellipse[:, 0], final_ellipse[:, 1], 'k--',\ label='Linear Joint Confidence Region') plt.plot([aL,aL],[bL,bL],c='orange',\ LineWidth=4,\ label='Nonlinear Joint Confidence Region') plt.scatter(a,b,s=10,c='red',marker='o',\ label='Optimal Solution') plt.title('Parameter Statistics (y = a x + b)') plt.legend(loc=1) # Save the figure as a PNG plt.savefig('contour.png') plt.show()