Data Regression with Python
There is additional information on regression in the Data Science online course.
While this exercise demonstrates only one independent parameter and one dependent variable, any number of independent or dependent terms can be included. See Energy Price regression with three independent variables as an example.
import numpy as np x = np.array([0,1,2,3,4,5]) y = np.array([0,0.8,0.9,0.1,-0.8,-1])
p1 = np.polyfit(x,y,1) p2 = np.polyfit(x,y,2) p3 = np.polyfit(x,y,3)
import matplotlib.pyplot as plt plt.plot(x,y,'o') xp = np.linspace(-2,6,100) plt.plot(xp,np.polyval(p1,xp),'r-') plt.plot(xp,np.polyval(p2,xp),'b--') plt.plot(xp,np.polyval(p3,xp),'m:')
SSresid = np.sum(yresid**2) SStotal = len(y) * np.var(y)
from scipy.stats import linregress
A frequent activity for scientists and engineers is to develop correlations from data. By importing the data into Python, data analysis such as statistics, trending, or calculations can be performed to synthesize the data into relevant and actionable information. This tutorial demonstrates how to create linear or polynomial functions that best approximate the data trend, plot the results, and perform a basic statistical analysis. A script file of the Python source code with sample data is below.
Correlations from data are obtained by adjusting parameters of a model to best fit the measured outcomes. The analysis may include statistics, data visualization, or other calculations to synthesize the data into relevant and actionable information. This tutorial demonstrates how to create linear, polynomial, or nonlinear functions that best approximate the data and analyze the result. Script files of the Python source code with sample data are available below.

a,b,c = x
return np.sum(((calc_y(x)-ym)/ym)**2)
from scipy.optimize import minimize
from gekko import GEKKO
def calc_y(x):
    """Evaluate the model ``a + b/xm + c*log(xm)`` at the points ``xm``.

    ``x`` is indexed at positions 0, 1 and 2 for the parameters a, b and c.
    ``xm`` is read from the enclosing module scope.
    """
    offset, inverse_coef, log_coef = x[0], x[1], x[2]
    return offset + inverse_coef / xm + log_coef * np.log(xm)
def objective(x):
    """Return the sum of squared relative errors for parameters ``x``.

    The prediction comes from ``calc_y(x)`` and is compared against the
    module-level measurements ``ym``. Each residual is divided by the
    matching measurement before squaring, so the errors are relative.
    """
    # Vectorized over all measurements instead of an index loop.
    relative_error = (calc_y(x) - ym) / ym
    return np.sum(relative_error**2)
x0 = np.zeros(3) x0[0] = 0.0 # a x0[1] = 0.0 # b x0[2] = 0.0 # c
print('Initial SSE Objective: ' + str(objective(x0)))
bnds100 = (-100.0, 100.0) no_bnds = (-1.0e10, 1.0e10) bnds = (no_bnds, no_bnds, bnds100) solution = minimize(objective,x0,method='SLSQP',bounds=bnds) x = solution.x y = calc_y(x)
m = GEKKO()
a = m.FV(value=0) b = m.FV(value=0) c = m.FV(value=0,lb=-100,ub=100) x = m.Param(value=xm) ymeas = m.Param(value=ym) ypred = m.Var()
a.STATUS = 1 # available to optimizer b.STATUS = 1 # to minimize objective c.STATUS = 1
m.Equation(ypred == a + b/x + c*m.log(x))
m.options.IMODE = 2 # regression mode
m.solve() # remote=False for local solve
print('Final SSE Objective: ' + str(objective(x)))
print('Final SSE Objective: ' + str(m.options.objfcnval))
print('a = ' + str(x[0])) print('b = ' + str(x[1])) print('c = ' + str(x[2]))
print('a = ' + str(a.value[0])) print('b = ' + str(b.value[0])) print('c = ' + str(c.value[0]))
plt.plot(xm,ym,'ro') plt.plot(xm,y,'bx');
plt.plot(x,ymeas,'ro') plt.plot(x,ypred,'bx');
(:toggle hide python_minimize button show="Python SciPy Solution":) (:div id=python_minimize:) (:source lang=python:) import numpy as np from scipy.optimize import minimize
xm = np.array([18.3447,79.86538,85.09788,10.5211,44.4556, 69.567,8.960,86.197,66.857,16.875, 52.2697,93.917,24.35,5.118,25.126, 34.037,61.4445,42.704,39.531,29.988])
ym = np.array([5.072,7.1588,7.263,4.255,6.282, 6.9118,4.044,7.2595,6.898,4.8744, 6.5179,7.3434,5.4316,3.38,5.464, 5.90,6.80,6.193,6.070,5.737])
def calc_y(x):
a = x[0] b = x[1] c = x[2] y = a + b/xm + c*np.log(xm) return y
def objective(x):
    """Return the sum of squared relative errors for parameters ``x``.

    The prediction comes from ``calc_y(x)`` and is compared against the
    module-level measurements ``ym``. Each residual is divided by the
    matching measurement before squaring, so the errors are relative.
    """
    # Vectorized over all measurements instead of an index loop.
    relative_error = (calc_y(x) - ym) / ym
    return np.sum(relative_error**2)
# Initial guess: a = b = c = 0 (np.zeros already provides the zeros).
x0 = np.zeros(3)

# Report the objective before optimizing.
print('Initial SSE Objective: ' + str(objective(x0)))

# Bounds: a and b use a very wide interval; c is limited to [-100, 100].
no_bnds = (-1.0e10, 1.0e10)
bnds100 = (-100.0, 100.0)
bnds = (no_bnds, no_bnds, bnds100)

# Minimize the objective with SLSQP, then evaluate the fitted model.
solution = minimize(objective, x0, method='SLSQP', bounds=bnds)
x = solution.x
y = calc_y(x)

# Report the objective after optimizing and the fitted parameters.
print('Final SSE Objective: ' + str(objective(x)))
print(
    'Solution',
    'a = ' + str(x[0]),
    'b = ' + str(x[1]),
    'c = ' + str(x[2]),
    sep='\n',
)
import matplotlib.pyplot as plt plt.figure(1) plt.plot(xm,ym,'ro') plt.plot(xm,y,'bx'); plt.xlabel('x') plt.ylabel('y') plt.legend(['Measured','Predicted'],loc='best') plt.savefig('results.png') (:sourceend:) (:divend:)
Excel and MATLAB
This regression tutorial can also be completed with Excel and MATLAB. Click on the appropriate link for additional information.
xm = np.array([18.3447,79.86538,85.09788,10.5211,44.4556, 69.567,8.960,86.197,66.857,16.875, 52.2697,93.917,24.35,5.118,25.126, 34.037,61.4445,42.704,39.531,29.988])
ym = np.array([5.072,7.1588,7.263,4.255,6.282, 6.9118,4.044,7.2595,6.898,4.8744, 6.5179,7.3434,5.4316,3.38,5.464, 5.90,6.80,6.193,6.070,5.737])
(:source lang=python:) import numpy as np from scipy.optimize import minimize
xm = np.array([18.34470085,79.86537666,85.09787509,10.52110327,44.45558653, 69.56726251,8.959848679,86.196964,66.85655694,16.87490807, 52.26970696,93.91681982,24.34668842,5.117815482,25.12622222, 34.03722832,61.44454908,42.703577,39.53089298,29.98844942])
ym = np.array([5.072227705,7.15881537,7.262764628,4.254581322,6.281866658, 6.911787335,4.043809747,7.259528698,6.898089228,4.874417979, 6.517943774,7.343419502,5.431648634,3.384634319,5.464227719, 5.90043173,6.803895621,6.193263135,6.070397707,5.736792474])
def calc_y(x):
    """Evaluate the model ``a + b/xm + c*log(xm)`` at the points ``xm``.

    ``x`` is indexed at positions 0, 1 and 2 for the parameters a, b and c.
    ``xm`` is read from the enclosing module scope.
    """
    offset, inverse_coef, log_coef = x[0], x[1], x[2]
    return offset + inverse_coef / xm + log_coef * np.log(xm)
def objective(x):
    """Return the sum of squared relative errors for parameters ``x``.

    The prediction comes from ``calc_y(x)`` and is compared against the
    module-level measurements ``ym``. Each residual is divided by the
    matching measurement before squaring, so the errors are relative.
    """
    # Vectorized over all measurements instead of an index loop.
    relative_error = (calc_y(x) - ym) / ym
    return np.sum(relative_error**2)
# Initial guess: a = b = c = 0 (np.zeros already provides the zeros).
x0 = np.zeros(3)

# Report the objective before optimizing.
print('Initial SSE Objective: ' + str(objective(x0)))

# Bounds: a and b use a very wide interval; c is limited to [-100, 100].
no_bnds = (-1.0e10, 1.0e10)
bnds100 = (-100.0, 100.0)
bnds = (no_bnds, no_bnds, bnds100)

# Minimize the objective with SLSQP, then evaluate the fitted model.
solution = minimize(objective, x0, method='SLSQP', bounds=bnds)
x = solution.x
y = calc_y(x)

# Report the objective after optimizing and the fitted parameters.
print('Final SSE Objective: ' + str(objective(x)))
print(
    'Solution',
    'a = ' + str(x[0]),
    'b = ' + str(x[1]),
    'c = ' + str(x[2]),
    sep='\n',
)
import matplotlib.pyplot as plt plt.figure(1) plt.plot(xm,ym,'ro') plt.plot(xm,y,'bx'); plt.xlabel('x') plt.ylabel('y') plt.legend(['Measured','Predicted'],loc='best') plt.savefig('results.png') (:sourceend:)
Data Regression with Python
