# Energy price non-linear regression # solve for oil sales price (outcome) # using 3 predictors of WTI Oil Price, # Henry Hub Price and MB Propane Spot Price import numpy as np # use 'pip install gekko' to get package from gekko import GEKKO import pandas as pd import numpy as np import matplotlib.pyplot as plt # data file from URL address data = 'https://apmonitor.com/me575/uploads/Main/oil_data.txt' df = pd.read_csv(data) xm1 = np.array(df["WTI_PRICE"]) # WTI Oil Price xm2 = np.array(df["HH_PRICE"]) # Henry Hub Gas Price xm3 = np.array(df["NGL_PRICE"]) # MB Propane Spot Price ym = np.array(df["BEST_PRICE"]) # oil sales price # GEKKO model m = GEKKO() a = m.FV(lb=-100.0,ub=100.0) b = m.FV(lb=-100.0,ub=100.0) c = m.FV(lb=-100.0,ub=100.0) d = m.FV(lb=-100.0,ub=100.0) x1 = m.Param(value=xm1) x2 = m.Param(value=xm2) x3 = m.Param(value=xm3) z = m.Param(value=ym) y = m.Var() m.Equation(y==a*(x1**b)*(x2**c)*(x3**d)) m.Minimize(((y-z)/z)**2) # Options a.STATUS = 1 b.STATUS = 1 c.STATUS = 1 d.STATUS = 1 m.options.IMODE = 2 m.options.SOLVER = 1 # Solve m.solve() print('a: ', a.value[0]) print('b: ', b.value[0]) print('c: ', c.value[0]) print('d: ', d.value[0]) cFormula = "Formula is : " + "\n" + \ r"$A * WTI^B * HH^C * PROPANE^D$" from scipy import stats slope, intercept, r_value, p_value, \ std_err = stats.linregress(ym,y) r2 = r_value**2 cR2 = "R^2 correlation = " + str(r_value**2) print(cR2) # plot solution plt.figure(1) plt.plot([20,140],[20,140],'k-',label='Measured') plt.plot(ym,y,'ro',label='Predicted') plt.xlabel('Measured Outcome (YM)') plt.ylabel('Predicted Outcome (Y)') plt.legend(loc='best') plt.text(25,115,'a =' + str(a.value[0])) plt.text(25,110,'b =' + str(b.value[0])) plt.text(25,105,'c =' + str(c.value[0])) plt.text(25,100,'d =' + str(d.value[0])) plt.text(25,90,r'$R^2$ =' + str(r_value**2)) plt.text(80,40,cFormula) plt.grid(True) plt.show()