LeastSquares.gms : Demonstrate the use of numpy.linalg.lstsq on the Filip test problem using Embedded Code Python

Description

Run numpy.linalg.lstsq on the Filip test problem (Level of Difficulty:
Higher) from NIST and verify that the results are those certified to
be correct. The problem and results were both taken from the NIST Web site.

This model is derived from the testlib model LS02 which was retired due
to the LS solver being dropped with GAMS 34. It shows the use of the
Python function numpy.linalg.lstsq as one alternative to adapt existing
GAMS code that used the LS solver before.


Category : GAMS Data Utilities library


Main file : LeastSquares.gms   includes :  LeastSquares.gms

$title Demonstrate the use of numpy.linalg.lstsq on the Filip test problem using Embedded Code Python (LeastSquares,SEQ=141)

$ontext
Run numpy.linalg.lstsq on the Filip test problem (Level of Difficulty:
Higher) from NIST and verify that the results are those certified to
be correct. The problem and results were both taken from the NIST Web site.

This model is derived from the testlib model LS02 which was retired due
to the LS solver being dropped with GAMS 34. It shows the use of the
Python function numpy.linalg.lstsq as one alternative to adapt existing
GAMS code that used the LS solver before.
$offtext


$ontext
Model: y = sum{p=0..10, beta(p) * power(x,p)} + error

Certified Regression Statistics
                                          Standard Deviation
 Parameter        Estimate                   of Estimate
   beta(0)      -1467.48961422980         298.084530995537
   beta(1)      -2772.17959193342         559.779865474950
   beta(2)      -2316.37108160893         466.477572127796
   beta(3)      -1127.97394098372         227.204274477751
   beta(4)      -354.478233703349         71.6478660875927
   beta(5)      -75.1242017393757         15.2897178747400
   beta(6)      -10.8753180355343         2.23691159816033
   beta(7)      -1.06221498588947         0.221624321934227
   beta(8)      -0.670191154593408E-01    0.142363763154724E-01
   beta(9)      -0.246781078275479E-02    0.535617408889821E-03
   beta(10)     -0.402962525080404E-04    0.896632837373868E-05


Residual
Standard Deviation      0.334801051324544E-02
R-Squared               0.996727416185620

Certified Analysis of Variance Table

Source of
Variation       Degrees     Sums of Squares       Mean Squares          F Statistic
                of Freedom
Regression      10          0.242391619837339     0.242391619837339E-01 2162.43954511489
Residual        71          0.795851382172941E-03 0.112091743968020E-04
$offtext

set i 'cases' /i01*i82/;

table data(i,*)
            y          x
i01       0.8116   -6.860120914
i02       0.9072   -4.324130045
i03       0.9052   -4.358625055
i04       0.9039   -4.358426747
i05       0.8053   -6.955852379
i06       0.8377   -6.661145254
i07       0.8667   -6.355462942
i08       0.8809   -6.118102026
i09       0.7975   -7.115148017
i10       0.8162   -6.815308569
i11       0.8515   -6.519993057
i12       0.8766   -6.204119983
i13       0.8885   -5.853871964
i14       0.8859   -6.109523091
i15       0.8959   -5.79832982
i16       0.8913   -5.482672118
i17       0.8959   -5.171791386
i18       0.8971   -4.851705903
i19       0.9021   -4.517126416
i20       0.909    -4.143573228
i21       0.9139   -3.709075441
i22       0.9199   -3.499489089
i23       0.8692   -6.300769497
i24       0.8872   -5.953504836
i25       0.89     -5.642065153
i26       0.891    -5.031376979
i27       0.8977   -4.680685696
i28       0.9035   -4.329846955
i29       0.9078   -3.928486195
i30       0.7675   -8.56735134
i31       0.7705   -8.363211311
i32       0.7713   -8.107682739
i33       0.7736   -7.823908741
i34       0.7775   -7.522878745
i35       0.7841   -7.218819279
i36       0.7971   -6.920818754
i37       0.8329   -6.628932138
i38       0.8641   -6.323946875
i39       0.8804   -5.991399828
i40       0.7668   -8.781464495
i41       0.7633   -8.663140179
i42       0.7678   -8.473531488
i43       0.7697   -8.247337057
i44       0.77     -7.971428747
i45       0.7749   -7.676129393
i46       0.7796   -7.352812702
i47       0.7897   -7.072065318
i48       0.8131   -6.774174009
i49       0.8498   -6.478861916
i50       0.8741   -6.159517513
i51       0.8061   -6.835647144
i52       0.846    -6.53165267
i53       0.8751   -6.224098421
i54       0.8856   -5.910094889
i55       0.8919   -5.598599459
i56       0.8934   -5.290645224
i57       0.894    -4.974284616
i58       0.8957   -4.64454848
i59       0.9047   -4.290560426
i60       0.9129   -3.885055584
i61       0.9209   -3.408378962
i62       0.9219   -3.13200249
i63       0.7739   -8.726767166
i64       0.7681   -8.66695597
i65       0.7665   -8.511026475
i66       0.7703   -8.165388579
i67       0.7702   -7.886056648
i68       0.7761   -7.588043762
i69       0.7809   -7.283412422
i70       0.7961   -6.995678626
i71       0.8253   -6.691862621
i72       0.8602   -6.392544977
i73       0.8809   -6.067374056
i74       0.8301   -6.684029655
i75       0.8664   -6.378719832
i76       0.8834   -6.065855188
i77       0.8898   -5.752272167
i78       0.8964   -5.132414673
i79       0.8963   -4.811352704
i80       0.9074   -4.098269308
i81       0.9119   -3.66174277
i82       0.9228   -3.2644011
;

set p  'coefficients' /
  b0   'beta(0)',
  b1   'beta(1)'
  b2   'beta(2)'
  b3   'beta(3)'
  b4   'beta(4)'
  b5   'beta(5)'
  b6   'beta(6)'
  b7   'beta(7)'
  b8   'beta(8)'
  b9   'beta(9)'
  b10  'beta(10)'
/;


* The values with _ appended are the certified values:
* we test that we reproduce these

parameters
  x(p)
  estimate_(p) 'Estimated coefficients' /
   'b0'      -1467.48961422980
   'b1'      -2772.17959193342
   'b2'      -2316.37108160893
   'b3'      -1127.97394098372
   'b4'      -354.478233703349
   'b5'      -75.1242017393757
   'b6'      -10.8753180355343
   'b7'      -1.06221498588947
   'b8'      -0.670191154593408E-01
   'b9'      -0.246781078275479E-02
   'b10'     -0.402962525080404E-04
  /;

scalars
  d
  sse     'sum of squared errors'
  tol     'tolerance' / 3e-3 /
  sigma_  'Standard error' / 0.334801051324544E-02 /
  sigma   'Standard error'
  r2_     'R-Squared' / 0.996727416185620 /
  r2      'R-Squared'
  df_     'Degrees of freedom' / 71 /
  df      'Degrees of freedom'
  rss_    'Residual sum of squares' / 0.795851382172941E-03 /
  rss     'Residual sum of squares'
  resvar_ 'Residual variance' / 0.112091743968020E-04 /
  resvar  'Residual variance'
  ;

$onEmbeddedCode Python:
  import numpy as np
  data = list(gams.get('data', keyFormat=KeyFormat.SKIP))
  data_x = data[1::2]
  data_y = data[::2]
  p = gams.get('p')
  A = np.array([[x**n for n in range(len(p))] for x in data_x])
  sol, sse = np.linalg.lstsq(A, data_y, rcond=-1)[:2]

  gams.set('x', list(zip(p, sol)))
  gams.set('sse', list(sse))

  # Calculate some statistical values. Note that some calculations might depend on this particular instance (e.g. r2 in case of intercept)
  from math import sqrt
  n = len(data_y)
  p = len(p)
  fitted = np.matmul(A, sol)
  resid = data_y - fitted
  rss = np.dot(resid, resid)
  df = n-p
  if df<=0:
    raise Exception('Degrees of freedom df=n-p should be >0')
  resvar = rss/df
  sigma = sqrt(resvar)
  meanf = np.sum(fitted)/n
  mss = np.dot(fitted-meanf, fitted-meanf)
  r2 = mss/(mss+rss)
  
  gams.set('sigma', [sigma])
  gams.set('resvar',[resvar])
  gams.set('rss',[rss])
  gams.set('df',[df])
  gams.set('r2',[r2])
$offEmbeddedCode x sse sigma resvar rss df r2

option decimals=8;
display x;

d = smax{p, abs(x(p)-estimate_(p))};
abort$[d > tol] 'bad solution x', x, estimate_, d;

d = abs(sigma_ - sigma);
abort$[d > tol] 'bad standard error', sigma_, sigma, d;

d = abs(r2_ - r2);
abort$[d > tol] 'bad R-Squared', r2_, r2, d;

d = abs(df_ - df);
abort$[d > tol] 'bad degrees of freedom', df_, df, d;

d = abs(rss_ - rss);
abort$[d > tol] 'bad residual sum of squares', rss_, rss, d;

d = abs(resvar_ - resvar);
abort$[d > tol] 'bad residual variance', resvar_, resvar, d;