generate.gms : Generate some random but structured GDX files

Description

This model provides some means to generate large scale multi-dimensional sparse
symbols and dumps them in a GDX file.

First some domain sets are generated based on some pairs i 10 j 20 (i.e.
i0 /i1*i10/, i1 /j1*j20/), some single numbers 5 10 20 (i.e. i0 /1*5/,
i1 /1*10/, i2 /1*20/) or minium information 3 50 (i.e. i0 /1*50/, i1 /1*50/,
i2 /1*50/) via the mkdom $batInclude.

Next, the $batInclude mksym with arguments symbols type (parameter, set, ...),
symbol name, symbols dimension, and data density (0<=density<=1) and an extra
argument. In case the symbol type is set, the extra argument indicates if a random
element text should be stored with the set records. For all other symbol types the
extra argument indicates to either store a random value or a fixed values (the value
of the extra argument.

At the bottom of the file, three cases are given that demonstrate how to create
different GDX files.

Contributor: Michael Bussieck, December 2022


Small Model of Type : GAMS


Category : GAMS API library


Main file : generate.gms

$TITLE 'Generate some random but structured GDX files' (generate,SEQ=63)

$ontext
This model provides some means to generate large scale multi-dimensional sparse 
symbols and dumps them in a GDX file. 

First some domain sets are generated based on some pairs i 10 j 20 (i.e. 
i0 /i1*i10/, i1 /j1*j20/), some single numbers 5 10 20 (i.e. i0 /1*5/, 
i1 /1*10/, i2 /1*20/) or minium information 3 50 (i.e. i0 /1*50/, i1 /1*50/,
i2 /1*50/) via the mkdom $batInclude.

Next, the $batInclude mksym with arguments symbols type (parameter, set, ...), 
symbol name, symbols dimension, and data density (0<=density<=1) and an extra 
argument. In case the symbol type is set, the extra argument indicates if a random
element text should be stored with the set records. For all other symbol types the 
extra argument indicates to either store a random value or a fixed values (the value 
of the extra argument.

At the bottom of the file, three cases are given that demonstrate how to create 
different GDX files.

Contributor: Michael Bussieck, December 2022

$offtext

$onEchoV > mkdom.gms
* Syntax for mkdom type dom0 dom1 dom2 ... | ndom ndim
*   type=pair   specify domN as pair of string and number, this creates "%string%1*%string%%number%",
*                       string needs to be unique over all doms
*   type=single specify domN as number, this creates "1*%number%"
*   type=min    specify ndom index domains i0 ... i(ndom-1) each with labels 1*%ndim%
$set type %1
$shift
$set dimtmp 0
$set cardi 1
$setGlobal dimadd 1
$setGlobal dimlen 
$label loopdim
$IfThenI %type%==pair
$set char "%1" set num %2
set i%dimtmp% / %char%1*%char%%2 /;
$eval cardi %cardi%+card(i%dimtmp%)
$shift shift
$elseIfI %type%==single
set i%dimtmp% / 1*%1 /;
$shift
$elseIfI %type%==min
set i%dimtmp% / 1*%2 /;
$ifE %dimtmp%+1=%1 $shift shift
$else
$error unknown type=%type%
$endIf
$setGlobal dimadd %dimadd%,%cardi%
$eval cardX card(i%dimtmp%)
$if not x%dimlen%==x $setGlobal dimlen %dimlen%,%cardX%
$if     x%dimlen%==x $setGlobal dimlen %cardX%
$eval dimtmp %dimtmp%+1
$if not x%1==x $goto loopdim
$gdxOut dom.gdx
$unload
$gdxOut
$offEcho

$onEchoV > mksym.gms
* Syntax mksym type name dim density havetext
*   type=set: for "havetext" <> '' or 0 we will write some element text, otherwise element text will be empty
*   type<>set:for "havetext" <> '' or 0 we will use "havetext" as a numercial value to store (in L for var and equ),
*             otherwise we create random numerical values 
$setArgs symtype symname dim density havetext
$if x%havetext%==x $setLocal havetext 0

* Declaration
$ifThen.domloop %dim%==0
%symtype% %symname% 'text for %symname%';
$else.domloop
$set dom i0
$set dimtmp 1
$label loop1
$if.idxloop %dimtmp%==%dim% $goto done1
$set dom %dom%,i%dimtmp%
$eval dimtmp %dimtmp%+1 
$goto loop1
$label done1
%symtype% %symname%(%dom%) 'text for %symname%';
$endIf.domloop
* Definition
$ifThen.domloop %dim%==0
$if     %havetext% == 0 $eval rvalue uniform(1e-9,1)
$if not %havetext% == 0 $set rvalue %havetext%
$ifI.symtype     %symtype%==parameter scalar %symname% / %rvalue% /;
$ifI.symtype not %symtype%==parameter %symtype% %symname% / L %rvalue% /;
$else.domloop
$onEmbeddedCode Python:
import numpy as np
import itertools
import gams.core.gdx as gdx
import gams.core.numpy as gnp
import shutil
import random
from string import ascii_lowercase, digits
import time

def cartesian_product(*arrays):
    la = len(arrays)
    dtype = np.result_type(*arrays)
    arr = np.empty((la, *map(len, arrays)), dtype=dtype)
    idx = slice(None), *itertools.repeat(None, la)
    for i, a in enumerate(arrays):
        arr[i, ...] = a[idx[: la - i]]
    return arr.reshape(la, -1).T

system_directory = r'%gams.sysdir% '.rstrip()
dim_add = [%dimadd%]
dim_len = [%dimlen%]
stype = {'set': gdx.GMS_DT_SET, 'parameter': gdx.GMS_DT_PAR, 'variable': gdx.GMS_DT_VAR, 'equation': gdx.GMS_DT_EQU}
np.random.seed(42) # we want fixed random sequence (to reproduce)

# Dimensionality of index domains
codes = [np.arange(dim_len[i]) for i in range(%dim%)]

# Create Cartesian product
st = time.time()
arr = cartesian_product(*tuple(codes))
gams.printLog(f'Time cartesian_product: {{:.2f}} secs'.format(time.time()-st))

# Drop records if density < 1
if %density% < 1 and %density% > 0.1:
    st = time.time()
    r, c = arr.shape
    idx = np.random.choice(np.arange(r), replace=False, size=(int(%density% * r),))
    arr = arr[np.sort(idx)]
    gams.printLog(f'Time drop records: {{:.2f}} secs'.format(time.time()-st))
elif %density% <= 0.1:
    st = time.time()
    r, c = arr.shape
    idx = np.array(random.sample(range(r), int(%density% * r)))
    arr = arr[np.sort(idx)]
    gams.printLog(f'Time drop records: {{:.2f}} secs'.format(time.time()-st))

# Adjust UEL index
for i in range(%dim%):
    arr[:, i] += dim_add[i]

# Append new symbol to GDX file that already contains domain sets
st = time.time()
shutil.copyfile('dom.gdx', 'tmp.gdx')
gams2np = gnp.Gams2Numpy(system_directory)
gdxHandle = gdx.new_gdxHandle_tp()
rc, msg = gdx.gdxCreateD(gdxHandle, system_directory, gdx.GMS_SSSIZE)
if not rc:
    raise Exception(msg)
if not gdx.gdxOpenAppend(gdxHandle, 'tmp.gdx', "")[0]:
    raise Exception("Error opening GDX file tmp.gdx for append")
gams.printLog(f'Time open GDX: {{:.2f}} secs'.format(time.time()-st))

st = time.time()
r, c = arr.shape
if '%symtype%' == 'set' and '%havetext%' != '0':
    chars = ascii_lowercase + digits
    element_text = np.array([[''.join(random.choice(chars) for _ in range(3)) for _ in range(r)]])
    arr = np.concatenate((arr, element_text.T), axis=1)
if '%symtype%' == 'parameter':
    if '%havetext%' != '0':
        arr = np.concatenate((arr, np.array([np.ones(r)*%havetext%]).T), axis=1)
    else:
        arr = np.concatenate((arr, np.array([np.random.random(r)]).T), axis=1)
if '%symtype%' == 'variable' or '%symtype%' == 'equation':
    if '%havetext%' != '0':
        arr = np.concatenate((arr, np.array([np.ones(r)*%havetext%,np.zeros(r),np.zeros(r),np.ones(r)*np.inf,np.ones(r)]).T), axis=1)
    else:
        arr = np.concatenate((arr, np.array([np.random.random(r),np.zeros(r),np.zeros(r),np.ones(r)*np.inf,np.ones(r)]).T), axis=1)
arr = arr.astype("object")
arr[:,:%dim%] = arr[:,:%dim%].astype(int)
gams.printLog(f'Time adding values: {{:.2f}} secs'.format(time.time()-st))

st = time.time()
gams2np.gdxWriteSymbolRaw(gdxHandle,'%symname%','text for %symname%',%dim%,stype['%symtype%'],0,arr,None,False)
gdx.gdxClose(gdxHandle)
gdx.gdxFree(gdxHandle)
gams.printLog(f'Time writing and closing: {{:.2f}} secs'.format(time.time()-st))
$offEmbeddedCode
$gdxLoad 'tmp.gdx' %symname%
$endIf.domloop
$offEcho

set i0,i1,i2,i3,i4,i5,i6,i7,i8,i9,i10,i11,i12,i13,i14,i15,i16,i17,i18,i19;

$ifThen.X 1==1
$batInclude mkdom min     3 500
$batinclude mksym parameter p3 3 0.001
$batinclude mksym parameter p0 0 1
$batinclude mksym set       s1 1 0.5 
$batinclude mksym set       s2 2 0.1 1
$batinclude mksym variable  v3 3 0.001
$batinclude mksym variable  v0 0 1   2
$batinclude mksym equation  e3 3 0.001
$batinclude mksym equation  e0 0 1
$gdxOut 1.gdx
$unload
$gdxOut
$elseIf.X 1==0
$batInclude mkdom min     1 10000000
$batinclude mksym parameter p1 1 0.001
$batinclude mksym parameter p0 0 1
$batinclude mksym set       s1 1 0.01 1
$batinclude mksym variable  v1 1 0.001
$batinclude mksym variable  v0 0 1   2
$batinclude mksym equation  e1 1 0.001
$batinclude mksym equation  e0 0 1
$gdxOut 2.gdx
$unload
$gdxOut
$elseIf.X 1==0
$batInclude mkdom min     1 5
$set maxcnt 5000
$maxgoto %maxcnt%
$set cnt 1
$label sloop
$batinclude mksym parameter p0_%cnt% 0 1
$batinclude mksym variable  v0_%cnt% 0 1 2
$batinclude mksym equation  e0_%cnt% 0 1
$eval cnt %cnt%+1
$if.JUMP not %cnt%==%maxcnt% $goto sloop
$gdxOut 3.gdx
$unload
$gdxOut
$endif.X