Calibration with initial model memory states as parameters¶

About this document¶

In [1]:

Copied!

from swift2.doc_helper import pkg_versions_info

print(pkg_versions_info("This document was generated from a jupyter notebook"))

#
from swift2.doc_helper import pkg_versions_info

print(pkg_versions_info("This document was generated from a jupyter notebook"))

#

This document was generated from a jupyter notebook on 2025-05-16 14:15:59.680048
    swift2 2.5.1
    uchronia 2.6.2

Essentials of setting up a calibration of initial states¶

This vignette will illustrate how to define two meta-parameters, S0 and R0, controlling the initial level of stores in the GR4J model, as fraction of the store capacities.

We'll load a simple catchment with one subarea only; the feature applies equally to catchment with multiple sub-areas

In [2]:

Copied!





import numpy as np
import pandas as pd
import swift2.doc_helper as std
import swift2.parameteriser as sp
import swift2.system as ssy
import numpy as np
import pandas as pd
import swift2.doc_helper as std
import swift2.parameteriser as sp
import swift2.system as ssy

In [3]:

Copied!

from cinterop.timeseries import xr_ts_end, xr_ts_start
from swift2.classes import CompositeParameteriser
from swift2.utils import mk_full_data_id
from cinterop.timeseries import xr_ts_end, xr_ts_start
from swift2.classes import CompositeParameteriser
from swift2.utils import mk_full_data_id

In [4]:

Copied!

%matplotlib inline
%matplotlib inline

In [5]:

Copied!





model_id = 'GR4J'
simul_start=pd.Timestamp('1990-11-01')
simul_end=pd.Timestamp('1992-12-31')
ms = std.create_subarea_simulation(data_id='MMH', simul_start=simul_start, simul_end=simul_end, 
	model_id=model_id, tstep='daily', varname_rain='P', varname_pet='E')
model_id = 'GR4J'
simul_start=pd.Timestamp('1990-11-01')
simul_end=pd.Timestamp('1992-12-31')
ms = std.create_subarea_simulation(data_id='MMH', simul_start=simul_start, simul_end=simul_end, 
	model_id=model_id, tstep='daily', varname_rain='P', varname_pet='E')

We define a few model state identifiers, and set them to be recorded to time series.

In [6]:

Copied!

gr4jModelVars = ssy.runoff_model_var_ids(model_id)
print(gr4jModelVars)
gr4jModelVars = ssy.runoff_model_var_ids(model_id)
print(gr4jModelVars)

['P', 'E', 'En', 'LAI', 'runoff', 'S', 'R', 'TWS', 'Eactual', 'Ps', 'Es', 'Pr', 'ech1', 'ech2', 'Perc', 'alpha', 'k', 'x1', 'x2', 'x3', 'x4', 'UHExponent', 'PercFactor']

In [7]:

Copied!





element_id = 'subarea.Subarea'
def mk_varid(short_id):
    return mk_full_data_id(element_id, short_id)

runoff_id = mk_varid('runoff')
s_store_id = mk_varid('S')
r_store_id = mk_varid('R')
ms.record_state([runoff_id, s_store_id, r_store_id])
element_id = 'subarea.Subarea'
def mk_varid(short_id):
    return mk_full_data_id(element_id, short_id)

runoff_id = mk_varid('runoff')
s_store_id = mk_varid('S')
r_store_id = mk_varid('R')
ms.record_state([runoff_id, s_store_id, r_store_id])

We'll set up a short runtime span, so that we illustrate the state initialisation feature.

In [8]:

Copied!





obs_runoff = std.sample_series('MMH', 'flow') #actually, this is a time series of runoff depth, not streamflow rate
obs_runoff[obs_runoff < -1] = np.nan
s = simul_start
w = s
# e = s + pd.DateOffset(days=90)
e = simul_end
ms.set_simulation_span(s, e)
obs_runoff = std.sample_series('MMH', 'flow') #actually, this is a time series of runoff depth, not streamflow rate
obs_runoff[obs_runoff < -1] = np.nan
s = simul_start
w = s
# e = s + pd.DateOffset(days=90)
e = simul_end
ms.set_simulation_span(s, e)

Let's apply some default model parameters to the model:

In [9]:

Copied!





pspec_gr4j = std.get_free_params(model_id)
pspec_gr4j['Name'] = mk_varid(pspec_gr4j['Name'])
# TODO : a print function for native parameterisers.
p = sp.create_parameteriser('Generic', pspec_gr4j)
p
pspec_gr4j = std.get_free_params(model_id)
pspec_gr4j['Name'] = mk_varid(pspec_gr4j['Name'])
# TODO : a print function for native parameterisers.
p = sp.create_parameteriser('Generic', pspec_gr4j)
p

Out[9]:

                 Name       Value   Min     Max
0  subarea.Subarea.x1  650.488000   1.0  3000.0
1  subarea.Subarea.x2   -0.280648 -27.0    27.0
2  subarea.Subarea.x3    7.891230   1.0   660.0
3  subarea.Subarea.x4   18.917200   1.0   240.0

In [10]:

Copied!

p.apply_sys_config(ms)
p.apply_sys_config(ms)

We get a time series of S if we run it at this point; the starting value is zero.

In [11]:

Copied!





import matplotlib.pyplot as plt

ms.exec_simulation()
tts = ms.get_recorded(s_store_id)
tts = tts.squeeze(drop=True)
g = tts.plot.line(add_legend=True, figsize=(16,8))
plt.title('GR4J S store. No state initializer');
import matplotlib.pyplot as plt

ms.exec_simulation()
tts = ms.get_recorded(s_store_id)
tts = tts.squeeze(drop=True)
g = tts.plot.line(add_legend=True, figsize=(16,8))
plt.title('GR4J S store. No state initializer');

No description has been provided for this image

In [12]:

Copied!

from swift2.utils import as_xarray_series
from swift2.utils import as_xarray_series

In [13]:

Copied!

from swift2.vis import plot_two_series
from swift2.vis import plot_two_series

In [14]:

Copied!

calc_runoff = ms.get_recorded(runoff_id).squeeze(drop=True)
plot_two_series(obs_runoff, calc_runoff, start_time = xr_ts_start(calc_runoff), end_time = xr_ts_end(calc_runoff), names=['observed','modelled'], xlab="time", ylab="runoff (mm)")
calc_runoff = ms.get_recorded(runoff_id).squeeze(drop=True)
plot_two_series(obs_runoff, calc_runoff, start_time = xr_ts_start(calc_runoff), end_time = xr_ts_end(calc_runoff), names=['observed','modelled'], xlab="time", ylab="runoff (mm)")

Let's define S0 and R0 parameters such that for each GR4J model instance, S = S0 * x1 and R = R0 * x3

In [15]:

Copied!





from swift2.utils import c

p_states = sp.linear_parameteriser(
                      c("S0","R0"), 
                      c("S","R"), 
                      c("x1","x3"),
                      c(0.0,0.0), 
                      c(0.6,0.6), 
                      c(0.4,0.4), 
                      'each subarea')
from swift2.utils import c

p_states = sp.linear_parameteriser(
                      c("S0","R0"), 
                      c("S","R"), 
                      c("x1","x3"),
                      c(0.0,0.0), 
                      c(0.6,0.6), 
                      c(0.4,0.4), 
                      'each subarea')

If one applies this parameteriser pState to the system, the the S store is set to the expected value relative to x1.

In [16]:

Copied!

p_states.apply_sys_config(ms)
ms.get_state_value(s_store_id)
p_states.apply_sys_config(ms)
ms.get_state_value(s_store_id)

Out[16]:

{'subarea.Subarea.S': 260.19520000000006}

However this is not enough to define a parameteriser as an initial state. If executing the simulation, the time series of S still starts at zero, because the resetting the model overrides the state S:

In [17]:

Copied!





ms.exec_simulation()
tts = ms.get_recorded(s_store_id)
tts = tts.squeeze(drop=True)
g = tts.plot.line(add_legend=True, figsize=(16,8))
plt.title('GR4J S store; incomplete store initialization');
ms.exec_simulation()
tts = ms.get_recorded(s_store_id)
tts = tts.squeeze(drop=True)
g = tts.plot.line(add_legend=True, figsize=(16,8))
plt.title('GR4J S store; incomplete store initialization');

You need to define a new parameteriser, that makes sure that the model is reset to the expected initial value.

In [18]:

Copied!

init_parameteriser = p_states.make_state_init_parameteriser()
init_parameteriser.apply_sys_config(ms)
init_parameteriser = p_states.make_state_init_parameteriser()
init_parameteriser.apply_sys_config(ms)

In [19]:

Copied!





ms.exec_simulation()
tts = ms.get_recorded(s_store_id)
tts = tts.squeeze(drop=True)
g = tts.plot.line(add_legend=True, figsize=(16,8))
plt.title('GR4J S store, with a proper state initializer');
ms.exec_simulation()
tts = ms.get_recorded(s_store_id)
tts = tts.squeeze(drop=True)
g = tts.plot.line(add_legend=True, figsize=(16,8))
plt.title('GR4J S store, with a proper state initializer');

In [ ]:

In [20]:

Copied!

calc_runoff = ms.get_recorded(runoff_id).squeeze(drop=True)
plot_two_series(obs_runoff, calc_runoff, start_time = xr_ts_start(calc_runoff), end_time = xr_ts_end(calc_runoff), names=['observed','modelled'], xlab="time", ylab="runoff (mm)")
calc_runoff = ms.get_recorded(runoff_id).squeeze(drop=True)
plot_two_series(obs_runoff, calc_runoff, start_time = xr_ts_start(calc_runoff), end_time = xr_ts_end(calc_runoff), names=['observed','modelled'], xlab="time", ylab="runoff (mm)")

There is logic in keeping the two previous steps in defining a parameteriser as separate, hence this present vignette emphasizes the importance of these two steps.

In [ ]:

Calibration¶

Once you have defined this state initialisation parameteriser using {r eval=FALSE} makeStateInitParameterizer, you can define a calibration objective the usual way. This vignette does not include calibration steps; please refer to other vignettes.

In [21]:

Copied!

p_composite = CompositeParameteriser.concatenate(p, init_parameteriser)
print(p_composite)
p_composite = CompositeParameteriser.concatenate(p, init_parameteriser)
print(p_composite)

                 Name       Value   Min     Max
0  subarea.Subarea.x1  650.488000   1.0  3000.0
1  subarea.Subarea.x2   -0.280648 -27.0    27.0
2  subarea.Subarea.x3    7.891230   1.0   660.0
3  subarea.Subarea.x4   18.917200   1.0   240.0
4                  R0    0.400000   0.0     0.6
5                  S0    0.400000   0.0     0.6

In [22]:

Copied!

objective = ms.create_objective(runoff_id, obs_runoff, 'NSE', w, e)
score = objective.get_score(p_composite)
print(score)
objective = ms.create_objective(runoff_id, obs_runoff, 'NSE', w, e)
score = objective.get_score(p_composite)
print(score)

{'scores': {'NSE': -0.03575597073595449}, 'sysconfig':                  Name       Value   Min     Max
0  subarea.Subarea.x1  650.488000   1.0  3000.0
1  subarea.Subarea.x2   -0.280648 -27.0    27.0
2  subarea.Subarea.x3    7.891230   1.0   660.0
3  subarea.Subarea.x4   18.917200   1.0   240.0
4                  R0    0.400000   0.0     0.6
5                  S0    0.400000   0.0     0.6}

In [23]:

Copied!

optim = objective.create_sce_optim_swift(population_initialiser=p)
optim = objective.create_sce_optim_swift(population_initialiser=p)

In [24]:

Copied!

%%time
results = optim.execute_optimisation()
%%time
results = optim.execute_optimisation()

CPU times: user 36.2 s, sys: 15.7 ms, total: 36.2 s
Wall time: 9.49 s

In [25]:

Copied!

results.get_best_score('NSE', convert_to_py=True)
results.get_best_score('NSE', convert_to_py=True)

Out[25]:

{'scores': {'NSE': 0.5625971076046495},
 'sysconfig':                  Name       Value   Min     Max
 0  subarea.Subarea.x1  580.276219   1.0  3000.0
 1  subarea.Subarea.x2  -13.355840 -27.0    27.0
 2  subarea.Subarea.x3  584.391889   1.0   660.0
 3  subarea.Subarea.x4    1.178550   1.0   240.0}

In [26]:

Copied!

s = results.get_best_score('NSE')
s = results.get_best_score('NSE')

In [27]:

Copied!

s.apply_sys_config(ms)
ms.exec_simulation()
s.apply_sys_config(ms)
ms.exec_simulation()

In [28]:

Copied!

calc_runoff = ms.get_recorded(runoff_id).squeeze(drop=True)
plot_two_series(obs_runoff, calc_runoff, start_time = xr_ts_start(calc_runoff), end_time = xr_ts_end(calc_runoff), names=['observed','modelled'], xlab="time", ylab="runoff (mm)")
calc_runoff = ms.get_recorded(runoff_id).squeeze(drop=True)
plot_two_series(obs_runoff, calc_runoff, start_time = xr_ts_start(calc_runoff), end_time = xr_ts_end(calc_runoff), names=['observed','modelled'], xlab="time", ylab="runoff (mm)")

In [29]:

Copied!

optim = objective.create_sce_optim_swift(population_initialiser=p_composite)
optim = objective.create_sce_optim_swift(population_initialiser=p_composite)

In [30]:

Copied!

%%time
results = optim.execute_optimisation()
%%time
results = optim.execute_optimisation()

CPU times: user 36.4 s, sys: 12.1 ms, total: 36.4 s
Wall time: 9.87 s

In [31]:

Copied!

results.get_best_score('NSE', convert_to_py=True)
results.get_best_score('NSE', convert_to_py=True)

Out[31]:

{'scores': {'NSE': 0.526419566545028},
 'sysconfig':                  Name       Value   Min     Max
 0  subarea.Subarea.x1  354.567580   1.0  3000.0
 1  subarea.Subarea.x2  -18.721008 -27.0    27.0
 2  subarea.Subarea.x3  646.923623   1.0   660.0
 3  subarea.Subarea.x4    1.709377   1.0   240.0
 4                  R0    0.251199   0.0     0.6
 5                  S0    0.454895   0.0     0.6}

In [32]:

Copied!

s = results.get_best_score('NSE')
s = results.get_best_score('NSE')

In [33]:

Copied!

s.apply_sys_config(ms)
ms.exec_simulation()
s.apply_sys_config(ms)
ms.exec_simulation()

In [34]:

Copied!

calc_runoff = ms.get_recorded(runoff_id).squeeze(drop=True)
plot_two_series(obs_runoff, calc_runoff, start_time = xr_ts_start(calc_runoff), end_time = xr_ts_end(calc_runoff), names=['observed','modelled'], xlab="time", ylab="runoff (mm)")
calc_runoff = ms.get_recorded(runoff_id).squeeze(drop=True)
plot_two_series(obs_runoff, calc_runoff, start_time = xr_ts_start(calc_runoff), end_time = xr_ts_end(calc_runoff), names=['observed','modelled'], xlab="time", ylab="runoff (mm)")

In [35]:

Copied!

obs_runoff.plot()
obs_runoff.plot()

Out[35]:

<Axes: >

In [36]:

Copied!

ms.get_played_varnames()
ms.get_played_varnames()

Out[36]:

['subarea.Subarea.E', 'subarea.Subarea.P']

In [37]:

Copied!

x = ms.get_played('subarea.Subarea.P').squeeze(drop=True).to_series()
x = ms.get_played('subarea.Subarea.P').squeeze(drop=True).to_series()

In [38]:

Copied!

rainfall_mth = x.resample("M").sum()
runoff_mth = obs_runoff[slice(x.index[0], x.index[-1])].resample("M").sum()
rainfall_mth = x.resample("M").sum()
runoff_mth = obs_runoff[slice(x.index[0], x.index[-1])].resample("M").sum()

/tmp/ipykernel_184804/2429096889.py:1: FutureWarning: 'M' is deprecated and will be removed in a future version, please use 'ME' instead.
  rainfall_mth = x.resample("M").sum()
/tmp/ipykernel_184804/2429096889.py:2: FutureWarning: 'M' is deprecated and will be removed in a future version, please use 'ME' instead.
  runoff_mth = obs_runoff[slice(x.index[0], x.index[-1])].resample("M").sum()

In [39]:

Copied!

plot_two_series(rainfall_mth, runoff_mth)
plot_two_series(rainfall_mth, runoff_mth)

In [ ]: