Coverage for src/efts_io/dimensions.py: 30.77%
46 statements
« prev ^ index » next coverage.py v7.6.1, created at 2025-07-24 10:14 +1000
« prev ^ index » next coverage.py v7.6.1, created at 2025-07-24 10:14 +1000
1"""Functions to create and manipulate dimensions for netCDF files."""
3from datetime import datetime
4from typing import Any, Dict, Iterable, Optional, Tuple, Union
6# import netCDF4
7import numpy as np
8import pandas as pd
9from cftime import DatetimeGregorian
11from efts_io.conventions import (
12 ENS_MEMBER_DIMNAME,
13 FILLVALUE_ATTR_KEY,
14 LEAD_TIME_DIMNAME,
15 STATION_DIMNAME,
16 STR_LEN_DIMNAME,
17 TIME_DIMNAME,
18 UNITS_ATTR_KEY,
19)
22def iso_date_time_str(t: Any) -> str:
23 """Convert a date-time object to a string in ISO format, using space as separator."""
24 return pd.Timestamp(t).isoformat(" ")
27#' Check that a date-time is in the UTC time zone, and return the date time offset 'zero'
28#'
29#' Check that a date-time is in the UTC time zone, and return the date time offset 'zero'
30#'
31#' @param d an object coercible to a POSIXct
32#' @export
33#' @return a character, the time zone offset string '+0000'
34#' @importFrom lubridate tz
35#' @examples
36#' start_time <- ISOdate(year=2010, month=08, day=01, hour = 12, min = 0, sec = 0, tz = 'UTC')
37#' check_is_utc(d=start_time)
38#'
39def check_is_utc(d: Any) -> bool:
40 """Check that a date-time is in the UTC time zone."""
41 a = pd.Timestamp(d)
42 if a.tz is None:
43 return True # ?
44 z = a.tz
45 from datetime import timezone
47 return z == timezone.utc
50#' Create a time axis unit known to work for netCDF
51#'
52#' Create a time axis unit known to work for netCDF
53#'
54#' @param d an object coercible to a POSIXct
55#' @param time_step the character prefix to put before the date, in the netCDF time axis unit definition.
56#' @param tzoffset an optional character, the time offset from UTC, e.g. '+1000' for 10 hours ahead of UTC.
57#' Can be missing, in which case it must be explicitly a UTC time.
58#' Note that the tzoffset completely supersedes the time zone if present.
59#' @export
60#' @return a character, the axis units to use for the netCDF 'time' dimension
61#' @examples
62#' start_time <- ISOdate(year=2010, month=08, day=01, hour = 12, min = 0, sec = 0, tz = 'UTC')
63#' create_netcdf_time_axis(d=start_time)
64#' start_time <- ISOdate(year=2015, month=10, day=04, hour = 01,
65#' min = 0, sec = 0, tz = 'Australia/Sydney')
66#' create_netcdf_time_axis(d=start_time, tzoffset='+1000')
67#'
68def create_netcdf_time_axis(d: Any, time_step: str = "hours since", tzoffset: Optional[str] = None) -> str:
69 """Create a time axis unit known to work for netCDF."""
70 if tzoffset is None:
71 if not check_is_utc(d):
72 raise ValueError("date time must have UTC or GMT as time zone")
73 tzoffset = "+0000"
74 return " ".join([time_step, iso_date_time_str(as_naive_timestamp(d)), tzoffset])
77def as_naive_timestamp(d: Union[datetime, pd.Timestamp]) -> pd.Timestamp:
78 """Convert a date-time object to a naive timestamp."""
79 return pd.Timestamp(
80 year=d.year,
81 month=d.month,
82 day=d.day,
83 hour=d.hour,
84 minute=d.minute,
85 second=d.second,
86 )
89#' Helper function to create the definition of the time dimension for use in a netCDF file
90#'
91#' Helper function to create the definition of the time dimension for use in a netCDF file. Defaults to create an hourly axis.
92#'
93#' @param from the start date of the time axis
94#' @param n length of the time dimension
95#' @param time_step unit prefix in the time dimension units
96#' @param time_step_delta integer, length of time units between each steps
97#' @param tzoffset an optional character, the time offset from UTC, e.g. '+1000' for 10 hours ahead of UTC. Can be missing, in which case 'from' must be explicitly a UTC time. Note that the tzoffset completely supersedes the time zone if present.
98#' @import ncdf4
99#' @export
100#' @return A list with keys units and values
101#' @seealso See
102#' \code{\link{create_efts}} for examples
103#' @examples
104#' timeAxisStart <- ISOdate(2015, 10, 4, 0, 0, 0, tz = "Australia/Canberra")
105#' (time_dim_info <- create_time_info(from = timeAxisStart, n = 24L,
106#' time_step = "hours since", time_step_delta = 3L, tzoffset = "+1000"))
107#'
108#' # Note that the time zone information of thes sart date is NOT
109#' # used by create_time_info; the tzoffset argument takes precedence
110#' timeAxisStart <- ISOdate(2015, 10, 4, 0, 0, 0, tz = "Australia/Perth")
111#' (time_dim_info <- create_time_info(from = timeAxisStart, n = 24L,
112#' time_step = "hours since", time_step_delta = 3L, tzoffset = "+1000"))
113#'
114def create_time_info(
115 start: Any,
116 n: int,
117 time_step: str = "hours since",
118 time_step_delta: int = 1,
119 tzoffset: Optional[str] = None,
120) -> Dict[str, Any]:
121 """Helper function to create the definition of the time dimension for use in a netCDF file."""
122 return {
123 UNITS_ATTR_KEY: create_netcdf_time_axis(
124 d=start,
125 time_step=time_step,
126 tzoffset=tzoffset,
127 ),
128 "values": np.arange(0, n) * time_step_delta,
129 }
132# #' Retrieves the first date of the time dimension from a netCDF file
133# #'
134# #' Retrieves the first date of the time dimension from a netCDF file of daily data, given the units found in the netCDF attribute for the time dimension
135# #'
136# #' @param time_units The string description of the units of the time dimension e.g. 'days since 1980-01-01' or 'hours since 2010-08-01 13:00:00 +0000'
137# #' @param time_zone the time zone to use for the returned value.
138# #' @export
139# #' @importFrom udunits2 ud.convert
140# #' @importFrom stringr str_split
141# #' @import lubridate
142# #' @return A POSIXct object, origin of the time dimension as defined
143# #' @examples
144# #'
145# #' x <- "hours since 2015-10-04 00:00:00 +1023"
146# #' get_start_date(x)
147# #' get_start_date(x,time_zone = 'UTC')
148# #' get_start_date(x,time_zone = 'Australia/Perth')
149# #' get_start_date(x,time_zone = 'Australia/Canberra')
150# #'
151# get_start_date(time_units, time_zone = "UTC") {
153# # temporary work around https://github.com/jmp75/efts/issues/3
154# udu <- stringr::str_split(time_units, pattern = ' +')[[1]]
155# s <- paste(udu[3], udu[4], sep='T')
156# dt <- ymd_hms(s)
157# if(is.na(dt)) stop(paste0('Could not parse date time out of string ', time_units))
158# return(dt)
160# # refDate <- lubridate::origin #
161# # class(refDate) <- c("POSIXct", "POSIXt") # workaround what I think is a lubridate bug (possibly now wolved);
162# # # try origin + days(1) and its effect, visibly because of class ordering on origin.
163# # isDaily <- is_daily_time_step(time_units)
164# # refDateUnits <- paste(ifelse(isDaily, "days", "hours"), "since 1970-01-01 00:00:00 +0000")
165# # offsetSinceRef <- udunits2::ud.convert(0, time_units, refDateUnits)
166# # offsetFun <- get_time_step_function(time_units)
167# # startDateUtc <- refDate + offsetFun(offsetSinceRef)
168# # startDate <- lubridate::with_tz(startDateUtc, time_zone)
169# # return(startDate)
170# }
172# #' Retrieves the unit string of the time dimension from a netCDF file
173# #'
174# #' @export
175# #' @param ncfile an object of class ncdf4
176# #' @param TIME_DIMNAME The name of the time dimension, by default 'time' as per the CF conventions.
177# #' @return a character
178# get_time_units(ncfile, TIME_DIMNAME = "time") {
179# return(ncdf4::ncatt_get(ncfile, TIME_DIMNAME, UNITS_ATTR_KEY)$value)
180# }
182# #' Retrieves the time dimension from a netCDF file
183# #'
184# #' @export
185# #' @param ncfile an object of class ncdf4
186# #' @param TIME_DIMNAME The name of the time dimension, by default 'time' as per the CF conventions.
187# #' @param time_zone the time zone to use for the returned value.
188# #' @return A vector of Dates
189# get_time_dimension(ncfile, TIME_DIMNAME = "time", time_zone = "UTC") {
190# time_units <- get_time_units(ncfile, TIME_DIMNAME)
191# timeValues <- ncdf4::ncvar_get(ncfile, TIME_DIMNAME)
192# startDate <- get_start_date(time_units, time_zone = time_zone)
193# offsetFun <- get_time_step_function(time_units)
194# startDate + offsetFun(timeValues)
195# }
197# #' @importFrom stringr str_sub
198# offset_as_duration(delta) {
199# h <- stringr::str_sub(delta, 1L, 2L) # 10
200# m <- stringr::str_sub(delta, 3L, 4L) # 30
201# return((lubridate::dhours(as.integer(h)) + lubridate::dminutes(as.integer(m))))
202# }
204# #' @importFrom stringr str_sub
205# offset_as_difftime(delta) {
206# h <- stringr::str_sub(delta, 1L, 2L) # 10
207# m <- stringr::str_sub(delta, 3L, 4L) # 30
208# b <- lubridate::origin + lubridate::dhours(as.integer(h)) + lubridate::dminutes(as.integer(m))
209# b - lubridate::origin
210# }
212# #' Finds the UTC offset in a date-time string
213# #'
214# #' Finds the UTC offset in a date-time or time axis specification string
215# #' such as 'hours since 2015-10-04 00:00:00 +1030'
216# #'
217# #' @param time_units the string to process
218# #' @param as_string a boolean. If true, return the time offset as a character, otherwise return a difftime object.
219# #' @return the time offset as a character, or as a difftime object.
220# #' @export
221# #' @examples
222# #'
223# #' x <- "hours since 2015-10-04 00:00:00 +1023"
224# #' find_utc_offset(x)
225# #' find_utc_offset(x, FALSE)
226# #' x <- "hours since 2015-10-04 00:00:00 -0837"
227# #' find_utc_offset(x)
228# #' find_utc_offset(x, FALSE)
229# #' x <- "hours since 2015-10-04 00:00:00"
230# #' find_utc_offset(x)
231# #' find_utc_offset(x, FALSE)
232# #'
233# find_utc_offset(time_units, as_string = TRUE) {
234# # TODO: there may be a smarter way using udunits to determine the offset,
235# # but not trivial either.
236# x <- stringr::str_split(time_units, "\\+")[[1]]
237# # [1] 'hours since 2015-10-04 00:00:00 ' '1030' the offset would have been
238# # with a positive sign: +1030
239# if (length(x) > 1) {
240# delta <- last(x) # 1030
241# if (as_string) {
242# return(paste0("+", delta))
243# } else {
244# return(+offset_as_difftime(delta))
245# }
246# }
247# x <- stringr::str_split(time_units, "[\\-]")[[1]]
248# # [1] 'hours since 2015' '10' '04 00:00:00 ' '1030' the offset would have
249# # been with a negative sign: -1030
250# if (length(x) == 4) {
251# delta <- last(x) # 1030
252# if (as_string) {
253# return(paste0("-", delta))
254# } else {
255# return(-offset_as_difftime(delta))
256# }
257# } else {
258# # length(x) < 4 : no offset detected
259# if (as_string) {
260# return("")
261# } else {
262# return(lubridate::origin - lubridate::origin)
263# }
264# }
265# }
268# ########################################
269# # Below are functions not exported
270# ########################################
272# is_daily_time_step(time_units) {
273# isDaily <- charmatch("days since", time_units)
274# isDaily <- ifelse(is.na(isDaily), FALSE, isDaily == 1)
275# isHourly <- charmatch("hours since", time_units)
276# isHourly <- ifelse(is.na(isHourly), FALSE, isHourly == 1)
277# if (!(isDaily | isHourly))
278# stop(paste("Could not detect if hourly or daily - unit not supported:",
279# time_units))
280# isDaily
281# }
283# #' Detect the unit of the time step in the time axis unit
284# #'
285# #' @param time_units The string description of the units of the time dimension e.g. 'days since 1980-01-01' or 'hours since 2010-08-01 13:00:00 +0000'
286# #' @import lubridate
287# #' @return A duration function from lubridate
288# get_time_step_function(time_units) {
289# isDaily <- is_daily_time_step(time_units)
290# offsetFun <- ifelse(isDaily, lubridate::ddays, lubridate::dhours)
291# return(offsetFun)
292# }
295def _cftime_to_pdtstamp(t: pd.Timestamp, tz_str: Optional[str]) -> pd.Timestamp:
296 return pd.Timestamp(t.isoformat(), tz=tz_str)
299_as_tstamps = np.vectorize(_cftime_to_pdtstamp)
302def cftimes_to_pdtstamps(
303 cftimes: Iterable[DatetimeGregorian],
304 tz_str: Optional[str] = None,
305) -> np.ndarray[pd.Timestamp,pd.Timestamp]:
306 """Convert one or more Climate and Forecast (CF) times to timestamps."""
307 return _as_tstamps(cftimes, tz_str)
310def create_timestamps(
311 time_dim_info: Dict[str, Any],
312 tz_str: Optional[str] = None,
313) -> np.ndarray[pd.Timestamp,pd.Timestamp]:
314 """Create time axis timestamps given the time dimension information."""
315 import xarray as xr
317 axis_units = time_dim_info[UNITS_ATTR_KEY]
318 axis_values = time_dim_info["values"]
319 var = xr.Variable(
320 dims=[TIME_DIMNAME],
321 data=axis_values,
322 encoding={FILLVALUE_ATTR_KEY: None},
323 attrs={
324 UNITS_ATTR_KEY: axis_units,
325 },
326 )
327 from xarray.coding import times
329 decod = times.CFDatetimeCoder(use_cftime=True)
330 time_coords = decod.decode(var, name=TIME_DIMNAME)
331 return cftimes_to_pdtstamps(time_coords.values, tz_str=tz_str)
334#' Creates dimensions for a netCDF EFTS data set
335#'
336#' Creates dimensions for a netCDF EFTS data set. Note that end users are unlikely to need to use this function directly, hence this is not exported
337#'
338#' @param time_dim_info a list with the units and values defining the time dimension of the data set
339#' @param str_len maximum length of the character for the station identifiers.
340#' @param lead_length length of the lead time.
341#' @param ensemble_length number of ensembles, i.e. number of forecasts for each point on the main time axis of the data set
342#' @param num_stations number of stations
343#' @import ncdf4
344#' @return A list of ncdf4 dimensions
345#' @seealso See
346#' \code{\link{create_efts}} for examples
347def _create_nc_dims(
348 time_dim_info: Dict[str, Any],
349 str_len: int = 30,
350 lead_length: int = 1,
351 ensemble_length: int = 1,
352 num_stations: int = 1,
353) -> Dict[str, Tuple[str, np.ndarray, Dict[str, str]]]:
354 """Creates dimensions for a netCDF EFTS data set."""
355 time_dim = (
356 TIME_DIMNAME,
357 time_dim_info["values"],
358 {UNITS_ATTR_KEY: time_dim_info[UNITS_ATTR_KEY], "longname": "time"},
359 )
360 # time_dim = ncdf4::ncdim_def(TIME_DIMNAME, units = time_dim_info$units, vals = time_dim_info$values,
361 # unlim = T, create_dimvar = TRUE, longname = "time")
362 station_dim = (
363 STATION_DIMNAME,
364 np.arange(1, num_stations + 1),
365 {UNITS_ATTR_KEY: "", "longname": STATION_DIMNAME},
366 )
367 str_dim = (
368 STR_LEN_DIMNAME,
369 np.arange(1, str_len + 1),
370 {UNITS_ATTR_KEY: "", "longname": "string length"},
371 )
372 lead_time_dim = (
373 LEAD_TIME_DIMNAME,
374 np.arange(1, lead_length + 1),
375 {UNITS_ATTR_KEY: "", "longname": "lead time"},
376 ) # TODO: check whether time_dim_info['units'] is alwaus suitable.
377 ensemble_dim = (
378 ENS_MEMBER_DIMNAME,
379 np.arange(1, ensemble_length + 1),
380 {UNITS_ATTR_KEY: "", "longname": "ensemble"},
381 )
382 return {
383 "time_dim": time_dim,
384 "lead_time_dim": lead_time_dim,
385 "station_dim": station_dim,
386 "str_dim": str_dim,
387 "ensemble_dim": ensemble_dim,
388 }