Coverage for src/efts_io/dimensions.py: 30.77%

46 statements  

« prev     ^ index     » next       coverage.py v7.6.1, created at 2025-07-24 10:14 +1000

1"""Functions to create and manipulate dimensions for netCDF files.""" 

2 

3from datetime import datetime 

4from typing import Any, Dict, Iterable, Optional, Tuple, Union 

5 

6# import netCDF4 

7import numpy as np 

8import pandas as pd 

9from cftime import DatetimeGregorian 

10 

11from efts_io.conventions import ( 

12 ENS_MEMBER_DIMNAME, 

13 FILLVALUE_ATTR_KEY, 

14 LEAD_TIME_DIMNAME, 

15 STATION_DIMNAME, 

16 STR_LEN_DIMNAME, 

17 TIME_DIMNAME, 

18 UNITS_ATTR_KEY, 

19) 

20 

21 

22def iso_date_time_str(t: Any) -> str: 

23 """Convert a date-time object to a string in ISO format, using space as separator.""" 

24 return pd.Timestamp(t).isoformat(" ") 

25 

26 

27#' Check that a date-time is in the UTC time zone, and return the date time offset 'zero' 

28#' 

29#' Check that a date-time is in the UTC time zone, and return the date time offset 'zero' 

30#' 

31#' @param d an object coercible to a POSIXct 

32#' @export 

33#' @return a character, the time zone offset string '+0000' 

34#' @importFrom lubridate tz 

35#' @examples 

36#' start_time <- ISOdate(year=2010, month=08, day=01, hour = 12, min = 0, sec = 0, tz = 'UTC') 

37#' check_is_utc(d=start_time) 

38#' 

39def check_is_utc(d: Any) -> bool: 

40 """Check that a date-time is in the UTC time zone.""" 

41 a = pd.Timestamp(d) 

42 if a.tz is None: 

43 return True # ? 

44 z = a.tz 

45 from datetime import timezone 

46 

47 return z == timezone.utc 

48 

49 

50#' Create a time axis unit known to work for netCDF 

51#' 

52#' Create a time axis unit known to work for netCDF 

53#' 

54#' @param d an object coercible to a POSIXct 

55#' @param time_step the character prefix to put before the date, in the netCDF time axis unit definition. 

56#' @param tzoffset an optional character, the time offset from UTC, e.g. '+1000' for 10 hours ahead of UTC. 

57#' Can be missing, in which case it must be explicitly a UTC time. 

58#' Note that the tzoffset completely supersedes the time zone if present. 

59#' @export 

60#' @return a character, the axis units to use for the netCDF 'time' dimension 

61#' @examples 

62#' start_time <- ISOdate(year=2010, month=08, day=01, hour = 12, min = 0, sec = 0, tz = 'UTC') 

63#' create_netcdf_time_axis(d=start_time) 

64#' start_time <- ISOdate(year=2015, month=10, day=04, hour = 01, 

65#' min = 0, sec = 0, tz = 'Australia/Sydney') 

66#' create_netcdf_time_axis(d=start_time, tzoffset='+1000') 

67#' 

68def create_netcdf_time_axis(d: Any, time_step: str = "hours since", tzoffset: Optional[str] = None) -> str: 

69 """Create a time axis unit known to work for netCDF.""" 

70 if tzoffset is None: 

71 if not check_is_utc(d): 

72 raise ValueError("date time must have UTC or GMT as time zone") 

73 tzoffset = "+0000" 

74 return " ".join([time_step, iso_date_time_str(as_naive_timestamp(d)), tzoffset]) 

75 

76 

77def as_naive_timestamp(d: Union[datetime, pd.Timestamp]) -> pd.Timestamp: 

78 """Convert a date-time object to a naive timestamp.""" 

79 return pd.Timestamp( 

80 year=d.year, 

81 month=d.month, 

82 day=d.day, 

83 hour=d.hour, 

84 minute=d.minute, 

85 second=d.second, 

86 ) 

87 

88 

89#' Helper function to create the definition of the time dimension for use in a netCDF file 

90#' 

91#' Helper function to create the definition of the time dimension for use in a netCDF file. Defaults to create an hourly axis. 

92#' 

93#' @param from the start date of the time axis 

94#' @param n length of the time dimension 

95#' @param time_step unit prefix in the time dimension units 

96#' @param time_step_delta integer, length of time units between each steps 

97#' @param tzoffset an optional character, the time offset from UTC, e.g. '+1000' for 10 hours ahead of UTC. Can be missing, in which case 'from' must be explicitly a UTC time. Note that the tzoffset completely supersedes the time zone if present. 

98#' @import ncdf4 

99#' @export 

100#' @return A list with keys units and values 

101#' @seealso See 

102#' \code{\link{create_efts}} for examples 

103#' @examples 

104#' timeAxisStart <- ISOdate(2015, 10, 4, 0, 0, 0, tz = "Australia/Canberra") 

105#' (time_dim_info <- create_time_info(from = timeAxisStart, n = 24L, 

106#' time_step = "hours since", time_step_delta = 3L, tzoffset = "+1000")) 

107#' 

108#' # Note that the time zone information of thes sart date is NOT 

109#' # used by create_time_info; the tzoffset argument takes precedence 

110#' timeAxisStart <- ISOdate(2015, 10, 4, 0, 0, 0, tz = "Australia/Perth") 

111#' (time_dim_info <- create_time_info(from = timeAxisStart, n = 24L, 

112#' time_step = "hours since", time_step_delta = 3L, tzoffset = "+1000")) 

113#' 

114def create_time_info( 

115 start: Any, 

116 n: int, 

117 time_step: str = "hours since", 

118 time_step_delta: int = 1, 

119 tzoffset: Optional[str] = None, 

120) -> Dict[str, Any]: 

121 """Helper function to create the definition of the time dimension for use in a netCDF file.""" 

122 return { 

123 UNITS_ATTR_KEY: create_netcdf_time_axis( 

124 d=start, 

125 time_step=time_step, 

126 tzoffset=tzoffset, 

127 ), 

128 "values": np.arange(0, n) * time_step_delta, 

129 } 

130 

131 

132# #' Retrieves the first date of the time dimension from a netCDF file 

133# #' 

134# #' Retrieves the first date of the time dimension from a netCDF file of daily data, given the units found in the netCDF attribute for the time dimension 

135# #' 

136# #' @param time_units The string description of the units of the time dimension e.g. 'days since 1980-01-01' or 'hours since 2010-08-01 13:00:00 +0000' 

137# #' @param time_zone the time zone to use for the returned value. 

138# #' @export 

139# #' @importFrom udunits2 ud.convert 

140# #' @importFrom stringr str_split 

141# #' @import lubridate 

142# #' @return A POSIXct object, origin of the time dimension as defined 

143# #' @examples 

144# #' 

145# #' x <- "hours since 2015-10-04 00:00:00 +1023" 

146# #' get_start_date(x) 

147# #' get_start_date(x,time_zone = 'UTC') 

148# #' get_start_date(x,time_zone = 'Australia/Perth') 

149# #' get_start_date(x,time_zone = 'Australia/Canberra') 

150# #' 

151# get_start_date(time_units, time_zone = "UTC") { 

152 

153# # temporary work around https://github.com/jmp75/efts/issues/3 

154# udu <- stringr::str_split(time_units, pattern = ' +')[[1]] 

155# s <- paste(udu[3], udu[4], sep='T') 

156# dt <- ymd_hms(s) 

157# if(is.na(dt)) stop(paste0('Could not parse date time out of string ', time_units)) 

158# return(dt) 

159 

160# # refDate <- lubridate::origin # 

161# # class(refDate) <- c("POSIXct", "POSIXt") # workaround what I think is a lubridate bug (possibly now wolved); 

162# # # try origin + days(1) and its effect, visibly because of class ordering on origin. 

163# # isDaily <- is_daily_time_step(time_units) 

164# # refDateUnits <- paste(ifelse(isDaily, "days", "hours"), "since 1970-01-01 00:00:00 +0000") 

165# # offsetSinceRef <- udunits2::ud.convert(0, time_units, refDateUnits) 

166# # offsetFun <- get_time_step_function(time_units) 

167# # startDateUtc <- refDate + offsetFun(offsetSinceRef) 

168# # startDate <- lubridate::with_tz(startDateUtc, time_zone) 

169# # return(startDate) 

170# } 

171 

172# #' Retrieves the unit string of the time dimension from a netCDF file 

173# #' 

174# #' @export 

175# #' @param ncfile an object of class ncdf4 

176# #' @param TIME_DIMNAME The name of the time dimension, by default 'time' as per the CF conventions. 

177# #' @return a character 

178# get_time_units(ncfile, TIME_DIMNAME = "time") { 

179# return(ncdf4::ncatt_get(ncfile, TIME_DIMNAME, UNITS_ATTR_KEY)$value) 

180# } 

181 

182# #' Retrieves the time dimension from a netCDF file 

183# #' 

184# #' @export 

185# #' @param ncfile an object of class ncdf4 

186# #' @param TIME_DIMNAME The name of the time dimension, by default 'time' as per the CF conventions. 

187# #' @param time_zone the time zone to use for the returned value. 

188# #' @return A vector of Dates 

189# get_time_dimension(ncfile, TIME_DIMNAME = "time", time_zone = "UTC") { 

190# time_units <- get_time_units(ncfile, TIME_DIMNAME) 

191# timeValues <- ncdf4::ncvar_get(ncfile, TIME_DIMNAME) 

192# startDate <- get_start_date(time_units, time_zone = time_zone) 

193# offsetFun <- get_time_step_function(time_units) 

194# startDate + offsetFun(timeValues) 

195# } 

196 

197# #' @importFrom stringr str_sub 

198# offset_as_duration(delta) { 

199# h <- stringr::str_sub(delta, 1L, 2L) # 10 

200# m <- stringr::str_sub(delta, 3L, 4L) # 30 

201# return((lubridate::dhours(as.integer(h)) + lubridate::dminutes(as.integer(m)))) 

202# } 

203 

204# #' @importFrom stringr str_sub 

205# offset_as_difftime(delta) { 

206# h <- stringr::str_sub(delta, 1L, 2L) # 10 

207# m <- stringr::str_sub(delta, 3L, 4L) # 30 

208# b <- lubridate::origin + lubridate::dhours(as.integer(h)) + lubridate::dminutes(as.integer(m)) 

209# b - lubridate::origin 

210# } 

211 

212# #' Finds the UTC offset in a date-time string 

213# #' 

214# #' Finds the UTC offset in a date-time or time axis specification string 

215# #' such as 'hours since 2015-10-04 00:00:00 +1030' 

216# #' 

217# #' @param time_units the string to process 

218# #' @param as_string a boolean. If true, return the time offset as a character, otherwise return a difftime object. 

219# #' @return the time offset as a character, or as a difftime object. 

220# #' @export 

221# #' @examples 

222# #' 

223# #' x <- "hours since 2015-10-04 00:00:00 +1023" 

224# #' find_utc_offset(x) 

225# #' find_utc_offset(x, FALSE) 

226# #' x <- "hours since 2015-10-04 00:00:00 -0837" 

227# #' find_utc_offset(x) 

228# #' find_utc_offset(x, FALSE) 

229# #' x <- "hours since 2015-10-04 00:00:00" 

230# #' find_utc_offset(x) 

231# #' find_utc_offset(x, FALSE) 

232# #' 

233# find_utc_offset(time_units, as_string = TRUE) { 

234# # TODO: there may be a smarter way using udunits to determine the offset, 

235# # but not trivial either. 

236# x <- stringr::str_split(time_units, "\\+")[[1]] 

237# # [1] 'hours since 2015-10-04 00:00:00 ' '1030' the offset would have been 

238# # with a positive sign: +1030 

239# if (length(x) > 1) { 

240# delta <- last(x) # 1030 

241# if (as_string) { 

242# return(paste0("+", delta)) 

243# } else { 

244# return(+offset_as_difftime(delta)) 

245# } 

246# } 

247# x <- stringr::str_split(time_units, "[\\-]")[[1]] 

248# # [1] 'hours since 2015' '10' '04 00:00:00 ' '1030' the offset would have 

249# # been with a negative sign: -1030 

250# if (length(x) == 4) { 

251# delta <- last(x) # 1030 

252# if (as_string) { 

253# return(paste0("-", delta)) 

254# } else { 

255# return(-offset_as_difftime(delta)) 

256# } 

257# } else { 

258# # length(x) < 4 : no offset detected 

259# if (as_string) { 

260# return("") 

261# } else { 

262# return(lubridate::origin - lubridate::origin) 

263# } 

264# } 

265# } 

266 

267 

268# ######################################## 

269# # Below are functions not exported 

270# ######################################## 

271 

272# is_daily_time_step(time_units) { 

273# isDaily <- charmatch("days since", time_units) 

274# isDaily <- ifelse(is.na(isDaily), FALSE, isDaily == 1) 

275# isHourly <- charmatch("hours since", time_units) 

276# isHourly <- ifelse(is.na(isHourly), FALSE, isHourly == 1) 

277# if (!(isDaily | isHourly)) 

278# stop(paste("Could not detect if hourly or daily - unit not supported:", 

279# time_units)) 

280# isDaily 

281# } 

282 

283# #' Detect the unit of the time step in the time axis unit 

284# #' 

285# #' @param time_units The string description of the units of the time dimension e.g. 'days since 1980-01-01' or 'hours since 2010-08-01 13:00:00 +0000' 

286# #' @import lubridate 

287# #' @return A duration function from lubridate 

288# get_time_step_function(time_units) { 

289# isDaily <- is_daily_time_step(time_units) 

290# offsetFun <- ifelse(isDaily, lubridate::ddays, lubridate::dhours) 

291# return(offsetFun) 

292# } 

293 

294 

295def _cftime_to_pdtstamp(t: pd.Timestamp, tz_str: Optional[str]) -> pd.Timestamp: 

296 return pd.Timestamp(t.isoformat(), tz=tz_str) 

297 

298 

299_as_tstamps = np.vectorize(_cftime_to_pdtstamp) 

300 

301 

302def cftimes_to_pdtstamps( 

303 cftimes: Iterable[DatetimeGregorian], 

304 tz_str: Optional[str] = None, 

305) -> np.ndarray[pd.Timestamp,pd.Timestamp]: 

306 """Convert one or more Climate and Forecast (CF) times to timestamps.""" 

307 return _as_tstamps(cftimes, tz_str) 

308 

309 

310def create_timestamps( 

311 time_dim_info: Dict[str, Any], 

312 tz_str: Optional[str] = None, 

313) -> np.ndarray[pd.Timestamp,pd.Timestamp]: 

314 """Create time axis timestamps given the time dimension information.""" 

315 import xarray as xr 

316 

317 axis_units = time_dim_info[UNITS_ATTR_KEY] 

318 axis_values = time_dim_info["values"] 

319 var = xr.Variable( 

320 dims=[TIME_DIMNAME], 

321 data=axis_values, 

322 encoding={FILLVALUE_ATTR_KEY: None}, 

323 attrs={ 

324 UNITS_ATTR_KEY: axis_units, 

325 }, 

326 ) 

327 from xarray.coding import times 

328 

329 decod = times.CFDatetimeCoder(use_cftime=True) 

330 time_coords = decod.decode(var, name=TIME_DIMNAME) 

331 return cftimes_to_pdtstamps(time_coords.values, tz_str=tz_str) 

332 

333 

334#' Creates dimensions for a netCDF EFTS data set 

335#' 

336#' Creates dimensions for a netCDF EFTS data set. Note that end users are unlikely to need to use this function directly, hence this is not exported 

337#' 

338#' @param time_dim_info a list with the units and values defining the time dimension of the data set 

339#' @param str_len maximum length of the character for the station identifiers. 

340#' @param lead_length length of the lead time. 

341#' @param ensemble_length number of ensembles, i.e. number of forecasts for each point on the main time axis of the data set 

342#' @param num_stations number of stations 

343#' @import ncdf4 

344#' @return A list of ncdf4 dimensions 

345#' @seealso See 

346#' \code{\link{create_efts}} for examples 

347def _create_nc_dims( 

348 time_dim_info: Dict[str, Any], 

349 str_len: int = 30, 

350 lead_length: int = 1, 

351 ensemble_length: int = 1, 

352 num_stations: int = 1, 

353) -> Dict[str, Tuple[str, np.ndarray, Dict[str, str]]]: 

354 """Creates dimensions for a netCDF EFTS data set.""" 

355 time_dim = ( 

356 TIME_DIMNAME, 

357 time_dim_info["values"], 

358 {UNITS_ATTR_KEY: time_dim_info[UNITS_ATTR_KEY], "longname": "time"}, 

359 ) 

360 # time_dim = ncdf4::ncdim_def(TIME_DIMNAME, units = time_dim_info$units, vals = time_dim_info$values, 

361 # unlim = T, create_dimvar = TRUE, longname = "time") 

362 station_dim = ( 

363 STATION_DIMNAME, 

364 np.arange(1, num_stations + 1), 

365 {UNITS_ATTR_KEY: "", "longname": STATION_DIMNAME}, 

366 ) 

367 str_dim = ( 

368 STR_LEN_DIMNAME, 

369 np.arange(1, str_len + 1), 

370 {UNITS_ATTR_KEY: "", "longname": "string length"}, 

371 ) 

372 lead_time_dim = ( 

373 LEAD_TIME_DIMNAME, 

374 np.arange(1, lead_length + 1), 

375 {UNITS_ATTR_KEY: "", "longname": "lead time"}, 

376 ) # TODO: check whether time_dim_info['units'] is alwaus suitable. 

377 ensemble_dim = ( 

378 ENS_MEMBER_DIMNAME, 

379 np.arange(1, ensemble_length + 1), 

380 {UNITS_ATTR_KEY: "", "longname": "ensemble"}, 

381 ) 

382 return { 

383 "time_dim": time_dim, 

384 "lead_time_dim": lead_time_dim, 

385 "station_dim": station_dim, 

386 "str_dim": str_dim, 

387 "ensemble_dim": ensemble_dim, 

388 }