Getting started¶

This is a sample notebook demonstrating how to use the geosdhydro package to convert a GeoPandas GeoDataFrame to a SWIFT JSON file. A browsable version of this notebook should be available in the package documentation.

In [1]:

Copied!

import geopandas as gpd
import geopandas as gpd

In [2]:

Copied!

from pathlib import Path
from pathlib import Path

In [3]:

Copied!

from geosdhydro import ShapefileToSwiftConverter
from geosdhydro import ShapefileToSwiftConverter

In [4]:

Copied!

# Path to the link shapefile under the user's home directory; adjust to wherever your copy of the data lives
fpath = Path.home() / "data/wnsw/Abercrombie/Abercrombie_links4swift.shp"
# Path to the link shapefile under the user's home directory; adjust to wherever your copy of the data lives
fpath = Path.home() / "data/wnsw/Abercrombie/Abercrombie_links4swift.shp"

In [5]:

Copied!

fpath.exists()
fpath.exists()

Out[5]:

True

In [6]:

Copied!

# Read the link shapefile into a geopandas dataframe
link_specs = gpd.read_file(fpath)

# Peek at the first rows to see which columns are available
print(link_specs.head())
# Read the link shapefile into a geopandas dataframe
link_specs = gpd.read_file(fpath)

# Peek at the first rows to see which columns are available
print(link_specs.head())

   OBJECTID  Shape_Leng  LinkID  FromNodeID  ToNodeID  HeadLink  SPathCnt  \
0        14    0.063634      14    43408481  43346081         0        11   
1        15    0.139247      15    43352571  43346081         0        11   
2        16    0.133341      16    43349393  43346081         0        18   
3        17    0.135098      17    43636314  43346081         0        14   
4        18    0.323005      18    43408465  43346081         0        40   

   LPathCnt      SPathLen      LPathLen        DArea  Prim_Link  SWIFT_ID  \
0        11  12423.789416  12423.789416          1.0          0      1801   
1        11  24712.339973  24712.339973          1.0          0      1804   
2        18  26499.836521  26499.836521          1.0          0      1805   
3        14  34835.592042  34835.592042          1.0          0      1802   
4        40  57889.009690  57889.009690  510940000.0          1        18   

   ToNodeID2       DArea2                                           geometry  
0          0         -1.0  LINESTRING (149.33174 -34.01786, 149.31528 -33...  
1          0         -1.0  LINESTRING (149.45444 -33.95167, 149.31528 -33...  
2          0         -1.0  LINESTRING (149.38417 -33.84222, 149.31528 -33...  
3          0         -1.0  LINESTRING (149.41821 -34.04389, 149.31528 -33...  
4   43346081  510940000.0  LINESTRING (149.59776 -34.11303, 149.31528 -33...

In [7]:

Copied!

# Map each column name to its dtype
link_specs.dtypes.to_dict()
# Map each column name to its dtype
link_specs.dtypes.to_dict()

Out[7]:

{'OBJECTID': dtype('int64'),
 'Shape_Leng': dtype('float64'),
 'LinkID': dtype('int64'),
 'FromNodeID': dtype('int64'),
 'ToNodeID': dtype('int64'),
 'HeadLink': dtype('int32'),
 'SPathCnt': dtype('int64'),
 'LPathCnt': dtype('int64'),
 'SPathLen': dtype('float64'),
 'LPathLen': dtype('float64'),
 'DArea': dtype('float64'),
 'Prim_Link': dtype('int32'),
 'SWIFT_ID': dtype('int32'),
 'ToNodeID2': dtype('int64'),
 'DArea2': dtype('float64'),
 'geometry': <geopandas.array.GeometryDtype at 0x7fd6b6662120>}

Note that some of the input columns (LinkID, ToNodeID, FromNodeID) are integers, out of habit. It is preferable to have them as strings, but the converter will transparently convert them to strings. Another thing to note is that there is a duplicated ID in the links:

In [8]:

Copied!





gdf = link_specs
# Select the 'LinkID' of every row whose ID occurs more than once
# (keep=False flags all occurrences, not just the repeats)
duplicates = gdf.loc[gdf['LinkID'].duplicated(keep=False), 'LinkID']

# Show the repeated IDs together with their row labels
print(duplicates)
gdf = link_specs
# Select the 'LinkID' of every row whose ID occurs more than once
# (keep=False flags all occurrences, not just the repeats)
duplicates = gdf.loc[gdf['LinkID'].duplicated(keep=False), 'LinkID']

# Show the repeated IDs together with their row labels
print(duplicates)

3     17
17    17
Name: LinkID, dtype: int64

Let's see what happens:

In [9]:

Copied!





# There are default values for the expected column names and for whether to retrieve lat/lon coordinates:
# converter = ShapefileToSwiftConverter(link_specs)

# but let us be explicit in this example tutorial
converter = ShapefileToSwiftConverter(
    gdf = link_specs,
    include_coordinates = False,
    linkid_field = 'LinkID',
    fromnodeid_field = 'FromNodeID',
    tonodeid_field = 'ToNodeID',
    spathlen_field = 'SPathLen',
    darea_field = 'DArea2',
    geometry_field = 'geometry',
)
# There are default values for the expected column names and for whether to retrieve lat/lon coordinates:
# converter = ShapefileToSwiftConverter(link_specs)

# but let us be explicit in this example tutorial
converter = ShapefileToSwiftConverter(
    gdf = link_specs,
    include_coordinates = False,
    linkid_field = 'LinkID',
    fromnodeid_field = 'FromNodeID',
    tonodeid_field = 'ToNodeID',
    spathlen_field = 'SPathLen',
    darea_field = 'DArea2',
    geometry_field = 'geometry',
)

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[9], line 5
      1 # THere are default values for the expected columns names and whether to retrieve lat/lon coordinates:
      2 # converter = ShapefileToSwiftConverter(link_specs)
      3 
      4 # but let us be explicit in this example tutorial
----> 5 converter = ShapefileToSwiftConverter(
      6     gdf = link_specs,
      7     include_coordinates = False,
      8     linkid_field = 'LinkID',
      9     fromnodeid_field = 'FromNodeID',
     10     tonodeid_field = 'ToNodeID',
     11     spathlen_field = 'SPathLen',
     12     darea_field = 'DArea2',
     13     geometry_field = 'geometry',
     14 )

File ~/src/geosdhydro/src/geosdhydro/_internal/swift.py:44, in ShapefileToSwiftConverter.__init__(self, gdf, include_coordinates, linkid_field, fromnodeid_field, tonodeid_field, spathlen_field, darea_field, geometry_field)
     42 self._darea_field = darea_field if darea_field else _default_darea_field
     43 self._geometry_field = geometry_field if geometry_field else _default_geometry_field
---> 44 self._check_geodf()

File ~/src/geosdhydro/src/geosdhydro/_internal/swift.py:99, in ShapefileToSwiftConverter._check_geodf(self)
     97 if not duplicates.empty:
     98     duplicate_indices = self.gdf[self.gdf[self._linkid_field].isin(duplicates.index)].index.tolist()
---> 99     raise ValueError(f"Column 'LinkID' contains duplicate values: {duplicates.index.tolist()} at indices {duplicate_indices}.")

ValueError: Column 'LinkID' contains duplicate values: ['17'] at indices [3, 17].

In [10]:

Copied!

link_specs.iloc[[3, 17]]
link_specs.iloc[[3, 17]]

Out[10]:

	OBJECTID	Shape_Leng	LinkID	FromNodeID	ToNodeID	HeadLink	SPathCnt	LPathCnt	SPathLen	LPathLen	DArea	Prim_Link	SWIFT_ID	ToNodeID2	DArea2	geometry
3	17	0.135098	17	43636314	43346081	0	14	14	34835.592042	34835.592042	1.0	0	1802	0	-1.0	LINESTRING (149.41821 -34.04389, 149.31528 -33...
17	17	0.128853	17	43351601	43349393	1	0	0	1.000000	0.000000	145644000.0	1	55	43349393	145644000.0	LINESTRING (149.42191 -33.79131, 149.38417 -33...

It is not quite clear what was intended with the above. One of the entries has a 1-metre link length (SPathLen) but a catchment area; the other is the other way around. It may have been a legacy workaround, or a data bug. Be that as it may, this is a useful way to illustrate the need to look at the data, and the built-in checks in the package.

For the sake of the example, let us just drop these.

In [11]:

Copied!

# Drop every row whose LinkID is duplicated (rows 3 and 17 in this dataset).
# Filtering on the duplicated IDs instead of hard-coded row labels keeps the
# cell safe to re-run: a second run simply finds nothing left to remove.
link_specs = link_specs[~link_specs['LinkID'].duplicated(keep=False)]
# Drop every row whose LinkID is duplicated (rows 3 and 17 in this dataset).
# Filtering on the duplicated IDs instead of hard-coded row labels keeps the
# cell safe to re-run: a second run simply finds nothing left to remove.
link_specs = link_specs[~link_specs['LinkID'].duplicated(keep=False)]

and now we expect the converter to do the job:

In [12]:

Copied!





# Same explicit arguments as in the failed attempt, now applied to the frame without the duplicated LinkID rows
converter = ShapefileToSwiftConverter(
    gdf = link_specs,
    include_coordinates = False,
    linkid_field = 'LinkID',
    fromnodeid_field = 'FromNodeID',
    tonodeid_field = 'ToNodeID',
    spathlen_field = 'SPathLen',
    darea_field = 'DArea2',
    geometry_field = 'geometry',
)

# Same explicit arguments as in the failed attempt, now applied to the frame without the duplicated LinkID rows
converter = ShapefileToSwiftConverter(
    gdf = link_specs,
    include_coordinates = False,
    linkid_field = 'LinkID',
    fromnodeid_field = 'FromNodeID',
    tonodeid_field = 'ToNodeID',
    spathlen_field = 'SPathLen',
    darea_field = 'DArea2',
    geometry_field = 'geometry',
)

In [13]:

Copied!

result = converter.convert()
result = converter.convert()

result is a python dictionary

As expected, given that some areas were negative in the input file (i.e. links without a contributing subarea), we have fewer subareas than links.

In [14]:

Copied!

# Single quotes inside the replacement fields keep this f-string valid on Python
# versions before 3.12, where reusing the outer quote character is a SyntaxError.
f"there are {len(result['Links'])} links, {len(result['Nodes'])} nodes, {len(result['SubAreas'])} subareas"
# Single quotes inside the replacement fields keep this f-string valid on Python
# versions before 3.12, where reusing the outer quote character is a SyntaxError.
f"there are {len(result['Links'])} links, {len(result['Nodes'])} nodes, {len(result['SubAreas'])} subareas"

Out[14]:

'there are 27 links, 29 nodes, 16 subareas'

The object converter has a save_to_file method, or you can use the json module to save the above result:

In [14]:

Copied!

import json
import json

In [15]:

Copied!

# Destination for the JSON file, under ~/tmp
fp = Path.home() / "tmp" / "abercrombie_swift.json"
# Uncomment the two lines below to write `result` to that file:
# with open(fp, "w") as f:
#     json.dump(result, f, indent=2)
# Destination for the JSON file, under ~/tmp
fp = Path.home() / "tmp" / "abercrombie_swift.json"
# Uncomment the two lines below to write `result` to that file:
# with open(fp, "w") as f:
#     json.dump(result, f, indent=2)

Checking the json output loads as a catchment structure¶

In [18]:

Copied!

# This can be done, if you have `swift2` in your Python environment, using:

# from swift2.model_definitions import model_from_json_file
# sim = model_from_json_file(fp)
# This can be done, if you have `swift2` in your Python environment, using:

# from swift2.model_definitions import model_from_json_file
# sim = model_from_json_file(fp)

In [ ]:

Getting started¶

Checking the json output loads as a catchment structure¶

Feedback