Skip to content

Commit 8906833

Browse files
committed
[enh] Blocks object may use separate geometry and values as input parameters + tests
1 parent d820b24 commit 8906833

File tree

3 files changed

+125
-18
lines changed

3 files changed

+125
-18
lines changed

src/pyinterpolate/core/data_models/blocks.py

Lines changed: 39 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -22,21 +22,32 @@
2222

2323
from pyinterpolate.distance.angular import calc_angles
2424
from pyinterpolate.distance.point import point_distance
25-
from pyinterpolate.transform.geo import points_to_lon_lat, largest_geometry
25+
from pyinterpolate.transform.geo import points_to_lon_lat, largest_geometry, \
26+
join_geometry_and_values
2627

2728

28-
# TODO: if multipolygon then get coordinates /
29-
# representative points from the largest block - as an option
3029
class Blocks:
3130
"""Class represents aggregated blocks data.
3231
3332
Parameters
3433
----------
35-
ds : gpd.GeoDataFrame
36-
Dataset with block values.
34+
ds : gpd.GeoDataFrame, optional
35+
Dataset with block values. Must be provided if ``values`` and
36+
``geometry`` parameters are not given.
3737
38-
value_column_name : Any
39-
Name of the column with block rates.
38+
values : ArrayLike, optional
39+
Aggregated values of each block. Optional parameter, if not
40+
given then ``ds`` must be provided.
41+
42+
geometries : ArrayLike, optional
43+
Array or similar structure with geometries. It must have the same
44+
length as ``values``. Optional parameter, if not given then ``ds``
45+
must be provided.
46+
47+
value_column_name : Any, optional
48+
Name of the column with block rates. Must be provided when
49+
the ``ds`` parameter is given, otherwise it is set to 'values' if
50+
not provided.
4051
4152
geometry_column_name : Any, default = 'geometry'
4253
Name of the column with a block geometry.
@@ -177,16 +188,18 @@ class Blocks:
177188
"""
178189

179190
def __init__(self,
180-
ds: gpd.GeoDataFrame,
181-
value_column_name,
182-
geometry_column_name='geometry',
183-
index_column_name=None,
184-
representative_points_column_name=None,
185-
representative_points_from_centroid=False,
186-
representative_points_from_random_sample=False,
187-
representative_points_from_largest_area=True,
188-
distances_between_representative_points=True,
189-
angles_between_representative_points=False):
191+
ds: gpd.GeoDataFrame = None,
192+
values: ArrayLike = None,
193+
geometries: ArrayLike = None,
194+
value_column_name: str = None,
195+
geometry_column_name = 'geometry',
196+
index_column_name = None,
197+
representative_points_column_name = None,
198+
representative_points_from_centroid = False,
199+
representative_points_from_random_sample = False,
200+
representative_points_from_largest_area = True,
201+
distances_between_representative_points = True,
202+
angles_between_representative_points = False):
190203

191204
# Helper params
192205
self._lon_col_name = 'lon'
@@ -202,7 +215,15 @@ def __init__(self,
202215
self._rep_ps_sample = representative_points_from_random_sample
203216
self._rep_ps_largest_area = representative_points_from_largest_area
204217

205-
self.ds = ds.copy(deep=True)
218+
if ds is not None:
219+
self.ds = ds.copy(deep=True)
220+
else:
221+
if value_column_name is None:
222+
value_column_name = 'values'
223+
self.ds = join_geometry_and_values(geometry=geometries,
224+
values=values,
225+
values_column_name=value_column_name)
226+
206227
self.value_column_name = value_column_name
207228
self.index_column_name = index_column_name
208229
self.geometry_column_name = geometry_column_name

src/pyinterpolate/transform/geo.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from typing import Tuple, Union
2+
from numpy.typing import ArrayLike
23

34
import geopandas as gpd
45
import numpy as np
@@ -34,6 +35,55 @@ def largest_geometry(geometry: MultiPolygon) -> Polygon:
3435
return poly
3536

3637

38+
def join_geometry_and_values(geometry,
                             values,
                             geometry_column_name: str = 'geometry',
                             values_column_name: str = 'values'
                             ) -> gpd.GeoDataFrame:
    """
    Function creates single object from geometries and aggregated values.

    Geometries and values are paired by position (first geometry with
    first value, and so on), never by index labels.

    Parameters
    ----------
    geometry : ArrayLike
        Geometries. May be an array, list, ``Series`` / ``GeoSeries``,
        a ``DataFrame`` (its first column is used) or a ``GeoDataFrame``
        (it is copied and the values column is appended to it).

    values : ArrayLike
        Aggregated values, one per geometry. May be an array, list,
        ``Series`` or a ``DataFrame`` (its first column is used).

    geometry_column_name : str, default = 'geometry'
        Name of the geometry column in the output. Not applied when
        ``geometry`` is a ``GeoDataFrame``: its existing columns are
        kept unchanged.

    values_column_name : str, default = 'values'
        Name of the column with values in the output.

    Returns
    -------
    : gpd.GeoDataFrame
        Frame with the values column and the geometry column.

    Raises
    ------
    ValueError
        If ``geometry`` and ``values`` have different lengths.
    """

    if len(geometry) != len(values):
        raise ValueError(
            'Number of geometries must be equal to number of values'
        )

    # Strip pandas containers down to a plain array, so the values are
    # joined with geometries by position
    if isinstance(values, pd.Series):
        values = values.values
    elif isinstance(values, pd.DataFrame):
        values = values[values.columns[0]].values

    # GeoDataFrame is a subclass of DataFrame, it must be tested first -
    # otherwise it is reduced to its first column and this branch
    # is never reached
    if isinstance(geometry, gpd.GeoDataFrame):
        gdf = geometry.copy(deep=True)
        gdf[values_column_name] = values
        return gdf

    if isinstance(geometry, pd.DataFrame):
        geometry = geometry[geometry.columns[0]].values
    elif isinstance(geometry, pd.Series):
        # The new frame has a fresh RangeIndex; a Series with any other
        # index would be aligned by labels and produce missing geometries
        geometry = geometry.reset_index(drop=True)

    gdf = gpd.GeoDataFrame(
        values, columns=[values_column_name], geometry=geometry
    )

    # Overwriting ``gdf.columns`` would rename the column but leave the
    # active geometry pointing at the old name
    if geometry_column_name != 'geometry':
        gdf = gdf.rename_geometry(geometry_column_name)

    return gdf
85+
86+
3787
def points_to_lon_lat(points: gpd.GeoSeries) -> Tuple:
3888
"""
3989
Function transform GeoSeries to lon / lat series.

tests/test_core/test_blocks_model.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,3 +206,39 @@ def test_block_index_outputs():
206206

207207
real_value = block.block_real_value(block_id=block_id)
208208
assert isinstance(real_value, float)
209+
210+
211+
def test_geometry_values_separated():
    """Blocks accepts geometries and values passed as separate inputs.

    The same checks are run for three kinds of input containers:
    two Series, two DataFrames and two arrays.
    """
    source = CANCER_DATA['ds']
    geometry_series = source[CANCER_DATA['geometry_column_name']]
    value_series = source[CANCER_DATA['value_column_name']]

    input_pairs = (
        # Pass as two Series
        (geometry_series, value_series),
        # Pass as two DataFrames
        (pd.DataFrame(geometry_series), pd.DataFrame(value_series)),
        # Pass as two arrays
        (geometry_series.values, value_series.values),
    )

    for geometries, values in input_pairs:
        block = Blocks(geometries=geometries, values=values)
        block_data = block.block_data

        assert isinstance(block_data, np.ndarray)
        n_rows, n_cols = block_data.shape[0], block_data.shape[1]
        assert n_rows == len(values)
        assert n_cols == 3

0 commit comments

Comments
 (0)