Skip to content

Commit 258e868

Browse files
authored
Merge pull request #24 from alpha-beta-soup/feat/11
initial implementation of linetrace
2 parents e85cf0d + c3bae5a commit 258e868

File tree

7 files changed

+316
-9
lines changed

7 files changed

+316
-9
lines changed

h3pandas/const.py

+1
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
11
COLUMN_H3_POLYFILL = "h3_polyfill"
2+
COLUMN_H3_LINETRACE = "h3_linetrace"

h3pandas/h3pandas.py

+49-2
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,10 @@
1212
from pandas.core.frame import DataFrame
1313
from geopandas.geodataframe import GeoDataFrame
1414

15-
from .const import COLUMN_H3_POLYFILL
15+
from .const import COLUMN_H3_POLYFILL, COLUMN_H3_LINETRACE
1616
from .util.decorator import catch_invalid_h3_address, doc_standard
1717
from .util.functools import wrapped_partial
18-
from .util.shapely import polyfill
18+
from .util.shapely import polyfill, linetrace
1919

2020
AnyDataFrame = Union[DataFrame, GeoDataFrame]
2121

@@ -758,6 +758,53 @@ def polyfill_resample(
758758

759759
return result.h3.h3_to_geo_boundary() if return_geometry else result
760760

761+
def linetrace(
    self, resolution: int, explode: bool = False
) -> AnyDataFrame:
    """Experimental. An H3 cell representation of a (Multi)LineString,
    which permits repeated cells, but not if they are repeated in
    immediate sequence.

    Parameters
    ----------
    resolution : int
        H3 resolution
    explode : bool
        If True, will explode the resulting list vertically.
        All other columns' values are copied.
        Default: False

    Returns
    -------
    (Geo)DataFrame with an additional ``h3_linetrace`` column. Without
    ``explode`` each row holds the list of H3 cells traced along that row's
    geometry; with ``explode`` there is one row per traced cell.

    Examples
    --------
    >>> from shapely.geometry import LineString
    >>> gdf = gpd.GeoDataFrame(geometry=[LineString([[0, 0], [1, 0], [1, 1]])])
    >>> gdf.h3.linetrace(4)
                                                geometry                                       h3_linetrace
    0  LINESTRING (0.00000 0.00000, 1.00000 0.00000, ...  [83754efffffffff, 83754cfffffffff, 837541fffff...  # noqa E501
    >>> gdf.h3.linetrace(4, explode=True)
                                                geometry     h3_linetrace
    0  LINESTRING (0.00000 0.00000, 1.00000 0.00000, ...  83754efffffffff
    0  LINESTRING (0.00000 0.00000, 1.00000 0.00000, ...  83754cfffffffff
    0  LINESTRING (0.00000 0.00000, 1.00000 0.00000, ...  837541fffffffff

    """
    def func(row):
        # `linetrace` here is the module-level helper imported from
        # .util.shapely, not this method (method names are not in scope
        # inside a function body). It yields lazily, hence the list().
        return list(linetrace(row.geometry, resolution))

    df = self._df
    cells = df.apply(func, axis=1)

    if explode:
        # One row per cell; joining on the index copies the other columns
        exploded = cells.explode().to_frame(COLUMN_H3_LINETRACE)
        return df.join(exploded)

    return df.assign(**{COLUMN_H3_LINETRACE: cells})
807+
761808
# Private methods
762809

763810
def _apply_index_assign(

h3pandas/util/decorator.py

+24-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from functools import wraps
2-
from typing import Callable
2+
from typing import Callable, Iterator
33
from h3 import H3CellError
44

55

@@ -34,6 +34,29 @@ def safe_f(*args, **kwargs):
3434
return safe_f
3535

3636

37+
def sequential_deduplication(
    func: Callable[..., Iterator[str]]
) -> Callable[..., Iterator[str]]:
    """
    Decorator that doesn't permit two consecutive items of an iterator
    to be the same.

    Parameters
    ----------
    func : Callable
        Function (typically a generator function) returning an iterable

    Returns
    -------
    Generator function accepting the same arguments as func. Yields from
    func, but won't yield two items in a row that are the same. Items that
    repeat non-consecutively are kept.
    """
    @wraps(func)
    def inner(*args, **kwargs):
        # A unique sentinel marks "nothing yielded yet". Using None here
        # (or as the end-of-iteration marker) would silently truncate the
        # output at the first None item.
        last = object()
        for item in func(*args, **kwargs):
            if item != last:
                yield item
            last = item
    return inner
58+
59+
3760
# TODO: Test
3861
def doc_standard(column_name: str, description: str) -> Callable:
3962
"""Wrapper to provide a standard apply-to-H3-index docstring"""

h3pandas/util/shapely.py

+43-2
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
1-
from typing import Union, Set, Tuple, List
2-
from shapely.geometry import Polygon, MultiPolygon
1+
from typing import Union, Set, Tuple, List, Iterator
2+
from shapely.geometry import Polygon, MultiPolygon, LineString, MultiLineString
33
from h3 import h3
4+
from .decorator import sequential_deduplication
45

56
MultiPolyOrPoly = Union[Polygon, MultiPolygon]
7+
MultiLineOrLine = Union[LineString, MultiLineString]
68

79

810
def _extract_coords(polygon: Polygon) -> Tuple[List, List[List]]:
@@ -46,3 +48,42 @@ def polyfill(
4648
return set(h3_addresses)
4749
else:
4850
raise TypeError(f"Unknown type {type(geometry)}")
51+
52+
53+
@sequential_deduplication
def linetrace(
    geometry: MultiLineOrLine, resolution: int
) -> Iterator[str]:
    """h3.polyfill equivalent for shapely (Multi)LineString

    Does not represent lines with duplicate sequential cells,
    but cells may repeat non-sequentially to represent
    self-intersections

    Parameters
    ----------
    geometry : LineString or MultiLineString
        Line to trace with H3 cells
    resolution : int
        H3 resolution of the tracing cells

    Returns
    -------
    Iterator of H3 addresses (not a set: order is kept and cells may repeat)

    Raises
    ------
    TypeError if geometry is not a LineString or a MultiLineString.
    Because this is a generator, the error surfaces when the result is
    first iterated, not when the function is called.
    """
    if isinstance(geometry, MultiLineString):
        # Recurse after getting component linestrings from the multiline
        for part in geometry.geoms:
            yield from linetrace(part, resolution)
    elif isinstance(geometry, LineString):
        vertices = geometry.coords
        for start, end in zip(vertices, vertices[1:]):
            # Coordinates are (x, y[, z]) i.e. (lng, lat[, z]) while
            # h3.geo_to_h3 expects (lat, lng). Index explicitly instead of
            # reversing the tuple so that a z value is ignored rather than
            # passed on as an extra positional argument.
            a = h3.geo_to_h3(start[1], start[0], resolution)
            b = h3.geo_to_h3(end[1], end[0], resolution)
            yield from h3.h3_line(a, b)  # inclusive of a and b
    else:
        raise TypeError(f"Unknown type {type(geometry)}")

tests/test_h3pandas.py

+159-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from h3pandas import h3pandas # noqa: F401
22
from h3 import h3
33
import pytest
4-
from shapely.geometry import Polygon, box, Point
4+
from shapely.geometry import Polygon, LineString, MultiLineString, box, Point
55
import pandas as pd
66
import geopandas as gpd
77
from geopandas.testing import assert_geodataframe_equal
@@ -33,6 +33,33 @@ def basic_geodataframe_polygon(basic_geodataframe):
3333
return gpd.GeoDataFrame(geometry=[geom], crs="epsg:4326")
3434

3535

36+
@pytest.fixture
def basic_geodataframe_linestring():
    """GeoDataFrame (epsg:4326) holding a single two-vertex LineString"""
    start = (174.793092, -37.005372)
    end = (175.621138, -40.323142)
    line = LineString([start, end])
    return gpd.GeoDataFrame(geometry=[line], crs="epsg:4326")
42+
43+
44+
@pytest.fixture
def basic_geodataframe_multilinestring():
    """GeoDataFrame (epsg:4326) holding a single two-part MultiLineString

    NB one of the LineString parts traverses the antimeridian
    (the second part ends at longitude 183.515625).
    """
    geom = MultiLineString([
        [[174.793092, -37.005372], [175.621138, -40.323142]],
        [
            [168.222656, -45.79817], [171.914063, -34.307144],
            [178.769531, -37.926868], [183.515625, -43.992815],
        ],
    ])
    return gpd.GeoDataFrame(geometry=[geom], crs="epsg:4326")
55+
56+
57+
@pytest.fixture
def basic_geodataframe_empty_linestring():
    """Single-row GeoDataFrame (epsg:4326) whose geometry is an empty LineString"""
    empty_line = LineString()
    return gpd.GeoDataFrame(geometry=[empty_line], crs="epsg:4326")
61+
62+
3663
@pytest.fixture
3764
def basic_geodataframe_polygons(basic_geodataframe):
3865
geoms = [box(0, 0, 1, 1), box(0, 0, 2, 2)]
@@ -77,6 +104,11 @@ def h3_geodataframe_with_values(h3_dataframe_with_values):
77104
)
78105

79106

107+
@pytest.fixture
def h3_geodataframe_with_polyline_values(basic_geodataframe_linestring):
    """The basic LineString GeoDataFrame with an extra ``val`` column set to 10"""
    with_values = basic_geodataframe_linestring.assign(val=10)
    return with_values
110+
111+
80112
# Tests: H3 API
81113
class TestGeoToH3:
82114
def test_geo_to_h3(self, basic_dataframe):
@@ -271,6 +303,132 @@ def test_polyfill_explode_unequal_lengths(self, basic_geodataframe_polygons):
271303
assert set(result["h3_polyfill"]) == expected_indices
272304

273305

306+
class TestLineTrace:
    """Tests for the ``h3.linetrace`` accessor method"""

    # Cells expected at resolution 3 for the two-vertex LineString fixture
    LINE_CELLS = [
        "83bb50fffffffff",
        "83bb54fffffffff",
        "83bb72fffffffff",
        "83bb0dfffffffff",
        "83bb2bfffffffff",
    ]

    # Cells expected at resolution 2 for each part of the MultiLineString
    # fixture, in part order
    MULTILINE_PART_CELLS = [
        [
            "82bb57fffffffff", "82bb0ffffffffff",
        ],
        [
            "82da87fffffffff", "82da97fffffffff",
            "82bb67fffffffff", "82bb47fffffffff",
            "82bb5ffffffffff", "82bb57fffffffff",
            "82ba27fffffffff", "82bb1ffffffffff",
            "82bb07fffffffff", "82bb37fffffffff",
        ],
    ]

    def test_empty_linetrace(self, basic_geodataframe_empty_linestring):
        traced = basic_geodataframe_empty_linestring.h3.linetrace(2)
        assert len(traced.iloc[0]["h3_linetrace"]) == 0

    def test_linetrace(self, basic_geodataframe_linestring):
        traced = basic_geodataframe_linestring.h3.linetrace(3)
        cells = traced.iloc[0]["h3_linetrace"]
        assert len(cells) == 5
        assert list(cells) == self.LINE_CELLS

    def test_linetrace_explode(self, basic_geodataframe_linestring):
        traced = basic_geodataframe_linestring.h3.linetrace(3, explode=True)
        assert traced.shape == (5, 2)
        assert traced.iloc[0]['h3_linetrace'] == self.LINE_CELLS[0]
        assert traced.iloc[-1]['h3_linetrace'] == self.LINE_CELLS[-1]

    def test_linetrace_with_values(self, h3_geodataframe_with_polyline_values):
        traced = h3_geodataframe_with_polyline_values.h3.linetrace(3)
        first_row = traced.iloc[0]
        assert traced.shape == (1, 3)
        assert 'val' in traced.columns
        assert first_row['val'] == 10
        assert len(first_row["h3_linetrace"]) == 5
        assert list(first_row["h3_linetrace"]) == self.LINE_CELLS

    def test_linetrace_with_values_explode(self,
                                           h3_geodataframe_with_polyline_values):
        traced = h3_geodataframe_with_polyline_values.h3.linetrace(3, explode=True)
        assert traced.shape == (5, 3)
        assert 'val' in traced.columns
        assert traced.iloc[0]['val'] == 10
        assert traced.iloc[0]["h3_linetrace"] == self.LINE_CELLS[0]
        assert traced.iloc[-1]['h3_linetrace'] == self.LINE_CELLS[-1]
        # The exploded rows must each carry the copied value
        assert not traced["val"].isna().any()

    def test_linetrace_multiline(self, basic_geodataframe_multilinestring):
        traced = basic_geodataframe_multilinestring.h3.linetrace(2)
        # The parts are traced one after another into a single list
        all_cells = self.MULTILINE_PART_CELLS[0] + self.MULTILINE_PART_CELLS[1]
        assert len(traced.iloc[0]["h3_linetrace"]) == 12  # 12 cells total
        assert list(traced.iloc[0]["h3_linetrace"]) == all_cells

    def test_linetrace_multiline_explode_index_parts(
        self, basic_geodataframe_multilinestring
    ):
        parts = basic_geodataframe_multilinestring.explode(index_parts=True)
        traced = parts.h3.linetrace(2, explode=True)
        first_part, last_part = (
            self.MULTILINE_PART_CELLS[0], self.MULTILINE_PART_CELLS[-1]
        )
        assert len(traced["h3_linetrace"]) == 12  # 12 cells in total
        assert traced.iloc[0]["h3_linetrace"] == first_part[0]
        assert traced.iloc[-1]["h3_linetrace"] == last_part[-1]

    def test_linetrace_multiline_index_parts_no_explode(
        self, basic_geodataframe_multilinestring
    ):
        parts = basic_geodataframe_multilinestring.explode(index_parts=True)
        traced = parts.h3.linetrace(2, explode=False)
        first_part, last_part = (
            self.MULTILINE_PART_CELLS[0], self.MULTILINE_PART_CELLS[-1]
        )
        assert len(traced["h3_linetrace"]) == 2  # 2 parts
        assert len(traced.iloc[0]["h3_linetrace"]) == 2  # 2 cells
        assert traced.iloc[0]["h3_linetrace"] == first_part
        assert len(traced.iloc[-1]["h3_linetrace"]) == 10  # 10 cells
        assert traced.iloc[-1]["h3_linetrace"] == last_part
430+
431+
274432
class TestCellArea:
275433
def test_cell_area(self, indexed_dataframe):
276434
expected = indexed_dataframe.assign(

tests/util/test_decorator.py

+12-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from h3 import h3
22
import pytest
33

4-
from h3pandas.util.decorator import catch_invalid_h3_address
4+
from h3pandas.util.decorator import catch_invalid_h3_address, sequential_deduplication
55

66

77
class TestCatchInvalidH3Address:
@@ -18,3 +18,14 @@ def safe_h3_to_parent(h3_address):
1818

1919
with pytest.raises(ValueError):
2020
safe_h3_to_parent("891f1d48177fff1") # Originally H3CellError
21+
22+
23+
class TestSequentialDeduplication:
    """Tests for the ``sequential_deduplication`` decorator"""

    def test_catch_sequential_duplicate_h3_addresses(self):
        @sequential_deduplication
        def passthrough(items):
            yield from items

        with_repeats = [1, 1, 2, 3, 3, 4, 5, 4, 3, 3, 2, 1, 1]
        # Only immediate repeats collapse; values seen earlier may reappear
        without_repeats = [1, 2, 3, 4, 5, 4, 3, 2, 1]
        assert list(passthrough(with_repeats)) == without_repeats

0 commit comments

Comments
 (0)