From e4dec60a394094ec0d1b9e90d665ce673ecc5707 Mon Sep 17 00:00:00 2001 From: Jon Mease Date: Mon, 4 Nov 2019 19:43:18 -0500 Subject: [PATCH 01/13] Add polygon rendering support based on spatialpandas extension arrays Also includes support for rendering points and lines from geometry extension arrays. --- datashader/__init__.py | 1 - datashader/core.py | 177 ++++++++++++++----- datashader/data_libraries/pandas.py | 3 +- datashader/glyphs/__init__.py | 4 +- datashader/glyphs/line.py | 121 ++++++++++++- datashader/glyphs/points.py | 122 +++++++++++++ datashader/glyphs/polygon.py | 255 ++++++++++++++++++++++++++++ datashader/tests/test_dask.py | 185 +++++++++++++------- datashader/tests/test_pandas.py | 179 ++++++++++++------- datashader/tests/test_polygons.py | 212 +++++++++++++++++++++++ datashader/utils.py | 8 +- 11 files changed, 1094 insertions(+), 173 deletions(-) create mode 100644 datashader/glyphs/polygon.py create mode 100644 datashader/tests/test_polygons.py diff --git a/datashader/__init__.py b/datashader/__init__.py index 433d45f51..aaa30dbc5 100644 --- a/datashader/__init__.py +++ b/datashader/__init__.py @@ -12,7 +12,6 @@ from . import transfer_functions as tf # noqa (API import) from . import data_libraries # noqa (API import) - # Make RaggedArray pandas extension array available for # pandas >= 0.24.0 is installed from pandas import __version__ as pandas_version diff --git a/datashader/core.py b/datashader/core.py index 319d1d161..a697e87fc 100644 --- a/datashader/core.py +++ b/datashader/core.py @@ -133,6 +133,18 @@ def validate(self, range): _axis_lookup = {'linear': LinearAxis(), 'log': LogAxis()} +def validate_xy_or_geometry(glyph, x, y, geometry): + if (geometry is None and (x is None or y is None) or + geometry is not None and (x is not None or y is not None)): + raise ValueError(""" +{glyph} coordinates may be specified by providing both the x and y arguments, or by +providing the geometry argument. 
Received: + x: {x} + y: {y} + geometry: {geometry} +""".format(glyph=glyph, x=repr(x), y=repr(y), geometry=repr(geometry))) + + class Canvas(object): """An abstract canvas representing the space in which to bin. @@ -157,7 +169,7 @@ def __init__(self, plot_width=600, plot_height=600, self.x_axis = _axis_lookup[x_axis_type] self.y_axis = _axis_lookup[y_axis_type] - def points(self, source, x, y, agg=None): + def points(self, source, x=None, y=None, agg=None, geometry=None): """Compute a reduction by pixel, mapping data to pixels as points. Parameters @@ -165,26 +177,38 @@ def points(self, source, x, y, agg=None): source : pandas.DataFrame, dask.DataFrame, or xarray.DataArray/Dataset The input datasource. x, y : str - Column names for the x and y coordinates of each point. + Column names for the x and y coordinates of each point. If provided, + the geometry argument may not also be provided. agg : Reduction, optional Reduction to compute. Default is ``count()``. + geometry: str + Column name of a PointsArray of the coordinates of each point. If provided, + the x and y arguments may not also be provided. 
""" - from .glyphs import Point + from .glyphs import Point, MultiPoint2dGeometry from .reductions import count as count_rdn + + validate_xy_or_geometry('Point', x, y, geometry) + if agg is None: agg = count_rdn() - if (isinstance(source, SpatialPointsFrame) and - source.spatial is not None and - source.spatial.x == x and source.spatial.y == y and - self.x_range is not None and self.y_range is not None): + # Handle down-selecting of SpatialPointsFrame + if geometry is None: + if (isinstance(source, SpatialPointsFrame) and + source.spatial is not None and + source.spatial.x == x and source.spatial.y == y and + self.x_range is not None and self.y_range is not None): - source = source.spatial_query( - x_range=self.x_range, y_range=self.y_range) + source = source.spatial_query( + x_range=self.x_range, y_range=self.y_range) + glyph = Point(x, y) + else: + glyph = MultiPoint2dGeometry(geometry) - return bypixel(source, self, Point(x, y), agg) + return bypixel(source, self, glyph, agg) - def line(self, source, x, y, agg=None, axis=0): + def line(self, source, x=None, y=None, agg=None, axis=0, geometry=None): """Compute a reduction by pixel, mapping data to pixels as one or more lines. @@ -215,6 +239,9 @@ def line(self, source, x, y, agg=None, axis=0): all rows in source * 1: Draw one line per row in source using data from the specified columns + geometry : str + Column name of a LinesArray of the coordinates of each line. If provided, + the x and y arguments may not also be provided. 
Examples -------- @@ -284,55 +311,61 @@ def line(self, source, x, y, agg=None, axis=0): """ from .glyphs import (LineAxis0, LinesAxis1, LinesAxis1XConstant, LinesAxis1YConstant, LineAxis0Multi, - LinesAxis1Ragged) + LinesAxis1Ragged, LineAxis1Geometry) from .reductions import any as any_rdn + + validate_xy_or_geometry('Line', x, y, geometry) + if agg is None: agg = any_rdn() - # Broadcast column specifications to handle cases where - # x is a list and y is a string or vice versa - orig_x, orig_y = x, y - x, y = _broadcast_column_specifications(x, y) + if geometry is not None: + glyph = LineAxis1Geometry(geometry) + else: + # Broadcast column specifications to handle cases where + # x is a list and y is a string or vice versa + orig_x, orig_y = x, y + x, y = _broadcast_column_specifications(x, y) - if axis == 0: - if (isinstance(x, (Number, string_types)) and - isinstance(y, (Number, string_types))): - glyph = LineAxis0(x, y) - elif (isinstance(x, (list, tuple)) and - isinstance(y, (list, tuple))): - glyph = LineAxis0Multi(tuple(x), tuple(y)) - else: - raise ValueError(""" + if axis == 0: + if (isinstance(x, (Number, string_types)) and + isinstance(y, (Number, string_types))): + glyph = LineAxis0(x, y) + elif (isinstance(x, (list, tuple)) and + isinstance(y, (list, tuple))): + glyph = LineAxis0Multi(tuple(x), tuple(y)) + else: + raise ValueError(""" Invalid combination of x and y arguments to Canvas.line when axis=0. 
Received: x: {x} y: {y} See docstring for more information on valid usage""".format( - x=repr(orig_x), y=repr(orig_y))) + x=repr(orig_x), y=repr(orig_y))) - elif axis == 1: - if isinstance(x, (list, tuple)) and isinstance(y, (list, tuple)): - glyph = LinesAxis1(tuple(x), tuple(y)) - elif (isinstance(x, np.ndarray) and - isinstance(y, (list, tuple))): - glyph = LinesAxis1XConstant(x, tuple(y)) - elif (isinstance(x, (list, tuple)) and - isinstance(y, np.ndarray)): - glyph = LinesAxis1YConstant(tuple(x), y) - elif (isinstance(x, (Number, string_types)) and - isinstance(y, (Number, string_types))): - glyph = LinesAxis1Ragged(x, y) - else: - raise ValueError(""" + elif axis == 1: + if isinstance(x, (list, tuple)) and isinstance(y, (list, tuple)): + glyph = LinesAxis1(tuple(x), tuple(y)) + elif (isinstance(x, np.ndarray) and + isinstance(y, (list, tuple))): + glyph = LinesAxis1XConstant(x, tuple(y)) + elif (isinstance(x, (list, tuple)) and + isinstance(y, np.ndarray)): + glyph = LinesAxis1YConstant(tuple(x), y) + elif (isinstance(x, (Number, string_types)) and + isinstance(y, (Number, string_types))): + glyph = LinesAxis1Ragged(x, y) + else: + raise ValueError(""" Invalid combination of x and y arguments to Canvas.line when axis=1. Received: x: {x} y: {y} See docstring for more information on valid usage""".format( - x=repr(orig_x), y=repr(orig_y))) + x=repr(orig_x), y=repr(orig_y))) - else: - raise ValueError(""" + else: + raise ValueError(""" The axis argument to Canvas.line must be 0 or 1 Received: {axis}""".format(axis=axis)) @@ -575,6 +608,64 @@ def area(self, source, x, y, agg=None, axis=0, y_stack=None): return bypixel(source, self, glyph, agg) + def polygons(self, source, geometry, agg=None): + """Compute a reduction by pixel, mapping data to pixels as one or + more filled polygons. + + Parameters + ---------- + source : xarray.DataArray or Dataset + The input datasource. + geometry : str + Column name of a PolygonsArray of the coordinates of each polygon. 
+ agg : Reduction, optional + Reduction to compute. Default is ``any()``. + + Returns + ------- + data : xarray.DataArray + + Examples + -------- + >>> from math import inf # doctest: +SKIP + ... import datashader as ds + ... import datashader.transfer_functions as tf + ... from datashader.geom import PolygonsArray + ... import pandas as pd + ... + ... polygons = PolygonsArray([ + ... # ## First Element + ... # Filled quadrilateral (CCW order) + ... [0, 0, 1, 0, 2, 2, -1, 4, 0, 0, + ... # Triangular hole (CW order) + ... -inf, -inf, 0.5, 1, 1, 2, 1.5, 1.5, 0.5, 1, + ... # Rectangular hole (CW order) + ... -inf, -inf, 0, 2, 0, 2.5, 0.5, 2.5, 0.5, 2, 0, 2, + ... # Filled triangle + ... inf, inf, 2.5, 3, 3.5, 3, 3.5, 4, 2.5, 3, + ... ], + ... + ... # ## Second Element + ... # Filled rectangle (CCW order) + ... [3, 0, 3, 2, 4, 2, 4, 0, 3, 0, + ... # Rectangular hole (CW order) + ... -inf, -inf, 3.25, 0.25, 3.75, 0.25, 3.75, 1.75, 3.25, 1.75, 3.25, 0.25, + ... ] + ... ]) + ... + ... df = pd.DataFrame({'polygons': polygons, 'v': range(len(polygons))}) + ... + ... cvs = ds.Canvas() + ... agg = cvs.polygons(df, geometry='polygons', agg=ds.sum('v')) + ... tf.shade(agg) + """ + from .glyphs import PolygonGeom + from .reductions import any as any_rdn + if agg is None: + agg = any_rdn() + glyph = PolygonGeom(geometry) + return bypixel(source, self, glyph, agg) + def quadmesh(self, source, x=None, y=None, agg=None): """Samples a recti- or curvi-linear quadmesh by canvas size and bounds. 
Parameters diff --git a/datashader/data_libraries/pandas.py b/datashader/data_libraries/pandas.py index 0f4ba627c..a5aa65307 100644 --- a/datashader/data_libraries/pandas.py +++ b/datashader/data_libraries/pandas.py @@ -4,7 +4,7 @@ from datashader.core import bypixel from datashader.compiler import compile_components -from datashader.glyphs.points import _PointLike +from datashader.glyphs.points import _PointLike, _GeometryLike from datashader.glyphs.area import _AreaToLineLike from datashader.utils import Dispatcher from collections import OrderedDict @@ -21,6 +21,7 @@ def pandas_pipeline(df, schema, canvas, glyph, summary): @glyph_dispatch.register(_PointLike) +@glyph_dispatch.register(_GeometryLike) @glyph_dispatch.register(_AreaToLineLike) def default(glyph, source, schema, canvas, summary, cuda=False): create, info, append, _, finalize = compile_components(summary, schema, glyph, cuda) diff --git a/datashader/glyphs/__init__.py b/datashader/glyphs/__init__.py index 8abcb83c3..66ce04fc6 100644 --- a/datashader/glyphs/__init__.py +++ b/datashader/glyphs/__init__.py @@ -1,5 +1,5 @@ from __future__ import absolute_import -from .points import Point # noqa (API import) +from .points import Point, MultiPoint2dGeometry # noqa (API import) from .line import ( # noqa (API import) LineAxis0, LineAxis0Multi, @@ -7,6 +7,7 @@ LinesAxis1XConstant, LinesAxis1YConstant, LinesAxis1Ragged, + LineAxis1Geometry, ) from .area import ( # noqa (API import) AreaToZeroAxis0, @@ -23,6 +24,7 @@ AreaToLineAxis1Ragged, ) from .trimesh import Triangles # noqa (API import) +from .polygon import PolygonGeom # noqa (API import) from .quadmesh import ( # noqa (API import) QuadMeshRectilinear, QuadMeshCurvialinear ) diff --git a/datashader/glyphs/line.py b/datashader/glyphs/line.py index 957dd1e80..5fa7b37e3 100644 --- a/datashader/glyphs/line.py +++ b/datashader/glyphs/line.py @@ -2,8 +2,8 @@ import numpy as np from toolz import memoize +from datashader.glyphs.points import _PointLike, 
_GeometryLike from datashader.glyphs.glyph import isnull -from datashader.glyphs.points import _PointLike from datashader.utils import isreal, ngjit from numba import cuda @@ -458,6 +458,53 @@ def extend(aggs, df, vt, bounds, plot_start=True): return extend +class LineAxis1Geometry(_GeometryLike): + + @property + def geom_dtypes(self): + from spatialpandas.geometry import ( + Line2dDtype, MultiLine2dDtype, Ring2dDtype, Polygon2dDtype, + MultiPolygon2dDtype + ) + return (Line2dDtype, MultiLine2dDtype, Ring2dDtype, + Polygon2dDtype, MultiPolygon2dDtype) + + @memoize + def _build_extend(self, x_mapper, y_mapper, info, append): + from spatialpandas.geometry import ( + Polygon2dArray, MultiPolygon2dArray + ) + expand_aggs_and_cols = self.expand_aggs_and_cols(append) + map_onto_pixel = _build_map_onto_pixel_for_line(x_mapper, y_mapper) + draw_segment = _build_draw_segment( + append, map_onto_pixel, expand_aggs_and_cols + ) + + perform_extend_cpu = _build_extend_line_axis1_geometry( + draw_segment, expand_aggs_and_cols + ) + geometry_name = self.geometry + + def extend(aggs, df, vt, bounds, plot_start=True): + sx, tx, sy, ty = vt + xmin, xmax, ymin, ymax = bounds + aggs_and_cols = aggs + info(df) + geom_array = df[geometry_name].array + # line may be clipped, then mapped to pixels + + if isinstance(geom_array, (Polygon2dArray, MultiPolygon2dArray)): + # Convert polygon array to multi line of boundary + geom_array = geom_array.boundary + + perform_extend_cpu( + sx, tx, sy, ty, + xmin, xmax, ymin, ymax, + geom_array, *aggs_and_cols + ) + + return extend + + def _build_map_onto_pixel_for_line(x_mapper, y_mapper): @ngjit def map_onto_pixel(sx, tx, sy, ty, xmin, xmax, ymin, ymax, x, y): @@ -909,3 +956,75 @@ def extend_cpu_numba( segment_start, x0, x1, y0, y1, *aggs_and_cols) return extend_cpu + + +def _build_extend_line_axis1_geometry( + draw_segment, expand_aggs_and_cols +): + def extend_cpu( + sx, tx, sy, ty, + xmin, xmax, ymin, ymax, + geometry, *aggs_and_cols + ): + + 
values = geometry.buffer_values + missing = geometry.isna() + offsets = geometry.buffer_offsets + + if len(offsets) == 2: + # MultiLineArray + offsets0, offsets1 = offsets + else: + # LineArray + offsets1 = offsets[0] + offsets0 = np.arange(len(offsets1)) + + extend_cpu_numba( + sx, tx, sy, ty, xmin, xmax, ymin, ymax, + values, missing, offsets0, offsets1, *aggs_and_cols + ) + + @ngjit + @expand_aggs_and_cols + def extend_cpu_numba( + sx, tx, sy, ty, xmin, xmax, ymin, ymax, + values, missing, offsets0, offsets1, *aggs_and_cols + ): + for i in range(len(offsets0) - 1): + if missing[i]: + continue + + start0 = offsets0[i] + stop0 = offsets0[i + 1] + + for j in range(start0, stop0): + start1 = offsets1[j] + stop1 = offsets1[j + 1] + + for k in range(start1, stop1 - 2, 2): + x0 = values[k] + if not np.isfinite(x0): + continue + + y0 = values[k + 1] + if not np.isfinite(y0): + continue + + x1 = values[k + 2] + if not np.isfinite(x1): + continue + + y1 = values[k + 3] + if not np.isfinite(y1): + continue + + segment_start = ( + (k == start1) or + not np.isfinite(values[k - 2]) or + not np.isfinite(values[k - 1]) + ) + + draw_segment(i, sx, tx, sy, ty, xmin, xmax, ymin, ymax, + segment_start, x0, x1, y0, y1, *aggs_and_cols) + + return extend_cpu diff --git a/datashader/glyphs/points.py b/datashader/glyphs/points.py index f0a131119..3c45c3ad4 100644 --- a/datashader/glyphs/points.py +++ b/datashader/glyphs/points.py @@ -22,6 +22,63 @@ def values(s): return s.values +class _GeometryLike(Glyph): + def __init__(self, geometry): + self.geometry = geometry + + @property + def ndims(self): + return 1 + + @property + def inputs(self): + return (self.geometry,) + + @property + def geom_dtypes(self): + from spatialpandas.geometry import GeometryDtype + return (GeometryDtype,) + + def validate(self, in_dshape): + if not isinstance(in_dshape[str(self.geometry)], self.geom_dtypes): + raise ValueError( + '{col} must be an array with one of the following types: {typs}'.format( + 
col=self.geometry, + typs=', '.join(typ.__name__ for typ in self.geom_dtypes) + )) + + @property + def x_label(self): + return 'x' + + @property + def y_label(self): + return 'y' + + def required_columns(self): + return [self.geometry] + + def compute_x_bounds(self, df): + bounds = df[self.geometry].array.bounds_x + return self.maybe_expand_bounds(bounds) + + def compute_y_bounds(self, df): + bounds = df[self.geometry].array.bounds_y + return self.maybe_expand_bounds(bounds) + + @memoize + def compute_bounds_dask(self, ddf): + r = ddf.map_partitions(lambda df: np.array( + [list(df[self.geometry].array.bounds)] + )).compute() + + x_extents = np.nanmin(r[:, 0]), np.nanmax(r[:, 2]) + y_extents = np.nanmin(r[:, 1]), np.nanmax(r[:, 3]) + + return (self.maybe_expand_bounds(x_extents), + self.maybe_expand_bounds(y_extents)) + + class _PointLike(Glyph): """Shared methods between Point and Line""" def __init__(self, x, y): @@ -142,3 +199,68 @@ def extend(aggs, df, vt, bounds): ) return extend + + +class MultiPoint2dGeometry(_GeometryLike): + + @property + def geom_dtypes(self): + from spatialpandas.geometry import MultiPoint2dDtype + return (MultiPoint2dDtype,) + + @memoize + def _build_extend(self, x_mapper, y_mapper, info, append): + geometry_name = self.geometry + + @ngjit + @self.expand_aggs_and_cols(append) + def _perform_extend_points( + i, j, sx, tx, sy, ty, xmin, xmax, ymin, ymax, values, *aggs_and_cols + ): + x = values[j] + y = values[j + 1] + # points outside bounds are dropped; remainder + # are mapped onto pixels + if (xmin <= x <= xmax) and (ymin <= y <= ymax): + xx = int(x_mapper(x) * sx + tx) + yy = int(y_mapper(y) * sy + ty) + xi, yi = (xx - 1 if x == xmax else xx, + yy - 1 if y == ymax else yy) + + append(i, xi, yi, *aggs_and_cols) + + @ngjit + @self.expand_aggs_and_cols(append) + def extend_cpu( + sx, tx, sy, ty, xmin, xmax, ymin, ymax, + values, missing, offsets, *aggs_and_cols + ): + n = len(offsets) - 1 + for i in range(n): + if missing[i] is True: + 
continue + start = offsets[i] + stop = offsets[i + 1] + for j in range(start, stop, 2): + _perform_extend_points( + i, j, sx, tx, sy, ty, xmin, xmax, ymin, ymax, + values, *aggs_and_cols + ) + + def extend(aggs, df, vt, bounds): + aggs_and_cols = aggs + info(df) + sx, tx, sy, ty = vt + xmin, xmax, ymin, ymax = bounds + + geometry = df[geometry_name].array + + values = geometry.buffer_values + missing = geometry.isna() + offsets = geometry.buffer_offsets[0] + + extend_cpu( + sx, tx, sy, ty, xmin, xmax, ymin, ymax, + values, missing, offsets, *aggs_and_cols + ) + + return extend diff --git a/datashader/glyphs/polygon.py b/datashader/glyphs/polygon.py new file mode 100644 index 000000000..34286c91c --- /dev/null +++ b/datashader/glyphs/polygon.py @@ -0,0 +1,255 @@ +from toolz import memoize +import numpy as np + +from datashader.glyphs.line import _build_map_onto_pixel_for_line +from datashader.glyphs.points import _GeometryLike +from datashader.utils import ngjit + + +class PolygonGeom(_GeometryLike): + @property + def geom_dtypes(self): + from spatialpandas.geometry import Polygon2dDtype, MultiPolygon2dDtype + return Polygon2dDtype, MultiPolygon2dDtype + + @memoize + def _build_extend(self, x_mapper, y_mapper, info, append): + expand_aggs_and_cols = self.expand_aggs_and_cols(append) + map_onto_pixel = _build_map_onto_pixel_for_line(x_mapper, y_mapper) + draw_segment = _build_draw_polygon( + append, map_onto_pixel, x_mapper, y_mapper, expand_aggs_and_cols + ) + + perform_extend_cpu = _build_extend_polygon_geometry( + draw_segment, expand_aggs_and_cols + ) + geom_name = self.geometry + + def extend(aggs, df, vt, bounds, plot_start=True): + sx, tx, sy, ty = vt + xmin, xmax, ymin, ymax = bounds + aggs_and_cols = aggs + info(df) + geom_array = df[geom_name].array + # line may be clipped, then mapped to pixels + perform_extend_cpu( + sx, tx, sy, ty, + xmin, xmax, ymin, ymax, + geom_array, *aggs_and_cols + ) + + return extend + + +def _build_draw_polygon(append, 
map_onto_pixel, x_mapper, y_mapper, expand_aggs_and_cols): + @ngjit + @expand_aggs_and_cols + def draw_polygon( + i, sx, tx, sy, ty, xmin, xmax, ymin, ymax, + offsets, values, xs, ys, yincreasing, eligible, + *aggs_and_cols + ): + """Draw a polygon using a winding-number scan-line algorithm + """ + # Initialize values of pre-allocated buffers + xs.fill(np.nan) + ys.fill(np.nan) + yincreasing.fill(0) + eligible.fill(1) + + # First pass, compute bounding box of polygon vertices in data coordinates + start_index = offsets[0] + stop_index = offsets[-1] + # num_edges = stop_index - start_index - 2 + poly_xmin = np.min(values[start_index:stop_index:2]) + poly_ymin = np.min(values[start_index + 1:stop_index:2]) + poly_xmax = np.max(values[start_index:stop_index:2]) + poly_ymax = np.max(values[start_index + 1:stop_index:2]) + + # skip polygon if outside viewport + if (poly_xmax < xmin or poly_xmin > xmax + or poly_ymax < ymin or poly_ymin > ymax): + return + + # Compute pixel bounds for polygon + startxi, startyi = map_onto_pixel( + sx, tx, sy, ty, xmin, xmax, ymin, ymax, + max(poly_xmin, xmin), max(poly_ymin, ymin) + ) + stopxi, stopyi = map_onto_pixel( + sx, tx, sy, ty, xmin, xmax, ymin, ymax, + min(poly_xmax, xmax), min(poly_ymax, ymax) + ) + stopxi += 1 + stopyi += 1 + + # Handle subpixel polygons (pixel width or height of polygon is 1) + if (stopxi - startxi) == 1 or (stopyi - startyi) == 1: + for yi in range(startyi, stopyi): + for xi in range(startxi, stopxi): + append(i, xi, yi, *aggs_and_cols) + return + + # Build arrays of edges in canvas coordinates + ei = 0 + for j in range(len(offsets) - 1): + start = offsets[j] + stop = offsets[j + 1] + for k in range(start, stop - 2, 2): + x0 = values[k] + y0 = values[k + 1] + x1 = values[k + 2] + y1 = values[k + 3] + + # Map to canvas coordinates without rounding + x0c = x_mapper(x0) * sx + tx + y0c = y_mapper(y0) * sy + ty + x1c = x_mapper(x1) * sx + tx + y1c = y_mapper(y1) * sy + ty + + if y1c > y0c: + xs[ei, 0] = x0c + 
ys[ei, 0] = y0c + xs[ei, 1] = x1c + ys[ei, 1] = y1c + yincreasing[ei] = 1 + elif y1c < y0c: + xs[ei, 1] = x0c + ys[ei, 1] = y0c + xs[ei, 0] = x1c + ys[ei, 0] = y1c + yincreasing[ei] = -1 + else: + # Skip horizontal edges + continue + + ei += 1 + + # Perform scan-line algorithm + num_edges = ei + for yi in range(startyi, stopyi): + # All edges eligible at start of new row + eligible.fill(1) + for xi in range(startxi, stopxi): + # Init winding number + winding_number = 0 + for ei in range(num_edges): + if eligible[ei] == 0: + # We've already determined that edge is above, below, or left + # of the current pixel + continue + + # Get edge coordinates. + # Note: y1c > y0c due to how xs/ys were populated + x0c = xs[ei, 0] + x1c = xs[ei, 1] + y0c = ys[ei, 0] + y1c = ys[ei, 1] + + # Reject edges that are above, below, or left of current pixel. + # Note: Edge skipped if lower vertex overlaps, + # but is kept if upper vertex overlaps + if (y0c >= yi or y1c < yi + or (x0c < xi and x1c < xi) + ): + # Edge not eligible for any remaining pixel in this row + eligible[ei] = 0 + continue + + if xi <= x0c and xi <= x1c: + # Edge is fully to the right of the pixel, so we know ray to the + # right of pixel intersects edge. 
+ winding_number += yincreasing[ei] + else: + # Now check if edge is to the right of pixel using cross product + # A is vector from pixel to first vertex + ax = x0c - xi + ay = y0c - yi + + # B is vector from pixel to second vertex + bx = x1c - xi + by = y1c - yi + + # Compute cross product of B and A + bxa = (bx * ay - by * ax) + + if bxa < 0 or (bxa == 0 and yincreasing[ei]): + # Edge to the right + winding_number += yincreasing[ei] + else: + # Edge to left, not eligible for any remaining pixel in row + eligible[ei] = 0 + continue + + if winding_number != 0: + # If winding number is not zero, point + # is inside polygon + append(i, xi, yi, *aggs_and_cols) + + return draw_polygon + + +def _build_extend_polygon_geometry( + draw_polygon, expand_aggs_and_cols +): + def extend_cpu( + sx, tx, sy, ty, xmin, xmax, ymin, ymax, geometry, *aggs_and_cols + ): + values = geometry.buffer_values + missing = geometry.isna() + offsets = geometry.buffer_offsets + + if len(offsets) == 3: + # MultiPolygon2dArray + offsets0, offsets1, offsets2 = offsets + else: + # Polygon2dArray + offsets1, offsets2 = offsets + offsets0 = np.arange(len(offsets1)) + + extend_cpu_numba( + sx, tx, sy, ty, xmin, xmax, ymin, ymax, + values, missing, offsets0, offsets1, offsets2, *aggs_and_cols + ) + + @ngjit + @expand_aggs_and_cols + def extend_cpu_numba( + sx, tx, sy, ty, xmin, xmax, ymin, ymax, + values, missing, offsets0, offsets1, offsets2, *aggs_and_cols + ): + # Pre-allocate temp arrays + if len(offsets0) > 1: + max_edges = -1 + for i in range(len(offsets0) - 1): + if missing[i]: + continue + start = offsets2[offsets1[offsets0[i]]] + stop = offsets2[offsets1[offsets0[i + 1]]] + max_edges = max(max_edges, (stop - start) // 2) + else: + max_edges = 0 + + xs = np.full((max_edges, 2), np.nan, dtype=np.float32) + ys = np.full((max_edges, 2), np.nan, dtype=np.float32) + yincreasing = np.zeros(max_edges, dtype=np.int8) + + # Initialize array indicating which edges are still eligible for processing + 
eligible = np.ones(max_edges, dtype=np.int8) + + for i in range(len(offsets0) - 1): + if missing[i]: + continue + + # i: row index + # start, stop: start and stop index into values for the multiple polygons + # in row i. + # Note: the draw_polygon method handles the edges of all of the filled + # polygons and holes in one pass. + start = offsets1[offsets0[i]] + stop = offsets1[offsets0[i + 1]] + + draw_polygon(i, sx, tx, sy, ty, xmin, xmax, ymin, ymax, + offsets2[start:stop + 1], values, + xs, ys, yincreasing, eligible, *aggs_and_cols) + + return extend_cpu diff --git a/datashader/tests/test_dask.py b/datashader/tests/test_dask.py index a9c935e33..140d0d8f8 100644 --- a/datashader/tests/test_dask.py +++ b/datashader/tests/test_dask.py @@ -1,4 +1,7 @@ from __future__ import division, absolute_import + +from numpy import nan + from dask.context import config import dask.dataframe as dd import numpy as np @@ -10,6 +13,11 @@ import pytest +try: + import spatialpandas +except ImportError: + spatialpandas = None + from datashader.tests.test_pandas import ( assert_eq_xr, assert_eq_ndarray, values ) @@ -325,6 +333,27 @@ def test_log_axis_points(ddf): assert_eq_xr(c_logxy.points(ddf, 'log_x', 'log_y', ds.count('i32')), out) +@pytest.mark.skipif(not spatialpandas, reason="spatialpandas not installed") +def test_points_geometry(): + axis = ds.core.LinearAxis() + lincoords = axis.compute_index(axis.compute_scale_and_translate((0., 2.), 3), 3) + + ddf = dd.from_pandas(pd.DataFrame({ + 'geom': pd.array( + [[0, 0], [0, 1, 1, 1], [0, 2, 1, 2, 2, 2]], dtype='MultiPoint2d[float64]'), + 'v': [1, 2, 3] + }), npartitions=3) + + cvs = ds.Canvas(plot_width=3, plot_height=3) + agg = cvs.points(ddf, geometry='geom', agg=ds.sum('v')) + sol = np.array([[1, nan, nan], + [2, 2, nan], + [3, 3, 3]], dtype='float64') + out = xr.DataArray(sol, coords=[lincoords, lincoords], + dims=['y', 'x']) + assert_eq_xr(agg, out) + + @pytest.mark.parametrize('DataFrame', DataFrames) def test_line(DataFrame): 
axis = ds.core.LinearAxis() @@ -348,8 +377,7 @@ def test_line(DataFrame): # # Line tests -@pytest.mark.parametrize('DataFrame', DataFrames) -@pytest.mark.parametrize('df_kwargs,x,y,ax', [ +line_manual_range_params = [ # axis1 none constant (dict(data={ 'x0': [4, -4, 4], @@ -358,20 +386,20 @@ def test_line(DataFrame): 'y0': [0, 0, 0], 'y1': [-4, 4, 0], 'y2': [0, 0, 0] - }), ['x0', 'x1', 'x2'], ['y0', 'y1', 'y2'], 1), + }), dict(x=['x0', 'x1', 'x2'], y=['y0', 'y1', 'y2'], axis=1)), # axis1 x constant (dict(data={ 'y0': [0, 0, 0], 'y1': [0, 4, -4], 'y2': [0, 0, 0] - }), np.array([-4, 0, 4]), ['y0', 'y1', 'y2'], 1), + }), dict(x=np.array([-4, 0, 4]), y=['y0', 'y1', 'y2'], axis=1)), # axis0 single (dict(data={ 'x': [4, 0, -4, np.nan, -4, 0, 4, np.nan, 4, 0, -4], 'y': [0, -4, 0, np.nan, 0, 4, 0, np.nan, 0, 0, 0], - }), 'x', 'y', 0), + }), dict(x='x', y='y', axis=0)), # axis0 multi (dict(data={ @@ -381,7 +409,7 @@ def test_line(DataFrame): 'y0': [0, -4, 0], 'y1': [0, 4, 0], 'y2': [0, 0, 0] - }), ['x0', 'x1', 'x2'], ['y0', 'y1', 'y2'], 0), + }), dict(x=['x0', 'x1', 'x2'], y=['y0', 'y1', 'y2'], axis=0)), # axis0 multi with string (dict(data={ @@ -389,17 +417,28 @@ def test_line(DataFrame): 'y0': [0, -4, 0], 'y1': [0, 4, 0], 'y2': [0, 0, 0] - }), 'x0', ['y0', 'y1', 'y2'], 0), + }), dict(x='x0', y=['y0', 'y1', 'y2'], axis=0)), # axis1 RaggedArray (dict(data={ 'x': [[4, 0, -4], [-4, 0, 4, 4, 0, -4]], 'y': [[0, -4, 0], [0, 4, 0, 0, 0, 0]], - }, dtype='Ragged[int64]'), 'x', 'y', 1), -]) -def test_line_manual_range(DataFrame, df_kwargs, x, y, ax): + }, dtype='Ragged[int64]'), dict(x='x', y='y', axis=1)), +] +if spatialpandas: + line_manual_range_params.append( + # geometry + (dict(data={ + 'geom': [[4, 0, 0, -4, -4, 0], + [-4, 0, 0, 4, 4, 0, 4, 0, 0, 0, -4, 0]] + }, dtype='Line2d[int64]'), dict(geometry='geom')) + ) +@pytest.mark.parametrize('DataFrame', DataFrames) +@pytest.mark.parametrize('df_kwargs,cvs_kwargs', line_manual_range_params) +def test_line_manual_range(DataFrame, 
df_kwargs, cvs_kwargs): if DataFrame is dask_cudf_DataFrame: - if df_kwargs.get('dtype', '').startswith('Ragged'): + dtype = df_kwargs.get('dtype', '') + if dtype.startswith('Ragged') or dtype.startswith('Line2d'): pytest.skip("Ragged array not supported with cudf") axis = ds.core.LinearAxis() @@ -409,7 +448,7 @@ def test_line_manual_range(DataFrame, df_kwargs, x, y, ax): cvs = ds.Canvas(plot_width=7, plot_height=7, x_range=(-3, 3), y_range=(-3, 3)) - agg = cvs.line(ddf, x, y, ds.count(), axis=ax) + agg = cvs.line(ddf, agg=ds.count(), **cvs_kwargs) sol = np.array([[0, 0, 1, 0, 1, 0, 0], [0, 1, 0, 0, 0, 1, 0], @@ -424,8 +463,7 @@ def test_line_manual_range(DataFrame, df_kwargs, x, y, ax): assert_eq_xr(agg, out) -@pytest.mark.parametrize('DataFrame', DataFrames) -@pytest.mark.parametrize('df_kwargs,x,y,ax', [ +line_autorange_params = [ # axis1 none constant (dict(data={ 'x0': [0, 0, 0], @@ -434,20 +472,20 @@ def test_line_manual_range(DataFrame, df_kwargs, x, y, ax): 'y0': [-4, 4, -4], 'y1': [0, 0, 0], 'y2': [4, -4, 4] - }), ['x0', 'x1', 'x2'], ['y0', 'y1', 'y2'], 1), + }), dict(x=['x0', 'x1', 'x2'], y=['y0', 'y1', 'y2'], axis=1)), # axis1 y constant (dict(data={ 'x0': [0, 0, 0], 'x1': [-4, 0, 4], 'x2': [0, 0, 0], - }), ['x0', 'x1', 'x2'], np.array([-4, 0, 4]), 1), + }), dict(x=['x0', 'x1', 'x2'], y=np.array([-4, 0, 4]), axis=1)), # axis0 single (dict(data={ 'x': [0, -4, 0, np.nan, 0, 0, 0, np.nan, 0, 4, 0], 'y': [-4, 0, 4, np.nan, 4, 0, -4, np.nan, -4, 0, 4], - }), 'x', 'y', 0), + }), dict(x='x', y='y', axis=0)), # axis0 multi (dict(data={ @@ -457,7 +495,7 @@ def test_line_manual_range(DataFrame, df_kwargs, x, y, ax): 'y0': [-4, 0, 4], 'y1': [4, 0, -4], 'y2': [-4, 0, 4] - }), ['x0', 'x1', 'x2'], ['y0', 'y1', 'y2'], 0), + }), dict(x=['x0', 'x1', 'x2'], y=['y0', 'y1', 'y2'], axis=0)), # axis0 multi with string (dict(data={ @@ -465,18 +503,29 @@ def test_line_manual_range(DataFrame, df_kwargs, x, y, ax): 'x1': [0, 0, 0], 'x2': [0, 4, 0], 'y0': [-4, 0, 4] - }), ['x0', 
'x1', 'x2'], 'y0', 0), + }), dict(x=['x0', 'x1', 'x2'], y='y0', axis=0)), # axis1 RaggedArray (dict(data={ 'x': [[0, -4, 0], [0, 0, 0], [0, 4, 0]], 'y': [[-4, 0, 4], [4, 0, -4], [-4, 0, 4]], - }, dtype='Ragged[int64]'), 'x', 'y', 1), - -]) -def test_line_autorange(DataFrame, df_kwargs, x, y, ax): + }, dtype='Ragged[int64]'), dict(x='x', y='y', axis=1)), +] +if spatialpandas: + line_autorange_params.append( + # geometry + (dict(data={ + 'geom': [[0, -4, -4, 0, 0, 4], + [0, 4, 0, 0, 0, -4], + [0, -4, 4, 0, 0, 4]] + }, dtype='Line2d[int64]'), dict(geometry='geom')) + ) +@pytest.mark.parametrize('DataFrame', DataFrames) +@pytest.mark.parametrize('df_kwargs,cvs_kwargs', line_autorange_params) +def test_line_autorange(DataFrame, df_kwargs, cvs_kwargs): if DataFrame is dask_cudf_DataFrame: - if df_kwargs.get('dtype', '').startswith('Ragged'): + dtype = df_kwargs.get('dtype', '') + if dtype.startswith('Ragged') or dtype.startswith('Line2d'): pytest.skip("Ragged array not supported with cudf") axis = ds.core.LinearAxis() @@ -487,7 +536,7 @@ def test_line_autorange(DataFrame, df_kwargs, x, y, ax): cvs = ds.Canvas(plot_width=9, plot_height=9) - agg = cvs.line(ddf, x, y, ds.count(), axis=ax) + agg = cvs.line(ddf, agg=ds.count(), **cvs_kwargs) sol = np.array([[0, 0, 0, 0, 3, 0, 0, 0, 0], [0, 0, 0, 1, 1, 1, 0, 0, 0], @@ -582,7 +631,7 @@ def test_auto_range_line(DataFrame): @pytest.mark.parametrize('DataFrame', DataFrames) -@pytest.mark.parametrize('df_kwargs,x,y,ax', [ +@pytest.mark.parametrize('df_kwargs,cvs_kwargs', [ # axis1 none constant (dict(data={ 'x0': [-4, np.nan], @@ -591,13 +640,13 @@ def test_auto_range_line(DataFrame): 'y0': [0, np.nan], 'y1': [-4, 4], 'y2': [0, 0] - }, dtype='float32'), ['x0', 'x1', 'x2'], ['y0', 'y1', 'y2'], 1), + }, dtype='float32'), dict(x=['x0', 'x1', 'x2'], y=['y0', 'y1', 'y2'], axis=1)), # axis0 single (dict(data={ 'x': [-4, -2, 0, np.nan, 2, 4], 'y': [0, -4, 0, np.nan, 4, 0], - }), 'x', 'y', 0), + }), dict(x='x', y='y', axis=0)), # axis0 
multi (dict(data={ @@ -605,15 +654,15 @@ def test_auto_range_line(DataFrame): 'x1': [np.nan, 2, 4], 'y0': [0, -4, 0], 'y1': [np.nan, 4, 0], - }, dtype='float32'), ['x0', 'x1'], ['y0', 'y1'], 0), + }, dtype='float32'), dict(x=['x0', 'x1'], y=['y0', 'y1'], axis=0)), # axis1 ragged arrays (dict(data={ 'x': pd.array([[-4, -2, 0], [2, 4]]), 'y': pd.array([[0, -4, 0], [4, 0]]) - }, dtype='Ragged[float32]'), 'x', 'y', 1) + }, dtype='Ragged[float32]'), dict(x='x', y='y', axis=1)) ]) -def test_area_to_zero_fixedrange(DataFrame, df_kwargs, x, y, ax): +def test_area_to_zero_fixedrange(DataFrame, df_kwargs, cvs_kwargs): if DataFrame is dask_cudf_DataFrame: if df_kwargs.get('dtype', '').startswith('Ragged'): pytest.skip("Ragged array not supported with cudf") @@ -630,7 +679,7 @@ def test_area_to_zero_fixedrange(DataFrame, df_kwargs, x, y, ax): ddf = DataFrame(**df_kwargs) - agg = cvs.area(ddf, x, y, ds.count(), axis=ax) + agg = cvs.area(ddf, agg=ds.count(), **cvs_kwargs) sol = np.array([[0, 1, 1, 0, 0, 0, 0, 0, 0], [1, 1, 1, 1, 0, 0, 0, 0, 0], @@ -645,7 +694,7 @@ def test_area_to_zero_fixedrange(DataFrame, df_kwargs, x, y, ax): @pytest.mark.parametrize('DataFrame', DataFrames) -@pytest.mark.parametrize('df_kwargs,x,y,ax', [ +@pytest.mark.parametrize('df_kwargs,cvs_kwargs', [ # axis1 none constant (dict(data={ 'x0': [-4, 0], @@ -654,7 +703,8 @@ def test_area_to_zero_fixedrange(DataFrame, df_kwargs, x, y, ax): 'y0': [0, 0], 'y1': [-4, -4], 'y2': [0, 0] - }, dtype='float32'), ['x0', 'x1', 'x2'], ['y0', 'y1', 'y2'], 1), + }, dtype='float32'), + dict(x=['x0', 'x1', 'x2'], y=['y0', 'y1', 'y2'], axis=1)), # axis1 y constant (dict(data={ @@ -662,13 +712,13 @@ def test_area_to_zero_fixedrange(DataFrame, df_kwargs, x, y, ax): 'x1': [-2, 2], 'x2': [0, 4], }, dtype='float32'), - ['x0', 'x1', 'x2'], np.array([0, -4, 0], dtype='float32'), 1), + dict(x=['x0', 'x1', 'x2'], y=np.array([0, -4, 0], dtype='float32'), axis=1)), # axis0 single (dict(data={ 'x': [-4, -2, 0, 0, 2, 4], 'y': [0, -4, 0, 
0, -4, 0], - }), 'x', 'y', 0), + }), dict(x='x', y='y', axis=0)), # axis0 multi (dict(data={ @@ -676,22 +726,22 @@ def test_area_to_zero_fixedrange(DataFrame, df_kwargs, x, y, ax): 'x1': [0, 2, 4], 'y0': [0, -4, 0], 'y1': [0, -4, 0], - }, dtype='float32'), ['x0', 'x1'], ['y0', 'y1'], 0), + }, dtype='float32'), dict(x=['x0', 'x1'], y=['y0', 'y1'], axis=0)), # axis0 multi, y string (dict(data={ 'x0': [-4, -2, 0], 'x1': [0, 2, 4], 'y0': [0, -4, 0], - }, dtype='float32'), ['x0', 'x1'], 'y0', 0), + }, dtype='float32'), dict(x=['x0', 'x1'], y='y0', axis=0)), # axis1 ragged arrays (dict(data={ 'x': [[-4, -2, 0], [0, 2, 4]], 'y': [[0, -4, 0], [0, -4, 0]] - }, dtype='Ragged[float32]'), 'x', 'y', 1) + }, dtype='Ragged[float32]'), dict(x='x', y='y', axis=1)) ]) -def test_area_to_zero_autorange(DataFrame, df_kwargs, x, y, ax): +def test_area_to_zero_autorange(DataFrame, df_kwargs, cvs_kwargs): if DataFrame is dask_cudf_DataFrame: if df_kwargs.get('dtype', '').startswith('Ragged'): pytest.skip("Ragged array not supported with cudf") @@ -705,7 +755,7 @@ def test_area_to_zero_autorange(DataFrame, df_kwargs, x, y, ax): cvs = ds.Canvas(plot_width=13, plot_height=7) ddf = DataFrame(**df_kwargs) - agg = cvs.area(ddf, x, y, ds.count(), axis=ax) + agg = cvs.area(ddf, agg=ds.count(), **cvs_kwargs) sol = np.array([[0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0], [0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0], @@ -722,7 +772,7 @@ def test_area_to_zero_autorange(DataFrame, df_kwargs, x, y, ax): @pytest.mark.parametrize('DataFrame', DataFrames) -@pytest.mark.parametrize('df_kwargs,x,y,ax', [ +@pytest.mark.parametrize('df_kwargs,cvs_kwargs', [ # axis1 none constant (dict(data={ 'x0': [-4, np.nan], @@ -731,13 +781,13 @@ def test_area_to_zero_autorange(DataFrame, df_kwargs, x, y, ax): 'y0': [0, np.nan], 'y1': [-4, 4], 'y2': [0, 0] - }, dtype='float32'), ['x0', 'x1', 'x2'], ['y0', 'y1', 'y2'], 1), + }, dtype='float32'), dict(x=['x0', 'x1', 'x2'], y=['y0', 'y1', 'y2'], axis=1)), # axis0 single (dict(data={ 
'x': [-4, -2, 0, np.nan, 2, 4], 'y': [0, -4, 0, np.nan, 4, 0], - }), 'x', 'y', 0), + }), dict(x='x', y='y', axis=0)), # axis0 multi (dict(data={ @@ -745,15 +795,15 @@ def test_area_to_zero_autorange(DataFrame, df_kwargs, x, y, ax): 'x1': [np.nan, 2, 4], 'y0': [0, -4, 0], 'y1': [np.nan, 4, 0], - }, dtype='float32'), ['x0', 'x1'], ['y0', 'y1'], 0), + }, dtype='float32'), dict(x=['x0', 'x1'], y=['y0', 'y1'], axis=0)), # axis1 ragged arrays (dict(data={ 'x': [[-4, -2, 0], [2, 4]], 'y': [[0, -4, 0], [4, 0]], - }, dtype='Ragged[float32]'), 'x', 'y', 1) + }, dtype='Ragged[float32]'), dict(x='x', y='y', axis=1)) ]) -def test_area_to_zero_autorange_gap(DataFrame, df_kwargs, x, y, ax): +def test_area_to_zero_autorange_gap(DataFrame, df_kwargs, cvs_kwargs): if DataFrame is dask_cudf_DataFrame: if df_kwargs.get('dtype', '').startswith('Ragged'): pytest.skip("Ragged array not supported with cudf") @@ -768,7 +818,7 @@ def test_area_to_zero_autorange_gap(DataFrame, df_kwargs, x, y, ax): ddf = DataFrame(**df_kwargs) - agg = cvs.area(ddf, x, y, ds.count(), axis=ax) + agg = cvs.area(ddf, agg=ds.count(), **cvs_kwargs) sol = np.array([[0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0], @@ -785,7 +835,7 @@ def test_area_to_zero_autorange_gap(DataFrame, df_kwargs, x, y, ax): @pytest.mark.parametrize('DataFrame', DataFrames) -@pytest.mark.parametrize('df_kwargs,x,y,y_stack,ax', [ +@pytest.mark.parametrize('df_kwargs,cvs_kwargs', [ # axis1 none constant (dict(data={ 'x0': [-4, 0], @@ -798,7 +848,8 @@ def test_area_to_zero_autorange_gap(DataFrame, df_kwargs, x, y, ax): 'y4': [-2, -2], 'y5': [0, 0], }, dtype='float32'), - ['x0', 'x1', 'x2'], ['y0', 'y1', 'y2'], ['y3', 'y4', 'y5'], 1), + dict(x=['x0', 'x1', 'x2'], y=['y0', 'y1', 'y2'], + y_stack=['y3', 'y4', 'y5'], axis=1)), # axis1 y constant (dict(data={ @@ -806,16 +857,15 @@ def test_area_to_zero_autorange_gap(DataFrame, df_kwargs, x, y, ax): 'x1': [-2, 2], 'x2': [0, 4], }, dtype='float32'), - ['x0', 'x1', 
'x2'], - np.array([0, -4, 0]), - np.array([0, -2, 0], dtype='float32'), 1), + dict(x=['x0', 'x1', 'x2'], y=np.array([0, -4, 0]), + y_stack=np.array([0, -2, 0], dtype='float32'), axis=1)), # axis0 single (dict(data={ 'x': [-4, -2, 0, 0, 2, 4], 'y': [0, -4, 0, 0, -4, 0], 'y_stack': [0, -2, 0, 0, -2, 0], - }), 'x', 'y', 'y_stack', 0), + }), dict(x='x', y='y', y_stack='y_stack', axis=0)), # axis0 multi (dict(data={ @@ -825,7 +875,8 @@ def test_area_to_zero_autorange_gap(DataFrame, df_kwargs, x, y, ax): 'y1': [0, -4, 0], 'y2': [0, -2, 0], 'y3': [0, -2, 0], - }, dtype='float32'), ['x0', 'x1'], ['y0', 'y1'], ['y2', 'y3'], 0), + }, dtype='float32'), + dict(x=['x0', 'x1'], y=['y0', 'y1'], y_stack=['y2', 'y3'], axis=0)), # axis0 multi, y string (dict(data={ @@ -833,16 +884,16 @@ def test_area_to_zero_autorange_gap(DataFrame, df_kwargs, x, y, ax): 'x1': [0, 2, 4], 'y0': [0, -4, 0], 'y2': [0, -2, 0], - }, dtype='float32'), ['x0', 'x1'], 'y0', 'y2', 0), + }, dtype='float32'), dict(x=['x0', 'x1'], y='y0', y_stack='y2', axis=0)), # axis1 ragged arrays (dict(data={ 'x': [[-4, -2, 0], [0, 2, 4]], 'y': [[0, -4, 0], [0, -4, 0]], 'y_stack': [[0, -2, 0], [0, -2, 0]] - }, dtype='Ragged[float32]'), 'x', 'y', 'y_stack', 1) + }, dtype='Ragged[float32]'), dict(x='x', y='y', y_stack='y_stack', axis=1)) ]) -def test_area_to_line_autorange(DataFrame, df_kwargs, x, y, y_stack, ax): +def test_area_to_line_autorange(DataFrame, df_kwargs, cvs_kwargs): if DataFrame is dask_cudf_DataFrame: if df_kwargs.get('dtype', '').startswith('Ragged'): pytest.skip("Ragged array not supported with cudf") @@ -856,7 +907,7 @@ def test_area_to_line_autorange(DataFrame, df_kwargs, x, y, y_stack, ax): cvs = ds.Canvas(plot_width=13, plot_height=7) ddf = DataFrame(**df_kwargs) - agg = cvs.area(ddf, x, y, ds.count(), axis=ax, y_stack=y_stack) + agg = cvs.area(ddf, agg=ds.count(), **cvs_kwargs) sol = np.array([[0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0], [0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0], @@ -873,7 +924,7 @@ def 
test_area_to_line_autorange(DataFrame, df_kwargs, x, y, y_stack, ax): @pytest.mark.parametrize('DataFrame', DataFrames) -@pytest.mark.parametrize('df_kwargs,x,y,y_stack,ax', [ +@pytest.mark.parametrize('df_kwargs,cvs_kwargs', [ # axis1 none constant (dict(data={ 'x0': [-4, np.nan], @@ -886,14 +937,15 @@ def test_area_to_line_autorange(DataFrame, df_kwargs, x, y, y_stack, ax): 'y5': [0, 0], 'y6': [0, 0] }, dtype='float32'), - ['x0', 'x1', 'x2'], ['y0', 'y1', 'y2'], ['y4', 'y5', 'y6'], 1), + dict(x=['x0', 'x1', 'x2'], y=['y0', 'y1', 'y2'], + y_stack=['y4', 'y5', 'y6'], axis=1)), # axis0 single (dict(data={ 'x': [-4, -2, 0, np.nan, 2, 4], 'y': [0, -4, 0, np.nan, 4, 0], 'y_stack': [0, 0, 0, 0, 0, 0], - }), 'x', 'y', 'y_stack', 0), + }), dict(x='x', y='y', y_stack='y_stack', axis=0)), # axis0 multi (dict(data={ @@ -903,16 +955,17 @@ def test_area_to_line_autorange(DataFrame, df_kwargs, x, y, y_stack, ax): 'y1': [np.nan, 4, 0], 'y2': [0, 0, 0], 'y3': [0, 0, 0], - }, dtype='float32'), ['x0', 'x1'], ['y0', 'y1'], ['y2', 'y3'], 0), + }, dtype='float32'), + dict(x=['x0', 'x1'], y=['y0', 'y1'], y_stack=['y2', 'y3'], axis=0)), # axis1 ragged arrays (dict(data={ 'x': [[-4, -2, 0], [2, 4]], 'y': [[0, -4, 0], [4, 0]], 'y_stack': [[0, 0, 0], [0, 0]], - }, dtype='Ragged[float32]'), 'x', 'y', 'y_stack', 1) + }, dtype='Ragged[float32]'), dict(x='x', y='y', y_stack='y_stack', axis=1)) ]) -def test_area_to_line_autorange_gap(DataFrame, df_kwargs, x, y, y_stack, ax): +def test_area_to_line_autorange_gap(DataFrame, df_kwargs, cvs_kwargs): if DataFrame is dask_cudf_DataFrame: if df_kwargs.get('dtype', '').startswith('Ragged'): pytest.skip("Ragged array not supported with cudf") @@ -929,7 +982,7 @@ def test_area_to_line_autorange_gap(DataFrame, df_kwargs, x, y, y_stack, ax): # When a line is specified to fill to, this line is not included in # the fill. So we expect the y=0 line to not be filled. 
- agg = cvs.area(ddf, x, y, ds.count(), y_stack=y_stack, axis=ax) + agg = cvs.area(ddf, agg=ds.count(), **cvs_kwargs) sol = np.array([[0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0], diff --git a/datashader/tests/test_pandas.py b/datashader/tests/test_pandas.py index 61dd47871..1cb17bb6a 100644 --- a/datashader/tests/test_pandas.py +++ b/datashader/tests/test_pandas.py @@ -1,5 +1,7 @@ from __future__ import absolute_import from collections import OrderedDict +from numpy import nan + import numpy as np import pandas as pd import xarray as xr @@ -40,6 +42,15 @@ def cudf_DataFrame(*args, **kwargs): dfs = [df_pd] DataFrames = [pd.DataFrame] + +try: + import spatialpandas + from spatialpandas.geometry import Line2dDtype +except ImportError: + Line2dDtype = None + spatialpandas = None + + c = ds.Canvas(plot_width=2, plot_height=2, x_range=(0, 1), y_range=(0, 1)) c_logx = ds.Canvas(plot_width=2, plot_height=2, x_range=(1, 10), y_range=(0, 1), x_axis_type='log') @@ -307,6 +318,27 @@ def test_log_axis_points(df): assert_eq_xr(c_logxy.points(df, 'log_x', 'log_y', ds.count('i32')), out) +@pytest.mark.skipif(not spatialpandas, reason="spatialpandas not installed") +def test_points_geometry(): + axis = ds.core.LinearAxis() + lincoords = axis.compute_index(axis.compute_scale_and_translate((0., 2.), 3), 3) + + df = pd.DataFrame({ + 'geom': pd.array( + [[0, 0], [0, 1, 1, 1], [0, 2, 1, 2, 2, 2]], dtype='MultiPoint2d[float64]'), + 'v': [1, 2, 3] + }) + + cvs = ds.Canvas(plot_width=3, plot_height=3) + agg = cvs.points(df, geometry='geom', agg=ds.sum('v')) + sol = np.array([[1, nan, nan], + [2, 2, nan], + [3, 3, 3]], dtype='float64') + out = xr.DataArray(sol, coords=[lincoords, lincoords], + dims=['y', 'x']) + assert_eq_xr(agg, out) + + def test_line(): axis = ds.core.LinearAxis() lincoords = axis.compute_index(axis.compute_scale_and_translate((-3., 3.), 7), 7) @@ -652,8 +684,7 @@ def test_bug_570(): # # Line tests 
-@pytest.mark.parametrize('DataFrame', DataFrames) -@pytest.mark.parametrize('df_args,x,y,ax', [ +line_manual_range_params = [ # axis1 none constant ([{ 'x0': [4, -4], @@ -662,27 +693,27 @@ def test_bug_570(): 'y0': [0, 0], 'y1': [-4, 4], 'y2': [0, 0] - }], ['x0', 'x1', 'x2'], ['y0', 'y1', 'y2'], 1), + }], dict(x=['x0', 'x1', 'x2'], y=['y0', 'y1', 'y2'], axis=1)), # axis1 x constant ([{ 'y0': [0, 0], 'y1': [-4, 4], 'y2': [0, 0] - }], np.array([-4, 0, 4]), ['y0', 'y1', 'y2'], 1), + }], dict(x=np.array([-4, 0, 4]), y=['y0', 'y1', 'y2'], axis=1)), # axis1 y constant ([{ 'x0': [0, 0], 'x1': [-4, 4], 'x2': [0, 0] - }], ['x0', 'x1', 'x2'], np.array([-4, 0, 4]), 1), + }], dict(x=['x0', 'x1', 'x2'], y=np.array([-4, 0, 4]), axis=1)), # axis0 single ([{ 'x': [0, -4, 0, np.nan, 0, 4, 0], 'y': [-4, 0, 4, np.nan, -4, 0, 4], - }], 'x', 'y', 0), + }], dict(x='x', y='y', axis=0)), # axis0 multi ([{ @@ -690,7 +721,7 @@ def test_bug_570(): 'x1': [0, 4, 0], 'y0': [-4, 0, 4], 'y1': [-4, 0, 4], - }], ['x0', 'x1'], ['y0', 'y1'], 0), + }], dict(x=['x0', 'x1'], y=['y0', 'y1'], axis=0)), # axis0 multi with string ([{ @@ -698,17 +729,32 @@ def test_bug_570(): 'x1': [0, 4, 0], 'y0': [-4, 0, 4], 'y1': [-4, 0, 4], - }], ['x0', 'x1'], 'y0', 0), + }], dict(x=['x0', 'x1'], y='y0', axis=0)), # axis1 ragged arrays ([{ 'x': pd.array([[4, 0], [0, -4, 0, 4]], dtype='Ragged[float32]'), 'y': pd.array([[0, -4], [-4, 0, 4, 0]], dtype='Ragged[float32]') - }], 'x', 'y', 1) -]) -def test_line_manual_range(DataFrame, df_args, x, y, ax): + }], dict(x='x', y='y', axis=1)), +] +if spatialpandas: + line_manual_range_params.append( + # geometry + ([{ + 'geom': pd.array( + [[4, 0, 0, -4], [0, -4, -4, 0, 0, 4, 4, 0]], dtype='Line2d[float32]' + ), + }], dict(geometry='geom')) + ) +@pytest.mark.parametrize('DataFrame', DataFrames) +@pytest.mark.parametrize('df_args,cvs_kwargs', line_manual_range_params) +def test_line_manual_range(DataFrame, df_args, cvs_kwargs): if cudf and DataFrame is cudf_DataFrame: - if 
isinstance(getattr(df_args[0].get('x', []), 'dtype', ''), RaggedDtype): + if (isinstance(getattr(df_args[0].get('x', []), 'dtype', ''), RaggedDtype) or + spatialpandas and isinstance( + getattr(df_args[0].get('geom', []), 'dtype', ''), Line2dDtype + ) + ): pytest.skip("cudf DataFrames do not support extension types") df = DataFrame(*df_args) @@ -720,7 +766,7 @@ def test_line_manual_range(DataFrame, df_args, x, y, ax): cvs = ds.Canvas(plot_width=7, plot_height=7, x_range=(-3, 3), y_range=(-3, 3)) - agg = cvs.line(df, x, y, ds.count(), axis=ax) + agg = cvs.line(df, agg=ds.count(), **cvs_kwargs) sol = np.array([[0, 0, 1, 0, 1, 0, 0], [0, 1, 0, 0, 0, 1, 0], @@ -735,8 +781,7 @@ def test_line_manual_range(DataFrame, df_args, x, y, ax): assert_eq_xr(agg, out) -@pytest.mark.parametrize('DataFrame', DataFrames) -@pytest.mark.parametrize('df_args,x,y,ax', [ +line_autorange_params = [ # axis1 none constant ([{ 'x0': [0, 0], @@ -745,20 +790,20 @@ def test_line_manual_range(DataFrame, df_args, x, y, ax): 'y0': [-4, -4], 'y1': [0, 0], 'y2': [4, 4] - }], ['x0', 'x1', 'x2'], ['y0', 'y1', 'y2'], 1), + }], dict(x=['x0', 'x1', 'x2'], y=['y0', 'y1', 'y2'], axis=1)), # axis1 y constant ([{ 'x0': [0, 0], 'x1': [-4, 4], 'x2': [0, 0] - }], ['x0', 'x1', 'x2'], np.array([-4, 0, 4]), 1), + }], dict(x=['x0', 'x1', 'x2'], y=np.array([-4, 0, 4]), axis=1)), # axis0 single ([{ 'x': [0, -4, 0, np.nan, 0, 4, 0], 'y': [-4, 0, 4, np.nan, -4, 0, 4], - }], 'x', 'y', 0), + }], dict(x='x', y='y', axis=0)), # axis0 multi ([{ @@ -766,7 +811,7 @@ def test_line_manual_range(DataFrame, df_args, x, y, ax): 'x1': [0, 4, 0], 'y0': [-4, 0, 4], 'y1': [-4, 0, 4], - }], ['x0', 'x1'], ['y0', 'y1'], 0), + }], dict(x=['x0', 'x1'], y=['y0', 'y1'], axis=0)), # axis0 multi with string ([{ @@ -774,17 +819,32 @@ def test_line_manual_range(DataFrame, df_args, x, y, ax): 'x1': [0, 4, 0], 'y0': [-4, 0, 4], 'y1': [-4, 0, 4], - }], ['x0', 'x1'], 'y0', 0), + }], dict(x=['x0', 'x1'], y='y0', axis=0)), # axis1 ragged arrays ([{ 
'x': pd.array([[0, -4, 0], [0, 4, 0]], dtype='Ragged[float32]'), 'y': pd.array([[-4, 0, 4], [-4, 0, 4]], dtype='Ragged[float32]') - }], 'x', 'y', 1) -]) -def test_line_autorange(DataFrame, df_args, x, y, ax): + }], dict(x='x', y='y', axis=1)), +] +if spatialpandas: + line_autorange_params.append( + # geometry + ([{ + 'geom': pd.array( + [[0, -4, -4, 0, 0, 4], [0, -4, 4, 0, 0, 4]], dtype='Line2d[float32]' + ), + }], dict(geometry='geom')) + ) +@pytest.mark.parametrize('DataFrame', DataFrames) +@pytest.mark.parametrize('df_args,cvs_kwargs', line_autorange_params) +def test_line_autorange(DataFrame, df_args, cvs_kwargs): if cudf and DataFrame is cudf_DataFrame: - if isinstance(getattr(df_args[0].get('x', []), 'dtype', ''), RaggedDtype): + if (isinstance(getattr(df_args[0].get('x', []), 'dtype', ''), RaggedDtype) or + spatialpandas and isinstance( + getattr(df_args[0].get('geom', []), 'dtype', ''), Line2dDtype + ) + ): pytest.skip("cudf DataFrames do not support extension types") df = DataFrame(*df_args) @@ -795,7 +855,7 @@ def test_line_autorange(DataFrame, df_args, x, y, ax): cvs = ds.Canvas(plot_width=9, plot_height=9) - agg = cvs.line(df, x, y, ds.count(), axis=ax) + agg = cvs.line(df, agg=ds.count(), **cvs_kwargs) sol = np.array([[0, 0, 0, 0, 2, 0, 0, 0, 0], [0, 0, 0, 1, 0, 1, 0, 0, 0], @@ -920,7 +980,7 @@ def test_line_autorange_axis1_ragged(): @pytest.mark.parametrize('DataFrame', DataFrames) -@pytest.mark.parametrize('df_kwargs,x,y,ax', [ +@pytest.mark.parametrize('df_kwargs,cvs_kwargs', [ # axis1 none constant (dict(data={ 'x0': [-4, np.nan], @@ -929,13 +989,13 @@ def test_line_autorange_axis1_ragged(): 'y0': [0, np.nan], 'y1': [-4, 4], 'y2': [0, 0] - }, dtype='float32'), ['x0', 'x1', 'x2'], ['y0', 'y1', 'y2'], 1), + }, dtype='float32'), dict(x=['x0', 'x1', 'x2'], y=['y0', 'y1', 'y2'], axis=1)), # axis0 single (dict(data={ 'x': [-4, -2, 0, np.nan, 2, 4], 'y': [0, -4, 0, np.nan, 4, 0], - }), 'x', 'y', 0), + }), dict(x='x', y='y', axis=0)), # axis0 multi 
(dict(data={ @@ -943,15 +1003,15 @@ def test_line_autorange_axis1_ragged(): 'x1': [np.nan, 2, 4], 'y0': [0, -4, 0], 'y1': [np.nan, 4, 0], - }, dtype='float32'), ['x0', 'x1'], ['y0', 'y1'], 0), + }, dtype='float32'), dict(x=['x0', 'x1'], y=['y0', 'y1'], axis=0)), # axis1 ragged arrays (dict(data={ 'x': pd.array([[-4, -2, 0], [2, 4]], dtype='Ragged[float32]'), 'y': pd.array([[0, -4, 0], [4, 0]], dtype='Ragged[float32]') - }), 'x', 'y', 1) + }), dict(x='x', y='y', axis=1)) ]) -def test_area_to_zero_fixedrange(DataFrame, df_kwargs, x, y, ax): +def test_area_to_zero_fixedrange(DataFrame, df_kwargs, cvs_kwargs): if cudf and DataFrame is cudf_DataFrame: if isinstance(getattr(df_kwargs['data'].get('x', []), 'dtype', ''), RaggedDtype): pytest.skip("cudf DataFrames do not support extension types") @@ -968,7 +1028,7 @@ def test_area_to_zero_fixedrange(DataFrame, df_kwargs, x, y, ax): cvs = ds.Canvas(plot_width=9, plot_height=5, x_range=[-3.75, 3.75], y_range=[-2.25, 2.25]) - agg = cvs.area(df, x, y, ds.count(), axis=ax) + agg = cvs.area(df, agg=ds.count(), **cvs_kwargs) sol = np.array([[0, 1, 1, 0, 0, 0, 0, 0, 0], [1, 1, 1, 1, 0, 0, 0, 0, 0], @@ -983,7 +1043,7 @@ def test_area_to_zero_fixedrange(DataFrame, df_kwargs, x, y, ax): @pytest.mark.parametrize('DataFrame', DataFrames) -@pytest.mark.parametrize('df_kwargs,x,y,ax', [ +@pytest.mark.parametrize('df_kwargs,cvs_kwargs', [ # axis1 none constant (dict(data={ 'x0': [-4, 0], @@ -992,7 +1052,7 @@ def test_area_to_zero_fixedrange(DataFrame, df_kwargs, x, y, ax): 'y0': [0, 0], 'y1': [-4, -4], 'y2': [0, 0] - }, dtype='float32'), ['x0', 'x1', 'x2'], ['y0', 'y1', 'y2'], 1), + }, dtype='float32'), dict(x=['x0', 'x1', 'x2'], y=['y0', 'y1', 'y2'], axis=1)), # axis1 y constant (dict(data={ @@ -1000,13 +1060,13 @@ def test_area_to_zero_fixedrange(DataFrame, df_kwargs, x, y, ax): 'x1': [-2, 2], 'x2': [0, 4], }, dtype='float32'), - ['x0', 'x1', 'x2'], np.array([0, -4, 0], dtype='float32'), 1), + dict(x=['x0', 'x1', 'x2'], y=np.array([0, 
-4, 0], dtype='float32'), axis=1)), # axis0 single (dict(data={ 'x': [-4, -2, 0, 0, 2, 4], 'y': [0, -4, 0, 0, -4, 0], - }), 'x', 'y', 0), + }), dict(x='x', y='y', axis=0)), # axis0 multi (dict(data={ @@ -1014,22 +1074,22 @@ def test_area_to_zero_fixedrange(DataFrame, df_kwargs, x, y, ax): 'x1': [0, 2, 4], 'y0': [0, -4, 0], 'y1': [0, -4, 0], - }, dtype='float32'), ['x0', 'x1'], ['y0', 'y1'], 0), + }, dtype='float32'), dict(x=['x0', 'x1'], y=['y0', 'y1'], axis=0)), # axis0 multi, y string (dict(data={ 'x0': [-4, -2, 0], 'x1': [0, 2, 4], 'y0': [0, -4, 0], - }, dtype='float32'), ['x0', 'x1'], 'y0', 0), + }, dtype='float32'), dict(x=['x0', 'x1'], y='y0', axis=0)), # axis1 ragged arrays (dict(data={ 'x': pd.array([[-4, -2, 0], [0, 2, 4]], dtype='Ragged[float32]'), 'y': pd.array([[0, -4, 0], [0, -4, 0]], dtype='Ragged[float32]') - }), 'x', 'y', 1) + }), dict(x='x', y='y', axis=1)) ]) -def test_area_to_zero_autorange(DataFrame, df_kwargs, x, y, ax): +def test_area_to_zero_autorange(DataFrame, df_kwargs, cvs_kwargs): if cudf and DataFrame is cudf_DataFrame: if isinstance(getattr(df_kwargs['data'].get('x', []), 'dtype', ''), RaggedDtype): pytest.skip("cudf DataFrames do not support extension types") @@ -1044,7 +1104,7 @@ def test_area_to_zero_autorange(DataFrame, df_kwargs, x, y, ax): cvs = ds.Canvas(plot_width=13, plot_height=7) - agg = cvs.area(df, x, y, ds.count(), axis=ax) + agg = cvs.area(df, agg=ds.count(), **cvs_kwargs) sol = np.array([[0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0], [0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0], @@ -1061,7 +1121,7 @@ def test_area_to_zero_autorange(DataFrame, df_kwargs, x, y, ax): @pytest.mark.parametrize('DataFrame', DataFrames) -@pytest.mark.parametrize('df_kwargs,x,y,ax', [ +@pytest.mark.parametrize('df_kwargs,cvs_kwargs', [ # axis1 none constant (dict(data={ 'x0': [-4, np.nan], @@ -1072,13 +1132,13 @@ def test_area_to_zero_autorange(DataFrame, df_kwargs, x, y, ax): # 'y0': [0, 1], 'y1': [-4, 4], 'y2': [0, 0] - }, dtype='float32'), ['x0', 'x1', 
'x2'], ['y0', 'y1', 'y2'], 1), + }, dtype='float32'), dict(x=['x0', 'x1', 'x2'], y=['y0', 'y1', 'y2'], axis=1)), # axis0 single (dict(data={ 'x': [-4, -2, 0, np.nan, 2, 4], 'y': [0, -4, 0, np.nan, 4, 0], - }), 'x', 'y', 0), + }), dict(x='x', y='y', axis=0)), # axis0 multi (dict(data={ @@ -1086,15 +1146,15 @@ def test_area_to_zero_autorange(DataFrame, df_kwargs, x, y, ax): 'x1': [np.nan, 2, 4], 'y0': [0, -4, 0], 'y1': [np.nan, 4, 0], - }, dtype='float32'), ['x0', 'x1'], ['y0', 'y1'], 0), + }, dtype='float32'), dict(x=['x0', 'x1'], y=['y0', 'y1'], axis=0)), # axis1 ragged arrays (dict(data={ 'x': pd.array([[-4, -2, 0], [2, 4]], dtype='Ragged[float32]'), 'y': pd.array([[0, -4, 0], [4, 0]], dtype='Ragged[float32]') - }), 'x', 'y', 1) + }), dict(x='x', y='y', axis=1)) ]) -def test_area_to_zero_autorange_gap(DataFrame, df_kwargs, x, y, ax): +def test_area_to_zero_autorange_gap(DataFrame, df_kwargs, cvs_kwargs): if cudf and DataFrame is cudf_DataFrame: if isinstance(getattr(df_kwargs['data'].get('x', []), 'dtype', ''), RaggedDtype): pytest.skip("cudf DataFrames do not support extension types") @@ -1109,7 +1169,7 @@ def test_area_to_zero_autorange_gap(DataFrame, df_kwargs, x, y, ax): cvs = ds.Canvas(plot_width=13, plot_height=7) - agg = cvs.area(df, x, y, ds.count(), axis=ax) + agg = cvs.area(df, agg=ds.count(), **cvs_kwargs) sol = np.array([[0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0], @@ -1126,7 +1186,7 @@ def test_area_to_zero_autorange_gap(DataFrame, df_kwargs, x, y, ax): @pytest.mark.parametrize('DataFrame', DataFrames) -@pytest.mark.parametrize('df_kwargs,x,y,y_stack,ax', [ +@pytest.mark.parametrize('df_kwargs,cvs_kwargs', [ # axis1 none constant (dict(data={ 'x0': [-4, 0], @@ -1139,7 +1199,8 @@ def test_area_to_zero_autorange_gap(DataFrame, df_kwargs, x, y, ax): 'y4': [-2, -2], 'y5': [0, 0], }, dtype='float32'), - ['x0', 'x1', 'x2'], ['y0', 'y1', 'y2'], ['y3', 'y4', 'y5'], 1), + dict(x=['x0', 'x1', 'x2'], y=['y0', 'y1', 'y2'], + 
y_stack=['y3', 'y4', 'y5'], axis=1)), # axis1 y constant (dict(data={ @@ -1147,16 +1208,15 @@ def test_area_to_zero_autorange_gap(DataFrame, df_kwargs, x, y, ax): 'x1': [-2, 2], 'x2': [0, 4], }, dtype='float32'), - ['x0', 'x1', 'x2'], - np.array([0, -4, 0]), - np.array([0, -2, 0], dtype='float32'), 1), + dict(x=['x0', 'x1', 'x2'], y=np.array([0, -4, 0]), + y_stack=np.array([0, -2, 0], dtype='float32'), axis=1)), # axis0 single (dict(data={ 'x': [-4, -2, 0, 0, 2, 4], 'y': [0, -4, 0, 0, -4, 0], 'y_stack': [0, -2, 0, 0, -2, 0], - }), 'x', 'y', 'y_stack', 0), + }), dict(x='x', y='y', y_stack='y_stack', axis=0)), # axis0 multi (dict(data={ @@ -1166,7 +1226,8 @@ def test_area_to_zero_autorange_gap(DataFrame, df_kwargs, x, y, ax): 'y1': [0, -4, 0], 'y2': [0, -2, 0], 'y3': [0, -2, 0], - }, dtype='float32'), ['x0', 'x1'], ['y0', 'y1'], ['y2', 'y3'], 0), + }, dtype='float32'), dict(x=['x0', 'x1'], y=['y0', 'y1'], + y_stack=['y2', 'y3'], axis=0)), # axis0 multi, y string (dict(data={ @@ -1174,16 +1235,16 @@ def test_area_to_zero_autorange_gap(DataFrame, df_kwargs, x, y, ax): 'x1': [0, 2, 4], 'y0': [0, -4, 0], 'y2': [0, -2, 0], - }, dtype='float32'), ['x0', 'x1'], 'y0', 'y2', 0), + }, dtype='float32'), dict(x=['x0', 'x1'], y='y0', y_stack='y2', axis=0)), # axis1 ragged arrays (dict(data={ 'x': pd.array([[-4, -2, 0], [0, 2, 4]], dtype='Ragged[float32]'), 'y': pd.array([[0, -4, 0], [0, -4, 0]], dtype='Ragged[float32]'), 'y_stack': pd.array([[0, -2, 0], [0, -2, 0]], dtype='Ragged[float32]') - }), 'x', 'y', 'y_stack', 1) + }), dict(x='x', y='y', y_stack='y_stack', axis=1)) ]) -def test_area_to_line_autorange(DataFrame, df_kwargs, x, y, y_stack, ax): +def test_area_to_line_autorange(DataFrame, df_kwargs, cvs_kwargs): if cudf and DataFrame is cudf_DataFrame: if isinstance(getattr(df_kwargs['data'].get('x', []), 'dtype', ''), RaggedDtype): pytest.skip("cudf DataFrames do not support extension types") @@ -1198,7 +1259,7 @@ def test_area_to_line_autorange(DataFrame, df_kwargs, x, y, 
y_stack, ax): cvs = ds.Canvas(plot_width=13, plot_height=7) - agg = cvs.area(df, x, y, ds.count(), axis=ax, y_stack=y_stack) + agg = cvs.area(df, agg=ds.count(), **cvs_kwargs) sol = np.array([[0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0], [0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0], diff --git a/datashader/tests/test_polygons.py b/datashader/tests/test_polygons.py new file mode 100644 index 000000000..29da5ee30 --- /dev/null +++ b/datashader/tests/test_polygons.py @@ -0,0 +1,212 @@ +import pytest +import pandas as pd +import numpy as np +import xarray as xr +from numpy import nan +import datashader as ds +from datashader.tests.test_pandas import assert_eq_xr +import dask.dataframe as dd + +try: + # Import to register extension arrays + import spatialpandas # noqa (register EAs) +except ImportError: + spatialpandas = None + + +def dask_DataFrame(*args, **kwargs): + return dd.from_pandas(pd.DataFrame(*args, **kwargs), npartitions=3) + + +DataFrames = [pd.DataFrame, dask_DataFrame] + + +@pytest.mark.skipif(not spatialpandas, reason="spacialpandas not installed") +@pytest.mark.parametrize('DataFrame', DataFrames) +def test_multipolygon_manual_range(DataFrame): + df = DataFrame({ + 'polygons': pd.Series([[ + [ + [0, 0, 2, 0, 2, 2, 1, 3, 0, 0], + [1, 0.25, 1, 2, 1.75, .25, 0.25, 0.25] + ], [ + [2.5, 1, 4, 1, 4, 2, 2.5, 2, 2.5, 1] + ], + ]], dtype='MultiPolygon2d[float64]'), + 'v': [1] + }) + + cvs = ds.Canvas(plot_width=16, plot_height=16) + agg = cvs.polygons(df, geometry='polygons', agg=ds.count()) + + axis = ds.core.LinearAxis() + lincoords_x = axis.compute_index( + axis.compute_scale_and_translate((0., 4.), 16), 16) + lincoords_y = axis.compute_index( + axis.compute_scale_and_translate((0., 3.), 16), 16) + + sol = np.array([ + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0], + [0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0], + [0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 
0], + [0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1], + [0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1], + [0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1], + [0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1], + [0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1], + [0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ], dtype='i4') + + out = xr.DataArray(sol, coords=[lincoords_y, lincoords_x], dims=['y', 'x']) + + assert_eq_xr(agg, out) + + +@pytest.mark.skipif(not spatialpandas, reason="spacialpandas not installed") +@pytest.mark.parametrize('DataFrame', DataFrames) +def test_multiple_polygons_auto_range(DataFrame): + df = DataFrame({ + 'polygons': pd.Series([[ + [ + [0, 0, 2, 0, 2, 2, 1, 3, 0, 0], + [1, 0.25, 1, 2, 1.75, .25, 0.25, 0.25] + ], [ + [2.5, 1, 4, 1, 4, 2, 2.5, 2, 2.5, 1] + ], + ]], dtype='MultiPolygon2d[float64]'), + 'v': [1] + }) + + cvs = ds.Canvas(plot_width=16, plot_height=16, + x_range=[-1, 3.5], y_range=[0.1, 2]) + agg = cvs.polygons(df, geometry='polygons', agg=ds.count()) + + axis = ds.core.LinearAxis() + lincoords_x = axis.compute_index( + axis.compute_scale_and_translate((-1, 3.5), 16), 16) + lincoords_y = axis.compute_index( + axis.compute_scale_and_translate((0.1, 2), 16), 16) + + sol = np.array([ + [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1], + [0, 0, 0, 0, 0, 1, 1, 1, 0, 
1, 1, 0, 0, 1, 1, 1], + [0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1], + [0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1], + [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1], + [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1], + [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1], + [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1] + ], dtype='i4') + + out = xr.DataArray(sol, coords=[lincoords_y, lincoords_x], dims=['y', 'x']) + + assert_eq_xr(agg, out) + + +@pytest.mark.skipif(not spatialpandas, reason="spacialpandas not installed") +@pytest.mark.parametrize('DataFrame', DataFrames) +def test_no_overlap(DataFrame): + df = DataFrame({ + 'polygons': pd.Series([ + [ + [1, 1, 2, 2, 1, 3, 0, 2, 1, 1], + [0.5, 1.5, 0.5, 2.5, 1.5, 2.5, 1.5, 1.5, 0.5, 1.5] + ], [ + [0.5, 1.5, 1.5, 1.5, 1.5, 2.5, 0.5, 2.5, 0.5, 1.5] + ], [ + [0, 1, 2, 1, 2, 3, 0, 3, 0, 1, 1, 1, 0, 2, 1, 3, 2, 2, 1, 1] + ] + ], dtype='Polygon2d[float64]'), + }) + + cvs = ds.Canvas(plot_width=16, plot_height=16) + agg = cvs.polygons(df, geometry='polygons', agg=ds.count()) + + axis = ds.core.LinearAxis() + lincoords_x = axis.compute_index( + axis.compute_scale_and_translate((0, 2), 16), 16) + lincoords_y = axis.compute_index( + axis.compute_scale_and_translate((1, 3), 16), 16) + + sol = np.array([ + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] + ], dtype='i4') + + out = xr.DataArray(sol, coords=[lincoords_y, lincoords_x], dims=['y', 'x']) + + assert_eq_xr(agg, out) + + +@pytest.mark.skipif(not spatialpandas, reason="spacialpandas not installed") +@pytest.mark.parametrize('DataFrame', DataFrames) +def test_no_overlap_agg(DataFrame): + df = DataFrame({ + 'polygons': pd.Series([ + [[1, 1, 2, 2, 1, 3, 0, 2, 1, 1], + [0.5, 1.5, 0.5, 2.5, 1.5, 2.5, 1.5, 1.5, 0.5, 1.5]], + [[0.5, 1.5, 1.5, 1.5, 1.5, 2.5, 0.5, 2.5, 0.5, 1.5]], + [[0, 1, 2, 1, 2, 3, 0, 3, 0, 1, 1, 1, 0, 2, 1, 3, 2, 2, 1, 1]] + ], dtype='Polygon2d[float64]'), + 'v': range(3) + }) + + cvs = ds.Canvas(plot_width=16, plot_height=16) + agg = cvs.polygons(df, geometry='polygons', agg=ds.sum('v')) + + axis = ds.core.LinearAxis() + lincoords_x = axis.compute_index( + axis.compute_scale_and_translate((0, 2), 16), 16) + lincoords_y = axis.compute_index( + axis.compute_scale_and_translate((1, 3), 16), 16) + + sol = np.array([ + [nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan], + [nan, 2., 2., 2., 2., 2., 2., 2., 0., 0., 2., 2., 2., 2., 2., 2.], + [nan, 2., 2., 2., 2., 2., 2., 0., 0., 0., 0., 2., 2., 2., 2., 2.], + [nan, 2., 2., 2., 2., 2., 0., 0., 0., 0., 0., 0., 2., 2., 2., 2.], + [nan, 2., 2., 2., 2., 0., 0., 0., 0., 0., 0., 0., 0., 2., 2., 2.], + [nan, 2., 2., 2., 0., 1., 1., 1., 1., 1., 1., 1., 1., 0., 2., 2.], + [nan, 2., 2., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 2.], + [nan, 2., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0.], + [nan, 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0.], + [nan, 2., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0.], + [nan, 2., 2., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 2.], + [nan, 2., 2., 2., 0., 1., 1., 1., 1., 1., 1., 1., 1., 0., 2., 2.], + [nan, 2., 2., 2., 2., 1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2.], + [nan, 2., 2., 2., 2., 2., 0., 
0., 0., 0., 0., 0., 2., 2., 2., 2.], + [nan, 2., 2., 2., 2., 2., 2., 0., 0., 0., 0., 2., 2., 2., 2., 2.], + [nan, 2., 2., 2., 2., 2., 2., 2., 0., 0., 2., 2., 2., 2., 2., 2.] + ]) + + out = xr.DataArray(sol, coords=[lincoords_y, lincoords_x], dims=['y', 'x']) + assert_eq_xr(agg, out) diff --git a/datashader/utils.py b/datashader/utils.py index df82a8f93..7a6592319 100644 --- a/datashader/utils.py +++ b/datashader/utils.py @@ -24,6 +24,12 @@ except ImportError: cudf = None +try: + from spatialpandas.geometry import GeometryDtype +except ImportError: + GeometryDtype = type(None) + + ngjit = nb.jit(nopython=True, nogil=True) @@ -408,7 +414,7 @@ def dshape_from_pandas_helper(col): # Pandas stores this as a pytz.tzinfo, but DataShape wants a string tz = str(tz) return datashape.Option(datashape.DateTime(tz=tz)) - elif isinstance(col.dtype, RaggedDtype): + elif isinstance(col.dtype, (RaggedDtype, GeometryDtype)): return col.dtype dshape = datashape.CType.from_numpy_dtype(col.dtype) dshape = datashape.string if dshape == datashape.object_ else dshape From 32cfd054541ce7aa6b4ad5521af40ac373565ff1 Mon Sep 17 00:00:00 2001 From: Jon Mease Date: Tue, 26 Nov 2019 07:38:18 -0500 Subject: [PATCH 02/13] Update to latest spatialpandas API --- datashader/core.py | 4 +-- datashader/glyphs/__init__.py | 2 +- datashader/glyphs/line.py | 12 +++---- datashader/glyphs/points.py | 19 +++++------ datashader/glyphs/polygon.py | 8 ++--- datashader/tests/test_dask.py | 37 ++++++++++++--------- datashader/tests/test_pandas.py | 55 ++++++++++++++++++------------- datashader/tests/test_polygons.py | 16 +++++---- 8 files changed, 83 insertions(+), 70 deletions(-) diff --git a/datashader/core.py b/datashader/core.py index a697e87fc..5c1a988ac 100644 --- a/datashader/core.py +++ b/datashader/core.py @@ -185,7 +185,7 @@ def points(self, source, x=None, y=None, agg=None, geometry=None): Column name of a PointsArray of the coordinates of each point. 
If provided, the x and y arguments may not also be provided. """ - from .glyphs import Point, MultiPoint2dGeometry + from .glyphs import Point, MultiPointGeometry from .reductions import count as count_rdn validate_xy_or_geometry('Point', x, y, geometry) @@ -204,7 +204,7 @@ def points(self, source, x=None, y=None, agg=None, geometry=None): x_range=self.x_range, y_range=self.y_range) glyph = Point(x, y) else: - glyph = MultiPoint2dGeometry(geometry) + glyph = MultiPointGeometry(geometry) return bypixel(source, self, glyph, agg) diff --git a/datashader/glyphs/__init__.py b/datashader/glyphs/__init__.py index 66ce04fc6..ce3472adc 100644 --- a/datashader/glyphs/__init__.py +++ b/datashader/glyphs/__init__.py @@ -1,5 +1,5 @@ from __future__ import absolute_import -from .points import Point, MultiPoint2dGeometry # noqa (API import) +from .points import Point, MultiPointGeometry # noqa (API import) from .line import ( # noqa (API import) LineAxis0, LineAxis0Multi, diff --git a/datashader/glyphs/line.py b/datashader/glyphs/line.py index 5fa7b37e3..6ff91aa4d 100644 --- a/datashader/glyphs/line.py +++ b/datashader/glyphs/line.py @@ -463,16 +463,16 @@ class LineAxis1Geometry(_GeometryLike): @property def geom_dtypes(self): from spatialpandas.geometry import ( - Line2dDtype, MultiLine2dDtype, Ring2dDtype, Polygon2dDtype, - MultiPolygon2dDtype + LineDtype, MultiLineDtype, RingDtype, PolygonDtype, + MultiPolygonDtype ) - return (Line2dDtype, MultiLine2dDtype, Ring2dDtype, - Polygon2dDtype, MultiPolygon2dDtype) + return (LineDtype, MultiLineDtype, RingDtype, + PolygonDtype, MultiPolygonDtype) @memoize def _build_extend(self, x_mapper, y_mapper, info, append): from spatialpandas.geometry import ( - Polygon2dArray, MultiPolygon2dArray + PolygonArray, MultiPolygonArray ) expand_aggs_and_cols = self.expand_aggs_and_cols(append) map_onto_pixel = _build_map_onto_pixel_for_line(x_mapper, y_mapper) @@ -492,7 +492,7 @@ def extend(aggs, df, vt, bounds, plot_start=True): geom_array = 
df[geometry_name].array # line may be clipped, then mapped to pixels - if isinstance(geom_array, (Polygon2dArray, MultiPolygon2dArray)): + if isinstance(geom_array, (PolygonArray, MultiPolygonArray)): # Convert polygon array to multi line of boundary geom_array = geom_array.boundary diff --git a/datashader/glyphs/points.py b/datashader/glyphs/points.py index 3c45c3ad4..97ca2b97c 100644 --- a/datashader/glyphs/points.py +++ b/datashader/glyphs/points.py @@ -59,21 +59,18 @@ def required_columns(self): return [self.geometry] def compute_x_bounds(self, df): - bounds = df[self.geometry].array.bounds_x + bounds = df[self.geometry].array.total_bounds_x return self.maybe_expand_bounds(bounds) def compute_y_bounds(self, df): - bounds = df[self.geometry].array.bounds_y + bounds = df[self.geometry].array.total_bounds_y return self.maybe_expand_bounds(bounds) @memoize def compute_bounds_dask(self, ddf): - r = ddf.map_partitions(lambda df: np.array( - [list(df[self.geometry].array.bounds)] - )).compute() - - x_extents = np.nanmin(r[:, 0]), np.nanmax(r[:, 2]) - y_extents = np.nanmin(r[:, 1]), np.nanmax(r[:, 3]) + total_bounds = ddf[self.geometry].total_bounds + x_extents = (total_bounds[0], total_bounds[2]) + y_extents = (total_bounds[1], total_bounds[3]) return (self.maybe_expand_bounds(x_extents), self.maybe_expand_bounds(y_extents)) @@ -201,12 +198,12 @@ def extend(aggs, df, vt, bounds): return extend -class MultiPoint2dGeometry(_GeometryLike): +class MultiPointGeometry(_GeometryLike): @property def geom_dtypes(self): - from spatialpandas.geometry import MultiPoint2dDtype - return (MultiPoint2dDtype,) + from spatialpandas.geometry import MultiPointDtype + return (MultiPointDtype,) @memoize def _build_extend(self, x_mapper, y_mapper, info, append): diff --git a/datashader/glyphs/polygon.py b/datashader/glyphs/polygon.py index 34286c91c..7d29cce9d 100644 --- a/datashader/glyphs/polygon.py +++ b/datashader/glyphs/polygon.py @@ -9,8 +9,8 @@ class PolygonGeom(_GeometryLike): 
@property def geom_dtypes(self): - from spatialpandas.geometry import Polygon2dDtype, MultiPolygon2dDtype - return Polygon2dDtype, MultiPolygon2dDtype + from spatialpandas.geometry import PolygonDtype, MultiPolygonDtype + return PolygonDtype, MultiPolygonDtype @memoize def _build_extend(self, x_mapper, y_mapper, info, append): @@ -199,10 +199,10 @@ def extend_cpu( offsets = geometry.buffer_offsets if len(offsets) == 3: - # MultiPolygon2dArray + # MultiPolygonArray offsets0, offsets1, offsets2 = offsets else: - # Polygon2dArray + # PolygonArray offsets1, offsets2 = offsets offsets0 = np.arange(len(offsets1)) diff --git a/datashader/tests/test_dask.py b/datashader/tests/test_dask.py index 140d0d8f8..b08bfd361 100644 --- a/datashader/tests/test_dask.py +++ b/datashader/tests/test_dask.py @@ -14,9 +14,9 @@ import pytest try: - import spatialpandas + import spatialpandas as sp except ImportError: - spatialpandas = None + sp = None from datashader.tests.test_pandas import ( assert_eq_xr, assert_eq_ndarray, values @@ -41,8 +41,12 @@ _ddf = dd.from_pandas(df_pd, npartitions=2) -def dask_DataFrame(*args, **kwargs): - return dd.from_pandas(pd.DataFrame(*args, **kwargs), npartitions=2) +def dask_DataFrame(*args, geo=False, **kwargs): + if geo: + df = sp.GeoDataFrame(*args, **kwargs) + else: + df = pd.DataFrame(*args, **kwargs) + return dd.from_pandas(df, npartitions=2) try: @@ -51,7 +55,8 @@ def dask_DataFrame(*args, **kwargs): import dask_cudf ddfs = [_ddf, dask_cudf.from_dask_dataframe(_ddf)] - def dask_cudf_DataFrame(*args, **kwargs): + def dask_cudf_DataFrame(*args, geo=False, **kwargs): + assert not geo cdf = cudf.DataFrame.from_pandas( pd.DataFrame(*args, **kwargs), nan_as_null=False ) @@ -333,14 +338,14 @@ def test_log_axis_points(ddf): assert_eq_xr(c_logxy.points(ddf, 'log_x', 'log_y', ds.count('i32')), out) -@pytest.mark.skipif(not spatialpandas, reason="spatialpandas not installed") +@pytest.mark.skipif(not sp, reason="spatialpandas not installed") def 
test_points_geometry(): axis = ds.core.LinearAxis() lincoords = axis.compute_index(axis.compute_scale_and_translate((0., 2.), 3), 3) - ddf = dd.from_pandas(pd.DataFrame({ + ddf = dd.from_pandas(sp.GeoDataFrame({ 'geom': pd.array( - [[0, 0], [0, 1, 1, 1], [0, 2, 1, 2, 2, 2]], dtype='MultiPoint2d[float64]'), + [[0, 0], [0, 1, 1, 1], [0, 2, 1, 2, 2, 2]], dtype='MultiPoint[float64]'), 'v': [1, 2, 3] }), npartitions=3) @@ -425,26 +430,26 @@ def test_line(DataFrame): 'y': [[0, -4, 0], [0, 4, 0, 0, 0, 0]], }, dtype='Ragged[int64]'), dict(x='x', y='y', axis=1)), ] -if spatialpandas: +if sp: line_manual_range_params.append( # geometry (dict(data={ 'geom': [[4, 0, 0, -4, -4, 0], [-4, 0, 0, 4, 4, 0, 4, 0, 0, 0, -4, 0]] - }, dtype='Line2d[int64]'), dict(geometry='geom')) + }, dtype='Line[int64]'), dict(geometry='geom')) ) @pytest.mark.parametrize('DataFrame', DataFrames) @pytest.mark.parametrize('df_kwargs,cvs_kwargs', line_manual_range_params) def test_line_manual_range(DataFrame, df_kwargs, cvs_kwargs): if DataFrame is dask_cudf_DataFrame: dtype = df_kwargs.get('dtype', '') - if dtype.startswith('Ragged') or dtype.startswith('Line2d'): + if dtype.startswith('Ragged') or dtype.startswith('Line'): pytest.skip("Ragged array not supported with cudf") axis = ds.core.LinearAxis() lincoords = axis.compute_index(axis.compute_scale_and_translate((-3., 3.), 7), 7) - ddf = DataFrame(**df_kwargs) + ddf = DataFrame(geo='geometry' in cvs_kwargs, **df_kwargs) cvs = ds.Canvas(plot_width=7, plot_height=7, x_range=(-3, 3), y_range=(-3, 3)) @@ -511,28 +516,28 @@ def test_line_manual_range(DataFrame, df_kwargs, cvs_kwargs): 'y': [[-4, 0, 4], [4, 0, -4], [-4, 0, 4]], }, dtype='Ragged[int64]'), dict(x='x', y='y', axis=1)), ] -if spatialpandas: +if sp: line_autorange_params.append( # geometry (dict(data={ 'geom': [[0, -4, -4, 0, 0, 4], [0, 4, 0, 0, 0, -4], [0, -4, 4, 0, 0, 4]] - }, dtype='Line2d[int64]'), dict(geometry='geom')) + }, dtype='Line[int64]'), dict(geometry='geom')) ) 
@pytest.mark.parametrize('DataFrame', DataFrames) @pytest.mark.parametrize('df_kwargs,cvs_kwargs', line_autorange_params) def test_line_autorange(DataFrame, df_kwargs, cvs_kwargs): if DataFrame is dask_cudf_DataFrame: dtype = df_kwargs.get('dtype', '') - if dtype.startswith('Ragged') or dtype.startswith('Line2d'): + if dtype.startswith('Ragged') or dtype.startswith('Line'): pytest.skip("Ragged array not supported with cudf") axis = ds.core.LinearAxis() lincoords = axis.compute_index( axis.compute_scale_and_translate((-4., 4.), 9), 9) - ddf = DataFrame(**df_kwargs) + ddf = DataFrame(geo='geometry' in cvs_kwargs, **df_kwargs) cvs = ds.Canvas(plot_width=9, plot_height=9) diff --git a/datashader/tests/test_pandas.py b/datashader/tests/test_pandas.py index 1cb17bb6a..c43127c2d 100644 --- a/datashader/tests/test_pandas.py +++ b/datashader/tests/test_pandas.py @@ -27,28 +27,37 @@ df_pd.f64[2] = np.nan dfs_pd = [df_pd] + +try: + import spatialpandas as sp + from spatialpandas.geometry import LineDtype +except ImportError: + LineDtype = None + sp = None + + +def pd_DataFrame(*args, geo=False, **kwargs): + if geo: + return sp.GeoDataFrame(*args, **kwargs) + else: + return pd.DataFrame(*args, **kwargs) + + try: import cudf import cupy - def cudf_DataFrame(*args, **kwargs): + def cudf_DataFrame(*args, geo=False, **kwargs): + assert not geo return cudf.DataFrame.from_pandas( pd.DataFrame(*args, **kwargs), nan_as_null=False ) df_cuda = cudf_DataFrame(df_pd) dfs = [df_pd, df_cuda] - DataFrames = [pd.DataFrame, cudf_DataFrame] + DataFrames = [pd_DataFrame, cudf_DataFrame] except ImportError: cudf = cupy = None dfs = [df_pd] - DataFrames = [pd.DataFrame] - - -try: - import spatialpandas - from spatialpandas.geometry import Line2dDtype -except ImportError: - Line2dDtype = None - spatialpandas = None + DataFrames = [pd_DataFrame] c = ds.Canvas(plot_width=2, plot_height=2, x_range=(0, 1), y_range=(0, 1)) @@ -318,14 +327,14 @@ def test_log_axis_points(df): 
assert_eq_xr(c_logxy.points(df, 'log_x', 'log_y', ds.count('i32')), out) -@pytest.mark.skipif(not spatialpandas, reason="spatialpandas not installed") +@pytest.mark.skipif(not sp, reason="spatialpandas not installed") def test_points_geometry(): axis = ds.core.LinearAxis() lincoords = axis.compute_index(axis.compute_scale_and_translate((0., 2.), 3), 3) df = pd.DataFrame({ 'geom': pd.array( - [[0, 0], [0, 1, 1, 1], [0, 2, 1, 2, 2, 2]], dtype='MultiPoint2d[float64]'), + [[0, 0], [0, 1, 1, 1], [0, 2, 1, 2, 2, 2]], dtype='MultiPoint[float64]'), 'v': [1, 2, 3] }) @@ -737,12 +746,12 @@ def test_bug_570(): 'y': pd.array([[0, -4], [-4, 0, 4, 0]], dtype='Ragged[float32]') }], dict(x='x', y='y', axis=1)), ] -if spatialpandas: +if sp: line_manual_range_params.append( # geometry ([{ 'geom': pd.array( - [[4, 0, 0, -4], [0, -4, -4, 0, 0, 4, 4, 0]], dtype='Line2d[float32]' + [[4, 0, 0, -4], [0, -4, -4, 0, 0, 4, 4, 0]], dtype='Line[float32]' ), }], dict(geometry='geom')) ) @@ -751,13 +760,13 @@ def test_bug_570(): def test_line_manual_range(DataFrame, df_args, cvs_kwargs): if cudf and DataFrame is cudf_DataFrame: if (isinstance(getattr(df_args[0].get('x', []), 'dtype', ''), RaggedDtype) or - spatialpandas and isinstance( - getattr(df_args[0].get('geom', []), 'dtype', ''), Line2dDtype + sp and isinstance( + getattr(df_args[0].get('geom', []), 'dtype', ''), LineDtype ) ): pytest.skip("cudf DataFrames do not support extension types") - df = DataFrame(*df_args) + df = DataFrame(geo='geometry' in cvs_kwargs, *df_args) axis = ds.core.LinearAxis() lincoords = axis.compute_index( @@ -827,12 +836,12 @@ def test_line_manual_range(DataFrame, df_args, cvs_kwargs): 'y': pd.array([[-4, 0, 4], [-4, 0, 4]], dtype='Ragged[float32]') }], dict(x='x', y='y', axis=1)), ] -if spatialpandas: +if sp: line_autorange_params.append( # geometry ([{ 'geom': pd.array( - [[0, -4, -4, 0, 0, 4], [0, -4, 4, 0, 0, 4]], dtype='Line2d[float32]' + [[0, -4, -4, 0, 0, 4], [0, -4, 4, 0, 0, 4]], dtype='Line[float32]' ), 
}], dict(geometry='geom')) ) @@ -841,13 +850,13 @@ def test_line_manual_range(DataFrame, df_args, cvs_kwargs): def test_line_autorange(DataFrame, df_args, cvs_kwargs): if cudf and DataFrame is cudf_DataFrame: if (isinstance(getattr(df_args[0].get('x', []), 'dtype', ''), RaggedDtype) or - spatialpandas and isinstance( - getattr(df_args[0].get('geom', []), 'dtype', ''), Line2dDtype + sp and isinstance( + getattr(df_args[0].get('geom', []), 'dtype', ''), LineDtype ) ): pytest.skip("cudf DataFrames do not support extension types") - df = DataFrame(*df_args) + df = DataFrame(geo='geometry' in cvs_kwargs, *df_args) axis = ds.core.LinearAxis() lincoords = axis.compute_index( diff --git a/datashader/tests/test_polygons.py b/datashader/tests/test_polygons.py index 29da5ee30..893998033 100644 --- a/datashader/tests/test_polygons.py +++ b/datashader/tests/test_polygons.py @@ -10,15 +10,17 @@ try: # Import to register extension arrays import spatialpandas # noqa (register EAs) + from spatialpandas import GeoDataFrame except ImportError: spatialpandas = None + GeoDataFrame = None -def dask_DataFrame(*args, **kwargs): - return dd.from_pandas(pd.DataFrame(*args, **kwargs), npartitions=3) +def dask_GeoDataFrame(*args, **kwargs): + return dd.from_pandas(GeoDataFrame(*args, **kwargs), npartitions=3) -DataFrames = [pd.DataFrame, dask_DataFrame] +DataFrames = [GeoDataFrame, dask_GeoDataFrame] @pytest.mark.skipif(not spatialpandas, reason="spacialpandas not installed") @@ -32,7 +34,7 @@ def test_multipolygon_manual_range(DataFrame): ], [ [2.5, 1, 4, 1, 4, 2, 2.5, 2, 2.5, 1] ], - ]], dtype='MultiPolygon2d[float64]'), + ]], dtype='MultiPolygon[float64]'), 'v': [1] }) @@ -80,7 +82,7 @@ def test_multiple_polygons_auto_range(DataFrame): ], [ [2.5, 1, 4, 1, 4, 2, 2.5, 2, 2.5, 1] ], - ]], dtype='MultiPolygon2d[float64]'), + ]], dtype='MultiPolygon[float64]'), 'v': [1] }) @@ -131,7 +133,7 @@ def test_no_overlap(DataFrame): ], [ [0, 1, 2, 1, 2, 3, 0, 3, 0, 1, 1, 1, 0, 2, 1, 3, 2, 2, 1, 1] ] - 
], dtype='Polygon2d[float64]'), + ], dtype='Polygon[float64]'), }) cvs = ds.Canvas(plot_width=16, plot_height=16) @@ -176,7 +178,7 @@ def test_no_overlap_agg(DataFrame): [0.5, 1.5, 0.5, 2.5, 1.5, 2.5, 1.5, 1.5, 0.5, 1.5]], [[0.5, 1.5, 1.5, 1.5, 1.5, 2.5, 0.5, 2.5, 0.5, 1.5]], [[0, 1, 2, 1, 2, 3, 0, 3, 0, 1, 1, 1, 0, 2, 1, 3, 2, 2, 1, 1]] - ], dtype='Polygon2d[float64]'), + ], dtype='Polygon[float64]'), 'v': range(3) }) From 07dc3cede1173a803d8e6631097f201ebe0f8d7a Mon Sep 17 00:00:00 2001 From: Jon Mease Date: Tue, 26 Nov 2019 11:08:19 -0500 Subject: [PATCH 03/13] Use sindex to downselect eligible partitions/rows for geometry columns --- datashader/core.py | 39 +++++++++++++++++++++++++++++++++ datashader/glyphs/line.py | 10 ++++++--- datashader/glyphs/points.py | 11 ++++++---- datashader/glyphs/polygon.py | 17 +++++++++----- datashader/tests/test_pandas.py | 2 +- 5 files changed, 65 insertions(+), 14 deletions(-) diff --git a/datashader/core.py b/datashader/core.py index 5c1a988ac..66a3482ff 100644 --- a/datashader/core.py +++ b/datashader/core.py @@ -204,6 +204,19 @@ def points(self, source, x=None, y=None, agg=None, geometry=None): x_range=self.x_range, y_range=self.y_range) glyph = Point(x, y) else: + from spatialpandas import GeoDataFrame + from spatialpandas.dask import DaskGeoDataFrame + if isinstance(source, DaskGeoDataFrame): + # Downselect partitions to those that may contain points in viewport + x_range = self.x_range if self.x_range is not None else (None, None) + y_range = self.y_range if self.y_range is not None else (None, None) + source = source.cx_partitions[slice(*x_range), slice(*y_range)] + elif not isinstance(source, GeoDataFrame): + raise ValueError( + "source must be an instance of spatialpandas.GeoDataFrame or \n" + "spatialpandas.dask.DaskGeoDataFrame.\n" + " Received value of type {typ}".format(typ=type(source))) + glyph = MultiPointGeometry(geometry) return bypixel(source, self, glyph, agg) @@ -320,6 +333,19 @@ def line(self, source, 
x=None, y=None, agg=None, axis=0, geometry=None): agg = any_rdn() if geometry is not None: + from spatialpandas import GeoDataFrame + from spatialpandas.dask import DaskGeoDataFrame + if isinstance(source, DaskGeoDataFrame): + # Downselect partitions to those that may contain lines in viewport + x_range = self.x_range if self.x_range is not None else (None, None) + y_range = self.y_range if self.y_range is not None else (None, None) + source = source.cx_partitions[slice(*x_range), slice(*y_range)] + elif not isinstance(source, GeoDataFrame): + raise ValueError( + "source must be an instance of spatialpandas.GeoDataFrame or \n" + "spatialpandas.dask.DaskGeoDataFrame.\n" + " Received value of type {typ}".format(typ=type(source))) + glyph = LineAxis1Geometry(geometry) else: # Broadcast column specifications to handle cases where @@ -661,6 +687,19 @@ def polygons(self, source, geometry, agg=None): """ from .glyphs import PolygonGeom from .reductions import any as any_rdn + from spatialpandas import GeoDataFrame + from spatialpandas.dask import DaskGeoDataFrame + if isinstance(source, DaskGeoDataFrame): + # Downselect partitions to those that may contain polygons in viewport + x_range = self.x_range if self.x_range is not None else (None, None) + y_range = self.y_range if self.y_range is not None else (None, None) + source = source.cx_partitions[slice(*x_range), slice(*y_range)] + elif not isinstance(source, GeoDataFrame): + raise ValueError( + "source must be an instance of spatialpandas.GeoDataFrame or \n" + "spatialpandas.dask.DaskGeoDataFrame.\n" + " Received value of type {typ}".format(typ=type(source))) + if agg is None: agg = any_rdn() glyph = PolygonGeom(geometry) diff --git a/datashader/glyphs/line.py b/datashader/glyphs/line.py index 6ff91aa4d..4b9220a04 100644 --- a/datashader/glyphs/line.py +++ b/datashader/glyphs/line.py @@ -979,18 +979,22 @@ def extend_cpu( offsets1 = offsets[0] offsets0 = np.arange(len(offsets1)) + # Compute indices of potentially 
intersecting polygons using + # geometry's R-tree + eligible_inds = geometry.sindex.intersects((xmin, ymin, xmax, ymax)) + extend_cpu_numba( sx, tx, sy, ty, xmin, xmax, ymin, ymax, - values, missing, offsets0, offsets1, *aggs_and_cols + values, missing, offsets0, offsets1, eligible_inds, *aggs_and_cols ) @ngjit @expand_aggs_and_cols def extend_cpu_numba( sx, tx, sy, ty, xmin, xmax, ymin, ymax, - values, missing, offsets0, offsets1, *aggs_and_cols + values, missing, offsets0, offsets1, eligible_inds, *aggs_and_cols ): - for i in range(len(offsets0) - 1): + for i in eligible_inds: if missing[i]: continue diff --git a/datashader/glyphs/points.py b/datashader/glyphs/points.py index 97ca2b97c..594f1b06c 100644 --- a/datashader/glyphs/points.py +++ b/datashader/glyphs/points.py @@ -230,10 +230,9 @@ def _perform_extend_points( @self.expand_aggs_and_cols(append) def extend_cpu( sx, tx, sy, ty, xmin, xmax, ymin, ymax, - values, missing, offsets, *aggs_and_cols + values, missing, offsets, eligible_inds, *aggs_and_cols ): - n = len(offsets) - 1 - for i in range(n): + for i in eligible_inds: if missing[i] is True: continue start = offsets[i] @@ -255,9 +254,13 @@ def extend(aggs, df, vt, bounds): missing = geometry.isna() offsets = geometry.buffer_offsets[0] + # Compute indices of potentially intersecting polygons using + # geometry's R-tree + eligible_inds = geometry.sindex.intersects((xmin, ymin, xmax, ymax)) + extend_cpu( sx, tx, sy, ty, xmin, xmax, ymin, ymax, - values, missing, offsets, *aggs_and_cols + values, missing, offsets, eligible_inds, *aggs_and_cols ) return extend diff --git a/datashader/glyphs/polygon.py b/datashader/glyphs/polygon.py index 7d29cce9d..a0ffd1cd5 100644 --- a/datashader/glyphs/polygon.py +++ b/datashader/glyphs/polygon.py @@ -16,12 +16,12 @@ def geom_dtypes(self): def _build_extend(self, x_mapper, y_mapper, info, append): expand_aggs_and_cols = self.expand_aggs_and_cols(append) map_onto_pixel = _build_map_onto_pixel_for_line(x_mapper, y_mapper) - 
draw_segment = _build_draw_polygon( + draw_polygon = _build_draw_polygon( append, map_onto_pixel, x_mapper, y_mapper, expand_aggs_and_cols ) perform_extend_cpu = _build_extend_polygon_geometry( - draw_segment, expand_aggs_and_cols + draw_polygon, expand_aggs_and_cols ) geom_name = self.geometry @@ -30,7 +30,7 @@ def extend(aggs, df, vt, bounds, plot_start=True): xmin, xmax, ymin, ymax = bounds aggs_and_cols = aggs + info(df) geom_array = df[geom_name].array - # line may be clipped, then mapped to pixels + perform_extend_cpu( sx, tx, sy, ty, xmin, xmax, ymin, ymax, @@ -198,6 +198,10 @@ def extend_cpu( missing = geometry.isna() offsets = geometry.buffer_offsets + # Compute indices of potentially intersecting polygons using + # geometry's R-tree + eligible_inds = geometry.sindex.intersects((xmin, ymin, xmax, ymax)) + if len(offsets) == 3: # MultiPolygonArray offsets0, offsets1, offsets2 = offsets @@ -208,14 +212,15 @@ def extend_cpu( extend_cpu_numba( sx, tx, sy, ty, xmin, xmax, ymin, ymax, - values, missing, offsets0, offsets1, offsets2, *aggs_and_cols + values, missing, offsets0, offsets1, offsets2, eligible_inds, *aggs_and_cols ) @ngjit @expand_aggs_and_cols def extend_cpu_numba( sx, tx, sy, ty, xmin, xmax, ymin, ymax, - values, missing, offsets0, offsets1, offsets2, *aggs_and_cols + values, missing, offsets0, offsets1, offsets2, + eligible_inds, *aggs_and_cols ): # Pre-allocate temp arrays if len(offsets0) > 1: @@ -236,7 +241,7 @@ def extend_cpu_numba( # Initialize array indicating which edges are still eligible for processing eligible = np.ones(max_edges, dtype=np.int8) - for i in range(len(offsets0) - 1): + for i in eligible_inds: if missing[i]: continue diff --git a/datashader/tests/test_pandas.py b/datashader/tests/test_pandas.py index c43127c2d..bf0992892 100644 --- a/datashader/tests/test_pandas.py +++ b/datashader/tests/test_pandas.py @@ -332,7 +332,7 @@ def test_points_geometry(): axis = ds.core.LinearAxis() lincoords = 
axis.compute_index(axis.compute_scale_and_translate((0., 2.), 3), 3) - df = pd.DataFrame({ + df = sp.GeoDataFrame({ 'geom': pd.array( [[0, 0], [0, 1, 1, 1], [0, 2, 1, 2, 2, 2]], dtype='MultiPoint[float64]'), 'v': [1, 2, 3] From 87b24285957ed00f100ad7879d0ecf283a02f2e4 Mon Sep 17 00:00:00 2001 From: Jon Mease Date: Tue, 26 Nov 2019 11:19:00 -0500 Subject: [PATCH 04/13] Support rendering Point geometry columns --- datashader/glyphs/points.py | 45 +++++++++++++++++++++++++-------- datashader/tests/test_pandas.py | 23 ++++++++++++++++- 2 files changed, 56 insertions(+), 12 deletions(-) diff --git a/datashader/glyphs/points.py b/datashader/glyphs/points.py index 594f1b06c..b3434b861 100644 --- a/datashader/glyphs/points.py +++ b/datashader/glyphs/points.py @@ -202,8 +202,8 @@ class MultiPointGeometry(_GeometryLike): @property def geom_dtypes(self): - from spatialpandas.geometry import MultiPointDtype - return (MultiPointDtype,) + from spatialpandas.geometry import PointDtype, MultiPointDtype + return PointDtype, MultiPointDtype @memoize def _build_extend(self, x_mapper, y_mapper, info, append): @@ -228,7 +228,21 @@ def _perform_extend_points( @ngjit @self.expand_aggs_and_cols(append) - def extend_cpu( + def extend_point_cpu( + sx, tx, sy, ty, xmin, xmax, ymin, ymax, + values, missing, eligible_inds, *aggs_and_cols + ): + for i in eligible_inds: + if missing[i] is True: + continue + _perform_extend_points( + i, 2 * i, sx, tx, sy, ty, xmin, xmax, ymin, ymax, + values, *aggs_and_cols + ) + + @ngjit + @self.expand_aggs_and_cols(append) + def extend_multipoint_cpu( sx, tx, sy, ty, xmin, xmax, ymin, ymax, values, missing, offsets, eligible_inds, *aggs_and_cols ): @@ -244,23 +258,32 @@ def extend_cpu( ) def extend(aggs, df, vt, bounds): + from spatialpandas.geometry import PointArray + aggs_and_cols = aggs + info(df) sx, tx, sy, ty = vt xmin, xmax, ymin, ymax = bounds geometry = df[geometry_name].array - values = geometry.buffer_values - missing = geometry.isna() - offsets = 
geometry.buffer_offsets[0] - # Compute indices of potentially intersecting polygons using # geometry's R-tree eligible_inds = geometry.sindex.intersects((xmin, ymin, xmax, ymax)) + missing = geometry.isna() - extend_cpu( - sx, tx, sy, ty, xmin, xmax, ymin, ymax, - values, missing, offsets, eligible_inds, *aggs_and_cols - ) + if isinstance(geometry, PointArray): + values = geometry.flat_values + extend_point_cpu( + sx, tx, sy, ty, xmin, xmax, ymin, ymax, + values, missing, eligible_inds, *aggs_and_cols + ) + else: + values = geometry.buffer_values + offsets = geometry.buffer_offsets[0] + + extend_multipoint_cpu( + sx, tx, sy, ty, xmin, xmax, ymin, ymax, + values, missing, offsets, eligible_inds, *aggs_and_cols + ) return extend diff --git a/datashader/tests/test_pandas.py b/datashader/tests/test_pandas.py index bf0992892..6ebf09d91 100644 --- a/datashader/tests/test_pandas.py +++ b/datashader/tests/test_pandas.py @@ -328,7 +328,28 @@ def test_log_axis_points(df): @pytest.mark.skipif(not sp, reason="spatialpandas not installed") -def test_points_geometry(): +def test_points_geometry_point(): + axis = ds.core.LinearAxis() + lincoords = axis.compute_index(axis.compute_scale_and_translate((0., 2.), 3), 3) + + df = sp.GeoDataFrame({ + 'geom': pd.array( + [[0, 0], [0, 1], [1, 1], [0, 2], [1, 2], [2, 2]], dtype='Point[float64]'), + 'v': [1, 2, 2, 3, 3, 3] + }) + + cvs = ds.Canvas(plot_width=3, plot_height=3) + agg = cvs.points(df, geometry='geom', agg=ds.sum('v')) + sol = np.array([[1, nan, nan], + [2, 2, nan], + [3, 3, 3]], dtype='float64') + out = xr.DataArray(sol, coords=[lincoords, lincoords], + dims=['y', 'x']) + assert_eq_xr(agg, out) + + +@pytest.mark.skipif(not sp, reason="spatialpandas not installed") +def test_points_geometry_multipoint(): axis = ds.core.LinearAxis() lincoords = axis.compute_index(axis.compute_scale_and_translate((0., 2.), 3), 3) From d268d6593b75c90626b3f49dd95dace38a5eff02 Mon Sep 17 00:00:00 2001 From: Jon Mease Date: Sat, 30 Nov 2019 10:32:21 
-0500 Subject: [PATCH 05/13] Add spatialpandas test dependency --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 8eee2a446..58e518272 100644 --- a/setup.py +++ b/setup.py @@ -36,6 +36,7 @@ 'nbsmoke >=0.2.6', 'fastparquet >=0.1.6', # optional dependency 'pandas >=0.24.1', # optional ragged array support + 'spatialpandas', ], 'examples': [], 'examples_extra':[ From bb7213fb8425fe2f6dab5fb5e1ae86892212c044 Mon Sep 17 00:00:00 2001 From: Jon Mease Date: Sat, 30 Nov 2019 18:25:27 -0500 Subject: [PATCH 06/13] try add conda forge for pyarrow dependency --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index f0f5867ca..c7c1bcebe 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,7 +11,7 @@ dist: xenial env: global: - PYENV_VERSION=3.7 - - CHANS_DEV="-c pyviz/label/dev" + - CHANS_DEV="-c pyviz/label/dev -c conda-forge" - CHANS_REL="-c pyviz" - LABELS_DEV="--label dev" - LABELS_REL="--label dev --label main" From a4c3297964c1b039ac195ead5824a82c7397f082 Mon Sep 17 00:00:00 2001 From: Jon Mease Date: Sat, 30 Nov 2019 19:38:21 -0500 Subject: [PATCH 07/13] travis: Install spatialpandas for testing with python 3 only --- .travis.yml | 7 ++++++- setup.py | 1 - 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index c7c1bcebe..574c5738c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,7 +11,7 @@ dist: xenial env: global: - PYENV_VERSION=3.7 - - CHANS_DEV="-c pyviz/label/dev -c conda-forge" + - CHANS_DEV="-c pyviz/label/dev" - CHANS_REL="-c pyviz" - LABELS_DEV="--label dev" - LABELS_REL="--label dev --label main" @@ -60,6 +60,11 @@ jobs: - doit env_create $CHANS_DEV --python=$PYTHON_VERSION --name=$PYTHON_VERSION - source activate $PYTHON_VERSION - doit develop_install $CHANS_DEV $OPTS + # Install spatialpandas here because it's python 3 only and requires + # conda-forge for some dependencies + - if [[ "PYTHON_VERSION" != "2.7" ]]; then + conda 
install -c pyviz -c conda-forge spatialpandas; + fi - doit env_capture script: - doit test_all diff --git a/setup.py b/setup.py index 58e518272..8eee2a446 100644 --- a/setup.py +++ b/setup.py @@ -36,7 +36,6 @@ 'nbsmoke >=0.2.6', 'fastparquet >=0.1.6', # optional dependency 'pandas >=0.24.1', # optional ragged array support - 'spatialpandas', ], 'examples': [], 'examples_extra':[ From 47e8d818f5b88b249142a1a5bce7df111e768895 Mon Sep 17 00:00:00 2001 From: Jon Mease Date: Sat, 30 Nov 2019 19:42:48 -0500 Subject: [PATCH 08/13] pyviz -> pyviz/label/dev --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 574c5738c..9a597c0ab 100644 --- a/.travis.yml +++ b/.travis.yml @@ -63,7 +63,7 @@ jobs: # Install spatialpandas here because it's python 3 only and requires # conda-forge for some dependencies - if [[ "PYTHON_VERSION" != "2.7" ]]; then - conda install -c pyviz -c conda-forge spatialpandas; + conda install -c pyviz/label/dev -c conda-forge spatialpandas; fi - doit env_capture script: From fb7f571e32b993384cf5bc3e94f83f15034cebf4 Mon Sep 17 00:00:00 2001 From: Jon Mease Date: Sat, 30 Nov 2019 19:53:40 -0500 Subject: [PATCH 09/13] bash syntax --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 9a597c0ab..06830630f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -62,7 +62,7 @@ jobs: - doit develop_install $CHANS_DEV $OPTS # Install spatialpandas here because it's python 3 only and requires # conda-forge for some dependencies - - if [[ "PYTHON_VERSION" != "2.7" ]]; then + - if [[ "$PYTHON_VERSION" != "2.7" ]]; then conda install -c pyviz/label/dev -c conda-forge spatialpandas; fi - doit env_capture From 01a46968631d2189368879ae7ea8b630e055bdf0 Mon Sep 17 00:00:00 2001 From: Jon Mease Date: Sun, 1 Dec 2019 05:20:26 -0500 Subject: [PATCH 10/13] Python 2 flakes --- datashader/tests/test_dask.py | 8 ++++---- datashader/tests/test_pandas.py | 8 ++++---- 2 
files changed, 8 insertions(+), 8 deletions(-) diff --git a/datashader/tests/test_dask.py b/datashader/tests/test_dask.py index b08bfd361..fbfdc9ad6 100644 --- a/datashader/tests/test_dask.py +++ b/datashader/tests/test_dask.py @@ -41,8 +41,8 @@ _ddf = dd.from_pandas(df_pd, npartitions=2) -def dask_DataFrame(*args, geo=False, **kwargs): - if geo: +def dask_DataFrame(*args, **kwargs): + if kwargs.pop("geo", False): df = sp.GeoDataFrame(*args, **kwargs) else: df = pd.DataFrame(*args, **kwargs) @@ -55,8 +55,8 @@ def dask_DataFrame(*args, geo=False, **kwargs): import dask_cudf ddfs = [_ddf, dask_cudf.from_dask_dataframe(_ddf)] - def dask_cudf_DataFrame(*args, geo=False, **kwargs): - assert not geo + def dask_cudf_DataFrame(*args, **kwargs): + assert not kwargs.pop("geo", False) cdf = cudf.DataFrame.from_pandas( pd.DataFrame(*args, **kwargs), nan_as_null=False ) diff --git a/datashader/tests/test_pandas.py b/datashader/tests/test_pandas.py index 6ebf09d91..bd39d9d16 100644 --- a/datashader/tests/test_pandas.py +++ b/datashader/tests/test_pandas.py @@ -36,8 +36,8 @@ sp = None -def pd_DataFrame(*args, geo=False, **kwargs): - if geo: +def pd_DataFrame(*args, **kwargs): + if kwargs.pop("geo", False): return sp.GeoDataFrame(*args, **kwargs) else: return pd.DataFrame(*args, **kwargs) @@ -46,8 +46,8 @@ def pd_DataFrame(*args, geo=False, **kwargs): try: import cudf import cupy - def cudf_DataFrame(*args, geo=False, **kwargs): - assert not geo + def cudf_DataFrame(*args, **kwargs): + assert not kwargs.pop("geo", False) return cudf.DataFrame.from_pandas( pd.DataFrame(*args, **kwargs), nan_as_null=False ) From b51a14b1efa2a9419d8e53681a07889ff86eec5e Mon Sep 17 00:00:00 2001 From: Jon Mease Date: Sun, 1 Dec 2019 05:53:57 -0500 Subject: [PATCH 11/13] Include holoviews in tests since it is used in some nbsmoked notebooks --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 8eee2a446..c3f4bd711 100644 --- a/setup.py +++ b/setup.py @@ -36,6 +36,7 @@ 
'nbsmoke >=0.2.6', 'fastparquet >=0.1.6', # optional dependency 'pandas >=0.24.1', # optional ragged array support + 'holoviews >=1.10.0', ], 'examples': [], 'examples_extra':[ From 5376488c9c5dc61708223087d5c08e9a5b24ee16 Mon Sep 17 00:00:00 2001 From: Jon Mease Date: Thu, 5 Dec 2019 07:44:23 -0500 Subject: [PATCH 12/13] Fix Canvas.polygons docstring example --- datashader/core.py | 33 ++++++++++++++------------------- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/datashader/core.py b/datashader/core.py index 66a3482ff..deea52959 100644 --- a/datashader/core.py +++ b/datashader/core.py @@ -653,33 +653,28 @@ def polygons(self, source, geometry, agg=None): Examples -------- - >>> from math import inf # doctest: +SKIP - ... import datashader as ds + >>> import datashader as ds # doctest: +SKIP ... import datashader.transfer_functions as tf - ... from datashader.geom import PolygonsArray + ... from spatialpandas.geometry import PolygonArray + ... from spatialpandas import GeoDataFrame ... import pandas as pd ... - ... polygons = PolygonsArray([ - ... # ## First Element - ... # Filled quadrilateral (CCW order) - ... [0, 0, 1, 0, 2, 2, -1, 4, 0, 0, - ... # Triangular hole (CW order) - ... -inf, -inf, 0.5, 1, 1, 2, 1.5, 1.5, 0.5, 1, - ... # Rectangular hole (CW order) - ... -inf, -inf, 0, 2, 0, 2.5, 0.5, 2.5, 0.5, 2, 0, 2, - ... # Filled triangle - ... inf, inf, 2.5, 3, 3.5, 3, 3.5, 4, 2.5, 3, + ... polygons = PolygonArray([ + ... # First Element + ... [[0, 0, 1, 0, 2, 2, -1, 4, 0, 0], # Filled quadrilateral (CCW order) + ... [0.5, 1, 1, 2, 1.5, 1.5, 0.5, 1], # Triangular hole (CW order) + ... [0, 2, 0, 2.5, 0.5, 2.5, 0.5, 2, 0, 2], # Rectangular hole (CW order) + ... [2.5, 3, 3.5, 3, 3.5, 4, 2.5, 3], # Filled triangle ... ], ... - ... # ## Second Element - ... # Filled rectangle (CCW order) - ... [3, 0, 3, 2, 4, 2, 4, 0, 3, 0, - ... # Rectangular hole (CW order) - ... -inf, -inf, 3.25, 0.25, 3.75, 0.25, 3.75, 1.75, 3.25, 1.75, 3.25, 0.25, + ... 
# Second Element + ... [[3, 0, 3, 2, 4, 2, 4, 0, 3, 0], # Filled rectangle (CCW order) + ... # Rectangular hole (CW order) + ... [3.25, 0.25, 3.75, 0.25, 3.75, 1.75, 3.25, 1.75, 3.25, 0.25], ... ] ... ]) ... - ... df = pd.DataFrame({'polygons': polygons, 'v': range(len(polygons))}) + ... df = GeoDataFrame({'polygons': polygons, 'v': range(len(polygons))}) ... ... cvs = ds.Canvas() ... agg = cvs.polygons(df, geometry='polygons', agg=ds.sum('v')) From 6a3c737f369ad68ef5bc7bec2712e37a38932172 Mon Sep 17 00:00:00 2001 From: Jon Mease Date: Tue, 10 Dec 2019 19:40:56 -0500 Subject: [PATCH 13/13] Fix handling of subpixel multi polygons --- datashader/glyphs/polygon.py | 35 +++++++------- datashader/tests/test_polygons.py | 78 +++++++++++++++++++++++++++++++ 2 files changed, 97 insertions(+), 16 deletions(-) diff --git a/datashader/glyphs/polygon.py b/datashader/glyphs/polygon.py index a0ffd1cd5..8c42ebbb4 100644 --- a/datashader/glyphs/polygon.py +++ b/datashader/glyphs/polygon.py @@ -82,11 +82,17 @@ def draw_polygon( stopxi += 1 stopyi += 1 - # Handle subpixel polygons (pixel width or height of polygon is 1) - if (stopxi - startxi) == 1 or (stopyi - startyi) == 1: - for yi in range(startyi, stopyi): - for xi in range(startxi, stopxi): - append(i, xi, yi, *aggs_and_cols) + # Handle subpixel polygons (pixel width and/or height of polygon is 1) + if (stopxi - startxi) == 1 and (stopyi - startyi) == 1: + append(i, startxi, startyi, *aggs_and_cols) + return + elif (stopxi - startxi) == 1: + for yi in range(min(startyi, stopyi) + 1, max(startyi, stopyi)): + append(i, startxi, yi, *aggs_and_cols) + return + elif (stopyi - startyi) == 1: + for xi in range(min(startxi, stopxi) + 1, max(startxi, stopxi)): + append(i, xi, startyi, *aggs_and_cols) return # Build arrays of edges in canvas coordinates @@ -245,16 +251,13 @@ def extend_cpu_numba( if missing[i]: continue - # i: row index - # start, stop: start and stop index into values for the multiple polygons - # in row i. 
- # Note: the draw_polygon method handles the edges of all of the filled - # polygons and holes in one pass. - start = offsets1[offsets0[i]] - stop = offsets1[offsets0[i + 1]] - - draw_polygon(i, sx, tx, sy, ty, xmin, xmax, ymin, ymax, - offsets2[start:stop + 1], values, - xs, ys, yincreasing, eligible, *aggs_and_cols) + polygon_inds = offsets1[offsets0[i]:offsets0[i + 1] + 1] + for j in range(len(polygon_inds) - 1): + start = polygon_inds[j] + stop = polygon_inds[j + 1] + + draw_polygon(i, sx, tx, sy, ty, xmin, xmax, ymin, ymax, + offsets2[start:stop + 1], values, + xs, ys, yincreasing, eligible, *aggs_and_cols) return extend_cpu diff --git a/datashader/tests/test_polygons.py b/datashader/tests/test_polygons.py index 893998033..df835473e 100644 --- a/datashader/tests/test_polygons.py +++ b/datashader/tests/test_polygons.py @@ -11,9 +11,11 @@ # Import to register extension arrays import spatialpandas # noqa (register EAs) from spatialpandas import GeoDataFrame + from spatialpandas.geometry import MultiPolygonArray except ImportError: spatialpandas = None GeoDataFrame = None + MultiPolygonArray = None def dask_GeoDataFrame(*args, **kwargs): @@ -212,3 +214,79 @@ def test_no_overlap_agg(DataFrame): out = xr.DataArray(sol, coords=[lincoords_y, lincoords_x], dims=['y', 'x']) assert_eq_xr(agg, out) + + +@pytest.mark.skipif(not spatialpandas, reason="spacialpandas not installed") +@pytest.mark.parametrize('DataFrame', DataFrames) +@pytest.mark.parametrize('scale', [4, 100]) +def test_multipolygon_subpixel_vertical(DataFrame, scale): + df = GeoDataFrame({ + 'geometry': MultiPolygonArray([[ + [[0, 0, 1, 0, 1, 1, 0, 1, 0, 0]], + [[2, 0, 3, 0, 3, 1, 2, 1, 2, 0]], + ]]) + }) + + cvs = ds.Canvas( + plot_height=8, plot_width=8, + x_range=(0, 4), + y_range=(-2 * scale, 2 * scale) + ) + agg = cvs.polygons(df, 'geometry', agg=ds.count()) + + sol = np.array([ + [0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0], + [0, 1, 1, 
0, 0, 1, 1, 0], + [0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0] + ], dtype=np.int32) + + axis = ds.core.LinearAxis() + lincoords_x = axis.compute_index( + axis.compute_scale_and_translate((0, 4), 8), 8) + lincoords_y = axis.compute_index( + axis.compute_scale_and_translate((-2 * scale, 2 * scale), 8), 8) + out = xr.DataArray(sol, coords=[lincoords_y, lincoords_x], dims=['y', 'x']) + assert_eq_xr(agg, out) + + +@pytest.mark.skipif(not spatialpandas, reason="spacialpandas not installed") +@pytest.mark.parametrize('DataFrame', DataFrames) +@pytest.mark.parametrize('scale', [4, 100]) +def test_multipolygon_subpixel_horizontal(DataFrame, scale): + df = GeoDataFrame({ + 'geometry': MultiPolygonArray([[ + [[0, 0, 1, 0, 1, 1, 0, 1, 0, 0]], + [[0, 2, 1, 2, 1, 3, 0, 3, 0, 2]], + ]]) + }) + + cvs = ds.Canvas( + plot_height=8, plot_width=8, + x_range=(-2 * scale, 2 * scale), + y_range=(0, 4) + ) + agg = cvs.polygons(df, 'geometry', agg=ds.count()) + + sol = np.array([ + [0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 1, 0, 0, 0], + [0, 0, 0, 0, 1, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 1, 0, 0, 0], + [0, 0, 0, 0, 1, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0] + ], dtype=np.int32) + + axis = ds.core.LinearAxis() + lincoords_x = axis.compute_index( + axis.compute_scale_and_translate((-2 * scale, 2 * scale), 8), 8) + lincoords_y = axis.compute_index( + axis.compute_scale_and_translate((0, 4), 8), 8) + out = xr.DataArray(sol, coords=[lincoords_y, lincoords_x], dims=['y', 'x']) + assert_eq_xr(agg, out)