Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
cd0a81e
h3 cell index validation
kylebarron Oct 10, 2025
c0278d5
Add tests from upstream
kylebarron Oct 10, 2025
20ff5f5
fix one test
kylebarron Oct 10, 2025
a3b660b
fix casting
kylebarron Oct 10, 2025
3b2a7d0
Passing all h3 validation tests
kylebarron Oct 10, 2025
66a54ae
bump deck-layers
kylebarron Oct 10, 2025
3e7af17
Move validation
kylebarron Oct 10, 2025
90e5988
Implement h3_to_str
kylebarron Oct 10, 2025
bda35b9
Create h3 hexagon model
kylebarron Oct 10, 2025
462c064
Create layer
kylebarron Oct 10, 2025
adf7e56
add str to h3
kylebarron Oct 10, 2025
a7fea97
move test into folder
kylebarron Oct 13, 2025
b014f0b
h3 trait
kylebarron Oct 13, 2025
9fd0b2d
Improved str_to_h3
kylebarron Oct 13, 2025
efc1f00
fix str to h3 tests
kylebarron Oct 13, 2025
9e20327
Add h3 trait tests
kylebarron Oct 13, 2025
f55e5b1
Merge branch 'main' into kyle/h3-layer
kylebarron Oct 13, 2025
cd02e13
regen lockfile
kylebarron Oct 13, 2025
186c441
fix type check
kylebarron Oct 13, 2025
642617d
lower case str tests
kylebarron Oct 13, 2025
4d9995f
Add types
kylebarron Oct 13, 2025
abd79d3
Merge branch 'main' into kyle/h3-layer
kylebarron Oct 14, 2025
473f84c
lock
kylebarron Oct 14, 2025
164617c
Merge branch 'main' into kyle/h3-layer
kylebarron Oct 28, 2025
54f6990
h3 sample default viewport
kylebarron Oct 28, 2025
af6ba6f
bump version and ensure export
kylebarron Oct 28, 2025
da95f79
Merge branch 'main' into kyle/h3-layer
kylebarron Oct 28, 2025
1e5d305
Merge branch 'main' into kyle/h3-layer
kylebarron Oct 28, 2025
0b1ac80
Merge branch 'main' into kyle/h3-layer
kylebarron Oct 28, 2025
e077c0e
Add high_precision prop to h3 layer
kylebarron Oct 28, 2025
14aa37c
Merge branch 'main' into kyle/h3-layer
kylebarron Oct 29, 2025
78e89f8
remove h3 layer from public api
kylebarron Oct 29, 2025
e5bc5ce
update test imports
kylebarron Oct 29, 2025
3a2ea63
fix import
kylebarron Oct 29, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions lonboard/_geoarrow/ops/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""Geometry operations on GeoArrow memory."""

from .bbox import total_bounds
from .centroid import weighted_centroid
from .bbox import Bbox, total_bounds
from .centroid import WeightedCentroid, weighted_centroid
from .reproject import reproject_column, reproject_table
3 changes: 3 additions & 0 deletions lonboard/_h3/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from ._h3_to_str import h3_to_str
from ._str_to_h3 import str_to_h3
from ._validate_h3_cell import validate_h3_indices
27 changes: 27 additions & 0 deletions lonboard/_h3/_h3_to_str.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from __future__ import annotations

from typing import TYPE_CHECKING

import numpy as np

if TYPE_CHECKING:
from numpy.typing import NDArray


def h3_to_str(h3_indices: NDArray[np.uint64]) -> NDArray[np.bytes_]:
    """Convert an array of H3 indices (uint64) to their hexadecimal string representations.

    Each index is rendered as 15 upper-case hexadecimal digits, most significant
    digit first. Only the low 60 bits of each value are rendered; the top 4 bits
    of the 64-bit integer are not part of the output.

    Args:
        h3_indices: H3 indices. Any array-like of integers is accepted; it is
            converted to a flat uint64 array before formatting.

    Returns:
        A 1-D numpy array of type S15 (fixed-length ASCII byte strings of
        length 15), one entry per input index.

    """
    # Ensure input is a flat numpy array of uint64 (also accepts lists/tuples)
    indices = np.asarray(h3_indices, dtype=np.uint64).ravel()

    # Prepare hex digits lookup
    hex_digits = np.array(list("0123456789ABCDEF"), dtype="S1")

    # Bit offset of each of the 15 hex digits, most significant first: 56, 52, ..., 0
    shifts = np.array(range(56, -4, -4), dtype=np.uint64)

    # Extract every 4-bit digit in one broadcasted operation; shape is (n, 15)
    nibbles = (indices[:, np.newaxis] >> shifts) & np.uint64(0xF)
    hex_chars = hex_digits[nibbles.astype(np.intp)]

    # Reinterpret each row of 15 single bytes as one 15-byte string
    return np.ascontiguousarray(hex_chars).view("S15")[:, 0]
50 changes: 50 additions & 0 deletions lonboard/_h3/_str_to_h3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
from __future__ import annotations

from typing import TYPE_CHECKING

import numpy as np

if TYPE_CHECKING:
from numpy.typing import NDArray


def str_to_h3(hex_arr: NDArray[np.str_]) -> NDArray[np.uint64]:
    """Parse an array of hexadecimal strings into H3 indices (uint64).

    The conversion is fully vectorized with NumPy: every string is viewed as a
    row of single bytes, each byte is mapped to its 4-bit value, and the rows
    are combined with a dot product against the powers of 16.

    Args:
        hex_arr: Array of hexadecimal strings (15 characters each). Both
            upper-case and lower-case hex digits are understood.

    Returns:
        Array of H3 indices as uint64 integers

    """
    n_strings = len(hex_arr)
    if n_strings == 0:
        return np.array([], dtype=np.uint64)

    # Normalize to fixed-width S15 byte strings, then expose the raw ASCII
    # codes as an (n, 15) uint8 matrix.
    fixed_width = np.asarray(hex_arr, dtype="S15")
    single_bytes = fixed_width.view("S1").reshape(n_strings, -1)
    char_codes = single_bytes.view(np.uint8)

    # Map ASCII codes to digit values (0-15):
    #   '0'-'9' (48-57)  -> 0-9 after subtracting ord('0')
    #   'A'-'F' (65-70)  -> 17-22, minus 7 gives 10-15
    #   'a'-'f' (97-102) -> 49-54, minus 7 gives 42-47, minus 32 gives 10-15
    nibbles = char_codes - ord("0")
    past_decimal = nibbles > 9
    nibbles = np.where(past_decimal, nibbles - 7, nibbles)
    past_upper_case = nibbles > 15
    nibbles = np.where(past_upper_case, nibbles - 32, nibbles)

    # Place value of each column, most significant first:
    # [16^14, 16^13, ..., 16^1, 16^0] for 15 digits.
    width = single_bytes.shape[1]
    exponents = np.arange(width - 1, -1, -1, dtype=np.uint64)
    place_values = 16 ** exponents

    # Row-wise sum(digit_i * 16^(width-1-i)) yields the uint64 index.
    digits = nibbles.astype(np.uint64)
    return np.dot(digits, place_values)
219 changes: 219 additions & 0 deletions lonboard/_h3/_validate_h3_cell.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,219 @@
"""Implement h3 cell validation in pure numpy.

It's hard to surface errors from deck.gl back to Python, so it's a bad user experience
if the JS console errors and silently nothing renders. But also I don't want to depend
on the h3 library for this because the h3 library isn't vectorized (arghhhh!) and I
don't want to require the dependency.

So instead, I spend my time porting code into Numpy 😄.

Ported from Rust code in h3o:

https://github.com/HydroniumLabs/h3o/blob/07dcb85d9cb539f685ec63050ef0954b1d9f3864/src/index/cell.rs#L1897-L1962
"""

from __future__ import annotations

from typing import TYPE_CHECKING

import numpy as np

if TYPE_CHECKING:
from numpy.typing import NDArray

__all__ = ["validate_h3_indices"]

MODE_OFFSET = 59
"""Offset (in bits) of the mode in an H3 index."""

# Bitmask to select the 4 mode bits (starting at MODE_OFFSET) in an H3 index.
MODE_MASK = 0b1111 << MODE_OFFSET

EDGE_OFFSET = 56
"""Offset (in bits) of the cell edge in an H3 index."""

# Bitmask to select the 3 cell edge bits (starting at EDGE_OFFSET) in an H3 index.
EDGE_MASK = 0b111 << EDGE_OFFSET

VERTEX_OFFSET = 56
"""Offset (in bits) of the cell vertex in an H3 index."""

# Bitmask to select the 3 cell vertex bits in an H3 index; this is the same
# bit range as EDGE_MASK because VERTEX_OFFSET equals EDGE_OFFSET.
VERTEX_MASK = 0b111 << VERTEX_OFFSET

DIRECTIONS_MASK = 0x0000_1FFF_FFFF_FFFF
"""Bitmask to select the directions bits in an H3 index."""

INDEX_MODE_CELL = 1
"""H3 index mode for cells."""

BASE_CELL_OFFSET = 45
"""Offset (in bits) of the base cell in an H3 index."""

BASE_CELL_MASK = 0b111_1111 << BASE_CELL_OFFSET
"""Bitmask to select the base cell bits in an H3 index."""

MAX_BASE_CELL = 121
"""Maximum value for a base cell."""

RESOLUTION_OFFSET = 52
"""The bit offset of the resolution in an H3 index."""

RESOLUTION_MASK = 0b1111 << RESOLUTION_OFFSET
"""Bitmask to select the resolution bits in an H3 index."""

MAX_RESOLUTION = 15
"""Maximum supported H3 resolution."""

DIRECTION_BITSIZE = 3
"""Size, in bits, of a direction (range [0; 6])."""

BASE_PENTAGONS_HI = 0x0020_0802_0008_0100
"""Bitmap where a bit's position represents a base cell value (high part).

Refactored from upstream 128 bit integer
https://github.com/HydroniumLabs/h3o/blob/3b40550291a57552117c48c19841557a3b0431e1/src/base_cell.rs#L12
"""

BASE_PENTAGONS_LO = 0x8402_0040_0100_4010
"""Bitmap where a bit's position represents a base cell value (low part).

Refactored from upstream 128 bit integer
https://github.com/HydroniumLabs/h3o/blob/3b40550291a57552117c48c19841557a3b0431e1/src/base_cell.rs#L12
"""

# These values must be exactly the bit positions set in the 128-bit pentagon
# bitmap (BASE_PENTAGONS_HI << 64) | BASE_PENTAGONS_LO. Base cell 33 is a
# hexagon and base cell 117 is a pentagon.
PENTAGON_BASE_CELLS = np.array(
    [4, 14, 24, 38, 49, 58, 63, 72, 83, 97, 107, 117],
    dtype=np.uint8,
)
"""Set of pentagon base cells."""


def validate_h3_indices(h3_indices: NDArray[np.uint64]) -> None:
    """Validate an array of uint64 H3 cell indices.

    The checks run in the order below and the first failing check raises:

    1. Bit 63 and bits 56-58 must be zero.
    2. The mode must be ``INDEX_MODE_CELL``.
    3. The base cell must not exceed ``MAX_BASE_CELL``.
    4. All direction slots beyond the cell's resolution must be ``0b111``.
    5. No direction slot within the cell's resolution may be ``0b111``.
    6. For pentagon base cells at resolution > 0, the first non-zero
       direction must not be ``0b001`` (the deleted K-axis subsequence).

    Args:
        h3_indices: 1-D array of H3 indices.

    Raises:
        ValueError: If any index is invalid. The message lists the positions
            of the offending entries in ``h3_indices`` and their values.

    """
    invalid_reserved_bits = h3_indices >> 56 & 0b1000_0111 != 0
    _raise_if_any(
        h3_indices,
        invalid_reserved_bits,
        "Tainted reserved bits in indices",
    )

    invalid_mode = get_mode(h3_indices) != INDEX_MODE_CELL
    _raise_if_any(h3_indices, invalid_mode, "Invalid index mode in indices")

    base = get_base_cell(h3_indices)
    invalid_base_cell = base > MAX_BASE_CELL
    _raise_if_any(h3_indices, invalid_base_cell, "Invalid base cell in indices")

    # Resolution is always valid: coded on 4 bits, valid range is [0; 15].
    resolution = get_resolution(h3_indices)

    # Check that we have a tail of unused cells after `resolution` cells.
    #
    # We expect every bit to be 1 in the tail (because unused cells are
    # represented by `0b111`), i.e. every bit set to 0 after a NOT.
    unused_count = MAX_RESOLUTION - resolution
    unused_bitsize = unused_count * DIRECTION_BITSIZE
    unused_mask = (1 << unused_bitsize.astype(np.uint64)) - 1
    invalid_unused_direction_pattern = (~h3_indices) & unused_mask != 0
    _raise_if_any(
        h3_indices,
        invalid_unused_direction_pattern,
        "Invalid unused direction pattern in indices",
    )

    # Check that we have `resolution` valid cells (no unused ones).
    dirs_mask = (1 << (resolution * DIRECTION_BITSIZE).astype(np.uint64)) - 1
    dirs = (h3_indices >> unused_bitsize) & dirs_mask
    invalid_unused_direction = has_unused_direction(dirs)
    _raise_if_any(
        h3_indices,
        invalid_unused_direction,
        "Unexpected unused direction in indices",
    )

    # Check for pentagons with deleted subsequence.
    has_pentagon_base = np.logical_and(is_pentagon(base), resolution != 0)
    pentagon_positions = np.where(has_pentagon_base)[0]
    if len(pentagon_positions) > 0:
        pentagon_resolutions = resolution[pentagon_positions]
        pentagon_dirs = dirs[pentagon_positions]

        # Move directions to the front, so that we can count leading zeroes.
        pentagon_offset = 64 - (pentagon_resolutions * DIRECTION_BITSIZE)
        shifted = pentagon_dirs << pentagon_offset

        # Find the position of the first bit set, if it's a multiple of 3
        # that means we have a K axe as the first non-center direction,
        # which is forbidden. Mirrors Rust's `leading_zeros`:
        # https://github.com/HydroniumLabs/h3o/blob/07dcb85d9cb539f685ec63050ef0954b1d9f3864/src/index/cell.rs#L1951
        leading_zeros = _leading_zeros_u64(shifted)
        first_direction_is_k = ((leading_zeros + 1) % 3) == 0

        # Scatter the per-pentagon verdicts back onto the full input so the
        # error reports positions in `h3_indices`, not in the pentagon subset.
        deleted_subsequence = np.zeros(h3_indices.shape, dtype=np.bool_)
        deleted_subsequence[pentagon_positions] = first_direction_is_k
        _raise_if_any(
            h3_indices,
            deleted_subsequence,
            "Pentagonal cell index with a deleted subsequence",
        )


def _raise_if_any(
    h3_indices: NDArray[np.uint64],
    invalid: NDArray[np.bool_],
    message: str,
) -> None:
    """Raise ValueError naming the positions and values flagged in ``invalid``."""
    bad_indices = np.where(invalid)[0]
    if len(bad_indices) > 0:
        raise ValueError(
            f"{message}: {bad_indices.tolist()}\n"
            f"with values {h3_indices[bad_indices].tolist()}",
        )


def _leading_zeros_u64(values: NDArray[np.uint64]) -> NDArray[np.int64]:
    """Count leading zero bits of each 64-bit unsigned integer (64 for zero)."""
    # Binary search for the highest set bit using integer shifts only, which
    # is exact for every uint64 value and well-defined for zero.
    remaining = values.astype(np.uint64)
    bit_length = np.zeros(remaining.shape, dtype=np.int64)
    for step in (32, 16, 8, 4, 2, 1):
        upper = remaining >> np.uint64(step)
        has_upper = upper != 0
        bit_length += np.where(has_upper, step, 0)
        remaining = np.where(has_upper, upper, remaining)
    # After the search `remaining` is 0 or 1; a 1 accounts for the final bit.
    bit_length += remaining != 0
    return 64 - bit_length


def get_mode(bits: NDArray[np.uint64]) -> NDArray[np.uint8]:
    """Extract the index mode field from each H3 index as uint8."""
    mode_field = bits & MODE_MASK
    mode_value = mode_field >> MODE_OFFSET
    return mode_value.astype(np.uint8)


def get_base_cell(bits: NDArray[np.uint64]) -> NDArray[np.uint8]:
    """Extract the base cell field from each H3 index as uint8."""
    base_cell_field = bits & BASE_CELL_MASK
    base_cell_value = base_cell_field >> BASE_CELL_OFFSET
    return base_cell_value.astype(np.uint8)


def get_resolution(bits: NDArray[np.uint64]) -> NDArray[np.uint8]:
    """Extract the resolution field from each H3 index as uint8."""
    resolution_field = bits & RESOLUTION_MASK
    resolution_value = resolution_field >> RESOLUTION_OFFSET
    return resolution_value.astype(np.uint8)


def has_unused_direction(dirs: NDArray) -> NDArray[np.bool_]:
    """Report, per element, whether any 3-bit direction slot equals ``0b111``.

    The test is done on all 15 slots at once with two bit patterns: one with
    the lowest bit of every slot set and one with the highest bit of every
    slot set.

    Ported from upstream
    https://github.com/HydroniumLabs/h3o/blob/07dcb85d9cb539f685ec63050ef0954b1d9f3864/src/index/cell.rs#L2056-L2107
    """
    low_bit_of_each_slot = 0b001_001_001_001_001_001_001_001_001_001_001_001_001_001_001
    high_bit_of_each_slot = 0b100_100_100_100_100_100_100_100_100_100_100_100_100_100_100

    inverted_minus_low = ~dirs - low_bit_of_each_slot
    high_bits_present = dirs & high_bit_of_each_slot
    flagged = inverted_minus_low & high_bits_present
    return flagged != 0


def is_pentagon(cell: NDArray[np.uint8]) -> NDArray[np.bool_]:
    """Report, per element, whether the base cell is one of the pentagon base cells.

    This deliberately differs from the upstream implementation:
    https://github.com/HydroniumLabs/h3o/blob/3b40550291a57552117c48c19841557a3b0431e1/src/base_cell.rs#L33-L47

    Upstream tests a bit in a 128 bit integer bitmap, a type numpy does not
    provide, so membership in the static ``PENTAGON_BASE_CELLS`` array is
    checked instead.
    """
    pentagon_membership = np.isin(cell, PENTAGON_BASE_CELLS)
    return pentagon_membership
11 changes: 9 additions & 2 deletions lonboard/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,19 @@ def auto_downcast(df: DF) -> DF:

check_pandas_version()

# This will fail if the df is pandas input:
# TypeError: data type 'geometry' not understood
try:
df_attr = df.select_dtypes(exclude="geometry")
except TypeError:
df_attr = df

# Convert objects to numeric types where possible.
# Note: we have to exclude geometry because
# `convert_dtypes(dtype_backend="pyarrow")` fails on the geometory column, but we
# `convert_dtypes(dtype_backend="pyarrow")` fails on the geometry column, but we
# also have to manually cast to a non-geo data frame because it'll fail to convert
# dtypes on a GeoDataFrame without a geom col
casted_df = pd.DataFrame(df.select_dtypes(exclude="geometry")).convert_dtypes( # type: ignore
casted_df = pd.DataFrame(df_attr).convert_dtypes( # type: ignore
infer_objects=True,
convert_string=True,
convert_integer=True,
Expand Down
15 changes: 8 additions & 7 deletions lonboard/layer/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,20 @@
from lonboard._geoarrow._duckdb import from_duckdb as _from_duckdb
from lonboard._geoarrow.c_stream_import import import_arrow_c_stream
from lonboard._geoarrow.geopandas_interop import geopandas_to_geoarrow
from lonboard._geoarrow.ops import reproject_table
from lonboard._geoarrow.ops.bbox import Bbox, total_bounds
from lonboard._geoarrow.ops.centroid import WeightedCentroid, weighted_centroid
from lonboard._geoarrow.ops import (
Bbox,
WeightedCentroid,
reproject_table,
total_bounds,
weighted_centroid,
)
from lonboard._geoarrow.ops.coord_layout import make_geometry_interleaved
from lonboard._geoarrow.parse_wkb import parse_serialized_table
from lonboard._geoarrow.row_index import add_positional_row_index
from lonboard._serialization import infer_rows_per_chunk
from lonboard._utils import auto_downcast as _auto_downcast
from lonboard._utils import get_geometry_column_index, remove_extension_kwargs
from lonboard.traits import (
ArrowTableTrait,
VariableLengthTuple,
)
from lonboard.traits import ArrowTableTrait, VariableLengthTuple

if TYPE_CHECKING:
import sys
Expand Down
3 changes: 1 addition & 2 deletions lonboard/layer/_bitmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,7 @@

import traitlets as t

from lonboard._geoarrow.ops.bbox import Bbox
from lonboard._geoarrow.ops.centroid import WeightedCentroid
from lonboard._geoarrow.ops import Bbox, WeightedCentroid
from lonboard.layer._base import BaseLayer
from lonboard.traits import (
VariableLengthTuple,
Expand Down
Loading