| 
 | 1 | +"""Implement h3 cell validation in pure numpy.  | 
 | 2 | +
  | 
 | 3 | +It's hard to surface errors from deck.gl back to Python, so it's a bad user experience  | 
 | 4 | +if the JS console errors and silently nothing renders. But also I don't want to depend  | 
 | 5 | +on the h3 library for this because the h3 library isn't vectorized (arghhhh!) and I  | 
 | 6 | +don't want to require the dependency.  | 
 | 7 | +
  | 
 | 8 | +So instead, I spend my time porting code into Numpy 😄.  | 
 | 9 | +
  | 
 | 10 | +Ported from Rust code in h3o:  | 
 | 11 | +
  | 
 | 12 | +https://github.com/HydroniumLabs/h3o/blob/07dcb85d9cb539f685ec63050ef0954b1d9f3864/src/index/cell.rs#L1897-L1962  | 
 | 13 | +"""  | 
 | 14 | + | 
 | 15 | +from __future__ import annotations  | 
 | 16 | + | 
 | 17 | +from typing import TYPE_CHECKING  | 
 | 18 | + | 
 | 19 | +import numpy as np  | 
 | 20 | + | 
 | 21 | +if TYPE_CHECKING:  | 
 | 22 | +    from numpy.typing import NDArray  | 
 | 23 | + | 
 | 24 | +__all__ = ["validate_h3_indices"]  | 
 | 25 | + | 
 | 26 | +MODE_OFFSET = 59  | 
 | 27 | +"""Offset (in bits) of the mode in an H3 index."""  | 
 | 28 | + | 
 | 29 | +MODE_MASK = 0b1111 << MODE_OFFSET  | 
 | 30 | + | 
 | 31 | +EDGE_OFFSET = 56  | 
 | 32 | +"""Offset (in bits) of the cell edge in an H3 index."""  | 
 | 33 | + | 
 | 34 | +EDGE_MASK = 0b111 << EDGE_OFFSET  | 
 | 35 | + | 
 | 36 | +VERTEX_OFFSET = 56  | 
 | 37 | +"""Offset (in bits) of the cell vertex in an H3 index."""  | 
 | 38 | + | 
 | 39 | +VERTEX_MASK = 0b111 << VERTEX_OFFSET  | 
 | 40 | + | 
 | 41 | +DIRECTIONS_MASK = 0x0000_1FFF_FFFF_FFFF  | 
 | 42 | +"""Bitmask to select the directions bits in an H3 index."""  | 
 | 43 | + | 
 | 44 | +INDEX_MODE_CELL = 1  | 
 | 45 | +"""H3 index mode for cells."""  | 
 | 46 | + | 
 | 47 | +BASE_CELL_OFFSET = 45  | 
 | 48 | +"""Offset (in bits) of the base cell in an H3 index."""  | 
 | 49 | + | 
 | 50 | +BASE_CELL_MASK = 0b111_1111 << BASE_CELL_OFFSET  | 
 | 51 | +"""Bitmask to select the base cell bits in an H3 index."""  | 
 | 52 | + | 
 | 53 | +MAX_BASE_CELL = 121  | 
 | 54 | +"""Maximum value for a base cell."""  | 
 | 55 | + | 
 | 56 | +RESOLUTION_OFFSET = 52  | 
 | 57 | +"""The bit offset of the resolution in an H3 index."""  | 
 | 58 | + | 
 | 59 | +RESOLUTION_MASK = 0b1111 << RESOLUTION_OFFSET  | 
 | 60 | +"""Bitmask to select the resolution bits in an H3 index."""  | 
 | 61 | + | 
 | 62 | +MAX_RESOLUTION = 15  | 
 | 63 | +"""Maximum supported H3 resolution."""  | 
 | 64 | + | 
 | 65 | +DIRECTION_BITSIZE = 3  | 
 | 66 | +"""Size, in bits, of a direction (range [0; 6])."""  | 
 | 67 | + | 
 | 68 | +BASE_PENTAGONS_HI = 0x0020_0802_0008_0100  | 
 | 69 | +"""Bitmap where a bit's position represents a base cell value (high part).  | 
 | 70 | +
  | 
 | 71 | +Refactored from upstream 128 bit integer  | 
 | 72 | +https://github.com/HydroniumLabs/h3o/blob/3b40550291a57552117c48c19841557a3b0431e1/src/base_cell.rs#L12  | 
 | 73 | +"""  | 
 | 74 | + | 
 | 75 | +BASE_PENTAGONS_LO = 0x8402_0040_0100_4010  | 
 | 76 | +"""Bitmap where a bit's position represents a base cell value (low part).  | 
 | 77 | +
  | 
 | 78 | +Refactored from upstream 128 bit integer  | 
 | 79 | +https://github.com/HydroniumLabs/h3o/blob/3b40550291a57552117c48c19841557a3b0431e1/src/base_cell.rs#L12  | 
 | 80 | +"""  | 
 | 81 | + | 
 | 82 | +PENTAGON_BASE_CELLS = np.array(  | 
 | 83 | +    [4, 14, 24, 33, 38, 49, 58, 63, 72, 83, 97, 107],  | 
 | 84 | +    dtype=np.uint8,  | 
 | 85 | +)  | 
 | 86 | +"""Set of pentagon base cells."""  | 
 | 87 | + | 
 | 88 | + | 
 | 89 | +def validate_h3_indices(h3_indices: NDArray[np.uint64]) -> None:  | 
 | 90 | +    """Validate an array of uint64 H3 indices.  | 
 | 91 | +
  | 
 | 92 | +    Raises ValueError if any index is invalid.  | 
 | 93 | +    """  | 
 | 94 | +    invalid_reserved_bits = h3_indices >> 56 & 0b1000_0111 != 0  | 
 | 95 | +    bad_indices = np.where(invalid_reserved_bits)[0]  | 
 | 96 | +    if len(bad_indices) > 0:  | 
 | 97 | +        raise ValueError(  | 
 | 98 | +            f"Tainted reserved bits in indices: {bad_indices.tolist()}\n"  | 
 | 99 | +            f"with values {h3_indices[bad_indices].tolist()}",  | 
 | 100 | +        )  | 
 | 101 | + | 
 | 102 | +    invalid_mode = get_mode(h3_indices) != INDEX_MODE_CELL  | 
 | 103 | +    bad_indices = np.where(invalid_mode)[0]  | 
 | 104 | +    if len(bad_indices) > 0:  | 
 | 105 | +        raise ValueError(  | 
 | 106 | +            f"Invalid index mode in indices: {bad_indices.tolist()}",  | 
 | 107 | +            f"with values {h3_indices[bad_indices].tolist()}",  | 
 | 108 | +        )  | 
 | 109 | + | 
 | 110 | +    base = get_base_cell(h3_indices)  | 
 | 111 | +    invalid_base_cell = base > MAX_BASE_CELL  | 
 | 112 | +    bad_indices = np.where(invalid_base_cell)[0]  | 
 | 113 | +    if len(bad_indices) > 0:  | 
 | 114 | +        raise ValueError(  | 
 | 115 | +            f"Invalid base cell in indices: {bad_indices.tolist()}",  | 
 | 116 | +            f"with values {h3_indices[bad_indices].tolist()}",  | 
 | 117 | +        )  | 
 | 118 | + | 
 | 119 | +    # Resolution is always valid: coded on 4 bits, valid range is [0; 15].  | 
 | 120 | +    resolution = get_resolution(h3_indices)  | 
 | 121 | + | 
 | 122 | +    # Check that we have a tail of unused cells  after `resolution` cells.  | 
 | 123 | +    #  | 
 | 124 | +    # We expect every bit to be 1 in the tail (because unused cells are  | 
 | 125 | +    # represented by `0b111`), i.e. every bit set to 0 after a NOT.  | 
 | 126 | +    unused_count = MAX_RESOLUTION - resolution  | 
 | 127 | +    unused_bitsize = unused_count * DIRECTION_BITSIZE  | 
 | 128 | +    unused_mask = (1 << unused_bitsize.astype(np.uint64)) - 1  | 
 | 129 | +    invalid_unused_direction_pattern = (~h3_indices) & unused_mask != 0  | 
 | 130 | +    bad_indices = np.where(invalid_unused_direction_pattern)[0]  | 
 | 131 | +    if len(bad_indices) > 0:  | 
 | 132 | +        raise ValueError(  | 
 | 133 | +            f"Invalid unused direction pattern in indices: {bad_indices.tolist()}",  | 
 | 134 | +            f"with values {h3_indices[bad_indices].tolist()}",  | 
 | 135 | +        )  | 
 | 136 | + | 
 | 137 | +    # Check that we have `resolution` valid cells (no unused ones).  | 
 | 138 | +    dirs_mask = (1 << (resolution * DIRECTION_BITSIZE).astype(np.uint64)) - 1  | 
 | 139 | +    dirs = (h3_indices >> unused_bitsize) & dirs_mask  | 
 | 140 | +    invalid_unused_direction = has_unused_direction(dirs)  | 
 | 141 | +    bad_indices = np.where(invalid_unused_direction)[0]  | 
 | 142 | +    if len(bad_indices) > 0:  | 
 | 143 | +        raise ValueError(  | 
 | 144 | +            f"Unexpected unused direction in indices: {bad_indices.tolist()}",  | 
 | 145 | +            f"with values {h3_indices[bad_indices].tolist()}",  | 
 | 146 | +        )  | 
 | 147 | + | 
 | 148 | +    # Check for pentagons with deleted subsequence.  | 
 | 149 | +    has_pentagon_base = np.logical_and(is_pentagon(base), resolution != 0)  | 
 | 150 | +    pentagon_base_indices = np.where(has_pentagon_base)[0]  | 
 | 151 | +    if len(pentagon_base_indices) > 0:  | 
 | 152 | +        pentagons = h3_indices[pentagon_base_indices]  | 
 | 153 | +        pentagon_resolutions = resolution[pentagon_base_indices]  | 
 | 154 | +        pentagon_dirs = dirs[pentagon_base_indices]  | 
 | 155 | + | 
 | 156 | +        # Move directions to the front, so that we can count leading zeroes.  | 
 | 157 | +        pentagon_offset = 64 - (pentagon_resolutions * DIRECTION_BITSIZE)  | 
 | 158 | + | 
 | 159 | +        # NOTE: The following was ported via GPT from Rust `leading_zeros`  | 
 | 160 | +        # https://github.com/HydroniumLabs/h3o/blob/07dcb85d9cb539f685ec63050ef0954b1d9f3864/src/index/cell.rs#L1951  | 
 | 161 | + | 
 | 162 | +        # Find the position of the first bit set, if it's a multiple of 3  | 
 | 163 | +        # that means we have a K axe as the first non-center direction,  | 
 | 164 | +        # which is forbidden.  | 
 | 165 | +        shifted = pentagon_dirs << pentagon_offset  | 
 | 166 | + | 
 | 167 | +        # Compute leading zeros for each element (assuming 64-bit unsigned integers)  | 
 | 168 | +        # where `leading_zeros = 64 - shifted.bit_length()`  | 
 | 169 | +        # numpy doesn't have bit_length, so use log2 and handle zeros  | 
 | 170 | +        bitlen = np.where(shifted == 0, 0, np.floor(np.log2(shifted)).astype(int) + 1)  | 
 | 171 | +        leading_zeros = 64 - bitlen  | 
 | 172 | + | 
 | 173 | +        # Add 1 and check if multiple of 3  | 
 | 174 | +        is_multiple_of_3 = ((leading_zeros + 1) % 3) == 0  | 
 | 175 | +        bad_indices = np.where(is_multiple_of_3)[0]  | 
 | 176 | +        if len(bad_indices) > 0:  | 
 | 177 | +            raise ValueError(  | 
 | 178 | +                f"Pentagonal cell index with a deleted subsequence: {bad_indices.tolist()}",  | 
 | 179 | +                f"with values {pentagons[bad_indices].tolist()}",  | 
 | 180 | +            )  | 
 | 181 | + | 
 | 182 | + | 
 | 183 | +def get_mode(bits: NDArray[np.uint64]) -> NDArray[np.uint8]:  | 
 | 184 | +    """Return the H3 index mode bits."""  | 
 | 185 | +    return ((bits & MODE_MASK) >> MODE_OFFSET).astype(np.uint8)  | 
 | 186 | + | 
 | 187 | + | 
 | 188 | +def get_base_cell(bits: NDArray[np.uint64]) -> NDArray[np.uint8]:  | 
 | 189 | +    """Return the H3 index base cell bits."""  | 
 | 190 | +    return ((bits & BASE_CELL_MASK) >> BASE_CELL_OFFSET).astype(np.uint8)  | 
 | 191 | + | 
 | 192 | + | 
 | 193 | +def get_resolution(bits: NDArray[np.uint64]) -> NDArray[np.uint8]:  | 
 | 194 | +    """Return the H3 index resolution."""  | 
 | 195 | +    return ((bits & RESOLUTION_MASK) >> RESOLUTION_OFFSET).astype(np.uint8)  | 
 | 196 | + | 
 | 197 | + | 
 | 198 | +def has_unused_direction(dirs: NDArray) -> NDArray[np.bool_]:  | 
 | 199 | +    """Check if there is at least one unused direction in the given directions.  | 
 | 200 | +
  | 
 | 201 | +    Copied from upstream  | 
 | 202 | +    https://github.com/HydroniumLabs/h3o/blob/07dcb85d9cb539f685ec63050ef0954b1d9f3864/src/index/cell.rs#L2056-L2107  | 
 | 203 | +    """  | 
 | 204 | +    LO_MAGIC = 0b001_001_001_001_001_001_001_001_001_001_001_001_001_001_001  # noqa: N806  | 
 | 205 | +    HI_MAGIC = 0b100_100_100_100_100_100_100_100_100_100_100_100_100_100_100  # noqa: N806  | 
 | 206 | + | 
 | 207 | +    return ((~dirs - LO_MAGIC) & (dirs & HI_MAGIC)) != 0  | 
 | 208 | + | 
 | 209 | + | 
 | 210 | +def is_pentagon(cell: NDArray[np.uint8]) -> NDArray[np.bool_]:  | 
 | 211 | +    """Return true if the base cell is pentagonal.  | 
 | 212 | +
  | 
 | 213 | +    Note that this is **not** copied from the upstream:  | 
 | 214 | +    https://github.com/HydroniumLabs/h3o/blob/3b40550291a57552117c48c19841557a3b0431e1/src/base_cell.rs#L33-L47  | 
 | 215 | +
  | 
 | 216 | +    Because they use a 128 bit integer as a bitmap, which is not available in  | 
 | 217 | +    numpy. Instead we use a simple lookup in a static array.  | 
 | 218 | +    """  | 
 | 219 | +    return np.isin(cell, PENTAGON_BASE_CELLS)  | 
0 commit comments