diff --git a/xrspatial/classify.py b/xrspatial/classify.py index 039384bc..6e4a9ebf 100644 --- a/xrspatial/classify.py +++ b/xrspatial/classify.py @@ -547,12 +547,12 @@ def quantile(agg: xr.DataArray, def _run_numpy_jenks_matrices(data, n_classes): n_data = data.shape[0] lower_class_limits = np.zeros( - (n_data + 1, n_classes + 1), dtype=np.float32 + (n_data + 1, n_classes + 1), dtype=np.float64 ) lower_class_limits[1, 1:n_classes + 1] = 1.0 var_combinations = np.zeros( - (n_data + 1, n_classes + 1), dtype=np.float32 + (n_data + 1, n_classes + 1), dtype=np.float64 ) var_combinations[2:n_data + 1, 1:n_classes + 1] = np.inf @@ -568,7 +568,7 @@ def _run_numpy_jenks_matrices(data, n_classes): lower_class_limit = l - m i4 = lower_class_limit - 1 - val = np.float32(data[i4]) + val = data[i4] # here we're estimating variance for each potential classing # of the data, for each potential number of classes. `w` @@ -610,7 +610,7 @@ def _run_jenks(data, n_classes): lower_class_limits, _ = _run_numpy_jenks_matrices(data, n_classes) k = data.shape[0] - kclass = np.zeros(n_classes + 1, dtype=np.float32) + kclass = np.zeros(n_classes + 1, dtype=np.float64) kclass[0] = data[0] kclass[-1] = data[-1] count_num = n_classes diff --git a/xrspatial/tests/test_classify.py b/xrspatial/tests/test_classify.py index e129b150..10d4f854 100644 --- a/xrspatial/tests/test_classify.py +++ b/xrspatial/tests/test_classify.py @@ -488,6 +488,39 @@ def test_natural_breaks_cupy_matches_numpy(): ) +def test_natural_breaks_large_offset_1100(): + """Jenks should separate tight clusters even when data has a large offset. + + Regression test for #1100: float32 internals caused the variance + formula to lose all significant digits for offset data. + """ + rng = np.random.default_rng(0) + centers = np.array([100_000, 100_010, 100_020, 100_030, 100_040]) + data = np.concatenate([c + rng.uniform(-1, 1, 200) for c in centers]) + agg = xr.DataArray(data.reshape(10, 100)) + + result = natural_breaks(agg, k=5, num_sample=None) + result_data = result.data + if hasattr(result_data, 'compute'): + result_data = result_data.compute() + + # All 5 classes should be present + unique_classes = np.unique(result_data[~np.isnan(result_data)]) + assert len(unique_classes) == 5, ( + f"Expected 5 classes, got {len(unique_classes)}: {unique_classes}" + ) + + # Each center's 200 points should be in a single class + flat = result_data.ravel() + for i, center in enumerate(centers): + start = i * 200 + end = start + 200 + chunk_classes = np.unique(flat[start:end]) + assert len(chunk_classes) == 1, ( + f"Center {center}: expected 1 class, got {len(chunk_classes)}" + ) + + @dask_array_available def test_natural_breaks_dask_matches_numpy(): elevation = np.arange(100, dtype=np.float64).reshape(10, 10)