diff --git a/benchmarks/benchmarks/bench_core.py b/benchmarks/benchmarks/bench_core.py index ea7aae007fdc..7bc2f6527ab4 100644 --- a/benchmarks/benchmarks/bench_core.py +++ b/benchmarks/benchmarks/bench_core.py @@ -150,6 +150,43 @@ def time_convolve(self, size1, size2, mode): np.convolve(self.x1, self.x2, mode=mode) +class CorrConvLags(Benchmark): + # Benchmarks for the maxlag / lags API on np.correlate / np.convolve. + # Demonstrates the speedup of computing only a window of lags vs. + # computing 'full' and slicing. + params = [[1000, int(1e5)], # size1 (longer signal) + [10, 100, 1000]] # size2 (shorter signal / kernel) + param_names = ['size1', 'size2'] + + def setup(self, size1, size2): + self.x1 = np.linspace(0, 1, num=size1) + self.x2 = np.cos(np.linspace(0, 2 * np.pi, num=size2)) + # A typical "small lag window" use case: just lags around 0. + self.maxlag_small = 5 + # And a moderate window. + self.maxlag_med = max(10, size2 // 2) + + def time_correlate_maxlag_small(self, size1, size2): + np.correlate(self.x1, self.x2, maxlag=self.maxlag_small) + + def time_correlate_maxlag_med(self, size1, size2): + np.correlate(self.x1, self.x2, maxlag=self.maxlag_med) + + def time_correlate_lags_range(self, size1, size2): + np.correlate(self.x1, self.x2, + lags=range(-self.maxlag_small, self.maxlag_small + 1)) + + def time_full_then_slice(self, size1, size2): + # The pre-existing way users had to do this: compute everything, + # throw away most of it. + full = np.correlate(self.x1, self.x2, mode='full') + center = len(full) // 2 + _ = full[center - self.maxlag_small:center + self.maxlag_small + 1] + + def time_convolve_maxlag_small(self, size1, size2): + np.convolve(self.x1, self.x2, maxlag=self.maxlag_small) + + class CountNonzero(Benchmark): param_names = ['numaxes', 'size', 'dtype'] params = [ diff --git a/doc/release/upcoming_changes/31261.c_api.rst b/doc/release/upcoming_changes/31261.c_api.rst new file mode 100644 index 000000000000..ad63af0b5b0a --- /dev/null +++ b/doc/release/upcoming_changes/31261.c_api.rst @@ -0,0 +1,8 @@ +New ``PyArray_CorrelateLags`` C API function +-------------------------------------------- +A new public C API function `PyArray_CorrelateLags` has been added for +computing one-dimensional cross-correlation at a specified lag range +``[minlag, maxlag)`` with a given ``lagstep``. The function accepts arrays +in any order and any valid lag-range orientation; internal swapping and +output reversal are handled transparently so the result is aligned with the +caller's input order and lag direction. diff --git a/doc/release/upcoming_changes/31261.new_feature.rst b/doc/release/upcoming_changes/31261.new_feature.rst new file mode 100644 index 000000000000..adb23da091e2 --- /dev/null +++ b/doc/release/upcoming_changes/31261.new_feature.rst @@ -0,0 +1,24 @@ +``maxlag`` and ``lags`` parameters for `numpy.correlate` and `numpy.convolve` +----------------------------------------------------------------------------- +`numpy.correlate` and `numpy.convolve` now accept ``maxlag`` and ``lags`` +keyword arguments to compute the result at a restricted set of lag values +instead of the full output, plus a ``returns_lagvector`` flag to return the +corresponding lag indices alongside the result. + +* ``maxlag=M`` (int) computes a symmetric inclusive window + ``[-M, M]`` (``2*M+1`` lags), matching MATLAB's ``xcorr(x, y, M)`` + convention. +* ``lags=`` accepts a Python ``range``, a ``slice`` with explicit start/stop, + or a 1-D integer array_like containing an arithmetic progression of lag + indices. +* ``returns_lagvector=True`` causes both functions to return a + ``(result, lagvector)`` tuple where ``lagvector`` enumerates the lags + corresponding to ``result``. + +These parameters are mutually exclusive and require ``mode='lags'`` (which is +the default when either is supplied). + +Example:: + + >>> np.correlate([1, 2, 3], [0, 1, 0.5], maxlag=1, returns_lagvector=True) + (array([2. , 3.5, 3. ]), array([-1, 0, 1])) diff --git a/doc/source/reference/routines.statistics.rst b/doc/source/reference/routines.statistics.rst index cd93e60253fb..3c3eddda8271 100644 --- a/doc/source/reference/routines.statistics.rst +++ b/doc/source/reference/routines.statistics.rst @@ -40,6 +40,7 @@ Correlating corrcoef correlate + correlate_lags cov Histograms diff --git a/numpy/__init__.py b/numpy/__init__.py index ce0452ffe8d0..ee1514b14b5e 100644 --- a/numpy/__init__.py +++ b/numpy/__init__.py @@ -203,6 +203,7 @@ copysign, copyto, correlate, + correlate_lags, cos, cosh, count_nonzero, diff --git a/numpy/__init__.pyi b/numpy/__init__.pyi index 4875e64dccc1..678fbfbe6712 100644 --- a/numpy/__init__.pyi +++ b/numpy/__init__.pyi @@ -353,6 +353,7 @@ from numpy._core.numeric import ( argwhere, flatnonzero, correlate, + correlate_lags, convolve, outer, tensordot, @@ -610,7 +611,7 @@ __all__ = [ "count_nonzero", "empty", "broadcast", "dtype", "fromstring", "fromfile", "frombuffer", "from_dlpack", "where", "argwhere", "copyto", "concatenate", "lexsort", "astype", "can_cast", "promote_types", "min_scalar_type", "result_type", - "isfortran", "empty_like", "zeros_like", "ones_like", "correlate", "convolve", + "isfortran", "empty_like", "zeros_like", "ones_like", "correlate", "correlate_lags", "convolve", "inner", "dot", "outer", "vdot", "roll", "rollaxis", "moveaxis", "cross", "tensordot", "little_endian", "fromiter", "array_equal", "array_equiv", "indices", "fromfunction", "isclose", "isscalar", "binary_repr", "base_repr", "ones", diff --git a/numpy/_core/__init__.pyi b/numpy/_core/__init__.pyi index ce5427bbfcd9..5ad001ff44c9 100644 --- a/numpy/_core/__init__.pyi +++ b/numpy/_core/__init__.pyi @@ -95,6 +95,7 @@ from .numeric import ( convolve, copyto, correlate, + correlate_lags, count_nonzero, cross, dot, @@ -424,6 +425,7 @@ __all__ = [ "copysign", "copyto", "correlate", + "correlate_lags", "cos", "cosh", "count_nonzero", diff --git a/numpy/_core/code_generators/cversions.txt b/numpy/_core/code_generators/cversions.txt index b058875d0455..25a44713ae96 100644 --- a/numpy/_core/code_generators/cversions.txt +++ b/numpy/_core/code_generators/cversions.txt @@ -85,3 +85,6 @@ # General loop registration for ufuncs, sort, and argsort # Version 21 (NumPy 2.5.0) No change 0x00000015 = fbd24fc5b2ba4f7cd3606ec6128de7a5 +# Version 22 (NumPy 2.6.0) +# Added PyArray_CorrelateLags +0x00000016 = c5f7b81de5f99f0df1320b4a5e64ea12 \ No newline at end of file diff --git a/numpy/_core/code_generators/numpy_api.py b/numpy/_core/code_generators/numpy_api.py index c2b471c71757..90027982dd39 100644 --- a/numpy/_core/code_generators/numpy_api.py +++ b/numpy/_core/code_generators/numpy_api.py @@ -409,6 +409,7 @@ def get_annotations(): # End 2.0 API # NpyIterGetTransferFlags (slot 223) added. # End 2.3 API + 'PyArray_CorrelateLags': (369, MinVersion("2.6")), } ufunc_types_api = { diff --git a/numpy/_core/include/numpy/ndarraytypes.h b/numpy/_core/include/numpy/ndarraytypes.h index 7aa23dd3426b..bec3f474d455 100644 --- a/numpy/_core/include/numpy/ndarraytypes.h +++ b/numpy/_core/include/numpy/ndarraytypes.h @@ -269,7 +269,8 @@ typedef enum { typedef enum { NPY_VALID=0, NPY_SAME=1, - NPY_FULL=2 + NPY_FULL=2, + NPY_LAGS=3 } NPY_CORRELATEMODE; /* The special not-a-time (NaT) value */ diff --git a/numpy/_core/include/numpy/numpyconfig.h b/numpy/_core/include/numpy/numpyconfig.h index 4bfe3ab09dea..19ed9c9cb745 100644 --- a/numpy/_core/include/numpy/numpyconfig.h +++ b/numpy/_core/include/numpy/numpyconfig.h @@ -80,6 +80,7 @@ #define NPY_2_3_API_VERSION 0x00000014 #define NPY_2_4_API_VERSION 0x00000015 #define NPY_2_5_API_VERSION 0x00000015 +#define NPY_2_6_API_VERSION 0x00000016 /* @@ -175,6 +176,8 @@ #define NPY_FEATURE_VERSION_STRING "2.4" #elif NPY_FEATURE_VERSION == NPY_2_5_API_VERSION #define NPY_FEATURE_VERSION_STRING "2.5" +#elif NPY_FEATURE_VERSION == NPY_2_6_API_VERSION + #define NPY_FEATURE_VERSION_STRING "2.6" #else #error "Missing version string define for new NumPy version." #endif diff --git a/numpy/_core/meson.build b/numpy/_core/meson.build index dc3985a0f5a3..ef6e057b85bb 100644 --- a/numpy/_core/meson.build +++ b/numpy/_core/meson.build @@ -52,7 +52,8 @@ C_ABI_VERSION = '0x02000000' # 0x00000014 - 2.3.x # 0x00000015 - 2.4.x # 0x00000015 - 2.5.x -C_API_VERSION = '0x00000015' +# 0x00000016 - 2.6.x +C_API_VERSION = '0x00000016' # Check whether we have a mismatch between the set C API VERSION and the # actual C API VERSION. Will raise a MismatchCAPIError if so. diff --git a/numpy/_core/multiarray.py b/numpy/_core/multiarray.py index c7f21b60c488..c876a9f45c61 100644 --- a/numpy/_core/multiarray.py +++ b/numpy/_core/multiarray.py @@ -37,6 +37,7 @@ 'asanyarray', 'ascontiguousarray', 'asfortranarray', 'bincount', 'broadcast', 'busday_count', 'busday_offset', 'busdaycalendar', 'can_cast', 'compare_chararrays', 'concatenate', 'copyto', 'correlate', 'correlate2', + 'correlatelags', 'count_nonzero', 'c_einsum', 'datetime_as_string', 'datetime_data', 'dot', 'dragon4_positional', 'dragon4_scientific', 'dtype', 'empty', 'empty_like', 'error', 'flagsobj', 'flatiter', 'format_longfloat', diff --git a/numpy/_core/multiarray.pyi b/numpy/_core/multiarray.pyi index e35e2375db1c..c75e6adb16ab 100644 --- a/numpy/_core/multiarray.pyi +++ b/numpy/_core/multiarray.pyi @@ -130,6 +130,7 @@ __all__ = [ "copyto", "correlate", "correlate2", + "correlatelags", "count_nonzero", "c_einsum", "datetime_as_string", @@ -254,6 +255,7 @@ _place: Final[Callable[..., object]] = ... _reconstruct: Final[Callable[..., object]] = ... _vec_string: Final[Callable[..., object]] = ... correlate2: Final[Callable[..., object]] = ... +correlatelags: Final[Callable[..., object]] = ... dragon4_positional: Final[Callable[..., object]] = ... dragon4_scientific: Final[Callable[..., object]] = ... interp_complex: Final[Callable[..., object]] = ... diff --git a/numpy/_core/numeric.py b/numpy/_core/numeric.py index 6bd03ae75c5d..028fe835758e 100644 --- a/numpy/_core/numeric.py +++ b/numpy/_core/numeric.py @@ -78,7 +78,7 @@ 'argwhere', 'copyto', 'concatenate', 'lexsort', 'astype', 'can_cast', 'promote_types', 'min_scalar_type', 'result_type', 'isfortran', 'empty_like', 'zeros_like', 'ones_like', - 'correlate', 'convolve', 'inner', 'dot', 'outer', 'vdot', 'roll', + 'correlate', 'correlate_lags', 'convolve', 'inner', 'dot', 'outer', 'vdot', 'roll', 'rollaxis', 'moveaxis', 'cross', 'tensordot', 'little_endian', 'fromiter', 'array_equal', 'array_equiv', 'indices', 'fromfunction', 'isclose', 'isscalar', 'binary_repr', 'base_repr', 'ones', @@ -713,12 +713,197 @@ def flatnonzero(a): return np.nonzero(np.ravel(a))[0] -def _correlate_dispatcher(a, v, mode=None): +_CorrModeDefault = object() + +_CORR_MODE_MAP = { + 'valid': 0, + 'same': 1, + 'full': 2, + 'lags': 3, +} + + +def _mode_from_name(mode): + if isinstance(mode, int): + if mode in _CORR_MODE_MAP.values(): + return mode + elif isinstance(mode, str): + if mode in _CORR_MODE_MAP: + return _CORR_MODE_MAP[mode] + else: + raise TypeError("correlate/convolve mode must be a string or int, " + "one of 'valid', 'same', 'full', 'lags'") + raise ValueError("correlate/convolve mode must be " + "one of 'valid', 'same', 'full', 'lags'") + + +def _lags_from_maxlag(maxlag): + """Convert a maxlag int to a (minlag, maxlag_exclusive, step) tuple. + + ``maxlag=M`` requests the symmetric inclusive range ``[-M, M]``, + matching MATLAB's ``xcorr(x, y, M)`` convention (2*M+1 lags total). + """ + if not isinstance(maxlag, (int, nt.integer)): + raise TypeError("maxlag must be an integer") + m = int(maxlag) + if m < 0: + raise ValueError("maxlag must be non-negative") + return (-m, m + 1, 1) + + +def _lags_from_lags(lag): + """Convert a lags specification to (minlag, maxlag_exclusive, lagstep). + + Accepts a ``range``, ``slice`` (with explicit start/stop), or a 1-D + array_like containing an arithmetic progression of integers. + """ + if isinstance(lag, range): + return (lag.start, lag.stop, lag.step) + if isinstance(lag, slice): + if lag.start is None or lag.stop is None: + raise ValueError("lags slice must have explicit start and stop") + step = 1 if lag.step is None else int(lag.step) + if step == 0: + raise ValueError("lagstep must not be zero") + return (int(lag.start), int(lag.stop), step) + + arr = asanyarray(lag) + if arr.ndim != 1 or arr.size == 0: + raise ValueError("lags must be a 1-D non-empty sequence") + if not nt.issubdtype(arr.dtype, nt.integer): + raise TypeError("lags values must be integers") + if arr.size == 1: + return (int(arr[0]), int(arr[0]) + 1, 1) + step = int(arr[1] - arr[0]) + if step == 0: + raise ValueError("lagstep must not be zero") + expected = arr[0] + step * arange(arr.size) + if not (arr == expected).all(): + raise ValueError( + "lags array must be an arithmetic progression " + "(use a range or slice for non-arithmetic patterns)") + return (int(arr[0]), int(arr[-1]) + step, step) + + +def _lags_from_mode(alen, vlen, mode): + """ + Compute the lag range for a given mode. + + Produces lags aligned with the caller's original (alen, vlen) order + (i.e. matches what ``np.correlate(a, v, mode)`` returns element-by-element). + """ + inverted = False + if alen < vlen: + alen, vlen = vlen, alen + inverted = True + + if mode == 0: + m0, m1 = 0, alen - vlen + 1 + elif mode == 1: + half = int(vlen / 2) + m0, m1 = -half, alen - half + elif mode == 2: + m0, m1 = -vlen + 1, alen + else: + raise ValueError("correlate/convolve mode must be " + "one of 'valid', 'same', 'full'") + + if inverted: + count = m1 - m0 # lagstep is always 1 for mode-based lags + m0, m1 = -(m0 + count - 1), -m0 + 1 + + return (m0, m1, 1) + + +def _correlate_lags_dispatcher(a_len, v_len, mode=None, *, maxlag=None, + lags=None): + return () + + +@array_function_dispatch(_correlate_lags_dispatcher) +def correlate_lags(a_len, v_len, mode=_CorrModeDefault, *, maxlag=None, + lags=None): + """ + Return the lag indices for a `~numpy.correlate` call with the same + parameters. + + Parameters + ---------- + a_len : int + Length of the first input sequence (``len(a)`` in ``correlate(a, v, ...)``). + v_len : int + Length of the second input sequence (``len(v)`` in ``correlate(a, v, ...)``). + mode : {'valid', 'same', 'full'}, optional + The same mode that would be passed to `~numpy.correlate`. + If ``maxlag`` or ``lags`` is provided and ``mode`` is not specified, + ``mode`` defaults to 'lags'. + maxlag : int, optional + The same ``maxlag`` that would be passed to `~numpy.correlate`. + Mutually exclusive with ``lags``. + lags : range, slice, or 1-D array_like of int, optional + The same ``lags`` that would be passed to `~numpy.correlate`. + Mutually exclusive with ``maxlag``. + + Returns + ------- + lags : ndarray + Array of lag indices corresponding element-by-element to the output + of ``np.correlate(a, v, mode, maxlag=maxlag, lags=lags)``. + + See Also + -------- + correlate : Cross-correlation of two 1-D sequences. + + Examples + -------- + >>> import numpy as np + >>> np.correlate_lags(5, 3, mode='full') + array([-2, -1, 0, 1, 2, 3, 4]) + >>> np.correlate_lags(5, 3, mode='valid') + array([0, 1, 2]) + >>> np.correlate_lags(5, 3, maxlag=2) + array([-2, -1, 0, 1, 2]) + + Pair the lag vector with a correlation result: + + >>> a, v = [1, 2, 3, 4, 5], [0, 1, 0.5] + >>> r = np.correlate(a, v, mode='full') + >>> lag_vec = np.correlate_lags(len(a), len(v), mode='full') + >>> r[lag_vec == 0] + array([3.5]) + """ + if maxlag is not None and lags is not None: + raise TypeError("cannot specify both maxlag and lags") + lags_given = maxlag is not None or lags is not None + + if mode is _CorrModeDefault: + mode = 'lags' if lags_given else 'valid' + mode = _mode_from_name(mode) + + if mode in (0, 1, 2): + if lags_given: + raise ValueError( + "maxlag/lags cannot be used with mode " + "'valid', 'same', or 'full'") + lags_tuple = _lags_from_mode(a_len, v_len, mode) + elif mode == 3: + if not lags_given: + raise ValueError( + "maxlag or lags is required for mode='lags'") + if maxlag is not None: + lags_tuple = _lags_from_maxlag(maxlag) + else: + lags_tuple = _lags_from_lags(lags) + + return arange(lags_tuple[0], lags_tuple[1], lags_tuple[2]) + + +def _correlate_dispatcher(a, v, mode=None, *, maxlag=None, lags=None): return (a, v) @array_function_dispatch(_correlate_dispatcher) -def correlate(a, v, mode='valid'): +def correlate(a, v, mode=_CorrModeDefault, *, maxlag=None, lags=None): r""" Cross-correlation of two 1-dimensional sequences. @@ -734,9 +919,21 @@ def correlate(a, v, mode='valid'): ---------- a, v : array_like Input sequences. - mode : {'valid', 'same', 'full'}, optional + mode : {'valid', 'same', 'full', 'lags'}, optional Refer to the `convolve` docstring. Note that the default is 'valid', unlike `convolve`, which uses 'full'. + If ``maxlag`` or ``lags`` is provided and ``mode`` is not specified, + ``mode`` defaults to 'lags'. + maxlag : int, optional + Compute the cross-correlation at lags + ``-maxlag, -maxlag+1, ..., maxlag`` (a symmetric inclusive window + of ``2*maxlag+1`` lags around 0). + Mutually exclusive with ``lags``. + lags : range, slice, or 1-D array_like of int, optional + Explicit lag specification. Accepts a Python ``range`` object, a + ``slice`` with explicit start and stop, or a 1-D array_like + containing an arithmetic progression of integer lag indices. + Mutually exclusive with ``maxlag``. Returns ------- @@ -745,6 +942,7 @@ def correlate(a, v, mode='valid'): See Also -------- + correlate_lags : Return the lag indices corresponding to a `correlate` call. convolve : Discrete, linear convolution of two one-dimensional sequences. scipy.signal.correlate : uses FFT which has superior performance on large arrays. @@ -769,13 +967,22 @@ def correlate(a, v, mode='valid'): Examples -------- - >>> import numpy as np >>> np.correlate([1, 2, 3], [0, 1, 0.5]) array([3.5]) - >>> np.correlate([1, 2, 3], [0, 1, 0.5], "same") - array([2. , 3.5, 3. ]) - >>> np.correlate([1, 2, 3], [0, 1, 0.5], "full") - array([0.5, 2. , 3.5, 3. , 0. ]) + >>> np.correlate([1, 2, 3], [0, 1, 0.5], mode="same") + array([ 2. , 3.5, 3. ]) + >>> np.correlate([1, 2, 3], [0, 1, 0.5], maxlag=1) + array([ 2. , 3.5, 3. ]) + >>> np.correlate([1, 2, 3], [0, 1, 0.5], lags=range(-1, 2, 2)) + array([ 2., 3.]) + + Pair the result with its lag indices using `correlate_lags`: + + >>> a, v = [1, 2, 3], [0, 1, 0.5] + >>> r = np.correlate(a, v, mode="full") + >>> lag_vec = np.correlate_lags(len(a), len(v), mode="full") + >>> lag_vec + array([-2, -1, 0, 1, 2]) Using complex sequences: @@ -790,15 +997,39 @@ def correlate(a, v, mode='valid'): array([ 0.0+0.j , 3.0+1.j , 1.5+1.5j, 1.0+0.j , 0.5+0.5j]) """ - return multiarray.correlate2(a, v, mode) + if maxlag is not None and lags is not None: + raise TypeError("cannot specify both maxlag and lags") + lags_given = maxlag is not None or lags is not None + # Resolve mode: if unset, infer from whether maxlag/lags was provided + if mode is _CorrModeDefault: + mode = 'lags' if lags_given else 'valid' + mode = _mode_from_name(mode) -def _convolve_dispatcher(a, v, mode=None): + if mode in (0, 1, 2): + if lags_given: + raise ValueError( + "maxlag/lags cannot be used with mode " + "'valid', 'same', or 'full'") + return multiarray.correlate2(a, v, mode) + elif mode == 3: + if not lags_given: + raise ValueError( + "maxlag or lags is required for mode='lags'") + if maxlag is not None: + lags_tuple = _lags_from_maxlag(maxlag) + else: + lags_tuple = _lags_from_lags(lags) + return multiarray.correlatelags( + a, v, lags_tuple[0], lags_tuple[1], lags_tuple[2]) + + +def _convolve_dispatcher(a, v, mode=None, *, maxlag=None, lags=None): return (a, v) @array_function_dispatch(_convolve_dispatcher) -def convolve(a, v, mode='full'): +def convolve(a, v, mode=_CorrModeDefault, *, maxlag=None, lags=None): """ Returns the discrete, linear convolution of two one-dimensional sequences. @@ -816,23 +1047,42 @@ def convolve(a, v, mode='full'): First one-dimensional input array. v : (M,) array_like Second one-dimensional input array. - mode : {'full', 'valid', 'same'}, optional + mode : {'full', 'valid', 'same', 'lags'}, optional 'full': By default, mode is 'full'. This returns the convolution at each point of overlap, with an output shape of (N+M-1,). At the end-points of the convolution, the signals do not overlap - completely, and boundary effects may be seen. + completely, and boundary effects may be seen. This corresponds + with a lag tuple of (-M+1, N, 1) for N>M or (-N+1, M, 1) + for M>N. 'same': - Mode 'same' returns output of length ``max(M, N)``. Boundary - effects are still visible. + Mode `same` returns output of length ``max(M, N)``. Boundary + effects are still visible. This corresponds with a lag tuple of + (-M/2, N-M/2, 1) for N>M or (-M+N/2+1, N/2+1, 1) for M>N. 'valid': Mode 'valid' returns output of length ``max(M, N) - min(M, N) + 1``. The convolution product is only given for points where the signals overlap completely. Values outside - the signal boundary have no effect. - + the signal boundary have no effect. This corresponds with a lag tuple + of (0, N-M+1, 1) for N>M or (-M+N, 1, 1) for M>N. + + 'lags': + Mode 'lags' uses ``maxlag`` or ``lags`` to define the lags for + which to perform the convolution. + If ``maxlag`` or ``lags`` is provided and ``mode`` is not + specified, ``mode`` defaults to 'lags'. + + maxlag : int, optional + Compute the convolution at lags ``-maxlag, -maxlag+1, ..., maxlag`` + (a symmetric inclusive window of ``2*maxlag+1`` lags around 0). + Mutually exclusive with ``lags``. + lags : range, slice, or 1-D array_like of int, optional + Explicit lag specification. Accepts a Python ``range`` object, a + ``slice`` with explicit start and stop, or a 1-D array_like containing + an arithmetic progression of integer lag indices. + Mutually exclusive with ``maxlag``. Returns ------- out : ndarray @@ -840,6 +1090,7 @@ def convolve(a, v, mode='full'): See Also -------- + correlate_lags : Return the lag indices corresponding to a `correlate` call. scipy.signal.fftconvolve : Convolve two arrays using the Fast Fourier Transform. scipy.linalg.toeplitz : Used to construct the convolution operator. @@ -877,24 +1128,72 @@ def convolve(a, v, mode='full'): Contains boundary effects, where zeros are taken into account: - >>> np.convolve([1,2,3],[0,1,0.5], 'same') + >>> np.convolve([1,2,3],[0,1,0.5], mode='same') array([1. , 2.5, 4. ]) - The two arrays are of the same length, so there - is only one position where they completely overlap: + The two arrays are of the same length, so there is only one position + where they completely overlap, corresponding to a lag of 0: + + >>> np.convolve([1,2,3],[0,1,0.5], mode='valid') + array([ 2.5]) + + Find the convolution for lags ranging from -1 to 1 + (lag 0 aligns the left sides of the arrays; negative lags shift the + second array left, positive lags shift it right): + + >>> np.convolve([1,2,3],[0,1,0.5], maxlag=1) + array([ 1. , 2.5, 4. ]) + + Find the convolution for lags ranging from -2 to 4 with steps of 2: - >>> np.convolve([1,2,3],[0,1,0.5], 'valid') - array([2.5]) + >>> np.convolve([1,2,3,4,5], [0,1,0.5], lags=range(-2, 6, 2)) + array([ 0. , 2.5, 5.5, 2.5]) + + Pair the result with lag indices using `correlate_lags`: + + >>> a, v = [1, 2, 3], [0, 1, 0.5] + >>> r = np.convolve(a, v, mode='valid') + >>> lag_vec = np.correlate_lags(len(a), len(v), mode='valid') + >>> lag_vec + array([0]) """ a, v = array(a, copy=None, ndmin=1), array(v, copy=None, ndmin=1) - if len(a) == 0: + alen, vlen = len(a), len(v) + if alen == 0: raise ValueError('a cannot be empty') - if len(v) == 0: + if vlen == 0: raise ValueError('v cannot be empty') - if len(v) > len(a): + + if vlen > alen: a, v = v, a - return multiarray.correlate(a, v[::-1], mode) + + if maxlag is not None and lags is not None: + raise TypeError("cannot specify both maxlag and lags") + lags_given = maxlag is not None or lags is not None + + # Resolve mode: if unset, infer from whether maxlag/lags was provided + if mode is _CorrModeDefault: + mode = 'lags' if lags_given else 'full' + mode = _mode_from_name(mode) + + if mode in (0, 1, 2): + if lags_given: + raise ValueError( + "maxlag/lags cannot be used with mode " + "'valid', 'same', or 'full'") + return multiarray.correlate(a, v[::-1], mode) + elif mode == 3: + if not lags_given: + raise ValueError( + "maxlag or lags is required for mode='lags'") + if maxlag is not None: + lags_tuple = _lags_from_maxlag(maxlag) + else: + lags_tuple = _lags_from_lags(lags) + return multiarray.correlatelags( + a, v[::-1], lags_tuple[0], lags_tuple[1], lags_tuple[2], + conjugate=False) def _outer_dispatcher(a, b, out=None): diff --git a/numpy/_core/numeric.pyi b/numpy/_core/numeric.pyi index 1dd34d6a4fcd..ef7fd2671881 100644 --- a/numpy/_core/numeric.pyi +++ b/numpy/_core/numeric.pyi @@ -403,6 +403,7 @@ __all__ = [ "copysign", "copyto", "correlate", + "correlate_lags", "cos", "cosh", "count_nonzero", @@ -1048,44 +1049,88 @@ def flatnonzero(a: ArrayLike) -> _Array1D[np.intp]: ... # NOTE: we ignore UP047 because inlining `_AnyScalarT` would result in a lot of code duplication +type _LagsArg = int | tuple[int, int] | tuple[int, int, int] | range | slice | ArrayLike | None + # keep in sync with `convolve` and `ma.core.correlate` @overload def correlate( # noqa: UP047 - a: _ArrayLike1D[_AnyNumericScalarT], v: _ArrayLike1D[_AnyNumericScalarT], mode: _CorrelateMode = "valid" + a: _ArrayLike1D[_AnyNumericScalarT], v: _ArrayLike1D[_AnyNumericScalarT], + mode: _CorrelateMode | L["lags"] = ..., *, maxlag: int | None = ..., + lags: _LagsArg = ..., ) -> _Array1D[_AnyNumericScalarT]: ... @overload -def correlate(a: _ArrayLike1DBool_co, v: _ArrayLike1DBool_co, mode: _CorrelateMode = "valid") -> _Array1D[np.bool]: ... +def correlate( + a: _ArrayLike1DBool_co, v: _ArrayLike1DBool_co, + mode: _CorrelateMode | L["lags"] = ..., *, maxlag: int | None = ..., + lags: _LagsArg = ..., +) -> _Array1D[np.bool]: ... @overload -def correlate(a: _ArrayLike1DInt_co, v: _ArrayLike1DInt_co, mode: _CorrelateMode = "valid") -> _Array1D[np.int_ | Any]: ... +def correlate( + a: _ArrayLike1DInt_co, v: _ArrayLike1DInt_co, + mode: _CorrelateMode | L["lags"] = ..., *, maxlag: int | None = ..., + lags: _LagsArg = ..., +) -> _Array1D[np.int_ | Any]: ... @overload -def correlate(a: _ArrayLike1DFloat_co, v: _ArrayLike1DFloat_co, mode: _CorrelateMode = "valid") -> _Array1D[np.float64 | Any]: ... +def correlate( + a: _ArrayLike1DFloat_co, v: _ArrayLike1DFloat_co, + mode: _CorrelateMode | L["lags"] = ..., *, maxlag: int | None = ..., + lags: _LagsArg = ..., +) -> _Array1D[np.float64 | Any]: ... @overload def correlate( - a: _ArrayLike1DNumber_co, v: _ArrayLike1DNumber_co, mode: _CorrelateMode = "valid" + a: _ArrayLike1DNumber_co, v: _ArrayLike1DNumber_co, + mode: _CorrelateMode | L["lags"] = ..., *, maxlag: int | None = ..., + lags: _LagsArg = ..., ) -> _Array1D[np.complex128 | Any]: ... @overload def correlate( - a: _ArrayLike1DTD64_co, v: _ArrayLike1DTD64_co, mode: _CorrelateMode = "valid" + a: _ArrayLike1DTD64_co, v: _ArrayLike1DTD64_co, + mode: _CorrelateMode | L["lags"] = ..., *, maxlag: int | None = ..., + lags: _LagsArg = ..., ) -> _Array1D[np.timedelta64 | Any]: ... +def correlate_lags( + a_len: SupportsIndex, v_len: SupportsIndex, + mode: _CorrelateMode | L["lags"] = ..., *, maxlag: int | None = ..., + lags: _LagsArg = ..., +) -> _Array1D[np.intp]: ... + # keep in sync with `correlate` @overload def convolve( # noqa: UP047 - a: _ArrayLike1D[_AnyNumericScalarT], v: _ArrayLike1D[_AnyNumericScalarT], mode: _CorrelateMode = "valid" + a: _ArrayLike1D[_AnyNumericScalarT], v: _ArrayLike1D[_AnyNumericScalarT], + mode: _CorrelateMode | L["lags"] = ..., *, maxlag: int | None = ..., + lags: _LagsArg = ..., ) -> _Array1D[_AnyNumericScalarT]: ... @overload -def convolve(a: _ArrayLike1DBool_co, v: _ArrayLike1DBool_co, mode: _CorrelateMode = "valid") -> _Array1D[np.bool]: ... +def convolve( + a: _ArrayLike1DBool_co, v: _ArrayLike1DBool_co, + mode: _CorrelateMode | L["lags"] = ..., *, maxlag: int | None = ..., + lags: _LagsArg = ..., +) -> _Array1D[np.bool]: ... @overload -def convolve(a: _ArrayLike1DInt_co, v: _ArrayLike1DInt_co, mode: _CorrelateMode = "valid") -> _Array1D[np.int_ | Any]: ... +def convolve( + a: _ArrayLike1DInt_co, v: _ArrayLike1DInt_co, + mode: _CorrelateMode | L["lags"] = ..., *, maxlag: int | None = ..., + lags: _LagsArg = ..., +) -> _Array1D[np.int_ | Any]: ... @overload -def convolve(a: _ArrayLike1DFloat_co, v: _ArrayLike1DFloat_co, mode: _CorrelateMode = "valid") -> _Array1D[np.float64 | Any]: ... +def convolve( + a: _ArrayLike1DFloat_co, v: _ArrayLike1DFloat_co, + mode: _CorrelateMode | L["lags"] = ..., *, maxlag: int | None = ..., + lags: _LagsArg = ..., +) -> _Array1D[np.float64 | Any]: ... @overload def convolve( - a: _ArrayLike1DNumber_co, v: _ArrayLike1DNumber_co, mode: _CorrelateMode = "valid" + a: _ArrayLike1DNumber_co, v: _ArrayLike1DNumber_co, + mode: _CorrelateMode | L["lags"] = ..., *, maxlag: int | None = ..., + lags: _LagsArg = ..., ) -> _Array1D[np.complex128 | Any]: ... @overload def convolve( - a: _ArrayLike1DTD64_co, v: _ArrayLike1DTD64_co, mode: _CorrelateMode = "valid" + a: _ArrayLike1DTD64_co, v: _ArrayLike1DTD64_co, + mode: _CorrelateMode | L["lags"] = ..., *, maxlag: int | None = ..., + lags: _LagsArg = ..., ) -> _Array1D[np.timedelta64 | Any]: ... # keep roughly in sync with `convolve` and `correlate`, but for 2-D output and an additional `out` overload, diff --git a/numpy/_core/src/multiarray/conversion_utils.c b/numpy/_core/src/multiarray/conversion_utils.c index 41405beef9a8..453ccbc9364b 100644 --- a/numpy/_core/src/multiarray/conversion_utils.c +++ b/numpy/_core/src/multiarray/conversion_utils.c @@ -862,6 +862,10 @@ static int correlatemode_parser(char const *str, Py_ssize_t length, void *data) *val = NPY_FULL; is_exact = (length == 4 && strcmp(str, "full") == 0); } + else if (str[0] == 'L' || str[0] == 'l') { + *val = NPY_LAGS; + is_exact = (length == 4 && strcmp(str, "lags") == 0); + } else { return -1; } @@ -871,7 +875,7 @@ static int correlatemode_parser(char const *str, Py_ssize_t length, void *data) */ if (!is_exact) { PyErr_SetString(PyExc_ValueError, - "Use one of 'valid', 'same', or 'full' for convolve/correlate mode"); + "Use one of 'valid', 'same', 'full', or 'lags' for convolve/correlate mode"); return -1; } @@ -887,7 +891,7 @@ PyArray_CorrelatemodeConverter(PyObject *object, NPY_CORRELATEMODE *val) if (PyUnicode_Check(object)) { return string_converter_helper( object, (void *)val, correlatemode_parser, "mode", - "must be one of 'valid', 'same', or 'full'"); + "must be one of 'valid', 'same', 'full', or 'lags'"); } else { @@ -898,14 +902,14 @@ PyArray_CorrelatemodeConverter(PyObject *object, NPY_CORRELATEMODE *val) "convolve/correlate mode not understood"); return NPY_FAIL; } - if (number <= (int) NPY_FULL + if (number <= (int) NPY_LAGS && number >= (int) NPY_VALID) { *val = (NPY_CORRELATEMODE) number; return NPY_SUCCEED; } else { PyErr_Format(PyExc_ValueError, - "integer convolve/correlate mode must be 0, 1, or 2"); + "integer convolve/correlate mode must be 0, 1, 2, or 3"); return NPY_FAIL; } } diff --git a/numpy/_core/src/multiarray/multiarraymodule.c b/numpy/_core/src/multiarray/multiarraymodule.c index 359793bec0d8..1799fdaabc7b 100644 --- a/numpy/_core/src/multiarray/multiarraymodule.c +++ b/numpy/_core/src/multiarray/multiarraymodule.c @@ -1132,26 +1132,41 @@ PyArray_MatrixProduct2(PyObject *op1, PyObject *op2, PyArrayObject* out) return NULL; } +/* Forward declaration -- definition is below _pyarray_correlate */ +static int _pyarray_revert(PyArrayObject *ret); /* - * Implementation which is common between PyArray_Correlate - * and PyArray_Correlate2. + * Core correlation computation - common between PyArray_Correlate + * and PyArray_Correlate2 and PyArray_CorrelateLags. * - * inverted is set to 1 if computed correlate(ap2, ap1), 0 otherwise + * Handles all input forms internally: + * - Swaps arrays if n1 < n2 (and negates lags) + * - Normalizes negative lagstep to positive with reversed range + * - Reverses output if internal swaps changed the lag orientation + * + * Callers may pass any valid combination of (ap1, ap2, minlag, maxlag, lagstep) + * and receive a result aligned with their original array/lag order. */ static PyArrayObject* _pyarray_correlate(PyArrayObject *ap1, PyArrayObject *ap2, PyArray_Descr *typec, - int mode, int *inverted) + npy_intp minlag, npy_intp maxlag, npy_intp lagstep) { - PyArrayObject *ret; + PyArrayObject *ret, *swap; npy_intp length; - npy_intp i, n1, n2, n, n_left, n_right; + npy_intp i, i1, n1, n2, n, n11; + npy_intp lag, tmplag, maxleft, maxright, phase2_end; npy_intp is1, is2, os; char *ip1, *ip2, *op; PyArray_DotFunc *dot; + int inverted; NPY_BEGIN_THREADS_DEF; + if (lagstep == 0) { + lagstep = 1; + } + + /* size of x (n1) and y (n2) */ n1 = PyArray_DIMS(ap1)[0]; n2 = PyArray_DIMS(ap2)[0]; if (n1 == 0) { @@ -1162,48 +1177,55 @@ _pyarray_correlate(PyArrayObject *ap1, PyArrayObject *ap2, PyArray_Descr *typec, PyErr_SetString(PyExc_ValueError, "second array argument cannot be empty"); return NULL; } + + /* Ensure n1 >= n2 by swapping. Swapping negates the lag orientation. */ if (n1 < n2) { - ret = ap1; + swap = ap1; ap1 = ap2; - ap2 = ret; - ret = NULL; + ap2 = swap; i = n1; n1 = n2; n2 = i; - *inverted = 1; - } else { - *inverted = 0; + minlag = -minlag; + maxlag = -maxlag; + lagstep = -lagstep; } - length = n1; - n = n2; - switch(mode) { - case 0: - length = length - n + 1; - n_left = n_right = 0; - break; - case 1: - n_left = (npy_intp)(n/2); - n_right = n - n_left - 1; - break; - case 2: - n_right = n - 1; - n_left = n - 1; - length = length + n - 1; - break; - default: - PyErr_SetString(PyExc_ValueError, "mode must be 0, 1, or 2"); - return NULL; + /* Normalize negative lagstep: reverse the range to use positive step. + * The output will need to be reversed before returning. */ + if (lagstep < 0) { + inverted = 1; + i = minlag; + i1 = (npy_intp)(npy_ceil((maxlag - minlag)/(float)lagstep))*lagstep; + minlag = i1 + minlag - lagstep; + maxlag = i - lagstep; + lagstep = -lagstep; + } + else { + inverted = 0; + } + + if (maxlag <= minlag) { + length = 0; + } + else { + length = (maxlag - minlag + lagstep - 1)/lagstep; } /* * Need to choose an output array that can hold a sum * -- use priority to determine which subtype. + * ret is the array that will be returned as the answer */ ret = new_array_for_sum(ap1, ap2, NULL, 1, &length, typec, NULL); if (ret == NULL) { return NULL; } + /* Zero-initialize so out-of-bounds lags are correctly zero. + * Skip for object arrays where zeroing would create NULL pointers. */ + if (!PyDataType_FLAGCHK(PyArray_DESCR(ret), NPY_NEEDS_PYAPI)) { + memset(PyArray_DATA(ret), 0, PyArray_NBYTES(ret)); + } dot = PyDataType_GetArrFuncs(PyArray_DESCR(ret))->dotfunc; if (dot == NULL) { PyErr_SetString(PyExc_ValueError, @@ -1213,40 +1235,69 @@ _pyarray_correlate(PyArrayObject *ap1, PyArrayObject *ap2, PyArray_Descr *typec, int needs_pyapi = PyDataType_FLAGCHK(PyArray_DESCR(ret), NPY_NEEDS_PYAPI); NPY_BEGIN_THREADS_DESCR(PyArray_DESCR(ret)); + /* p is pointer to array start and s is stride */ + /* i1 is x, i2, is y, o is answer */ + ip1 = PyArray_DATA(ap1); is1 = PyArray_STRIDES(ap1)[0]; + ip2 = PyArray_DATA(ap2); is2 = PyArray_STRIDES(ap2)[0]; op = PyArray_DATA(ret); os = PyArray_ITEMSIZE(ret); - ip1 = PyArray_DATA(ap1); - ip2 = PyArray_BYTES(ap2) + n_left*is2; - n = n - n_left; - for (i = 0; i < n_left; i++) { - dot(ip1, is1, ip2, is2, op, n, ret); + + lag = minlag; + if (lag < -n2+1) { + /* if minlag is before any overlap between the vectors, + * then skip to first relevant lag + */ + op += os*((-n2+1) - lag); + lag = -n2+1; + } + + /* Phase 1: lags where y is left of x, i.e. lag in [-(n2-1), 0). + * Overlap length is n2 + lag. */ + maxleft = (0 < maxlag ? 0 : maxlag); + tmplag = lag; + for (lag = tmplag; lag < maxleft; lag+=lagstep) { + n = n2 + lag; + dot(ip1, is1, ip2 - lag*is2, is2, op, n, ret); if (needs_pyapi && PyErr_Occurred()) { goto done; } - n++; - ip2 -= is2; + /* iterate over answer vector */ op += os; } - if (small_correlate(ip1, is1, n1 - n2 + 1, PyArray_TYPE(ap1), - ip2, is2, n, PyArray_TYPE(ap2), - op, os)) { - ip1 += is1 * (n1 - n2 + 1); - op += os * (n1 - n2 + 1); - } - else { - for (i = 0; i < (n1 - n2 + 1) && (!needs_pyapi || !PyErr_Occurred()); - i++) { - dot(ip1, is1, ip2, is2, op, n, ret); - ip1 += is1; + + /* Phase 2: lags where y entirely overlaps with x, i.e. lag in [0, n1-n2]. + * Upper bound is the smaller of maxlag and the full-overlap boundary. */ + n11 = n1; + phase2_end = maxlag < (n11 - n2 + 1) ? maxlag : (n11 - n2 + 1); + if (lagstep == 1 && lag < phase2_end && + small_correlate(ip1 + lag*is1, is1, + phase2_end - lag, PyArray_TYPE(ap1), + ip2, is2, n2, PyArray_TYPE(ap2), + op, os)) { + op += os * (phase2_end - lag); + lag = phase2_end; + } + else if (lag < phase2_end) { + tmplag = lag; + for (lag = tmplag; + lag < phase2_end && (!needs_pyapi || !PyErr_Occurred()); + lag += lagstep) { + dot(ip1 + lag*is1, is1, ip2, is2, op, n2, ret); op += os; } } - for (i = 0; i < n_right && (!needs_pyapi || !PyErr_Occurred()); i++) { - n--; - dot(ip1, is1, ip2, is2, op, n, ret); - ip1 += is1; + + /* Phase 3: lags where y is right of x, i.e. lag in [n1-n2+1, n1). + * Overlap length is n1 - lag. */ + maxright = (maxlag < n1) ? maxlag : n1; + tmplag = lag; + for (lag = tmplag; + lag < maxright && (!needs_pyapi || !PyErr_Occurred()); + lag += lagstep) { + n = n1 - lag; + dot(ip1 + lag*is1, is1, ip2, is2, op, n, ret); op += os; } @@ -1256,6 +1307,14 @@ _pyarray_correlate(PyArrayObject *ap1, PyArrayObject *ap2, PyArray_Descr *typec, goto clean_ret; } + /* If we inverted the range, reverse the output so it aligns with the + * caller's lag orientation. */ + if (inverted) { + if (_pyarray_revert(ret) != 0) { + goto clean_ret; + } + } + return ret; clean_ret: @@ -1310,19 +1369,80 @@ _pyarray_revert(PyArrayObject *ret) return 0; } +/* + * Generate the lags corresponding to each mode. + * Produces lags aligned with the caller's original (n1, n2) order -- + * callers can pass unswapped dimensions and use the result directly. + * minlag, maxlag, and lagstep are edited in-place. + */ +static void +_lags_from_mode(int mode, npy_intp n1, npy_intp n2, + npy_intp *minlag, npy_intp *maxlag, npy_intp *lagstep) { + npy_intp m0, m1, s; + npy_intp tmp, i, i1; + int inverted = 0; + + if (n1 < n2) { + tmp = n1; + n1 = n2; + n2 = tmp; + inverted = 1; + } + + s = 1; + switch(mode) { + case 0: + /* mode = 'valid' */ + m0 = 0; + m1 = n1 - n2 + 1; + break; + case 1: + /* mode = 'same' */ + m0 = -(npy_intp)(n2/2); + m1 = n1 + m0; + break; + case 2: + /* mode = 'full' */ + m0 = -n2 + 1; + m1 = n1; + break; + default: + PyErr_SetString(PyExc_ValueError, "mode must be 0, 1, or 2"); + return; + } + + /* If we swapped to get n1 >= n2, transform the lags back to the + * caller's original orientation. */ + if (inverted) { + i = m0; + i1 = (npy_intp)(npy_ceil((m1 - m0)/(float)s))*s; + m0 = -(i1 + m0 - s); + m1 = -(i - s); + } + + *minlag = m0; + *maxlag = m1; + *lagstep = s; +} + /*NUMPY_API * correlate(a1,a2,mode) * - * This function computes the usual correlation (correlate(a1, a2) != - * correlate(a2, a1), and conjugate the second argument for complex inputs + * Computes the cross-correlation of a1 with a2 using a fixed output mode + * ('valid', 'same', or 'full'). For complex inputs, the second array is + * conjugated before the computation (i.e. correlation semantics). + * + * This is the mode-based counterpart of PyArray_CorrelateLags with + * conjugate != 0. For convolution (no conjugation), use + * PyArray_Correlate instead. */ NPY_NO_EXPORT PyObject * PyArray_Correlate2(PyObject *op1, PyObject *op2, int mode) { PyArrayObject *ap1, *ap2, *ret = NULL; PyArray_Descr *typec = NULL; - int inverted; - int st; + npy_intp minlag, maxlag, lagstep; + npy_intp n1, n2; if (PyArray_DTypeFromObject(op1, NPY_MAXDIMS, &typec) < 0) { return NULL; @@ -1362,20 +1482,100 @@ PyArray_Correlate2(PyObject *op1, PyObject *op2, int mode) ap2 = cap2; } - ret = _pyarray_correlate(ap1, ap2, typec, mode, &inverted); + n1 = PyArray_DIMS(ap1)[0]; + n2 = PyArray_DIMS(ap2)[0]; + _lags_from_mode(mode, n1, n2, &minlag, &maxlag, &lagstep); + if (PyErr_Occurred()) { + goto clean_ap2; + } + ret = _pyarray_correlate(ap1, ap2, typec, + minlag, maxlag, lagstep); if (ret == NULL) { goto clean_ap2; } - /* - * If we inverted input orders, we need to reverse the output array (i.e. - * ret = ret[::-1]) - */ - if (inverted) { - st = _pyarray_revert(ret); - if (st) { - goto clean_ret; + Py_DECREF(ap1); + Py_DECREF(ap2); + Py_DECREF(typec); + return (PyObject *)ret; + +clean_ap2: + Py_DECREF(ap2); +clean_ap1: + Py_DECREF(ap1); + Py_DECREF(typec); + return NULL; +} + +/*NUMPY_API + * correlate(a1,a2,minlag,maxlag,lagstep,conjugate) + * + * Computes the sliding inner product of a1 with a2 at the specified lag + * range [minlag, maxlag) with step lagstep. + * This function accepts vectors in either order. This function + * does not accept modes; use _lags_from_mode() to convert a mode integer to + * (minlag, maxlag, lagstep). + * + * The conjugate flag selects the operation semantics: + * + * conjugate != 0 -- correlation semantics (same as PyArray_Correlate2): + * out[k] = sum_n a1[n+k] * conj(a2[n]) + * + * conjugate == 0 -- no conjugation (same as PyArray_Correlate): + * out[k] = sum_n a1[n+k] * a2[n] + * For convolution the caller must pass a2 already + * reversed (a2[::-1]) before calling this function. + */ +NPY_NO_EXPORT PyObject * +PyArray_CorrelateLags(PyObject *op1, PyObject *op2, + npy_intp minlag, npy_intp maxlag, npy_intp lagstep, + int conjugate) +{ + PyArrayObject *ap1, *ap2, *ret = NULL; + PyArray_Descr *typec = NULL; + + if (PyArray_DTypeFromObject(op1, NPY_MAXDIMS, &typec) < 0) { + return NULL; + } + if (PyArray_DTypeFromObject(op2, NPY_MAXDIMS, &typec) < 0) { + Py_XDECREF(typec); + return NULL; + } + + if (typec == NULL) { + typec = PyArray_DescrFromType(NPY_DEFAULT_TYPE); + } + Py_SETREF(typec, NPY_DT_CALL_ensure_canonical(typec)); + + Py_INCREF(typec); + ap1 = (PyArrayObject *)PyArray_FromAny(op1, typec, 1, 1, + NPY_ARRAY_DEFAULT, NULL); + if (ap1 == NULL) { + Py_DECREF(typec); + return NULL; + } + + Py_INCREF(typec); + ap2 = (PyArrayObject *)PyArray_FromAny(op2, typec, 1, 1, + NPY_ARRAY_DEFAULT, NULL); + if (ap2 == NULL) { + goto clean_ap1; + } + + if (conjugate && PyArray_ISCOMPLEX(ap2)) { + PyArrayObject *cap2; + cap2 = (PyArrayObject *)PyArray_Conjugate(ap2, NULL); + if (cap2 == NULL) { + goto clean_ap2; } + Py_DECREF(ap2); + ap2 = cap2; + } + + ret = _pyarray_correlate(ap1, ap2, typec, + minlag, maxlag, lagstep); + if (ret == NULL) { + goto clean_ap2; } Py_DECREF(ap1); @@ -1383,26 +1583,29 @@ PyArray_Correlate2(PyObject *op1, PyObject *op2, int mode) Py_DECREF(typec); return (PyObject *)ret; -clean_ret: - Py_DECREF(ret); clean_ap2: Py_DECREF(ap2); clean_ap1: Py_DECREF(ap1); - Py_DECREF(typec); return NULL; } /*NUMPY_API * Numeric.correlate(a1,a2,mode) + * + * Legacy correlation function that does NOT conjugate the second array for + * complex inputs. Equivalent to PyArray_CorrelateLags with conjugate == 0 + * (convolution semantics) after the appropriate lag range is computed via + * _lags_from_mode(). Prefer PyArray_Correlate2 for standard correlation. */ NPY_NO_EXPORT PyObject * PyArray_Correlate(PyObject *op1, PyObject *op2, int mode) { PyArrayObject *ap1, *ap2, *ret = NULL; - int unused; PyArray_Descr *typec = NULL; + npy_intp minlag, maxlag, lagstep; + npy_intp n1, n2; if (PyArray_DTypeFromObject(op1, NPY_MAXDIMS, &typec) < 0) { return NULL; @@ -1432,7 +1635,14 @@ PyArray_Correlate(PyObject *op1, PyObject *op2, int mode) goto fail; } - ret = _pyarray_correlate(ap1, ap2, typec, mode, &unused); + n1 = PyArray_DIMS(ap1)[0]; + n2 = PyArray_DIMS(ap2)[0]; + _lags_from_mode(mode, n1, n2, &minlag, &maxlag, &lagstep); + if (PyErr_Occurred()) { + goto fail; + } + ret = _pyarray_correlate(ap1, ap2, typec, + minlag, maxlag, lagstep); if (ret == NULL) { goto fail; } @@ -3043,6 +3253,13 @@ array_correlate(PyObject *NPY_UNUSED(dummy), {"|mode", &PyArray_CorrelatemodeConverter, &mode}) < 0) { return NULL; } + if (mode == 3) { + PyErr_SetString(PyExc_ValueError, + "correlate() accepts only modes 0, 1, and 2 " + "(valid, same and full). " + "Use correlatelags() for mode 3 (lags)."); + return NULL; + } return PyArray_Correlate(a0, shape, mode); } @@ -3063,6 +3280,31 @@ array_correlate2(PyObject *NPY_UNUSED(dummy), return PyArray_Correlate2(a0, shape, mode); } +static PyObject* +array_correlatelags(PyObject *NPY_UNUSED(dummy), + PyObject *const *args, Py_ssize_t len_args, PyObject *kwnames) +{ + PyObject *shape, *a0; + npy_intp minlag = 0, maxlag = 0, lagstep = 0; + npy_bool conjugate = 1; + NPY_PREPARE_ARGPARSER; + + if (npy_parse_arguments("correlate_lags", args, len_args, kwnames, + {"a", NULL, &a0}, + {"v", NULL, &shape}, + {"minlag", &PyArray_IntpFromPyIntConverter, &minlag}, + {"maxlag", &PyArray_IntpFromPyIntConverter, &maxlag}, + {"lagstep", &PyArray_IntpFromPyIntConverter, &lagstep}, + {"|conjugate", &PyArray_BoolConverter, &conjugate}) < 0) { + return NULL; + } + if (minlag == 0 && maxlag == 0 && lagstep == 0) { + /* if no lag parameters passed, use default: mode = 'valid' */ + return PyArray_Correlate2(a0, shape, 0); + } + return PyArray_CorrelateLags(a0, shape, minlag, maxlag, lagstep, conjugate); +} + static PyObject * array_arange(PyObject *NPY_UNUSED(ignored), PyObject *const *args, Py_ssize_t len_args, PyObject *kwnames) @@ -4610,6 +4852,9 @@ static struct PyMethodDef array_module_methods[] = { {"correlate2", (PyCFunction)array_correlate2, METH_FASTCALL | METH_KEYWORDS, NULL}, + {"correlatelags", + (PyCFunction)array_correlatelags, + METH_FASTCALL | METH_KEYWORDS, NULL}, {"frombuffer", (PyCFunction)array_frombuffer, METH_VARARGS | METH_KEYWORDS, NULL}, diff --git a/numpy/_core/tests/test_numeric.py b/numpy/_core/tests/test_numeric.py index 5909c9eb8564..cfbe57d37c4d 100644 --- a/numpy/_core/tests/test_numeric.py +++ b/numpy/_core/tests/test_numeric.py @@ -3612,7 +3612,10 @@ def _setup(self, dt): def test_float(self): self._setup(float) z = np.correlate(self.x, self.y, 'full') + lags = np.correlate_lags(len(self.x), len(self.y), 'full') assert_array_almost_equal(z, self.z1) + assert_array_equal(lags, np.arange(-2, 5)) + assert len(z) == len(lags) z = np.correlate(self.x, self.y[:-1], 'full') assert_array_almost_equal(z, self.z1_4) z = np.correlate(self.y, self.x, 'full') @@ -3623,6 +3626,215 @@ def test_float(self): assert_array_almost_equal(z, self.z2r) z = np.correlate(self.xs, self.y, 'full') assert_array_almost_equal(z, self.zs) + # Test 'valid' mode - middle part where there's full overlap + z = np.correlate(self.x, self.y, 'valid') + lags = np.correlate_lags(len(self.x), len(self.y), 'valid') + assert_array_almost_equal(z, self.z1[2:5]) # [-14., -20., -26.] + assert_array_equal(lags, np.arange(0, 3)) + assert len(z) == len(lags) + # Test 'same' mode - same length as first input + z = np.correlate(self.x, self.y, 'same') + lags = np.correlate_lags(len(self.x), len(self.y), 'same') + assert_array_almost_equal(z, self.z1[1:6]) # [-8., -14., -20., -26., -14.] + assert_array_equal(lags, np.arange(-1, 4)) + assert len(z) == len(lags) + + def test_maxlag(self): + self._setup(float) + # maxlag=1 gives the symmetric inclusive window [-1, 0, 1] + z = np.correlate(self.x, self.y, maxlag=1) + lags = np.correlate_lags(len(self.x), len(self.y), maxlag=1) + assert_array_almost_equal(z, self.z1[1:4]) # [-8., -14., -20.] + assert_array_equal(lags, np.arange(-1, 2)) + assert len(z) == len(lags) + + def test_lags_range(self): + self._setup(float) + z = np.correlate(self.x, self.y, lags=range(-1, 3)) + lags = np.correlate_lags(len(self.x), len(self.y), lags=range(-1, 3)) + assert_array_almost_equal(z, self.z1[1:5]) # [-8., -14., -20., -26.] + assert_array_equal(lags, np.arange(-1, 3)) + assert len(z) == len(lags) + + def test_lags_range_with_step(self): + self._setup(float) + z = np.correlate(self.x, self.y, lags=range(-2, 5, 2)) + lags = np.correlate_lags(len(self.x), len(self.y), lags=range(-2, 5, 2)) + assert_array_almost_equal(z, self.z1[0::2]) # [-3., -14., -26., -5.] + assert_array_equal(lags, np.arange(-2, 5, 2)) + assert len(z) == len(lags) + + def test_lags_slice(self): + self._setup(float) + z = np.correlate(self.x, self.y, lags=slice(-1, 3)) + lags = np.correlate_lags(len(self.x), len(self.y), lags=slice(-1, 3)) + assert_array_almost_equal(z, self.z1[1:5]) + assert_array_equal(lags, np.arange(-1, 3)) + + def test_lags_array(self): + self._setup(float) + # 1-D array of lags (must be an arithmetic progression) + z = np.correlate(self.x, self.y, lags=np.array([-2, 0, 2, 4])) + lags = np.correlate_lags( + len(self.x), len(self.y), lags=np.array([-2, 0, 2, 4])) + assert_array_almost_equal(z, self.z1[0::2]) + assert_array_equal(lags, np.array([-2, 0, 2, 4])) + + def test_maxlag_and_lags_mutually_exclusive(self): + self._setup(float) + with pytest.raises(TypeError, match="cannot specify both"): + np.correlate(self.x, self.y, maxlag=1, lags=range(-1, 2)) + + def test_lags_with_non_lags_mode(self): + self._setup(float) + with pytest.raises(ValueError, match="cannot be used with mode"): + np.correlate(self.x, self.y, mode='full', maxlag=1) + with pytest.raises(ValueError, match="cannot be used with mode"): + np.correlate(self.x, self.y, mode='valid', lags=range(2)) + + def test_lags_mode_without_lags(self): + self._setup(float) + with pytest.raises(ValueError, match="is required for mode='lags'"): + np.correlate(self.x, self.y, mode='lags') + + def test_lags_non_arithmetic_array_rejected(self): + self._setup(float) + with pytest.raises(ValueError, match="arithmetic progression"): + np.correlate(self.x, self.y, lags=np.array([0, 1, 3])) + + # --- Lag range geometry tests --- + # n1=5 (self.x), n2=3 (self.y), z1 covers lags [-2..4] + + def test_lags_left_partial_only(self): + self._setup(float) + # All requested lags < 0 (left partial overlap region only) + z = np.correlate(self.x, self.y, lags=range(-2, 0)) + assert_array_almost_equal(z, self.z1[0:2]) # [-3, -8] + + def test_lags_right_partial_only(self): + self._setup(float) + # All requested lags >= n1-n2+1=3 (right partial overlap region only) + z = np.correlate(self.x, self.y, lags=range(3, 5)) + assert_array_almost_equal(z, self.z1[5:7]) # [-14, -5] + + def test_lags_full_overlap_only(self): + self._setup(float) + # Only full-overlap lags [0, n1-n2] = [0, 2] + z = np.correlate(self.x, self.y, lags=range(3)) + assert_array_almost_equal(z, self.z1[2:5]) + + def test_lags_beyond_overlap(self): + self._setup(float) + # Lags entirely outside any overlap -> all zeros + z = np.correlate(self.x, self.y, lags=range(-10, -5)) + assert_array_almost_equal(z, np.zeros(5)) + z = np.correlate(self.x, self.y, lags=range(10, 15)) + assert_array_almost_equal(z, np.zeros(5)) + + def test_lags_partial_beyond_overlap(self): + self._setup(float) + # Lags partially outside overlap -> zeros at extremes + z = np.correlate(self.x, self.y, lags=range(-5, 8)) + expected = np.concatenate([np.zeros(3), self.z1, np.zeros(3)]) + assert_array_almost_equal(z, expected) + + def test_lags_single_lag(self): + self._setup(float) + z = np.correlate(self.x, self.y, lags=range(1)) + lags = np.correlate_lags(len(self.x), len(self.y), lags=range(1)) + assert_array_almost_equal(z, [self.z1[2]]) + assert_array_equal(lags, np.array([0])) + + def test_lags_negative_step(self): + self._setup(float) + # Negative step: reversed lag order + z = np.correlate(self.x, self.y, lags=slice(2, -3, -1)) + lags = np.correlate_lags( + len(self.x), len(self.y), lags=slice(2, -3, -1)) + # slice(2,-3,-1) -> [2, 1, 0, -1, -2] + # corresponds to z1 indices [4, 3, 2, 1, 0] + assert_array_almost_equal(z, self.z1[4::-1]) + assert_array_equal(lags, np.arange(2, -3, -1)) + + def test_lags_empty(self): + self._setup(float) + z = np.correlate(self.x, self.y, lags=range(0)) + lags = np.correlate_lags(len(self.x), len(self.y), lags=range(0)) + assert len(z) == 0 + assert len(lags) == 0 + + # --- Array geometry tests --- + + def test_lags_v_longer_than_a(self): + self._setup(float) + # When len(a) < len(v): correlate(y, x) is the time-reversal of + # correlate(x, y) for real inputs. + z = np.correlate(self.y, self.x, lags=range(-4, 3)) + assert_array_almost_equal(z, self.z1[::-1]) + + def test_lags_equal_length(self): + a = np.array([1, 2, 3], dtype=float) + v = np.array([4, 5, 6], dtype=float) + z_mode = np.correlate(a, v, 'full') + z_lags = np.correlate(a, v, lags=range(-2, 3)) + assert_array_almost_equal(z_mode, z_lags) + + def test_lags_autocorrelation(self): + # a == v: result should be symmetric for real arrays + a = np.array([1.0, 2.0, 3.0]) + z = np.correlate(a, a, lags=range(-2, 3)) + assert_array_almost_equal(z, z[::-1]) + + # --- Equivalence between mode and explicit lag range --- + + def test_lags_matches_full_mode(self): + self._setup(float) + z_mode = np.correlate(self.x, self.y, 'full') + z_lags = np.correlate(self.x, self.y, lags=range(-2, 5)) + assert_array_almost_equal(z_mode, z_lags) + + def test_lags_matches_same_mode(self): + self._setup(float) + z_mode = np.correlate(self.x, self.y, 'same') + z_lags = np.correlate(self.x, self.y, lags=range(-1, 4)) + assert_array_almost_equal(z_mode, z_lags) + + def test_lags_matches_valid_mode(self): + self._setup(float) + z_mode = np.correlate(self.x, self.y, 'valid') + z_lags = np.correlate(self.x, self.y, lags=range(3)) + assert_array_almost_equal(z_mode, z_lags) + + # --- Type/dtype tests --- + + def test_complex_with_lags(self): + x = np.array([1, 2, 3, 4 + 1j], dtype=complex) + y = np.array([-1, -2j, 3 + 1j], dtype=complex) + # Lags-mode equals subset of full-mode for complex arrays + z_full = np.correlate(y, x, mode='full') + z_lags = np.correlate(y, x, lags=range(-3, 3)) + assert_array_almost_equal(z_full, z_lags) + + def test_object_with_lags(self): + self._setup(Decimal) + z_full = np.correlate(self.x, self.y, mode='full') + z_lags = np.correlate(self.x, self.y, lags=range(-2, 5)) + assert_array_almost_equal(z_full, z_lags) + + # --- Default mode resolution --- + + def test_default_mode_resolution(self): + self._setup(float) + # No mode, no lags -> 'valid' + assert_array_equal(np.correlate(self.x, self.y), + np.correlate(self.x, self.y, mode='valid')) + # No mode, with maxlag -> 'lags' + assert_array_equal(np.correlate(self.x, self.y, maxlag=1), + np.correlate(self.x, self.y, mode='lags', maxlag=1)) + # No mode, with lags -> 'lags' + assert_array_equal( + np.correlate(self.x, self.y, lags=range(-1, 2)), + np.correlate(self.x, self.y, mode='lags', lags=range(-1, 2))) def test_object(self): self._setup(Decimal) @@ -3705,6 +3917,103 @@ def test_convolve_empty_input_error_message(self): with pytest.raises(ValueError, match="v cannot be empty"): np.convolve(np.array([1, 2]), np.array([])) + # --- Convolve lags tests (mirror correlate API) --- + + def test_convolve_maxlag(self): + a = np.array([1, 2, 3], dtype=float) + v = np.array([0, 1, 0.5], dtype=float) + z = np.convolve(a, v, maxlag=1) + lags = np.correlate_lags(len(a), len(v), maxlag=1) + assert_array_almost_equal(z, [1.0, 2.5, 4.0]) + assert_array_equal(lags, np.arange(-1, 2)) + + def test_convolve_lags_range(self): + a = np.array([1, 2, 3], dtype=float) + v = np.array([0, 1, 0.5], dtype=float) + z = np.convolve(a, v, lags=range(-1, 2)) + lags = np.correlate_lags(len(a), len(v), lags=range(-1, 2)) + assert_array_almost_equal(z, [1.0, 2.5, 4.0]) + assert_array_equal(lags, np.arange(-1, 2)) + + def test_convolve_lags_range_with_step(self): + a = np.array([1, 2, 3, 4, 5], dtype=float) + v = np.array([0, 1, 0.5], dtype=float) + z = np.convolve(a, v, lags=range(-2, 6, 2)) + lags = np.correlate_lags(len(a), len(v), lags=range(-2, 6, 2)) + assert_array_almost_equal(z, [0.0, 2.5, 5.5, 2.5]) + assert_array_equal(lags, np.arange(-2, 6, 2)) + + def test_correlate_lags_full_mode(self): + a = [1, 2, 3] + v = [0, 1, 0.5] + z = np.convolve(a, v, mode='full') + lags = np.correlate_lags(len(a), len(v), mode='full') + assert len(z) == len(lags) + assert_array_equal(lags, np.arange(-2, 3)) + + def test_convolve_lags_matches_full_mode(self): + a = np.array([1, 2, 3, 4, 5], dtype=float) + v = np.array([6, 7, 8], dtype=float) + z_mode = np.convolve(a, v, 'full') + z_lags = np.convolve(a, v, lags=range(-2, 5)) + assert_array_almost_equal(z_mode, z_lags) + + def test_convolve_vs_correlate_reversed(self): + # convolve(a, v, lags=L) == correlate(a, v[::-1], lags=L) + a = np.array([1, 2, 3, 4, 5], dtype=float) + v = np.array([6, 7, 8], dtype=float) + z_conv = np.convolve(a, v, lags=range(-3, 6)) + z_corr = np.correlate(a, v[::-1], lags=range(-3, 6)) + assert_array_almost_equal(z_conv, z_corr) + + def test_convolve_default_mode_resolution(self): + a = [1, 2, 3] + v = [0, 1, 0.5] + # No mode, no lags -> 'full' + assert_array_equal(np.convolve(a, v), np.convolve(a, v, mode='full')) + # No mode, with maxlag -> 'lags' + assert_array_equal(np.convolve(a, v, maxlag=1), + np.convolve(a, v, mode='lags', maxlag=1)) + # No mode, with lags -> 'lags' + assert_array_equal(np.convolve(a, v, lags=range(-1, 2)), + np.convolve(a, v, mode='lags', + lags=range(-1, 2))) + + def test_convolve_maxlag_lags_mutually_exclusive(self): + with pytest.raises(TypeError, match="cannot specify both"): + np.convolve([1, 2, 3], [4, 5, 6], + maxlag=1, lags=range(-1, 2)) + + def test_convolve_lags_with_non_lags_mode(self): + with pytest.raises(ValueError, match="cannot be used with mode"): + np.convolve([1, 2, 3], [4, 5, 6], mode='full', maxlag=1) + with pytest.raises(ValueError, match="cannot be used with mode"): + np.convolve([1, 2, 3], [4, 5, 6], mode='full', + lags=range(2)) + + def test_convolve_lags_mode_without_lags(self): + with pytest.raises(ValueError, + match="is required for mode='lags'"): + np.convolve([1, 2, 3], [4, 5, 6], mode='lags') + + def test_complex_no_conjugation(self): + # Regression test: convolve must NOT conjugate either input. + # correlate(a, v) conjugates v; convolve(a, v) must not. + # For real inputs conjugation is a no-op, so only complex exposes + # the bug. Expected values computed as scalar polynomial product: + # (1+1j)*x^1 + 2*x^0 times 3*x^1 + (4+1j)*x^0 + a = np.array([1 + 1j, 2]) + v = np.array([3, 4 + 1j]) + expected = np.array([3 + 3j, 9 + 5j, 8 + 2j]) + assert_array_equal(np.convolve(a, v), expected) + # Same check via the lags path (maxlag=1 covers lags [-1, 0, 1], + # which is the full output for two length-2 inputs) + result = np.convolve(a, v, maxlag=1) + lags = np.correlate_lags(len(a), len(v), maxlag=1) + assert_array_equal(result, expected) + assert_array_equal(lags, np.arange(-1, 2)) + + class TestArgwhere: @pytest.mark.parametrize('nd', [0, 1, 2]) diff --git a/numpy/matlib.pyi b/numpy/matlib.pyi index 06a6806e373a..e081612d7878 100644 --- a/numpy/matlib.pyi +++ b/numpy/matlib.pyi @@ -114,6 +114,7 @@ from numpy import ( # noqa: F401 core, corrcoef, correlate, + correlate_lags, cos, cosh, count_nonzero,