Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ jobs:
with:
python-version: 3.8
- name: Install Rust
uses: dtolnay/rust-toolchain@1.48.0
uses: dtolnay/rust-toolchain@1.56.0
- uses: Swatinem/rust-cache@v2
with:
workspaces: examples/simple
Expand Down
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# Changelog

- Unreleased
- Increase MSRV to 1.56 released in October 2021 and available in Debian 12, RHEL 9 and Alpine 3.17, following the same change for PyO3. ([#378](https://github.com/PyO3/rust-numpy/pull/378))
- Add support for ASCII (`PyFixedString<N>`) and Unicode (`PyFixedUnicode<N>`) string arrays, i.e. dtypes `SN` and `UN` where `N` is the number of characters. ([#378](https://github.com/PyO3/rust-numpy/pull/378))

- v0.19.0
- Add `PyUntypedArray` as an untyped base type for `PyArray` which can be used to inspect arguments before more targeted downcasts. This is accompanied by some methods like `dtype` and `shape` moving from `PyArray` to `PyUntypedArray`. They are still accessible though, as `PyArray` dereferences to `PyUntypedArray` via the `Deref` trait. ([#369](https://github.com/PyO3/rust-numpy/pull/369))
Expand Down
4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ authors = [
]
description = "PyO3-based Rust bindings of the NumPy C-API"
documentation = "https://docs.rs/numpy"
edition = "2018"
rust-version = "1.48"
edition = "2021"
rust-version = "1.56"
repository = "https://github.com/PyO3/rust-numpy"
categories = ["api-bindings", "development-tools::ffi", "science"]
keywords = ["python", "numpy", "ffi", "pyo3"]
Expand Down
14 changes: 2 additions & 12 deletions src/borrow/shared.rs
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ fn insert_shared(py: Python) -> PyResult<*const Shared> {
let module = get_array_module(py)?;

let capsule: &PyCapsule = match module.getattr("_RUST_NUMPY_BORROW_CHECKING_API") {
Ok(capsule) => capsule.try_into()?,
Ok(capsule) => PyTryInto::try_into(capsule)?,
Err(_err) => {
let flags: *mut BorrowFlags = Box::into_raw(Box::default());

Expand Down Expand Up @@ -437,17 +437,7 @@ fn gcd_strides(array: *mut PyArrayObject) -> isize {

let strides = unsafe { from_raw_parts((*array).strides, nd) };

reduce(strides.iter().copied(), gcd).unwrap_or(1)
}

// FIXME(adamreichold): Use `Iterator::reduce` from std when our MSRV reaches 1.51.
fn reduce<I, F>(mut iter: I, f: F) -> Option<I::Item>
where
I: Iterator,
F: FnMut(I::Item, I::Item) -> I::Item,
{
let first = iter.next()?;
Some(iter.fold(first, f))
strides.iter().copied().reduce(gcd).unwrap_or(1)
}

#[cfg(test)]
Expand Down
10 changes: 6 additions & 4 deletions src/datetime.rs
Original file line number Diff line number Diff line change
Expand Up @@ -223,8 +223,8 @@ impl TypeDescriptors {
fn from_unit<'py>(&'py self, py: Python<'py>, unit: NPY_DATETIMEUNIT) -> &'py PyArrayDescr {
let mut dtypes = self.dtypes.get(py).borrow_mut();

match dtypes.get_or_insert_with(Default::default).entry(unit) {
Entry::Occupied(entry) => entry.into_mut().clone().into_ref(py),
let dtype = match dtypes.get_or_insert_with(Default::default).entry(unit) {
Entry::Occupied(entry) => entry.into_mut(),
Entry::Vacant(entry) => {
let dtype = PyArrayDescr::new_from_npy_type(py, self.npy_type);

Expand All @@ -237,9 +237,11 @@ impl TypeDescriptors {
metadata.meta.num = 1;
}

entry.insert(dtype.into()).clone().into_ref(py)
entry.insert(dtype.into())
}
}
};

dtype.clone().into_ref(py)
}
}

Expand Down
2 changes: 2 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ mod dtype;
mod error;
pub mod npyffi;
mod slice_container;
mod strings;
mod sum_products;
mod untyped_array;

Expand All @@ -105,6 +106,7 @@ pub use crate::convert::{IntoPyArray, NpyIndex, ToNpyDims, ToPyArray};
pub use crate::dtype::{dtype, Complex32, Complex64, Element, PyArrayDescr};
pub use crate::error::{BorrowError, FromVecError, NotContiguousError};
pub use crate::npyffi::{PY_ARRAY_API, PY_UFUNC_API};
pub use crate::strings::{PyFixedString, PyFixedUnicode};
pub use crate::sum_products::{dot, einsum, inner};
pub use crate::untyped_array::PyUntypedArray;

Expand Down
2 changes: 1 addition & 1 deletion src/npyffi/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use pyo3::{

fn get_numpy_api(py: Python, module: &str, capsule: &str) -> PyResult<*const *const c_void> {
let module = PyModule::import(py, module)?;
let capsule: &PyCapsule = module.getattr(capsule)?.try_into()?;
let capsule: &PyCapsule = PyTryInto::try_into(module.getattr(capsule)?)?;

let api = capsule.pointer() as *const *const c_void;

Expand Down
230 changes: 230 additions & 0 deletions src/strings.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,230 @@
//! Types to support arrays of [ASCII][ascii] and [UCS4][ucs4] strings
//!
//! [ascii]: https://numpy.org/doc/stable/reference/c-api/dtype.html#c.NPY_STRING
//! [ucs4]: https://numpy.org/doc/stable/reference/c-api/dtype.html#c.NPY_UNICODE

use std::cell::RefCell;
use std::collections::hash_map::Entry;
use std::convert::TryInto;
use std::fmt;
use std::mem::size_of;
use std::os::raw::c_char;
use std::str;

use pyo3::{
ffi::{Py_UCS1, Py_UCS4},
sync::GILProtected,
Py, Python,
};
use rustc_hash::FxHashMap;

use crate::dtype::{Element, PyArrayDescr};
use crate::npyffi::NPY_TYPES;

/// A newtype wrapper around [`[u8; N]`][Py_UCS1] to handle [`bytes_` scalars][numpy-bytes] while satisfying coherence.
///
/// The wrapper is `#[repr(transparent)]`, i.e. it has the same layout as the wrapped array
/// of `N` bytes.
///
/// Note that when creating arrays of ASCII strings without an explicit `dtype`,
/// NumPy will automatically determine the smallest possible array length at runtime.
///
/// For example,
///
/// ```python
/// array = numpy.array([b"foo", b"bar", b"foobar"])
/// ```
///
/// yields `S6` for `array.dtype`.
///
/// On the Rust side however, the length `N` of `PyFixedString<N>` must always be given
/// explicitly and as a compile-time constant. For this to work reliably, the Python code
/// should set the `dtype` explicitly, e.g.
///
/// ```python
/// numpy.array([b"foo", b"bar", b"foobar"], dtype='S12')
/// ```
///
/// always matching `PyArray1<PyFixedString<12>>`.
///
/// # Example
///
/// ```rust
/// # use pyo3::Python;
/// use numpy::{PyArray1, PyFixedString};
///
/// # Python::with_gil(|py| {
/// let array = PyArray1::<PyFixedString<3>>::from_vec(py, vec![[b'f', b'o', b'o'].into()]);
///
/// assert!(array.dtype().to_string().contains("S3"));
/// # });
/// ```
///
/// [numpy-bytes]: https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.bytes_
#[repr(transparent)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct PyFixedString<const N: usize>(pub [Py_UCS1; N]);

/// Formats the stored bytes as text with the trailing NUL padding removed.
///
/// The bytes are decoded as UTF-8 (of which ASCII is a subset). The inner array is public and
/// may hold arbitrary bytes, so invalid sequences are rendered as U+FFFD (the replacement
/// character) instead of panicking in the middle of a formatting call.
impl<const N: usize> fmt::Display for PyFixedString<N> {
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        // Borrows the bytes when they are valid UTF-8 and only allocates
        // when a replacement character has to be inserted.
        let text = String::from_utf8_lossy(&self.0);

        // Only the trailing padding is dropped; embedded NUL bytes are kept.
        fmt.write_str(text.trim_end_matches('\0'))
    }
}

impl<const N: usize> From<[Py_UCS1; N]> for PyFixedString<N> {
fn from(val: [Py_UCS1; N]) -> Self {
Self(val)
}
}

unsafe impl<const N: usize> Element for PyFixedString<N> {
    const IS_COPY: bool = true;

    /// Returns the `NPY_STRING` type descriptor whose element size is that of `Self`.
    fn get_dtype(py: Python) -> &PyArrayDescr {
        // A `static` inside a generic function is not instantiated per `N`:
        // all lengths share this one cache, which is why the lookup is given the size.
        static DTYPES: TypeDescriptors = TypeDescriptors::new();

        let element_size = size_of::<Self>();

        // SAFETY: `NPY_STRING` is requested together with the `|` byte order marker and
        // the size of `Self`, a `repr(transparent)` wrapper around `[Py_UCS1; N]`.
        unsafe { DTYPES.from_size(py, NPY_TYPES::NPY_STRING, b'|' as _, element_size) }
    }
}

/// A newtype wrapper around [`[Py_UCS4; N]`][Py_UCS4] to handle [`str_` scalars][numpy-str] while satisfying coherence.
///
/// The wrapper is `#[repr(transparent)]`, i.e. it has the same layout as the wrapped array
/// of `N` UCS4 code units.
///
/// Note that when creating arrays of Unicode strings without an explicit `dtype`,
/// NumPy will automatically determine the smallest possible array length at runtime.
///
/// For example,
///
/// ```python
/// array = numpy.array(["foo🐍", "bar🦀", "foobar"])
/// ```
///
/// yields `U6` for `array.dtype`.
///
/// On the Rust side however, the length `N` of `PyFixedUnicode<N>` must always be given
/// explicitly and as a compile-time constant. For this to work reliably, the Python code
/// should set the `dtype` explicitly, e.g.
///
/// ```python
/// numpy.array(["foo🐍", "bar🦀", "foobar"], dtype='U12')
/// ```
///
/// always matching `PyArray1<PyFixedUnicode<12>>`.
///
/// # Example
///
/// ```rust
/// # use pyo3::Python;
/// use numpy::{PyArray1, PyFixedUnicode};
///
/// # Python::with_gil(|py| {
/// let array = PyArray1::<PyFixedUnicode<3>>::from_vec(py, vec![[b'b' as _, b'a' as _, b'r' as _].into()]);
///
/// assert!(array.dtype().to_string().contains("U3"));
/// # });
/// ```
///
/// [numpy-str]: https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.str_
#[repr(transparent)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct PyFixedUnicode<const N: usize>(pub [Py_UCS4; N]);

/// Formats the stored code units as text with the trailing NUL padding removed.
///
/// Only the padding at the end is dropped; NUL code units followed by further characters are
/// written out, matching how `PyFixedString` trims its bytes. The inner array is public and
/// may hold values that are not Unicode scalar values (surrogates or values above
/// `0x10FFFF`); these are rendered as U+FFFD (the replacement character) instead of
/// panicking in the middle of a formatting call.
impl<const N: usize> fmt::Display for PyFixedUnicode<N> {
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        // Logical length: up to and including the last non-NUL code unit,
        // or zero if the array consists of padding only.
        let len = self
            .0
            .iter()
            .rposition(|&unit| unit != 0)
            .map_or(0, |last| last + 1);

        for &unit in &self.0[..len] {
            let character = char::from_u32(unit).unwrap_or(char::REPLACEMENT_CHARACTER);

            write!(fmt, "{}", character)?;
        }

        Ok(())
    }
}

impl<const N: usize> From<[Py_UCS4; N]> for PyFixedUnicode<N> {
fn from(val: [Py_UCS4; N]) -> Self {
Self(val)
}
}

unsafe impl<const N: usize> Element for PyFixedUnicode<N> {
    const IS_COPY: bool = true;

    /// Returns the `NPY_UNICODE` type descriptor whose element size is that of `Self`.
    fn get_dtype(py: Python) -> &PyArrayDescr {
        // A `static` inside a generic function is not instantiated per `N`:
        // all lengths share this one cache, which is why the lookup is given the size.
        static DTYPES: TypeDescriptors = TypeDescriptors::new();

        let element_size = size_of::<Self>();

        // SAFETY: `NPY_UNICODE` is requested together with the `=` byte order marker and
        // the size of `Self`, a `repr(transparent)` wrapper around `[Py_UCS4; N]`.
        unsafe { DTYPES.from_size(py, NPY_TYPES::NPY_UNICODE, b'=' as _, element_size) }
    }
}

/// Cache of type descriptors, keyed by the element size passed to `from_size`.
struct TypeDescriptors {
    // Starts out as `None` so that `new` can be `const`; the map is created on first lookup.
    // Access goes through `GILProtected::get`, which requires a `Python` token.
    #[allow(clippy::type_complexity)]
    dtypes: GILProtected<RefCell<Option<FxHashMap<usize, Py<PyArrayDescr>>>>>,
}

impl TypeDescriptors {
    /// Creates an empty cache; `const` so that it can initialize a `static`.
    const fn new() -> Self {
        let dtypes = GILProtected::new(RefCell::new(None));

        Self { dtypes }
    }

    /// Returns the descriptor cached for `size`, creating and caching it on first use.
    ///
    /// A new descriptor is created from `npy_type` and then has its `elsize` and
    /// `byteorder` fields overwritten with `size` and `byteorder`.
    ///
    /// # Safety
    ///
    /// `npy_type` must be either `NPY_STRING` or `NPY_UNICODE` with matching `byteorder` and `size`
    ///
    /// # Panics
    ///
    /// Panics if `size` does not fit into the descriptor's `elsize` field.
    #[allow(clippy::wrong_self_convention)]
    unsafe fn from_size<'py>(
        &'py self,
        py: Python<'py>,
        npy_type: NPY_TYPES,
        byteorder: c_char,
        size: usize,
    ) -> &'py PyArrayDescr {
        let mut guard = self.dtypes.get(py).borrow_mut();
        // The map itself is created lazily on the very first lookup.
        let cache = guard.get_or_insert_with(Default::default);

        let cached = match cache.entry(size) {
            Entry::Vacant(slot) => {
                let fresh = PyArrayDescr::new_from_npy_type(py, npy_type);

                // Adjust the freshly created descriptor in place before it is cached.
                let raw = &mut *fresh.as_dtype_ptr();
                raw.elsize = size.try_into().unwrap();
                raw.byteorder = byteorder;

                slot.insert(fresh.into())
            }
            Entry::Occupied(slot) => slot.into_mut(),
        };

        // Hand out a new reference tied to `py`; the cache keeps its own.
        cached.clone().into_ref(py)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `PyFixedUnicode` from characters, using `'\0'` as padding.
    fn unicode<const N: usize>(chars: [char; N]) -> PyFixedUnicode<N> {
        PyFixedUnicode(chars.map(|character| character as Py_UCS4))
    }

    #[test]
    fn format_fixed_string() {
        // Padded with NUL bytes up to the fixed length.
        let padded = PyFixedString(*b"foo\0\0\0");
        assert_eq!(padded.to_string(), "foo");

        // Fills the fixed length completely, i.e. no padding at all.
        let full = PyFixedString(*b"foobar");
        assert_eq!(full.to_string(), "foobar");
    }

    #[test]
    fn format_fixed_unicode() {
        // Padded with NUL code units up to the fixed length.
        let padded = unicode(['f', 'o', 'o', '\0', '\0', '\0']);
        assert_eq!(padded.to_string(), "foo");

        // Characters outside of the basic multilingual plane.
        let emoji = unicode(['\u{1F980}', '\u{1F40D}', '\0', '\0', '\0', '\0']);
        assert_eq!(emoji.to_string(), "🦀🐍");

        // Fills the fixed length completely, i.e. no padding at all.
        let full = unicode(['f', 'o', 'o', 'b', 'a', 'r']);
        assert_eq!(full.to_string(), "foobar");
    }
}
Loading