From e4e33ab2b0a3c40ce8357afdab6b1a9317a512ab Mon Sep 17 00:00:00 2001 From: Adam Reichold Date: Wed, 10 Nov 2021 13:27:20 +0100 Subject: [PATCH 1/2] No need to explicitly request zero objects (which would yield valid non-null pointers to the None object). --- src/array.rs | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/src/array.rs b/src/array.rs index a67060646..fd13a86a2 100644 --- a/src/array.rs +++ b/src/array.rs @@ -447,6 +447,9 @@ impl PyArray { /// If `is_fortran` is true, then /// a fortran order array is created, otherwise a C-order array is created. /// + /// For elements with `DATA_TYPE == DataType::Object`, this will fill the array + /// valid pointers to objects of type `` with value zero. + /// /// See also [PyArray_Zeros](https://numpy.org/doc/stable/reference/c-api/array.html#c.PyArray_Zeros) /// /// # Example @@ -776,14 +779,9 @@ impl PyArray { /// }); /// ``` pub fn from_exact_iter(py: Python<'_>, iter: impl ExactSizeIterator) -> &Self { - // Use zero-initialized pointers for object arrays - // so that partially initialized arrays can be dropped safely - // in case the iterator implementation panics. - let array = if T::DATA_TYPE == DataType::Object { - Self::zeros(py, [iter.len()], false) - } else { - Self::new(py, [iter.len()], false) - }; + // NumPy will always zero-initialize object pointers, + // so the array can be dropped safely if the iterator panics. + let array = Self::new(py, [iter.len()], false); unsafe { for (i, item) in iter.enumerate() { *array.uget_mut([i]) = item; @@ -811,14 +809,9 @@ impl PyArray { let iter = iter.into_iter(); let (min_len, max_len) = iter.size_hint(); let mut capacity = max_len.unwrap_or_else(|| min_len.max(512 / mem::size_of::())); - // Use zero-initialized pointers for object arrays - // so that partially initialized arrays can be dropped safely - // in case the iterator implementation panics. - let array = if T::DATA_TYPE == DataType::Object { - Self::zeros(py, [capacity], false) - } else { - Self::new(py, [capacity], false) - }; + // NumPy will always zero-initialize object pointers, + // so the array can be dropped safely if the iterator panics. + let array = Self::new(py, [capacity], false); let mut length = 0; unsafe { for (i, item) in iter.enumerate() { From 11182ec85b0cb6330c92f1b8b890128c07602f90 Mon Sep 17 00:00:00 2001 From: Adam Reichold Date: Wed, 10 Nov 2021 14:05:21 +0100 Subject: [PATCH 2/2] Make the PyArray::new method unsafe and document what can and cannot be done with its return value. --- src/array.rs | 100 +++++++++++++++++++++++++++++++++---------------- tests/array.rs | 6 +-- 2 files changed, 70 insertions(+), 36 deletions(-) diff --git a/src/array.rs b/src/array.rs index fd13a86a2..3565e01d0 100644 --- a/src/array.rs +++ b/src/array.rs @@ -249,7 +249,7 @@ impl PyArray { /// ``` /// use numpy::PyArray3; /// pyo3::Python::with_gil(|py| { - /// let arr = PyArray3::::new(py, [4, 5, 6], false); + /// let arr = PyArray3::::zeros(py, [4, 5, 6], false); /// assert_eq!(arr.ndim(), 3); /// }); /// ``` @@ -266,7 +266,7 @@ impl PyArray { /// ``` /// use numpy::PyArray3; /// pyo3::Python::with_gil(|py| { - /// let arr = PyArray3::::new(py, [4, 5, 6], false); + /// let arr = PyArray3::::zeros(py, [4, 5, 6], false); /// assert_eq!(arr.strides(), &[240, 48, 8]); /// }); /// ``` @@ -287,7 +287,7 @@ impl PyArray { /// ``` /// use numpy::PyArray3; /// pyo3::Python::with_gil(|py| { - /// let arr = PyArray3::::new(py, [4, 5, 6], false); + /// let arr = PyArray3::::zeros(py, [4, 5, 6], false); /// assert_eq!(arr.shape(), &[4, 5, 6]); /// }); /// ``` @@ -371,20 +371,46 @@ impl PyArray { /// /// If `is_fortran == true`, returns Fortran-order array. Else, returns C-order array. /// + /// # Safety + /// + /// The returned array will always be safe to be dropped as the elements must either + /// be trivially copyable or have `DATA_TYPE == DataType::Object`, i.e. be pointers + /// into Python's heap, which NumPy will automatically zero-initialize. + /// + /// However, the elements themselves will not be valid and should only be accessed + /// via raw pointers obtained via [uget_raw](#method.uget_raw). + /// + /// All methods which produce references to the elements invoke undefined behaviour. + /// In particular, zero-initialized pointers are _not_ valid instances of `PyObject`. + /// /// # Example /// ``` /// use numpy::PyArray3; + /// /// pyo3::Python::with_gil(|py| { - /// let arr = PyArray3::::new(py, [4, 5, 6], false); + /// let arr = unsafe { + /// let arr = PyArray3::::new(py, [4, 5, 6], false); + /// + /// for i in 0..4 { + /// for j in 0..5 { + /// for k in 0..6 { + /// arr.uget_raw([i, j, k]).write((i * j * k) as i32); + /// } + /// } + /// } + /// + /// arr + /// }; + /// /// assert_eq!(arr.shape(), &[4, 5, 6]); /// }); /// ``` - pub fn new(py: Python, dims: ID, is_fortran: bool) -> &Self + pub unsafe fn new(py: Python, dims: ID, is_fortran: bool) -> &Self where ID: IntoDimension, { let flags = if is_fortran { 1 } else { 0 }; - unsafe { PyArray::new_(py, dims, ptr::null_mut(), flags) } + PyArray::new_(py, dims, ptr::null_mut(), flags) } pub(crate) unsafe fn new_( @@ -448,7 +474,7 @@ impl PyArray { /// a fortran order array is created, otherwise a C-order array is created. /// /// For elements with `DATA_TYPE == DataType::Object`, this will fill the array - /// valid pointers to objects of type `` with value zero. + /// with valid pointers to zero-valued Python integer objects. /// /// See also [PyArray_Zeros](https://numpy.org/doc/stable/reference/c-api/array.html#c.PyArray_Zeros) /// @@ -596,6 +622,16 @@ impl PyArray { &mut *(self.data().offset(offset) as *mut _) } + /// Same as [uget](#method.uget), but returns `*mut T`. + #[inline(always)] + pub unsafe fn uget_raw(&self, index: Idx) -> *mut T + where + Idx: NpyIndex, + { + let offset = index.get_unchecked::(self.strides()); + self.data().offset(offset) as *mut _ + } + /// Get dynamic dimensioned array from fixed dimension array. pub fn to_dyn(&self) -> &PyArray { let python = self.py(); @@ -733,20 +769,18 @@ impl PyArray { /// }); /// ``` pub fn from_slice<'py>(py: Python<'py>, slice: &[T]) -> &'py Self { - let array = PyArray::new(py, [slice.len()], false); - if T::DATA_TYPE != DataType::Object { - unsafe { + unsafe { + let array = PyArray::new(py, [slice.len()], false); + if T::DATA_TYPE != DataType::Object { array.copy_ptr(slice.as_ptr(), slice.len()); - } - } else { - unsafe { + } else { let data_ptr = array.data(); for (i, item) in slice.iter().enumerate() { data_ptr.add(i).write(item.clone()); } } + array } - array } /// Construct one-dimension PyArray @@ -781,13 +815,13 @@ impl PyArray { pub fn from_exact_iter(py: Python<'_>, iter: impl ExactSizeIterator) -> &Self { // NumPy will always zero-initialize object pointers, // so the array can be dropped safely if the iterator panics. - let array = Self::new(py, [iter.len()], false); unsafe { + let array = Self::new(py, [iter.len()], false); for (i, item) in iter.enumerate() { - *array.uget_mut([i]) = item; + array.uget_raw([i]).write(item); } + array } - array } /// Construct one-dimension PyArray from a type which implements @@ -809,11 +843,11 @@ impl PyArray { let iter = iter.into_iter(); let (min_len, max_len) = iter.size_hint(); let mut capacity = max_len.unwrap_or_else(|| min_len.max(512 / mem::size_of::())); - // NumPy will always zero-initialize object pointers, - // so the array can be dropped safely if the iterator panics. - let array = Self::new(py, [capacity], false); - let mut length = 0; unsafe { + // NumPy will always zero-initialize object pointers, + // so the array can be dropped safely if the iterator panics. + let array = Self::new(py, [capacity], false); + let mut length = 0; for (i, item) in iter.enumerate() { length += 1; if length > capacity { @@ -822,13 +856,13 @@ impl PyArray { .resize(capacity) .expect("PyArray::from_iter: Failed to allocate memory"); } - *array.uget_mut([i]) = item; + array.uget_raw([i]).write(item); } + if capacity > length { + array.resize(length).unwrap() + } + array } - if capacity > length { - array.resize(length).unwrap() - } - array } /// Extends or trancates the length of 1 dimension PyArray. @@ -902,15 +936,15 @@ impl PyArray { return Err(FromVecError::new(v.len(), last_len)); } let dims = [v.len(), last_len]; - let array = Self::new(py, dims, false); unsafe { + let array = Self::new(py, dims, false); for (y, vy) in v.iter().enumerate() { for (x, vyx) in vy.iter().enumerate() { - *array.uget_mut([y, x]) = vyx.clone(); + array.uget_raw([y, x]).write(vyx.clone()); } } + Ok(array) } - Ok(array) } } @@ -944,17 +978,17 @@ impl PyArray { return Err(FromVecError::new(v.len(), len3)); } let dims = [v.len(), len2, len3]; - let array = Self::new(py, dims, false); unsafe { + let array = Self::new(py, dims, false); for (z, vz) in v.iter().enumerate() { for (y, vzy) in vz.iter().enumerate() { for (x, vzyx) in vzy.iter().enumerate() { - *array.uget_mut([z, y, x]) = vzyx.clone(); + array.uget_raw([z, y, x]).write(vzyx.clone()); } } } + Ok(array) } - Ok(array) } } @@ -965,7 +999,7 @@ impl PyArray { /// use numpy::PyArray; /// pyo3::Python::with_gil(|py| { /// let pyarray_f = PyArray::arange(py, 2.0, 5.0, 1.0); - /// let pyarray_i = PyArray::::new(py, [3], false); + /// let pyarray_i = unsafe { PyArray::::new(py, [3], false) }; /// assert!(pyarray_f.copy_to(pyarray_i).is_ok()); /// assert_eq!(pyarray_i.readonly().as_slice().unwrap(), &[2, 3, 4]); /// }); diff --git a/tests/array.rs b/tests/array.rs index 2fb9c09e8..ca62e1da9 100644 --- a/tests/array.rs +++ b/tests/array.rs @@ -25,7 +25,7 @@ fn not_contiguous_array<'py>(py: Python<'py>) -> &'py PyArray1 { fn new_c_order() { let dim = [3, 5]; pyo3::Python::with_gil(|py| { - let arr = PyArray::::new(py, dim, false); + let arr = PyArray::::zeros(py, dim, false); assert!(arr.ndim() == 2); assert!(arr.dims() == dim); let size = std::mem::size_of::() as isize; @@ -37,7 +37,7 @@ fn new_c_order() { fn new_fortran_order() { let dim = [3, 5]; pyo3::Python::with_gil(|py| { - let arr = PyArray::::new(py, dim, true); + let arr = PyArray::::zeros(py, dim, true); assert!(arr.ndim() == 2); assert!(arr.dims() == dim); let size = std::mem::size_of::() as isize; @@ -109,7 +109,7 @@ fn as_slice() { #[test] fn is_instance() { pyo3::Python::with_gil(|py| { - let arr = PyArray2::::new(py, [3, 5], false); + let arr = PyArray2::::zeros(py, [3, 5], false); assert!(arr.is_instance::>().unwrap()); assert!(!arr.is_instance::().unwrap()); })