Skip to content

Commit

Permalink
Consolidate list and tuple iteration, fast path
Browse files Browse the repository at this point in the history
  • Loading branch information
ijl committed Jan 18, 2024
1 parent a40f58b commit 34507e4
Show file tree
Hide file tree
Showing 8 changed files with 203 additions and 90 deletions.
7 changes: 4 additions & 3 deletions src/serialize/per_type/dataclass.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: (Apache-2.0 OR MIT)

use crate::serialize::error::SerializeError;
use crate::serialize::per_type::dict::ZeroDictSerializer;
use crate::serialize::serializer::PyObjectSerializer;
use crate::serialize::state::SerializerState;
use crate::str::unicode_to_str;
Expand Down Expand Up @@ -86,9 +87,9 @@ impl Serialize for DataclassFastSerializer {
where
S: Serializer,
{
let len = ffi!(Py_SIZE(self.ptr));
let len = ffi!(Py_SIZE(self.ptr)) as usize;
if unlikely!(len == 0) {
return serializer.serialize_map(Some(0)).unwrap().end();
return ZeroDictSerializer::new().serialize(serializer);
}
let mut map = serializer.serialize_map(None).unwrap();
let mut next_key: *mut pyo3_ffi::PyObject = std::ptr::null_mut();
Expand Down Expand Up @@ -152,7 +153,7 @@ impl Serialize for DataclassFallbackSerializer {
ffi!(Py_DECREF(fields));
let len = ffi!(Py_SIZE(fields)) as usize;
if unlikely!(len == 0) {
return serializer.serialize_map(Some(0)).unwrap().end();
return ZeroDictSerializer::new().serialize(serializer);
}
let mut map = serializer.serialize_map(None).unwrap();

Expand Down
96 changes: 82 additions & 14 deletions src/serialize/per_type/dict.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,40 @@
use crate::opt::*;
use crate::serialize::error::SerializeError;
use crate::serialize::obtype::{pyobject_to_obtype, ObType};
use crate::serialize::per_type::{Date, DateTime, DateTimeBuffer, DateTimeLike, Time, UUID};
use crate::serialize::per_type::{
BoolSerializer, Date, DateTime, DateTimeBuffer, DateTimeLike, FloatSerializer, Int53Serializer,
IntSerializer, ListTupleSerializer, NoneSerializer, StrSerializer, Time, ZeroListSerializer,
UUID,
};
use crate::serialize::serializer::PyObjectSerializer;
use crate::serialize::state::SerializerState;
use crate::str::{unicode_to_str, unicode_to_str_via_ffi};
use crate::typeref::{STR_TYPE, TRUE, VALUE_STR};
use crate::typeref::{
BOOL_TYPE, DATETIME_TYPE, DICT_TYPE, FLOAT_TYPE, INT_TYPE, LIST_TYPE, NONE_TYPE, STR_TYPE,
TRUE, VALUE_STR,
};
use compact_str::CompactString;
use serde::ser::{Serialize, SerializeMap, Serializer};
use smallvec::SmallVec;
use std::ptr::NonNull;

pub struct ZeroDictSerializer;

impl ZeroDictSerializer {
pub const fn new() -> Self {
Self {}
}
}

impl Serialize for ZeroDictSerializer {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
serializer.serialize_bytes(b"{}")
}
}

pub struct DictGenericSerializer {
ptr: *mut pyo3_ffi::PyObject,
state: SerializerState,
Expand Down Expand Up @@ -42,8 +66,8 @@ impl Serialize for DictGenericSerializer {
if unlikely!(self.state.recursion_limit()) {
err!(SerializeError::RecursionLimit)
}
if unlikely!(ffi!(Py_SIZE(self.ptr)) == 0) {
serializer.serialize_map(Some(0)).unwrap().end()
if ffi!(Py_SIZE(self.ptr)) == 0 {
ZeroDictSerializer::new().serialize(serializer)
} else if opt_disabled!(self.state.opts(), SORT_OR_NON_STR_KEYS) {
Dict::new(self.ptr, self.state, self.default).serialize(serializer)
} else if opt_enabled!(self.state.opts(), NON_STR_KEYS) {
Expand Down Expand Up @@ -73,11 +97,12 @@ impl Dict {
}
}
impl Serialize for Dict {
#[inline(always)]
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
debug_assert!(ffi!(Py_SIZE(self.ptr)) > 0);

let mut map = serializer.serialize_map(None).unwrap();

let mut next_key: *mut pyo3_ffi::PyObject = std::ptr::null_mut();
Expand All @@ -92,17 +117,58 @@ impl Serialize for Dict {

pydict_next!(self.ptr, &mut pos, &mut next_key, &mut next_value);

if unlikely!(unsafe { ob_type!(key) != STR_TYPE }) {
err!(SerializeError::KeyMustBeStr)
// key
{
let key_ob_type = ob_type!(key);
if unlikely!(!is_class_by_type!(key_ob_type, STR_TYPE)) {
err!(SerializeError::KeyMustBeStr)
}
let key_as_str = unicode_to_str(key);
if unlikely!(key_as_str.is_none()) {
err!(SerializeError::InvalidStr)
}
map.serialize_key(key_as_str.unwrap()).unwrap();
}
let key_as_str = unicode_to_str(key);
if unlikely!(key_as_str.is_none()) {
err!(SerializeError::InvalidStr)

// value
{
let value_ob_type = ob_type!(value);
if is_class_by_type!(value_ob_type, STR_TYPE) {
map.serialize_value(&StrSerializer::new(value))?;
} else if is_class_by_type!(value_ob_type, INT_TYPE) {
if unlikely!(opt_enabled!(self.state.opts(), STRICT_INTEGER)) {
map.serialize_value(&Int53Serializer::new(value))?;
} else {
map.serialize_value(&IntSerializer::new(value))?;
}
} else if is_class_by_type!(value_ob_type, BOOL_TYPE) {
map.serialize_value(&BoolSerializer::new(value))?;
} else if is_class_by_type!(value_ob_type, NONE_TYPE) {
map.serialize_value(&NoneSerializer::new())?;
} else if is_class_by_type!(value_ob_type, FLOAT_TYPE) {
map.serialize_value(&FloatSerializer::new(value))?;
} else if is_class_by_type!(value_ob_type, DICT_TYPE) {
let pyvalue = DictGenericSerializer::new(value, self.state, self.default);
map.serialize_value(&pyvalue)?;
} else if is_class_by_type!(value_ob_type, LIST_TYPE) {
if ffi!(Py_SIZE(value)) == 0 {
map.serialize_value(&ZeroListSerializer::new())?;
} else {
let pyvalue =
ListTupleSerializer::from_list(value, self.state, self.default);
map.serialize_value(&pyvalue)?;
}
} else if is_class_by_type!(value_ob_type, DATETIME_TYPE)
&& opt_disabled!(self.state.opts(), PASSTHROUGH_DATETIME)
{
map.serialize_value(&DateTime::new(value, self.state.opts()))?;
} else {
let pyvalue = PyObjectSerializer::new(value, self.state, self.default);
map.serialize_value(&pyvalue)?;
}
}
let pyvalue = PyObjectSerializer::new(value, self.state, self.default);
map.serialize_key(key_as_str.unwrap()).unwrap();
map.serialize_value(&pyvalue)?;
}

map.end()
}
}
Expand Down Expand Up @@ -134,6 +200,7 @@ impl Serialize for DictSortedKey {
S: Serializer,
{
let len = ffi!(Py_SIZE(self.ptr)) as usize;
debug_assert!(len > 0);
let mut items: SmallVec<[(&str, *mut pyo3_ffi::PyObject); 8]> =
SmallVec::with_capacity(len);

Expand Down Expand Up @@ -298,6 +365,7 @@ impl Serialize for DictNonStrKey {
S: Serializer,
{
let len = ffi!(Py_SIZE(self.ptr)) as usize;
debug_assert!(len > 0);
let mut items: SmallVec<[(CompactString, *mut pyo3_ffi::PyObject); 8]> =
SmallVec::with_capacity(len);

Expand All @@ -309,7 +377,7 @@ impl Serialize for DictNonStrKey {
let mut pos = 0;

pydict_next!(self.ptr, &mut pos, &mut next_key, &mut next_value);
for _ in 0..=ffi!(Py_SIZE(self.ptr)) as usize - 1 {
for _ in 0..=len - 1 {
let key = next_key;
let value = next_value;

Expand Down
112 changes: 95 additions & 17 deletions src/serialize/per_type/list.rs
Original file line number Diff line number Diff line change
@@ -1,51 +1,129 @@
// SPDX-License-Identifier: (Apache-2.0 OR MIT)

use crate::opt::{PASSTHROUGH_DATETIME, STRICT_INTEGER};
use crate::serialize::error::SerializeError;
use crate::serialize::per_type::{
BoolSerializer, DateTime, DictGenericSerializer, FloatSerializer, Int53Serializer,
IntSerializer, NoneSerializer, StrSerializer,
};
use crate::serialize::serializer::PyObjectSerializer;
use crate::serialize::state::SerializerState;
use crate::typeref::*;

use serde::ser::{Serialize, SerializeSeq, Serializer};
use std::ptr::NonNull;

pub struct ListSerializer {
ptr: *mut pyo3_ffi::PyObject,
pub struct ZeroListSerializer;

impl ZeroListSerializer {
pub const fn new() -> Self {
Self {}
}
}

impl Serialize for ZeroListSerializer {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
serializer.serialize_bytes(b"[]")
}
}

/// Serializes the elements of a Python list or tuple as one sequence.
///
/// Built through `from_list` or `from_tuple`, which read the item array
/// pointer and the length once at construction time.
pub struct ListTupleSerializer {
// Pointer to the first element slot (the object's `ob_item` array).
data_ptr: *const *mut pyo3_ffi::PyObject,
// Serializer state; the constructors store `state.copy_for_recursive_call()`.
state: SerializerState,
// Handed on unchanged to the nested dict / list / generic object serializers.
default: Option<NonNull<pyo3_ffi::PyObject>>,
// Number of elements, taken from `Py_SIZE` at construction time.
len: usize,
}

impl ListSerializer {
pub fn new(
impl ListTupleSerializer {
/// Builds a serializer over the elements of a Python list.
///
/// Reads the list's `ob_item` pointer and `Py_SIZE` once and stores
/// `state.copy_for_recursive_call()`. The list-or-list-subclass check is a
/// `debug_assert!` only, so release builds trust the caller.
pub fn from_list(
ptr: *mut pyo3_ffi::PyObject,
state: SerializerState,
default: Option<NonNull<pyo3_ffi::PyObject>>,
) -> Self {
debug_assert!(
is_type!(ob_type!(ptr), LIST_TYPE)
|| is_subclass_by_flag!(ob_type!(ptr), Py_TPFLAGS_LIST_SUBCLASS)
);
// SAFETY (assumed, TODO confirm at call sites): `ptr` points to a live
// PyListObject; nothing here verifies that outside debug builds.
let data_ptr = unsafe { (*(ptr as *mut pyo3_ffi::PyListObject)).ob_item };
let len = ffi!(Py_SIZE(ptr)) as usize;
Self {
data_ptr: data_ptr,
len: len,
state: state.copy_for_recursive_call(),
default: default,
}
}

/// Builds a serializer over the elements of a Python tuple.
///
/// Same shape as `from_list`, except the item array is the tuple's inline
/// `ob_item` field, accessed through `as_ptr()`. The tuple-or-tuple-subclass
/// check is a `debug_assert!` only.
pub fn from_tuple(
ptr: *mut pyo3_ffi::PyObject,
state: SerializerState,
default: Option<NonNull<pyo3_ffi::PyObject>>,
) -> Self {
ListSerializer {
ptr: ptr,
debug_assert!(
is_type!(ob_type!(ptr), TUPLE_TYPE)
|| is_subclass_by_flag!(ob_type!(ptr), Py_TPFLAGS_TUPLE_SUBCLASS)
);
// SAFETY (assumed, TODO confirm at call sites): `ptr` points to a live
// PyTupleObject; nothing here verifies that outside debug builds.
let data_ptr = unsafe { (*(ptr as *mut pyo3_ffi::PyTupleObject)).ob_item.as_ptr() };
let len = ffi!(Py_SIZE(ptr)) as usize;
Self {
data_ptr: data_ptr,
len: len,
state: state.copy_for_recursive_call(),
default: default,
}
}
}

impl Serialize for ListSerializer {
impl Serialize for ListTupleSerializer {
/// Serializes every element of the captured item array as one sequence.
///
/// Returns `SerializeError::RecursionLimit` (via `err!`) when the state
/// reports that the recursion limit was hit. Elements are dispatched on
/// their exact type to a dedicated serializer; any other type falls back
/// to `PyObjectSerializer`.
#[inline(never)]
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
if unlikely!(self.state.recursion_limit()) {
err!(SerializeError::RecursionLimit)
}
if ffi!(Py_SIZE(self.ptr)) == 0 {
serializer.serialize_seq(Some(0)).unwrap().end()
} else {
let mut seq = serializer.serialize_seq(None).unwrap();
for idx in 0..=ffi!(Py_SIZE(self.ptr)) - 1 {
let elem =
unsafe { *((*(self.ptr as *mut pyo3_ffi::PyListObject)).ob_item).offset(idx) };
let value = PyObjectSerializer::new(elem, self.state, self.default);
seq.serialize_element(&value)?;
// Empty sequences are expected to be handled before this serializer is
// built; this is only enforced in debug builds.
debug_assert!(self.len >= 1);
let mut seq = serializer.serialize_seq(None).unwrap();
// NOTE(review): `self.len - 1` underflows when `len == 0`; the only guard
// is the debug_assert above. The list branch below checks `Py_SIZE == 0`
// before calling `from_list`; `from_tuple` callers are not visible here —
// TODO confirm they do the same.
for idx in 0..=self.len - 1 {
// SAFETY (assumed): `idx < self.len`, and `data_ptr` addresses `len`
// element slots as captured by `from_list` / `from_tuple`.
let value = unsafe { *((self.data_ptr).add(idx)) };
let value_ob_type = ob_type!(value);
// Dispatch on the element's type, one branch per built-in type.
if is_class_by_type!(value_ob_type, STR_TYPE) {
seq.serialize_element(&StrSerializer::new(value))?;
} else if is_class_by_type!(value_ob_type, INT_TYPE) {
// STRICT_INTEGER selects the Int53 serializer instead of the default one.
if unlikely!(opt_enabled!(self.state.opts(), STRICT_INTEGER)) {
seq.serialize_element(&Int53Serializer::new(value))?;
} else {
seq.serialize_element(&IntSerializer::new(value))?;
}
} else if is_class_by_type!(value_ob_type, BOOL_TYPE) {
seq.serialize_element(&BoolSerializer::new(value))?;
} else if is_class_by_type!(value_ob_type, NONE_TYPE) {
seq.serialize_element(&NoneSerializer::new())?;
} else if is_class_by_type!(value_ob_type, FLOAT_TYPE) {
seq.serialize_element(&FloatSerializer::new(value))?;
} else if is_class_by_type!(value_ob_type, DICT_TYPE) {
let pyvalue = DictGenericSerializer::new(value, self.state, self.default);
seq.serialize_element(&pyvalue)?;
} else if is_class_by_type!(value_ob_type, LIST_TYPE) {
// Nested empty lists take the `[]` shortcut, so the recursive serializer
// is only ever built for a non-empty list.
if ffi!(Py_SIZE(value)) == 0 {
seq.serialize_element(&ZeroListSerializer::new())?;
} else {
let pyvalue = ListTupleSerializer::from_list(value, self.state, self.default);
seq.serialize_element(&pyvalue)?;
}
} else if is_class_by_type!(value_ob_type, DATETIME_TYPE)
&& opt_disabled!(self.state.opts(), PASSTHROUGH_DATETIME)
{
// With PASSTHROUGH_DATETIME enabled, datetimes fall to the generic branch.
seq.serialize_element(&DateTime::new(value, self.state.opts()))?;
} else {
// Everything else goes through the generic object serializer.
let pyvalue = PyObjectSerializer::new(value, self.state, self.default);
seq.serialize_element(&pyvalue)?;
}
seq.end()
}
seq.end()
}
}
4 changes: 1 addition & 3 deletions src/serialize/per_type/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ mod list;
mod none;
mod numpy;
mod pyenum;
mod tuple;
mod unicode;
mod uuid;

Expand All @@ -26,11 +25,10 @@ pub use dict::DictGenericSerializer;
pub use float::FloatSerializer;
pub use fragment::FragmentSerializer;
pub use int::{Int53Serializer, IntSerializer};
pub use list::ListSerializer;
pub use list::{ListTupleSerializer, ZeroListSerializer};
pub use none::NoneSerializer;
pub use numpy::{is_numpy_array, is_numpy_scalar, NumpyScalar, NumpySerializer};
pub use pybool::BoolSerializer;
pub use pyenum::EnumSerializer;
pub use tuple::TupleSerializer;
pub use unicode::{StrSerializer, StrSubclassSerializer};
pub use uuid::UUID;
4 changes: 2 additions & 2 deletions src/serialize/per_type/numpy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use crate::opt::*;

use crate::serialize::error::SerializeError;
use crate::serialize::per_type::{
DateTimeBuffer, DateTimeError, DateTimeLike, DefaultSerializer, Offset,
DateTimeBuffer, DateTimeError, DateTimeLike, DefaultSerializer, Offset, ZeroListSerializer,
};
use crate::serialize::serializer::PyObjectSerializer;
use crate::typeref::{load_numpy_types, ARRAY_STRUCT_STR, DESCR_STR, DTYPE_STR, NUMPY_TYPES};
Expand Down Expand Up @@ -296,7 +296,7 @@ impl Serialize for NumpyArray {
S: Serializer,
{
if unlikely!(!(self.depth >= self.dimensions() || self.shape()[self.depth] != 0)) {
serializer.serialize_seq(Some(0)).unwrap().end()
ZeroListSerializer::new().serialize(serializer)
} else if !self.children.is_empty() {
let mut seq = serializer.serialize_seq(None).unwrap();
for child in &self.children {
Expand Down
Loading

0 comments on commit 34507e4

Please sign in to comment.