diff --git a/src/serialize/per_type/dataclass.rs b/src/serialize/per_type/dataclass.rs index a8a77a7f..5129d72c 100644 --- a/src/serialize/per_type/dataclass.rs +++ b/src/serialize/per_type/dataclass.rs @@ -1,6 +1,7 @@ // SPDX-License-Identifier: (Apache-2.0 OR MIT) use crate::serialize::error::SerializeError; +use crate::serialize::per_type::dict::ZeroDictSerializer; use crate::serialize::serializer::PyObjectSerializer; use crate::serialize::state::SerializerState; use crate::str::unicode_to_str; @@ -86,9 +87,9 @@ impl Serialize for DataclassFastSerializer { where S: Serializer, { - let len = ffi!(Py_SIZE(self.ptr)); + let len = ffi!(Py_SIZE(self.ptr)) as usize; if unlikely!(len == 0) { - return serializer.serialize_map(Some(0)).unwrap().end(); + return ZeroDictSerializer::new().serialize(serializer); } let mut map = serializer.serialize_map(None).unwrap(); let mut next_key: *mut pyo3_ffi::PyObject = std::ptr::null_mut(); @@ -152,7 +153,7 @@ impl Serialize for DataclassFallbackSerializer { ffi!(Py_DECREF(fields)); let len = ffi!(Py_SIZE(fields)) as usize; if unlikely!(len == 0) { - return serializer.serialize_map(Some(0)).unwrap().end(); + return ZeroDictSerializer::new().serialize(serializer); } let mut map = serializer.serialize_map(None).unwrap(); diff --git a/src/serialize/per_type/dict.rs b/src/serialize/per_type/dict.rs index 914a0134..89f7ca00 100644 --- a/src/serialize/per_type/dict.rs +++ b/src/serialize/per_type/dict.rs @@ -3,16 +3,40 @@ use crate::opt::*; use crate::serialize::error::SerializeError; use crate::serialize::obtype::{pyobject_to_obtype, ObType}; -use crate::serialize::per_type::{Date, DateTime, DateTimeBuffer, DateTimeLike, Time, UUID}; +use crate::serialize::per_type::{ + BoolSerializer, Date, DateTime, DateTimeBuffer, DateTimeLike, FloatSerializer, Int53Serializer, + IntSerializer, ListTupleSerializer, NoneSerializer, StrSerializer, Time, ZeroListSerializer, + UUID, +}; use crate::serialize::serializer::PyObjectSerializer; use crate::serialize::state::SerializerState; use crate::str::{unicode_to_str, unicode_to_str_via_ffi}; -use crate::typeref::{STR_TYPE, TRUE, VALUE_STR}; +use crate::typeref::{ + BOOL_TYPE, DATETIME_TYPE, DICT_TYPE, FLOAT_TYPE, INT_TYPE, LIST_TYPE, NONE_TYPE, STR_TYPE, + TRUE, VALUE_STR, +}; use compact_str::CompactString; use serde::ser::{Serialize, SerializeMap, Serializer}; use smallvec::SmallVec; use std::ptr::NonNull; +pub struct ZeroDictSerializer; + +impl ZeroDictSerializer { + pub const fn new() -> Self { + Self {} + } +} + +impl Serialize for ZeroDictSerializer { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + serializer.serialize_bytes(b"{}") + } +} + pub struct DictGenericSerializer { ptr: *mut pyo3_ffi::PyObject, state: SerializerState, @@ -42,8 +66,8 @@ impl Serialize for DictGenericSerializer { if unlikely!(self.state.recursion_limit()) { err!(SerializeError::RecursionLimit) } - if unlikely!(ffi!(Py_SIZE(self.ptr)) == 0) { - serializer.serialize_map(Some(0)).unwrap().end() + if ffi!(Py_SIZE(self.ptr)) == 0 { + ZeroDictSerializer::new().serialize(serializer) } else if opt_disabled!(self.state.opts(), SORT_OR_NON_STR_KEYS) { Dict::new(self.ptr, self.state, self.default).serialize(serializer) } else if opt_enabled!(self.state.opts(), NON_STR_KEYS) { @@ -73,11 +97,12 @@ impl Dict { } } impl Serialize for Dict { - #[inline(always)] fn serialize(&self, serializer: S) -> Result where S: Serializer, { + debug_assert!(ffi!(Py_SIZE(self.ptr)) > 0); + let mut map = serializer.serialize_map(None).unwrap(); let mut next_key: *mut pyo3_ffi::PyObject = std::ptr::null_mut(); @@ -92,17 +117,58 @@ impl Serialize for Dict { pydict_next!(self.ptr, &mut pos, &mut next_key, &mut next_value); - if unlikely!(unsafe { ob_type!(key) != STR_TYPE }) { - err!(SerializeError::KeyMustBeStr) + // key + { + let key_ob_type = ob_type!(key); + if unlikely!(!is_class_by_type!(key_ob_type, STR_TYPE)) { + err!(SerializeError::KeyMustBeStr) + } + let key_as_str = unicode_to_str(key); + if unlikely!(key_as_str.is_none()) { + err!(SerializeError::InvalidStr) + } + map.serialize_key(key_as_str.unwrap()).unwrap(); } - let key_as_str = unicode_to_str(key); - if unlikely!(key_as_str.is_none()) { - err!(SerializeError::InvalidStr) + + // value + { + let value_ob_type = ob_type!(value); + if is_class_by_type!(value_ob_type, STR_TYPE) { + map.serialize_value(&StrSerializer::new(value))?; + } else if is_class_by_type!(value_ob_type, INT_TYPE) { + if unlikely!(opt_enabled!(self.state.opts(), STRICT_INTEGER)) { + map.serialize_value(&Int53Serializer::new(value))?; + } else { + map.serialize_value(&IntSerializer::new(value))?; + } + } else if is_class_by_type!(value_ob_type, BOOL_TYPE) { + map.serialize_value(&BoolSerializer::new(value))?; + } else if is_class_by_type!(value_ob_type, NONE_TYPE) { + map.serialize_value(&NoneSerializer::new())?; + } else if is_class_by_type!(value_ob_type, FLOAT_TYPE) { + map.serialize_value(&FloatSerializer::new(value))?; + } else if is_class_by_type!(value_ob_type, DICT_TYPE) { + let pyvalue = DictGenericSerializer::new(value, self.state, self.default); + map.serialize_value(&pyvalue)?; + } else if is_class_by_type!(value_ob_type, LIST_TYPE) { + if ffi!(Py_SIZE(value)) == 0 { + map.serialize_value(&ZeroListSerializer::new())?; + } else { + let pyvalue = + ListTupleSerializer::from_list(value, self.state, self.default); + map.serialize_value(&pyvalue)?; + } + } else if is_class_by_type!(value_ob_type, DATETIME_TYPE) + && opt_disabled!(self.state.opts(), PASSTHROUGH_DATETIME) + { + map.serialize_value(&DateTime::new(value, self.state.opts()))?; + } else { + let pyvalue = PyObjectSerializer::new(value, self.state, self.default); + map.serialize_value(&pyvalue)?; + } } - let pyvalue = PyObjectSerializer::new(value, self.state, self.default); - map.serialize_key(key_as_str.unwrap()).unwrap(); - map.serialize_value(&pyvalue)?; } + map.end() } } @@ -134,6 +200,7 @@ impl Serialize for DictSortedKey { S: Serializer, { let len = ffi!(Py_SIZE(self.ptr)) as usize; + debug_assert!(len > 0); let mut items: SmallVec<[(&str, *mut pyo3_ffi::PyObject); 8]> = SmallVec::with_capacity(len); @@ -298,6 +365,7 @@ impl Serialize for DictNonStrKey { S: Serializer, { let len = ffi!(Py_SIZE(self.ptr)) as usize; + debug_assert!(len > 0); let mut items: SmallVec<[(CompactString, *mut pyo3_ffi::PyObject); 8]> = SmallVec::with_capacity(len); @@ -309,7 +377,7 @@ impl Serialize for DictNonStrKey { let mut pos = 0; pydict_next!(self.ptr, &mut pos, &mut next_key, &mut next_value); - for _ in 0..=ffi!(Py_SIZE(self.ptr)) as usize - 1 { + for _ in 0..=len - 1 { let key = next_key; let value = next_value; diff --git a/src/serialize/per_type/list.rs b/src/serialize/per_type/list.rs index ed6d235e..7a9d047a 100644 --- a/src/serialize/per_type/list.rs +++ b/src/serialize/per_type/list.rs @@ -1,33 +1,84 @@ // SPDX-License-Identifier: (Apache-2.0 OR MIT) +use crate::opt::{PASSTHROUGH_DATETIME, STRICT_INTEGER}; use crate::serialize::error::SerializeError; +use crate::serialize::per_type::{ + BoolSerializer, DateTime, DictGenericSerializer, FloatSerializer, Int53Serializer, + IntSerializer, NoneSerializer, StrSerializer, +}; use crate::serialize::serializer::PyObjectSerializer; use crate::serialize::state::SerializerState; +use crate::typeref::*; use serde::ser::{Serialize, SerializeSeq, Serializer}; use std::ptr::NonNull; -pub struct ListSerializer { - ptr: *mut pyo3_ffi::PyObject, +pub struct ZeroListSerializer; + +impl ZeroListSerializer { + pub const fn new() -> Self { + Self {} + } +} + +impl Serialize for ZeroListSerializer { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + serializer.serialize_bytes(b"[]") + } +} + +pub struct ListTupleSerializer { + data_ptr: *const *mut pyo3_ffi::PyObject, state: SerializerState, default: Option>, + len: usize, } -impl ListSerializer { - pub fn new( +impl ListTupleSerializer { + pub fn from_list( + ptr: *mut pyo3_ffi::PyObject, + state: SerializerState, + default: Option>, + ) -> Self { + debug_assert!( + is_type!(ob_type!(ptr), LIST_TYPE) + || is_subclass_by_flag!(ob_type!(ptr), Py_TPFLAGS_LIST_SUBCLASS) + ); + let data_ptr = unsafe { (*(ptr as *mut pyo3_ffi::PyListObject)).ob_item }; + let len = ffi!(Py_SIZE(ptr)) as usize; + Self { + data_ptr: data_ptr, + len: len, + state: state.copy_for_recursive_call(), + default: default, + } + } + + pub fn from_tuple( ptr: *mut pyo3_ffi::PyObject, state: SerializerState, default: Option>, ) -> Self { - ListSerializer { - ptr: ptr, + debug_assert!( + is_type!(ob_type!(ptr), TUPLE_TYPE) + || is_subclass_by_flag!(ob_type!(ptr), Py_TPFLAGS_TUPLE_SUBCLASS) + ); + let data_ptr = unsafe { (*(ptr as *mut pyo3_ffi::PyTupleObject)).ob_item.as_ptr() }; + let len = ffi!(Py_SIZE(ptr)) as usize; + Self { + data_ptr: data_ptr, + len: len, state: state.copy_for_recursive_call(), default: default, } } } -impl Serialize for ListSerializer { +impl Serialize for ListTupleSerializer { + #[inline(never)] fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -35,17 +86,44 @@ impl Serialize for ListSerializer { if unlikely!(self.state.recursion_limit()) { err!(SerializeError::RecursionLimit) } - if ffi!(Py_SIZE(self.ptr)) == 0 { - serializer.serialize_seq(Some(0)).unwrap().end() - } else { - let mut seq = serializer.serialize_seq(None).unwrap(); - for idx in 0..=ffi!(Py_SIZE(self.ptr)) - 1 { - let elem = - unsafe { *((*(self.ptr as *mut pyo3_ffi::PyListObject)).ob_item).offset(idx) }; - let value = PyObjectSerializer::new(elem, self.state, self.default); - seq.serialize_element(&value)?; + debug_assert!(self.len >= 1); + let mut seq = serializer.serialize_seq(None).unwrap(); + for idx in 0..=self.len - 1 { + let value = unsafe { *((self.data_ptr).add(idx)) }; + let value_ob_type = ob_type!(value); + if is_class_by_type!(value_ob_type, STR_TYPE) { + seq.serialize_element(&StrSerializer::new(value))?; + } else if is_class_by_type!(value_ob_type, INT_TYPE) { + if unlikely!(opt_enabled!(self.state.opts(), STRICT_INTEGER)) { + seq.serialize_element(&Int53Serializer::new(value))?; + } else { + seq.serialize_element(&IntSerializer::new(value))?; + } + } else if is_class_by_type!(value_ob_type, BOOL_TYPE) { + seq.serialize_element(&BoolSerializer::new(value))?; + } else if is_class_by_type!(value_ob_type, NONE_TYPE) { + seq.serialize_element(&NoneSerializer::new())?; + } else if is_class_by_type!(value_ob_type, FLOAT_TYPE) { + seq.serialize_element(&FloatSerializer::new(value))?; + } else if is_class_by_type!(value_ob_type, DICT_TYPE) { + let pyvalue = DictGenericSerializer::new(value, self.state, self.default); + seq.serialize_element(&pyvalue)?; + } else if is_class_by_type!(value_ob_type, LIST_TYPE) { + if ffi!(Py_SIZE(value)) == 0 { + seq.serialize_element(&ZeroListSerializer::new())?; + } else { + let pyvalue = ListTupleSerializer::from_list(value, self.state, self.default); + seq.serialize_element(&pyvalue)?; + } + } else if is_class_by_type!(value_ob_type, DATETIME_TYPE) + && opt_disabled!(self.state.opts(), PASSTHROUGH_DATETIME) + { + seq.serialize_element(&DateTime::new(value, self.state.opts()))?; + } else { + let pyvalue = PyObjectSerializer::new(value, self.state, self.default); + seq.serialize_element(&pyvalue)?; } - seq.end() } + seq.end() } } diff --git a/src/serialize/per_type/mod.rs b/src/serialize/per_type/mod.rs index 3265b08f..b0ec5b59 100644 --- a/src/serialize/per_type/mod.rs +++ b/src/serialize/per_type/mod.rs @@ -14,7 +14,6 @@ mod list; mod none; mod numpy; mod pyenum; -mod tuple; mod unicode; mod uuid; @@ -26,11 +25,10 @@ pub use dict::DictGenericSerializer; pub use float::FloatSerializer; pub use fragment::FragmentSerializer; pub use int::{Int53Serializer, IntSerializer}; -pub use list::ListSerializer; +pub use list::{ListTupleSerializer, ZeroListSerializer}; pub use none::NoneSerializer; pub use numpy::{is_numpy_array, is_numpy_scalar, NumpyScalar, NumpySerializer}; pub use pybool::BoolSerializer; pub use pyenum::EnumSerializer; -pub use tuple::TupleSerializer; pub use unicode::{StrSerializer, StrSubclassSerializer}; pub use uuid::UUID; diff --git a/src/serialize/per_type/numpy.rs b/src/serialize/per_type/numpy.rs index 9ee4cbac..95122e2e 100644 --- a/src/serialize/per_type/numpy.rs +++ b/src/serialize/per_type/numpy.rs @@ -2,7 +2,7 @@ use crate::opt::*; use crate::serialize::error::SerializeError; use crate::serialize::per_type::{ - DateTimeBuffer, DateTimeError, DateTimeLike, DefaultSerializer, Offset, + DateTimeBuffer, DateTimeError, DateTimeLike, DefaultSerializer, Offset, ZeroListSerializer, }; use crate::serialize::serializer::PyObjectSerializer; use crate::typeref::{load_numpy_types, ARRAY_STRUCT_STR, DESCR_STR, DTYPE_STR, NUMPY_TYPES}; @@ -296,7 +296,7 @@ impl Serialize for NumpyArray { S: Serializer, { if unlikely!(!(self.depth >= self.dimensions() || self.shape()[self.depth] != 0)) { - serializer.serialize_seq(Some(0)).unwrap().end() + ZeroListSerializer::new().serialize(serializer) } else if !self.children.is_empty() { let mut seq = serializer.serialize_seq(None).unwrap(); for child in &self.children { diff --git a/src/serialize/per_type/tuple.rs b/src/serialize/per_type/tuple.rs deleted file mode 100644 index e85f040e..00000000 --- a/src/serialize/per_type/tuple.rs +++ /dev/null @@ -1,47 +0,0 @@ -// SPDX-License-Identifier: (Apache-2.0 OR MIT) - -use crate::serialize::serializer::PyObjectSerializer; -use crate::serialize::state::SerializerState; - -use serde::ser::{Serialize, SerializeSeq, Serializer}; -use std::ptr::NonNull; - -pub struct TupleSerializer { - ptr: *mut pyo3_ffi::PyObject, - state: SerializerState, - default: Option>, -} - -impl TupleSerializer { - pub fn new( - ptr: *mut pyo3_ffi::PyObject, - state: SerializerState, - default: Option>, - ) -> Self { - TupleSerializer { - ptr: ptr, - state: state.copy_for_recursive_call(), - default: default, - } - } -} - -impl Serialize for TupleSerializer { - #[inline(never)] - fn serialize(&self, serializer: S) -> Result - where - S: Serializer, - { - if ffi!(Py_SIZE(self.ptr)) == 0 { - serializer.serialize_seq(Some(0)).unwrap().end() - } else { - let mut seq = serializer.serialize_seq(None).unwrap(); - for i in 0..=ffi!(Py_SIZE(self.ptr)) as usize - 1 { - let elem = ffi!(PyTuple_GET_ITEM(self.ptr, i as isize)); - let value = PyObjectSerializer::new(elem, self.state, self.default); - seq.serialize_element(&value)?; - } - seq.end() - } - } -} diff --git a/src/serialize/serializer.rs b/src/serialize/serializer.rs index 61094319..6f71ce86 100644 --- a/src/serialize/serializer.rs +++ b/src/serialize/serializer.rs @@ -5,8 +5,8 @@ use crate::serialize::obtype::{pyobject_to_obtype, ObType}; use crate::serialize::per_type::{ BoolSerializer, DataclassGenericSerializer, Date, DateTime, DefaultSerializer, DictGenericSerializer, EnumSerializer, FloatSerializer, FragmentSerializer, Int53Serializer, - IntSerializer, ListSerializer, NoneSerializer, NumpyScalar, NumpySerializer, StrSerializer, - StrSubclassSerializer, Time, TupleSerializer, UUID, + IntSerializer, ListTupleSerializer, NoneSerializer, NumpyScalar, NumpySerializer, + StrSerializer, StrSubclassSerializer, Time, ZeroListSerializer, UUID, }; use crate::serialize::state::SerializerState; use crate::serialize::writer::{to_writer, to_writer_pretty, BytesWriter}; @@ -86,10 +86,20 @@ impl Serialize for PyObjectSerializer { DictGenericSerializer::new(self.ptr, self.state, self.default).serialize(serializer) } ObType::List => { - ListSerializer::new(self.ptr, self.state, self.default).serialize(serializer) + if ffi!(Py_SIZE(self.ptr)) == 0 { + ZeroListSerializer::new().serialize(serializer) + } else { + ListTupleSerializer::from_list(self.ptr, self.state, self.default) + .serialize(serializer) + } } ObType::Tuple => { - TupleSerializer::new(self.ptr, self.state, self.default).serialize(serializer) + if ffi!(Py_SIZE(self.ptr)) == 0 { + ZeroListSerializer::new().serialize(serializer) + } else { + ListTupleSerializer::from_tuple(self.ptr, self.state, self.default) + .serialize(serializer) + } } ObType::Dataclass => DataclassGenericSerializer::new(self).serialize(serializer), ObType::Enum => EnumSerializer::new(self).serialize(serializer), diff --git a/test/test_indent.py b/test/test_indent.py index 888ff969..4179a20f 100644 --- a/test/test_indent.py +++ b/test/test_indent.py @@ -49,6 +49,11 @@ def test_options(self): == b'{\n "1": 1,\n "a": "1970-01-01T00:00:00+00:00",\n "b": true\n}' ) + def test_empty(self): + obj = [{}, [[[]]], {"key": []}] + ref = b'[\n {},\n [\n [\n []\n ]\n ],\n {\n "key": []\n }\n]' + assert orjson.dumps(obj, option=orjson.OPT_INDENT_2) == ref + def test_twitter_pretty(self): """ twitter.json pretty