Skip to content

Commit

Permalink
int uses _PyLong_AsByteArray()
Browse files Browse the repository at this point in the history
  • Loading branch information
ijl committed Oct 15, 2024
1 parent 459ece3 commit da359ab
Show file tree
Hide file tree
Showing 10 changed files with 112 additions and 123 deletions.
76 changes: 37 additions & 39 deletions src/ffi/long.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,47 +3,42 @@
// longintrepr.h, _longobject, _PyLongValue

#[cfg(Py_3_12)]
#[allow(non_upper_case_globals)]
const SIGN_MASK: usize = 3;

#[cfg(Py_3_12)]
#[allow(non_upper_case_globals)]
const SIGN_ZERO: usize = 1;

#[cfg(Py_3_12)]
#[allow(non_upper_case_globals)]
const _PyLong_NON_SIZE_BITS: usize = 3;
const NON_SIZE_BITS: usize = 3;

#[cfg(Py_3_12)]
#[repr(C)]
struct _PyLongValue {
pub struct _PyLongValue {
pub lv_tag: usize,
pub ob_digit: u32,
}

#[cfg(Py_3_12)]
#[repr(C)]
struct PyLongObject {
pub ob_refcnt: pyo3_ffi::Py_ssize_t,
pub ob_type: *mut pyo3_ffi::PyTypeObject,
pub struct PyLongObject {
pub ob_base: pyo3_ffi::PyObject,
pub long_value: _PyLongValue,
}

/// Reports whether the Python int behind `ptr` is zero (`Py_3_12` layout).
///
/// Reads `long_value.lv_tag`, keeps the bits selected by `SIGN_MASK`, and
/// compares them with `SIGN_ZERO`.
///
/// `ptr` is reinterpreted as `*mut PyLongObject` and dereferenced without a
/// null or type check.
#[cfg(Py_3_12)]
#[inline(always)]
pub fn pylong_is_zero(ptr: *mut pyo3_ffi::PyObject) -> bool {
    let long = ptr as *mut PyLongObject;
    // SAFETY: not verified here; the caller must pass a valid pointer to a
    // live int object.
    let tag = unsafe { (*long).long_value.lv_tag };
    (tag & SIGN_MASK) == SIGN_ZERO
}

#[cfg(not(Py_3_12))]
#[inline(always)]
pub fn pylong_is_zero(ptr: *mut pyo3_ffi::PyObject) -> bool {
unsafe { (*(ptr as *mut pyo3_ffi::PyVarObject)).ob_size == 0 }
#[repr(C)]
pub struct PyLongObject {
pub ob_base: pyo3_ffi::PyVarObject,
pub ob_digit: u32,
}

#[cfg(Py_3_12)]
#[inline(always)]
pub fn pylong_is_unsigned(ptr: *mut pyo3_ffi::PyObject) -> bool {
unsafe {
1 - (((*(ptr as *mut PyLongObject)).long_value.lv_tag & _PyLong_NON_SIZE_BITS) as isize) > 0
}
unsafe { (*(ptr as *mut PyLongObject)).long_value.lv_tag & SIGN_MASK == 0 }
}

#[cfg(not(Py_3_12))]
Expand All @@ -54,41 +49,44 @@ pub fn pylong_is_unsigned(ptr: *mut pyo3_ffi::PyObject) -> bool {

#[cfg(Py_3_12)]
#[inline(always)]
fn pylong_is_compact(ptr: *mut pyo3_ffi::PyObject) -> bool {
unsafe { (*(ptr as *mut PyLongObject)).long_value.lv_tag < (2 << _PyLong_NON_SIZE_BITS) }
pub fn pylong_fits_in_i32(ptr: *mut pyo3_ffi::PyObject) -> bool {
unsafe { (*(ptr as *mut PyLongObject)).long_value.lv_tag < (2 << NON_SIZE_BITS) }
}

#[cfg(Py_3_12)]
#[cfg(not(Py_3_12))]
#[inline(always)]
pub fn pylong_value_unsigned(ptr: *mut pyo3_ffi::PyObject) -> u64 {
if pylong_is_compact(ptr) == true {
unsafe { (*(ptr as *mut PyLongObject)).long_value.ob_digit as u64 }
} else {
ffi!(PyLong_AsUnsignedLongLong(ptr))
}
pub fn pylong_fits_in_i32(ptr: *mut pyo3_ffi::PyObject) -> bool {
unsafe { isize::abs((*(ptr as *mut pyo3_ffi::PyVarObject)).ob_size) == 1 }
}

#[cfg(not(Py_3_12))]
#[cfg(Py_3_12)]
#[inline(always)]
pub fn pylong_value_unsigned(ptr: *mut pyo3_ffi::PyObject) -> u64 {
ffi!(PyLong_AsUnsignedLongLong(ptr))
pub fn pylong_is_zero(ptr: *mut pyo3_ffi::PyObject) -> bool {
unsafe { (*(ptr as *mut PyLongObject)).long_value.lv_tag & SIGN_MASK == SIGN_ZERO }
}

#[cfg(not(Py_3_12))]
#[inline(always)]
pub fn pylong_value_signed(ptr: *mut pyo3_ffi::PyObject) -> i64 {
ffi!(PyLong_AsLongLong(ptr))
pub fn pylong_is_zero(ptr: *mut pyo3_ffi::PyObject) -> bool {
unsafe { (*(ptr as *mut pyo3_ffi::PyVarObject)).ob_size == 0 }
}

#[cfg(Py_3_12)]
#[inline(always)]
pub fn pylong_value_signed(ptr: *mut pyo3_ffi::PyObject) -> i64 {
if pylong_is_compact(ptr) == true {
unsafe {
let sign = 1 - ((*(ptr as *mut PyLongObject)).long_value.lv_tag & SIGN_MASK) as i64;
sign * (*(ptr as *mut PyLongObject)).long_value.ob_digit as i64
pub fn pylong_get_inline_value(ptr: *mut pyo3_ffi::PyObject) -> i64 {
unsafe {
if pylong_is_unsigned(ptr) {
(*(ptr as *mut PyLongObject)).long_value.ob_digit as i64
} else {
-1 * (*(ptr as *mut PyLongObject)).long_value.ob_digit as i64
}
} else {
ffi!(PyLong_AsLongLong(ptr))
}
}

/// Returns `ob_size * ob_digit` for the int behind `ptr` (pre-`Py_3_12` layout).
///
/// `ob_size` is read through `PyVarObject` and the first `ob_digit` through
/// `PyLongObject`; both are widened to `i64` before multiplying.
///
/// NOTE(review): the product is the int's value only when `ob_size` is -1, 0
/// or 1. Presumably callers check `pylong_fits_in_i32` first — confirm.
#[cfg(not(Py_3_12))]
#[inline(always)]
pub fn pylong_get_inline_value(ptr: *mut pyo3_ffi::PyObject) -> i64 {
    // SAFETY: not verified here; the caller must pass a valid pointer to a
    // live int object.
    let (size, digit) = unsafe {
        (
            (*(ptr as *mut pyo3_ffi::PyVarObject)).ob_size,
            (*(ptr as *mut PyLongObject)).ob_digit,
        )
    };
    (size as i64) * (digit as i64)
}
2 changes: 1 addition & 1 deletion src/ffi/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,4 @@ pub mod yyjson;
pub use buffer::*;
pub use bytes::*;
pub use fragment::{orjson_fragmenttype_new, Fragment};
pub use long::{pylong_is_unsigned, pylong_is_zero, pylong_value_signed, pylong_value_unsigned};
pub use long::{pylong_fits_in_i32, pylong_get_inline_value, pylong_is_unsigned, pylong_is_zero};
15 changes: 5 additions & 10 deletions src/serialize/per_type/dict.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ use crate::serialize::obtype::{pyobject_to_obtype, ObType};
use crate::serialize::per_type::datetimelike::DateTimeLike;
use crate::serialize::per_type::{
BoolSerializer, DataclassGenericSerializer, Date, DateTime, DateTimeBuffer, DefaultSerializer,
EnumSerializer, FloatSerializer, FragmentSerializer, Int53Serializer, IntSerializer,
ListTupleSerializer, NoneSerializer, NumpyScalar, NumpySerializer, StrSerializer,
StrSubclassSerializer, Time, ZeroListSerializer, UUID,
EnumSerializer, FloatSerializer, FragmentSerializer, IntSerializer, ListTupleSerializer,
NoneSerializer, NumpyScalar, NumpySerializer, StrSerializer, StrSubclassSerializer, Time,
ZeroListSerializer, UUID,
};
use crate::serialize::serializer::PyObjectSerializer;
use crate::serialize::state::SerializerState;
Expand Down Expand Up @@ -100,13 +100,8 @@ macro_rules! impl_serialize_entry {
$map.serialize_value(&StrSubclassSerializer::new($value))?;
}
ObType::Int => {
if unlikely!(opt_enabled!($self.state.opts(), STRICT_INTEGER)) {
$map.serialize_key($key).unwrap();
$map.serialize_value(&Int53Serializer::new($value))?;
} else {
$map.serialize_key($key).unwrap();
$map.serialize_value(&IntSerializer::new($value))?;
}
$map.serialize_key($key).unwrap();
$map.serialize_value(&IntSerializer::new($value, $self.state.opts()))?;
}
ObType::None => {
$map.serialize_key($key).unwrap();
Expand Down
101 changes: 51 additions & 50 deletions src/serialize/per_type/int.rs
Original file line number Diff line number Diff line change
@@ -1,79 +1,80 @@
// SPDX-License-Identifier: (Apache-2.0 OR MIT)

use crate::ffi::{pylong_is_unsigned, pylong_is_zero, pylong_value_signed, pylong_value_unsigned};
use crate::ffi::{pylong_fits_in_i32, pylong_get_inline_value, pylong_is_unsigned, pylong_is_zero};
use crate::opt::{Opt, STRICT_INTEGER};
use crate::serialize::error::SerializeError;
use serde::ser::{Serialize, Serializer};

use core::ffi::c_uchar;
use core::mem::transmute;

// https://tools.ietf.org/html/rfc7159#section-6
// "[-(2**53)+1, (2**53)-1]"
const STRICT_INT_MIN: i64 = -9007199254740991;
const STRICT_INT_MAX: i64 = 9007199254740991;

#[repr(transparent)]
pub struct IntSerializer {
ptr: *mut pyo3_ffi::PyObject,
opts: Opt,
}

impl IntSerializer {
pub fn new(ptr: *mut pyo3_ffi::PyObject) -> Self {
IntSerializer { ptr: ptr }
pub fn new(ptr: *mut pyo3_ffi::PyObject, opts: Opt) -> Self {
IntSerializer {
ptr: ptr,
opts: opts,
}
}
}

impl Serialize for IntSerializer {
#[inline(never)]
#[inline(always)]
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
if pylong_is_zero(self.ptr) {
serializer.serialize_u64(0)
} else if pylong_is_unsigned(self.ptr) {
let val = pylong_value_unsigned(self.ptr);
if unlikely!(val == u64::MAX) && !ffi!(PyErr_Occurred()).is_null() {
err!(SerializeError::Integer64Bits)
} else {
serializer.serialize_u64(val)
unsafe {
if pylong_is_zero(self.ptr) {
return serializer.serialize_bytes(b"0");
}
} else {
let val = pylong_value_signed(self.ptr);
if unlikely!(val == -1) && !ffi!(PyErr_Occurred()).is_null() {
err!(SerializeError::Integer64Bits)
}
serializer.serialize_i64(val)
}
}
}

#[repr(transparent)]
pub struct Int53Serializer {
ptr: *mut pyo3_ffi::PyObject,
}

impl Int53Serializer {
pub fn new(ptr: *mut pyo3_ffi::PyObject) -> Self {
Int53Serializer { ptr: ptr }
}
}

impl Serialize for Int53Serializer {
#[cold]
#[inline(never)]
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
let val = pylong_value_signed(self.ptr);
if unlikely!(val == -1) {
if ffi!(PyErr_Occurred()).is_null() {
serializer.serialize_i64(val)
let is_signed = !pylong_is_unsigned(self.ptr) as i32;
if pylong_fits_in_i32(self.ptr) {
if is_signed == 0 {
serializer.serialize_u64(pylong_get_inline_value(self.ptr) as u64)
} else {
serializer.serialize_i64(pylong_get_inline_value(self.ptr) as i64)
}
} else {
err!(SerializeError::Integer53Bits)
let mut buffer: [u8; 8] = [0; 8];
let ret = pyo3_ffi::_PyLong_AsByteArray(
self.ptr as *mut pyo3_ffi::PyLongObject,
buffer.as_mut_ptr() as *mut c_uchar,
8,
1,
is_signed,
);
if unlikely!(ret == -1) {
ffi!(PyErr_Clear());
err!(SerializeError::Integer64Bits)
}
if is_signed == 0 {
let val = transmute::<[u8; 8], u64>(buffer);
if unlikely!(opt_enabled!(self.opts, STRICT_INTEGER))
&& val > STRICT_INT_MAX as u64
{
err!(SerializeError::Integer53Bits)
}
serializer.serialize_u64(val)
} else {
let val = transmute::<[u8; 8], i64>(buffer);
if unlikely!(opt_enabled!(self.opts, STRICT_INTEGER))
&& !(STRICT_INT_MIN..=STRICT_INT_MAX).contains(&val)
{
err!(SerializeError::Integer53Bits)
}
serializer.serialize_i64(val)
}
}
} else if !(STRICT_INT_MIN..=STRICT_INT_MAX).contains(&val) {
err!(SerializeError::Integer53Bits)
} else {
serializer.serialize_i64(val)
}
}
}
12 changes: 3 additions & 9 deletions src/serialize/per_type/list.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
// SPDX-License-Identifier: (Apache-2.0 OR MIT)

use crate::opt::STRICT_INTEGER;
use crate::serialize::error::SerializeError;
use crate::serialize::obtype::{pyobject_to_obtype, ObType};
use crate::serialize::per_type::{
BoolSerializer, DataclassGenericSerializer, Date, DateTime, DefaultSerializer,
DictGenericSerializer, EnumSerializer, FloatSerializer, FragmentSerializer, Int53Serializer,
IntSerializer, NoneSerializer, NumpyScalar, NumpySerializer, StrSerializer,
StrSubclassSerializer, Time, UUID,
DictGenericSerializer, EnumSerializer, FloatSerializer, FragmentSerializer, IntSerializer,
NoneSerializer, NumpyScalar, NumpySerializer, StrSerializer, StrSubclassSerializer, Time, UUID,
};
use crate::serialize::serializer::PyObjectSerializer;
use crate::serialize::state::SerializerState;
Expand Down Expand Up @@ -102,11 +100,7 @@ impl Serialize for ListTupleSerializer {
seq.serialize_element(&StrSubclassSerializer::new(value))?;
}
ObType::Int => {
if unlikely!(opt_enabled!(self.state.opts(), STRICT_INTEGER)) {
seq.serialize_element(&Int53Serializer::new(value))?;
} else {
seq.serialize_element(&IntSerializer::new(value))?;
}
seq.serialize_element(&IntSerializer::new(value, self.state.opts()))?;
}
ObType::None => {
seq.serialize_element(&NoneSerializer::new()).unwrap();
Expand Down
2 changes: 1 addition & 1 deletion src/serialize/per_type/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ pub use default::DefaultSerializer;
pub use dict::DictGenericSerializer;
pub use float::FloatSerializer;
pub use fragment::FragmentSerializer;
pub use int::{Int53Serializer, IntSerializer};
pub use int::IntSerializer;
pub use list::{ListTupleSerializer, ZeroListSerializer};
pub use none::NoneSerializer;
pub use numpy::{is_numpy_array, is_numpy_scalar, NumpyScalar, NumpySerializer};
Expand Down
16 changes: 5 additions & 11 deletions src/serialize/serializer.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
// SPDX-License-Identifier: (Apache-2.0 OR MIT)

use crate::opt::{Opt, APPEND_NEWLINE, INDENT_2, STRICT_INTEGER};
use crate::opt::{Opt, APPEND_NEWLINE, INDENT_2};
use crate::serialize::obtype::{pyobject_to_obtype, ObType};
use crate::serialize::per_type::{
BoolSerializer, DataclassGenericSerializer, Date, DateTime, DefaultSerializer,
DictGenericSerializer, EnumSerializer, FloatSerializer, FragmentSerializer, Int53Serializer,
IntSerializer, ListTupleSerializer, NoneSerializer, NumpyScalar, NumpySerializer,
StrSerializer, StrSubclassSerializer, Time, ZeroListSerializer, UUID,
DictGenericSerializer, EnumSerializer, FloatSerializer, FragmentSerializer, IntSerializer,
ListTupleSerializer, NoneSerializer, NumpyScalar, NumpySerializer, StrSerializer,
StrSubclassSerializer, Time, ZeroListSerializer, UUID,
};
use crate::serialize::state::SerializerState;
use crate::serialize::writer::{to_writer, to_writer_pretty, BytesWriter};
Expand Down Expand Up @@ -68,13 +68,7 @@ impl Serialize for PyObjectSerializer {
match pyobject_to_obtype(self.ptr, self.state.opts()) {
ObType::Str => StrSerializer::new(self.ptr).serialize(serializer),
ObType::StrSubclass => StrSubclassSerializer::new(self.ptr).serialize(serializer),
ObType::Int => {
if unlikely!(opt_enabled!(self.state.opts(), STRICT_INTEGER)) {
Int53Serializer::new(self.ptr).serialize(serializer)
} else {
IntSerializer::new(self.ptr).serialize(serializer)
}
}
ObType::Int => IntSerializer::new(self.ptr, self.state.opts()).serialize(serializer),
ObType::None => NoneSerializer::new().serialize(serializer),
ObType::Float => FloatSerializer::new(self.ptr).serialize(serializer),
ObType::Bool => BoolSerializer::new(self.ptr).serialize(serializer),
Expand Down
2 changes: 1 addition & 1 deletion src/serialize/writer/json.rs
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ where
.map_err(Error::io)
}
}
#[inline(never)]
#[inline]
fn serialize_f64(self, value: f64) -> Result<()> {
if unlikely!(value.is_infinite() || value.is_nan()) {
self.serialize_unit()
Expand Down
1 change: 0 additions & 1 deletion test/test_error.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,4 +188,3 @@ def test_dumps_normalize_exception(self):
with pytest.raises(orjson.JSONEncodeError) as exc_info:
orjson.dumps(10**60)
assert exc_info.type == orjson.JSONEncodeError
assert isinstance(exc_info.value.__cause__, OverflowError)
8 changes: 8 additions & 0 deletions test/test_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,6 +420,14 @@ def test_int_53_exc_usize(self):
with pytest.raises(orjson.JSONEncodeError):
orjson.dumps(val, option=orjson.OPT_STRICT_INTEGER)

def test_int_53_exc_128(self):
"""
int 53-bit exception on 128-bit
"""
val = 2**65
with pytest.raises(orjson.JSONEncodeError):
orjson.dumps(val, option=orjson.OPT_STRICT_INTEGER)

def test_int_64(self):
"""
int 64-bit
Expand Down

0 comments on commit da359ab

Please sign in to comment.