From 5ae9b5827aa836df2ea3801b82df2889586bef72 Mon Sep 17 00:00:00 2001 From: ijl Date: Thu, 18 Jan 2024 13:45:36 +0000 Subject: [PATCH] ahash compile-time-rng --- Cargo.lock | 53 +++++++++++++++++++++++++++++++++++++ Cargo.toml | 2 +- src/deserialize/cache.rs | 9 +++---- src/deserialize/pyobject.rs | 43 +++++++++++++++++------------- src/typeref.rs | 21 --------------- 5 files changed, 82 insertions(+), 46 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6067fb74..a61bff64 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9,6 +9,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "91429305e9f0a25f6205c5b8e0d2db09e0708a7a6df0f42212bb56c32c8ac97a" dependencies = [ "cfg-if", + "const-random", "once_cell", "version_check", "zerocopy", @@ -97,6 +98,32 @@ dependencies = [ "static_assertions", ] +[[package]] +name = "const-random" +version = "0.1.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aaf16c9c2c612020bcfd042e170f6e32de9b9d75adb5277cdbbd2e2c8c8299a" +dependencies = [ + "const-random-macro", +] + +[[package]] +name = "const-random-macro" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" +dependencies = [ + "getrandom", + "once_cell", + "tiny-keccak", +] + +[[package]] +name = "crunchy" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" + [[package]] name = "encoding_rs" version = "0.8.33" @@ -106,6 +133,17 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "getrandom" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "190092ea657667030ac6a35e305e62fc4dd69fd98ac98631e5d3a2b1575a12b5" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + [[package]] name = "itoa" version = "1.0.10" @@ -298,6 +336,15 @@ version = "0.12.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69758bda2e78f098e4ccb393021a0963bb3442eac05f135c30f61b7370bbafae" +[[package]] +name = "tiny-keccak" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +dependencies = [ + "crunchy", +] + [[package]] name = "unicode-ident" version = "1.0.12" @@ -310,6 +357,12 @@ version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + [[package]] name = "zerocopy" version = "0.7.32" diff --git a/Cargo.toml b/Cargo.toml index 2c247052..16259c1b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -45,7 +45,7 @@ no-panic = [ yyjson = [] [dependencies] -ahash = { version = "=0.8.6", default_features = false } +ahash = { version = "=0.8.6", default_features = false, features = ["compile-time-rng"] } arrayvec = { version = "0.7", default_features = false, features = ["std", "serde"] } associative-cache = { version = "2", default_features = false } beef = { version = "0.5", default_features = false, features = ["impl_serde"] } diff --git a/src/deserialize/cache.rs b/src/deserialize/cache.rs index 4ba30fc8..ecf8607a 100644 --- a/src/deserialize/cache.rs +++ b/src/deserialize/cache.rs @@ -1,10 +1,8 @@ // SPDX-License-Identifier: (Apache-2.0 OR MIT) -use crate::typeref::HASH_BUILDER; use associative_cache::replacement::RoundRobinReplacement; use associative_cache::*; use once_cell::unsync::OnceCell; -use std::hash::BuildHasher; use std::hash::Hasher; use std::os::raw::c_void; @@ -42,12 +40,13 @@ pub type KeyMap = pub static mut KEY_MAP: OnceCell = OnceCell::new(); pub fn cache_hash(key: &[u8]) -> u64 { + // try to omit code for >64 path in ahash + debug_assert!(key.len() <= 64); #[cfg(feature = "intrinsics")] unsafe { - std::intrinsics::assume(!key.is_empty()); std::intrinsics::assume(key.len() <= 64); - } - let mut hasher = unsafe { HASH_BUILDER.get().unwrap().build_hasher() }; + }; + let mut hasher = ahash::AHasher::default(); hasher.write(key); hasher.finish() } diff --git a/src/deserialize/pyobject.rs b/src/deserialize/pyobject.rs index 0da2f9fd..2b1fa4b1 100644 --- a/src/deserialize/pyobject.rs +++ b/src/deserialize/pyobject.rs @@ -2,34 +2,39 @@ use crate::deserialize::cache::*; use crate::str::{hash_str, unicode_from_str}; -use crate::typeref::{EMPTY_UNICODE, FALSE, NONE, TRUE}; +use crate::typeref::{FALSE, NONE, TRUE}; use std::ptr::NonNull; pub fn get_unicode_key(key_str: &str) -> *mut pyo3_ffi::PyObject { - let pykey: *mut pyo3_ffi::PyObject; - if unlikely!(key_str.len() > 64) { - pykey = unicode_from_str(key_str); - hash_str(pykey); - } else if unlikely!(key_str.is_empty()) { - pykey = use_immortal!(EMPTY_UNICODE); + let pykey = if unlikely!(key_str.len() > 64) { + create_str_impl(key_str) } else { - let hash = cache_hash(key_str.as_bytes()); - let map = unsafe { KEY_MAP.get_mut().unwrap_or_else(|| unreachable!()) }; - let entry = map.entry(&hash).or_insert_with( - || hash, - || { - let pyob = unicode_from_str(key_str); - hash_str(pyob); - CachedKey::new(pyob) - }, - ); - pykey = entry.get(); - } + get_unicode_key_impl(key_str) + }; debug_assert!(ffi!(Py_REFCNT(pykey)) >= 1); debug_assert!(unsafe { (*pykey.cast::()).hash != -1 }); pykey } +fn get_unicode_key_impl(key_str: &str) -> *mut pyo3_ffi::PyObject { + let hash = cache_hash(key_str.as_bytes()); + let map = unsafe { KEY_MAP.get_mut().unwrap_or_else(|| unreachable!()) }; + let entry = map.entry(&hash).or_insert_with( + || hash, + || { + let pyob = create_str_impl(key_str); + CachedKey::new(pyob) + }, + ); + entry.get() +} + +fn create_str_impl(key_str: &str) -> *mut pyo3_ffi::PyObject { + let pyob = unicode_from_str(key_str); + hash_str(pyob); + pyob +} + #[allow(dead_code)] #[inline(always)] pub fn parse_bool(val: bool) -> NonNull { diff --git a/src/typeref.rs b/src/typeref.rs index dd1c2dbb..3e6492f0 100644 --- a/src/typeref.rs +++ b/src/typeref.rs @@ -1,7 +1,6 @@ // SPDX-License-Identifier: (Apache-2.0 OR MIT) use crate::ffi::orjson_fragmenttype_new; -use ahash::RandomState; use once_cell::race::{OnceBool, OnceBox}; use pyo3_ffi::*; #[cfg(feature = "yyjson")] @@ -76,23 +75,6 @@ pub static mut DESCR_STR: *mut PyObject = null_mut(); pub static mut VALUE_STR: *mut PyObject = null_mut(); pub static mut INT_ATTR_STR: *mut PyObject = null_mut(); -pub static mut HASH_BUILDER: OnceBox = OnceBox::new(); - -pub fn ahash_init() -> Box { - unsafe { - debug_assert!(!VALUE_STR.is_null()); - debug_assert!(!DICT_TYPE.is_null()); - debug_assert!(!STR_TYPE.is_null()); - debug_assert!(!BYTES_TYPE.is_null()); - Box::new(RandomState::with_seeds( - VALUE_STR as u64, - DICT_TYPE as u64, - STR_TYPE as u64, - BYTES_TYPE as u64, - )) - } -} - #[cfg(feature = "yyjson")] pub const YYJSON_BUFFER_SIZE: usize = 1024 * 1024 * 8; @@ -214,9 +196,6 @@ fn _init_typerefs_impl() -> bool { JsonEncodeError = pyo3_ffi::PyExc_TypeError; Py_INCREF(JsonEncodeError); JsonDecodeError = look_up_json_exc(); - - // after all type lookups - HASH_BUILDER.get_or_init(ahash_init); }; true }