diff --git a/dredis/db.py b/dredis/db.py index ccb3dee..44112fe 100644 --- a/dredis/db.py +++ b/dredis/db.py @@ -4,6 +4,7 @@ import plyvel from dredis.path import Path +from dredis.utils import FLOAT_CODEC class KeyCodec(object): @@ -20,12 +21,11 @@ class KeyCodec(object): # type_id | key_length KEY_PREFIX_FORMAT = '>BI' - KEY_PREFIX_LENGTH = struct.calcsize(KEY_PREFIX_FORMAT) - ZSET_SCORE_FORMAT = '>d' - ZSET_SCORE_FORMAT_LENGTH = struct.calcsize(ZSET_SCORE_FORMAT) - KEY_PREFIX_STRUCT = struct.Struct(KEY_PREFIX_FORMAT) - ZSET_SCORE_STRUCT = struct.Struct(ZSET_SCORE_FORMAT) + KEY_PREFIX_LENGTH = KEY_PREFIX_STRUCT.size + + ZSET_SCORE_STRUCT = FLOAT_CODEC.STRUCT + ZSET_SCORE_FORMAT_LENGTH = ZSET_SCORE_STRUCT.size # the key format using was inspired by the `blackwidow` project: # https://github.com/KernelMaker/blackwidow/blob/5abe9a3e3f035dd0d81f514e598f29c1db679a28/src/zsets_data_key_format.h#L44-L53 @@ -65,7 +65,8 @@ def encode_zset_value(self, key, value): return self.get_key(key, self.ZSET_VALUE_TYPE) + bytes(value) def encode_zset_score(self, key, value, score): - return self.get_key(key, self.ZSET_SCORE_TYPE) + self.ZSET_SCORE_STRUCT.pack(float(score)) + bytes(value) + score = float(score) + return self.get_key(key, self.ZSET_SCORE_TYPE) + FLOAT_CODEC.encode(score) + bytes(value) def decode_key(self, key): type_id, key_length = self.KEY_PREFIX_STRUCT.unpack(key[:self.KEY_PREFIX_LENGTH]) @@ -74,7 +75,8 @@ def decode_key(self, key): def decode_zset_score(self, ldb_key): _, length, key_name = self.decode_key(ldb_key) - return self.ZSET_SCORE_STRUCT.unpack(key_name[length:length + self.ZSET_SCORE_FORMAT_LENGTH])[0] + score_bytes = key_name[length:length + self.ZSET_SCORE_FORMAT_LENGTH] + return FLOAT_CODEC.decode(score_bytes) def decode_zset_value(self, ldb_key): _, length, key_name = self.decode_key(ldb_key) diff --git a/dredis/utils.py b/dredis/utils.py index 9e0e583..13af02b 100644 --- a/dredis/utils.py +++ b/dredis/utils.py @@ -1,6 +1,56 @@ +import struct + + def to_float(s): # Redis uses `strtod` which converts empty string to 0 if s == '': return 0 else: return float(s) + + +class FloatCodec(object): + """ + FloatCode encodes `float` objects to bytes and preserve their numerical order. + For example, the following list is encoded to bytes while keeping and preserves its numerical order: + >>> floats = [1.5, -2.5, 3.2, 2.0] + >>> sorted(floats, key=FloatCodec().encode) + [-2.5, 1.5, 2.0, 3.2] + + References: + * https://en.wikipedia.org/wiki/Floating-point_arithmetic#IEEE_754_design_rationale + * https://stackoverflow.com/a/43305015/565999 + * https://stackoverflow.com/a/12933766/565999 + * https://ananthakumaran.in/2018/08/17/order-preserving-serialization.html#float + * https://github.com/apple/foundationdb/blob/b92e6b09ad67fa382e17500536fd13b10bb23ede/design/tuple.md#ieee-binary-floating-point + """ + + STRUCT = struct.Struct('>d') + + def encode(self, score): + score_bytes = bytearray(self.STRUCT.pack(score)) + # if a negative number, flip all bits + if score_bytes[0] & 0x80 != 0x00: + score_bytes = self._flip_bits(score_bytes) + else: + # flip the sign if it's a positive number + score_bytes[0] ^= 0x80 # flip the sign + return bytes(score_bytes) + + def decode(self, bytestring): + score_bytes = bytearray(bytestring) + # if a negative number, flip all bits + if score_bytes[0] & 0x80 != 0x80: + score_bytes = self._flip_bits(score_bytes) + else: + # flip the sign if it's a positive number + score_bytes[0] ^= 0x80 # flip the sign + return self.STRUCT.unpack(score_bytes)[0] + + def _flip_bits(self, bytestring): + for i in xrange(len(bytestring)): + bytestring[i] ^= 0xff + return bytestring + + +FLOAT_CODEC = FloatCodec() diff --git a/tests/integration/test_zset.py b/tests/integration/test_zset.py index 7895715..995bed0 100644 --- a/tests/integration/test_zset.py +++ b/tests/integration/test_zset.py @@ -23,7 +23,7 @@ def test_zadd_with_multiple_parameters(): assert r.zcard('myzset') == 3 -def test_zset_zrange_with_positive_integers(): +def test_zset_zrange_with_positive_indexes(): r = fresh_redis() r.zadd('myzset', 0, 'myvalue1') r.zadd('myzset', 1, 'myvalue2') @@ -31,7 +31,7 @@ def test_zset_zrange_with_positive_integers(): assert r.zrange('myzset', 0, 100) == ['myvalue1', 'myvalue2'] -def test_zset_zrange_with_negative_numbers(): +def test_zset_zrange_with_negative_indexes(): r = fresh_redis() r.zadd('myzset', 0, 'myvalue1') r.zadd('myzset', 1, 'myvalue2') @@ -352,3 +352,18 @@ def test_deleting_a_zset_should_not_impact_other_zsets(): assert r.keys('*') == ['myzset2'] assert r.zrange('myzset2', 0, 10) == ['test2'] + + +def test_order_of_zrange_with_negative_scores(): + r = fresh_redis() + + pairs = [ + ('test1', -2.5), + ('test2', -1.1), + ('test3', 0.0), + ('test4', 1.2), + ('test5', 3.5), + ] + for member, score in pairs: + r.zadd('myzset', score, member) + assert r.zrange('myzset', 0, -1, withscores=True) == pairs diff --git a/tests/unit/test_float_encoding.py b/tests/unit/test_float_encoding.py new file mode 100644 index 0000000..dae9830 --- /dev/null +++ b/tests/unit/test_float_encoding.py @@ -0,0 +1,12 @@ +from dredis.utils import FLOAT_CODEC + + +def test_encoding_keeps_natural_order(): + floats = [1.5, -2.5, 0, 3.2, -2.0] + assert sorted(floats, key=FLOAT_CODEC.encode) == sorted(floats) + + +def test_decoding_should_be_the_opposite_of_encoding(): + floats = [1.5, -2.5, 0, 3.2, -2.0] + encoded_floats = map(FLOAT_CODEC.encode, floats) + assert map(FLOAT_CODEC.decode, encoded_floats) == floats