Skip to content

Commit

Permalink
Fix negative score order (backward incompatible!)
Browse files Browse the repository at this point in the history
Before this commit, sorted set negative scores were stored after
all positive scores because of their binary representation.
For example:
    >>> struct.pack('>d', -1)
    b'\xbf\xf0\x00\x00\x00\x00\x00\x00'
    >>> struct.pack('>d', +1)
    b'?\xf0\x00\x00\x00\x00\x00\x00'
    >>> struct.pack('>d', -1) < struct.pack('>d', +1)
    False

This commit introduces the FloatCodec class to encode/decode floats into bytes and vice-versa, preserving their numerical order.
For example, the following list is encoded to bytes while keeping and preserves its numerical order:
    >>> floats = [1.5, -2.5, 3.2, 2.0]
    >>> sorted(floats, key=FloatCodec().encode)
    [-2.5, 1.5, 2.0, 3.2]

References:
* https://en.wikipedia.org/wiki/Floating-point_arithmetic#IEEE_754_design_rationale
* https://stackoverflow.com/a/43305015/565999
* https://stackoverflow.com/a/12933766/565999
* https://ananthakumaran.in/2018/08/17/order-preserving-serialization.html
* https://ananthakumaran.in/2018/08/17/order-preserving-serialization.html#float

WARNING: This is a backward incompatible change and requires a major release!
  • Loading branch information
hltbra committed Jun 26, 2019
1 parent 3d8396a commit 3621858
Show file tree
Hide file tree
Showing 4 changed files with 87 additions and 9 deletions.
16 changes: 9 additions & 7 deletions dredis/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import plyvel

from dredis.path import Path
from dredis.utils import FLOAT_CODEC


class KeyCodec(object):
Expand All @@ -20,12 +21,11 @@ class KeyCodec(object):

# type_id | key_length
KEY_PREFIX_FORMAT = '>BI'
KEY_PREFIX_LENGTH = struct.calcsize(KEY_PREFIX_FORMAT)
ZSET_SCORE_FORMAT = '>d'
ZSET_SCORE_FORMAT_LENGTH = struct.calcsize(ZSET_SCORE_FORMAT)

KEY_PREFIX_STRUCT = struct.Struct(KEY_PREFIX_FORMAT)
ZSET_SCORE_STRUCT = struct.Struct(ZSET_SCORE_FORMAT)
KEY_PREFIX_LENGTH = KEY_PREFIX_STRUCT.size

ZSET_SCORE_STRUCT = FLOAT_CODEC.STRUCT
ZSET_SCORE_FORMAT_LENGTH = ZSET_SCORE_STRUCT.size

# the key format using <key length + key> was inspired by the `blackwidow` project:
# https:/KernelMaker/blackwidow/blob/5abe9a3e3f035dd0d81f514e598f29c1db679a28/src/zsets_data_key_format.h#L44-L53
Expand Down Expand Up @@ -65,7 +65,8 @@ def encode_zset_value(self, key, value):
return self.get_key(key, self.ZSET_VALUE_TYPE) + bytes(value)

def encode_zset_score(self, key, value, score):
return self.get_key(key, self.ZSET_SCORE_TYPE) + self.ZSET_SCORE_STRUCT.pack(float(score)) + bytes(value)
score = float(score)
return self.get_key(key, self.ZSET_SCORE_TYPE) + FLOAT_CODEC.encode(score) + bytes(value)

def decode_key(self, key):
type_id, key_length = self.KEY_PREFIX_STRUCT.unpack(key[:self.KEY_PREFIX_LENGTH])
Expand All @@ -74,7 +75,8 @@ def decode_key(self, key):

def decode_zset_score(self, ldb_key):
_, length, key_name = self.decode_key(ldb_key)
return self.ZSET_SCORE_STRUCT.unpack(key_name[length:length + self.ZSET_SCORE_FORMAT_LENGTH])[0]
score_bytes = key_name[length:length + self.ZSET_SCORE_FORMAT_LENGTH]
return FLOAT_CODEC.decode(score_bytes)

def decode_zset_value(self, ldb_key):
_, length, key_name = self.decode_key(ldb_key)
Expand Down
49 changes: 49 additions & 0 deletions dredis/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,55 @@
import struct


def to_float(s):
# Redis uses `strtod` which converts empty string to 0
if s == '':
return 0
else:
return float(s)


class FloatCodec(object):
"""
FloatCode encodes `float` objects to bytes and preserve their numerical order.
For example, the following list is encoded to bytes while keeping and preserves its numerical order:
>>> floats = [1.5, -2.5, 3.2, 2.0]
>>> sorted(floats, key=FloatCodec().encode)
[-2.5, 1.5, 2.0, 3.2]
References:
* https://en.wikipedia.org/wiki/Floating-point_arithmetic#IEEE_754_design_rationale
* https://stackoverflow.com/a/43305015/565999
* https://stackoverflow.com/a/12933766/565999
* https://ananthakumaran.in/2018/08/17/order-preserving-serialization.html#float
"""

STRUCT = struct.Struct('>d')

def encode(self, score):
score_bytes = bytearray(self.STRUCT.pack(score))
# if a negative number, flip all bits
if score_bytes[0] & 0x80 != 0x00:
score_bytes = self._flip_bits(score_bytes)
else:
# flip the sign if it's a positive number
score_bytes[0] ^= 0x80 # flip the sign
return bytes(score_bytes)

def decode(self, bytestring):
score_bytes = bytearray(bytestring)
# if a negative number, flip all bits
if score_bytes[0] & 0x80 != 0x80:
score_bytes = self._flip_bits(score_bytes)
else:
# flip the sign if it's a positive number
score_bytes[0] ^= 0x80 # flip the sign
return self.STRUCT.unpack(score_bytes)[0]

def _flip_bits(self, bytestring):
for i in xrange(len(bytestring)):
bytestring[i] ^= 0xff
return bytestring


FLOAT_CODEC = FloatCodec()
19 changes: 17 additions & 2 deletions tests/integration/test_zset.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,15 @@ def test_zadd_with_multiple_parameters():
assert r.zcard('myzset') == 3


def test_zset_zrange_with_positive_integers():
def test_zset_zrange_with_positive_indexes():
r = fresh_redis()
r.zadd('myzset', 0, 'myvalue1')
r.zadd('myzset', 1, 'myvalue2')
assert r.zrange('myzset', 0, 1) == ['myvalue1', 'myvalue2']
assert r.zrange('myzset', 0, 100) == ['myvalue1', 'myvalue2']


def test_zset_zrange_with_negative_numbers():
def test_zset_zrange_with_negative_indexes():
r = fresh_redis()
r.zadd('myzset', 0, 'myvalue1')
r.zadd('myzset', 1, 'myvalue2')
Expand Down Expand Up @@ -352,3 +352,18 @@ def test_deleting_a_zset_should_not_impact_other_zsets():

assert r.keys('*') == ['myzset2']
assert r.zrange('myzset2', 0, 10) == ['test2']


def test_order_of_zrange_with_negative_scores():
r = fresh_redis()

pairs = [
('test1', -2.5),
('test2', -1.1),
('test3', 0.0),
('test4', 1.2),
('test5', 3.5),
]
for member, score in pairs:
r.zadd('myzset', score, member)
assert r.zrange('myzset', 0, -1, withscores=True) == pairs
12 changes: 12 additions & 0 deletions tests/unit/test_float_encoding.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from dredis.utils import FLOAT_CODEC


def test_encoding_keeps_natural_order():
floats = [1.5, -2.5, 0, 3.2, -2.0]
assert sorted(floats, key=FLOAT_CODEC.encode) == sorted(floats)


def test_decoding_should_be_the_opposite_of_encoding():
floats = [1.5, -2.5, 0, 3.2, -2.0]
encoded_floats = map(FLOAT_CODEC.encode, floats)
assert map(FLOAT_CODEC.decode, encoded_floats) == floats

0 comments on commit 3621858

Please sign in to comment.