Fix negative score order (backward incompatible!)

Before this commit, sorted set negative scores were stored after all positive scores because of their binary representation. For example: >>> struct.pack('>d', -1) b'\xbf\xf0\x00\x00\x00\x00\x00\x00' >>> struct.pack('>d', +1) b'?\xf0\x00\x00\x00\x00\x00\x00' >>> struct.pack('>d', -1) < struct.pack('>d', +1) False This commit introduces the FloatCodec class to encode/decode floats into bytes and vice-versa, preserving their numerical order. For example, the following list is encoded to bytes while keeping and preserves its numerical order: >>> floats = [1.5, -2.5, 3.2, 2.0] >>> sorted(floats, key=FloatCodec().encode) [-2.5, 1.5, 2.0, 3.2] References: * https://en.wikipedia.org/wiki/Floating-point_arithmetic#IEEE_754_design_rationale * https://stackoverflow.com/a/43305015/565999 * https://stackoverflow.com/a/12933766/565999 * https://ananthakumaran.in/2018/08/17/order-preserving-serialization.html * https://ananthakumaran.in/2018/08/17/order-preserving-serialization.html#float WARNING: This is a backward incompatible change and requires a major release!
Yipit · Jun 26, 2019 · 3621858 · 3621858
1 parent 3d8396a
commit 3621858
Show file tree

Hide file tree

Showing 4 changed files with 87 additions and 9 deletions.
diff --git a/dredis/db.py b/dredis/db.py
@@ -4,6 +4,7 @@
 import plyvel
 
 from dredis.path import Path
+from dredis.utils import FLOAT_CODEC
 
 
 class KeyCodec(object):
@@ -20,12 +21,11 @@ class KeyCodec(object):
 
  # type_id | key_length
  KEY_PREFIX_FORMAT = '>BI'
- KEY_PREFIX_LENGTH = struct.calcsize(KEY_PREFIX_FORMAT)
- ZSET_SCORE_FORMAT = '>d'
- ZSET_SCORE_FORMAT_LENGTH = struct.calcsize(ZSET_SCORE_FORMAT)
-
  KEY_PREFIX_STRUCT = struct.Struct(KEY_PREFIX_FORMAT)
- ZSET_SCORE_STRUCT = struct.Struct(ZSET_SCORE_FORMAT)
+ KEY_PREFIX_LENGTH = KEY_PREFIX_STRUCT.size
+
+ ZSET_SCORE_STRUCT = FLOAT_CODEC.STRUCT
+ ZSET_SCORE_FORMAT_LENGTH = ZSET_SCORE_STRUCT.size
 
  # the key format using <key length + key> was inspired by the `blackwidow` project:
  # https:/KernelMaker/blackwidow/blob/5abe9a3e3f035dd0d81f514e598f29c1db679a28/src/zsets_data_key_format.h#L44-L53
@@ -65,7 +65,8 @@ def encode_zset_value(self, key, value):
  return self.get_key(key, self.ZSET_VALUE_TYPE) + bytes(value)
 
  def encode_zset_score(self, key, value, score):
- return self.get_key(key, self.ZSET_SCORE_TYPE) + self.ZSET_SCORE_STRUCT.pack(float(score)) + bytes(value)
+ score = float(score)
+ return self.get_key(key, self.ZSET_SCORE_TYPE) + FLOAT_CODEC.encode(score) + bytes(value)
 
  def decode_key(self, key):
  type_id, key_length = self.KEY_PREFIX_STRUCT.unpack(key[:self.KEY_PREFIX_LENGTH])
@@ -74,7 +75,8 @@ def decode_key(self, key):
 
  def decode_zset_score(self, ldb_key):
  _, length, key_name = self.decode_key(ldb_key)
- return self.ZSET_SCORE_STRUCT.unpack(key_name[length:length + self.ZSET_SCORE_FORMAT_LENGTH])[0]
+ score_bytes = key_name[length:length + self.ZSET_SCORE_FORMAT_LENGTH]
+ return FLOAT_CODEC.decode(score_bytes)
 
  def decode_zset_value(self, ldb_key):
  _, length, key_name = self.decode_key(ldb_key)

diff --git a/dredis/utils.py b/dredis/utils.py
@@ -1,6 +1,55 @@
+import struct
+
+
 def to_float(s):
  # Redis uses `strtod` which converts empty string to 0
  if s == '':
  return 0
  else:
  return float(s)
+
+
+class FloatCodec(object):
+ """
+ FloatCode encodes `float` objects to bytes and preserve their numerical order.
+ For example, the following list is encoded to bytes while keeping and preserves its numerical order:
+ >>> floats = [1.5, -2.5, 3.2, 2.0]
+ >>> sorted(floats, key=FloatCodec().encode)
+ [-2.5, 1.5, 2.0, 3.2]
+
+ References:
+ * https://en.wikipedia.org/wiki/Floating-point_arithmetic#IEEE_754_design_rationale
+ * https://stackoverflow.com/a/43305015/565999
+ * https://stackoverflow.com/a/12933766/565999
+ * https://ananthakumaran.in/2018/08/17/order-preserving-serialization.html#float
+ """
+
+ STRUCT = struct.Struct('>d')
+
+ def encode(self, score):
+ score_bytes = bytearray(self.STRUCT.pack(score))
+ # if a negative number, flip all bits
+ if score_bytes[0] & 0x80 != 0x00:
+ score_bytes = self._flip_bits(score_bytes)
+ else:
+ # flip the sign if it's a positive number
+ score_bytes[0] ^= 0x80 # flip the sign
+ return bytes(score_bytes)
+
+ def decode(self, bytestring):
+ score_bytes = bytearray(bytestring)
+ # if a negative number, flip all bits
+ if score_bytes[0] & 0x80 != 0x80:
+ score_bytes = self._flip_bits(score_bytes)
+ else:
+ # flip the sign if it's a positive number
+ score_bytes[0] ^= 0x80 # flip the sign
+ return self.STRUCT.unpack(score_bytes)[0]
+
+ def _flip_bits(self, bytestring):
+ for i in xrange(len(bytestring)):
+ bytestring[i] ^= 0xff
+ return bytestring
+
+
+FLOAT_CODEC = FloatCodec()
diff --git a/tests/integration/test_zset.py b/tests/integration/test_zset.py
@@ -23,15 +23,15 @@ def test_zadd_with_multiple_parameters():
  assert r.zcard('myzset') == 3
 
 
-def test_zset_zrange_with_positive_integers():
+def test_zset_zrange_with_positive_indexes():
  r = fresh_redis()
  r.zadd('myzset', 0, 'myvalue1')
  r.zadd('myzset', 1, 'myvalue2')
  assert r.zrange('myzset', 0, 1) == ['myvalue1', 'myvalue2']
  assert r.zrange('myzset', 0, 100) == ['myvalue1', 'myvalue2']
 
 
-def test_zset_zrange_with_negative_numbers():
+def test_zset_zrange_with_negative_indexes():
  r = fresh_redis()
  r.zadd('myzset', 0, 'myvalue1')
  r.zadd('myzset', 1, 'myvalue2')
@@ -352,3 +352,18 @@ def test_deleting_a_zset_should_not_impact_other_zsets():
 
  assert r.keys('*') == ['myzset2']
  assert r.zrange('myzset2', 0, 10) == ['test2']
+
+
+def test_order_of_zrange_with_negative_scores():
+ r = fresh_redis()
+
+ pairs = [
+ ('test1', -2.5),
+ ('test2', -1.1),
+ ('test3', 0.0),
+ ('test4', 1.2),
+ ('test5', 3.5),
+ ]
+ for member, score in pairs:
+ r.zadd('myzset', score, member)
+ assert r.zrange('myzset', 0, -1, withscores=True) == pairs
diff --git a/tests/unit/test_float_encoding.py b/tests/unit/test_float_encoding.py
@@ -0,0 +1,12 @@
+from dredis.utils import FLOAT_CODEC
+
+
+def test_encoding_keeps_natural_order():
+ floats = [1.5, -2.5, 0, 3.2, -2.0]
+ assert sorted(floats, key=FLOAT_CODEC.encode) == sorted(floats)
+
+
+def test_decoding_should_be_the_opposite_of_encoding():
+ floats = [1.5, -2.5, 0, 3.2, -2.0]
+ encoded_floats = map(FLOAT_CODEC.encode, floats)
+ assert map(FLOAT_CODEC.decode, encoded_floats) == floats