Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Speed up leb128 encoding and decoding for unsigned values. #46919

Merged
merged 1 commit into from
Jan 20, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
164 changes: 102 additions & 62 deletions src/libserialize/leb128.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,64 +9,94 @@
// except according to those terms.

#[inline]
fn write_to_vec(vec: &mut Vec<u8>, position: usize, byte: u8) {
pub fn write_to_vec(vec: &mut Vec<u8>, position: usize, byte: u8) {
if position == vec.len() {
vec.push(byte);
} else {
vec[position] = byte;
}
}

#[inline]
/// encodes an integer using unsigned leb128 encoding and stores
/// the result using a callback function.
///
/// The callback `write` is called once for each position
/// that is to be written to with the byte to be encoded
/// at that position.
pub fn write_unsigned_leb128_to<W>(mut value: u128, mut write: W) -> usize
where W: FnMut(usize, u8)
{
let mut position = 0;
loop {
let mut byte = (value & 0x7F) as u8;
value >>= 7;
if value != 0 {
byte |= 0x80;
}

write(position, byte);
position += 1;
// Upper bound on the number of bytes an unsigned LEB128 encoding of a `usize`
// can occupy on the current target. Each encoded byte carries 7 payload bits,
// so an N-bit integer needs ceil(N / 7) bytes.
#[cfg(target_pointer_width = "16")]
const USIZE_LEB128_SIZE: usize = 3;
#[cfg(target_pointer_width = "32")]
const USIZE_LEB128_SIZE: usize = 5;
#[cfg(target_pointer_width = "64")]
const USIZE_LEB128_SIZE: usize = 10;

// Expands to the maximum encoded length, in bytes, of the given unsigned
// integer type (ceil(bit_width / 7)). The result is used as a fixed loop
// bound by the encoders and decoders generated in this file.
macro_rules! leb128_size {
    (u16) => (3);
    (u32) => (5);
    (u64) => (10);
    (u128) => (19);
    (usize) => (USIZE_LEB128_SIZE);
}

if value == 0 {
break;
macro_rules! impl_write_unsigned_leb128 {
($fn_name:ident, $int_ty:ident) => (
#[inline]
pub fn $fn_name(out: &mut Vec<u8>, start_position: usize, mut value: $int_ty) -> usize {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be more verbose, but another strategy I've seen for this is just branching on the size of the value and avoiding the loop. Not sure which would be faster in rustc though.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So, running some tests shows that the following implementation for u32 is 10% faster when encoding metadata (while showing no improvement for the query-cache and the dep-graph):

/// Encodes `value` as unsigned LEB128 into `out`, starting at
/// `start_position`, by branching on the magnitude of the value instead of
/// looping. Returns the number of bytes written (1 to 5).
///
/// An encoding of `k` bytes can hold exactly `7 * k` payload bits, so the
/// range checks must be strict: `1 << 7` already needs two bytes, `1 << 14`
/// needs three, and so on. Every byte except the last has the continuation
/// bit (`0x80`) set; the last byte must have it clear.
#[inline]
pub fn write_leb128_u32(out: &mut Vec<u8>, start_position: usize, value: u32) -> usize {

    if value < (1 << 7) {
        write_to_vec(out, start_position, value as u8);
        1
    } else if value < (1 << 14) {
        // `value as u8` keeps the low 8 bits; OR-ing with 0x80 overwrites
        // bit 7 with the continuation flag, leaving the low 7 payload bits.
        write_to_vec(out, start_position, (value as u8) | 0x80);
        write_to_vec(out, start_position + 1, (value >> 7) as u8);
        2
    } else if value < (1 << 21) {
        write_to_vec(out, start_position, (value as u8) | 0x80);
        write_to_vec(out, start_position + 1, ((value >> 7) as u8) | 0x80);
        write_to_vec(out, start_position + 2, (value >> 14) as u8);
        3
    } else if value < (1 << 28) {
        write_to_vec(out, start_position, (value as u8) | 0x80);
        write_to_vec(out, start_position + 1, ((value >> 7) as u8) | 0x80);
        write_to_vec(out, start_position + 2, ((value >> 14) as u8) | 0x80);
        write_to_vec(out, start_position + 3, (value >> 21) as u8);
        4
    } else {
        write_to_vec(out, start_position, (value as u8) | 0x80);
        write_to_vec(out, start_position + 1, ((value >> 7) as u8) | 0x80);
        write_to_vec(out, start_position + 2, ((value >> 14) as u8) | 0x80);
        write_to_vec(out, start_position + 3, ((value >> 21) as u8) | 0x80);
        // At most 4 bits remain, so the continuation bit is naturally clear.
        write_to_vec(out, start_position + 4, (value >> 28) as u8);
        5
    }
}

A similar implementation for usize does a lot worse than the one from the PR. Not sure if it's worth the trouble since my test data is only from one crate.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Cool, thanks for checking it out!

let mut position = start_position;
for _ in 0 .. leb128_size!($int_ty) {
let mut byte = (value & 0x7F) as u8;
value >>= 7;
if value != 0 {
byte |= 0x80;
}

write_to_vec(out, position, byte);
position += 1;

if value == 0 {
break;
}
}

position - start_position
}
}

position
)
}

pub fn write_unsigned_leb128(out: &mut Vec<u8>, start_position: usize, value: u128) -> usize {
write_unsigned_leb128_to(value, |i, v| write_to_vec(out, start_position+i, v))
impl_write_unsigned_leb128!(write_u16_leb128, u16);
impl_write_unsigned_leb128!(write_u32_leb128, u32);
impl_write_unsigned_leb128!(write_u64_leb128, u64);
impl_write_unsigned_leb128!(write_u128_leb128, u128);
impl_write_unsigned_leb128!(write_usize_leb128, usize);


// Generates `$fn_name(slice) -> ($int_ty, usize)`, a decoder for one unsigned
// LEB128 value stored at the start of `slice`. The returned tuple holds the
// decoded value and the number of bytes consumed.
//
// At most `leb128_size!($int_ty)` bytes are examined; if that many bytes have
// been read the function returns even when the last byte still has its
// continuation bit set. The generated function panics if `slice` ends before
// the value is terminated. All reads go through the slice iterator, so a
// truncated buffer can never cause an out-of-bounds read.
macro_rules! impl_read_unsigned_leb128 {
    ($fn_name:ident, $int_ty:ident) => (
        #[inline]
        pub fn $fn_name(slice: &[u8]) -> ($int_ty, usize) {
            let mut result: $int_ty = 0;
            let mut shift = 0;
            let mut position = 0;

            // `take` caps the loop at the maximum encoded length of the
            // target type; the iterator itself stops at the end of `slice`.
            for &byte in slice.iter().take(leb128_size!($int_ty)) {
                position += 1;
                result |= ((byte & 0x7F) as $int_ty) << shift;
                if (byte & 0x80) == 0 {
                    // Continuation bit clear: this was the final byte.
                    return (result, position);
                }
                shift += 7;
            }

            // Falling out of the loop is only legitimate when the maximum
            // number of bytes was consumed; otherwise the input was truncated.
            assert!(position == leb128_size!($int_ty),
                    "unexpected end of data while reading a leb128 value");

            (result, position)
        }
    )
}

#[inline]
pub fn read_unsigned_leb128(data: &[u8], start_position: usize) -> (u128, usize) {
let mut result = 0;
let mut shift = 0;
let mut position = start_position;
loop {
let byte = data[position];
position += 1;
result |= ((byte & 0x7F) as u128) << shift;
if (byte & 0x80) == 0 {
break;
}
shift += 7;
}
impl_read_unsigned_leb128!(read_u16_leb128, u16);
impl_read_unsigned_leb128!(read_u32_leb128, u32);
impl_read_unsigned_leb128!(read_u64_leb128, u64);
impl_read_unsigned_leb128!(read_u128_leb128, u128);
impl_read_unsigned_leb128!(read_usize_leb128, usize);


(result, position - start_position)
}

#[inline]
/// encodes an integer using signed leb128 encoding and stores
Expand Down Expand Up @@ -130,26 +160,36 @@ pub fn read_signed_leb128(data: &[u8], start_position: usize) -> (i128, usize) {
(result, position - start_position)
}

#[test]
fn test_unsigned_leb128() {
let mut stream = Vec::with_capacity(10000);

for x in 0..62 {
let pos = stream.len();
let bytes_written = write_unsigned_leb128(&mut stream, pos, 3 << x);
assert_eq!(stream.len(), pos + bytes_written);
}

let mut position = 0;
for x in 0..62 {
let expected = 3 << x;
let (actual, bytes_read) = read_unsigned_leb128(&stream, position);
assert_eq!(expected, actual);
position += bytes_read;
}
assert_eq!(stream.len(), position);
// Generates a round-trip `#[test]` named `$test_name` for one writer/reader
// pair: the values `3 << x` for `x` in `0..62` (truncated to `$int_ty`) are
// written back to back into one buffer and then decoded in the same order.
macro_rules! impl_test_unsigned_leb128 {
    ($test_name:ident, $write_fn_name:ident, $read_fn_name:ident, $int_ty:ident) => (
        #[test]
        fn $test_name() {
            // The sample for a given shift, narrowed to the type under test.
            let sample = |shift: u32| (3u64 << shift) as $int_ty;

            // Encoding pass: every write must grow the buffer by exactly the
            // number of bytes the writer reports.
            let mut buffer = Vec::new();
            for shift in 0..62 {
                let start = buffer.len();
                let written = $write_fn_name(&mut buffer, start, sample(shift));
                assert_eq!(buffer.len(), start + written);
            }

            // Decoding pass: values come back in order, and the reader's byte
            // counts add up to the full buffer length.
            let mut offset = 0;
            for shift in 0..62 {
                let wanted = sample(shift);
                let (decoded, consumed) = $read_fn_name(&buffer[offset ..]);
                assert_eq!(wanted, decoded);
                offset += consumed;
            }
            assert_eq!(buffer.len(), offset);
        }
    )
}

impl_test_unsigned_leb128!(test_u16_leb128, write_u16_leb128, read_u16_leb128, u16);
impl_test_unsigned_leb128!(test_u32_leb128, write_u32_leb128, read_u32_leb128, u32);
impl_test_unsigned_leb128!(test_u64_leb128, write_u64_leb128, read_u64_leb128, u64);
impl_test_unsigned_leb128!(test_u128_leb128, write_u128_leb128, read_u128_leb128, u128);
impl_test_unsigned_leb128!(test_usize_leb128, write_usize_leb128, read_usize_leb128, usize);

#[test]
fn test_signed_leb128() {
let values: Vec<_> = (-500..500).map(|i| i * 0x12345789ABCDEF).collect();
Expand Down
61 changes: 42 additions & 19 deletions src/libserialize/opaque.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.

use leb128::{read_signed_leb128, read_unsigned_leb128, write_signed_leb128, write_unsigned_leb128};
use leb128::{self, read_signed_leb128, write_signed_leb128};
use std::borrow::Cow;
use std::io::{self, Write};
use serialize;
Expand All @@ -31,9 +31,9 @@ impl<'a> Encoder<'a> {


macro_rules! write_uleb128 {
($enc:expr, $value:expr) => {{
($enc:expr, $value:expr, $fun:ident) => {{
let pos = $enc.cursor.position() as usize;
let bytes_written = write_unsigned_leb128($enc.cursor.get_mut(), pos, $value as u128);
let bytes_written = leb128::$fun($enc.cursor.get_mut(), pos, $value);
$enc.cursor.set_position((pos + bytes_written) as u64);
Ok(())
}}
Expand All @@ -51,61 +51,76 @@ macro_rules! write_sleb128 {
impl<'a> serialize::Encoder for Encoder<'a> {
type Error = io::Error;

/// Encodes the unit value: nothing is written and `Ok(())` is returned.
#[inline]
fn emit_nil(&mut self) -> EncodeResult {
Ok(())
}

#[inline]
fn emit_usize(&mut self, v: usize) -> EncodeResult {
write_uleb128!(self, v)
write_uleb128!(self, v, write_usize_leb128)
}

#[inline]
fn emit_u128(&mut self, v: u128) -> EncodeResult {
write_uleb128!(self, v)
write_uleb128!(self, v, write_u128_leb128)
}

#[inline]
fn emit_u64(&mut self, v: u64) -> EncodeResult {
write_uleb128!(self, v)
write_uleb128!(self, v, write_u64_leb128)
}

#[inline]
fn emit_u32(&mut self, v: u32) -> EncodeResult {
write_uleb128!(self, v)
write_uleb128!(self, v, write_u32_leb128)
}

#[inline]
fn emit_u16(&mut self, v: u16) -> EncodeResult {
write_uleb128!(self, v)
write_uleb128!(self, v, write_u16_leb128)
}

#[inline]
fn emit_u8(&mut self, v: u8) -> EncodeResult {
let _ = self.cursor.write_all(&[v]);
let pos = self.cursor.position() as usize;
leb128::write_to_vec(self.cursor.get_mut(), pos, v);
self.cursor.set_position((pos + 1) as u64);
Ok(())
}

/// Encodes an `isize` by handing it to the `write_sleb128!` macro.
// NOTE(review): the macro body is not visible in this hunk; presumably it
// writes signed LEB128 at the cursor position and advances it -- confirm.
#[inline]
fn emit_isize(&mut self, v: isize) -> EncodeResult {
write_sleb128!(self, v)
}

/// Encodes an `i128` by handing it to the `write_sleb128!` macro.
// NOTE(review): the macro body is not visible in this hunk; presumably it
// writes signed LEB128 at the cursor position and advances it -- confirm.
#[inline]
fn emit_i128(&mut self, v: i128) -> EncodeResult {
write_sleb128!(self, v)
}

/// Encodes an `i64` by handing it to the `write_sleb128!` macro.
// NOTE(review): the macro body is not visible in this hunk; presumably it
// writes signed LEB128 at the cursor position and advances it -- confirm.
#[inline]
fn emit_i64(&mut self, v: i64) -> EncodeResult {
write_sleb128!(self, v)
}

/// Encodes an `i32` by handing it to the `write_sleb128!` macro.
// NOTE(review): the macro body is not visible in this hunk; presumably it
// writes signed LEB128 at the cursor position and advances it -- confirm.
#[inline]
fn emit_i32(&mut self, v: i32) -> EncodeResult {
write_sleb128!(self, v)
}

/// Encodes an `i16` by handing it to the `write_sleb128!` macro.
// NOTE(review): the macro body is not visible in this hunk; presumably it
// writes signed LEB128 at the cursor position and advances it -- confirm.
#[inline]
fn emit_i16(&mut self, v: i16) -> EncodeResult {
write_sleb128!(self, v)
}

#[inline]
fn emit_i8(&mut self, v: i8) -> EncodeResult {
let as_u8: u8 = unsafe { ::std::mem::transmute(v) };
let _ = self.cursor.write_all(&[as_u8]);
Ok(())
self.emit_u8(as_u8)
}

#[inline]
fn emit_bool(&mut self, v: bool) -> EncodeResult {
self.emit_u8(if v {
1
Expand All @@ -114,20 +129,24 @@ impl<'a> serialize::Encoder for Encoder<'a> {
})
}

/// Encodes an `f64` by reinterpreting its bit pattern as a `u64` and
/// forwarding that integer to `emit_u64`.
#[inline]
fn emit_f64(&mut self, v: f64) -> EncodeResult {
    // `to_bits` yields the same bit pattern a transmute would, without
    // needing an `unsafe` block.
    self.emit_u64(v.to_bits())
}

/// Encodes an `f32` by reinterpreting its bit pattern as a `u32` and
/// forwarding that integer to `emit_u32`.
#[inline]
fn emit_f32(&mut self, v: f32) -> EncodeResult {
    // `to_bits` yields the same bit pattern a transmute would, without
    // needing an `unsafe` block.
    self.emit_u32(v.to_bits())
}

/// Encodes a `char` by casting it to `u32` and forwarding the result to
/// `emit_u32`.
#[inline]
fn emit_char(&mut self, v: char) -> EncodeResult {
self.emit_u32(v as u32)
}

#[inline]
fn emit_str(&mut self, v: &str) -> EncodeResult {
self.emit_usize(v.len())?;
let _ = self.cursor.write_all(v.as_bytes());
Expand All @@ -136,6 +155,7 @@ impl<'a> serialize::Encoder for Encoder<'a> {
}

impl<'a> Encoder<'a> {
/// Returns the current position of the underlying cursor, cast to `usize`.
#[inline]
pub fn position(&self) -> usize {
self.cursor.position() as usize
}
Expand All @@ -158,24 +178,27 @@ impl<'a> Decoder<'a> {
}
}

/// Returns the decoder's current `position` field.
#[inline]
pub fn position(&self) -> usize {
self.position
}

/// Overwrites the decoder's `position` field with `pos`.
/// No validation of `pos` is performed here.
#[inline]
pub fn set_position(&mut self, pos: usize) {
self.position = pos
}

/// Moves the decoder's `position` forward by `bytes`.
/// No validation of the resulting position is performed here.
#[inline]
pub fn advance(&mut self, bytes: usize) {
self.position += bytes;
}
}

macro_rules! read_uleb128 {
($dec:expr, $t:ty) => ({
let (value, bytes_read) = read_unsigned_leb128($dec.data, $dec.position);
($dec:expr, $t:ty, $fun:ident) => ({
let (value, bytes_read) = leb128::$fun(&$dec.data[$dec.position ..]);
$dec.position += bytes_read;
Ok(value as $t)
Ok(value)
})
}

Expand All @@ -198,22 +221,22 @@ impl<'a> serialize::Decoder for Decoder<'a> {

#[inline]
fn read_u128(&mut self) -> Result<u128, Self::Error> {
read_uleb128!(self, u128)
read_uleb128!(self, u128, read_u128_leb128)
}

#[inline]
fn read_u64(&mut self) -> Result<u64, Self::Error> {
read_uleb128!(self, u64)
read_uleb128!(self, u64, read_u64_leb128)
}

#[inline]
fn read_u32(&mut self) -> Result<u32, Self::Error> {
read_uleb128!(self, u32)
read_uleb128!(self, u32, read_u32_leb128)
}

#[inline]
fn read_u16(&mut self) -> Result<u16, Self::Error> {
read_uleb128!(self, u16)
read_uleb128!(self, u16, read_u16_leb128)
}

#[inline]
Expand All @@ -225,7 +248,7 @@ impl<'a> serialize::Decoder for Decoder<'a> {

#[inline]
fn read_usize(&mut self) -> Result<usize, Self::Error> {
read_uleb128!(self, usize)
read_uleb128!(self, usize, read_usize_leb128)
}

#[inline]
Expand Down