From 58a24c5ba243f89302f90ef62550cee252968c9d Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Thu, 18 Jan 2018 10:56:15 -0500 Subject: [PATCH] ARROW-2004: [C++] Add shrink_to_fit parameter to BufferBuilder::Resize, add Reserve method I also relaxed the requirement to pass `const uint8_t*` so that one can pass `const void*` when writing to a `BufferBuilder`. This will not affect any downstream users Author: Wes McKinney Closes #1486 from wesm/ARROW-2004 and squashes the following commits: 2d6660a8 [Wes McKinney] Add shrink_to_fit parameter to BufferBuilder::Resize, add Reserve method, relax pointer type in Append --- cpp/src/arrow/buffer-test.cc | 25 ++++++++++++++++++++++ cpp/src/arrow/buffer.h | 41 ++++++++++++++++++++++++++---------- 2 files changed, 55 insertions(+), 11 deletions(-) diff --git a/cpp/src/arrow/buffer-test.cc b/cpp/src/arrow/buffer-test.cc index 5fd2706f0466b..398cc06363a6f 100644 --- a/cpp/src/arrow/buffer-test.cc +++ b/cpp/src/arrow/buffer-test.cc @@ -194,4 +194,29 @@ TEST(TestBuffer, SliceMutableBuffer) { ASSERT_TRUE(slice->Equals(expected)); } +TEST(TestBufferBuilder, ResizeReserve) { + const std::string data = "some data"; + auto data_ptr = data.c_str(); + + BufferBuilder builder; + + ASSERT_OK(builder.Append(data_ptr, 9)); + ASSERT_EQ(9, builder.length()); + + ASSERT_OK(builder.Resize(128)); + ASSERT_EQ(128, builder.capacity()); + + // Do not shrink to fit + ASSERT_OK(builder.Resize(64, false)); + ASSERT_EQ(128, builder.capacity()); + + // Shrink to fit + ASSERT_OK(builder.Resize(64)); + ASSERT_EQ(64, builder.capacity()); + + // Reserve elements + ASSERT_OK(builder.Reserve(60)); + ASSERT_EQ(128, builder.capacity()); +} + } // namespace arrow diff --git a/cpp/src/arrow/buffer.h b/cpp/src/arrow/buffer.h index 450a4c78b5bbb..b50b1a1aa041d 100644 --- a/cpp/src/arrow/buffer.h +++ b/cpp/src/arrow/buffer.h @@ -25,6 +25,7 @@ #include #include +#include "arrow/memory_pool.h" #include "arrow/status.h" #include "arrow/util/bit-util.h" #include "arrow/util/macros.h" @@ -32,13 +33,12 @@ namespace arrow { -class MemoryPool; - // ---------------------------------------------------------------------- // Buffer classes -/// Immutable API for a chunk of bytes which may or may not be owned by the -/// class instance. +/// \class Buffer +/// \brief Object containing a pointer to a piece of contiguous memory with a +/// particular size. Base class does not own its memory /// /// Buffers have two related notions of length: size and capacity. Size is /// the number of bytes that might have valid data. Capacity is the number @@ -133,7 +133,8 @@ ARROW_EXPORT std::shared_ptr SliceMutableBuffer(const std::shared_ptr& buffer, const int64_t offset, const int64_t length); -/// A Buffer whose contents can be mutated. May or may not own its data. +/// \class MutableBuffer +/// \brief A Buffer whose contents can be mutated. May or may not own its data. class ARROW_EXPORT MutableBuffer : public Buffer { public: MutableBuffer(uint8_t* data, const int64_t size) : Buffer(data, size) { @@ -148,6 +149,8 @@ class ARROW_EXPORT MutableBuffer : public Buffer { MutableBuffer() : Buffer(NULLPTR, 0) {} }; +/// \class ResizableBuffer +/// \brief A mutable buffer that can be resized class ARROW_EXPORT ResizableBuffer : public MutableBuffer { public: /// Change buffer reported size to indicated size, allocating memory if @@ -190,13 +193,22 @@ class ARROW_EXPORT PoolBuffer : public ResizableBuffer { MemoryPool* pool_; }; +/// \class BufferBuilder +/// \brief A class for incrementally building a contiguous chunk of in-memory data class ARROW_EXPORT BufferBuilder { public: - explicit BufferBuilder(MemoryPool* pool) + explicit BufferBuilder(MemoryPool* pool ARROW_MEMORY_POOL_DEFAULT) : pool_(pool), data_(NULLPTR), capacity_(0), size_(0) {} - /// Resizes the buffer to the nearest multiple of 64 bytes per Layout.md - Status Resize(const int64_t elements) { + /// \brief Resizes the buffer to the nearest multiple of 64 bytes + /// + /// \param elements the new capacity of the of the builder. Will be rounded + /// up to a multiple of 64 bytes for padding + /// \param shrink_to_fit if new capacity smaller than existing size, + /// reallocate internal buffer. Set to false to avoid reallocations when + /// shrinking the builder + /// \return Status + Status Resize(const int64_t elements, bool shrink_to_fit = true) { // Resize(0) is a no-op if (elements == 0) { return Status::OK(); @@ -205,7 +217,7 @@ class ARROW_EXPORT BufferBuilder { buffer_ = std::make_shared(pool_); } int64_t old_capacity = capacity_; - RETURN_NOT_OK(buffer_->Resize(elements)); + RETURN_NOT_OK(buffer_->Resize(elements, shrink_to_fit)); capacity_ = buffer_->capacity(); data_ = buffer_->mutable_data(); if (capacity_ > old_capacity) { @@ -214,7 +226,14 @@ class ARROW_EXPORT BufferBuilder { return Status::OK(); } - Status Append(const uint8_t* data, int64_t length) { + /// \brief Ensure that builder can accommodate the additional number of bytes + /// without the need to perform allocations + /// + /// \param size number of additional bytes to make space for + /// \return Status + Status Reserve(const int64_t size) { return Resize(size_ + size, false); } + + Status Append(const void* data, int64_t length) { if (capacity_ < length + size_) { int64_t new_capacity = BitUtil::NextPower2(length + size_); RETURN_NOT_OK(Resize(new_capacity)); @@ -248,7 +267,7 @@ class ARROW_EXPORT BufferBuilder { } // Unsafe methods don't check existing size - void UnsafeAppend(const uint8_t* data, int64_t length) { + void UnsafeAppend(const void* data, int64_t length) { memcpy(data_ + size_, data, static_cast(length)); size_ += length; }