Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/main' into apacheGH-15187
Browse files Browse the repository at this point in the history
  • Loading branch information
rtadepalli committed Jun 16, 2023
2 parents fa74584 + 41309de commit 5dcce2e
Show file tree
Hide file tree
Showing 30 changed files with 977 additions and 389 deletions.
4 changes: 3 additions & 1 deletion .github/workflows/cpp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,9 @@ jobs:
ARROW_WITH_BROTLI: ON
ARROW_WITH_BZ2: ON
ARROW_WITH_LZ4: ON
ARROW_WITH_OPENTELEMETRY: ON
# GH-36013 disabling opentelemetry here because we can't
# get the patched version from conda
# ARROW_WITH_OPENTELEMETRY: ON
ARROW_WITH_SNAPPY: ON
ARROW_WITH_ZLIB: ON
ARROW_WITH_ZSTD: ON
Expand Down
2 changes: 1 addition & 1 deletion ci/scripts/go_bench_adapt.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
# Assume GitHub Actions CI. The environment variable lookups below are
# expected to fail when not running in GitHub Actions.
github_commit_info = {
"repository": os.environ["GITHUB_REPOSITORY"],
"repository": f'{os.environ["GITHUB_SERVER_URL"]}/{os.environ["GITHUB_REPOSITORY"]}',
"commit": os.environ["GITHUB_SHA"],
"pr_number": None, # implying default branch
}
Expand Down
7 changes: 2 additions & 5 deletions cpp/cmake_modules/BuildUtils.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -567,15 +567,13 @@ function(ADD_BENCHMARK REL_BENCHMARK_NAME)
target_link_libraries(${BENCHMARK_NAME} PRIVATE ${ARROW_BENCHMARK_LINK_LIBS})
endif()
add_dependencies(benchmark ${BENCHMARK_NAME})
set(NO_COLOR "--color_print=false")

if(ARG_EXTRA_LINK_LIBS)
target_link_libraries(${BENCHMARK_NAME} PRIVATE ${ARG_EXTRA_LINK_LIBS})
endif()
else()
# No executable, just invoke the benchmark (probably a script) directly.
set(BENCHMARK_PATH ${CMAKE_CURRENT_SOURCE_DIR}/${REL_BENCHMARK_NAME})
set(NO_COLOR "")
endif()

# With OSX and conda, we need to set the correct RPATH so that dependencies
Expand Down Expand Up @@ -615,8 +613,8 @@ function(ADD_BENCHMARK REL_BENCHMARK_NAME)
${BUILD_SUPPORT_DIR}/run-test.sh
${CMAKE_BINARY_DIR}
benchmark
${BENCHMARK_PATH}
${NO_COLOR})
${BENCHMARK_PATH})

set_property(TEST ${BENCHMARK_NAME}
APPEND
PROPERTY LABELS ${ARG_LABELS})
Expand Down Expand Up @@ -847,7 +845,6 @@ function(ADD_ARROW_EXAMPLE REL_EXAMPLE_NAME)
add_executable(${EXAMPLE_NAME} "${REL_EXAMPLE_NAME}.cc" ${ARG_EXTRA_SOURCES})
target_link_libraries(${EXAMPLE_NAME} ${ARROW_EXAMPLE_LINK_LIBS})
add_dependencies(runexample ${EXAMPLE_NAME})
set(NO_COLOR "--color_print=false")

if(ARG_EXTRA_LINK_LIBS)
target_link_libraries(${EXAMPLE_NAME} ${ARG_EXTRA_LINK_LIBS})
Expand Down
14 changes: 7 additions & 7 deletions cpp/cmake_modules/DefineOptions.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,13 @@ takes precedence over ccache if a storage backend is configured" ON)
#----------------------------------------------------------------------
set_option_category("Project component")

define_option(ARROW_ACERO
"Build the Arrow Acero Engine Module"
OFF
DEPENDS
ARROW_COMPUTE
ARROW_IPC)

define_option(ARROW_BUILD_UTILITIES "Build Arrow commandline utilities" OFF)

define_option(ARROW_COMPUTE "Build all Arrow Compute kernels" OFF)
Expand Down Expand Up @@ -402,13 +409,6 @@ takes precedence over ccache if a storage backend is configured" ON)
ARROW_IPC
ARROW_PARQUET)

define_option(ARROW_ACERO
"Build the Arrow Acero Engine Module"
OFF
DEPENDS
ARROW_COMPUTE
ARROW_IPC)

define_option(ARROW_TENSORFLOW "Build Arrow with TensorFlow support enabled" OFF)

define_option(ARROW_TESTING
Expand Down
4 changes: 4 additions & 0 deletions cpp/cmake_modules/ThirdpartyToolchain.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -4511,6 +4511,10 @@ endif()

macro(build_opentelemetry)
message(STATUS "Building OpenTelemetry from source")
if(Protobuf_VERSION VERSION_GREATER_EQUAL 3.22)
message(FATAL_ERROR "GH-36013: Can't use bundled OpenTelemetry with Protobuf 3.22 or later. "
"Protobuf is version ${Protobuf_VERSION}")
endif()

set(OPENTELEMETRY_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/opentelemetry_ep-install")
set(OPENTELEMETRY_INCLUDE_DIR "${OPENTELEMETRY_PREFIX}/include")
Expand Down
6 changes: 6 additions & 0 deletions cpp/src/arrow/compute/api_vector.h
Original file line number Diff line number Diff line change
Expand Up @@ -254,12 +254,18 @@ namespace internal {
// These internal functions are implemented in kernels/vector_selection.cc

/// \brief Return the number of selected indices in the boolean filter
///
/// \param filter a plain or run-end encoded boolean array with or without nulls
/// \param null_selection how to handle nulls in the filter
ARROW_EXPORT
int64_t GetFilterOutputSize(const ArraySpan& filter,
FilterOptions::NullSelectionBehavior null_selection);

/// \brief Compute uint64 selection indices for use with Take given a boolean
/// filter
///
/// \param filter a plain or run-end encoded boolean array with or without nulls
/// \param null_selection how to handle nulls in the filter
ARROW_EXPORT
Result<std::shared_ptr<ArrayData>> GetTakeIndices(
const ArraySpan& filter, FilterOptions::NullSelectionBehavior null_selection,
Expand Down
4 changes: 4 additions & 0 deletions cpp/src/arrow/compute/kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,10 @@ std::shared_ptr<TypeMatcher> RunEndEncoded(
std::move(value_type_matcher));
}

// Convenience overload: builds a matcher for the given value type id and
// forwards it to the RunEndEncoded overload that takes a value-type matcher.
std::shared_ptr<TypeMatcher> RunEndEncoded(Type::type value_type_id) {
  std::shared_ptr<TypeMatcher> value_type_matcher = SameTypeId(value_type_id);
  return RunEndEncoded(std::move(value_type_matcher));
}

std::shared_ptr<TypeMatcher> RunEndEncoded(
std::shared_ptr<TypeMatcher> run_end_type_matcher,
std::shared_ptr<TypeMatcher> value_type_matcher) {
Expand Down
6 changes: 6 additions & 0 deletions cpp/src/arrow/compute/kernel.h
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,12 @@ ARROW_EXPORT std::shared_ptr<TypeMatcher> RunEndInteger();
ARROW_EXPORT std::shared_ptr<TypeMatcher> RunEndEncoded(
std::shared_ptr<TypeMatcher> value_type_matcher);

/// \brief Match run-end encoded types that use any valid run-end type and
/// encode specific value types
///
/// @param[in] value_type_id a type id that the type of the values field should match
ARROW_EXPORT std::shared_ptr<TypeMatcher> RunEndEncoded(Type::type value_type_id);

/// \brief Match run-end encoded types that encode specific run-end and value types
///
/// @param[in] run_end_type_matcher a matcher that is applied to the run_ends field
Expand Down
25 changes: 25 additions & 0 deletions cpp/src/arrow/compute/kernels/ree_util_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,18 @@ class ReadWriteValue<ArrowType, in_has_validity_buffer, out_has_validity_buffer,
return valid;
}

/// \brief Tell whether the input values stored at positions i and j are equal.
///
/// Pre-conditions guaranteed by the callers:
/// - i and j are valid indices into the values buffer
/// - the values in i and j are valid
bool CompareValuesAt(int64_t i, int64_t j) const {
  if constexpr (!std::is_same_v<ArrowType, BooleanType>) {
    // Non-boolean values are read as ValueRepr elements and compared with ==.
    const auto* typed_values = reinterpret_cast<const ValueRepr*>(input_values_);
    return typed_values[i] == typed_values[j];
  } else {
    // Boolean values are read one bit at a time from the values buffer.
    const bool lhs = bit_util::GetBit(input_values_, i);
    const bool rhs = bit_util::GetBit(input_values_, j);
    return lhs == rhs;
  }
}

/// \brief Ensure padding is zeroed in validity bitmap.
void ZeroValidityPadding(int64_t length) const {
DCHECK(output_values_);
Expand Down Expand Up @@ -166,6 +178,11 @@ class ReadWriteValue<ArrowType, in_has_validity_buffer, out_has_validity_buffer,
return valid;
}

/// \brief Tell whether the byte_width_-sized values at positions i and j hold
/// identical bytes.
bool CompareValuesAt(int64_t i, int64_t j) const {
  // Each value occupies byte_width_ bytes, so slot k starts at k * byte_width_.
  const auto* lhs = input_values_ + (i * byte_width_);
  const auto* rhs = input_values_ + (j * byte_width_);
  return memcmp(lhs, rhs, byte_width_) == 0;
}

/// \brief Ensure padding is zeroed in validity bitmap.
void ZeroValidityPadding(int64_t length) const {
DCHECK(output_values_);
Expand Down Expand Up @@ -253,6 +270,14 @@ class ReadWriteValue<ArrowType, in_has_validity_buffer, out_has_validity_buffer,
return valid;
}

/// \brief Tell whether the variable-length values at positions i and j are equal.
///
/// Two values are equal when they have the same length and identical bytes.
/// No bounds checks are performed here: the offsets at i, i + 1, j and j + 1
/// are read directly, so callers must pass indices for which those reads are
/// valid.
///
/// \param i index of the first value
/// \param j index of the second value
/// \return true if both values have the same length and the same bytes
bool CompareValuesAt(int64_t i, int64_t j) const {
  const offset_type len_i = input_offsets_[i + 1] - input_offsets_[i];
  const offset_type len_j = input_offsets_[j + 1] - input_offsets_[j];
  // memcmp() returns 0 when the two ranges match, so its result has to be
  // compared against 0. Using it directly as a boolean would report two
  // equal-length values as equal only when their bytes differ.
  return len_i == len_j &&
         memcmp(input_values_ + input_offsets_[i], input_values_ + input_offsets_[j],
                static_cast<size_t>(len_i)) == 0;
}

/// \brief Ensure padding is zeroed in validity bitmap.
void ZeroValidityPadding(int64_t length) const {
DCHECK(output_values_);
Expand Down
7 changes: 3 additions & 4 deletions cpp/src/arrow/compute/kernels/vector_selection.cc
Original file line number Diff line number Diff line change
Expand Up @@ -332,8 +332,8 @@ void RegisterVectorSelection(FunctionRegistry* registry) {
VectorKernel filter_base;
filter_base.init = FilterState::Init;
RegisterSelectionFunction("array_filter", array_filter_doc, filter_base,
/*selection_type=*/boolean(), filter_kernels,
GetDefaultFilterOptions(), registry);
std::move(filter_kernels), GetDefaultFilterOptions(),
registry);

DCHECK_OK(registry->AddFunction(MakeFilterMetaFunction()));

Expand All @@ -345,8 +345,7 @@ void RegisterVectorSelection(FunctionRegistry* registry) {
take_base.init = TakeState::Init;
take_base.can_execute_chunkwise = false;
RegisterSelectionFunction("array_take", array_take_doc, take_base,
/*selection_type=*/match::Integer(), take_kernels,
GetDefaultTakeOptions(), registry);
std::move(take_kernels), GetDefaultTakeOptions(), registry);

DCHECK_OK(registry->AddFunction(MakeTakeMetaFunction()));

Expand Down
Loading

0 comments on commit 5dcce2e

Please sign in to comment.