Skip to content

Commit

Permalink
GDV-56: [C++] Add support for sql regex functions (apache#86)
Browse files Browse the repository at this point in the history
- add a registry for "function holders" implemented in cpp
- the function holder is instantiated at expression decomposition time
- at eval time, the registered fn gets an extra param (the function holder)
  • Loading branch information
pravindra authored Aug 29, 2018
1 parent 3c1156e commit c675302
Show file tree
Hide file tree
Showing 22 changed files with 591 additions and 216 deletions.
11 changes: 6 additions & 5 deletions cpp/src/gandiva/codegen/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,10 @@ set(SRC_FILES annotator.cc
function_signature.cc
llvm_generator.cc
llvm_types.cc
like_holder.cc
projector.cc
selection_vector.cc
regex_util.cc
status.cc
tree_expr_builder.cc
${BC_FILE_PATH_CC})
Expand Down Expand Up @@ -84,17 +86,16 @@ install(

#args: label test-file src-files
add_gandiva_unit_test(bitmap_accumulator_test.cc bitmap_accumulator.cc)
add_gandiva_unit_test(dex_llvm_test.cc)
add_gandiva_unit_test(engine_llvm_test.cc engine.cc llvm_types.cc status.cc configuration.cc ${BC_FILE_PATH_CC})
add_gandiva_unit_test(function_signature_test.cc function_signature.cc)
add_gandiva_unit_test(function_registry_test.cc function_registry.cc function_signature.cc)
add_gandiva_unit_test(llvm_types_test.cc llvm_types.cc)
add_gandiva_unit_test(llvm_generator_test.cc llvm_generator.cc engine.cc llvm_types.cc expr_decomposer.cc function_registry.cc annotator.cc status.cc bitmap_accumulator.cc configuration.cc function_signature.cc ${BC_FILE_PATH_CC})
add_gandiva_unit_test(llvm_generator_test.cc llvm_generator.cc engine.cc llvm_types.cc expr_decomposer.cc function_registry.cc annotator.cc status.cc bitmap_accumulator.cc configuration.cc function_signature.cc like_holder.cc regex_util.cc ${BC_FILE_PATH_CC})
add_gandiva_unit_test(annotator_test.cc annotator.cc function_signature.cc)
add_gandiva_unit_test(tree_expr_test.cc tree_expr_builder.cc expr_decomposer.cc annotator.cc function_registry.cc function_signature.cc)
add_gandiva_unit_test(expr_decomposer_test.cc expr_decomposer.cc tree_expr_builder.cc annotator.cc function_registry.cc function_signature.cc)
add_gandiva_unit_test(tree_expr_test.cc tree_expr_builder.cc expr_decomposer.cc annotator.cc function_registry.cc function_signature.cc like_holder.cc regex_util.cc status.cc)
add_gandiva_unit_test(expr_decomposer_test.cc expr_decomposer.cc tree_expr_builder.cc annotator.cc function_registry.cc function_signature.cc like_holder.cc regex_util.cc status.cc)
add_gandiva_unit_test(status_test.cc status.cc)
add_gandiva_unit_test(expression_registry_test.cc llvm_types.cc expression_registry.cc function_signature.cc function_registry.cc)
add_gandiva_unit_test(selection_vector_test.cc selection_vector.cc status.cc)
add_gandiva_unit_test(lru_cache_test.cc)

add_gandiva_unit_test(like_holder_test.cc like_holder.cc regex_util.cc status.cc)
16 changes: 12 additions & 4 deletions cpp/src/gandiva/codegen/dex.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "codegen/dex_visitor.h"
#include "codegen/field_descriptor.h"
#include "codegen/func_descriptor.h"
#include "codegen/function_holder.h"
#include "codegen/literal_holder.h"
#include "codegen/native_function.h"
#include "codegen/value_validity_pair.h"
Expand Down Expand Up @@ -104,20 +105,24 @@ class LocalBitMapValidityDex : public Dex {
// Base dex for a function call. It bundles everything recorded about the
// call: the function descriptor, the native function entry, the function
// holder, and the decomposed arguments.
// NOTE(review): the holder looks optional -- the decomposer only creates one
// when the native function reports needs_holder(); confirm that a null holder
// is acceptable to every consumer.
class FuncDex : public Dex {
public:
// Stores the given descriptor, native function, holder and args as-is.
FuncDex(FuncDescriptorPtr func_descriptor, const NativeFunction *native_function,
const ValueValidityPairVector &args)
FunctionHolderPtr function_holder, const ValueValidityPairVector &args)
: func_descriptor_(func_descriptor),
native_function_(native_function),
function_holder_(function_holder),
args_(args) {}

// Descriptor of the function being called.
FuncDescriptorPtr func_descriptor() const { return func_descriptor_; }

// Native function entry; held as a non-owning raw pointer.
const NativeFunction *native_function() const { return native_function_; }

// Function holder passed in at construction time.
FunctionHolderPtr function_holder() const { return function_holder_; }

// Arguments of the call, one value/validity pair per argument.
const ValueValidityPairVector &args() const { return args_; }

private:
FuncDescriptorPtr func_descriptor_;
const NativeFunction *native_function_;
FunctionHolderPtr function_holder_;
ValueValidityPairVector args_;
};

Expand All @@ -127,8 +132,9 @@ class NonNullableFuncDex : public FuncDex {
public:
NonNullableFuncDex(FuncDescriptorPtr func_descriptor,
const NativeFunction *native_function,
FunctionHolderPtr function_holder,
const ValueValidityPairVector &args)
: FuncDex(func_descriptor, native_function, args) {}
: FuncDex(func_descriptor, native_function, function_holder, args) {}

void Accept(DexVisitor &visitor) override { visitor.Visit(*this); }
};
Expand All @@ -139,8 +145,9 @@ class NullableNeverFuncDex : public FuncDex {
public:
NullableNeverFuncDex(FuncDescriptorPtr func_descriptor,
const NativeFunction *native_function,
FunctionHolderPtr function_holder,
const ValueValidityPairVector &args)
: FuncDex(func_descriptor, native_function, args) {}
: FuncDex(func_descriptor, native_function, function_holder, args) {}

void Accept(DexVisitor &visitor) override { visitor.Visit(*this); }
};
Expand All @@ -151,8 +158,9 @@ class NullableInternalFuncDex : public FuncDex {
public:
NullableInternalFuncDex(FuncDescriptorPtr func_descriptor,
const NativeFunction *native_function,
FunctionHolderPtr function_holder,
const ValueValidityPairVector &args, int local_bitmap_idx)
: FuncDex(func_descriptor, native_function, args),
: FuncDex(func_descriptor, native_function, function_holder, args),
local_bitmap_idx_(local_bitmap_idx) {}

void Accept(DexVisitor &visitor) override { visitor.Visit(*this); }
Expand Down
147 changes: 0 additions & 147 deletions cpp/src/gandiva/codegen/dex_llvm_test.cc

This file was deleted.

35 changes: 26 additions & 9 deletions cpp/src/gandiva/codegen/expr_decomposer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

#include "codegen/annotator.h"
#include "codegen/dex.h"
#include "codegen/function_holder_registry.h"
#include "codegen/function_registry.h"
#include "codegen/node.h"
#include "gandiva/function_signature.h"
Expand Down Expand Up @@ -53,10 +54,19 @@ Status ExprDecomposer::Visit(const FunctionNode &node) {
// decompose the children.
std::vector<ValueValidityPairPtr> args;
for (auto &child : node.children()) {
child->Accept(*this);
auto status = child->Accept(*this);
GANDIVA_RETURN_NOT_OK(status);

args.push_back(result());
}

// Make a function holder, if required.
std::shared_ptr<FunctionHolder> holder;
if (native_function->needs_holder()) {
auto status = FunctionHolderRegistry::Make(desc->name(), node, &holder);
GANDIVA_RETURN_NOT_OK(status);
}

if (native_function->result_nullable_type() == RESULT_NULL_IF_NULL) {
// These functions are decomposable, merge the validity bits of the children.

Expand All @@ -68,11 +78,13 @@ Status ExprDecomposer::Visit(const FunctionNode &node) {
decomposed->validity_exprs().end());
}

auto value_dex = std::make_shared<NonNullableFuncDex>(desc, native_function, args);
auto value_dex =
std::make_shared<NonNullableFuncDex>(desc, native_function, holder, args);
result_ = std::make_shared<ValueValidityPair>(merged_validity, value_dex);
} else if (native_function->result_nullable_type() == RESULT_NULL_NEVER) {
// These functions always output valid results. So, no validity dex.
auto value_dex = std::make_shared<NullableNeverFuncDex>(desc, native_function, args);
auto value_dex =
std::make_shared<NullableNeverFuncDex>(desc, native_function, holder, args);
result_ = std::make_shared<ValueValidityPair>(value_dex);
} else {
DCHECK(native_function->result_nullable_type() == RESULT_NULL_INTERNAL);
Expand All @@ -81,8 +93,8 @@ Status ExprDecomposer::Visit(const FunctionNode &node) {
int local_bitmap_idx = annotator_.AddLocalBitMap();
auto validity_dex = std::make_shared<LocalBitMapValidityDex>(local_bitmap_idx);

auto value_dex = std::make_shared<NullableInternalFuncDex>(desc, native_function,
args, local_bitmap_idx);
auto value_dex = std::make_shared<NullableInternalFuncDex>(
desc, native_function, holder, args, local_bitmap_idx);
result_ = std::make_shared<ValueValidityPair>(validity_dex, value_dex);
}
return Status::OK();
Expand All @@ -91,16 +103,19 @@ Status ExprDecomposer::Visit(const FunctionNode &node) {
// Decompose an IfNode
Status ExprDecomposer::Visit(const IfNode &node) {
// Add a local bitmap to track the output validity.
node.condition()->Accept(*this);
auto status = node.condition()->Accept(*this);
GANDIVA_RETURN_NOT_OK(status);
auto condition_vv = result();

int local_bitmap_idx = PushThenEntry(node);
node.then_node()->Accept(*this);
status = node.then_node()->Accept(*this);
GANDIVA_RETURN_NOT_OK(status);
auto then_vv = result();
PopThenEntry(node);

PushElseEntry(node, local_bitmap_idx);
node.else_node()->Accept(*this);
status = node.else_node()->Accept(*this);
GANDIVA_RETURN_NOT_OK(status);
auto else_vv = result();
bool is_terminal_else = PopElseEntry(node);

Expand All @@ -118,7 +133,9 @@ Status ExprDecomposer::Visit(const BooleanNode &node) {
// decompose the children.
std::vector<ValueValidityPairPtr> args;
for (auto &child : node.children()) {
child->Accept(*this);
auto status = child->Accept(*this);
GANDIVA_RETURN_NOT_OK(status);

args.push_back(result());
}

Expand Down
9 changes: 6 additions & 3 deletions cpp/src/gandiva/codegen/expr_decomposer.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,12 @@ class ExprDecomposer : public NodeVisitor {
explicit ExprDecomposer(const FunctionRegistry &registry, Annotator &annotator)
: registry_(registry), annotator_(annotator) {}

// Decomposes the expression tree rooted at `root` by visiting it with this
// decomposer.
// In the Status-returning form, the visit status is always returned to the
// caller; *out is written only when the visit succeeds, in which case the
// internal result_ is moved into it. On failure *out is left untouched.
ValueValidityPairPtr Decompose(const Node &root) {
root.Accept(*this);
return result();
Status Decompose(const Node &root, ValueValidityPairPtr *out) {
auto status = root.Accept(*this);
if (status.ok()) {
*out = std::move(result_);
}
return status;
}

private:
Expand Down
11 changes: 5 additions & 6 deletions cpp/src/gandiva/codegen/expression_registry.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,27 +29,26 @@ ExpressionRegistry::~ExpressionRegistry() {}

// Returns a signature iterator wrapping the function registry's begin()
// iterator, i.e. positioned at the registry's first entry.
const ExpressionRegistry::FunctionSignatureIterator
ExpressionRegistry::function_signature_begin() {
return FunctionSignatureIterator(function_registry_->begin(),
function_registry_->end());
return FunctionSignatureIterator(function_registry_->begin());
}

// Returns a signature iterator wrapping the function registry's end()
// iterator; it serves as the past-the-end sentinel for iteration.
const ExpressionRegistry::FunctionSignatureIterator
ExpressionRegistry::function_signature_end() const {
return FunctionSignatureIterator(function_registry_->end(), function_registry_->end());
return FunctionSignatureIterator(function_registry_->end());
}

// Two signature iterators compare unequal exactly when their wrapped
// registry iterators compare unequal.
bool ExpressionRegistry::FunctionSignatureIterator::operator!=(
const FunctionSignatureIterator &func_sign_it) {
return func_sign_it.it != this->it;
return func_sign_it.it_ != this->it_;
}

// Dereference: returns (by value) the signature of the registry entry the
// wrapped iterator currently points at.
FunctionSignature ExpressionRegistry::FunctionSignatureIterator::operator*() {
return (*it).signature();
return (*it_).signature();
}

// Post-increment: advances the wrapped registry iterator and returns its
// position from before the advance. The `increment` argument is the usual
// dummy int that marks the postfix form and is not used.
ExpressionRegistry::iterator ExpressionRegistry::FunctionSignatureIterator::operator++(
int increment) {
return it++;
return it_++;
}

DataTypeVector ExpressionRegistry::supported_types_ =
Expand Down
Loading

0 comments on commit c675302

Please sign in to comment.