Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sse4.2: added the implementation for mm_cmpestra #295

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
237 changes: 237 additions & 0 deletions simde/x86/sse4.2.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,243 @@ SIMDE_BEGIN_DECLS_
#define _SIDD_UNIT_MASK SIMDE_SIDD_UNIT_MASK
#endif

/* Portable fallback for _mm_cmpestra on 8-bit elements (16 per vector).
 *
 * a, la: first operand and its explicit length (the set / ranges / needle).
 * b, lb: second operand and its explicit length (the string being scanned).
 * imm8:  control byte; this function reads
 *          imm8[1]   - elements are signed (only affects RANGES),
 *          imm8[3:2] - aggregation mode (SIMDE_SIDD_CMP_*),
 *          imm8[5:4] - polarity (SIMDE_SIDD_*_POLARITY).
 *
 * Returns 1 when the final intermediate result is zero and lb is greater
 * than the last element index (b is completely valid), 0 otherwise.
 *
 * The implementation follows the PCMPESTRI/PCMPESTRM imm8 control-byte
 * description in the Intel SDM: every element of b is compared against
 * every element of a, comparisons touching elements at or past la / lb
 * are overridden, and the per-pair results are folded into one bit per
 * element of b.
 *
 * NOTE(review): the hardware instruction interprets la/lb as saturated
 * absolute values; negative lengths are not normalized here (they make
 * every element invalid) -- confirm whether callers need that. */
SIMDE_FUNCTION_ATTRIBUTES
int
simde_mm_cmpestra_8_(simde__m128i a, int la, simde__m128i b, int lb, const int imm8)
    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) {
  const int cmp_op = imm8 & 0x0c;
  const int polarity = imm8 & 0x30;
  const int is_signed = imm8 & 0x02;
  simde__m128i_private
    a_ = simde__m128i_to_private(a),
    b_ = simde__m128i_to_private(b);
  const int upper_bound = (128 / 8) - 1;
  /* bool_res[j] bit i holds the (overridden) comparison of b[j] with a[i].
   * Rows are 32-bit so that all 16 column bits fit; an 8-bit lane would
   * silently drop columns 8..15. */
  int32_t bool_res[128 / 8] = { 0 };

  for (int j = 0 ; j <= upper_bound ; j++) {
    /* Validity is a pure function of the index; it must not carry over
     * from one row/column to the next. */
    const int b_invalid = (j >= lb);
    for (int i = 0 ; i <= upper_bound ; i++) {
      const int a_invalid = (i >= la);
      int bitvalue;

      if (cmp_op == SIMDE_SIDD_CMP_RANGES) {
        /* a holds (low, high) pairs: even elements are lower bounds,
         * odd elements are upper bounds. Masking with 0xff yields the
         * unsigned value of the byte without a cast. */
        const int a_val = is_signed ? a_.i8[i] : (a_.i8[i] & 0xff);
        const int b_val = is_signed ? b_.i8[j] : (b_.i8[j] & 0xff);
        bitvalue = (i & 1) ? (b_val <= a_val) : (b_val >= a_val);
      } else {
        bitvalue = (a_.i8[i] == b_.i8[j]);
      }

      if (a_invalid || b_invalid) {
        switch (cmp_op) {
          case SIMDE_SIDD_CMP_EQUAL_ANY:
          case SIMDE_SIDD_CMP_RANGES:
            bitvalue = 0;
            break;
          case SIMDE_SIDD_CMP_EQUAL_EACH:
            /* Both strings ended: treated as equal at this position. */
            bitvalue = (a_invalid && b_invalid);
            break;
          case SIMDE_SIDD_CMP_EQUAL_ORDERED:
            /* Past the end of the needle everything matches; a valid
             * needle element never matches past the end of b. */
            bitvalue = a_invalid;
            break;
          default:
            break;
        }
      }

      bool_res[j] |= (bitvalue << i);
    }
  }

  int32_t int_res_1 = 0;
  switch (cmp_op) {
    case SIMDE_SIDD_CMP_EQUAL_ANY:
      /* b[j] equals at least one valid element of a. */
      for (int j = 0 ; j <= upper_bound ; j++) {
        if (bool_res[j] != 0) {
          int_res_1 |= (1 << j);
        }
      }
      break;
    case SIMDE_SIDD_CMP_RANGES:
      /* b[j] lies inside at least one (a[i], a[i + 1]) pair, i even. */
      for (int j = 0 ; j <= upper_bound ; j++) {
        for (int i = 0 ; i < upper_bound ; i += 2) {
          int_res_1 |= (((bool_res[j] >> i) & (bool_res[j] >> (i + 1)) & 1) << j);
        }
      }
      break;
    case SIMDE_SIDD_CMP_EQUAL_EACH:
      /* Element-wise comparison: only the diagonal matters. */
      for (int i = 0 ; i <= upper_bound ; i++) {
        int_res_1 |= (((bool_res[i] >> i) & 1) << i);
      }
      break;
    case SIMDE_SIDD_CMP_EQUAL_ORDERED:
      /* Bit j stays set only if a matches b starting at offset j.
       * One bit per element, so 16 elements need a 0xffff seed. */
      int_res_1 = 0xffff;
      for (int j = 0 ; j <= upper_bound ; j++) {
        for (int i = 0, k = j ; k <= upper_bound ; i++, k++) {
          if (((bool_res[k] >> i) & 1) == 0) {
            /* Clear only bit j; the other positions are independent. */
            int_res_1 &= ~(1 << j);
            break;
          }
        }
      }
      break;
    default:
      break;
  }

  int32_t int_res_2 = 0;
  for (int j = 0 ; j <= upper_bound ; j++) {
    int bit = (int_res_1 >> j) & 1;
    if (polarity & SIMDE_SIDD_NEGATIVE_POLARITY) {
      /* With the mask bit set only positions that are valid in b are
       * negated. Flip the single bit with ^ 1: XOR with -1 would produce
       * a negative value whose shift sets every higher bit as well. */
      const int masked = polarity & SIMDE_SIDD_MASKED_POSITIVE_POLARITY;
      if (!masked || (j < lb)) {
        bit ^= 1;
      }
    }
    int_res_2 |= (bit << j);
  }

  return (int_res_2 == 0) && (lb > upper_bound);
}

/* Portable fallback for _mm_cmpestra on 16-bit elements (8 per vector).
 *
 * a, la: first operand and its explicit length (the set / ranges / needle).
 * b, lb: second operand and its explicit length (the string being scanned).
 * imm8:  control byte; this function reads
 *          imm8[1]   - elements are signed (only affects RANGES),
 *          imm8[3:2] - aggregation mode (SIMDE_SIDD_CMP_*),
 *          imm8[5:4] - polarity (SIMDE_SIDD_*_POLARITY).
 *
 * Returns 1 when the final intermediate result is zero and lb is greater
 * than the last element index (b is completely valid), 0 otherwise.
 *
 * The implementation follows the PCMPESTRI/PCMPESTRM imm8 control-byte
 * description in the Intel SDM: every element of b is compared against
 * every element of a, comparisons touching elements at or past la / lb
 * are overridden, and the per-pair results are folded into one bit per
 * element of b.
 *
 * NOTE(review): the hardware instruction interprets la/lb as saturated
 * absolute values; negative lengths are not normalized here (they make
 * every element invalid) -- confirm whether callers need that. */
SIMDE_FUNCTION_ATTRIBUTES
int
simde_mm_cmpestra_16_(simde__m128i a, int la, simde__m128i b, int lb, const int imm8)
    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) {
  const int cmp_op = imm8 & 0x0c;
  const int polarity = imm8 & 0x30;
  const int is_signed = imm8 & 0x02;
  simde__m128i_private
    a_ = simde__m128i_to_private(a),
    b_ = simde__m128i_to_private(b);
  const int upper_bound = (128 / 16) - 1;
  /* bool_res[j] bit i holds the (overridden) comparison of b[j] with a[i]. */
  int32_t bool_res[128 / 16] = { 0 };

  for (int j = 0 ; j <= upper_bound ; j++) {
    const int b_invalid = (j >= lb);
    for (int i = 0 ; i <= upper_bound ; i++) {
      const int a_invalid = (i >= la);
      int bitvalue;

      if (cmp_op == SIMDE_SIDD_CMP_RANGES) {
        /* a holds (low, high) pairs: even elements are lower bounds,
         * odd elements are upper bounds. Masking with 0xffff yields the
         * unsigned value of the word without a cast. */
        const int a_val = is_signed ? a_.i16[i] : (a_.i16[i] & 0xffff);
        const int b_val = is_signed ? b_.i16[j] : (b_.i16[j] & 0xffff);
        bitvalue = (i & 1) ? (b_val <= a_val) : (b_val >= a_val);
      } else {
        bitvalue = (a_.i16[i] == b_.i16[j]);
      }

      if (a_invalid || b_invalid) {
        switch (cmp_op) {
          case SIMDE_SIDD_CMP_EQUAL_ANY:
          case SIMDE_SIDD_CMP_RANGES:
            bitvalue = 0;
            break;
          case SIMDE_SIDD_CMP_EQUAL_EACH:
            /* Both strings ended: treated as equal at this position. */
            bitvalue = (a_invalid && b_invalid);
            break;
          case SIMDE_SIDD_CMP_EQUAL_ORDERED:
            /* Past the end of the needle everything matches; a valid
             * needle element never matches past the end of b. */
            bitvalue = a_invalid;
            break;
          default:
            break;
        }
      }

      bool_res[j] |= (bitvalue << i);
    }
  }

  int32_t int_res_1 = 0;
  switch (cmp_op) {
    case SIMDE_SIDD_CMP_EQUAL_ANY:
      /* b[j] equals at least one valid element of a. */
      for (int j = 0 ; j <= upper_bound ; j++) {
        if (bool_res[j] != 0) {
          int_res_1 |= (1 << j);
        }
      }
      break;
    case SIMDE_SIDD_CMP_RANGES:
      /* b[j] lies inside at least one (a[i], a[i + 1]) pair, i even. */
      for (int j = 0 ; j <= upper_bound ; j++) {
        for (int i = 0 ; i < upper_bound ; i += 2) {
          int_res_1 |= (((bool_res[j] >> i) & (bool_res[j] >> (i + 1)) & 1) << j);
        }
      }
      break;
    case SIMDE_SIDD_CMP_EQUAL_EACH:
      /* Element-wise comparison: only the diagonal matters. */
      for (int i = 0 ; i <= upper_bound ; i++) {
        int_res_1 |= (((bool_res[i] >> i) & 1) << i);
      }
      break;
    case SIMDE_SIDD_CMP_EQUAL_ORDERED:
      /* Bit j stays set only if a matches b starting at offset j.
       * One bit per element, so 8 elements need a 0xff seed. */
      int_res_1 = 0xff;
      for (int j = 0 ; j <= upper_bound ; j++) {
        for (int i = 0, k = j ; k <= upper_bound ; i++, k++) {
          if (((bool_res[k] >> i) & 1) == 0) {
            /* Clear only bit j; the other positions are independent. */
            int_res_1 &= ~(1 << j);
            break;
          }
        }
      }
      break;
    default:
      break;
  }

  int32_t int_res_2 = 0;
  for (int j = 0 ; j <= upper_bound ; j++) {
    int bit = (int_res_1 >> j) & 1;
    if (polarity & SIMDE_SIDD_NEGATIVE_POLARITY) {
      /* With the mask bit set only positions that are valid in b are
       * negated. Flip the single bit with ^ 1: XOR with -1 would produce
       * a negative value whose shift sets every higher bit as well. */
      const int masked = polarity & SIMDE_SIDD_MASKED_POSITIVE_POLARITY;
      if (!masked || (j < lb)) {
        bit ^= 1;
      }
    }
    int_res_2 |= (bit << j);
  }

  return (int_res_2 == 0) && (lb > upper_bound);
}

/* simde_mm_cmpestra: forwards to the native intrinsic when
 * SIMDE_X86_SSE4_2_NATIVE is defined; otherwise selects the 8-bit or
 * 16-bit portable helper by testing imm8 against SIMDE_SIDD_UWORD_OPS. */
#if defined(SIMDE_X86_SSE4_2_NATIVE)
  #define simde_mm_cmpestra(a, la, b, lb, imm8) _mm_cmpestra(a, la, b, lb, imm8)
#else
  #define simde_mm_cmpestra(a, la, b, lb, imm8) \
    ((((imm8) & SIMDE_SIDD_UWORD_OPS) == 0) \
      ? simde_mm_cmpestra_8_((a), (la), (b), (lb), (imm8)) \
      : simde_mm_cmpestra_16_((a), (la), (b), (lb), (imm8)))
#endif
/* Expose the Intel spelling when native aliases are requested. */
#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES)
  #define _mm_cmpestra(a, la, b, lb, imm8) simde_mm_cmpestra(a, la, b, lb, imm8)
#endif

SIMDE_FUNCTION_ATTRIBUTES
int simde_mm_cmpestrs (simde__m128i a, int la, simde__m128i b, int lb, const int imm8)
SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) {
Expand Down
31 changes: 31 additions & 0 deletions test/x86/sse4.2.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,36 @@
#include <test/x86/test-sse2.h>
#include <simde/x86/sse4.2.h>

/* Runs simde_mm_cmpestra over a fixed table of byte vectors with
 * imm8 = 36 (0x24) and checks the returned flag against the expected
 * value stored with each case.
 * NOTE(review): 0x24 has imm8 & 0x0c == 0x04 and imm8 & 0x30 == 0x20;
 * per the test name this is presumably the RANGES mode on bytes --
 * confirm against the SIMDE_SIDD_* definitions. */
static int
test_simde_mm_cmpestra_ranges_8(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m128i a;
    int la;
    simde__m128i b;
    int lb;
    int r;
  } test_vec[] = {
    { simde_mm_set_epi8(INT8_C(  45), INT8_C( -94), INT8_C(  38), INT8_C( -11),
                        INT8_C(  84), INT8_C(-123), INT8_C( -43), INT8_C( -49),
                        INT8_C(  25), INT8_C( -55), INT8_C(-121), INT8_C(  -6),
                        INT8_C(  57), INT8_C( 108), INT8_C( -55), INT8_C(  69)),
      23,
      simde_mm_set_epi8(INT8_C( -26), INT8_C( -61), INT8_C( -21), INT8_C( -96),
                        INT8_C(  48), INT8_C(-112), INT8_C(  95), INT8_C( -56),
                        INT8_C(  29), INT8_C( -55), INT8_C(-121), INT8_C(  -6),
                        INT8_C(  57), INT8_C( 108), INT8_C( -55), INT8_C(  69)),
      28,
      0 }
  };
  const size_t case_count = sizeof(test_vec) / sizeof(test_vec[0]);

  size_t i = 0;
  while (i < case_count) {
    /* The control byte stays a literal so it remains a compile-time
     * constant for the macro it is passed to. */
    int r = simde_mm_cmpestra(test_vec[i].a, test_vec[i].la, test_vec[i].b, test_vec[i].lb, 36);
    simde_assert_equal_i(r, test_vec[i].r);
    i++;
  }

  return 0;
}

static int
test_simde_mm_cmpestrs_8(SIMDE_MUNIT_TEST_ARGS) {
const struct {
Expand Down Expand Up @@ -1064,6 +1094,7 @@ test_simde_mm_crc32_u64 (SIMDE_MUNIT_TEST_ARGS) {
}

SIMDE_TEST_FUNC_LIST_BEGIN
SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpestra_ranges_8)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpestrs_8)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpestrs_16)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpestrz_8)
Expand Down
Loading