Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

vector_algorithms.cpp, *minmax*: invert the condition to improve *_element cases a bit more #4401

Merged
merged 2 commits into from
Feb 27, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 56 additions & 53 deletions stl/src/vector_algorithms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1197,7 +1197,37 @@ namespace {
// Increment vertical indices. The loop stops exactly at the index wrap-around point if the end is not reached first
_Cur_idx = _Traits::_Inc(_Cur_idx);

if (_First == _Stop_at) {
if (_First != _Stop_at) {
// This is the main part, finding vertical minimum/maximum

// Load values and if unsigned adjust them to be signed (for signed vector comparisons)
_Cur_vals = _Traits::_Sign_correction(_Traits::_Load(_First), _Sign);

if constexpr ((_Mode & _Mode_min) != 0) {
// Looking for the first occurrence of minimum, don't overwrite with newly found occurrences
const auto _Is_less = _Traits::_Cmp_gt(_Cur_vals_min, _Cur_vals); // _Cur_vals < _Cur_vals_min
_Cur_idx_min = _mm_blendv_epi8(
_Cur_idx_min, _Cur_idx, _Traits::_Mask_cast(_Is_less)); // Remember their vertical indices
_Cur_vals_min = _Traits::_Min(_Cur_vals_min, _Cur_vals, _Is_less); // Update the current minimum
}

if constexpr (_Mode == _Mode_max) {
// Looking for the first occurrence of maximum, don't overwrite with newly found occurrences
const auto _Is_greater =
_Traits::_Cmp_gt(_Cur_vals, _Cur_vals_max); // _Cur_vals > _Cur_vals_max
_Cur_idx_max = _mm_blendv_epi8(_Cur_idx_max, _Cur_idx,
_Traits::_Mask_cast(_Is_greater)); // Remember their vertical indices
_Cur_vals_max =
_Traits::_Max(_Cur_vals_max, _Cur_vals, _Is_greater); // Update the current maximum
} else if constexpr (_Mode == _Mode_both) {
// Looking for the last occurrence of maximum, do overwrite with newly found occurrences
const auto _Is_less =
_Traits::_Cmp_gt(_Cur_vals_max, _Cur_vals); // !(_Cur_vals >= _Cur_vals_max)
_Cur_idx_max = _mm_blendv_epi8(_Cur_idx, _Cur_idx_max,
_Traits::_Mask_cast(_Is_less)); // Remember their vertical indices
_Cur_vals_max = _Traits::_Max(_Cur_vals, _Cur_vals_max, _Is_less); // Update the current maximum
}
} else {
// Reached the end or the point where the indices wrap around.
// Compute horizontal min and/or max. Determine horizontal and vertical position of it.

Expand Down Expand Up @@ -1303,38 +1333,10 @@ namespace {
_Cur_vals_max = _Cur_vals;
_Cur_idx_max = _mm_setzero_si128();
}

continue;
} else {
break; // No wrapping, so it was the only portion
StephanTLavavej marked this conversation as resolved.
Show resolved Hide resolved
}
}
// This is the main part, finding vertical minimum/maximum

// Load values and if unsigned adjust them to be signed (for signed vector comparisons)
_Cur_vals = _Traits::_Sign_correction(_Traits::_Load(_First), _Sign);

if constexpr ((_Mode & _Mode_min) != 0) {
// Looking for the first occurrence of minimum, don't overwrite with newly found occurrences
const auto _Is_less = _Traits::_Cmp_gt(_Cur_vals_min, _Cur_vals); // _Cur_vals < _Cur_vals_min
_Cur_idx_min = _mm_blendv_epi8(
_Cur_idx_min, _Cur_idx, _Traits::_Mask_cast(_Is_less)); // Remember their vertical indices
_Cur_vals_min = _Traits::_Min(_Cur_vals_min, _Cur_vals, _Is_less); // Update the current minimum
}

if constexpr (_Mode == _Mode_max) {
// Looking for the first occurrence of maximum, don't overwrite with newly found occurrences
const auto _Is_greater = _Traits::_Cmp_gt(_Cur_vals, _Cur_vals_max); // _Cur_vals > _Cur_vals_max
_Cur_idx_max = _mm_blendv_epi8(
_Cur_idx_max, _Cur_idx, _Traits::_Mask_cast(_Is_greater)); // Remember their vertical indices
_Cur_vals_max = _Traits::_Max(_Cur_vals_max, _Cur_vals, _Is_greater); // Update the current maximum
} else if constexpr (_Mode == _Mode_both) {
// Looking for the last occurrence of maximum, do overwrite with newly found occurrences
const auto _Is_less = _Traits::_Cmp_gt(_Cur_vals_max, _Cur_vals); // !(_Cur_vals >= _Cur_vals_max)
_Cur_idx_max = _mm_blendv_epi8(_Cur_idx, _Cur_idx_max,
_Traits::_Mask_cast(_Is_less)); // Remember their vertical indices
_Cur_vals_max = _Traits::_Max(_Cur_vals, _Cur_vals_max, _Is_less); // Update the current maximum
}
}
}
#endif // !_M_ARM64EC
Expand Down Expand Up @@ -1410,7 +1412,31 @@ namespace {
for (;;) {
_Advance_bytes(_First, 16);

if (_First == _Stop_at) {
if (_First != _Stop_at) {
// This is the main part, finding vertical minimum/maximum

_Cur_vals = _Traits::_Load(_First);

if constexpr (_Sign_correction) {
_Cur_vals = _Traits::_Sign_correction(_Cur_vals, false);
}

if constexpr ((_Mode & _Mode_min) != 0) {
if constexpr (_Sign || _Sign_correction) {
_Cur_vals_min = _Traits::_Min(_Cur_vals_min, _Cur_vals); // Update the current minimum
} else {
_Cur_vals_min = _Traits::_Min_u(_Cur_vals_min, _Cur_vals); // Update the current minimum
}
}

if constexpr ((_Mode & _Mode_max) != 0) {
if constexpr (_Sign || _Sign_correction) {
_Cur_vals_max = _Traits::_Max(_Cur_vals_max, _Cur_vals); // Update the current maximum
} else {
_Cur_vals_max = _Traits::_Max_u(_Cur_vals_max, _Cur_vals); // Update the current maximum
}
}
} else {
// Reached end. Compute horizontal min and/or max.

if constexpr ((_Mode & _Mode_min) != 0) {
Expand Down Expand Up @@ -1451,29 +1477,6 @@ namespace {

break;
}
// This is the main part, finding vertical minimum/maximum

_Cur_vals = _Traits::_Load(_First);

if constexpr (_Sign_correction) {
_Cur_vals = _Traits::_Sign_correction(_Cur_vals, false);
}

if constexpr ((_Mode & _Mode_min) != 0) {
if constexpr (_Sign || _Sign_correction) {
_Cur_vals_min = _Traits::_Min(_Cur_vals_min, _Cur_vals); // Update the current minimum
} else {
_Cur_vals_min = _Traits::_Min_u(_Cur_vals_min, _Cur_vals); // Update the current minimum
}
}

if constexpr ((_Mode & _Mode_max) != 0) {
if constexpr (_Sign || _Sign_correction) {
_Cur_vals_max = _Traits::_Max(_Cur_vals_max, _Cur_vals); // Update the current maximum
} else {
_Cur_vals_max = _Traits::_Max_u(_Cur_vals_max, _Cur_vals); // Update the current maximum
}
}
}
} else
#endif // !_M_ARM64EC
Expand Down