forked from ARM-software/CMSIS_5
-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
0b52176
commit d63b089
Showing
10 changed files
with
2,143 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -25,6 +25,12 @@ | |
* $Revision: V.1.0.0 | ||
* | ||
* Target Processor: Cortex-M cores | ||
* | ||
* Modification: INT-Q extension | ||
* $Date: 09. July 2018 | ||
* $Authors: Manuele Rusci - [email protected] | ||
* Alessandro Capotondi - [email protected] | ||
* Francesco Conti - [email protected] | ||
* -------------------------------------------------------------------- */ | ||
|
||
/** | ||
|
@@ -1003,6 +1009,80 @@ extern "C" | |
|
||
void arm_softmax_q15(const q15_t * vec_in, const uint16_t dim_vec, q15_t * p_out); | ||
|
||
|
||
|
||
/* | ||
* Quantized Convolutional Layers | ||
* | ||
*/ | ||
/**
 * @brief Prototype of arm_convolve_HWC_BIN_fast; the definition is not visible in this excerpt.
 *
 * NOTE(review): every parameter description below is inferred from the names
 * and types of this declaration only - confirm against the implementation.
 *
 * @param Im_in       read-only input image, passed as uint32_t words (presumably bit-packed)
 * @param dim_im_in   presumably the input image dimension
 * @param ch_im_in    presumably the number of input channels
 * @param wt          read-only weights, passed as uint32_t words
 * @param ch_im_out   presumably the number of output channels
 * @param dim_kernel  presumably the kernel dimension
 * @param padding     presumably the padding size
 * @param stride      presumably the convolution stride
 * @param Im_out      writable uint8_t output image
 * @param dim_im_out  presumably the output image dimension
 * @param bufferA     caller-provided uint32_t buffer (presumably scratch; required size not visible here)
 * @param pThreshold  read-only int16_t thresholds (layout not visible here - TODO confirm)
 * @param bufferB     caller-provided int8_t buffer (purpose not visible here)
 * @return            an arm_status value; the possible codes are not visible here
 */
arm_status | ||
arm_convolve_HWC_BIN_fast(const uint32_t * Im_in, | ||
const uint16_t dim_im_in, | ||
const uint16_t ch_im_in, | ||
const uint32_t * wt, | ||
const uint16_t ch_im_out, | ||
const uint16_t dim_kernel, | ||
const uint16_t padding, | ||
const uint16_t stride, | ||
uint8_t * Im_out, | ||
const uint16_t dim_im_out, | ||
uint32_t * bufferA, | ||
const int16_t * pThreshold, | ||
int8_t * bufferB); | ||
|
||
/**
 * @brief Prototype of arm_convolve_HWC_INT2_fast; the definition is not visible in this excerpt.
 *
 * NOTE(review): every parameter description below is inferred from the names
 * and types of this declaration only - confirm against the implementation.
 *
 * @param Im_in       read-only input image, passed as int8_t (presumably packed 2-bit values)
 * @param dim_im_in   presumably the input image dimension
 * @param ch_im_in    presumably the number of input channels
 * @param wt          read-only weights, passed as int8_t
 * @param ch_im_out   presumably the number of output channels
 * @param dim_kernel  presumably the kernel dimension
 * @param padding     presumably the padding size
 * @param stride      presumably the convolution stride
 * @param Im_out      writable int8_t output image
 * @param dim_im_out  presumably the output image dimension
 * @param bufferA     caller-provided int16_t buffer (presumably scratch; required size not visible here)
 * @param pThreshold  read-only int16_t thresholds (layout not visible here - TODO confirm)
 * @param bufferB     caller-provided int8_t buffer (purpose not visible here)
 * @return            an arm_status value; the possible codes are not visible here
 */
arm_status | ||
arm_convolve_HWC_INT2_fast( const int8_t * Im_in, | ||
const uint16_t dim_im_in, | ||
const uint16_t ch_im_in, | ||
const int8_t * wt, | ||
const uint16_t ch_im_out, | ||
const uint16_t dim_kernel, | ||
const uint16_t padding, | ||
const uint16_t stride, | ||
int8_t * Im_out, | ||
const uint16_t dim_im_out, | ||
int16_t * bufferA, | ||
const int16_t * pThreshold, | ||
int8_t * bufferB); | ||
|
||
/**
 * @brief Prototype of arm_convolve_HWC_INT4_fast; the definition is not visible in this excerpt.
 *
 * NOTE(review): every parameter description below is inferred from the names
 * and types of this declaration only - confirm against the implementation.
 *
 * @param Im_in       read-only input image, passed as int8_t (presumably packed 4-bit values)
 * @param dim_im_in   presumably the input image dimension
 * @param ch_im_in    presumably the number of input channels
 * @param wt          read-only weights, passed as int8_t
 * @param ch_im_out   presumably the number of output channels
 * @param dim_kernel  presumably the kernel dimension
 * @param padding     presumably the padding size
 * @param stride      presumably the convolution stride
 * @param Im_out      writable int8_t output image
 * @param dim_im_out  presumably the output image dimension
 * @param bufferA     caller-provided int16_t buffer (presumably scratch; required size not visible here)
 * @param pThreshold  read-only int16_t thresholds (layout not visible here - TODO confirm)
 * @param bufferB     caller-provided int8_t buffer (purpose not visible here)
 * @return            an arm_status value; the possible codes are not visible here
 */
arm_status | ||
arm_convolve_HWC_INT4_fast( const int8_t * Im_in, | ||
const uint16_t dim_im_in, | ||
const uint16_t ch_im_in, | ||
const int8_t * wt, | ||
const uint16_t ch_im_out, | ||
const uint16_t dim_kernel, | ||
const uint16_t padding, | ||
const uint16_t stride, | ||
int8_t * Im_out, | ||
const uint16_t dim_im_out, | ||
int16_t * bufferA, | ||
const int16_t * pThreshold, | ||
int8_t * bufferB); | ||
|
||
/**
 * @brief Prototype of arm_nn_mat_mult_kernel_int2_int16_reordered; the definition is not visible in this excerpt.
 *
 * NOTE(review): parameter roles are inferred from names and types only -
 * confirm against the implementation.
 *
 * @param pA          read-only int8_t operand (presumably packed 2-bit weights)
 * @param pInBuffer   read-only int16_t operand buffer
 * @param ch_im_out   presumably the number of output channels
 * @param numCol_A    presumably the number of columns of the pA operand
 * @param pThreshold  read-only int16_t thresholds (layout not visible here - TODO confirm)
 * @param pOut        writable int8_t output
 * @return            an int8_t pointer (presumably the advanced output pointer - TODO confirm)
 */
int8_t *arm_nn_mat_mult_kernel_int2_int16_reordered(const int8_t * pA, | ||
const int16_t * pInBuffer, | ||
const uint16_t ch_im_out, | ||
const uint16_t numCol_A, | ||
const int16_t * pThreshold, | ||
int8_t * pOut); | ||
|
||
/**
 * @brief Prototype of arm_nn_mat_mult_kernel_int4_int16_reordered; the definition is not visible in this excerpt.
 *
 * NOTE(review): parameter roles are inferred from names and types only -
 * confirm against the implementation.
 *
 * @param pA          read-only int8_t operand (presumably packed 4-bit weights)
 * @param pInBuffer   read-only int16_t operand buffer
 * @param ch_im_out   presumably the number of output channels
 * @param numCol_A    presumably the number of columns of the pA operand
 * @param pThreshold  read-only int16_t thresholds (layout not visible here - TODO confirm)
 * @param pOut        writable int8_t output
 * @return            an int8_t pointer (presumably the advanced output pointer - TODO confirm)
 */
int8_t *arm_nn_mat_mult_kernel_int4_int16_reordered(const int8_t * pA, | ||
const int16_t * pInBuffer, | ||
const uint16_t ch_im_out, | ||
const uint16_t numCol_A, | ||
const int16_t * pThreshold, | ||
int8_t * pOut); | ||
|
||
/**
 * @brief Prototype of arm_nn_mat_mult_kernel_BIN_reordered; the definition is not visible in this excerpt.
 *
 * NOTE(review): parameter roles are inferred from names and types only -
 * confirm against the implementation. Unlike the int2/int4 kernels declared
 * next to it, numCol_A is uint32_t here and the data pointers are uint32_t.
 *
 * @param pA          read-only uint32_t operand (presumably bit-packed weights)
 * @param pInBuffer   read-only uint32_t operand buffer
 * @param ch_im_out   presumably the number of output channels
 * @param numCol_A    presumably the number of columns of the pA operand
 * @param pThreshold  read-only int16_t thresholds (layout not visible here - TODO confirm)
 * @param pOut        writable uint32_t output
 * @return            a uint32_t pointer (presumably the advanced output pointer - TODO confirm)
 */
uint32_t *arm_nn_mat_mult_kernel_BIN_reordered( const uint32_t * pA, | ||
const uint32_t * pInBuffer, | ||
const uint16_t ch_im_out, | ||
const uint32_t numCol_A, | ||
const int16_t * pThreshold, | ||
uint32_t * pOut); | ||
|
||
|
||
|
||
#ifdef __cplusplus | ||
} | ||
#endif | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -25,6 +25,13 @@ | |
* $Revision: V.1.0.0 | ||
* | ||
* Target Processor: Cortex-M cores | ||
* | ||
* Modification: INT-Q extension | ||
* $Date: 09. July 2018 | ||
* $Authors: Manuele Rusci - [email protected] | ||
* Alessandro Capotondi - [email protected] | ||
* Francesco Conti - [email protected] | ||
* | ||
* -------------------------------------------------------------------- */ | ||
|
||
#ifndef _ARM_NNSUPPORTFUNCTIONS_H_ | ||
|
@@ -133,6 +140,62 @@ __STATIC_FORCEINLINE void *read_and_pad_reordered(void *source, q31_t * out1, q3 | |
|
||
return source; | ||
} | ||
|
||
/** | ||
* @brief read and expand one INT4 word into two INT16 words with reordering | ||
*/ | ||
__STATIC_INLINE void *read_and_pad_reordered_INT4(void *source, int32_t * out1, int32_t * out2, int32_t * out3, int32_t * out4) | ||
{ | ||
|
||
#ifndef ARM_MATH_BIG_ENDIAN | ||
q31_t inA = *__SIMD32(source)++; | ||
|
||
*out1 = __SXTB16(__ROR(__SXTB16( inA << 4) , 4 ) ); | ||
*out2 = __SXTB16(__ROR(__SXTB16( inA ) , 4 ) ) ; | ||
*out3 = __SXTB16(__ROR(__SXTB16( __ROR(inA, 4) ) , 4 ) ) ; | ||
*out4 = __SXTB16(__ROR(__SXTB16( __ROR(inA, 8) ) , 4 ) ); | ||
#else | ||
*out4 = __SXTB16(__ROR(__SXTB16( inA << 4) , 4 ) ); | ||
*out3 = __SXTB16(__ROR(__SXTB16( inA ) , 4 ) ) ; | ||
*out2 = __SXTB16(__ROR(__SXTB16( __ROR(inA, 4) ) , 4 ) ) ; | ||
*out1 = __SXTB16(__ROR(__SXTB16( __ROR(inA, 8) ) , 4 ) ); | ||
#endif | ||
|
||
return source; | ||
} | ||
|
||
|
||
/** | ||
* @brief read and expand one INT2 word into two INT16 words with reordering | ||
*/ | ||
__STATIC_INLINE void *read_and_pad_reordered_INT2( void *source, int32_t * out1, int32_t * out2, int32_t * out3, int32_t * out4, | ||
int32_t * out5, int32_t * out6, int32_t * out7, int32_t * out8) | ||
{ | ||
q31_t inA = *__SIMD32(source)++; | ||
#ifndef ARM_MATH_BIG_ENDIAN | ||
*out1 = __SXTB16(__ROR(__SXTB16( inA << 6) , 6 ) ); | ||
*out2 = __SXTB16(__ROR(__SXTB16( inA << 4) , 6 ) ); | ||
*out3 = __SXTB16(__ROR(__SXTB16( inA << 2) , 6 ) ); | ||
*out4 = __SXTB16(__ROR(__SXTB16( inA ) , 6 ) ); | ||
*out5 = __SXTB16(__ROR(__SXTB16( inA >> 2) , 6 ) ); | ||
*out6 = __SXTB16(__ROR(__SXTB16( inA >> 4) , 6 ) ); | ||
*out7 = __SXTB16(__ROR(__SXTB16( inA >> 6) , 6 ) ); | ||
*out8 = __SXTB16(__ROR(__SXTB16( inA >> 8) , 6 ) ); | ||
#else | ||
*out8 = __SXTB16(__ROR(__SXTB16( inA << 6) , 6 ) ); | ||
*out7 = __SXTB16(__ROR(__SXTB16( inA << 4) , 6 ) ); | ||
*out6 = __SXTB16(__ROR(__SXTB16( inA << 2) , 6 ) ); | ||
*out5 = __SXTB16(__ROR(__SXTB16( inA ) , 6 ) ); | ||
*out4 = __SXTB16(__ROR(__SXTB16( inA >> 2) , 6 ) ); | ||
*out3 = __SXTB16(__ROR(__SXTB16( inA >> 4) , 6 ) ); | ||
*out2 = __SXTB16(__ROR(__SXTB16( inA >> 6) , 6 ) ); | ||
*out1 = __SXTB16(__ROR(__SXTB16( inA >> 8) , 6 ) ); | ||
#endif | ||
|
||
return source; | ||
} | ||
|
||
|
||
#endif | ||
|
||
/** | ||
|
Oops, something went wrong.