INT-Q support v1.0.0
mrusci authored and alessandrocapotondi committed Feb 6, 2019
1 parent 0b52176 commit d63b089
Showing 10 changed files with 2,143 additions and 0 deletions.
80 changes: 80 additions & 0 deletions CMSIS/NN/Include/arm_nnfunctions.h
@@ -25,6 +25,12 @@
* $Revision: V.1.0.0
*
* Target Processor: Cortex-M cores
*
* Modification: INT-Q extension
* $Date: 09. July 2018
* $Authors: Manuele Rusci - [email protected]
* Alessandro Capotondi - [email protected]
* Francesco Conti - [email protected]
* -------------------------------------------------------------------- */

/**
@@ -1003,6 +1009,80 @@ extern "C"

void arm_softmax_q15(const q15_t * vec_in, const uint16_t dim_vec, q15_t * p_out);



/*
 * Quantized convolutional layers (INT-Q extension): binary (BIN),
 * 2-bit (INT2) and 4-bit (INT4) variants with HWC data layout.
 */
arm_status
arm_convolve_HWC_BIN_fast(const uint32_t * Im_in,
const uint16_t dim_im_in,
const uint16_t ch_im_in,
const uint32_t * wt,
const uint16_t ch_im_out,
const uint16_t dim_kernel,
const uint16_t padding,
const uint16_t stride,
uint8_t * Im_out,
const uint16_t dim_im_out,
uint32_t * bufferA,
const int16_t * pThreshold,
int8_t * bufferB);

arm_status
arm_convolve_HWC_INT2_fast( const int8_t * Im_in,
const uint16_t dim_im_in,
const uint16_t ch_im_in,
const int8_t * wt,
const uint16_t ch_im_out,
const uint16_t dim_kernel,
const uint16_t padding,
const uint16_t stride,
int8_t * Im_out,
const uint16_t dim_im_out,
int16_t * bufferA,
const int16_t * pThreshold,
int8_t * bufferB);

arm_status
arm_convolve_HWC_INT4_fast( const int8_t * Im_in,
const uint16_t dim_im_in,
const uint16_t ch_im_in,
const int8_t * wt,
const uint16_t ch_im_out,
const uint16_t dim_kernel,
const uint16_t padding,
const uint16_t stride,
int8_t * Im_out,
const uint16_t dim_im_out,
int16_t * bufferA,
const int16_t * pThreshold,
int8_t * bufferB);

/*
 * Inner matrix-multiplication kernels on reordered operands, used by the
 * quantized convolutions above.
 */
int8_t *arm_nn_mat_mult_kernel_int2_int16_reordered(const int8_t * pA,
const int16_t * pInBuffer,
const uint16_t ch_im_out,
const uint16_t numCol_A,
const int16_t * pThreshold,
int8_t * pOut);

int8_t *arm_nn_mat_mult_kernel_int4_int16_reordered(const int8_t * pA,
const int16_t * pInBuffer,
const uint16_t ch_im_out,
const uint16_t numCol_A,
const int16_t * pThreshold,
int8_t * pOut);

uint32_t *arm_nn_mat_mult_kernel_BIN_reordered( const uint32_t * pA,
const uint32_t * pInBuffer,
const uint16_t ch_im_out,
const uint32_t numCol_A,
const int16_t * pThreshold,
uint32_t * pOut);
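
For orientation, the new convolutions follow the existing CMSIS-NN HWC convention, with an im2col scratch buffer (bufferA), a secondary scratch buffer (bufferB) and a threshold table (pThreshold) in place of the usual bias/shift parameters. A minimal calling sketch for the INT4 variant is shown below; the layer dimensions, the packing of Im_in and wt, and the sizes of the buffers and threshold table are placeholders assumed for illustration, not values defined by this commit.

#include "arm_nnfunctions.h"

/* Illustrative call of the INT4 convolution; all sizes and buffer lengths
 * below are assumptions made for this sketch. */
static void conv_int4_example(const int8_t *input,       /* quantized input feature map (HWC)  */
                              const int8_t *weights,     /* packed, reordered INT4 weights     */
                              const int16_t *thresholds, /* output quantization thresholds     */
                              int8_t *output,            /* quantized output feature map (HWC) */
                              int16_t *bufferA,          /* im2col / expansion scratch buffer  */
                              int8_t *bufferB)           /* additional scratch buffer          */
{
    const uint16_t dim_im_in  = 16;  /* input is 16 x 16 x 32  */
    const uint16_t ch_im_in   = 32;
    const uint16_t ch_im_out  = 64;  /* output is 16 x 16 x 64 */
    const uint16_t dim_im_out = 16;
    const uint16_t dim_kernel = 3;   /* 3 x 3 kernel, pad 1, stride 1 */
    const uint16_t padding    = 1;
    const uint16_t stride     = 1;

    arm_status status = arm_convolve_HWC_INT4_fast(input, dim_im_in, ch_im_in,
                                                   weights, ch_im_out, dim_kernel,
                                                   padding, stride,
                                                   output, dim_im_out,
                                                   bufferA, thresholds, bufferB);
    (void)status;  /* ARM_MATH_SUCCESS when the parameter combination is supported */
}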



#ifdef __cplusplus
}
#endif
63 changes: 63 additions & 0 deletions CMSIS/NN/Include/arm_nnsupportfunctions.h
@@ -25,6 +25,13 @@
* $Revision: V.1.0.0
*
* Target Processor: Cortex-M cores
*
* Modification: INT-Q extension
* $Date: 09. July 2018
* $Authors: Manuele Rusci - [email protected]
* Alessandro Capotondi - [email protected]
* Francesco Conti - [email protected]
*
* -------------------------------------------------------------------- */

#ifndef _ARM_NNSUPPORTFUNCTIONS_H_
@@ -133,6 +140,62 @@ __STATIC_FORCEINLINE void *read_and_pad_reordered(void *source, q31_t * out1, q3

return source;
}

/**
 * @brief read and expand one 32-bit word holding eight INT4 values into four
 *        32-bit words, each packing two sign-extended INT16 values, with reordering
 */
__STATIC_INLINE void *read_and_pad_reordered_INT4(void *source, int32_t * out1, int32_t * out2, int32_t * out3, int32_t * out4)
{
    q31_t inA = *__SIMD32(source)++;

#ifndef ARM_MATH_BIG_ENDIAN
    *out1 = __SXTB16(__ROR(__SXTB16(inA << 4), 4));
    *out2 = __SXTB16(__ROR(__SXTB16(inA), 4));
    *out3 = __SXTB16(__ROR(__SXTB16(__ROR(inA, 4)), 4));
    *out4 = __SXTB16(__ROR(__SXTB16(__ROR(inA, 8)), 4));
#else
    *out4 = __SXTB16(__ROR(__SXTB16(inA << 4), 4));
    *out3 = __SXTB16(__ROR(__SXTB16(inA), 4));
    *out2 = __SXTB16(__ROR(__SXTB16(__ROR(inA, 4)), 4));
    *out1 = __SXTB16(__ROR(__SXTB16(__ROR(inA, 8)), 4));
#endif

    return source;
}
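
For orientation, the fragment below sketches how such expanded weight words are typically consumed: each output word packs two sign-extended INT16 weights, so four __SMLAD dual multiply-accumulate operations process eight INT4 weights against eight INT16 activations. This is an illustrative pattern only, assuming an activation buffer that has already been expanded and reordered to match the weight layout; it is not the kernel code added by this commit.

/* Illustrative MAC step: 8 reordered INT4 weights from pA against 8 INT16
 * activations from pB.  The pointer names and the pairing of activation
 * words are assumptions of this sketch. */
static int32_t mac8_int4(void *pA, void *pB, int32_t sum)
{
    int32_t w1, w2, w3, w4;

    pA = read_and_pad_reordered_INT4(pA, &w1, &w2, &w3, &w4);

    sum = __SMLAD(w1, *__SIMD32(pB)++, sum);
    sum = __SMLAD(w2, *__SIMD32(pB)++, sum);
    sum = __SMLAD(w3, *__SIMD32(pB)++, sum);
    sum = __SMLAD(w4, *__SIMD32(pB)++, sum);

    return sum;
}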


/**
 * @brief read and expand one 32-bit word holding sixteen INT2 values into eight
 *        32-bit words, each packing two sign-extended INT16 values, with reordering
 */
__STATIC_INLINE void *read_and_pad_reordered_INT2(void *source, int32_t * out1, int32_t * out2, int32_t * out3, int32_t * out4,
                                                  int32_t * out5, int32_t * out6, int32_t * out7, int32_t * out8)
{
    q31_t inA = *__SIMD32(source)++;

#ifndef ARM_MATH_BIG_ENDIAN
    *out1 = __SXTB16(__ROR(__SXTB16(inA << 6), 6));
    *out2 = __SXTB16(__ROR(__SXTB16(inA << 4), 6));
    *out3 = __SXTB16(__ROR(__SXTB16(inA << 2), 6));
    *out4 = __SXTB16(__ROR(__SXTB16(inA), 6));
    *out5 = __SXTB16(__ROR(__SXTB16(inA >> 2), 6));
    *out6 = __SXTB16(__ROR(__SXTB16(inA >> 4), 6));
    *out7 = __SXTB16(__ROR(__SXTB16(inA >> 6), 6));
    *out8 = __SXTB16(__ROR(__SXTB16(inA >> 8), 6));
#else
    *out8 = __SXTB16(__ROR(__SXTB16(inA << 6), 6));
    *out7 = __SXTB16(__ROR(__SXTB16(inA << 4), 6));
    *out6 = __SXTB16(__ROR(__SXTB16(inA << 2), 6));
    *out5 = __SXTB16(__ROR(__SXTB16(inA), 6));
    *out4 = __SXTB16(__ROR(__SXTB16(inA >> 2), 6));
    *out3 = __SXTB16(__ROR(__SXTB16(inA >> 4), 6));
    *out2 = __SXTB16(__ROR(__SXTB16(inA >> 6), 6));
    *out1 = __SXTB16(__ROR(__SXTB16(inA >> 8), 6));
#endif

    return source;
}
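
As a reading aid for what "with reordering" means here: in the little-endian path, output word k (k = 1..8) packs 2-bit element k-1 of the input word in its low halfword and element k+7 in its high halfword, both sign-extended to INT16 (element 0 being bits [1:0]); the INT4 helper above pairs elements k-1 and k+3 in the same way. The plain-C helper below expresses this reading for the INT2 case and is intended only as a host-side cross-check; the pairing was derived from the intrinsic sequence itself and should be treated as an assumption rather than documented behaviour.

/* Scalar reference for the little-endian INT2 expansion, for cross-checking
 * only; the element pairing is an interpretation of the intrinsics above. */
static inline int16_t sext2(uint32_t word, int idx)
{
    /* sign-extend the 2-bit field at position idx (idx = 0 -> bits [1:0]) */
    return (int16_t)((int32_t)(word << (30 - 2 * idx)) >> 30);
}

static inline int32_t int2_expanded_word(uint32_t word, int k)
{
    /* expected value of out(k+1), k = 0..7: element k in the low halfword,
     * element k + 8 in the high halfword */
    uint32_t lo = (uint16_t)sext2(word, k);
    uint32_t hi = (uint16_t)sext2(word, k + 8);
    return (int32_t)(lo | (hi << 16));
}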


#endif

/**
