mirror of
https://github.com/ArteryTek/AT32F403A_407_Firmware_Library.git
synced 2026-05-21 09:22:19 +00:00
Compare commits
10 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3bac77751d | ||
|
|
16720f59ce | ||
|
|
4403cb6781 | ||
|
|
eb3198540f | ||
|
|
25f12a4ab3 | ||
|
|
c0f81f4b0d | ||
|
|
a89a26cea4 | ||
|
|
eb00682e95 | ||
|
|
e1d3f6e2c9 | ||
|
|
4fd69ebc78 |
Binary file not shown.
29
LICENSE
Normal file
29
LICENSE
Normal file
@@ -0,0 +1,29 @@
|
||||
BSD 3-Clause License
|
||||
|
||||
Copyright (c) 2021, ArteryTek
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the copyright holder nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
Binary file not shown.
Binary file not shown.
File diff suppressed because it is too large
Load Diff
@@ -1,76 +1,76 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_const_structs.h
|
||||
* Description: Constant structs that are initialized for user convenience.
|
||||
* For example, some can be given as arguments to the arm_cfft_f32() function.
|
||||
*
|
||||
* $Date: 27. January 2017
|
||||
* $Revision: V.1.5.1
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef _ARM_CONST_STRUCTS_H
|
||||
#define _ARM_CONST_STRUCTS_H
|
||||
|
||||
#include "arm_math.h"
|
||||
#include "arm_common_tables.h"
|
||||
|
||||
extern const arm_cfft_instance_f64 arm_cfft_sR_f64_len16;
|
||||
extern const arm_cfft_instance_f64 arm_cfft_sR_f64_len32;
|
||||
extern const arm_cfft_instance_f64 arm_cfft_sR_f64_len64;
|
||||
extern const arm_cfft_instance_f64 arm_cfft_sR_f64_len128;
|
||||
extern const arm_cfft_instance_f64 arm_cfft_sR_f64_len256;
|
||||
extern const arm_cfft_instance_f64 arm_cfft_sR_f64_len512;
|
||||
extern const arm_cfft_instance_f64 arm_cfft_sR_f64_len1024;
|
||||
extern const arm_cfft_instance_f64 arm_cfft_sR_f64_len2048;
|
||||
extern const arm_cfft_instance_f64 arm_cfft_sR_f64_len4096;
|
||||
|
||||
extern const arm_cfft_instance_f32 arm_cfft_sR_f32_len16;
|
||||
extern const arm_cfft_instance_f32 arm_cfft_sR_f32_len32;
|
||||
extern const arm_cfft_instance_f32 arm_cfft_sR_f32_len64;
|
||||
extern const arm_cfft_instance_f32 arm_cfft_sR_f32_len128;
|
||||
extern const arm_cfft_instance_f32 arm_cfft_sR_f32_len256;
|
||||
extern const arm_cfft_instance_f32 arm_cfft_sR_f32_len512;
|
||||
extern const arm_cfft_instance_f32 arm_cfft_sR_f32_len1024;
|
||||
extern const arm_cfft_instance_f32 arm_cfft_sR_f32_len2048;
|
||||
extern const arm_cfft_instance_f32 arm_cfft_sR_f32_len4096;
|
||||
|
||||
extern const arm_cfft_instance_q31 arm_cfft_sR_q31_len16;
|
||||
extern const arm_cfft_instance_q31 arm_cfft_sR_q31_len32;
|
||||
extern const arm_cfft_instance_q31 arm_cfft_sR_q31_len64;
|
||||
extern const arm_cfft_instance_q31 arm_cfft_sR_q31_len128;
|
||||
extern const arm_cfft_instance_q31 arm_cfft_sR_q31_len256;
|
||||
extern const arm_cfft_instance_q31 arm_cfft_sR_q31_len512;
|
||||
extern const arm_cfft_instance_q31 arm_cfft_sR_q31_len1024;
|
||||
extern const arm_cfft_instance_q31 arm_cfft_sR_q31_len2048;
|
||||
extern const arm_cfft_instance_q31 arm_cfft_sR_q31_len4096;
|
||||
|
||||
extern const arm_cfft_instance_q15 arm_cfft_sR_q15_len16;
|
||||
extern const arm_cfft_instance_q15 arm_cfft_sR_q15_len32;
|
||||
extern const arm_cfft_instance_q15 arm_cfft_sR_q15_len64;
|
||||
extern const arm_cfft_instance_q15 arm_cfft_sR_q15_len128;
|
||||
extern const arm_cfft_instance_q15 arm_cfft_sR_q15_len256;
|
||||
extern const arm_cfft_instance_q15 arm_cfft_sR_q15_len512;
|
||||
extern const arm_cfft_instance_q15 arm_cfft_sR_q15_len1024;
|
||||
extern const arm_cfft_instance_q15 arm_cfft_sR_q15_len2048;
|
||||
extern const arm_cfft_instance_q15 arm_cfft_sR_q15_len4096;
|
||||
|
||||
#endif
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_const_structs.h
|
||||
* Description: Constant structs that are initialized for user convenience.
|
||||
* For example, some can be given as arguments to the arm_cfft_f32() function.
|
||||
*
|
||||
* $Date: 27. January 2017
|
||||
* $Revision: V.1.5.1
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef _ARM_CONST_STRUCTS_H
|
||||
#define _ARM_CONST_STRUCTS_H
|
||||
|
||||
#include "arm_math.h"
|
||||
#include "arm_common_tables.h"
|
||||
|
||||
extern const arm_cfft_instance_f64 arm_cfft_sR_f64_len16;
|
||||
extern const arm_cfft_instance_f64 arm_cfft_sR_f64_len32;
|
||||
extern const arm_cfft_instance_f64 arm_cfft_sR_f64_len64;
|
||||
extern const arm_cfft_instance_f64 arm_cfft_sR_f64_len128;
|
||||
extern const arm_cfft_instance_f64 arm_cfft_sR_f64_len256;
|
||||
extern const arm_cfft_instance_f64 arm_cfft_sR_f64_len512;
|
||||
extern const arm_cfft_instance_f64 arm_cfft_sR_f64_len1024;
|
||||
extern const arm_cfft_instance_f64 arm_cfft_sR_f64_len2048;
|
||||
extern const arm_cfft_instance_f64 arm_cfft_sR_f64_len4096;
|
||||
|
||||
extern const arm_cfft_instance_f32 arm_cfft_sR_f32_len16;
|
||||
extern const arm_cfft_instance_f32 arm_cfft_sR_f32_len32;
|
||||
extern const arm_cfft_instance_f32 arm_cfft_sR_f32_len64;
|
||||
extern const arm_cfft_instance_f32 arm_cfft_sR_f32_len128;
|
||||
extern const arm_cfft_instance_f32 arm_cfft_sR_f32_len256;
|
||||
extern const arm_cfft_instance_f32 arm_cfft_sR_f32_len512;
|
||||
extern const arm_cfft_instance_f32 arm_cfft_sR_f32_len1024;
|
||||
extern const arm_cfft_instance_f32 arm_cfft_sR_f32_len2048;
|
||||
extern const arm_cfft_instance_f32 arm_cfft_sR_f32_len4096;
|
||||
|
||||
extern const arm_cfft_instance_q31 arm_cfft_sR_q31_len16;
|
||||
extern const arm_cfft_instance_q31 arm_cfft_sR_q31_len32;
|
||||
extern const arm_cfft_instance_q31 arm_cfft_sR_q31_len64;
|
||||
extern const arm_cfft_instance_q31 arm_cfft_sR_q31_len128;
|
||||
extern const arm_cfft_instance_q31 arm_cfft_sR_q31_len256;
|
||||
extern const arm_cfft_instance_q31 arm_cfft_sR_q31_len512;
|
||||
extern const arm_cfft_instance_q31 arm_cfft_sR_q31_len1024;
|
||||
extern const arm_cfft_instance_q31 arm_cfft_sR_q31_len2048;
|
||||
extern const arm_cfft_instance_q31 arm_cfft_sR_q31_len4096;
|
||||
|
||||
extern const arm_cfft_instance_q15 arm_cfft_sR_q15_len16;
|
||||
extern const arm_cfft_instance_q15 arm_cfft_sR_q15_len32;
|
||||
extern const arm_cfft_instance_q15 arm_cfft_sR_q15_len64;
|
||||
extern const arm_cfft_instance_q15 arm_cfft_sR_q15_len128;
|
||||
extern const arm_cfft_instance_q15 arm_cfft_sR_q15_len256;
|
||||
extern const arm_cfft_instance_q15 arm_cfft_sR_q15_len512;
|
||||
extern const arm_cfft_instance_q15 arm_cfft_sR_q15_len1024;
|
||||
extern const arm_cfft_instance_q15 arm_cfft_sR_q15_len2048;
|
||||
extern const arm_cfft_instance_q15 arm_cfft_sR_q15_len4096;
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,348 +1,348 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_helium_utils.h
|
||||
* Description: Utility functions for Helium development
|
||||
*
|
||||
* $Date: 09. September 2019
|
||||
* $Revision: V.1.5.1
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef _ARM_UTILS_HELIUM_H_
|
||||
#define _ARM_UTILS_HELIUM_H_
|
||||
|
||||
/***************************************
|
||||
|
||||
Definitions available for MVEF and MVEI
|
||||
|
||||
***************************************/
|
||||
#if defined (ARM_MATH_HELIUM) || defined(ARM_MATH_MVEF) || defined(ARM_MATH_MVEI)
|
||||
|
||||
#define INACTIVELANE 0 /* inactive lane content: value 0; presumably the filler written to lanes that a predicate leaves inactive - confirm at the call sites */
|
||||
|
||||
|
||||
#endif /* defined (ARM_MATH_HELIUM) || defined(ARM_MATH_MVEF) || defined(ARM_MATH_MVEI) */
|
||||
|
||||
/***************************************
|
||||
|
||||
Definitions available for MVEF only
|
||||
|
||||
***************************************/
|
||||
#if defined (ARM_MATH_HELIUM) || defined(ARM_MATH_MVEF)
|
||||
|
||||
/**
 * @brief  Add the four float32 lanes of a vector together.
 * @param  in  vector whose lanes 0..3 are summed
 * @return lane0 + lane1 + lane2 + lane3, accumulated left to right
 */
__STATIC_FORCEINLINE float32_t vecAddAcrossF32Mve(float32x4_t in)
{
    const float32_t lane0 = vgetq_lane(in, 0);
    const float32_t lane1 = vgetq_lane(in, 1);
    const float32_t lane2 = vgetq_lane(in, 2);
    const float32_t lane3 = vgetq_lane(in, 3);

    /* Explicit left-to-right association: the rounding sequence matters for floats. */
    return ((lane0 + lane1) + lane2) + lane3;
}
|
||||
|
||||
/* Constant for the initial guess of the Newton inverse-square-root iteration. */
#define INVSQRT_MAGIC_F32 0x5f3759df

/*
 * One lane-wise Newton refinement step of an inverse square root estimate:
 *
 *     invSqrt = xStart * (1.5f - xHalf * xStart * xStart)
 *
 *   invSqrt : vector lvalue that receives the refined estimate
 *   xHalf   : vector multiplied into xStart * xStart
 *   xStart  : current estimate (expanded several times: pass a plain variable)
 *
 * The macro expands to a brace-enclosed block that declares a local named
 * 'tmp', so no argument may itself be an expression that refers to a
 * variable called 'tmp'.
 */
#define INVSQRT_NEWTON_MVE_F32(invSqrt, xHalf, xStart)\
{                                                      \
    /* tmp = xStart * xStart */                        \
    float32x4_t tmp = vmulq(xStart, xStart);           \
    /* tmp = xHalf * xStart * xStart */                \
    tmp = vmulq(tmp, xHalf);                           \
    /* tmp = 1.5f - xHalf * xStart * xStart */         \
    tmp = vsubq(vdupq_n_f32(1.5f), tmp);               \
    /* refined estimate = tmp * xStart */              \
    invSqrt = vmulq(tmp, xStart);                      \
}
|
||||
#endif /* defined (ARM_MATH_HELIUM) || defined(ARM_MATH_MVEF) */
|
||||
|
||||
/***************************************
|
||||
|
||||
Definitions available for MVEI only
|
||||
|
||||
***************************************/
|
||||
#if defined (ARM_MATH_HELIUM) || defined(ARM_MATH_MVEI)
|
||||
|
||||
|
||||
#include "arm_common_tables.h"
|
||||
|
||||
/* Following functions are used to transpose matrix in f32 and q31 cases */
|
||||
/**
 * @brief  Transpose a 2x2 matrix of 32-bit elements.
 * @param  pDataSrc   the 4 source elements (read with one vector load)
 * @param  pDataDest  receives the 4 transposed elements
 * @return ARM_MATH_SUCCESS, always
 */
__STATIC_INLINE arm_status arm_mat_trans_32bit_2x2_mve(
    uint32_t * pDataSrc,
    uint32_t * pDataDest)
{
    /*
     * Destination element index of each source lane:
     *
     *   | 0 1 |      | 0 2 |
     *   | 2 3 |  =>  | 1 3 |
     */
    static const uint32x4_t dstIndex = { 0, 2, 1, 3 };

    /* Load the whole matrix, then scatter every lane to its transposed slot. */
    uint32x4_t srcVec = vldrwq_u32((uint32_t const *) pDataSrc);

    vstrwq_scatter_shifted_offset_u32(pDataDest, dstIndex, srcVec);

    return (ARM_MATH_SUCCESS);
}
|
||||
|
||||
/**
 * @brief  Transpose a 3x3 matrix of 32-bit elements.
 * @param  pDataSrc   the 9 source elements
 * @param  pDataDest  receives the 9 transposed elements
 * @return ARM_MATH_SUCCESS, always
 *
 * Elements 0..3 and 4..7 are moved with two vector load / scatter-store
 * pairs; element 8 stays on the diagonal and is copied as a scalar.
 */
__STATIC_INLINE arm_status arm_mat_trans_32bit_3x3_mve(
    uint32_t * pDataSrc,
    uint32_t * pDataDest)
{
    /*
     * Destination element index of each source element:
     *
     *   | 0 1 2 |      | 0 3 6 |      source 0..3 -> { 0, 3, 6, 1 }
     *   | 3 4 5 |  =>  | 1 4 7 |      source 4..7 -> { 4, 7, 2, 5 }
     *   | 6 7 8 |      | 2 5 8 |      source 8    ->   8
     */
    const uint32x4_t dstIndexLo = { 0, 3, 6, 1};
    const uint32x4_t dstIndexHi = { 4, 7, 2, 5};

    /* Both loads are issued before any store. */
    uint32x4_t srcLo = vldrwq_u32((uint32_t const *) pDataSrc);
    uint32x4_t srcHi = vldrwq_u32((uint32_t const *) (pDataSrc + 4));

    vstrwq_scatter_shifted_offset_u32(pDataDest, dstIndexLo, srcLo);
    vstrwq_scatter_shifted_offset_u32(pDataDest, dstIndexHi, srcHi);

    /* Last diagonal element does not move. */
    pDataDest[8] = pDataSrc[8];

    return (ARM_MATH_SUCCESS);
}
|
||||
|
||||
/**
 * @brief  Transpose a 4x4 matrix of 32-bit elements.
 * @param  pDataSrc   the 16 source elements
 * @param  pDataDest  receives the 16 transposed elements
 * @return ARM_MATH_SUCCESS, always
 *
 * A 4x4 transposition is a 4-way de-interleave:
 *
 *    0  1  2  3        0  4  8 12
 *    4  5  6  7        1  5  9 13
 *    8  9 10 11   =>   2  6 10 14
 *   12 13 14 15        3  7 11 15
 */
__STATIC_INLINE arm_status arm_mat_trans_32bit_4x4_mve(uint32_t * pDataSrc, uint32_t * pDataDest)
{
    uint32_t     row;
    uint32x4x4_t deinterleaved = vld4q((uint32_t const *) pDataSrc);

    /* Each de-interleaved vector is one row of the transposed matrix. */
    for (row = 0U; row < 4U; row++)
    {
        vstrwq(pDataDest + 4U * row, deinterleaved.val[row]);
    }

    return (ARM_MATH_SUCCESS);
}
|
||||
|
||||
|
||||
/**
 * @brief  Transpose a matrix of 32-bit elements of arbitrary size.
 * @param  srcRows    number of rows of the source matrix
 * @param  srcCols    number of columns of the source matrix
 * @param  pDataSrc   source elements; element (r, c) is read at
 *                    pDataSrc[r * srcCols + c]
 * @param  pDataDest  destination; each source column is written as
 *                    srcRows consecutive elements
 * @return ARM_MATH_SUCCESS, always
 *
 * Each source column is gathered four rows at a time and stored
 * contiguously. A remainder of 1..3 rows is handled with a predicate.
 *
 * Changes against the previous implementation:
 *  - the tail gather is predicated, so lanes beyond the last row no longer
 *    read memory past the end of the source matrix;
 *  - the column loop is pre-tested, so srcCols == 0 does nothing instead of
 *    wrapping the counter.
 */
__STATIC_INLINE arm_status arm_mat_trans_32bit_generic_mve(
    uint16_t    srcRows,
    uint16_t    srcCols,
    uint32_t  * pDataSrc,
    uint32_t  * pDataDest)
{
    uint32x4_t      vecOffs;
    uint32_t        i;
    uint32_t        blkCnt;
    uint32_t const *pDataC;
    uint32_t       *pDataDestR;
    uint32x4_t      vecIn;

    /* Element offsets of four consecutive rows of one column: {0,1,2,3} * srcCols */
    vecOffs = vidupq_u32((uint32_t)0, 1);
    vecOffs = vecOffs * srcCols;

    /* One iteration per source column */
    i = srcCols;
    while (i > 0U)
    {
        pDataC = (uint32_t const *) pDataSrc;
        pDataDestR = pDataDest;

        /* Full groups of four rows */
        blkCnt = srcRows >> 2;
        while (blkCnt > 0U)
        {
            vecIn = vldrwq_gather_shifted_offset_u32(pDataC, vecOffs);
            vstrwq(pDataDestR, vecIn);
            pDataDestR += 4;
            pDataC = pDataC + srcCols * 4;
            /*
             * Decrement the blockSize loop counter
             */
            blkCnt--;
        }

        /*
         * tail: 1..3 remaining rows
         */
        blkCnt = srcRows & 3;
        if (blkCnt > 0U)
        {
            mve_pred16_t p0 = vctp32q(blkCnt);

            /* Predicate the load as well as the store: inactive lanes would
             * otherwise access rows that do not exist. */
            vecIn = vldrwq_gather_shifted_offset_z_u32(pDataC, vecOffs, p0);
            vstrwq_p(pDataDestR, vecIn, p0);
        }

        /* Next source column / next destination row */
        pDataSrc += 1;
        pDataDest += srcRows;
        i--;
    }

    return (ARM_MATH_SUCCESS);
}
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_FAST_SQRT_Q31_MVE)
|
||||
/**
 * @brief  Vector square root of four Q31 values.
 * @param  vecIn  input vector
 * @return per-lane result; lanes whose input is negative are set to 0
 *
 * Each lane is normalised by an even number of redundant sign bits, two
 * entries are fetched from sqrtTable_Q31, and the estimate is refined with
 * saturating rounding multiplies before being scaled back.
 */
__STATIC_INLINE q31x4_t FAST_VSQRT_Q31(q31x4_t vecIn)
{
    q31x4_t vecShift, vecNorm, vecTblIdx;
    q31x4_t vecEst, vecCorr;
    q63x2_t vecProdEven, vecProdOdd;
    q31x4_t vecOut;

    /* Even count of redundant sign bits per lane */
    vecShift = vclsq(vecIn);
    vecShift = vbicq(vecShift, 1);

    /* in = in << no_of_sign_bits */
    vecNorm = vshlq(vecIn, vecShift);

    /* Table entries sit in pairs: index = 2 * (in >> 24), then index + 1 */
    vecTblIdx = (vecNorm >> 24) << 1;
    vecEst = vldrwq_gather_shifted_offset_s32(sqrtTable_Q31, vecTblIdx);
    vecCorr = vldrwq_gather_shifted_offset_s32(sqrtTable_Q31, vecTblIdx + 1);

    /* Refine the table estimate */
    vecCorr = vqrdmulhq(vecCorr, vecNorm);
    vecEst = vecEst - vecCorr;
    vecCorr = vqrdmulhq(vecEst, vecEst);
    vecCorr = vqrdmulhq(vecNorm, vecCorr);
    vecCorr = vdupq_n_s32(0x18000000) - vecCorr;
    vecEst = vqrdmulhq(vecEst, vecCorr);

    /* Widening products: even lanes (0, 2) and odd lanes (1, 3) */
    vecProdEven = vmullbq_int(vecNorm, vecEst);
    vecProdOdd = vmulltq_int(vecNorm, vecEst);

    /* Scale every lane back by 26 + (sign bits / 2) and narrow to Q31 */
    vecOut[0] = (q31_t) asrl(vecProdEven[0], 26 + (vecShift[0] >> 1));
    vecOut[1] = (q31_t) asrl(vecProdOdd[0], 26 + (vecShift[1] >> 1));
    vecOut[2] = (q31_t) asrl(vecProdEven[1], 26 + (vecShift[2] >> 1));
    vecOut[3] = (q31_t) asrl(vecProdOdd[1], 26 + (vecShift[3] >> 1));

    /* set negative values to 0 */
    vecOut = vdupq_m(vecOut, 0, vcmpltq_n_s32(vecIn, 0));

    return vecOut;
}
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_FAST_SQRT_Q15_MVE)
|
||||
/**
 * @brief  Vector square root of eight Q15 values.
 * @param  vecIn  input vector
 * @return per-lane result; lanes whose input is negative are set to 0
 *
 * Each lane is normalised by an even number of redundant sign bits, two
 * entries are fetched from sqrtTable_Q15, and the estimate is refined with
 * saturating rounding multiplies. The widened even/odd products are then
 * shifted back and narrowed into one Q15 vector.
 */
__STATIC_INLINE q15x8_t FAST_VSQRT_Q15(q15x8_t vecIn)
{
    q15x8_t vecShift, vecNorm, vecTblIdx;
    q15x8_t vecEst, vecCorr, vecOut;
    q31x4_t vecProdEven, vecProdOdd, vecShiftWide;

    vecOut = vuninitializedq_s16();

    /* Even count of redundant sign bits per lane */
    vecShift = vclsq(vecIn);
    vecShift = vbicq(vecShift, 1);

    /* in = in << no_of_sign_bits */
    vecNorm = vshlq(vecIn, vecShift);

    /* Table entries sit in pairs: index = 2 * (in >> 8), then index + 1 */
    vecTblIdx = (vecNorm >> 8) << 1;
    vecEst = vldrhq_gather_shifted_offset_s16(sqrtTable_Q15, vecTblIdx);
    vecCorr = vldrhq_gather_shifted_offset_s16(sqrtTable_Q15, vecTblIdx + 1);

    /* Refine the table estimate */
    vecCorr = vqrdmulhq(vecCorr, vecNorm);
    vecEst = vecEst - vecCorr;
    vecCorr = vqrdmulhq(vecEst, vecEst);
    vecCorr = vqrdmulhq(vecNorm, vecCorr);
    vecCorr = vdupq_n_s16(0x1800) - vecCorr;
    vecEst = vqrdmulhq(vecEst, vecCorr);

    /* Widening products: even lanes and odd lanes */
    vecProdEven = vmullbq_int(vecNorm, vecEst);
    vecProdOdd = vmulltq_int(vecNorm, vecEst);

    /*
     * Scaling amount is (sign bits / 2) + 10; it is negated so that the
     * register based vshl shifts to the right.
     */
    vecShift = -((vecShift >> 1) + 10);

    /* shift even elements */
    vecShiftWide = vmovlbq(vecShift);
    vecProdEven = vshlq(vecProdEven, vecShiftWide);

    /* shift odd elements */
    vecShiftWide = vmovltq(vecShift);
    vecProdOdd = vshlq(vecProdOdd, vecShiftWide);

    /* merge and narrow odd and even parts */
    vecOut = vmovnbq_s32(vecOut, vecProdEven);
    vecOut = vmovntq_s32(vecOut, vecProdOdd);

    /* set negative values to 0 */
    vecOut = vdupq_m(vecOut, 0, vcmpltq_n_s16(vecIn, 0));

    return vecOut;
}
|
||||
#endif
|
||||
|
||||
#endif /* defined (ARM_MATH_HELIUM) || defined(ARM_MATH_MVEI) */
|
||||
|
||||
#endif
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_helium_utils.h
|
||||
* Description: Utility functions for Helium development
|
||||
*
|
||||
* $Date: 09. September 2019
|
||||
* $Revision: V.1.5.1
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef _ARM_UTILS_HELIUM_H_
|
||||
#define _ARM_UTILS_HELIUM_H_
|
||||
|
||||
/***************************************
|
||||
|
||||
Definitions available for MVEF and MVEI
|
||||
|
||||
***************************************/
|
||||
#if defined (ARM_MATH_HELIUM) || defined(ARM_MATH_MVEF) || defined(ARM_MATH_MVEI)
|
||||
|
||||
#define INACTIVELANE 0 /* inactive lane content: value 0; presumably the filler written to lanes that a predicate leaves inactive - confirm at the call sites */
|
||||
|
||||
|
||||
#endif /* defined (ARM_MATH_HELIUM) || defined(ARM_MATH_MVEF) || defined(ARM_MATH_MVEI) */
|
||||
|
||||
/***************************************
|
||||
|
||||
Definitions available for MVEF only
|
||||
|
||||
***************************************/
|
||||
#if defined (ARM_MATH_HELIUM) || defined(ARM_MATH_MVEF)
|
||||
|
||||
/**
 * @brief  Add the four float32 lanes of a vector together.
 * @param  in  vector whose lanes 0..3 are summed
 * @return lane0 + lane1 + lane2 + lane3, accumulated left to right
 */
__STATIC_FORCEINLINE float32_t vecAddAcrossF32Mve(float32x4_t in)
{
    const float32_t lane0 = vgetq_lane(in, 0);
    const float32_t lane1 = vgetq_lane(in, 1);
    const float32_t lane2 = vgetq_lane(in, 2);
    const float32_t lane3 = vgetq_lane(in, 3);

    /* Explicit left-to-right association: the rounding sequence matters for floats. */
    return ((lane0 + lane1) + lane2) + lane3;
}
|
||||
|
||||
/* Constant for the initial guess of the Newton inverse-square-root iteration. */
#define INVSQRT_MAGIC_F32 0x5f3759df

/*
 * One lane-wise Newton refinement step of an inverse square root estimate:
 *
 *     invSqrt = xStart * (1.5f - xHalf * xStart * xStart)
 *
 *   invSqrt : vector lvalue that receives the refined estimate
 *   xHalf   : vector multiplied into xStart * xStart
 *   xStart  : current estimate (expanded several times: pass a plain variable)
 *
 * The macro expands to a brace-enclosed block that declares a local named
 * 'tmp', so no argument may itself be an expression that refers to a
 * variable called 'tmp'.
 */
#define INVSQRT_NEWTON_MVE_F32(invSqrt, xHalf, xStart)\
{                                                      \
    /* tmp = xStart * xStart */                        \
    float32x4_t tmp = vmulq(xStart, xStart);           \
    /* tmp = xHalf * xStart * xStart */                \
    tmp = vmulq(tmp, xHalf);                           \
    /* tmp = 1.5f - xHalf * xStart * xStart */         \
    tmp = vsubq(vdupq_n_f32(1.5f), tmp);               \
    /* refined estimate = tmp * xStart */              \
    invSqrt = vmulq(tmp, xStart);                      \
}
|
||||
#endif /* defined (ARM_MATH_HELIUM) || defined(ARM_MATH_MVEF) */
|
||||
|
||||
/***************************************
|
||||
|
||||
Definitions available for MVEI only
|
||||
|
||||
***************************************/
|
||||
#if defined (ARM_MATH_HELIUM) || defined(ARM_MATH_MVEI)
|
||||
|
||||
|
||||
#include "arm_common_tables.h"
|
||||
|
||||
/* Following functions are used to transpose matrix in f32 and q31 cases */
|
||||
/**
 * @brief  Transpose a 2x2 matrix of 32-bit elements.
 * @param  pDataSrc   the 4 source elements (read with one vector load)
 * @param  pDataDest  receives the 4 transposed elements
 * @return ARM_MATH_SUCCESS, always
 */
__STATIC_INLINE arm_status arm_mat_trans_32bit_2x2_mve(
    uint32_t * pDataSrc,
    uint32_t * pDataDest)
{
    /*
     * Destination element index of each source lane:
     *
     *   | 0 1 |      | 0 2 |
     *   | 2 3 |  =>  | 1 3 |
     */
    static const uint32x4_t dstIndex = { 0, 2, 1, 3 };

    /* Load the whole matrix, then scatter every lane to its transposed slot. */
    uint32x4_t srcVec = vldrwq_u32((uint32_t const *) pDataSrc);

    vstrwq_scatter_shifted_offset_u32(pDataDest, dstIndex, srcVec);

    return (ARM_MATH_SUCCESS);
}
|
||||
|
||||
/**
 * @brief  Transpose a 3x3 matrix of 32-bit elements.
 * @param  pDataSrc   the 9 source elements
 * @param  pDataDest  receives the 9 transposed elements
 * @return ARM_MATH_SUCCESS, always
 *
 * Elements 0..3 and 4..7 are moved with two vector load / scatter-store
 * pairs; element 8 stays on the diagonal and is copied as a scalar.
 */
__STATIC_INLINE arm_status arm_mat_trans_32bit_3x3_mve(
    uint32_t * pDataSrc,
    uint32_t * pDataDest)
{
    /*
     * Destination element index of each source element:
     *
     *   | 0 1 2 |      | 0 3 6 |      source 0..3 -> { 0, 3, 6, 1 }
     *   | 3 4 5 |  =>  | 1 4 7 |      source 4..7 -> { 4, 7, 2, 5 }
     *   | 6 7 8 |      | 2 5 8 |      source 8    ->   8
     */
    const uint32x4_t dstIndexLo = { 0, 3, 6, 1};
    const uint32x4_t dstIndexHi = { 4, 7, 2, 5};

    /* Both loads are issued before any store. */
    uint32x4_t srcLo = vldrwq_u32((uint32_t const *) pDataSrc);
    uint32x4_t srcHi = vldrwq_u32((uint32_t const *) (pDataSrc + 4));

    vstrwq_scatter_shifted_offset_u32(pDataDest, dstIndexLo, srcLo);
    vstrwq_scatter_shifted_offset_u32(pDataDest, dstIndexHi, srcHi);

    /* Last diagonal element does not move. */
    pDataDest[8] = pDataSrc[8];

    return (ARM_MATH_SUCCESS);
}
|
||||
|
||||
/**
 * @brief  Transpose a 4x4 matrix of 32-bit elements.
 * @param  pDataSrc   the 16 source elements
 * @param  pDataDest  receives the 16 transposed elements
 * @return ARM_MATH_SUCCESS, always
 *
 * A 4x4 transposition is a 4-way de-interleave:
 *
 *    0  1  2  3        0  4  8 12
 *    4  5  6  7        1  5  9 13
 *    8  9 10 11   =>   2  6 10 14
 *   12 13 14 15        3  7 11 15
 */
__STATIC_INLINE arm_status arm_mat_trans_32bit_4x4_mve(uint32_t * pDataSrc, uint32_t * pDataDest)
{
    uint32_t     row;
    uint32x4x4_t deinterleaved = vld4q((uint32_t const *) pDataSrc);

    /* Each de-interleaved vector is one row of the transposed matrix. */
    for (row = 0U; row < 4U; row++)
    {
        vstrwq(pDataDest + 4U * row, deinterleaved.val[row]);
    }

    return (ARM_MATH_SUCCESS);
}
|
||||
|
||||
|
||||
/**
 * @brief  Transpose a matrix of 32-bit elements of arbitrary size.
 * @param  srcRows    number of rows of the source matrix
 * @param  srcCols    number of columns of the source matrix
 * @param  pDataSrc   source elements; element (r, c) is read at
 *                    pDataSrc[r * srcCols + c]
 * @param  pDataDest  destination; each source column is written as
 *                    srcRows consecutive elements
 * @return ARM_MATH_SUCCESS, always
 *
 * Each source column is gathered four rows at a time and stored
 * contiguously. A remainder of 1..3 rows is handled with a predicate.
 *
 * Changes against the previous implementation:
 *  - the tail gather is predicated, so lanes beyond the last row no longer
 *    read memory past the end of the source matrix;
 *  - the column loop is pre-tested, so srcCols == 0 does nothing instead of
 *    wrapping the counter.
 */
__STATIC_INLINE arm_status arm_mat_trans_32bit_generic_mve(
    uint16_t    srcRows,
    uint16_t    srcCols,
    uint32_t  * pDataSrc,
    uint32_t  * pDataDest)
{
    uint32x4_t      vecOffs;
    uint32_t        i;
    uint32_t        blkCnt;
    uint32_t const *pDataC;
    uint32_t       *pDataDestR;
    uint32x4_t      vecIn;

    /* Element offsets of four consecutive rows of one column: {0,1,2,3} * srcCols */
    vecOffs = vidupq_u32((uint32_t)0, 1);
    vecOffs = vecOffs * srcCols;

    /* One iteration per source column */
    i = srcCols;
    while (i > 0U)
    {
        pDataC = (uint32_t const *) pDataSrc;
        pDataDestR = pDataDest;

        /* Full groups of four rows */
        blkCnt = srcRows >> 2;
        while (blkCnt > 0U)
        {
            vecIn = vldrwq_gather_shifted_offset_u32(pDataC, vecOffs);
            vstrwq(pDataDestR, vecIn);
            pDataDestR += 4;
            pDataC = pDataC + srcCols * 4;
            /*
             * Decrement the blockSize loop counter
             */
            blkCnt--;
        }

        /*
         * tail: 1..3 remaining rows
         */
        blkCnt = srcRows & 3;
        if (blkCnt > 0U)
        {
            mve_pred16_t p0 = vctp32q(blkCnt);

            /* Predicate the load as well as the store: inactive lanes would
             * otherwise access rows that do not exist. */
            vecIn = vldrwq_gather_shifted_offset_z_u32(pDataC, vecOffs, p0);
            vstrwq_p(pDataDestR, vecIn, p0);
        }

        /* Next source column / next destination row */
        pDataSrc += 1;
        pDataDest += srcRows;
        i--;
    }

    return (ARM_MATH_SUCCESS);
}
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_FAST_SQRT_Q31_MVE)
|
||||
/**
 * @brief  Vector square root of four Q31 values.
 * @param  vecIn  input vector
 * @return per-lane result; lanes whose input is negative are set to 0
 *
 * Each lane is normalised by an even number of redundant sign bits, two
 * entries are fetched from sqrtTable_Q31, and the estimate is refined with
 * saturating rounding multiplies before being scaled back.
 */
__STATIC_INLINE q31x4_t FAST_VSQRT_Q31(q31x4_t vecIn)
{
    q31x4_t vecShift, vecNorm, vecTblIdx;
    q31x4_t vecEst, vecCorr;
    q63x2_t vecProdEven, vecProdOdd;
    q31x4_t vecOut;

    /* Even count of redundant sign bits per lane */
    vecShift = vclsq(vecIn);
    vecShift = vbicq(vecShift, 1);

    /* in = in << no_of_sign_bits */
    vecNorm = vshlq(vecIn, vecShift);

    /* Table entries sit in pairs: index = 2 * (in >> 24), then index + 1 */
    vecTblIdx = (vecNorm >> 24) << 1;
    vecEst = vldrwq_gather_shifted_offset_s32(sqrtTable_Q31, vecTblIdx);
    vecCorr = vldrwq_gather_shifted_offset_s32(sqrtTable_Q31, vecTblIdx + 1);

    /* Refine the table estimate */
    vecCorr = vqrdmulhq(vecCorr, vecNorm);
    vecEst = vecEst - vecCorr;
    vecCorr = vqrdmulhq(vecEst, vecEst);
    vecCorr = vqrdmulhq(vecNorm, vecCorr);
    vecCorr = vdupq_n_s32(0x18000000) - vecCorr;
    vecEst = vqrdmulhq(vecEst, vecCorr);

    /* Widening products: even lanes (0, 2) and odd lanes (1, 3) */
    vecProdEven = vmullbq_int(vecNorm, vecEst);
    vecProdOdd = vmulltq_int(vecNorm, vecEst);

    /* Scale every lane back by 26 + (sign bits / 2) and narrow to Q31 */
    vecOut[0] = (q31_t) asrl(vecProdEven[0], 26 + (vecShift[0] >> 1));
    vecOut[1] = (q31_t) asrl(vecProdOdd[0], 26 + (vecShift[1] >> 1));
    vecOut[2] = (q31_t) asrl(vecProdEven[1], 26 + (vecShift[2] >> 1));
    vecOut[3] = (q31_t) asrl(vecProdOdd[1], 26 + (vecShift[3] >> 1));

    /* set negative values to 0 */
    vecOut = vdupq_m(vecOut, 0, vcmpltq_n_s32(vecIn, 0));

    return vecOut;
}
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_FAST_SQRT_Q15_MVE)
|
||||
/**
 * @brief  Vector square root of eight Q15 values.
 * @param  vecIn  input vector
 * @return per-lane result; lanes whose input is negative are set to 0
 *
 * Each lane is normalised by an even number of redundant sign bits, two
 * entries are fetched from sqrtTable_Q15, and the estimate is refined with
 * saturating rounding multiplies. The widened even/odd products are then
 * shifted back and narrowed into one Q15 vector.
 */
__STATIC_INLINE q15x8_t FAST_VSQRT_Q15(q15x8_t vecIn)
{
    q15x8_t vecShift, vecNorm, vecTblIdx;
    q15x8_t vecEst, vecCorr, vecOut;
    q31x4_t vecProdEven, vecProdOdd, vecShiftWide;

    vecOut = vuninitializedq_s16();

    /* Even count of redundant sign bits per lane */
    vecShift = vclsq(vecIn);
    vecShift = vbicq(vecShift, 1);

    /* in = in << no_of_sign_bits */
    vecNorm = vshlq(vecIn, vecShift);

    /* Table entries sit in pairs: index = 2 * (in >> 8), then index + 1 */
    vecTblIdx = (vecNorm >> 8) << 1;
    vecEst = vldrhq_gather_shifted_offset_s16(sqrtTable_Q15, vecTblIdx);
    vecCorr = vldrhq_gather_shifted_offset_s16(sqrtTable_Q15, vecTblIdx + 1);

    /* Refine the table estimate */
    vecCorr = vqrdmulhq(vecCorr, vecNorm);
    vecEst = vecEst - vecCorr;
    vecCorr = vqrdmulhq(vecEst, vecEst);
    vecCorr = vqrdmulhq(vecNorm, vecCorr);
    vecCorr = vdupq_n_s16(0x1800) - vecCorr;
    vecEst = vqrdmulhq(vecEst, vecCorr);

    /* Widening products: even lanes and odd lanes */
    vecProdEven = vmullbq_int(vecNorm, vecEst);
    vecProdOdd = vmulltq_int(vecNorm, vecEst);

    /*
     * Scaling amount is (sign bits / 2) + 10; it is negated so that the
     * register based vshl shifts to the right.
     */
    vecShift = -((vecShift >> 1) + 10);

    /* shift even elements */
    vecShiftWide = vmovlbq(vecShift);
    vecProdEven = vshlq(vecProdEven, vecShiftWide);

    /* shift odd elements */
    vecShiftWide = vmovltq(vecShift);
    vecProdOdd = vshlq(vecProdOdd, vecShiftWide);

    /* merge and narrow odd and even parts */
    vecOut = vmovnbq_s32(vecOut, vecProdEven);
    vecOut = vmovntq_s32(vecOut, vecProdOdd);

    /* set negative values to 0 */
    vecOut = vdupq_m(vecOut, 0, vcmpltq_n_s16(vecIn, 0));

    return vecOut;
}
|
||||
#endif
|
||||
|
||||
#endif /* defined (ARM_MATH_HELIUM) || defined(ARM_MATH_MVEI) */
|
||||
|
||||
#endif
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,235 +1,235 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_mve_tables.h
|
||||
* Description: common tables like fft twiddle factors, Bitreverse, reciprocal etc
|
||||
* used for MVE implementation only
|
||||
*
|
||||
* $Date: 08. January 2020
|
||||
* $Revision: V1.7.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef _ARM_MVE_TABLES_H
|
||||
#define _ARM_MVE_TABLES_H
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES)
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F32_16) || defined(ARM_TABLE_TWIDDLECOEF_F32_32)
|
||||
|
||||
extern uint32_t rearranged_twiddle_tab_stride1_arr_16_f32[2];
|
||||
extern uint32_t rearranged_twiddle_tab_stride2_arr_16_f32[2];
|
||||
extern uint32_t rearranged_twiddle_tab_stride3_arr_16_f32[2];
|
||||
extern float32_t rearranged_twiddle_stride1_16_f32[8];
|
||||
extern float32_t rearranged_twiddle_stride2_16_f32[8];
|
||||
extern float32_t rearranged_twiddle_stride3_16_f32[8];
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F32_64) || defined(ARM_TABLE_TWIDDLECOEF_F32_128)
|
||||
|
||||
extern uint32_t rearranged_twiddle_tab_stride1_arr_64_f32[3];
|
||||
extern uint32_t rearranged_twiddle_tab_stride2_arr_64_f32[3];
|
||||
extern uint32_t rearranged_twiddle_tab_stride3_arr_64_f32[3];
|
||||
extern float32_t rearranged_twiddle_stride1_64_f32[40];
|
||||
extern float32_t rearranged_twiddle_stride2_64_f32[40];
|
||||
extern float32_t rearranged_twiddle_stride3_64_f32[40];
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F32_256) || defined(ARM_TABLE_TWIDDLECOEF_F32_512)
|
||||
|
||||
extern uint32_t rearranged_twiddle_tab_stride1_arr_256_f32[4];
|
||||
extern uint32_t rearranged_twiddle_tab_stride2_arr_256_f32[4];
|
||||
extern uint32_t rearranged_twiddle_tab_stride3_arr_256_f32[4];
|
||||
extern float32_t rearranged_twiddle_stride1_256_f32[168];
|
||||
extern float32_t rearranged_twiddle_stride2_256_f32[168];
|
||||
extern float32_t rearranged_twiddle_stride3_256_f32[168];
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F32_1024) || defined(ARM_TABLE_TWIDDLECOEF_F32_2048)
|
||||
|
||||
extern uint32_t rearranged_twiddle_tab_stride1_arr_1024_f32[5];
|
||||
extern uint32_t rearranged_twiddle_tab_stride2_arr_1024_f32[5];
|
||||
extern uint32_t rearranged_twiddle_tab_stride3_arr_1024_f32[5];
|
||||
extern float32_t rearranged_twiddle_stride1_1024_f32[680];
|
||||
extern float32_t rearranged_twiddle_stride2_1024_f32[680];
|
||||
extern float32_t rearranged_twiddle_stride3_1024_f32[680];
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F32_4096) || defined(ARM_TABLE_TWIDDLECOEF_F32_8192)
|
||||
|
||||
extern uint32_t rearranged_twiddle_tab_stride1_arr_4096_f32[6];
|
||||
extern uint32_t rearranged_twiddle_tab_stride2_arr_4096_f32[6];
|
||||
extern uint32_t rearranged_twiddle_tab_stride3_arr_4096_f32[6];
|
||||
extern float32_t rearranged_twiddle_stride1_4096_f32[2728];
|
||||
extern float32_t rearranged_twiddle_stride2_4096_f32[2728];
|
||||
extern float32_t rearranged_twiddle_stride3_4096_f32[2728];
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES) */
|
||||
|
||||
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
|
||||
|
||||
|
||||
|
||||
#if defined(ARM_MATH_MVEI)
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES)
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q31_16) || defined(ARM_TABLE_TWIDDLECOEF_Q31_32)
|
||||
|
||||
extern uint32_t rearranged_twiddle_tab_stride1_arr_16_q31[2];
|
||||
extern uint32_t rearranged_twiddle_tab_stride2_arr_16_q31[2];
|
||||
extern uint32_t rearranged_twiddle_tab_stride3_arr_16_q31[2];
|
||||
extern q31_t rearranged_twiddle_stride1_16_q31[8];
|
||||
extern q31_t rearranged_twiddle_stride2_16_q31[8];
|
||||
extern q31_t rearranged_twiddle_stride3_16_q31[8];
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q31_64) || defined(ARM_TABLE_TWIDDLECOEF_Q31_128)
|
||||
|
||||
extern uint32_t rearranged_twiddle_tab_stride1_arr_64_q31[3];
|
||||
extern uint32_t rearranged_twiddle_tab_stride2_arr_64_q31[3];
|
||||
extern uint32_t rearranged_twiddle_tab_stride3_arr_64_q31[3];
|
||||
extern q31_t rearranged_twiddle_stride1_64_q31[40];
|
||||
extern q31_t rearranged_twiddle_stride2_64_q31[40];
|
||||
extern q31_t rearranged_twiddle_stride3_64_q31[40];
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q31_256) || defined(ARM_TABLE_TWIDDLECOEF_Q31_512)
|
||||
|
||||
extern uint32_t rearranged_twiddle_tab_stride1_arr_256_q31[4];
|
||||
extern uint32_t rearranged_twiddle_tab_stride2_arr_256_q31[4];
|
||||
extern uint32_t rearranged_twiddle_tab_stride3_arr_256_q31[4];
|
||||
extern q31_t rearranged_twiddle_stride1_256_q31[168];
|
||||
extern q31_t rearranged_twiddle_stride2_256_q31[168];
|
||||
extern q31_t rearranged_twiddle_stride3_256_q31[168];
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q31_1024) || defined(ARM_TABLE_TWIDDLECOEF_Q31_2048)
|
||||
|
||||
extern uint32_t rearranged_twiddle_tab_stride1_arr_1024_q31[5];
|
||||
extern uint32_t rearranged_twiddle_tab_stride2_arr_1024_q31[5];
|
||||
extern uint32_t rearranged_twiddle_tab_stride3_arr_1024_q31[5];
|
||||
extern q31_t rearranged_twiddle_stride1_1024_q31[680];
|
||||
extern q31_t rearranged_twiddle_stride2_1024_q31[680];
|
||||
extern q31_t rearranged_twiddle_stride3_1024_q31[680];
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q31_4096) || defined(ARM_TABLE_TWIDDLECOEF_Q31_8192)
|
||||
|
||||
extern uint32_t rearranged_twiddle_tab_stride1_arr_4096_q31[6];
|
||||
extern uint32_t rearranged_twiddle_tab_stride2_arr_4096_q31[6];
|
||||
extern uint32_t rearranged_twiddle_tab_stride3_arr_4096_q31[6];
|
||||
extern q31_t rearranged_twiddle_stride1_4096_q31[2728];
|
||||
extern q31_t rearranged_twiddle_stride2_4096_q31[2728];
|
||||
extern q31_t rearranged_twiddle_stride3_4096_q31[2728];
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES) */
|
||||
|
||||
#endif /* defined(ARM_MATH_MVEI) */
|
||||
|
||||
|
||||
|
||||
#if defined(ARM_MATH_MVEI)
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES)
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q15_16) || defined(ARM_TABLE_TWIDDLECOEF_Q15_32)
|
||||
|
||||
extern uint32_t rearranged_twiddle_tab_stride1_arr_16_q15[2];
|
||||
extern uint32_t rearranged_twiddle_tab_stride2_arr_16_q15[2];
|
||||
extern uint32_t rearranged_twiddle_tab_stride3_arr_16_q15[2];
|
||||
extern q15_t rearranged_twiddle_stride1_16_q15[8];
|
||||
extern q15_t rearranged_twiddle_stride2_16_q15[8];
|
||||
extern q15_t rearranged_twiddle_stride3_16_q15[8];
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q15_64) || defined(ARM_TABLE_TWIDDLECOEF_Q15_128)
|
||||
|
||||
extern uint32_t rearranged_twiddle_tab_stride1_arr_64_q15[3];
|
||||
extern uint32_t rearranged_twiddle_tab_stride2_arr_64_q15[3];
|
||||
extern uint32_t rearranged_twiddle_tab_stride3_arr_64_q15[3];
|
||||
extern q15_t rearranged_twiddle_stride1_64_q15[40];
|
||||
extern q15_t rearranged_twiddle_stride2_64_q15[40];
|
||||
extern q15_t rearranged_twiddle_stride3_64_q15[40];
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q15_256) || defined(ARM_TABLE_TWIDDLECOEF_Q15_512)
|
||||
|
||||
extern uint32_t rearranged_twiddle_tab_stride1_arr_256_q15[4];
|
||||
extern uint32_t rearranged_twiddle_tab_stride2_arr_256_q15[4];
|
||||
extern uint32_t rearranged_twiddle_tab_stride3_arr_256_q15[4];
|
||||
extern q15_t rearranged_twiddle_stride1_256_q15[168];
|
||||
extern q15_t rearranged_twiddle_stride2_256_q15[168];
|
||||
extern q15_t rearranged_twiddle_stride3_256_q15[168];
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q15_1024) || defined(ARM_TABLE_TWIDDLECOEF_Q15_2048)
|
||||
|
||||
extern uint32_t rearranged_twiddle_tab_stride1_arr_1024_q15[5];
|
||||
extern uint32_t rearranged_twiddle_tab_stride2_arr_1024_q15[5];
|
||||
extern uint32_t rearranged_twiddle_tab_stride3_arr_1024_q15[5];
|
||||
extern q15_t rearranged_twiddle_stride1_1024_q15[680];
|
||||
extern q15_t rearranged_twiddle_stride2_1024_q15[680];
|
||||
extern q15_t rearranged_twiddle_stride3_1024_q15[680];
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q15_4096) || defined(ARM_TABLE_TWIDDLECOEF_Q15_8192)
|
||||
|
||||
extern uint32_t rearranged_twiddle_tab_stride1_arr_4096_q15[6];
|
||||
extern uint32_t rearranged_twiddle_tab_stride2_arr_4096_q15[6];
|
||||
extern uint32_t rearranged_twiddle_tab_stride3_arr_4096_q15[6];
|
||||
extern q15_t rearranged_twiddle_stride1_4096_q15[2728];
|
||||
extern q15_t rearranged_twiddle_stride2_4096_q15[2728];
|
||||
extern q15_t rearranged_twiddle_stride3_4096_q15[2728];
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES) */
|
||||
|
||||
#endif /* defined(ARM_MATH_MVEI) */
|
||||
|
||||
|
||||
|
||||
#if defined(ARM_MATH_MVEI)
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES)
|
||||
|
||||
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES) */
|
||||
|
||||
#endif /* defined(ARM_MATH_MVEI) */
|
||||
|
||||
|
||||
|
||||
#endif /*_ARM_MVE_TABLES_H*/
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_mve_tables.h
|
||||
* Description: common tables like fft twiddle factors, Bitreverse, reciprocal etc
|
||||
* used for MVE implementation only
|
||||
*
|
||||
* $Date: 08. January 2020
|
||||
* $Revision: V1.7.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef _ARM_MVE_TABLES_H
|
||||
#define _ARM_MVE_TABLES_H
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES)
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F32_16) || defined(ARM_TABLE_TWIDDLECOEF_F32_32)
|
||||
|
||||
extern uint32_t rearranged_twiddle_tab_stride1_arr_16_f32[2];
|
||||
extern uint32_t rearranged_twiddle_tab_stride2_arr_16_f32[2];
|
||||
extern uint32_t rearranged_twiddle_tab_stride3_arr_16_f32[2];
|
||||
extern float32_t rearranged_twiddle_stride1_16_f32[8];
|
||||
extern float32_t rearranged_twiddle_stride2_16_f32[8];
|
||||
extern float32_t rearranged_twiddle_stride3_16_f32[8];
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F32_64) || defined(ARM_TABLE_TWIDDLECOEF_F32_128)
|
||||
|
||||
extern uint32_t rearranged_twiddle_tab_stride1_arr_64_f32[3];
|
||||
extern uint32_t rearranged_twiddle_tab_stride2_arr_64_f32[3];
|
||||
extern uint32_t rearranged_twiddle_tab_stride3_arr_64_f32[3];
|
||||
extern float32_t rearranged_twiddle_stride1_64_f32[40];
|
||||
extern float32_t rearranged_twiddle_stride2_64_f32[40];
|
||||
extern float32_t rearranged_twiddle_stride3_64_f32[40];
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F32_256) || defined(ARM_TABLE_TWIDDLECOEF_F32_512)
|
||||
|
||||
extern uint32_t rearranged_twiddle_tab_stride1_arr_256_f32[4];
|
||||
extern uint32_t rearranged_twiddle_tab_stride2_arr_256_f32[4];
|
||||
extern uint32_t rearranged_twiddle_tab_stride3_arr_256_f32[4];
|
||||
extern float32_t rearranged_twiddle_stride1_256_f32[168];
|
||||
extern float32_t rearranged_twiddle_stride2_256_f32[168];
|
||||
extern float32_t rearranged_twiddle_stride3_256_f32[168];
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F32_1024) || defined(ARM_TABLE_TWIDDLECOEF_F32_2048)
|
||||
|
||||
extern uint32_t rearranged_twiddle_tab_stride1_arr_1024_f32[5];
|
||||
extern uint32_t rearranged_twiddle_tab_stride2_arr_1024_f32[5];
|
||||
extern uint32_t rearranged_twiddle_tab_stride3_arr_1024_f32[5];
|
||||
extern float32_t rearranged_twiddle_stride1_1024_f32[680];
|
||||
extern float32_t rearranged_twiddle_stride2_1024_f32[680];
|
||||
extern float32_t rearranged_twiddle_stride3_1024_f32[680];
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F32_4096) || defined(ARM_TABLE_TWIDDLECOEF_F32_8192)
|
||||
|
||||
extern uint32_t rearranged_twiddle_tab_stride1_arr_4096_f32[6];
|
||||
extern uint32_t rearranged_twiddle_tab_stride2_arr_4096_f32[6];
|
||||
extern uint32_t rearranged_twiddle_tab_stride3_arr_4096_f32[6];
|
||||
extern float32_t rearranged_twiddle_stride1_4096_f32[2728];
|
||||
extern float32_t rearranged_twiddle_stride2_4096_f32[2728];
|
||||
extern float32_t rearranged_twiddle_stride3_4096_f32[2728];
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES) */
|
||||
|
||||
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
|
||||
|
||||
|
||||
|
||||
#if defined(ARM_MATH_MVEI)
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES)
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q31_16) || defined(ARM_TABLE_TWIDDLECOEF_Q31_32)
|
||||
|
||||
extern uint32_t rearranged_twiddle_tab_stride1_arr_16_q31[2];
|
||||
extern uint32_t rearranged_twiddle_tab_stride2_arr_16_q31[2];
|
||||
extern uint32_t rearranged_twiddle_tab_stride3_arr_16_q31[2];
|
||||
extern q31_t rearranged_twiddle_stride1_16_q31[8];
|
||||
extern q31_t rearranged_twiddle_stride2_16_q31[8];
|
||||
extern q31_t rearranged_twiddle_stride3_16_q31[8];
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q31_64) || defined(ARM_TABLE_TWIDDLECOEF_Q31_128)
|
||||
|
||||
extern uint32_t rearranged_twiddle_tab_stride1_arr_64_q31[3];
|
||||
extern uint32_t rearranged_twiddle_tab_stride2_arr_64_q31[3];
|
||||
extern uint32_t rearranged_twiddle_tab_stride3_arr_64_q31[3];
|
||||
extern q31_t rearranged_twiddle_stride1_64_q31[40];
|
||||
extern q31_t rearranged_twiddle_stride2_64_q31[40];
|
||||
extern q31_t rearranged_twiddle_stride3_64_q31[40];
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q31_256) || defined(ARM_TABLE_TWIDDLECOEF_Q31_512)
|
||||
|
||||
extern uint32_t rearranged_twiddle_tab_stride1_arr_256_q31[4];
|
||||
extern uint32_t rearranged_twiddle_tab_stride2_arr_256_q31[4];
|
||||
extern uint32_t rearranged_twiddle_tab_stride3_arr_256_q31[4];
|
||||
extern q31_t rearranged_twiddle_stride1_256_q31[168];
|
||||
extern q31_t rearranged_twiddle_stride2_256_q31[168];
|
||||
extern q31_t rearranged_twiddle_stride3_256_q31[168];
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q31_1024) || defined(ARM_TABLE_TWIDDLECOEF_Q31_2048)
|
||||
|
||||
extern uint32_t rearranged_twiddle_tab_stride1_arr_1024_q31[5];
|
||||
extern uint32_t rearranged_twiddle_tab_stride2_arr_1024_q31[5];
|
||||
extern uint32_t rearranged_twiddle_tab_stride3_arr_1024_q31[5];
|
||||
extern q31_t rearranged_twiddle_stride1_1024_q31[680];
|
||||
extern q31_t rearranged_twiddle_stride2_1024_q31[680];
|
||||
extern q31_t rearranged_twiddle_stride3_1024_q31[680];
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q31_4096) || defined(ARM_TABLE_TWIDDLECOEF_Q31_8192)
|
||||
|
||||
extern uint32_t rearranged_twiddle_tab_stride1_arr_4096_q31[6];
|
||||
extern uint32_t rearranged_twiddle_tab_stride2_arr_4096_q31[6];
|
||||
extern uint32_t rearranged_twiddle_tab_stride3_arr_4096_q31[6];
|
||||
extern q31_t rearranged_twiddle_stride1_4096_q31[2728];
|
||||
extern q31_t rearranged_twiddle_stride2_4096_q31[2728];
|
||||
extern q31_t rearranged_twiddle_stride3_4096_q31[2728];
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES) */
|
||||
|
||||
#endif /* defined(ARM_MATH_MVEI) */
|
||||
|
||||
|
||||
|
||||
#if defined(ARM_MATH_MVEI)
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES)
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q15_16) || defined(ARM_TABLE_TWIDDLECOEF_Q15_32)
|
||||
|
||||
extern uint32_t rearranged_twiddle_tab_stride1_arr_16_q15[2];
|
||||
extern uint32_t rearranged_twiddle_tab_stride2_arr_16_q15[2];
|
||||
extern uint32_t rearranged_twiddle_tab_stride3_arr_16_q15[2];
|
||||
extern q15_t rearranged_twiddle_stride1_16_q15[8];
|
||||
extern q15_t rearranged_twiddle_stride2_16_q15[8];
|
||||
extern q15_t rearranged_twiddle_stride3_16_q15[8];
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q15_64) || defined(ARM_TABLE_TWIDDLECOEF_Q15_128)
|
||||
|
||||
extern uint32_t rearranged_twiddle_tab_stride1_arr_64_q15[3];
|
||||
extern uint32_t rearranged_twiddle_tab_stride2_arr_64_q15[3];
|
||||
extern uint32_t rearranged_twiddle_tab_stride3_arr_64_q15[3];
|
||||
extern q15_t rearranged_twiddle_stride1_64_q15[40];
|
||||
extern q15_t rearranged_twiddle_stride2_64_q15[40];
|
||||
extern q15_t rearranged_twiddle_stride3_64_q15[40];
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q15_256) || defined(ARM_TABLE_TWIDDLECOEF_Q15_512)
|
||||
|
||||
extern uint32_t rearranged_twiddle_tab_stride1_arr_256_q15[4];
|
||||
extern uint32_t rearranged_twiddle_tab_stride2_arr_256_q15[4];
|
||||
extern uint32_t rearranged_twiddle_tab_stride3_arr_256_q15[4];
|
||||
extern q15_t rearranged_twiddle_stride1_256_q15[168];
|
||||
extern q15_t rearranged_twiddle_stride2_256_q15[168];
|
||||
extern q15_t rearranged_twiddle_stride3_256_q15[168];
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q15_1024) || defined(ARM_TABLE_TWIDDLECOEF_Q15_2048)
|
||||
|
||||
extern uint32_t rearranged_twiddle_tab_stride1_arr_1024_q15[5];
|
||||
extern uint32_t rearranged_twiddle_tab_stride2_arr_1024_q15[5];
|
||||
extern uint32_t rearranged_twiddle_tab_stride3_arr_1024_q15[5];
|
||||
extern q15_t rearranged_twiddle_stride1_1024_q15[680];
|
||||
extern q15_t rearranged_twiddle_stride2_1024_q15[680];
|
||||
extern q15_t rearranged_twiddle_stride3_1024_q15[680];
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q15_4096) || defined(ARM_TABLE_TWIDDLECOEF_Q15_8192)
|
||||
|
||||
extern uint32_t rearranged_twiddle_tab_stride1_arr_4096_q15[6];
|
||||
extern uint32_t rearranged_twiddle_tab_stride2_arr_4096_q15[6];
|
||||
extern uint32_t rearranged_twiddle_tab_stride3_arr_4096_q15[6];
|
||||
extern q15_t rearranged_twiddle_stride1_4096_q15[2728];
|
||||
extern q15_t rearranged_twiddle_stride2_4096_q15[2728];
|
||||
extern q15_t rearranged_twiddle_stride3_4096_q15[2728];
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES) */
|
||||
|
||||
#endif /* defined(ARM_MATH_MVEI) */
|
||||
|
||||
|
||||
|
||||
#if defined(ARM_MATH_MVEI)
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES)
|
||||
|
||||
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES) */
|
||||
|
||||
#endif /* defined(ARM_MATH_MVEI) */
|
||||
|
||||
|
||||
|
||||
#endif /*_ARM_MVE_TABLES_H*/
|
||||
|
||||
|
||||
@@ -1,372 +1,372 @@
|
||||
/******************************************************************************
|
||||
* @file arm_vec_math.h
|
||||
* @brief Public header file for CMSIS DSP Library
|
||||
* @version V1.7.0
|
||||
* @date 15. October 2019
|
||||
******************************************************************************/
|
||||
/*
|
||||
* Copyright (c) 2010-2019 Arm Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef _ARM_VEC_MATH_H
|
||||
#define _ARM_VEC_MATH_H
|
||||
|
||||
#include "arm_math.h"
|
||||
#include "arm_common_tables.h"
|
||||
#include "arm_helium_utils.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
#if (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
|
||||
#define INV_NEWTON_INIT_F32 0x7EF127EA
|
||||
|
||||
static const float32_t __logf_rng_f32=0.693147180f;
|
||||
|
||||
|
||||
/* fast inverse approximation (3x newton) */
|
||||
__STATIC_INLINE f32x4_t vrecip_medprec_f32(
|
||||
f32x4_t x)
|
||||
{
|
||||
q31x4_t m;
|
||||
f32x4_t b;
|
||||
any32x4_t xinv;
|
||||
f32x4_t ax = vabsq(x);
|
||||
|
||||
xinv.f = ax;
|
||||
m = 0x3F800000 - (xinv.i & 0x7F800000);
|
||||
xinv.i = xinv.i + m;
|
||||
xinv.f = 1.41176471f - 0.47058824f * xinv.f;
|
||||
xinv.i = xinv.i + m;
|
||||
|
||||
b = 2.0f - xinv.f * ax;
|
||||
xinv.f = xinv.f * b;
|
||||
|
||||
b = 2.0f - xinv.f * ax;
|
||||
xinv.f = xinv.f * b;
|
||||
|
||||
b = 2.0f - xinv.f * ax;
|
||||
xinv.f = xinv.f * b;
|
||||
|
||||
xinv.f = vdupq_m(xinv.f, INFINITY, vcmpeqq(x, 0.0f));
|
||||
/*
|
||||
* restore sign
|
||||
*/
|
||||
xinv.f = vnegq_m(xinv.f, xinv.f, vcmpltq(x, 0.0f));
|
||||
|
||||
return xinv.f;
|
||||
}
|
||||
|
||||
/* fast inverse approximation (4x newton) */
|
||||
__STATIC_INLINE f32x4_t vrecip_hiprec_f32(
|
||||
f32x4_t x)
|
||||
{
|
||||
q31x4_t m;
|
||||
f32x4_t b;
|
||||
any32x4_t xinv;
|
||||
f32x4_t ax = vabsq(x);
|
||||
|
||||
xinv.f = ax;
|
||||
|
||||
m = 0x3F800000 - (xinv.i & 0x7F800000);
|
||||
xinv.i = xinv.i + m;
|
||||
xinv.f = 1.41176471f - 0.47058824f * xinv.f;
|
||||
xinv.i = xinv.i + m;
|
||||
|
||||
b = 2.0f - xinv.f * ax;
|
||||
xinv.f = xinv.f * b;
|
||||
|
||||
b = 2.0f - xinv.f * ax;
|
||||
xinv.f = xinv.f * b;
|
||||
|
||||
b = 2.0f - xinv.f * ax;
|
||||
xinv.f = xinv.f * b;
|
||||
|
||||
b = 2.0f - xinv.f * ax;
|
||||
xinv.f = xinv.f * b;
|
||||
|
||||
xinv.f = vdupq_m(xinv.f, INFINITY, vcmpeqq(x, 0.0f));
|
||||
/*
|
||||
* restore sign
|
||||
*/
|
||||
xinv.f = vnegq_m(xinv.f, xinv.f, vcmpltq(x, 0.0f));
|
||||
|
||||
return xinv.f;
|
||||
}
|
||||
|
||||
/**
  @brief         Vector division computed as num * (1 / den).
  @param[in]     num   f32 quad vector numerator
  @param[in]     den   f32 quad vector denominator
  @return        num multiplied by vrecip_hiprec_f32(den)
 */
__STATIC_INLINE f32x4_t vdiv_f32(
    f32x4_t num, f32x4_t den)
{
    f32x4_t denInv = vrecip_hiprec_f32(den);

    return vmulq(num, denInv);
}
|
||||
|
||||
/**
|
||||
@brief Single-precision taylor dev.
|
||||
@param[in] x f32 quad vector input
|
||||
@param[in] coeffs f32 quad vector coeffs
|
||||
@return destination f32 quad vector
|
||||
*/
|
||||
|
||||
__STATIC_INLINE f32x4_t vtaylor_polyq_f32(
|
||||
f32x4_t x,
|
||||
const float32_t * coeffs)
|
||||
{
|
||||
f32x4_t A = vfmasq(vdupq_n_f32(coeffs[4]), x, coeffs[0]);
|
||||
f32x4_t B = vfmasq(vdupq_n_f32(coeffs[6]), x, coeffs[2]);
|
||||
f32x4_t C = vfmasq(vdupq_n_f32(coeffs[5]), x, coeffs[1]);
|
||||
f32x4_t D = vfmasq(vdupq_n_f32(coeffs[7]), x, coeffs[3]);
|
||||
f32x4_t x2 = vmulq(x, x);
|
||||
f32x4_t x4 = vmulq(x2, x2);
|
||||
f32x4_t res = vfmaq(vfmaq_f32(A, B, x2), vfmaq_f32(C, D, x2), x4);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
/**
  @brief         Split each lane into an unbiased exponent and the remaining value.
  @param[in]     x   f32 quad vector input
  @param[out]    e   receives (raw bits >> 23) - 127 for each lane
  @return        f32 quad vector whose raw bits are those of x minus (e << 23)
 */
__STATIC_INLINE f32x4_t vmant_exp_f32(
    f32x4_t     x,
    int32x4_t * e)
{
    any32x4_t bits;
    int32x4_t expo;

    bits.f = x;
    /* exponent: raw bits shifted down by 23, minus the bias of 127 */
    expo = (bits.i >> 23) - 127;
    /* take the unbiased exponent back out of the raw representation */
    bits.i = bits.i - (expo << 23);

    *e = expo;
    return bits.f;
}
|
||||
|
||||
|
||||
/**
  @brief         Vector logarithm approximation: polynomial on the mantissa
                 plus exponent * __logf_rng_f32.
  @param[in]     vecIn   f32 quad vector input
  @return        per-lane result; lanes comparing equal to 0.0f yield -INFINITY
 */
__STATIC_INLINE f32x4_t vlogq_f32(f32x4_t vecIn)
{
    q31x4_t expo;
    f32x4_t expoFlt;
    f32x4_t mant, mantPow;
    f32x4_t termA, termB, termC, termD;

    /*
     * split input into mantissa and unbiased exponent
     */
    mant    = vmant_exp_f32(vecIn, &expo);
    expoFlt = vcvtq_f32_s32(expo);

    /*
     * mantPow = r * r
     */
    mantPow = mant * mant;

    /*
     * a = lut[0] + r * lut[4]
     * b = lut[2] + r * lut[6]
     * c = lut[1] + r * lut[5]
     * d = lut[3] + r * lut[7]
     */
    termA = vfmaq(vdupq_n_f32(__logf_lut_f32[0]), mant, __logf_lut_f32[4]);
    termB = vfmaq(vdupq_n_f32(__logf_lut_f32[2]), mant, __logf_lut_f32[6]);
    termC = vfmaq(vdupq_n_f32(__logf_lut_f32[1]), mant, __logf_lut_f32[5]);
    termD = vfmaq(vdupq_n_f32(__logf_lut_f32[3]), mant, __logf_lut_f32[7]);

    /*
     * a = a + b * r^2
     * c = c + d * r^2
     */
    termA = vfmaq(termA, termB, mantPow);
    termC = vfmaq(termC, termD, mantPow);

    /*
     * result = a + c * r^4
     */
    mantPow = mantPow * mantPow;
    termA   = vfmaq(termA, termC, mantPow);

    /*
     * add the exponent contribution: result += exponent * __logf_rng_f32
     */
    termA = vfmaq(termA, expoFlt, __logf_rng_f32);

    /*
     * log(0): force -INFINITY on lanes whose input compares equal to zero
     */
    return vdupq_m(termA, -INFINITY, vcmpeqq(vecIn, 0.0f));
}
|
||||
|
||||
/**
  @brief         Vector exponential approximation.
  @param[in]     x   f32 quad vector input
  @return        per-lane result; lanes whose integer scale m is below -126 yield 0.0f
 */
__STATIC_INLINE f32x4_t vexpq_f32(
    f32x4_t x)
{
    int32x4_t m;
    f32x4_t   reduced;
    f32x4_t   result;
    q31x4_t   resultBits;

    /* range reduction: m = (int)(x * 1.4426950408f), reduced = x - m * 0.6931471805f */
    m       = vcvtq_s32_f32(vmulq_n_f32(x, 1.4426950408f));
    reduced = vfmsq_f32(x, vcvtq_f32_s32(m), vdupq_n_f32(0.6931471805f));

    /* polynomial approximation on the reduced argument */
    result = vtaylor_polyq_f32(reduced, exp_tab);

    /* reconstruct: saturating add of (m << 23) to the raw bits of the polynomial result */
    resultBits = vqaddq_s32((q31x4_t) (result), vqshlq_n_s32(m, 23));
    result     = (f32x4_t) (resultBits);

    /* flush to zero the lanes where m < -126 */
    result = vdupq_m(result, 0.0f, vcmpltq_n_s32(m, -126));

    return result;
}
|
||||
|
||||
/**
  @brief         Vectorized integer exponentiation by repeated multiplication.
  @param[in]     x    f32 quad vector value
  @param[in]     nb   integer exponent; values of 1 or less return x unchanged
  @return        x multiplied by itself (nb - 1) times
 */
__STATIC_INLINE f32x4_t arm_vec_exponent_f32(f32x4_t x, int32_t nb)
{
    f32x4_t acc = x;
    int32_t k;

    /* acc already holds x^1, so nb - 1 further multiplications are needed */
    for (k = 1; k < nb; k++) {
        acc = vmulq(acc, x);
    }

    return acc;
}
|
||||
|
||||
/**
  @brief         Vector reciprocal: bit-trick initial estimate refined by a
                 degree-7 correction polynomial.
  @param[in]     vecIn   f32 quad vector input
  @return        per-lane approximation of 1/vecIn; lanes comparing equal to
                 0.0f yield INFINITY
 */
__STATIC_INLINE f32x4_t vrecip_f32(f32x4_t vecIn)
{
    f32x4_t     vecSx, vecW, vecTmp;
    any32x4_t   v;

    vecSx = vabsq(vecIn);

    /*
     * Initial estimate of 1/|x|.
     * The estimate must be built from |x| (not from the signed input):
     * with a negative lane the subtraction below would produce a negative
     * estimate, w = |x| * v would be close to -1 instead of +1, and the
     * correction polynomial would then blow up. The sign is re-applied at
     * the end of the function.
     */
    v.f = vecSx;
    v.i = vsubq(vdupq_n_s32(INV_NEWTON_INIT_F32), v.i);

    vecW = vmulq(vecSx, v.f);

    // v.f = v.f * (8 + w * (-28 + w * (56 + w * (-70 + w *(56 + w * (-28 + w * (8 - w)))))));
    vecTmp = vsubq(vdupq_n_f32(8.0f), vecW);
    vecTmp = vfmasq(vecW, vecTmp, -28.0f);
    vecTmp = vfmasq(vecW, vecTmp, 56.0f);
    vecTmp = vfmasq(vecW, vecTmp, -70.0f);
    vecTmp = vfmasq(vecW, vecTmp, 56.0f);
    vecTmp = vfmasq(vecW, vecTmp, -28.0f);
    vecTmp = vfmasq(vecW, vecTmp, 8.0f);
    v.f = vmulq(v.f, vecTmp);

    /*
     * lanes whose input compares equal to zero are replaced by INFINITY
     */
    v.f = vdupq_m(v.f, INFINITY, vcmpeqq(vecIn, 0.0f));
    /*
     * restore sign
     */
    v.f = vnegq_m(v.f, v.f, vcmpltq(vecIn, 0.0f));
    return v.f;
}
|
||||
|
||||
/**
  @brief         Vector hyperbolic tangent computed as (e^(2x) - 1) / (e^(2x) + 1).
  @param[in]     val   f32 quad vector input; clamped to [-10, 10] before evaluation
  @return        per-lane tanh approximation
 */
__STATIC_INLINE f32x4_t vtanhq_f32(
    f32x4_t val)
{
    f32x4_t lowerBound = vdupq_n_f32(-10.f);
    f32x4_t upperBound = vdupq_n_f32(10.0f);
    f32x4_t clamped;
    f32x4_t e2x;
    f32x4_t numer, denomInv;

    /* clamp the argument before exponentiating */
    clamped = vminnmq_f32(vmaxnmq_f32(val, lowerBound), upperBound);

    e2x      = vexpq_f32(vmulq_n_f32(clamped, 2.f));
    numer    = vsubq_n_f32(e2x, 1.f);
    denomInv = vrecip_f32(vaddq_n_f32(e2x, 1.f));

    return vmulq_f32(numer, denomInv);
}
|
||||
|
||||
/**
  @brief         Vector power computed as exp(n * log(val)).
  @param[in]     val   f32 quad vector base
  @param[in]     n     f32 quad vector exponent
  @return        vexpq_f32 applied to n * vlogq_f32(val)
 */
__STATIC_INLINE f32x4_t vpowq_f32(
    f32x4_t val,
    f32x4_t n)
{
    f32x4_t logVal = vlogq_f32(val);
    f32x4_t scaled = vmulq_f32(n, logVal);

    return vexpq_f32(scaled);
}
|
||||
|
||||
#endif /* (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE)*/
|
||||
|
||||
#if (defined(ARM_MATH_MVEI) || defined(ARM_MATH_HELIUM))
|
||||
#endif /* (defined(ARM_MATH_MVEI) || defined(ARM_MATH_HELIUM)) */
|
||||
|
||||
#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_NEON_EXPERIMENTAL)) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
|
||||
#include "NEMath.h"
|
||||
/**
|
||||
* @brief Vectorized integer exponentiation
|
||||
* @param[in] x value
|
||||
* @param[in] nb integer exponent >= 1
|
||||
* @return x^nb
|
||||
*
|
||||
*/
|
||||
__STATIC_INLINE float32x4_t arm_vec_exponent_f32(float32x4_t x, int32_t nb)
|
||||
{
|
||||
float32x4_t r = x;
|
||||
nb --;
|
||||
while(nb > 0)
|
||||
{
|
||||
r = vmulq_f32(r , x);
|
||||
nb--;
|
||||
}
|
||||
return(r);
|
||||
}
|
||||
|
||||
|
||||
/**
 * @brief Vector square root computed as x * (1 / sqrt(x))
 * @param[in]    x           value
 * @return x multiplied by a refined reciprocal-square-root estimate
 *
 */
__STATIC_INLINE float32x4_t __arm_vec_sqrt_f32_neon(float32x4_t x)
{
    /* the estimate is taken on max(x, FLT_MIN) rather than on x itself */
    float32x4_t floored = vmaxq_f32(x, vdupq_n_f32(FLT_MIN));
    float32x4_t rsqrt = vrsqrteq_f32(floored);
    int step;

    /* two refinement steps using the vrsqrtsq_f32 correction factor */
    for (step = 0; step < 2; step++)
    {
        float32x4_t corr = vrsqrtsq_f32(vmulq_f32(floored, rsqrt), rsqrt);

        rsqrt = vmulq_f32(corr, rsqrt);
    }

    return vmulq_f32(x, rsqrt);
}
|
||||
|
||||
/**
 * @brief Vector square root of eight 16-bit fixed-point values (15 fractional bits)
 * @param[in]    vec         input vector
 * @return square roots, narrowed back to 16 bits with saturation
 *
 */
__STATIC_INLINE int16x8_t __arm_vec_sqrt_q15_neon(int16x8_t vec)
{
    int32x4_t wideLo, wideHi;
    float32x4_t rootLo, rootHi;

    /* low four lanes: widen, convert to float, take the root, convert back */
    wideLo = vmovl_s16(vget_low_s16(vec));
    rootLo = __arm_vec_sqrt_f32_neon(vcvtq_n_f32_s32(wideLo, 15));
    wideLo = vcvtq_n_s32_f32(rootLo, 15);

    /* high four lanes: same sequence */
    wideHi = vmovl_s16(vget_high_s16(vec));
    rootHi = __arm_vec_sqrt_f32_neon(vcvtq_n_f32_s32(wideHi, 15));
    wideHi = vcvtq_n_s32_f32(rootHi, 15);

    /* saturating narrow of both halves and recombination */
    return vcombine_s16(vqmovn_s32(wideLo), vqmovn_s32(wideHi));
}
|
||||
|
||||
/**
 * @brief Vector square root of four 32-bit fixed-point values (31 fractional bits)
 * @param[in]    vec         input vector
 * @return square roots converted back to fixed point with 31 fractional bits
 *
 */
__STATIC_INLINE int32x4_t __arm_vec_sqrt_q31_neon(int32x4_t vec)
{
    /* fixed point -> float, float square root, float -> fixed point */
    float32x4_t asFloat = vcvtq_n_f32_s32(vec, 31);
    float32x4_t root = __arm_vec_sqrt_f32_neon(asFloat);

    return vcvtq_n_s32_f32(root, 31);
}
|
||||
|
||||
#endif /* (defined(ARM_MATH_NEON) || defined(ARM_MATH_NEON_EXPERIMENTAL)) && !defined(ARM_MATH_AUTOVECTORIZE) */
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* _ARM_VEC_MATH_H */
|
||||
|
||||
/**
|
||||
*
|
||||
* End of file.
|
||||
*/
|
||||
/******************************************************************************
|
||||
* @file arm_vec_math.h
|
||||
* @brief Public header file for CMSIS DSP Library
|
||||
* @version V1.7.0
|
||||
* @date 15. October 2019
|
||||
******************************************************************************/
|
||||
/*
|
||||
* Copyright (c) 2010-2019 Arm Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef _ARM_VEC_MATH_H
|
||||
#define _ARM_VEC_MATH_H
|
||||
|
||||
#include "arm_math.h"
|
||||
#include "arm_common_tables.h"
|
||||
#include "arm_helium_utils.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
#if (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
|
||||
#define INV_NEWTON_INIT_F32 0x7EF127EA
|
||||
|
||||
static const float32_t __logf_rng_f32=0.693147180f;
|
||||
|
||||
|
||||
/* fast inverse approximation (3x newton) */
|
||||
__STATIC_INLINE f32x4_t vrecip_medprec_f32(
|
||||
f32x4_t x)
|
||||
{
|
||||
q31x4_t m;
|
||||
f32x4_t b;
|
||||
any32x4_t xinv;
|
||||
f32x4_t ax = vabsq(x);
|
||||
|
||||
xinv.f = ax;
|
||||
m = 0x3F800000 - (xinv.i & 0x7F800000);
|
||||
xinv.i = xinv.i + m;
|
||||
xinv.f = 1.41176471f - 0.47058824f * xinv.f;
|
||||
xinv.i = xinv.i + m;
|
||||
|
||||
b = 2.0f - xinv.f * ax;
|
||||
xinv.f = xinv.f * b;
|
||||
|
||||
b = 2.0f - xinv.f * ax;
|
||||
xinv.f = xinv.f * b;
|
||||
|
||||
b = 2.0f - xinv.f * ax;
|
||||
xinv.f = xinv.f * b;
|
||||
|
||||
xinv.f = vdupq_m(xinv.f, INFINITY, vcmpeqq(x, 0.0f));
|
||||
/*
|
||||
* restore sign
|
||||
*/
|
||||
xinv.f = vnegq_m(xinv.f, xinv.f, vcmpltq(x, 0.0f));
|
||||
|
||||
return xinv.f;
|
||||
}
|
||||
|
||||
/* fast inverse approximation (4x newton) */
|
||||
__STATIC_INLINE f32x4_t vrecip_hiprec_f32(
|
||||
f32x4_t x)
|
||||
{
|
||||
q31x4_t m;
|
||||
f32x4_t b;
|
||||
any32x4_t xinv;
|
||||
f32x4_t ax = vabsq(x);
|
||||
|
||||
xinv.f = ax;
|
||||
|
||||
m = 0x3F800000 - (xinv.i & 0x7F800000);
|
||||
xinv.i = xinv.i + m;
|
||||
xinv.f = 1.41176471f - 0.47058824f * xinv.f;
|
||||
xinv.i = xinv.i + m;
|
||||
|
||||
b = 2.0f - xinv.f * ax;
|
||||
xinv.f = xinv.f * b;
|
||||
|
||||
b = 2.0f - xinv.f * ax;
|
||||
xinv.f = xinv.f * b;
|
||||
|
||||
b = 2.0f - xinv.f * ax;
|
||||
xinv.f = xinv.f * b;
|
||||
|
||||
b = 2.0f - xinv.f * ax;
|
||||
xinv.f = xinv.f * b;
|
||||
|
||||
xinv.f = vdupq_m(xinv.f, INFINITY, vcmpeqq(x, 0.0f));
|
||||
/*
|
||||
* restore sign
|
||||
*/
|
||||
xinv.f = vnegq_m(xinv.f, xinv.f, vcmpltq(x, 0.0f));
|
||||
|
||||
return xinv.f;
|
||||
}
|
||||
|
||||
/**
 * Per-lane division implemented as num * (1 / den), using the
 * high-precision reciprocal approximation.
 */
__STATIC_INLINE f32x4_t vdiv_f32(
    f32x4_t num, f32x4_t den)
{
    const f32x4_t denInv = vrecip_hiprec_f32(den);

    return vmulq(num, denInv);
}
|
||||
|
||||
/**
|
||||
@brief Single-precision taylor dev.
|
||||
@param[in] x f32 quad vector input
|
||||
@param[in] coeffs f32 quad vector coeffs
|
||||
@return destination f32 quad vector
|
||||
*/
|
||||
|
||||
__STATIC_INLINE f32x4_t vtaylor_polyq_f32(
|
||||
f32x4_t x,
|
||||
const float32_t * coeffs)
|
||||
{
|
||||
f32x4_t A = vfmasq(vdupq_n_f32(coeffs[4]), x, coeffs[0]);
|
||||
f32x4_t B = vfmasq(vdupq_n_f32(coeffs[6]), x, coeffs[2]);
|
||||
f32x4_t C = vfmasq(vdupq_n_f32(coeffs[5]), x, coeffs[1]);
|
||||
f32x4_t D = vfmasq(vdupq_n_f32(coeffs[7]), x, coeffs[3]);
|
||||
f32x4_t x2 = vmulq(x, x);
|
||||
f32x4_t x4 = vmulq(x2, x2);
|
||||
f32x4_t res = vfmaq(vfmaq_f32(A, B, x2), vfmaq_f32(C, D, x2), x4);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
/**
 * Split each lane into an exponent and a rescaled value.
 *
 * The bits above the 23-bit fraction field, minus 127, are stored to *e.
 * The returned float has that same amount removed from its exponent field.
 */
__STATIC_INLINE f32x4_t vmant_exp_f32(
    f32x4_t     x,
    int32x4_t * e)
{
    any32x4_t bits;
    int32x4_t expo;

    bits.f = x;
    /* exponent field shifted down, with the bias of 127 removed */
    expo   = (bits.i >> 23) - 127;
    /* subtract the unbiased exponent from the exponent field */
    bits.i = bits.i - (expo << 23);

    *e = expo;
    return bits.f;
}
|
||||
|
||||
|
||||
/**
 * Per-lane natural logarithm approximation.
 *
 * The input is split into mantissa and exponent; a degree-7 polynomial with
 * coefficients from __logf_lut_f32 is evaluated on the mantissa, and the
 * exponent scaled by __logf_rng_f32 is added. Lanes equal to 0 return
 * -INFINITY.
 */
__STATIC_INLINE f32x4_t vlogq_f32(f32x4_t vecIn)
{
    q31x4_t expo;
    f32x4_t mant, mantPow;
    f32x4_t termA, termB, termC, termD;
    f32x4_t expoFlt;
    f32x4_t result;

    /* extract exponent */
    mant = vmant_exp_f32(vecIn, &expo);

    /* mantPow = mant^2 */
    mantPow = mant * mant;

    /* a = lut[4] * mant + lut[0] */
    termA = vdupq_n_f32(__logf_lut_f32[0]);
    termA = vfmaq(termA, mant, __logf_lut_f32[4]);
    /* b = lut[6] * mant + lut[2] */
    termB = vdupq_n_f32(__logf_lut_f32[2]);
    termB = vfmaq(termB, mant, __logf_lut_f32[6]);
    /* c = lut[5] * mant + lut[1] */
    termC = vdupq_n_f32(__logf_lut_f32[1]);
    termC = vfmaq(termC, mant, __logf_lut_f32[5]);
    /* d = lut[7] * mant + lut[3] */
    termD = vdupq_n_f32(__logf_lut_f32[3]);
    termD = vfmaq(termD, mant, __logf_lut_f32[7]);

    /* a = a + b * mant^2 */
    termA = vfmaq(termA, termB, mantPow);
    /* c = c + d * mant^2 */
    termC = vfmaq(termC, termD, mantPow);

    /* mantPow = mant^4 */
    mantPow = mantPow * mantPow;
    expoFlt = vcvtq_f32_s32(expo);

    /* polynomial value = a + c * mant^4 */
    result = vfmaq(termA, termC, mantPow);
    /* add the exponent contribution: exponent * __logf_rng_f32 */
    result = vfmaq(result, expoFlt, __logf_rng_f32);

    /* set log(0) down to -inf */
    return vdupq_m(result, -INFINITY, vcmpeqq(vecIn, 0.0f));
}
|
||||
|
||||
/**
 * Per-lane exponential approximation.
 *
 * x is reduced to x - m * 0.6931471805 with m the integer conversion of
 * x * 1.4426950408; a polynomial (exp_tab) is evaluated on the remainder
 * and m is then added into the exponent field of the result.
 * Lanes with m < -126 return 0.
 */
__STATIC_INLINE f32x4_t vexpq_f32(
    f32x4_t x)
{
    /* range reduction [-log(2),log(2)] */
    const f32x4_t   scaled  = vmulq_n_f32(x, 1.4426950408f);
    const int32x4_t m       = vcvtq_s32_f32(scaled);
    const f32x4_t   ln2     = vdupq_n_f32(0.6931471805f);
    const f32x4_t   reduced = vfmsq_f32(x, vcvtq_f32_s32(m), ln2);

    /* polynomial approximation on the reduced argument */
    f32x4_t approx = vtaylor_polyq_f32(reduced, exp_tab);

    /* reconstruct: saturating add of m << 23 to the raw bit pattern */
    const q31x4_t expoBits = vqshlq_n_s32(m, 23);
    approx = (f32x4_t) (vqaddq_s32((q31x4_t) (approx), expoBits));

    /* force the result to zero where m is below -126 */
    return vdupq_m(approx, 0.0f, vcmpltq_n_s32(m, -126));
}
|
||||
|
||||
/**
 * Raise every lane of x to the integer power nb (expected >= 1).
 * x is returned unchanged when nb <= 1.
 */
__STATIC_INLINE f32x4_t arm_vec_exponent_f32(f32x4_t x, int32_t nb)
{
    f32x4_t power = x;
    int32_t remaining;

    /* power starts at x^1, so nb - 1 further multiplications are required */
    for (remaining = nb - 1; remaining > 0; remaining--) {
        power = vmulq(power, x);
    }

    return power;
}
|
||||
|
||||
/**
 * Per-lane reciprocal approximation.
 *
 * A first guess is produced by subtracting the input's bit pattern from
 * INV_NEWTON_INIT_F32; it is then corrected by a degree-7 polynomial in
 * w = |x| * guess, evaluated in Horner form.
 * Lanes equal to 0 return INFINITY; lanes below 0 are negated.
 */
__STATIC_INLINE f32x4_t vrecip_f32(f32x4_t vecIn)
{
    /* remaining Horner coefficients of
       8 + w * (-28 + w * (56 + w * (-70 + w * (56 + w * (-28 + w * (8 - w)))))) */
    static const float32_t horner[6] = { -28.0f, 56.0f, -70.0f, 56.0f, -28.0f, 8.0f };
    f32x4_t   mag, w, acc;
    any32x4_t guess;
    int32_t   k;

    mag = vabsq(vecIn);

    guess.f = vecIn;
    guess.i = vsubq(vdupq_n_s32(INV_NEWTON_INIT_F32), guess.i);

    w = vmulq(mag, guess.f);

    /* innermost term (8 - w), then acc <- w * acc + coefficient */
    acc = vsubq(vdupq_n_f32(8.0f), w);
    for (k = 0; k < 6; k++) {
        acc = vfmasq(w, acc, horner[k]);
    }
    guess.f = vmulq(guess.f, acc);

    /* zero input maps to +infinity */
    guess.f = vdupq_m(guess.f, INFINITY, vcmpeqq(vecIn, 0.0f));
    /* restore sign */
    guess.f = vnegq_m(guess.f, guess.f, vcmpltq(vecIn, 0.0f));

    return guess.f;
}
|
||||
|
||||
/**
 * Per-lane hyperbolic tangent approximation, computed as
 * (exp(2x) - 1) / (exp(2x) + 1) with x first clamped to [-10, 10].
 */
__STATIC_INLINE f32x4_t vtanhq_f32(
    f32x4_t val)
{
    const f32x4_t lowerBound = vdupq_n_f32(-10.f);
    const f32x4_t upperBound = vdupq_n_f32(10.0f);
    const f32x4_t clamped    = vminnmq_f32(vmaxnmq_f32(val, lowerBound), upperBound);
    const f32x4_t expTwice   = vexpq_f32(vmulq_n_f32(clamped, 2.f));
    const f32x4_t numer      = vsubq_n_f32(expTwice, 1.f);
    const f32x4_t denom      = vaddq_n_f32(expTwice, 1.f);

    /* divide via the reciprocal approximation */
    return vmulq_f32(numer, vrecip_f32(denom));
}
|
||||
|
||||
/**
 * Per-lane power val^n, computed as exp(n * log(val)) using the
 * vector log and exp approximations.
 */
__STATIC_INLINE f32x4_t vpowq_f32(
    f32x4_t val,
    f32x4_t n)
{
    const f32x4_t logVal  = vlogq_f32(val);
    const f32x4_t product = vmulq_f32(n, logVal);

    return vexpq_f32(product);
}
|
||||
|
||||
#endif /* (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE)*/
|
||||
|
||||
#if (defined(ARM_MATH_MVEI) || defined(ARM_MATH_HELIUM))
|
||||
#endif /* (defined(ARM_MATH_MVEI) || defined(ARM_MATH_HELIUM)) */
|
||||
|
||||
#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_NEON_EXPERIMENTAL)) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
|
||||
#include "NEMath.h"
|
||||
/**
|
||||
* @brief Vectorized integer exponentiation
|
||||
* @param[in] x value
|
||||
* @param[in] nb integer exponent >= 1
|
||||
* @return x^nb
|
||||
*
|
||||
*/
|
||||
__STATIC_INLINE float32x4_t arm_vec_exponent_f32(float32x4_t x, int32_t nb)
|
||||
{
|
||||
float32x4_t r = x;
|
||||
nb --;
|
||||
while(nb > 0)
|
||||
{
|
||||
r = vmulq_f32(r , x);
|
||||
nb--;
|
||||
}
|
||||
return(r);
|
||||
}
|
||||
|
||||
|
||||
/**
 * @brief      Per-lane single-precision square root approximation.
 * @param[in]  x  input vector
 * @return     x multiplied by a refined reciprocal square-root estimate
 *             of max(x, FLT_MIN)
 */
__STATIC_INLINE float32x4_t __arm_vec_sqrt_f32_neon(float32x4_t x)
{
    /* the estimate is taken on the input clamped from below at FLT_MIN */
    const float32x4_t clamped = vmaxq_f32(x, vdupq_n_f32(FLT_MIN));
    float32x4_t invRoot = vrsqrteq_f32(clamped);
    int32_t pass;

    /* two refinement passes of the reciprocal square-root estimate */
    for (pass = 0; pass < 2; pass++)
    {
        const float32x4_t step = vrsqrtsq_f32(vmulq_f32(clamped, invRoot), invRoot);
        invRoot = vmulq_f32(step, invRoot);
    }

    /* the final product uses the original, unclamped input */
    return vmulq_f32(x, invRoot);
}
|
||||
|
||||
/**
 * @brief      Per-lane square root of eight 16-bit fixed-point values.
 * @param[in]  vec  input vector (converted with 15 fractional bits)
 * @return     square roots, converted back with 15 fractional bits and
 *             narrowed to 16 bits with saturation
 */
__STATIC_INLINE int16x8_t __arm_vec_sqrt_q15_neon(int16x8_t vec)
{
    int32x4_t wideLo, wideHi;
    float32x4_t root;

    /* lower four lanes: widen, convert to float, take the root, convert back */
    wideLo = vmovl_s16(vget_low_s16(vec));
    root   = __arm_vec_sqrt_f32_neon(vcvtq_n_f32_s32(wideLo, 15));
    wideLo = vcvtq_n_s32_f32(root, 15);

    /* upper four lanes, same sequence */
    wideHi = vmovl_s16(vget_high_s16(vec));
    root   = __arm_vec_sqrt_f32_neon(vcvtq_n_f32_s32(wideHi, 15));
    wideHi = vcvtq_n_s32_f32(root, 15);

    return vcombine_s16(vqmovn_s32(wideLo), vqmovn_s32(wideHi));
}
|
||||
|
||||
/**
 * @brief      Per-lane square root of four 32-bit fixed-point values.
 * @param[in]  vec  input vector (converted with 31 fractional bits)
 * @return     square roots, converted back with 31 fractional bits
 */
__STATIC_INLINE int32x4_t __arm_vec_sqrt_q31_neon(int32x4_t vec)
{
    const float32x4_t asFloat = vcvtq_n_f32_s32(vec, 31);
    const float32x4_t root    = __arm_vec_sqrt_f32_neon(asFloat);

    return vcvtq_n_s32_f32(root, 31);
}
|
||||
|
||||
#endif /* (defined(ARM_MATH_NEON) || defined(ARM_MATH_NEON_EXPERIMENTAL)) && !defined(ARM_MATH_AUTOVECTORIZE) */
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* _ARM_VEC_MATH_H */
|
||||
|
||||
/**
|
||||
*
|
||||
* End of file.
|
||||
*/
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -8,7 +8,7 @@
|
||||
//------------------------------------------------------------------------------
|
||||
//
|
||||
// Copyright (c) 2017-2019 IAR Systems
|
||||
// Copyright (c) 2017-2019 Arm Limited. All rights reserved.
|
||||
// Copyright (c) 2017-2019 Arm Limited. All rights reserved.
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
@@ -198,7 +198,7 @@
|
||||
#define __VTOR_PRESENT 1U
|
||||
#warning "__VTOR_PRESENT not defined in device header file; using default!"
|
||||
#endif
|
||||
|
||||
|
||||
#ifndef __NVIC_PRIO_BITS
|
||||
#define __NVIC_PRIO_BITS 3U
|
||||
#warning "__NVIC_PRIO_BITS not defined in device header file; using default!"
|
||||
|
||||
@@ -21,13 +21,13 @@
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
|
||||
#if defined ( __ICCARM__ )
|
||||
#pragma system_include /* treat file as system include file for MISRA check */
|
||||
#elif defined (__clang__)
|
||||
#pragma clang system_header /* treat file as system include file */
|
||||
#endif
|
||||
|
||||
|
||||
#ifndef ARM_MPU_ARMV7_H
|
||||
#define ARM_MPU_ARMV7_H
|
||||
|
||||
@@ -79,12 +79,12 @@
|
||||
|
||||
/**
|
||||
* MPU Memory Access Attributes
|
||||
*
|
||||
*
|
||||
* \param TypeExtField Type extension field, allows you to configure memory access type, for example strongly ordered, peripheral.
|
||||
* \param IsShareable Region is shareable between multiple bus masters.
|
||||
* \param IsCacheable Region is cacheable, i.e. its value may be kept in cache.
|
||||
* \param IsBufferable Region is bufferable, i.e. using write-back caching. Cacheable but non-bufferable regions use write-through policy.
|
||||
*/
|
||||
*/
|
||||
#define ARM_MPU_ACCESS_(TypeExtField, IsShareable, IsCacheable, IsBufferable) \
|
||||
((((TypeExtField) << MPU_RASR_TEX_Pos) & MPU_RASR_TEX_Msk) | \
|
||||
(((IsShareable) << MPU_RASR_S_Pos) & MPU_RASR_S_Msk) | \
|
||||
@@ -93,7 +93,7 @@
|
||||
|
||||
/**
|
||||
* MPU Region Attribute and Size Register Value
|
||||
*
|
||||
*
|
||||
* \param DisableExec Instruction access disable bit, 1= disable instruction fetches.
|
||||
* \param AccessPermission Data access permissions, allows you to configure read/write access for User and Privileged mode.
|
||||
* \param AccessAttributes Memory access attribution, see \ref ARM_MPU_ACCESS_.
|
||||
@@ -110,7 +110,7 @@
|
||||
|
||||
/**
|
||||
* MPU Region Attribute and Size Register Value
|
||||
*
|
||||
*
|
||||
* \param DisableExec Instruction access disable bit, 1= disable instruction fetches.
|
||||
* \param AccessPermission Data access permissions, allows you to configure read/write access for User and Privileged mode.
|
||||
* \param TypeExtField Type extension field, allows you to configure memory access type, for example strongly ordered, peripheral.
|
||||
@@ -119,7 +119,7 @@
|
||||
* \param IsBufferable Region is bufferable, i.e. using write-back caching. Cacheable but non-bufferable regions use write-through policy.
|
||||
* \param SubRegionDisable Sub-region disable field.
|
||||
* \param Size Region size of the region to be configured, for example 4K, 8K.
|
||||
*/
|
||||
*/
|
||||
#define ARM_MPU_RASR(DisableExec, AccessPermission, TypeExtField, IsShareable, IsCacheable, IsBufferable, SubRegionDisable, Size) \
|
||||
ARM_MPU_RASR_EX(DisableExec, AccessPermission, ARM_MPU_ACCESS_(TypeExtField, IsShareable, IsCacheable, IsBufferable), SubRegionDisable, Size)
|
||||
|
||||
@@ -129,7 +129,7 @@
|
||||
* - Shareable
|
||||
* - Non-cacheable
|
||||
* - Non-bufferable
|
||||
*/
|
||||
*/
|
||||
#define ARM_MPU_ACCESS_ORDERED ARM_MPU_ACCESS_(0U, 1U, 0U, 0U)
|
||||
|
||||
/**
|
||||
@@ -140,7 +140,7 @@
|
||||
* - Bufferable (if shareable) or non-bufferable (if non-shareable)
|
||||
*
|
||||
* \param IsShareable Configures the device memory as shareable or non-shareable.
|
||||
*/
|
||||
*/
|
||||
#define ARM_MPU_ACCESS_DEVICE(IsShareable) ((IsShareable) ? ARM_MPU_ACCESS_(0U, 1U, 0U, 1U) : ARM_MPU_ACCESS_(2U, 0U, 0U, 0U))
|
||||
|
||||
/**
|
||||
@@ -153,7 +153,7 @@
|
||||
* \param OuterCp Configures the outer cache policy.
|
||||
* \param InnerCp Configures the inner cache policy.
|
||||
* \param IsShareable Configures the memory as shareable or non-shareable.
|
||||
*/
|
||||
*/
|
||||
#define ARM_MPU_ACCESS_NORMAL(OuterCp, InnerCp, IsShareable) ARM_MPU_ACCESS_((4U | (OuterCp)), IsShareable, ((InnerCp) >> 1U), ((InnerCp) & 1U))
|
||||
|
||||
/**
|
||||
@@ -184,7 +184,7 @@ typedef struct {
|
||||
uint32_t RBAR; //!< The region base address register value (RBAR)
|
||||
uint32_t RASR; //!< The region attribute and size register value (RASR) \ref MPU_RASR
|
||||
} ARM_MPU_Region_t;
|
||||
|
||||
|
||||
/** Enable the MPU.
|
||||
* \param MPU_Control Default access permissions for unconfigured regions.
|
||||
*/
|
||||
@@ -224,7 +224,7 @@ __STATIC_INLINE void ARM_MPU_ClrRegion(uint32_t rnr)
|
||||
/** Configure an MPU region.
|
||||
* \param rbar Value for RBAR register.
|
||||
* \param rasr Value for RASR register.
|
||||
*/
|
||||
*/
|
||||
__STATIC_INLINE void ARM_MPU_SetRegion(uint32_t rbar, uint32_t rasr)
|
||||
{
|
||||
MPU->RBAR = rbar;
|
||||
@@ -235,7 +235,7 @@ __STATIC_INLINE void ARM_MPU_SetRegion(uint32_t rbar, uint32_t rasr)
|
||||
* \param rnr Region number to be configured.
|
||||
* \param rbar Value for RBAR register.
|
||||
* \param rasr Value for RASR register.
|
||||
*/
|
||||
*/
|
||||
__STATIC_INLINE void ARM_MPU_SetRegionEx(uint32_t rnr, uint32_t rbar, uint32_t rasr)
|
||||
{
|
||||
MPU->RNR = rnr;
|
||||
@@ -251,7 +251,7 @@ __STATIC_INLINE void ARM_MPU_SetRegionEx(uint32_t rnr, uint32_t rbar, uint32_t r
|
||||
__STATIC_INLINE void ARM_MPU_OrderedMemcpy(volatile uint32_t* dst, const uint32_t* __RESTRICT src, uint32_t len)
|
||||
{
|
||||
uint32_t i;
|
||||
for (i = 0U; i < len; ++i)
|
||||
for (i = 0U; i < len; ++i)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
@@ -261,7 +261,7 @@ __STATIC_INLINE void ARM_MPU_OrderedMemcpy(volatile uint32_t* dst, const uint32_
|
||||
* \param table Pointer to the MPU configuration table.
|
||||
* \param cnt Amount of regions to be configured.
|
||||
*/
|
||||
__STATIC_INLINE void ARM_MPU_Load(ARM_MPU_Region_t const* table, uint32_t cnt)
|
||||
__STATIC_INLINE void ARM_MPU_Load(ARM_MPU_Region_t const* table, uint32_t cnt)
|
||||
{
|
||||
const uint32_t rowWordSize = sizeof(ARM_MPU_Region_t)/4U;
|
||||
while (cnt > MPU_TYPE_RALIASES) {
|
||||
|
||||
@@ -1,352 +1,352 @@
|
||||
/******************************************************************************
|
||||
* @file mpu_armv8.h
|
||||
* @brief CMSIS MPU API for Armv8-M and Armv8.1-M MPU
|
||||
* @version V5.1.2
|
||||
* @date 10. February 2020
|
||||
******************************************************************************/
|
||||
/*
|
||||
* Copyright (c) 2017-2020 Arm Limited. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#if defined ( __ICCARM__ )
|
||||
#pragma system_include /* treat file as system include file for MISRA check */
|
||||
#elif defined (__clang__)
|
||||
#pragma clang system_header /* treat file as system include file */
|
||||
#endif
|
||||
|
||||
#ifndef ARM_MPU_ARMV8_H
|
||||
#define ARM_MPU_ARMV8_H
|
||||
|
||||
/** \brief Attribute for device memory (outer only) */
|
||||
#define ARM_MPU_ATTR_DEVICE ( 0U )
|
||||
|
||||
/** \brief Attribute for non-cacheable, normal memory */
|
||||
#define ARM_MPU_ATTR_NON_CACHEABLE ( 4U )
|
||||
|
||||
/** \brief Attribute for normal memory (outer and inner)
|
||||
* \param NT Non-Transient: Set to 1 for non-transient data.
|
||||
* \param WB Write-Back: Set to 1 to use write-back update policy.
|
||||
* \param RA Read Allocation: Set to 1 to use cache allocation on read miss.
|
||||
* \param WA Write Allocation: Set to 1 to use cache allocation on write miss.
|
||||
*/
|
||||
#define ARM_MPU_ATTR_MEMORY_(NT, WB, RA, WA) \
  ( ((WA) & 1U)         | \
    (((RA) & 1U) << 1U) | \
    (((WB) & 1U) << 2U) | \
    (((NT) & 1U) << 3U) )
|
||||
|
||||
/** \brief Device memory type non Gathering, non Re-ordering, non Early Write Acknowledgement */
|
||||
#define ARM_MPU_ATTR_DEVICE_nGnRnE (0U)
|
||||
|
||||
/** \brief Device memory type non Gathering, non Re-ordering, Early Write Acknowledgement */
|
||||
#define ARM_MPU_ATTR_DEVICE_nGnRE (1U)
|
||||
|
||||
/** \brief Device memory type non Gathering, Re-ordering, Early Write Acknowledgement */
|
||||
#define ARM_MPU_ATTR_DEVICE_nGRE (2U)
|
||||
|
||||
/** \brief Device memory type Gathering, Re-ordering, Early Write Acknowledgement */
|
||||
#define ARM_MPU_ATTR_DEVICE_GRE (3U)
|
||||
|
||||
/** \brief Memory Attribute
|
||||
* \param O Outer memory attributes
|
||||
* \param I O == ARM_MPU_ATTR_DEVICE: Device memory attributes, else: Inner memory attributes
|
||||
*/
|
||||
#define ARM_MPU_ATTR(O, I) ((((O) & 0xFU) << 4U) | ((((O) & 0xFU) == 0U) ? (((I) & 0x3U) << 2U) : ((I) & 0xFU)))
|
||||
|
||||
/** \brief Normal memory non-shareable */
|
||||
#define ARM_MPU_SH_NON (0U)
|
||||
|
||||
/** \brief Normal memory outer shareable */
|
||||
#define ARM_MPU_SH_OUTER (2U)
|
||||
|
||||
/** \brief Normal memory inner shareable */
|
||||
#define ARM_MPU_SH_INNER (3U)
|
||||
|
||||
/** \brief Memory access permissions
|
||||
* \param RO Read-Only: Set to 1 for read-only memory.
|
||||
* \param NP Non-Privileged: Set to 1 for non-privileged memory.
|
||||
*/
|
||||
#define ARM_MPU_AP_(RO, NP) (((NP) & 1U) | (((RO) & 1U) << 1U))
|
||||
|
||||
/** \brief Region Base Address Register value
|
||||
* \param BASE The base address bits [31:5] of a memory region. The value is zero extended. Effective address gets 32 byte aligned.
|
||||
* \param SH Defines the Shareability domain for this memory region.
|
||||
* \param RO Read-Only: Set to 1 for a read-only memory region.
|
||||
* \param NP Non-Privileged: Set to 1 for a non-privileged memory region.
|
||||
* \param XN eXecute Never: Set to 1 for a non-executable memory region.
|
||||
*/
|
||||
#define ARM_MPU_RBAR(BASE, SH, RO, NP, XN) \
  ( ((BASE) & MPU_RBAR_BASE_Msk)                                    | \
    (((SH) << MPU_RBAR_SH_Pos) & MPU_RBAR_SH_Msk)                   | \
    ((ARM_MPU_AP_(RO, NP) << MPU_RBAR_AP_Pos) & MPU_RBAR_AP_Msk)    | \
    (((XN) << MPU_RBAR_XN_Pos) & MPU_RBAR_XN_Msk) )
|
||||
|
||||
/** \brief Region Limit Address Register value
|
||||
* \param LIMIT The limit address bits [31:5] for this memory region. The value is one extended.
|
||||
* \param IDX The attribute index to be associated with this memory region.
|
||||
*/
|
||||
#define ARM_MPU_RLAR(LIMIT, IDX) \
  ( ((LIMIT) & MPU_RLAR_LIMIT_Msk)                                  | \
    (((IDX) << MPU_RLAR_AttrIndx_Pos) & MPU_RLAR_AttrIndx_Msk)      | \
    (MPU_RLAR_EN_Msk) )
|
||||
|
||||
#if defined(MPU_RLAR_PXN_Pos)
|
||||
|
||||
/** \brief Region Limit Address Register with PXN value
|
||||
* \param LIMIT The limit address bits [31:5] for this memory region. The value is one extended.
|
||||
* \param PXN Privileged execute never. Defines whether code can be executed from this privileged region.
|
||||
* \param IDX The attribute index to be associated with this memory region.
|
||||
*/
|
||||
#define ARM_MPU_RLAR_PXN(LIMIT, PXN, IDX) \
  ( ((LIMIT) & MPU_RLAR_LIMIT_Msk)                                  | \
    (((PXN) << MPU_RLAR_PXN_Pos) & MPU_RLAR_PXN_Msk)                | \
    (((IDX) << MPU_RLAR_AttrIndx_Pos) & MPU_RLAR_AttrIndx_Msk)      | \
    (MPU_RLAR_EN_Msk) )
|
||||
|
||||
#endif
|
||||
|
||||
/**
* One MPU region table entry: the pair of register values that
* describes a single region.
*/
typedef struct {
  uint32_t RBAR;                   /*!< Value for the Region Base Address Register */
  uint32_t RLAR;                   /*!< Value for the Region Limit Address Register */
} ARM_MPU_Region_t;
|
||||
|
||||
/** Enable the MPU.
|
||||
* \param MPU_Control Default access permissions for unconfigured regions.
|
||||
*/
|
||||
__STATIC_INLINE void ARM_MPU_Enable(uint32_t MPU_Control)
|
||||
{
|
||||
__DMB();
|
||||
MPU->CTRL = MPU_Control | MPU_CTRL_ENABLE_Msk;
|
||||
#ifdef SCB_SHCSR_MEMFAULTENA_Msk
|
||||
SCB->SHCSR |= SCB_SHCSR_MEMFAULTENA_Msk;
|
||||
#endif
|
||||
__DSB();
|
||||
__ISB();
|
||||
}
|
||||
|
||||
/** Disable the MPU.
|
||||
*/
|
||||
__STATIC_INLINE void ARM_MPU_Disable(void)
|
||||
{
|
||||
__DMB();
|
||||
#ifdef SCB_SHCSR_MEMFAULTENA_Msk
|
||||
SCB->SHCSR &= ~SCB_SHCSR_MEMFAULTENA_Msk;
|
||||
#endif
|
||||
MPU->CTRL &= ~MPU_CTRL_ENABLE_Msk;
|
||||
__DSB();
|
||||
__ISB();
|
||||
}
|
||||
|
||||
#ifdef MPU_NS
|
||||
/** Enable the Non-secure MPU.
|
||||
* \param MPU_Control Default access permissions for unconfigured regions.
|
||||
*/
|
||||
__STATIC_INLINE void ARM_MPU_Enable_NS(uint32_t MPU_Control)
|
||||
{
|
||||
__DMB();
|
||||
MPU_NS->CTRL = MPU_Control | MPU_CTRL_ENABLE_Msk;
|
||||
#ifdef SCB_SHCSR_MEMFAULTENA_Msk
|
||||
SCB_NS->SHCSR |= SCB_SHCSR_MEMFAULTENA_Msk;
|
||||
#endif
|
||||
__DSB();
|
||||
__ISB();
|
||||
}
|
||||
|
||||
/** Disable the Non-secure MPU.
|
||||
*/
|
||||
__STATIC_INLINE void ARM_MPU_Disable_NS(void)
|
||||
{
|
||||
__DMB();
|
||||
#ifdef SCB_SHCSR_MEMFAULTENA_Msk
|
||||
SCB_NS->SHCSR &= ~SCB_SHCSR_MEMFAULTENA_Msk;
|
||||
#endif
|
||||
MPU_NS->CTRL &= ~MPU_CTRL_ENABLE_Msk;
|
||||
__DSB();
|
||||
__ISB();
|
||||
}
|
||||
#endif
|
||||
|
||||
/** Set the memory attribute encoding to the given MPU.
|
||||
* \param mpu Pointer to the MPU to be configured.
|
||||
* \param idx The attribute index to be set [0-7]
|
||||
* \param attr The attribute value to be set.
|
||||
*/
|
||||
__STATIC_INLINE void ARM_MPU_SetMemAttrEx(MPU_Type* mpu, uint8_t idx, uint8_t attr)
|
||||
{
|
||||
const uint8_t reg = idx / 4U;
|
||||
const uint32_t pos = ((idx % 4U) * 8U);
|
||||
const uint32_t mask = 0xFFU << pos;
|
||||
|
||||
if (reg >= (sizeof(mpu->MAIR) / sizeof(mpu->MAIR[0]))) {
|
||||
return; // invalid index
|
||||
}
|
||||
|
||||
mpu->MAIR[reg] = ((mpu->MAIR[reg] & ~mask) | ((attr << pos) & mask));
|
||||
}
|
||||
|
||||
/** Set the memory attribute encoding.
|
||||
* \param idx The attribute index to be set [0-7]
|
||||
* \param attr The attribute value to be set.
|
||||
*/
|
||||
__STATIC_INLINE void ARM_MPU_SetMemAttr(uint8_t idx, uint8_t attr)
|
||||
{
|
||||
ARM_MPU_SetMemAttrEx(MPU, idx, attr);
|
||||
}
|
||||
|
||||
#ifdef MPU_NS
|
||||
/** Set the memory attribute encoding to the Non-secure MPU.
|
||||
* \param idx The attribute index to be set [0-7]
|
||||
* \param attr The attribute value to be set.
|
||||
*/
|
||||
__STATIC_INLINE void ARM_MPU_SetMemAttr_NS(uint8_t idx, uint8_t attr)
|
||||
{
|
||||
ARM_MPU_SetMemAttrEx(MPU_NS, idx, attr);
|
||||
}
|
||||
#endif
|
||||
|
||||
/** Clear and disable the given MPU region of the given MPU.
|
||||
* \param mpu Pointer to MPU to be used.
|
||||
* \param rnr Region number to be cleared.
|
||||
*/
|
||||
__STATIC_INLINE void ARM_MPU_ClrRegionEx(MPU_Type* mpu, uint32_t rnr)
|
||||
{
|
||||
mpu->RNR = rnr;
|
||||
mpu->RLAR = 0U;
|
||||
}
|
||||
|
||||
/** Clear and disable the given MPU region.
|
||||
* \param rnr Region number to be cleared.
|
||||
*/
|
||||
__STATIC_INLINE void ARM_MPU_ClrRegion(uint32_t rnr)
|
||||
{
|
||||
ARM_MPU_ClrRegionEx(MPU, rnr);
|
||||
}
|
||||
|
||||
#ifdef MPU_NS
|
||||
/** Clear and disable the given Non-secure MPU region.
|
||||
* \param rnr Region number to be cleared.
|
||||
*/
|
||||
__STATIC_INLINE void ARM_MPU_ClrRegion_NS(uint32_t rnr)
|
||||
{
|
||||
ARM_MPU_ClrRegionEx(MPU_NS, rnr);
|
||||
}
|
||||
#endif
|
||||
|
||||
/** Configure the given MPU region of the given MPU.
|
||||
* \param mpu Pointer to MPU to be used.
|
||||
* \param rnr Region number to be configured.
|
||||
* \param rbar Value for RBAR register.
|
||||
* \param rlar Value for RLAR register.
|
||||
*/
|
||||
__STATIC_INLINE void ARM_MPU_SetRegionEx(MPU_Type* mpu, uint32_t rnr, uint32_t rbar, uint32_t rlar)
|
||||
{
|
||||
mpu->RNR = rnr;
|
||||
mpu->RBAR = rbar;
|
||||
mpu->RLAR = rlar;
|
||||
}
|
||||
|
||||
/** Configure the given MPU region.
|
||||
* \param rnr Region number to be configured.
|
||||
* \param rbar Value for RBAR register.
|
||||
* \param rlar Value for RLAR register.
|
||||
*/
|
||||
__STATIC_INLINE void ARM_MPU_SetRegion(uint32_t rnr, uint32_t rbar, uint32_t rlar)
|
||||
{
|
||||
ARM_MPU_SetRegionEx(MPU, rnr, rbar, rlar);
|
||||
}
|
||||
|
||||
#ifdef MPU_NS
|
||||
/** Configure the given Non-secure MPU region.
|
||||
* \param rnr Region number to be configured.
|
||||
* \param rbar Value for RBAR register.
|
||||
* \param rlar Value for RLAR register.
|
||||
*/
|
||||
__STATIC_INLINE void ARM_MPU_SetRegion_NS(uint32_t rnr, uint32_t rbar, uint32_t rlar)
|
||||
{
|
||||
ARM_MPU_SetRegionEx(MPU_NS, rnr, rbar, rlar);
|
||||
}
|
||||
#endif
|
||||
|
||||
/** Memcopy with strictly ordered memory access, e.g. for register targets.
|
||||
* \param dst Destination data is copied to.
|
||||
* \param src Source data is copied from.
|
||||
* \param len Amount of data words to be copied.
|
||||
*/
|
||||
__STATIC_INLINE void ARM_MPU_OrderedMemcpy(volatile uint32_t* dst, const uint32_t* __RESTRICT src, uint32_t len)
|
||||
{
|
||||
uint32_t i;
|
||||
for (i = 0U; i < len; ++i)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
/** Load the given number of MPU regions from a table to the given MPU.
|
||||
* \param mpu Pointer to the MPU registers to be used.
|
||||
* \param rnr First region number to be configured.
|
||||
* \param table Pointer to the MPU configuration table.
|
||||
* \param cnt Amount of regions to be configured.
|
||||
*/
|
||||
__STATIC_INLINE void ARM_MPU_LoadEx(MPU_Type* mpu, uint32_t rnr, ARM_MPU_Region_t const* table, uint32_t cnt)
|
||||
{
|
||||
const uint32_t rowWordSize = sizeof(ARM_MPU_Region_t)/4U;
|
||||
if (cnt == 1U) {
|
||||
mpu->RNR = rnr;
|
||||
ARM_MPU_OrderedMemcpy(&(mpu->RBAR), &(table->RBAR), rowWordSize);
|
||||
} else {
|
||||
uint32_t rnrBase = rnr & ~(MPU_TYPE_RALIASES-1U);
|
||||
uint32_t rnrOffset = rnr % MPU_TYPE_RALIASES;
|
||||
|
||||
mpu->RNR = rnrBase;
|
||||
while ((rnrOffset + cnt) > MPU_TYPE_RALIASES) {
|
||||
uint32_t c = MPU_TYPE_RALIASES - rnrOffset;
|
||||
ARM_MPU_OrderedMemcpy(&(mpu->RBAR)+(rnrOffset*2U), &(table->RBAR), c*rowWordSize);
|
||||
table += c;
|
||||
cnt -= c;
|
||||
rnrOffset = 0U;
|
||||
rnrBase += MPU_TYPE_RALIASES;
|
||||
mpu->RNR = rnrBase;
|
||||
}
|
||||
|
||||
ARM_MPU_OrderedMemcpy(&(mpu->RBAR)+(rnrOffset*2U), &(table->RBAR), cnt*rowWordSize);
|
||||
}
|
||||
}
|
||||
|
||||
/** Load the given number of MPU regions from a table.
|
||||
* \param rnr First region number to be configured.
|
||||
* \param table Pointer to the MPU configuration table.
|
||||
* \param cnt Amount of regions to be configured.
|
||||
*/
|
||||
__STATIC_INLINE void ARM_MPU_Load(uint32_t rnr, ARM_MPU_Region_t const* table, uint32_t cnt)
|
||||
{
|
||||
ARM_MPU_LoadEx(MPU, rnr, table, cnt);
|
||||
}
|
||||
|
||||
#ifdef MPU_NS
|
||||
/** Load the given number of MPU regions from a table to the Non-secure MPU.
|
||||
* \param rnr First region number to be configured.
|
||||
* \param table Pointer to the MPU configuration table.
|
||||
* \param cnt Amount of regions to be configured.
|
||||
*/
|
||||
__STATIC_INLINE void ARM_MPU_Load_NS(uint32_t rnr, ARM_MPU_Region_t const* table, uint32_t cnt)
|
||||
{
|
||||
ARM_MPU_LoadEx(MPU_NS, rnr, table, cnt);
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
/******************************************************************************
|
||||
* @file mpu_armv8.h
|
||||
* @brief CMSIS MPU API for Armv8-M and Armv8.1-M MPU
|
||||
* @version V5.1.2
|
||||
* @date 10. February 2020
|
||||
******************************************************************************/
|
||||
/*
|
||||
* Copyright (c) 2017-2020 Arm Limited. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#if defined ( __ICCARM__ )
|
||||
#pragma system_include /* treat file as system include file for MISRA check */
|
||||
#elif defined (__clang__)
|
||||
#pragma clang system_header /* treat file as system include file */
|
||||
#endif
|
||||
|
||||
#ifndef ARM_MPU_ARMV8_H
|
||||
#define ARM_MPU_ARMV8_H
|
||||
|
||||
/** \brief Attribute for device memory (outer only) */
|
||||
#define ARM_MPU_ATTR_DEVICE ( 0U )
|
||||
|
||||
/** \brief Attribute for non-cacheable, normal memory */
|
||||
#define ARM_MPU_ATTR_NON_CACHEABLE ( 4U )
|
||||
|
||||
/** \brief Attribute for normal memory (outer and inner)
|
||||
* \param NT Non-Transient: Set to 1 for non-transient data.
|
||||
* \param WB Write-Back: Set to 1 to use write-back update policy.
|
||||
* \param RA Read Allocation: Set to 1 to use cache allocation on read miss.
|
||||
* \param WA Write Allocation: Set to 1 to use cache allocation on write miss.
|
||||
*/
|
||||
#define ARM_MPU_ATTR_MEMORY_(NT, WB, RA, WA) \
|
||||
((((NT) & 1U) << 3U) | (((WB) & 1U) << 2U) | (((RA) & 1U) << 1U) | ((WA) & 1U))
|
||||
|
||||
/** \brief Device memory type non Gathering, non Re-ordering, non Early Write Acknowledgement */
|
||||
#define ARM_MPU_ATTR_DEVICE_nGnRnE (0U)
|
||||
|
||||
/** \brief Device memory type non Gathering, non Re-ordering, Early Write Acknowledgement */
|
||||
#define ARM_MPU_ATTR_DEVICE_nGnRE (1U)
|
||||
|
||||
/** \brief Device memory type non Gathering, Re-ordering, Early Write Acknowledgement */
|
||||
#define ARM_MPU_ATTR_DEVICE_nGRE (2U)
|
||||
|
||||
/** \brief Device memory type Gathering, Re-ordering, Early Write Acknowledgement */
|
||||
#define ARM_MPU_ATTR_DEVICE_GRE (3U)
|
||||
|
||||
/** \brief Memory Attribute
|
||||
* \param O Outer memory attributes
|
||||
* \param I O == ARM_MPU_ATTR_DEVICE: Device memory attributes, else: Inner memory attributes
|
||||
*/
|
||||
#define ARM_MPU_ATTR(O, I) ((((O) & 0xFU) << 4U) | ((((O) & 0xFU) != 0U) ? ((I) & 0xFU) : (((I) & 0x3U) << 2U)))
|
||||
|
||||
/** \brief Normal memory non-shareable */
|
||||
#define ARM_MPU_SH_NON (0U)
|
||||
|
||||
/** \brief Normal memory outer shareable */
|
||||
#define ARM_MPU_SH_OUTER (2U)
|
||||
|
||||
/** \brief Normal memory inner shareable */
|
||||
#define ARM_MPU_SH_INNER (3U)
|
||||
|
||||
/** \brief Memory access permissions
|
||||
* \param RO Read-Only: Set to 1 for read-only memory.
|
||||
* \param NP Non-Privileged: Set to 1 for non-privileged memory.
|
||||
*/
|
||||
#define ARM_MPU_AP_(RO, NP) ((((RO) & 1U) << 1U) | ((NP) & 1U))
|
||||
|
||||
/** \brief Region Base Address Register value
|
||||
* \param BASE The base address bits [31:5] of a memory region. The value is zero extended. Effective address gets 32 byte aligned.
|
||||
* \param SH Defines the Shareability domain for this memory region.
|
||||
* \param RO Read-Only: Set to 1 for a read-only memory region.
|
||||
* \param NP Non-Privileged: Set to 1 for a non-privileged memory region.
|
||||
* \param XN eXecute Never: Set to 1 for a non-executable memory region.
|
||||
*/
|
||||
#define ARM_MPU_RBAR(BASE, SH, RO, NP, XN) \
|
||||
(((BASE) & MPU_RBAR_BASE_Msk) | \
|
||||
(((SH) << MPU_RBAR_SH_Pos) & MPU_RBAR_SH_Msk) | \
|
||||
((ARM_MPU_AP_(RO, NP) << MPU_RBAR_AP_Pos) & MPU_RBAR_AP_Msk) | \
|
||||
(((XN) << MPU_RBAR_XN_Pos) & MPU_RBAR_XN_Msk))
|
||||
|
||||
/** \brief Region Limit Address Register value
|
||||
* \param LIMIT The limit address bits [31:5] for this memory region. The value is one extended.
|
||||
* \param IDX The attribute index to be associated with this memory region.
|
||||
*/
|
||||
#define ARM_MPU_RLAR(LIMIT, IDX) \
|
||||
(((LIMIT) & MPU_RLAR_LIMIT_Msk) | \
|
||||
(((IDX) << MPU_RLAR_AttrIndx_Pos) & MPU_RLAR_AttrIndx_Msk) | \
|
||||
(MPU_RLAR_EN_Msk))
|
||||
|
||||
#if defined(MPU_RLAR_PXN_Pos)
|
||||
|
||||
/** \brief Region Limit Address Register with PXN value
|
||||
* \param LIMIT The limit address bits [31:5] for this memory region. The value is one extended.
|
||||
* \param PXN Privileged execute never. Defines whether code can be executed from this privileged region.
|
||||
* \param IDX The attribute index to be associated with this memory region.
|
||||
*/
|
||||
#define ARM_MPU_RLAR_PXN(LIMIT, PXN, IDX) \
|
||||
(((LIMIT) & MPU_RLAR_LIMIT_Msk) | \
|
||||
(((PXN) << MPU_RLAR_PXN_Pos) & MPU_RLAR_PXN_Msk) | \
|
||||
(((IDX) << MPU_RLAR_AttrIndx_Pos) & MPU_RLAR_AttrIndx_Msk) | \
|
||||
(MPU_RLAR_EN_Msk))
|
||||
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Struct for a single MPU Region
|
||||
*/
|
||||
typedef struct {
|
||||
uint32_t RBAR; /*!< Region Base Address Register value */
|
||||
uint32_t RLAR; /*!< Region Limit Address Register value */
|
||||
} ARM_MPU_Region_t;
|
||||
|
||||
/** Enable the MPU.
|
||||
* \param MPU_Control Default access permissions for unconfigured regions.
|
||||
*/
|
||||
__STATIC_INLINE void ARM_MPU_Enable(uint32_t MPU_Control)
|
||||
{
|
||||
__DMB();
|
||||
MPU->CTRL = MPU_Control | MPU_CTRL_ENABLE_Msk;
|
||||
#ifdef SCB_SHCSR_MEMFAULTENA_Msk
|
||||
SCB->SHCSR |= SCB_SHCSR_MEMFAULTENA_Msk;
|
||||
#endif
|
||||
__DSB();
|
||||
__ISB();
|
||||
}
|
||||
|
||||
/** Disable the MPU.
|
||||
*/
|
||||
__STATIC_INLINE void ARM_MPU_Disable(void)
|
||||
{
|
||||
__DMB();
|
||||
#ifdef SCB_SHCSR_MEMFAULTENA_Msk
|
||||
SCB->SHCSR &= ~SCB_SHCSR_MEMFAULTENA_Msk;
|
||||
#endif
|
||||
MPU->CTRL &= ~MPU_CTRL_ENABLE_Msk;
|
||||
__DSB();
|
||||
__ISB();
|
||||
}
|
||||
|
||||
#ifdef MPU_NS
|
||||
/** Enable the Non-secure MPU.
|
||||
* \param MPU_Control Default access permissions for unconfigured regions.
|
||||
*/
|
||||
__STATIC_INLINE void ARM_MPU_Enable_NS(uint32_t MPU_Control)
|
||||
{
|
||||
__DMB();
|
||||
MPU_NS->CTRL = MPU_Control | MPU_CTRL_ENABLE_Msk;
|
||||
#ifdef SCB_SHCSR_MEMFAULTENA_Msk
|
||||
SCB_NS->SHCSR |= SCB_SHCSR_MEMFAULTENA_Msk;
|
||||
#endif
|
||||
__DSB();
|
||||
__ISB();
|
||||
}
|
||||
|
||||
/** Disable the Non-secure MPU.
|
||||
*/
|
||||
__STATIC_INLINE void ARM_MPU_Disable_NS(void)
|
||||
{
|
||||
__DMB();
|
||||
#ifdef SCB_SHCSR_MEMFAULTENA_Msk
|
||||
SCB_NS->SHCSR &= ~SCB_SHCSR_MEMFAULTENA_Msk;
|
||||
#endif
|
||||
MPU_NS->CTRL &= ~MPU_CTRL_ENABLE_Msk;
|
||||
__DSB();
|
||||
__ISB();
|
||||
}
|
||||
#endif
|
||||
|
||||
/** Set the memory attribute encoding to the given MPU.
|
||||
* \param mpu Pointer to the MPU to be configured.
|
||||
* \param idx The attribute index to be set [0-7]
|
||||
* \param attr The attribute value to be set.
|
||||
*/
|
||||
__STATIC_INLINE void ARM_MPU_SetMemAttrEx(MPU_Type* mpu, uint8_t idx, uint8_t attr)
|
||||
{
|
||||
const uint8_t reg = idx / 4U;
|
||||
const uint32_t pos = ((idx % 4U) * 8U);
|
||||
const uint32_t mask = 0xFFU << pos;
|
||||
|
||||
if (reg >= (sizeof(mpu->MAIR) / sizeof(mpu->MAIR[0]))) {
|
||||
return; // invalid index
|
||||
}
|
||||
|
||||
mpu->MAIR[reg] = ((mpu->MAIR[reg] & ~mask) | ((attr << pos) & mask));
|
||||
}
|
||||
|
||||
/** Set the memory attribute encoding.
|
||||
* \param idx The attribute index to be set [0-7]
|
||||
* \param attr The attribute value to be set.
|
||||
*/
|
||||
__STATIC_INLINE void ARM_MPU_SetMemAttr(uint8_t idx, uint8_t attr)
|
||||
{
|
||||
ARM_MPU_SetMemAttrEx(MPU, idx, attr);
|
||||
}
|
||||
|
||||
#ifdef MPU_NS
|
||||
/** Set the memory attribute encoding to the Non-secure MPU.
|
||||
* \param idx The attribute index to be set [0-7]
|
||||
* \param attr The attribute value to be set.
|
||||
*/
|
||||
__STATIC_INLINE void ARM_MPU_SetMemAttr_NS(uint8_t idx, uint8_t attr)
|
||||
{
|
||||
ARM_MPU_SetMemAttrEx(MPU_NS, idx, attr);
|
||||
}
|
||||
#endif
|
||||
|
||||
/** Clear and disable the given MPU region of the given MPU.
|
||||
* \param mpu Pointer to MPU to be used.
|
||||
* \param rnr Region number to be cleared.
|
||||
*/
|
||||
__STATIC_INLINE void ARM_MPU_ClrRegionEx(MPU_Type* mpu, uint32_t rnr)
|
||||
{
|
||||
mpu->RNR = rnr;
|
||||
mpu->RLAR = 0U;
|
||||
}
|
||||
|
||||
/** Clear and disable the given MPU region.
|
||||
* \param rnr Region number to be cleared.
|
||||
*/
|
||||
__STATIC_INLINE void ARM_MPU_ClrRegion(uint32_t rnr)
|
||||
{
|
||||
ARM_MPU_ClrRegionEx(MPU, rnr);
|
||||
}
|
||||
|
||||
#ifdef MPU_NS
|
||||
/** Clear and disable the given Non-secure MPU region.
|
||||
* \param rnr Region number to be cleared.
|
||||
*/
|
||||
__STATIC_INLINE void ARM_MPU_ClrRegion_NS(uint32_t rnr)
|
||||
{
|
||||
ARM_MPU_ClrRegionEx(MPU_NS, rnr);
|
||||
}
|
||||
#endif
|
||||
|
||||
/** Configure the given MPU region of the given MPU.
|
||||
* \param mpu Pointer to MPU to be used.
|
||||
* \param rnr Region number to be configured.
|
||||
* \param rbar Value for RBAR register.
|
||||
* \param rlar Value for RLAR register.
|
||||
*/
|
||||
__STATIC_INLINE void ARM_MPU_SetRegionEx(MPU_Type* mpu, uint32_t rnr, uint32_t rbar, uint32_t rlar)
|
||||
{
|
||||
mpu->RNR = rnr;
|
||||
mpu->RBAR = rbar;
|
||||
mpu->RLAR = rlar;
|
||||
}
|
||||
|
||||
/** Configure the given MPU region.
|
||||
* \param rnr Region number to be configured.
|
||||
* \param rbar Value for RBAR register.
|
||||
* \param rlar Value for RLAR register.
|
||||
*/
|
||||
__STATIC_INLINE void ARM_MPU_SetRegion(uint32_t rnr, uint32_t rbar, uint32_t rlar)
|
||||
{
|
||||
ARM_MPU_SetRegionEx(MPU, rnr, rbar, rlar);
|
||||
}
|
||||
|
||||
#ifdef MPU_NS
|
||||
/** Configure the given Non-secure MPU region.
|
||||
* \param rnr Region number to be configured.
|
||||
* \param rbar Value for RBAR register.
|
||||
* \param rlar Value for RLAR register.
|
||||
*/
|
||||
__STATIC_INLINE void ARM_MPU_SetRegion_NS(uint32_t rnr, uint32_t rbar, uint32_t rlar)
|
||||
{
|
||||
ARM_MPU_SetRegionEx(MPU_NS, rnr, rbar, rlar);
|
||||
}
|
||||
#endif
|
||||
|
||||
/** Memcopy with strictly ordered memory access, e.g. for register targets.
|
||||
* \param dst Destination data is copied to.
|
||||
* \param src Source data is copied from.
|
||||
* \param len Amount of data words to be copied.
|
||||
*/
|
||||
__STATIC_INLINE void ARM_MPU_OrderedMemcpy(volatile uint32_t* dst, const uint32_t* __RESTRICT src, uint32_t len)
|
||||
{
|
||||
uint32_t i;
|
||||
for (i = 0U; i < len; ++i)
|
||||
{
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
/** Load the given number of MPU regions from a table to the given MPU.
|
||||
* \param mpu Pointer to the MPU registers to be used.
|
||||
* \param rnr First region number to be configured.
|
||||
* \param table Pointer to the MPU configuration table.
|
||||
* \param cnt Amount of regions to be configured.
|
||||
*/
|
||||
__STATIC_INLINE void ARM_MPU_LoadEx(MPU_Type* mpu, uint32_t rnr, ARM_MPU_Region_t const* table, uint32_t cnt)
|
||||
{
|
||||
const uint32_t rowWordSize = sizeof(ARM_MPU_Region_t)/4U;
|
||||
if (cnt == 1U) {
|
||||
mpu->RNR = rnr;
|
||||
ARM_MPU_OrderedMemcpy(&(mpu->RBAR), &(table->RBAR), rowWordSize);
|
||||
} else {
|
||||
uint32_t rnrBase = rnr & ~(MPU_TYPE_RALIASES-1U);
|
||||
uint32_t rnrOffset = rnr % MPU_TYPE_RALIASES;
|
||||
|
||||
mpu->RNR = rnrBase;
|
||||
while ((rnrOffset + cnt) > MPU_TYPE_RALIASES) {
|
||||
uint32_t c = MPU_TYPE_RALIASES - rnrOffset;
|
||||
ARM_MPU_OrderedMemcpy(&(mpu->RBAR)+(rnrOffset*2U), &(table->RBAR), c*rowWordSize);
|
||||
table += c;
|
||||
cnt -= c;
|
||||
rnrOffset = 0U;
|
||||
rnrBase += MPU_TYPE_RALIASES;
|
||||
mpu->RNR = rnrBase;
|
||||
}
|
||||
|
||||
ARM_MPU_OrderedMemcpy(&(mpu->RBAR)+(rnrOffset*2U), &(table->RBAR), cnt*rowWordSize);
|
||||
}
|
||||
}
|
||||
|
||||
/** Load the given number of MPU regions from a table.
|
||||
* \param rnr First region number to be configured.
|
||||
* \param table Pointer to the MPU configuration table.
|
||||
* \param cnt Amount of regions to be configured.
|
||||
*/
|
||||
__STATIC_INLINE void ARM_MPU_Load(uint32_t rnr, ARM_MPU_Region_t const* table, uint32_t cnt)
|
||||
{
|
||||
ARM_MPU_LoadEx(MPU, rnr, table, cnt);
|
||||
}
|
||||
|
||||
#ifdef MPU_NS
|
||||
/** Load the given number of MPU regions from a table to the Non-secure MPU.
|
||||
* \param rnr First region number to be configured.
|
||||
* \param table Pointer to the MPU configuration table.
|
||||
* \param cnt Amount of regions to be configured.
|
||||
*/
|
||||
__STATIC_INLINE void ARM_MPU_Load_NS(uint32_t rnr, ARM_MPU_Region_t const* table, uint32_t cnt)
|
||||
{
|
||||
ARM_MPU_LoadEx(MPU_NS, rnr, table, cnt);
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
@@ -1,337 +1,337 @@
|
||||
/******************************************************************************
|
||||
* @file pmu_armv8.h
|
||||
* @brief CMSIS PMU API for Armv8.1-M PMU
|
||||
* @version V1.0.0
|
||||
* @date 24. March 2020
|
||||
******************************************************************************/
|
||||
/*
|
||||
* Copyright (c) 2020 Arm Limited. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#if defined ( __ICCARM__ )
|
||||
#pragma system_include /* treat file as system include file for MISRA check */
|
||||
#elif defined (__clang__)
|
||||
#pragma clang system_header /* treat file as system include file */
|
||||
#endif
|
||||
|
||||
#ifndef ARM_PMU_ARMV8_H
|
||||
#define ARM_PMU_ARMV8_H
|
||||
|
||||
/**
|
||||
* \brief PMU Events
|
||||
* \note See the Armv8.1-M Architecture Reference Manual for full details on these PMU events.
|
||||
* */
|
||||
|
||||
#define ARM_PMU_SW_INCR 0x0000 /*!< Software update to the PMU_SWINC register, architecturally executed and condition code check pass */
|
||||
#define ARM_PMU_L1I_CACHE_REFILL 0x0001 /*!< L1 I-Cache refill */
|
||||
#define ARM_PMU_L1D_CACHE_REFILL 0x0003 /*!< L1 D-Cache refill */
|
||||
#define ARM_PMU_L1D_CACHE 0x0004 /*!< L1 D-Cache access */
|
||||
#define ARM_PMU_LD_RETIRED 0x0006 /*!< Memory-reading instruction architecturally executed and condition code check pass */
|
||||
#define ARM_PMU_ST_RETIRED 0x0007 /*!< Memory-writing instruction architecturally executed and condition code check pass */
|
||||
#define ARM_PMU_INST_RETIRED 0x0008 /*!< Instruction architecturally executed */
|
||||
#define ARM_PMU_EXC_TAKEN 0x0009 /*!< Exception entry */
|
||||
#define ARM_PMU_EXC_RETURN 0x000A /*!< Exception return instruction architecturally executed and the condition code check pass */
|
||||
#define ARM_PMU_PC_WRITE_RETIRED 0x000C /*!< Software change to the Program Counter (PC). Instruction is architecturally executed and condition code check pass */
|
||||
#define ARM_PMU_BR_IMMED_RETIRED 0x000D /*!< Immediate branch architecturally executed */
|
||||
#define ARM_PMU_BR_RETURN_RETIRED 0x000E /*!< Function return instruction architecturally executed and the condition code check pass */
|
||||
#define ARM_PMU_UNALIGNED_LDST_RETIRED 0x000F /*!< Unaligned memory-reading or memory-writing instruction architecturally executed and condition code check pass */
|
||||
#define ARM_PMU_BR_MIS_PRED 0x0010 /*!< Mispredicted or not predicted branch speculatively executed */
|
||||
#define ARM_PMU_CPU_CYCLES 0x0011 /*!< Cycle */
|
||||
#define ARM_PMU_BR_PRED 0x0012 /*!< Predictable branch speculatively executed */
|
||||
#define ARM_PMU_MEM_ACCESS 0x0013 /*!< Data memory access */
|
||||
#define ARM_PMU_L1I_CACHE 0x0014 /*!< Level 1 instruction cache access */
|
||||
#define ARM_PMU_L1D_CACHE_WB 0x0015 /*!< Level 1 data cache write-back */
|
||||
#define ARM_PMU_L2D_CACHE 0x0016 /*!< Level 2 data cache access */
|
||||
#define ARM_PMU_L2D_CACHE_REFILL 0x0017 /*!< Level 2 data cache refill */
|
||||
#define ARM_PMU_L2D_CACHE_WB 0x0018 /*!< Level 2 data cache write-back */
|
||||
#define ARM_PMU_BUS_ACCESS 0x0019 /*!< Bus access */
|
||||
#define ARM_PMU_MEMORY_ERROR 0x001A /*!< Local memory error */
|
||||
#define ARM_PMU_INST_SPEC 0x001B /*!< Instruction speculatively executed */
|
||||
#define ARM_PMU_BUS_CYCLES 0x001D /*!< Bus cycles */
|
||||
#define ARM_PMU_CHAIN 0x001E /*!< For an odd numbered counter, increment when an overflow occurs on the preceding even-numbered counter on the same PE */
|
||||
#define ARM_PMU_L1D_CACHE_ALLOCATE 0x001F /*!< Level 1 data cache allocation without refill */
|
||||
#define ARM_PMU_L2D_CACHE_ALLOCATE 0x0020 /*!< Level 2 data cache allocation without refill */
|
||||
#define ARM_PMU_BR_RETIRED 0x0021 /*!< Branch instruction architecturally executed */
|
||||
#define ARM_PMU_BR_MIS_PRED_RETIRED 0x0022 /*!< Mispredicted branch instruction architecturally executed */
|
||||
#define ARM_PMU_STALL_FRONTEND 0x0023 /*!< No operation issued because of the frontend */
|
||||
#define ARM_PMU_STALL_BACKEND 0x0024 /*!< No operation issued because of the backend */
|
||||
#define ARM_PMU_L2I_CACHE 0x0027 /*!< Level 2 instruction cache access */
|
||||
#define ARM_PMU_L2I_CACHE_REFILL 0x0028 /*!< Level 2 instruction cache refill */
|
||||
#define ARM_PMU_L3D_CACHE_ALLOCATE 0x0029 /*!< Level 3 data cache allocation without refill */
|
||||
#define ARM_PMU_L3D_CACHE_REFILL 0x002A /*!< Level 3 data cache refill */
|
||||
#define ARM_PMU_L3D_CACHE 0x002B /*!< Level 3 data cache access */
|
||||
#define ARM_PMU_L3D_CACHE_WB 0x002C /*!< Level 3 data cache write-back */
|
||||
#define ARM_PMU_LL_CACHE_RD 0x0036 /*!< Last level data cache read */
|
||||
#define ARM_PMU_LL_CACHE_MISS_RD 0x0037 /*!< Last level data cache read miss */
|
||||
#define ARM_PMU_L1D_CACHE_MISS_RD 0x0039 /*!< Level 1 data cache read miss */
|
||||
#define ARM_PMU_OP_COMPLETE 0x003A /*!< Operation retired */
|
||||
#define ARM_PMU_OP_SPEC 0x003B /*!< Operation speculatively executed */
|
||||
#define ARM_PMU_STALL 0x003C /*!< Stall cycle for instruction or operation not sent for execution */
|
||||
#define ARM_PMU_STALL_OP_BACKEND 0x003D /*!< Stall cycle for instruction or operation not sent for execution due to pipeline backend */
|
||||
#define ARM_PMU_STALL_OP_FRONTEND 0x003E /*!< Stall cycle for instruction or operation not sent for execution due to pipeline frontend */
|
||||
#define ARM_PMU_STALL_OP 0x003F /*!< Instruction or operation slots not occupied each cycle */
|
||||
#define ARM_PMU_L1D_CACHE_RD 0x0040 /*!< Level 1 data cache read */
|
||||
#define ARM_PMU_LE_RETIRED 0x0100 /*!< Loop end instruction executed */
|
||||
#define ARM_PMU_LE_SPEC 0x0101 /*!< Loop end instruction speculatively executed */
|
||||
#define ARM_PMU_BF_RETIRED 0x0104 /*!< Branch future instruction architecturally executed and condition code check pass */
|
||||
#define ARM_PMU_BF_SPEC 0x0105 /*!< Branch future instruction speculatively executed and condition code check pass */
|
||||
#define ARM_PMU_LE_CANCEL 0x0108 /*!< Loop end instruction not taken */
|
||||
#define ARM_PMU_BF_CANCEL 0x0109 /*!< Branch future instruction not taken */
|
||||
#define ARM_PMU_SE_CALL_S 0x0114 /*!< Call to secure function, resulting in Security state change */
|
||||
#define ARM_PMU_SE_CALL_NS 0x0115 /*!< Call to non-secure function, resulting in Security state change */
|
||||
#define ARM_PMU_DWT_CMPMATCH0 0x0118 /*!< DWT comparator 0 match */
|
||||
#define ARM_PMU_DWT_CMPMATCH1 0x0119 /*!< DWT comparator 1 match */
|
||||
#define ARM_PMU_DWT_CMPMATCH2 0x011A /*!< DWT comparator 2 match */
|
||||
#define ARM_PMU_DWT_CMPMATCH3 0x011B /*!< DWT comparator 3 match */
|
||||
#define ARM_PMU_MVE_INST_RETIRED 0x0200 /*!< MVE instruction architecturally executed */
|
||||
#define ARM_PMU_MVE_INST_SPEC 0x0201 /*!< MVE instruction speculatively executed */
|
||||
#define ARM_PMU_MVE_FP_RETIRED 0x0204 /*!< MVE floating-point instruction architecturally executed */
|
||||
#define ARM_PMU_MVE_FP_SPEC 0x0205 /*!< MVE floating-point instruction speculatively executed */
|
||||
#define ARM_PMU_MVE_FP_HP_RETIRED 0x0208 /*!< MVE half-precision floating-point instruction architecturally executed */
|
||||
#define ARM_PMU_MVE_FP_HP_SPEC 0x0209 /*!< MVE half-precision floating-point instruction speculatively executed */
|
||||
#define ARM_PMU_MVE_FP_SP_RETIRED 0x020C /*!< MVE single-precision floating-point instruction architecturally executed */
|
||||
#define ARM_PMU_MVE_FP_SP_SPEC 0x020D /*!< MVE single-precision floating-point instruction speculatively executed */
|
||||
#define ARM_PMU_MVE_FP_MAC_RETIRED 0x0214 /*!< MVE floating-point multiply or multiply-accumulate instruction architecturally executed */
|
||||
#define ARM_PMU_MVE_FP_MAC_SPEC 0x0215 /*!< MVE floating-point multiply or multiply-accumulate instruction speculatively executed */
|
||||
#define ARM_PMU_MVE_INT_RETIRED 0x0224 /*!< MVE integer instruction architecturally executed */
|
||||
#define ARM_PMU_MVE_INT_SPEC 0x0225 /*!< MVE integer instruction speculatively executed */
|
||||
#define ARM_PMU_MVE_INT_MAC_RETIRED 0x0228 /*!< MVE multiply or multiply-accumulate instruction architecturally executed */
|
||||
#define ARM_PMU_MVE_INT_MAC_SPEC 0x0229 /*!< MVE multiply or multiply-accumulate instruction speculatively executed */
|
||||
#define ARM_PMU_MVE_LDST_RETIRED 0x0238 /*!< MVE load or store instruction architecturally executed */
|
||||
#define ARM_PMU_MVE_LDST_SPEC 0x0239 /*!< MVE load or store instruction speculatively executed */
|
||||
#define ARM_PMU_MVE_LD_RETIRED 0x023C /*!< MVE load instruction architecturally executed */
|
||||
#define ARM_PMU_MVE_LD_SPEC 0x023D /*!< MVE load instruction speculatively executed */
|
||||
#define ARM_PMU_MVE_ST_RETIRED 0x0240 /*!< MVE store instruction architecturally executed */
|
||||
#define ARM_PMU_MVE_ST_SPEC 0x0241 /*!< MVE store instruction speculatively executed */
|
||||
#define ARM_PMU_MVE_LDST_CONTIG_RETIRED 0x0244 /*!< MVE contiguous load or store instruction architecturally executed */
|
||||
#define ARM_PMU_MVE_LDST_CONTIG_SPEC 0x0245 /*!< MVE contiguous load or store instruction speculatively executed */
|
||||
#define ARM_PMU_MVE_LD_CONTIG_RETIRED 0x0248 /*!< MVE contiguous load instruction architecturally executed */
|
||||
#define ARM_PMU_MVE_LD_CONTIG_SPEC 0x0249 /*!< MVE contiguous load instruction speculatively executed */
|
||||
#define ARM_PMU_MVE_ST_CONTIG_RETIRED 0x024C /*!< MVE contiguous store instruction architecturally executed */
|
||||
#define ARM_PMU_MVE_ST_CONTIG_SPEC 0x024D /*!< MVE contiguous store instruction speculatively executed */
|
||||
#define ARM_PMU_MVE_LDST_NONCONTIG_RETIRED 0x0250 /*!< MVE non-contiguous load or store instruction architecturally executed */
|
||||
#define ARM_PMU_MVE_LDST_NONCONTIG_SPEC 0x0251 /*!< MVE non-contiguous load or store instruction speculatively executed */
|
||||
#define ARM_PMU_MVE_LD_NONCONTIG_RETIRED 0x0254 /*!< MVE non-contiguous load instruction architecturally executed */
|
||||
#define ARM_PMU_MVE_LD_NONCONTIG_SPEC 0x0255 /*!< MVE non-contiguous load instruction speculatively executed */
|
||||
#define ARM_PMU_MVE_ST_NONCONTIG_RETIRED 0x0258 /*!< MVE non-contiguous store instruction architecturally executed */
|
||||
#define ARM_PMU_MVE_ST_NONCONTIG_SPEC 0x0259 /*!< MVE non-contiguous store instruction speculatively executed */
|
||||
#define ARM_PMU_MVE_LDST_MULTI_RETIRED 0x025C /*!< MVE memory instruction targeting multiple registers architecturally executed */
|
||||
#define ARM_PMU_MVE_LDST_MULTI_SPEC 0x025D /*!< MVE memory instruction targeting multiple registers speculatively executed */
|
||||
#define ARM_PMU_MVE_LD_MULTI_RETIRED 0x0260 /*!< MVE memory load instruction targeting multiple registers architecturally executed */
|
||||
#define ARM_PMU_MVE_LD_MULTI_SPEC 0x0261 /*!< MVE memory load instruction targeting multiple registers speculatively executed */
|
||||
#define ARM_PMU_MVE_ST_MULTI_RETIRED 0x0264 /*!< MVE memory store instruction targeting multiple registers architecturally executed */
|
||||
#define ARM_PMU_MVE_ST_MULTI_SPEC 0x0265 /*!< MVE memory store instruction targeting multiple registers speculatively executed */
|
||||
#define ARM_PMU_MVE_LDST_UNALIGNED_RETIRED 0x028C /*!< MVE unaligned memory load or store instruction architecturally executed */
|
||||
#define ARM_PMU_MVE_LDST_UNALIGNED_SPEC 0x028D /*!< MVE unaligned memory load or store instruction speculatively executed */
|
||||
#define ARM_PMU_MVE_LD_UNALIGNED_RETIRED 0x0290 /*!< MVE unaligned load instruction architecturally executed */
|
||||
#define ARM_PMU_MVE_LD_UNALIGNED_SPEC 0x0291 /*!< MVE unaligned load instruction speculatively executed */
|
||||
#define ARM_PMU_MVE_ST_UNALIGNED_RETIRED 0x0294 /*!< MVE unaligned store instruction architecturally executed */
|
||||
#define ARM_PMU_MVE_ST_UNALIGNED_SPEC 0x0295 /*!< MVE unaligned store instruction speculatively executed */
|
||||
#define ARM_PMU_MVE_LDST_UNALIGNED_NONCONTIG_RETIRED 0x0298 /*!< MVE unaligned noncontiguous load or store instruction architecturally executed */
|
||||
#define ARM_PMU_MVE_LDST_UNALIGNED_NONCONTIG_SPEC 0x0299 /*!< MVE unaligned noncontiguous load or store instruction speculatively executed */
|
||||
#define ARM_PMU_MVE_VREDUCE_RETIRED 0x02A0 /*!< MVE vector reduction instruction architecturally executed */
|
||||
#define ARM_PMU_MVE_VREDUCE_SPEC 0x02A1 /*!< MVE vector reduction instruction speculatively executed */
|
||||
#define ARM_PMU_MVE_VREDUCE_FP_RETIRED 0x02A4 /*!< MVE floating-point vector reduction instruction architecturally executed */
|
||||
#define ARM_PMU_MVE_VREDUCE_FP_SPEC 0x02A5 /*!< MVE floating-point vector reduction instruction speculatively executed */
|
||||
#define ARM_PMU_MVE_VREDUCE_INT_RETIRED 0x02A8 /*!< MVE integer vector reduction instruction architecturally executed */
|
||||
#define ARM_PMU_MVE_VREDUCE_INT_SPEC 0x02A9 /*!< MVE integer vector reduction instruction speculatively executed */
|
||||
#define ARM_PMU_MVE_PRED 0x02B8 /*!< Cycles where one or more predicated beats architecturally executed */
|
||||
#define ARM_PMU_MVE_STALL 0x02CC /*!< Stall cycles caused by an MVE instruction */
|
||||
#define ARM_PMU_MVE_STALL_RESOURCE 0x02CD /*!< Stall cycles caused by an MVE instruction because of resource conflicts */
|
||||
#define ARM_PMU_MVE_STALL_RESOURCE_MEM 0x02CE /*!< Stall cycles caused by an MVE instruction because of memory resource conflicts */
|
||||
#define ARM_PMU_MVE_STALL_RESOURCE_FP 0x02CF /*!< Stall cycles caused by an MVE instruction because of floating-point resource conflicts */
|
||||
#define ARM_PMU_MVE_STALL_RESOURCE_INT 0x02D0 /*!< Stall cycles caused by an MVE instruction because of integer resource conflicts */
|
||||
#define ARM_PMU_MVE_STALL_BREAK 0x02D3 /*!< Stall cycles caused by an MVE chain break */
|
||||
#define ARM_PMU_MVE_STALL_DEPENDENCY 0x02D4 /*!< Stall cycles caused by MVE register dependency */
|
||||
#define ARM_PMU_ITCM_ACCESS 0x4007 /*!< Instruction TCM access */
|
||||
#define ARM_PMU_DTCM_ACCESS 0x4008 /*!< Data TCM access */
|
||||
#define ARM_PMU_TRCEXTOUT0 0x4010 /*!< ETM external output 0 */
|
||||
#define ARM_PMU_TRCEXTOUT1 0x4011 /*!< ETM external output 1 */
|
||||
#define ARM_PMU_TRCEXTOUT2 0x4012 /*!< ETM external output 2 */
|
||||
#define ARM_PMU_TRCEXTOUT3 0x4013 /*!< ETM external output 3 */
|
||||
#define ARM_PMU_CTI_TRIGOUT4 0x4018 /*!< Cross-trigger Interface output trigger 4 */
|
||||
#define ARM_PMU_CTI_TRIGOUT5 0x4019 /*!< Cross-trigger Interface output trigger 5 */
|
||||
#define ARM_PMU_CTI_TRIGOUT6 0x401A /*!< Cross-trigger Interface output trigger 6 */
|
||||
#define ARM_PMU_CTI_TRIGOUT7 0x401B /*!< Cross-trigger Interface output trigger 7 */
|
||||
|
||||
/** \brief PMU Functions */
|
||||
|
||||
__STATIC_INLINE void ARM_PMU_Enable(void);
|
||||
__STATIC_INLINE void ARM_PMU_Disable(void);
|
||||
|
||||
__STATIC_INLINE void ARM_PMU_Set_EVTYPER(uint32_t num, uint32_t type);
|
||||
|
||||
__STATIC_INLINE void ARM_PMU_CYCCNT_Reset(void);
|
||||
__STATIC_INLINE void ARM_PMU_EVCNTR_ALL_Reset(void);
|
||||
|
||||
__STATIC_INLINE void ARM_PMU_CNTR_Enable(uint32_t mask);
|
||||
__STATIC_INLINE void ARM_PMU_CNTR_Disable(uint32_t mask);
|
||||
|
||||
__STATIC_INLINE uint32_t ARM_PMU_Get_CCNTR(void);
|
||||
__STATIC_INLINE uint32_t ARM_PMU_Get_EVCNTR(uint32_t num);
|
||||
|
||||
__STATIC_INLINE uint32_t ARM_PMU_Get_CNTR_OVS(void);
|
||||
__STATIC_INLINE void ARM_PMU_Set_CNTR_OVS(uint32_t mask);
|
||||
|
||||
__STATIC_INLINE void ARM_PMU_Set_CNTR_IRQ_Enable(uint32_t mask);
|
||||
__STATIC_INLINE void ARM_PMU_Set_CNTR_IRQ_Disable(uint32_t mask);
|
||||
|
||||
__STATIC_INLINE void ARM_PMU_CNTR_Increment(uint32_t mask);
|
||||
|
||||
/**
|
||||
\brief Enable the PMU
|
||||
*/
|
||||
__STATIC_INLINE void ARM_PMU_Enable(void)
|
||||
{
|
||||
PMU->CTRL |= PMU_CTRL_ENABLE_Msk;
|
||||
}
|
||||
|
||||
/**
|
||||
\brief Disable the PMU
|
||||
*/
|
||||
__STATIC_INLINE void ARM_PMU_Disable(void)
|
||||
{
|
||||
PMU->CTRL &= ~PMU_CTRL_ENABLE_Msk;
|
||||
}
|
||||
|
||||
/**
|
||||
\brief Set event to count for PMU eventer counter
|
||||
\param [in] num Event counter (0-30) to configure
|
||||
\param [in] type Event to count
|
||||
*/
|
||||
__STATIC_INLINE void ARM_PMU_Set_EVTYPER(uint32_t num, uint32_t type)
|
||||
{
|
||||
PMU->EVTYPER[num] = type;
|
||||
}
|
||||
|
||||
/**
|
||||
\brief Reset cycle counter
|
||||
*/
|
||||
__STATIC_INLINE void ARM_PMU_CYCCNT_Reset(void)
|
||||
{
|
||||
PMU->CTRL |= PMU_CTRL_CYCCNT_RESET_Msk;
|
||||
}
|
||||
|
||||
/**
|
||||
\brief Reset all event counters
|
||||
*/
|
||||
__STATIC_INLINE void ARM_PMU_EVCNTR_ALL_Reset(void)
|
||||
{
|
||||
PMU->CTRL |= PMU_CTRL_EVENTCNT_RESET_Msk;
|
||||
}
|
||||
|
||||
/**
|
||||
\brief Enable counters
|
||||
\param [in] mask Counters to enable
|
||||
\note Enables one or more of the following:
|
||||
- event counters (0-30)
|
||||
- cycle counter
|
||||
*/
|
||||
__STATIC_INLINE void ARM_PMU_CNTR_Enable(uint32_t mask)
|
||||
{
|
||||
PMU->CNTENSET = mask;
|
||||
}
|
||||
|
||||
/**
|
||||
\brief Disable counters
|
||||
\param [in] mask Counters to enable
|
||||
\note Disables one or more of the following:
|
||||
- event counters (0-30)
|
||||
- cycle counter
|
||||
*/
|
||||
__STATIC_INLINE void ARM_PMU_CNTR_Disable(uint32_t mask)
|
||||
{
|
||||
PMU->CNTENCLR = mask;
|
||||
}
|
||||
|
||||
/**
|
||||
\brief Read cycle counter
|
||||
\return Cycle count
|
||||
*/
|
||||
__STATIC_INLINE uint32_t ARM_PMU_Get_CCNTR(void)
|
||||
{
|
||||
return PMU->CCNTR;
|
||||
}
|
||||
|
||||
/**
|
||||
\brief Read event counter
|
||||
\param [in] num Event counter (0-30) to read
|
||||
\return Event count
|
||||
*/
|
||||
__STATIC_INLINE uint32_t ARM_PMU_Get_EVCNTR(uint32_t num)
|
||||
{
|
||||
return PMU->EVCNTR[num];
|
||||
}
|
||||
|
||||
/**
|
||||
\brief Read counter overflow status
|
||||
\return Counter overflow status bits for the following:
|
||||
- event counters (0-30)
|
||||
- cycle counter
|
||||
*/
|
||||
__STATIC_INLINE uint32_t ARM_PMU_Get_CNTR_OVS(void)
|
||||
{
|
||||
return PMU->OVSSET;
|
||||
}
|
||||
|
||||
/**
|
||||
\brief Clear counter overflow status
|
||||
\param [in] mask Counter overflow status bits to clear
|
||||
\note Clears overflow status bits for one or more of the following:
|
||||
- event counters (0-30)
|
||||
- cycle counter
|
||||
*/
|
||||
__STATIC_INLINE void ARM_PMU_Set_CNTR_OVS(uint32_t mask)
|
||||
{
|
||||
PMU->OVSCLR = mask;
|
||||
}
|
||||
|
||||
/**
|
||||
\brief Enable counter overflow interrupt request
|
||||
\param [in] mask Counter overflow interrupt request bits to set
|
||||
\note Sets overflow interrupt request bits for one or more of the following:
|
||||
- event counters (0-30)
|
||||
- cycle counter
|
||||
*/
|
||||
__STATIC_INLINE void ARM_PMU_Set_CNTR_IRQ_Enable(uint32_t mask)
|
||||
{
|
||||
PMU->INTENSET = mask;
|
||||
}
|
||||
|
||||
/**
|
||||
\brief Disable counter overflow interrupt request
|
||||
\param [in] mask Counter overflow interrupt request bits to clear
|
||||
\note Clears overflow interrupt request bits for one or more of the following:
|
||||
- event counters (0-30)
|
||||
- cycle counter
|
||||
*/
|
||||
__STATIC_INLINE void ARM_PMU_Set_CNTR_IRQ_Disable(uint32_t mask)
|
||||
{
|
||||
PMU->INTENCLR = mask;
|
||||
}
|
||||
|
||||
/**
|
||||
\brief Software increment event counter
|
||||
\param [in] mask Counters to increment
|
||||
\note Software increment bits for one or more event counters (0-30)
|
||||
*/
|
||||
__STATIC_INLINE void ARM_PMU_CNTR_Increment(uint32_t mask)
|
||||
{
|
||||
PMU->SWINC = mask;
|
||||
}
|
||||
|
||||
#endif
|
||||
/******************************************************************************
|
||||
* @file pmu_armv8.h
|
||||
* @brief CMSIS PMU API for Armv8.1-M PMU
|
||||
* @version V1.0.0
|
||||
* @date 24. March 2020
|
||||
******************************************************************************/
|
||||
/*
|
||||
* Copyright (c) 2020 Arm Limited. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#if defined ( __ICCARM__ )
|
||||
#pragma system_include /* treat file as system include file for MISRA check */
|
||||
#elif defined (__clang__)
|
||||
#pragma clang system_header /* treat file as system include file */
|
||||
#endif
|
||||
|
||||
#ifndef ARM_PMU_ARMV8_H
|
||||
#define ARM_PMU_ARMV8_H
|
||||
|
||||
/**
|
||||
* \brief PMU Events
|
||||
* \note See the Armv8.1-M Architecture Reference Manual for full details on these PMU events.
|
||||
* */
|
||||
|
||||
#define ARM_PMU_SW_INCR 0x0000 /*!< Software update to the PMU_SWINC register, architecturally executed and condition code check pass */
|
||||
#define ARM_PMU_L1I_CACHE_REFILL 0x0001 /*!< L1 I-Cache refill */
|
||||
#define ARM_PMU_L1D_CACHE_REFILL 0x0003 /*!< L1 D-Cache refill */
|
||||
#define ARM_PMU_L1D_CACHE 0x0004 /*!< L1 D-Cache access */
|
||||
#define ARM_PMU_LD_RETIRED 0x0006 /*!< Memory-reading instruction architecturally executed and condition code check pass */
|
||||
#define ARM_PMU_ST_RETIRED 0x0007 /*!< Memory-writing instruction architecturally executed and condition code check pass */
|
||||
#define ARM_PMU_INST_RETIRED 0x0008 /*!< Instruction architecturally executed */
|
||||
#define ARM_PMU_EXC_TAKEN 0x0009 /*!< Exception entry */
|
||||
#define ARM_PMU_EXC_RETURN 0x000A /*!< Exception return instruction architecturally executed and the condition code check pass */
|
||||
#define ARM_PMU_PC_WRITE_RETIRED 0x000C /*!< Software change to the Program Counter (PC). Instruction is architecturally executed and condition code check pass */
|
||||
#define ARM_PMU_BR_IMMED_RETIRED 0x000D /*!< Immediate branch architecturally executed */
|
||||
#define ARM_PMU_BR_RETURN_RETIRED 0x000E /*!< Function return instruction architecturally executed and the condition code check pass */
|
||||
#define ARM_PMU_UNALIGNED_LDST_RETIRED 0x000F /*!< Unaligned memory-reading or memory-writing instruction architecturally executed and condition code check pass */
|
||||
#define ARM_PMU_BR_MIS_PRED 0x0010 /*!< Mispredicted or not predicted branch speculatively executed */
|
||||
#define ARM_PMU_CPU_CYCLES 0x0011 /*!< Cycle */
|
||||
#define ARM_PMU_BR_PRED 0x0012 /*!< Predictable branch speculatively executed */
|
||||
#define ARM_PMU_MEM_ACCESS 0x0013 /*!< Data memory access */
|
||||
#define ARM_PMU_L1I_CACHE 0x0014 /*!< Level 1 instruction cache access */
|
||||
#define ARM_PMU_L1D_CACHE_WB 0x0015 /*!< Level 1 data cache write-back */
|
||||
#define ARM_PMU_L2D_CACHE 0x0016 /*!< Level 2 data cache access */
|
||||
#define ARM_PMU_L2D_CACHE_REFILL 0x0017 /*!< Level 2 data cache refill */
|
||||
#define ARM_PMU_L2D_CACHE_WB 0x0018 /*!< Level 2 data cache write-back */
|
||||
#define ARM_PMU_BUS_ACCESS 0x0019 /*!< Bus access */
|
||||
#define ARM_PMU_MEMORY_ERROR 0x001A /*!< Local memory error */
|
||||
#define ARM_PMU_INST_SPEC 0x001B /*!< Instruction speculatively executed */
|
||||
#define ARM_PMU_BUS_CYCLES 0x001D /*!< Bus cycles */
|
||||
#define ARM_PMU_CHAIN 0x001E /*!< For an odd numbered counter, increment when an overflow occurs on the preceding even-numbered counter on the same PE */
|
||||
#define ARM_PMU_L1D_CACHE_ALLOCATE 0x001F /*!< Level 1 data cache allocation without refill */
|
||||
#define ARM_PMU_L2D_CACHE_ALLOCATE 0x0020 /*!< Level 2 data cache allocation without refill */
|
||||
#define ARM_PMU_BR_RETIRED 0x0021 /*!< Branch instruction architecturally executed */
|
||||
#define ARM_PMU_BR_MIS_PRED_RETIRED 0x0022 /*!< Mispredicted branch instruction architecturally executed */
|
||||
#define ARM_PMU_STALL_FRONTEND 0x0023 /*!< No operation issued because of the frontend */
|
||||
#define ARM_PMU_STALL_BACKEND 0x0024 /*!< No operation issued because of the backend */
|
||||
#define ARM_PMU_L2I_CACHE 0x0027 /*!< Level 2 instruction cache access */
|
||||
#define ARM_PMU_L2I_CACHE_REFILL 0x0028 /*!< Level 2 instruction cache refill */
|
||||
#define ARM_PMU_L3D_CACHE_ALLOCATE 0x0029 /*!< Level 3 data cache allocation without refill */
|
||||
#define ARM_PMU_L3D_CACHE_REFILL 0x002A /*!< Level 3 data cache refill */
|
||||
#define ARM_PMU_L3D_CACHE 0x002B /*!< Level 3 data cache access */
|
||||
#define ARM_PMU_L3D_CACHE_WB 0x002C /*!< Level 3 data cache write-back */
|
||||
#define ARM_PMU_LL_CACHE_RD 0x0036 /*!< Last level data cache read */
|
||||
#define ARM_PMU_LL_CACHE_MISS_RD 0x0037 /*!< Last level data cache read miss */
|
||||
#define ARM_PMU_L1D_CACHE_MISS_RD 0x0039 /*!< Level 1 data cache read miss */
|
||||
#define ARM_PMU_OP_COMPLETE 0x003A /*!< Operation retired */
|
||||
#define ARM_PMU_OP_SPEC 0x003B /*!< Operation speculatively executed */
|
||||
#define ARM_PMU_STALL 0x003C /*!< Stall cycle for instruction or operation not sent for execution */
|
||||
#define ARM_PMU_STALL_OP_BACKEND 0x003D /*!< Stall cycle for instruction or operation not sent for execution due to pipeline backend */
|
||||
#define ARM_PMU_STALL_OP_FRONTEND 0x003E /*!< Stall cycle for instruction or operation not sent for execution due to pipeline frontend */
|
||||
#define ARM_PMU_STALL_OP 0x003F /*!< Instruction or operation slots not occupied each cycle */
|
||||
#define ARM_PMU_L1D_CACHE_RD 0x0040 /*!< Level 1 data cache read */
|
||||
#define ARM_PMU_LE_RETIRED 0x0100 /*!< Loop end instruction executed */
|
||||
#define ARM_PMU_LE_SPEC 0x0101 /*!< Loop end instruction speculatively executed */
|
||||
#define ARM_PMU_BF_RETIRED 0x0104 /*!< Branch future instruction architecturally executed and condition code check pass */
|
||||
#define ARM_PMU_BF_SPEC 0x0105 /*!< Branch future instruction speculatively executed and condition code check pass */
|
||||
#define ARM_PMU_LE_CANCEL 0x0108 /*!< Loop end instruction not taken */
|
||||
#define ARM_PMU_BF_CANCEL 0x0109 /*!< Branch future instruction not taken */
|
||||
#define ARM_PMU_SE_CALL_S 0x0114 /*!< Call to secure function, resulting in Security state change */
|
||||
#define ARM_PMU_SE_CALL_NS 0x0115 /*!< Call to non-secure function, resulting in Security state change */
|
||||
#define ARM_PMU_DWT_CMPMATCH0 0x0118 /*!< DWT comparator 0 match */
|
||||
#define ARM_PMU_DWT_CMPMATCH1 0x0119 /*!< DWT comparator 1 match */
|
||||
#define ARM_PMU_DWT_CMPMATCH2 0x011A /*!< DWT comparator 2 match */
|
||||
#define ARM_PMU_DWT_CMPMATCH3 0x011B /*!< DWT comparator 3 match */
|
||||
#define ARM_PMU_MVE_INST_RETIRED 0x0200 /*!< MVE instruction architecturally executed */
|
||||
#define ARM_PMU_MVE_INST_SPEC 0x0201 /*!< MVE instruction speculatively executed */
|
||||
#define ARM_PMU_MVE_FP_RETIRED 0x0204 /*!< MVE floating-point instruction architecturally executed */
|
||||
#define ARM_PMU_MVE_FP_SPEC 0x0205 /*!< MVE floating-point instruction speculatively executed */
|
||||
#define ARM_PMU_MVE_FP_HP_RETIRED 0x0208 /*!< MVE half-precision floating-point instruction architecturally executed */
|
||||
#define ARM_PMU_MVE_FP_HP_SPEC 0x0209 /*!< MVE half-precision floating-point instruction speculatively executed */
|
||||
#define ARM_PMU_MVE_FP_SP_RETIRED 0x020C /*!< MVE single-precision floating-point instruction architecturally executed */
|
||||
#define ARM_PMU_MVE_FP_SP_SPEC 0x020D /*!< MVE single-precision floating-point instruction speculatively executed */
|
||||
#define ARM_PMU_MVE_FP_MAC_RETIRED 0x0214 /*!< MVE floating-point multiply or multiply-accumulate instruction architecturally executed */
|
||||
#define ARM_PMU_MVE_FP_MAC_SPEC 0x0215 /*!< MVE floating-point multiply or multiply-accumulate instruction speculatively executed */
|
||||
#define ARM_PMU_MVE_INT_RETIRED 0x0224 /*!< MVE integer instruction architecturally executed */
|
||||
#define ARM_PMU_MVE_INT_SPEC 0x0225 /*!< MVE integer instruction speculatively executed */
|
||||
#define ARM_PMU_MVE_INT_MAC_RETIRED 0x0228 /*!< MVE multiply or multiply-accumulate instruction architecturally executed */
|
||||
#define ARM_PMU_MVE_INT_MAC_SPEC 0x0229 /*!< MVE multiply or multiply-accumulate instruction speculatively executed */
|
||||
#define ARM_PMU_MVE_LDST_RETIRED 0x0238 /*!< MVE load or store instruction architecturally executed */
|
||||
#define ARM_PMU_MVE_LDST_SPEC 0x0239 /*!< MVE load or store instruction speculatively executed */
|
||||
#define ARM_PMU_MVE_LD_RETIRED 0x023C /*!< MVE load instruction architecturally executed */
|
||||
#define ARM_PMU_MVE_LD_SPEC 0x023D /*!< MVE load instruction speculatively executed */
|
||||
#define ARM_PMU_MVE_ST_RETIRED 0x0240 /*!< MVE store instruction architecturally executed */
|
||||
#define ARM_PMU_MVE_ST_SPEC 0x0241 /*!< MVE store instruction speculatively executed */
|
||||
#define ARM_PMU_MVE_LDST_CONTIG_RETIRED 0x0244 /*!< MVE contiguous load or store instruction architecturally executed */
|
||||
#define ARM_PMU_MVE_LDST_CONTIG_SPEC 0x0245 /*!< MVE contiguous load or store instruction speculatively executed */
|
||||
#define ARM_PMU_MVE_LD_CONTIG_RETIRED 0x0248 /*!< MVE contiguous load instruction architecturally executed */
|
||||
#define ARM_PMU_MVE_LD_CONTIG_SPEC 0x0249 /*!< MVE contiguous load instruction speculatively executed */
|
||||
#define ARM_PMU_MVE_ST_CONTIG_RETIRED 0x024C /*!< MVE contiguous store instruction architecturally executed */
|
||||
#define ARM_PMU_MVE_ST_CONTIG_SPEC 0x024D /*!< MVE contiguous store instruction speculatively executed */
|
||||
#define ARM_PMU_MVE_LDST_NONCONTIG_RETIRED 0x0250 /*!< MVE non-contiguous load or store instruction architecturally executed */
|
||||
#define ARM_PMU_MVE_LDST_NONCONTIG_SPEC 0x0251 /*!< MVE non-contiguous load or store instruction speculatively executed */
|
||||
#define ARM_PMU_MVE_LD_NONCONTIG_RETIRED 0x0254 /*!< MVE non-contiguous load instruction architecturally executed */
|
||||
#define ARM_PMU_MVE_LD_NONCONTIG_SPEC 0x0255 /*!< MVE non-contiguous load instruction speculatively executed */
|
||||
#define ARM_PMU_MVE_ST_NONCONTIG_RETIRED 0x0258 /*!< MVE non-contiguous store instruction architecturally executed */
|
||||
#define ARM_PMU_MVE_ST_NONCONTIG_SPEC 0x0259 /*!< MVE non-contiguous store instruction speculatively executed */
|
||||
#define ARM_PMU_MVE_LDST_MULTI_RETIRED 0x025C /*!< MVE memory instruction targeting multiple registers architecturally executed */
|
||||
#define ARM_PMU_MVE_LDST_MULTI_SPEC 0x025D /*!< MVE memory instruction targeting multiple registers speculatively executed */
|
||||
#define ARM_PMU_MVE_LD_MULTI_RETIRED 0x0260 /*!< MVE memory load instruction targeting multiple registers architecturally executed */
|
||||
#define ARM_PMU_MVE_LD_MULTI_SPEC 0x0261 /*!< MVE memory load instruction targeting multiple registers speculatively executed */
|
||||
#define ARM_PMU_MVE_ST_MULTI_RETIRED 0x0264 /*!< MVE memory store instruction targeting multiple registers architecturally executed */
|
||||
#define ARM_PMU_MVE_ST_MULTI_SPEC 0x0265 /*!< MVE memory store instruction targeting multiple registers speculatively executed */
|
||||
#define ARM_PMU_MVE_LDST_UNALIGNED_RETIRED 0x028C /*!< MVE unaligned memory load or store instruction architecturally executed */
|
||||
#define ARM_PMU_MVE_LDST_UNALIGNED_SPEC 0x028D /*!< MVE unaligned memory load or store instruction speculatively executed */
|
||||
#define ARM_PMU_MVE_LD_UNALIGNED_RETIRED 0x0290 /*!< MVE unaligned load instruction architecturally executed */
|
||||
#define ARM_PMU_MVE_LD_UNALIGNED_SPEC 0x0291 /*!< MVE unaligned load instruction speculatively executed */
|
||||
#define ARM_PMU_MVE_ST_UNALIGNED_RETIRED 0x0294 /*!< MVE unaligned store instruction architecturally executed */
|
||||
#define ARM_PMU_MVE_ST_UNALIGNED_SPEC 0x0295 /*!< MVE unaligned store instruction speculatively executed */
|
||||
#define ARM_PMU_MVE_LDST_UNALIGNED_NONCONTIG_RETIRED 0x0298 /*!< MVE unaligned noncontiguous load or store instruction architecturally executed */
|
||||
#define ARM_PMU_MVE_LDST_UNALIGNED_NONCONTIG_SPEC 0x0299 /*!< MVE unaligned noncontiguous load or store instruction speculatively executed */
|
||||
#define ARM_PMU_MVE_VREDUCE_RETIRED 0x02A0 /*!< MVE vector reduction instruction architecturally executed */
|
||||
#define ARM_PMU_MVE_VREDUCE_SPEC 0x02A1 /*!< MVE vector reduction instruction speculatively executed */
|
||||
#define ARM_PMU_MVE_VREDUCE_FP_RETIRED 0x02A4 /*!< MVE floating-point vector reduction instruction architecturally executed */
|
||||
#define ARM_PMU_MVE_VREDUCE_FP_SPEC 0x02A5 /*!< MVE floating-point vector reduction instruction speculatively executed */
|
||||
#define ARM_PMU_MVE_VREDUCE_INT_RETIRED 0x02A8 /*!< MVE integer vector reduction instruction architecturally executed */
|
||||
#define ARM_PMU_MVE_VREDUCE_INT_SPEC 0x02A9 /*!< MVE integer vector reduction instruction speculatively executed */
|
||||
#define ARM_PMU_MVE_PRED 0x02B8 /*!< Cycles where one or more predicated beats architecturally executed */
|
||||
#define ARM_PMU_MVE_STALL 0x02CC /*!< Stall cycles caused by an MVE instruction */
|
||||
#define ARM_PMU_MVE_STALL_RESOURCE 0x02CD /*!< Stall cycles caused by an MVE instruction because of resource conflicts */
|
||||
#define ARM_PMU_MVE_STALL_RESOURCE_MEM 0x02CE /*!< Stall cycles caused by an MVE instruction because of memory resource conflicts */
|
||||
#define ARM_PMU_MVE_STALL_RESOURCE_FP 0x02CF /*!< Stall cycles caused by an MVE instruction because of floating-point resource conflicts */
|
||||
#define ARM_PMU_MVE_STALL_RESOURCE_INT 0x02D0 /*!< Stall cycles caused by an MVE instruction because of integer resource conflicts */
|
||||
#define ARM_PMU_MVE_STALL_BREAK 0x02D3 /*!< Stall cycles caused by an MVE chain break */
|
||||
#define ARM_PMU_MVE_STALL_DEPENDENCY 0x02D4 /*!< Stall cycles caused by MVE register dependency */
|
||||
#define ARM_PMU_ITCM_ACCESS 0x4007 /*!< Instruction TCM access */
|
||||
#define ARM_PMU_DTCM_ACCESS 0x4008 /*!< Data TCM access */
|
||||
#define ARM_PMU_TRCEXTOUT0 0x4010 /*!< ETM external output 0 */
|
||||
#define ARM_PMU_TRCEXTOUT1 0x4011 /*!< ETM external output 1 */
|
||||
#define ARM_PMU_TRCEXTOUT2 0x4012 /*!< ETM external output 2 */
|
||||
#define ARM_PMU_TRCEXTOUT3 0x4013 /*!< ETM external output 3 */
|
||||
#define ARM_PMU_CTI_TRIGOUT4 0x4018 /*!< Cross-trigger Interface output trigger 4 */
|
||||
#define ARM_PMU_CTI_TRIGOUT5 0x4019 /*!< Cross-trigger Interface output trigger 5 */
|
||||
#define ARM_PMU_CTI_TRIGOUT6 0x401A /*!< Cross-trigger Interface output trigger 6 */
|
||||
#define ARM_PMU_CTI_TRIGOUT7 0x401B /*!< Cross-trigger Interface output trigger 7 */
|
||||
|
||||
/** \brief PMU Functions */
|
||||
|
||||
__STATIC_INLINE void ARM_PMU_Enable(void);
|
||||
__STATIC_INLINE void ARM_PMU_Disable(void);
|
||||
|
||||
__STATIC_INLINE void ARM_PMU_Set_EVTYPER(uint32_t num, uint32_t type);
|
||||
|
||||
__STATIC_INLINE void ARM_PMU_CYCCNT_Reset(void);
|
||||
__STATIC_INLINE void ARM_PMU_EVCNTR_ALL_Reset(void);
|
||||
|
||||
__STATIC_INLINE void ARM_PMU_CNTR_Enable(uint32_t mask);
|
||||
__STATIC_INLINE void ARM_PMU_CNTR_Disable(uint32_t mask);
|
||||
|
||||
__STATIC_INLINE uint32_t ARM_PMU_Get_CCNTR(void);
|
||||
__STATIC_INLINE uint32_t ARM_PMU_Get_EVCNTR(uint32_t num);
|
||||
|
||||
__STATIC_INLINE uint32_t ARM_PMU_Get_CNTR_OVS(void);
|
||||
__STATIC_INLINE void ARM_PMU_Set_CNTR_OVS(uint32_t mask);
|
||||
|
||||
__STATIC_INLINE void ARM_PMU_Set_CNTR_IRQ_Enable(uint32_t mask);
|
||||
__STATIC_INLINE void ARM_PMU_Set_CNTR_IRQ_Disable(uint32_t mask);
|
||||
|
||||
__STATIC_INLINE void ARM_PMU_CNTR_Increment(uint32_t mask);
|
||||
|
||||
/**
|
||||
\brief Enable the PMU
|
||||
*/
|
||||
__STATIC_INLINE void ARM_PMU_Enable(void)
|
||||
{
|
||||
PMU->CTRL |= PMU_CTRL_ENABLE_Msk;
|
||||
}
|
||||
|
||||
/**
|
||||
\brief Disable the PMU
|
||||
*/
|
||||
__STATIC_INLINE void ARM_PMU_Disable(void)
|
||||
{
|
||||
PMU->CTRL &= ~PMU_CTRL_ENABLE_Msk;
|
||||
}
|
||||
|
||||
/**
|
||||
\brief Set event to count for PMU event counter
|
||||
\param [in] num Event counter (0-30) to configure
|
||||
\param [in] type Event to count
|
||||
*/
|
||||
__STATIC_INLINE void ARM_PMU_Set_EVTYPER(uint32_t num, uint32_t type)
|
||||
{
|
||||
PMU->EVTYPER[num] = type;
|
||||
}
|
||||
|
||||
/**
|
||||
\brief Reset cycle counter
|
||||
*/
|
||||
__STATIC_INLINE void ARM_PMU_CYCCNT_Reset(void)
|
||||
{
|
||||
PMU->CTRL |= PMU_CTRL_CYCCNT_RESET_Msk;
|
||||
}
|
||||
|
||||
/**
|
||||
\brief Reset all event counters
|
||||
*/
|
||||
__STATIC_INLINE void ARM_PMU_EVCNTR_ALL_Reset(void)
|
||||
{
|
||||
PMU->CTRL |= PMU_CTRL_EVENTCNT_RESET_Msk;
|
||||
}
|
||||
|
||||
/**
|
||||
\brief Enable counters
|
||||
\param [in] mask Counters to enable
|
||||
\note Enables one or more of the following:
|
||||
- event counters (0-30)
|
||||
- cycle counter
|
||||
*/
|
||||
__STATIC_INLINE void ARM_PMU_CNTR_Enable(uint32_t mask)
|
||||
{
|
||||
PMU->CNTENSET = mask;
|
||||
}
|
||||
|
||||
/**
|
||||
\brief Disable counters
|
||||
\param [in] mask Counters to disable
|
||||
\note Disables one or more of the following:
|
||||
- event counters (0-30)
|
||||
- cycle counter
|
||||
*/
|
||||
__STATIC_INLINE void ARM_PMU_CNTR_Disable(uint32_t mask)
|
||||
{
|
||||
PMU->CNTENCLR = mask;
|
||||
}
|
||||
|
||||
/**
|
||||
\brief Read cycle counter
|
||||
\return Cycle count
|
||||
*/
|
||||
__STATIC_INLINE uint32_t ARM_PMU_Get_CCNTR(void)
|
||||
{
|
||||
return PMU->CCNTR;
|
||||
}
|
||||
|
||||
/**
|
||||
\brief Read event counter
|
||||
\param [in] num Event counter (0-30) to read
|
||||
\return Event count
|
||||
*/
|
||||
__STATIC_INLINE uint32_t ARM_PMU_Get_EVCNTR(uint32_t num)
|
||||
{
|
||||
return PMU->EVCNTR[num];
|
||||
}
|
||||
|
||||
/**
|
||||
\brief Read counter overflow status
|
||||
\return Counter overflow status bits for the following:
|
||||
- event counters (0-30)
|
||||
- cycle counter
|
||||
*/
|
||||
__STATIC_INLINE uint32_t ARM_PMU_Get_CNTR_OVS(void)
|
||||
{
|
||||
return PMU->OVSSET;
|
||||
}
|
||||
|
||||
/**
|
||||
\brief Clear counter overflow status
|
||||
\param [in] mask Counter overflow status bits to clear
|
||||
\note Clears overflow status bits for one or more of the following:
|
||||
- event counters (0-30)
|
||||
- cycle counter
|
||||
*/
|
||||
__STATIC_INLINE void ARM_PMU_Set_CNTR_OVS(uint32_t mask)
|
||||
{
|
||||
PMU->OVSCLR = mask;
|
||||
}
|
||||
|
||||
/**
|
||||
\brief Enable counter overflow interrupt request
|
||||
\param [in] mask Counter overflow interrupt request bits to set
|
||||
\note Sets overflow interrupt request bits for one or more of the following:
|
||||
- event counters (0-30)
|
||||
- cycle counter
|
||||
*/
|
||||
__STATIC_INLINE void ARM_PMU_Set_CNTR_IRQ_Enable(uint32_t mask)
|
||||
{
|
||||
PMU->INTENSET = mask;
|
||||
}
|
||||
|
||||
/**
|
||||
\brief Disable counter overflow interrupt request
|
||||
\param [in] mask Counter overflow interrupt request bits to clear
|
||||
\note Clears overflow interrupt request bits for one or more of the following:
|
||||
- event counters (0-30)
|
||||
- cycle counter
|
||||
*/
|
||||
__STATIC_INLINE void ARM_PMU_Set_CNTR_IRQ_Disable(uint32_t mask)
|
||||
{
|
||||
PMU->INTENCLR = mask;
|
||||
}
|
||||
|
||||
/**
|
||||
\brief Software increment event counter
|
||||
\param [in] mask Counters to increment
|
||||
\note Software increment bits for one or more event counters (0-30)
|
||||
*/
|
||||
__STATIC_INLINE void ARM_PMU_CNTR_Increment(uint32_t mask)
|
||||
{
|
||||
PMU->SWINC = mask;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,17 +1,15 @@
|
||||
/**
|
||||
**************************************************************************
|
||||
* @file at32f403a_407.h
|
||||
* @version v2.0.4
|
||||
* @date 2021-11-26
|
||||
* @brief at32f403a_407 header file
|
||||
**************************************************************************
|
||||
* Copyright notice & Disclaimer
|
||||
*
|
||||
* The software Board Support Package (BSP) that is made available to
|
||||
* download from Artery official website is the copyrighted work of Artery.
|
||||
* Artery authorizes customers to use, copy, and distribute the BSP
|
||||
* software and its related documentation for the purpose of design and
|
||||
* development in conjunction with Artery microcontrollers. Use of the
|
||||
* The software Board Support Package (BSP) that is made available to
|
||||
* download from Artery official website is the copyrighted work of Artery.
|
||||
* Artery authorizes customers to use, copy, and distribute the BSP
|
||||
* software and its related documentation for the purpose of design and
|
||||
* development in conjunction with Artery microcontrollers. Use of the
|
||||
* software is governed by this copyright notice and the following disclaimer.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED ON "AS IS" BASIS WITHOUT WARRANTIES,
|
||||
@@ -42,7 +40,7 @@ extern "C" {
|
||||
/** @addtogroup AT32F403A_407
|
||||
* @{
|
||||
*/
|
||||
|
||||
|
||||
/** @addtogroup Library_configuration_section
|
||||
* @{
|
||||
*/
|
||||
@@ -60,7 +58,7 @@ extern "C" {
|
||||
!defined (AT32F407VCT7) && !defined (AT32F407RET7) && !defined (AT32F407VET7) && \
|
||||
!defined (AT32F407AVCT7) && !defined (AT32F407AVGT7)
|
||||
|
||||
#error "Please select first the target at32f4xx device used in your application (in at32f4xx.h file)"
|
||||
#error "Please select first the target device used in your application (in at32f403a_407.h file)"
|
||||
#endif
|
||||
|
||||
#if defined (AT32F403AVCT7) || defined (AT32F403ARCT7) || defined (AT32F403ACCT7) || \
|
||||
@@ -78,11 +76,38 @@ extern "C" {
|
||||
#define AT32F407xx
|
||||
#endif
|
||||
|
||||
#if defined (AT32F403AVCT7) || defined (AT32F403AVET7) || defined (AT32F403AVGT7)
|
||||
|
||||
#define AT32F403AVx
|
||||
#endif
|
||||
|
||||
#if defined (AT32F403ARCT7) || defined (AT32F403ARET7) || defined (AT32F403ARGT7)
|
||||
|
||||
#define AT32F403ARx
|
||||
#endif
|
||||
|
||||
#if defined (AT32F403ACCT7) || defined (AT32F403ACCU7) || defined (AT32F403ACET7) || \
|
||||
defined (AT32F403ACEU7) || defined (AT32F403ACGT7) || defined (AT32F403ACGU7)
|
||||
|
||||
#define AT32F403ACx
|
||||
#endif
|
||||
|
||||
#if defined (AT32F407RCT7) || defined (AT32F407RET7) || defined (AT32F407RGT7)
|
||||
|
||||
#define AT32F407Rx
|
||||
#endif
|
||||
|
||||
#if defined (AT32F407VCT7) || defined (AT32F407VET7) || defined (AT32F407VGT7) || \
|
||||
defined (AT32F407AVCT7) || defined (AT32F407AVGT7)
|
||||
|
||||
#define AT32F407Vx
|
||||
#endif
|
||||
|
||||
#ifndef USE_STDPERIPH_DRIVER
|
||||
/**
|
||||
* @brief comment the line below if you will not use the peripherals drivers.
|
||||
* in this case, these drivers will not be included and the application code will
|
||||
* be based on direct access to peripherals registers
|
||||
* in this case, these drivers will not be included and the application code will
|
||||
* be based on direct access to peripherals registers
|
||||
*/
|
||||
#ifdef _RTE_
|
||||
#include "RTE_Components.h"
|
||||
@@ -96,7 +121,7 @@ extern "C" {
|
||||
* @brief at32f403a_407 standard peripheral library version number
|
||||
*/
|
||||
#define __AT32F403A_407_LIBRARY_VERSION_MAJOR (0x02) /*!< [31:24] major version */
|
||||
#define __AT32F403A_407_LIBRARY_VERSION_MIDDLE (0x00) /*!< [23:16] middle version */
|
||||
#define __AT32F403A_407_LIBRARY_VERSION_MIDDLE (0x01) /*!< [23:16] middle version */
|
||||
#define __AT32F403A_407_LIBRARY_VERSION_MINOR (0x04) /*!< [15:8] minor version */
|
||||
#define __AT32F403A_407_LIBRARY_VERSION_RC (0x00) /*!< [7:0] release candidate */
|
||||
#define __AT32F403A_407_LIBRARY_VERSION ((__AT32F403A_407_LIBRARY_VERSION_MAJOR << 24) | \
|
||||
@@ -293,7 +318,7 @@ typedef enum IRQn
|
||||
|
||||
/** @addtogroup Exported_types
|
||||
* @{
|
||||
*/
|
||||
*/
|
||||
|
||||
typedef int32_t INT32;
|
||||
typedef int16_t INT16;
|
||||
@@ -335,19 +360,19 @@ typedef __I uint16_t vuc16; /*!< read only */
|
||||
typedef __I uint8_t vuc8; /*!< read only */
|
||||
|
||||
/**
|
||||
* @brief flag status
|
||||
* @brief flag status
|
||||
*/
|
||||
typedef enum {RESET = 0, SET = !RESET} flag_status;
|
||||
typedef enum {RESET = 0, SET = !RESET} flag_status;
|
||||
|
||||
/**
|
||||
* @brief confirm state
|
||||
*/
|
||||
typedef enum {FALSE = 0, TRUE = !FALSE} confirm_state;
|
||||
*/
|
||||
typedef enum {FALSE = 0, TRUE = !FALSE} confirm_state;
|
||||
|
||||
/**
|
||||
* @brief error status
|
||||
*/
|
||||
typedef enum {ERROR = 0, SUCCESS = !ERROR} error_status;
|
||||
*/
|
||||
typedef enum {ERROR = 0, SUCCESS = !ERROR} error_status;
|
||||
|
||||
/**
|
||||
* @}
|
||||
@@ -415,7 +440,7 @@ typedef enum {ERROR = 0, SUCCESS = !ERROR} error_status;
|
||||
#define BPR_BASE (APB1PERIPH_BASE + 0x6C00)
|
||||
#define PWC_BASE (APB1PERIPH_BASE + 0x7000)
|
||||
#define DAC_BASE (APB1PERIPH_BASE + 0x7400)
|
||||
/* apb2 bus base address */
|
||||
/* apb2 bus base address */
|
||||
#define IOMUX_BASE (APB2PERIPH_BASE + 0x0000)
|
||||
#define EXINT_BASE (APB2PERIPH_BASE + 0x0400)
|
||||
#define GPIOA_BASE (APB2PERIPH_BASE + 0x0800)
|
||||
@@ -441,7 +466,7 @@ typedef enum {ERROR = 0, SUCCESS = !ERROR} error_status;
|
||||
#define I2S2EXT_BASE (APB2PERIPH_BASE + 0x6C00)
|
||||
#define I2S3EXT_BASE (APB2PERIPH_BASE + 0x7000)
|
||||
#define SDIO1_BASE (APB2PERIPH_BASE + 0x8000)
|
||||
/* ahb bus base address */
|
||||
/* ahb bus base address */
|
||||
#define DMA1_BASE (AHBPERIPH_BASE + 0x0000)
|
||||
#define DMA1_CHANNEL1_BASE (AHBPERIPH_BASE + 0x0008)
|
||||
#define DMA1_CHANNEL2_BASE (AHBPERIPH_BASE + 0x001C)
|
||||
@@ -499,7 +524,7 @@ typedef enum {ERROR = 0, SUCCESS = !ERROR} error_status;
|
||||
#define BPR_BASE (APB1PERIPH_BASE + 0x6C00)
|
||||
#define PWC_BASE (APB1PERIPH_BASE + 0x7000)
|
||||
#define DAC_BASE (APB1PERIPH_BASE + 0x7400)
|
||||
/* apb2 bus base address */
|
||||
/* apb2 bus base address */
|
||||
#define IOMUX_BASE (APB2PERIPH_BASE + 0x0000)
|
||||
#define EXINT_BASE (APB2PERIPH_BASE + 0x0400)
|
||||
#define GPIOA_BASE (APB2PERIPH_BASE + 0x0800)
|
||||
@@ -525,7 +550,7 @@ typedef enum {ERROR = 0, SUCCESS = !ERROR} error_status;
|
||||
#define I2S2EXT_BASE (APB2PERIPH_BASE + 0x6C00)
|
||||
#define I2S3EXT_BASE (APB2PERIPH_BASE + 0x7000)
|
||||
#define SDIO1_BASE (APB2PERIPH_BASE + 0x8000)
|
||||
/* ahb bus base address */
|
||||
/* ahb bus base address */
|
||||
#define DMA1_BASE (AHBPERIPH_BASE + 0x0000)
|
||||
#define DMA1_CHANNEL1_BASE (AHBPERIPH_BASE + 0x0008)
|
||||
#define DMA1_CHANNEL2_BASE (AHBPERIPH_BASE + 0x001C)
|
||||
@@ -548,11 +573,7 @@ typedef enum {ERROR = 0, SUCCESS = !ERROR} error_status;
|
||||
#define SDIO2_BASE (AHBPERIPH_BASE + 0x3400)
|
||||
#define EMAC_BASE (AHBPERIPH_BASE + 0x8000)
|
||||
#define XMC_BANK1_REG_BASE (XMC_REG_BASE + 0x0000)
|
||||
#define XMC_BANK1E_REG_BASE (XMC_REG_BASE + 0x0104)
|
||||
#define XMC_BANK1E_H_BASE (XMC_REG_BASE + 0x0220)
|
||||
#define XMC_BANK2_REG_BASE (XMC_REG_BASE + 0x0060)
|
||||
#define XMC_BANK3_REG_BASE (XMC_REG_BASE + 0x0080)
|
||||
#define XMC_BANK4_REG_BASE (XMC_REG_BASE + 0x00A0)
|
||||
#define EMAC_MMC_BASE (EMAC_BASE + 0x0100)
|
||||
#define EMAC_PTP_BASE (EMAC_BASE + 0x0700)
|
||||
#define EMAC_DMA_BASE (EMAC_BASE + 0x1000)
|
||||
@@ -569,7 +590,8 @@ typedef enum {ERROR = 0, SUCCESS = !ERROR} error_status;
|
||||
/**
|
||||
* @}
|
||||
*/
|
||||
|
||||
|
||||
#include "at32f403a_407_def.h"
|
||||
#include "at32f403a_407_conf.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
@@ -1,17 +1,15 @@
|
||||
/**
|
||||
**************************************************************************
|
||||
* @file at32f403a_407_conf.h
|
||||
* @version v2.0.4
|
||||
* @date 2021-11-26
|
||||
* @brief at32f403a_407 config header file
|
||||
**************************************************************************
|
||||
* Copyright notice & Disclaimer
|
||||
*
|
||||
* The software Board Support Package (BSP) that is made available to
|
||||
* download from Artery official website is the copyrighted work of Artery.
|
||||
* Artery authorizes customers to use, copy, and distribute the BSP
|
||||
* software and its related documentation for the purpose of design and
|
||||
* development in conjunction with Artery microcontrollers. Use of the
|
||||
* The software Board Support Package (BSP) that is made available to
|
||||
* download from Artery official website is the copyrighted work of Artery.
|
||||
* Artery authorizes customers to use, copy, and distribute the BSP
|
||||
* software and its related documentation for the purpose of design and
|
||||
* development in conjunction with Artery microcontrollers. Use of the
|
||||
* software is governed by this copyright notice and the following disclaimer.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED ON "AS IS" BASIS WITHOUT WARRANTIES,
|
||||
@@ -31,7 +29,7 @@
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* @brief in the following line adjust the value of high speed exernal crystal (hext)
|
||||
@@ -47,8 +45,9 @@ extern "C" {
|
||||
* @brief in the following line adjust the high speed exernal crystal (hext) startup
|
||||
* timeout value
|
||||
*/
|
||||
#define HEXT_STARTUP_TIMEOUT ((uint16_t)0x3000) /*!< time out for hext start up */
|
||||
#define HICK_VALUE ((uint32_t)8000000) /*!< value of the high speed internal clock in hz */
|
||||
#define HEXT_STARTUP_TIMEOUT ((uint16_t)0x3000) /*!< time out for hext start up */
|
||||
#define HICK_VALUE ((uint32_t)8000000) /*!< value of the high speed internal clock in hz */
|
||||
#define LEXT_VALUE ((uint32_t)32768) /*!< value of the low speed exernal clock in hz */
|
||||
|
||||
/* module define -------------------------------------------------------------*/
|
||||
#define CRM_MODULE_ENABLED
|
||||
|
||||
@@ -22,7 +22,7 @@
|
||||
ENTRY(Reset_Handler)
|
||||
|
||||
/* Highest address of the user mode stack */
|
||||
_estack = 0x20017FFF; /* end of RAM */
|
||||
_estack = 0x20018000; /* end of RAM */
|
||||
|
||||
/* Generate a link error if heap and stack don't fit into RAM */
|
||||
_Min_Heap_Size = 0x200; /* required amount of heap */
|
||||
@@ -33,6 +33,7 @@ MEMORY
|
||||
{
|
||||
FLASH (rx) : ORIGIN = 0x08000000, LENGTH = 256K
|
||||
RAM (xrw) : ORIGIN = 0x20000000, LENGTH = 96K
|
||||
SPIM (rx) : ORIGIN = 0x08400000, LENGTH = 16384K
|
||||
}
|
||||
|
||||
/* Define output sections */
|
||||
@@ -115,6 +116,19 @@ SECTIONS
|
||||
_edata = .; /* define a global symbol at data end */
|
||||
} >RAM AT> FLASH
|
||||
|
||||
_spim_init_base = LOADADDR(.spim);
|
||||
_spim_init_length = SIZEOF(.spim);
|
||||
|
||||
.spim :
|
||||
{
|
||||
. = ALIGN(4);
|
||||
_spim_start = .; /* create a global symbol at spim start */
|
||||
*(.spim) /* .spim sections */
|
||||
*(.spim*) /* .spim* sections */
|
||||
. = ALIGN(4);
|
||||
_spim_end = .; /* define a global symbols at end of spim */
|
||||
} >SPIM
|
||||
|
||||
/* Uninitialized data section */
|
||||
. = ALIGN(4);
|
||||
.bss :
|
||||
@@ -134,12 +148,12 @@ SECTIONS
|
||||
/* User_heap_stack section, used to check that there is enough RAM left */
|
||||
._user_heap_stack :
|
||||
{
|
||||
. = ALIGN(4);
|
||||
. = ALIGN(8);
|
||||
PROVIDE ( end = . );
|
||||
PROVIDE ( _end = . );
|
||||
. = . + _Min_Heap_Size;
|
||||
. = . + _Min_Stack_Size;
|
||||
. = ALIGN(4);
|
||||
. = ALIGN(8);
|
||||
} >RAM
|
||||
|
||||
/* Remove information from the standard libraries */
|
||||
|
||||
@@ -22,7 +22,7 @@
|
||||
ENTRY(Reset_Handler)
|
||||
|
||||
/* Highest address of the user mode stack */
|
||||
_estack = 0x20017FFF; /* end of RAM */
|
||||
_estack = 0x20018000; /* end of RAM */
|
||||
|
||||
/* Generate a link error if heap and stack don't fit into RAM */
|
||||
_Min_Heap_Size = 0x200; /* required amount of heap */
|
||||
@@ -33,6 +33,7 @@ MEMORY
|
||||
{
|
||||
FLASH (rx) : ORIGIN = 0x08000000, LENGTH = 512K
|
||||
RAM (xrw) : ORIGIN = 0x20000000, LENGTH = 96K
|
||||
SPIM (rx) : ORIGIN = 0x08400000, LENGTH = 16384K
|
||||
}
|
||||
|
||||
/* Define output sections */
|
||||
@@ -115,6 +116,19 @@ SECTIONS
|
||||
_edata = .; /* define a global symbol at data end */
|
||||
} >RAM AT> FLASH
|
||||
|
||||
_spim_init_base = LOADADDR(.spim);
|
||||
_spim_init_length = SIZEOF(.spim);
|
||||
|
||||
.spim :
|
||||
{
|
||||
. = ALIGN(4);
|
||||
_spim_start = .; /* create a global symbol at spim start */
|
||||
*(.spim) /* .spim sections */
|
||||
*(.spim*) /* .spim* sections */
|
||||
. = ALIGN(4);
|
||||
_spim_end = .; /* define a global symbol at end of spim */
|
||||
} >SPIM
|
||||
|
||||
/* Uninitialized data section */
|
||||
. = ALIGN(4);
|
||||
.bss :
|
||||
@@ -134,12 +148,12 @@ SECTIONS
|
||||
/* User_heap_stack section, used to check that there is enough RAM left */
|
||||
._user_heap_stack :
|
||||
{
|
||||
. = ALIGN(4);
|
||||
. = ALIGN(8);
|
||||
PROVIDE ( end = . );
|
||||
PROVIDE ( _end = . );
|
||||
. = . + _Min_Heap_Size;
|
||||
. = . + _Min_Stack_Size;
|
||||
. = ALIGN(4);
|
||||
. = ALIGN(8);
|
||||
} >RAM
|
||||
|
||||
/* Remove information from the standard libraries */
|
||||
|
||||
@@ -22,7 +22,7 @@
|
||||
ENTRY(Reset_Handler)
|
||||
|
||||
/* Highest address of the user mode stack */
|
||||
_estack = 0x20017FFF; /* end of RAM */
|
||||
_estack = 0x20018000; /* end of RAM */
|
||||
|
||||
/* Generate a link error if heap and stack don't fit into RAM */
|
||||
_Min_Heap_Size = 0x200; /* required amount of heap */
|
||||
@@ -33,6 +33,7 @@ MEMORY
|
||||
{
|
||||
FLASH (rx) : ORIGIN = 0x08000000, LENGTH = 1000K
|
||||
RAM (xrw) : ORIGIN = 0x20000000, LENGTH = 96K
|
||||
SPIM (rx) : ORIGIN = 0x08400000, LENGTH = 16384K
|
||||
}
|
||||
|
||||
/* Define output sections */
|
||||
@@ -115,6 +116,19 @@ SECTIONS
|
||||
_edata = .; /* define a global symbol at data end */
|
||||
} >RAM AT> FLASH
|
||||
|
||||
_spim_init_base = LOADADDR(.spim);
|
||||
_spim_init_length = SIZEOF(.spim);
|
||||
|
||||
.spim :
|
||||
{
|
||||
. = ALIGN(4);
|
||||
_spim_start = .; /* create a global symbol at spim start */
|
||||
*(.spim) /* .spim sections */
|
||||
*(.spim*) /* .spim* sections */
|
||||
. = ALIGN(4);
|
||||
_spim_end = .; /* define a global symbol at end of spim */
|
||||
} >SPIM
|
||||
|
||||
/* Uninitialized data section */
|
||||
. = ALIGN(4);
|
||||
.bss :
|
||||
@@ -134,12 +148,12 @@ SECTIONS
|
||||
/* User_heap_stack section, used to check that there is enough RAM left */
|
||||
._user_heap_stack :
|
||||
{
|
||||
. = ALIGN(4);
|
||||
. = ALIGN(8);
|
||||
PROVIDE ( end = . );
|
||||
PROVIDE ( _end = . );
|
||||
. = . + _Min_Heap_Size;
|
||||
. = . + _Min_Stack_Size;
|
||||
. = ALIGN(4);
|
||||
. = ALIGN(8);
|
||||
} >RAM
|
||||
|
||||
/* Remove information from the standard libraries */
|
||||
|
||||
@@ -22,7 +22,7 @@
|
||||
ENTRY(Reset_Handler)
|
||||
|
||||
/* Highest address of the user mode stack */
|
||||
_estack = 0x20017FFF; /* end of RAM */
|
||||
_estack = 0x20018000; /* end of RAM */
|
||||
|
||||
/* Generate a link error if heap and stack don't fit into RAM */
|
||||
_Min_Heap_Size = 0x200; /* required amount of heap */
|
||||
@@ -33,6 +33,7 @@ MEMORY
|
||||
{
|
||||
FLASH (rx) : ORIGIN = 0x08000000, LENGTH = 256K
|
||||
RAM (xrw) : ORIGIN = 0x20000000, LENGTH = 96K
|
||||
SPIM (rx) : ORIGIN = 0x08400000, LENGTH = 16384K
|
||||
}
|
||||
|
||||
/* Define output sections */
|
||||
@@ -115,6 +116,19 @@ SECTIONS
|
||||
_edata = .; /* define a global symbol at data end */
|
||||
} >RAM AT> FLASH
|
||||
|
||||
_spim_init_base = LOADADDR(.spim);
|
||||
_spim_init_length = SIZEOF(.spim);
|
||||
|
||||
.spim :
|
||||
{
|
||||
. = ALIGN(4);
|
||||
_spim_start = .; /* create a global symbol at spim start */
|
||||
*(.spim) /* .spim sections */
|
||||
*(.spim*) /* .spim* sections */
|
||||
. = ALIGN(4);
|
||||
_spim_end = .; /* define a global symbol at end of spim */
|
||||
} >SPIM
|
||||
|
||||
/* Uninitialized data section */
|
||||
. = ALIGN(4);
|
||||
.bss :
|
||||
@@ -134,12 +148,12 @@ SECTIONS
|
||||
/* User_heap_stack section, used to check that there is enough RAM left */
|
||||
._user_heap_stack :
|
||||
{
|
||||
. = ALIGN(4);
|
||||
. = ALIGN(8);
|
||||
PROVIDE ( end = . );
|
||||
PROVIDE ( _end = . );
|
||||
. = . + _Min_Heap_Size;
|
||||
. = . + _Min_Stack_Size;
|
||||
. = ALIGN(4);
|
||||
. = ALIGN(8);
|
||||
} >RAM
|
||||
|
||||
/* Remove information from the standard libraries */
|
||||
|
||||
@@ -22,7 +22,7 @@
|
||||
ENTRY(Reset_Handler)
|
||||
|
||||
/* Highest address of the user mode stack */
|
||||
_estack = 0x20017FFF; /* end of RAM */
|
||||
_estack = 0x20018000; /* end of RAM */
|
||||
|
||||
/* Generate a link error if heap and stack don't fit into RAM */
|
||||
_Min_Heap_Size = 0x200; /* required amount of heap */
|
||||
@@ -33,6 +33,7 @@ MEMORY
|
||||
{
|
||||
FLASH (rx) : ORIGIN = 0x08000000, LENGTH = 512K
|
||||
RAM (xrw) : ORIGIN = 0x20000000, LENGTH = 96K
|
||||
SPIM (rx) : ORIGIN = 0x08400000, LENGTH = 16384K
|
||||
}
|
||||
|
||||
/* Define output sections */
|
||||
@@ -115,6 +116,19 @@ SECTIONS
|
||||
_edata = .; /* define a global symbol at data end */
|
||||
} >RAM AT> FLASH
|
||||
|
||||
_spim_init_base = LOADADDR(.spim);
|
||||
_spim_init_length = SIZEOF(.spim);
|
||||
|
||||
.spim :
|
||||
{
|
||||
. = ALIGN(4);
|
||||
_spim_start = .; /* create a global symbol at spim start */
|
||||
*(.spim) /* .spim sections */
|
||||
*(.spim*) /* .spim* sections */
|
||||
. = ALIGN(4);
|
||||
_spim_end = .; /* define a global symbol at end of spim */
|
||||
} >SPIM
|
||||
|
||||
/* Uninitialized data section */
|
||||
. = ALIGN(4);
|
||||
.bss :
|
||||
@@ -134,12 +148,12 @@ SECTIONS
|
||||
/* User_heap_stack section, used to check that there is enough RAM left */
|
||||
._user_heap_stack :
|
||||
{
|
||||
. = ALIGN(4);
|
||||
. = ALIGN(8);
|
||||
PROVIDE ( end = . );
|
||||
PROVIDE ( _end = . );
|
||||
. = . + _Min_Heap_Size;
|
||||
. = . + _Min_Stack_Size;
|
||||
. = ALIGN(4);
|
||||
. = ALIGN(8);
|
||||
} >RAM
|
||||
|
||||
/* Remove information from the standard libraries */
|
||||
|
||||
@@ -22,7 +22,7 @@
|
||||
ENTRY(Reset_Handler)
|
||||
|
||||
/* Highest address of the user mode stack */
|
||||
_estack = 0x20017FFF; /* end of RAM */
|
||||
_estack = 0x20018000; /* end of RAM */
|
||||
|
||||
/* Generate a link error if heap and stack don't fit into RAM */
|
||||
_Min_Heap_Size = 0x200; /* required amount of heap */
|
||||
@@ -33,6 +33,7 @@ MEMORY
|
||||
{
|
||||
FLASH (rx) : ORIGIN = 0x08000000, LENGTH = 1000K
|
||||
RAM (xrw) : ORIGIN = 0x20000000, LENGTH = 96K
|
||||
SPIM (rx) : ORIGIN = 0x08400000, LENGTH = 16384K
|
||||
}
|
||||
|
||||
/* Define output sections */
|
||||
@@ -115,6 +116,19 @@ SECTIONS
|
||||
_edata = .; /* define a global symbol at data end */
|
||||
} >RAM AT> FLASH
|
||||
|
||||
_spim_init_base = LOADADDR(.spim);
|
||||
_spim_init_length = SIZEOF(.spim);
|
||||
|
||||
.spim :
|
||||
{
|
||||
. = ALIGN(4);
|
||||
_spim_start = .; /* create a global symbol at spim start */
|
||||
*(.spim) /* .spim sections */
|
||||
*(.spim*) /* .spim* sections */
|
||||
. = ALIGN(4);
|
||||
_spim_end = .; /* define a global symbol at end of spim */
|
||||
} >SPIM
|
||||
|
||||
/* Uninitialized data section */
|
||||
. = ALIGN(4);
|
||||
.bss :
|
||||
@@ -134,12 +148,12 @@ SECTIONS
|
||||
/* User_heap_stack section, used to check that there is enough RAM left */
|
||||
._user_heap_stack :
|
||||
{
|
||||
. = ALIGN(4);
|
||||
. = ALIGN(8);
|
||||
PROVIDE ( end = . );
|
||||
PROVIDE ( _end = . );
|
||||
. = . + _Min_Heap_Size;
|
||||
. = . + _Min_Stack_Size;
|
||||
. = ALIGN(4);
|
||||
. = ALIGN(8);
|
||||
} >RAM
|
||||
|
||||
/* Remove information from the standard libraries */
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
/**
|
||||
******************************************************************************
|
||||
* @file startup_at32f403a_407.s
|
||||
* @version v2.0.4
|
||||
* @date 2021-11-26
|
||||
* @brief at32f403a_407xx devices vector table for gcc toolchain.
|
||||
* this module performs:
|
||||
* - set the initial sp
|
||||
@@ -106,7 +104,7 @@ Infinite_Loop:
|
||||
* The minimal vector table for a Cortex M4. Note that the proper constructs
|
||||
* must be placed on this to ensure that it ends up at physical address
|
||||
* 0x0000.0000.
|
||||
*
|
||||
*
|
||||
*******************************************************************************/
|
||||
.section .isr_vector,"a",%progbits
|
||||
.type g_pfnVectors, %object
|
||||
@@ -130,7 +128,7 @@ g_pfnVectors:
|
||||
.word 0
|
||||
.word PendSV_Handler
|
||||
.word SysTick_Handler
|
||||
|
||||
|
||||
/* External Interrupts */
|
||||
.word WWDT_IRQHandler /* Window Watchdog Timer */
|
||||
.word PVM_IRQHandler /* PVM through EXINT Line detect */
|
||||
@@ -216,20 +214,20 @@ g_pfnVectors:
|
||||
|
||||
/*******************************************************************************
|
||||
*
|
||||
* Provide weak aliases for each Exception handler to the Default_Handler.
|
||||
* As they are weak aliases, any function with the same name will override
|
||||
* Provide weak aliases for each Exception handler to the Default_Handler.
|
||||
* As they are weak aliases, any function with the same name will override
|
||||
* this definition.
|
||||
*
|
||||
*
|
||||
*******************************************************************************/
|
||||
.weak NMI_Handler
|
||||
.thumb_set NMI_Handler,Default_Handler
|
||||
|
||||
|
||||
.weak HardFault_Handler
|
||||
.thumb_set HardFault_Handler,Default_Handler
|
||||
|
||||
|
||||
.weak MemManage_Handler
|
||||
.thumb_set MemManage_Handler,Default_Handler
|
||||
|
||||
|
||||
.weak BusFault_Handler
|
||||
.thumb_set BusFault_Handler,Default_Handler
|
||||
|
||||
@@ -246,10 +244,10 @@ g_pfnVectors:
|
||||
.thumb_set PendSV_Handler,Default_Handler
|
||||
|
||||
.weak SysTick_Handler
|
||||
.thumb_set SysTick_Handler,Default_Handler
|
||||
|
||||
.thumb_set SysTick_Handler,Default_Handler
|
||||
|
||||
.weak WWDT_IRQHandler
|
||||
.thumb_set WWDT_IRQHandler,Default_Handler
|
||||
.thumb_set WWDT_IRQHandler,Default_Handler
|
||||
|
||||
.weak PVM_IRQHandler
|
||||
.thumb_set PVM_IRQHandler,Default_Handler
|
||||
@@ -273,7 +271,7 @@ g_pfnVectors:
|
||||
.thumb_set EXINT1_IRQHandler,Default_Handler
|
||||
|
||||
.weak EXINT2_IRQHandler
|
||||
.thumb_set EXINT2_IRQHandler,Default_Handler
|
||||
.thumb_set EXINT2_IRQHandler,Default_Handler
|
||||
|
||||
.weak EXINT3_IRQHandler
|
||||
.thumb_set EXINT3_IRQHandler,Default_Handler
|
||||
@@ -291,7 +289,7 @@ g_pfnVectors:
|
||||
.thumb_set DMA1_Channel3_IRQHandler,Default_Handler
|
||||
|
||||
.weak DMA1_Channel4_IRQHandler
|
||||
.thumb_set DMA1_Channel4_IRQHandler,Default_Handler
|
||||
.thumb_set DMA1_Channel4_IRQHandler,Default_Handler
|
||||
|
||||
.weak DMA1_Channel5_IRQHandler
|
||||
.thumb_set DMA1_Channel5_IRQHandler,Default_Handler
|
||||
@@ -443,10 +441,10 @@ g_pfnVectors:
|
||||
.weak CAN2_TX_IRQHandler
|
||||
.thumb_set CAN2_TX_IRQHandler,Default_Handler
|
||||
|
||||
.weak CAN2_RX0_IRQHandler
|
||||
.weak CAN2_RX0_IRQHandler
|
||||
.thumb_set CAN2_RX0_IRQHandler ,Default_Handler
|
||||
|
||||
.weak CAN2_RX1_IRQHandler
|
||||
.weak CAN2_RX1_IRQHandler
|
||||
.thumb_set CAN2_RX1_IRQHandler ,Default_Handler
|
||||
|
||||
.weak CAN2_SE_IRQHandler
|
||||
|
||||
@@ -1,7 +1,5 @@
|
||||
;**************************************************************************
|
||||
;* @file startup_at32f403a_407.s
|
||||
;* @version v2.0.4
|
||||
;* @date 2021-11-26
|
||||
;* @brief at32f403a_407 startup file for IAR Systems
|
||||
;**************************************************************************
|
||||
;
|
||||
@@ -64,8 +62,8 @@ __vector_table
|
||||
DCD DMA1_Channel7_IRQHandler ; DMA1 Channel 7
|
||||
DCD ADC1_2_IRQHandler ; ADC1 & ADC2
|
||||
DCD USBFS_H_CAN1_TX_IRQHandler ; USB High Priority or CAN1 TX
|
||||
DCD USBFS_L_CAN1_RX0_IRQHandler ; USB Low Priority or CAN1 RX0
|
||||
DCD CAN1_RX1_IRQHandler ; CAN1 RX1
|
||||
DCD USBFS_L_CAN1_RX0_IRQHandler ; USB Low Priority or CAN1 RX0
|
||||
DCD CAN1_RX1_IRQHandler ; CAN1 RX1
|
||||
DCD CAN1_SE_IRQHandler ; CAN1 SE
|
||||
DCD EXINT9_5_IRQHandler ; EXINT Line [9:5]
|
||||
DCD TMR1_BRK_TMR9_IRQHandler ; TMR1 Brake and TMR9
|
||||
@@ -113,8 +111,8 @@ __vector_table
|
||||
DCD 0 ; Reserved
|
||||
DCD 0 ; Reserved
|
||||
DCD CAN2_TX_IRQHandler ; CAN2 TX
|
||||
DCD CAN2_RX0_IRQHandler ; CAN2 RX0
|
||||
DCD CAN2_RX1_IRQHandler ; CAN2 RX1
|
||||
DCD CAN2_RX0_IRQHandler ; CAN2 RX0
|
||||
DCD CAN2_RX1_IRQHandler ; CAN2 RX1
|
||||
DCD CAN2_SE_IRQHandler ; CAN2 SE
|
||||
DCD ACC_IRQHandler ; ACC
|
||||
DCD USBFS_MAPH_IRQHandler ; USB Map HP
|
||||
@@ -285,15 +283,15 @@ ADC1_2_IRQHandler
|
||||
USBFS_H_CAN1_TX_IRQHandler
|
||||
B USBFS_H_CAN1_TX_IRQHandler
|
||||
|
||||
PUBWEAK USBFS_L_CAN1_RX0_IRQHandler
|
||||
PUBWEAK USBFS_L_CAN1_RX0_IRQHandler
|
||||
SECTION .text:CODE:REORDER:NOROOT(1)
|
||||
USBFS_L_CAN1_RX0_IRQHandler
|
||||
B USBFS_L_CAN1_RX0_IRQHandler
|
||||
USBFS_L_CAN1_RX0_IRQHandler
|
||||
B USBFS_L_CAN1_RX0_IRQHandler
|
||||
|
||||
PUBWEAK CAN1_RX1_IRQHandler
|
||||
PUBWEAK CAN1_RX1_IRQHandler
|
||||
SECTION .text:CODE:REORDER:NOROOT(1)
|
||||
CAN1_RX1_IRQHandler
|
||||
B CAN1_RX1_IRQHandler
|
||||
CAN1_RX1_IRQHandler
|
||||
B CAN1_RX1_IRQHandler
|
||||
|
||||
PUBWEAK CAN1_SE_IRQHandler
|
||||
SECTION .text:CODE:REORDER:NOROOT(1)
|
||||
@@ -510,15 +508,15 @@ SPI4_IRQHandler
|
||||
CAN2_TX_IRQHandler
|
||||
B CAN2_TX_IRQHandler
|
||||
|
||||
PUBWEAK CAN2_RX0_IRQHandler
|
||||
PUBWEAK CAN2_RX0_IRQHandler
|
||||
SECTION .text:CODE:REORDER:NOROOT(1)
|
||||
CAN2_RX0_IRQHandler
|
||||
B CAN2_RX0_IRQHandler
|
||||
CAN2_RX0_IRQHandler
|
||||
B CAN2_RX0_IRQHandler
|
||||
|
||||
PUBWEAK CAN2_RX1_IRQHandler
|
||||
PUBWEAK CAN2_RX1_IRQHandler
|
||||
SECTION .text:CODE:REORDER:NOROOT(1)
|
||||
CAN2_RX1_IRQHandler
|
||||
B CAN2_RX1_IRQHandler
|
||||
CAN2_RX1_IRQHandler
|
||||
B CAN2_RX1_IRQHandler
|
||||
|
||||
PUBWEAK CAN2_SE_IRQHandler
|
||||
SECTION .text:CODE:REORDER:NOROOT(1)
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
;**************************************************************************
|
||||
;* @file startup_at32f403a_407.s
|
||||
;* @version v2.0.4
|
||||
;* @date 2021-11-26
|
||||
;* @brief at32f403a_407 startup file for keil
|
||||
;* <<< Use Configuration Wizard in Context Menu >>>
|
||||
;**************************************************************************
|
||||
;
|
||||
|
||||
@@ -77,8 +76,8 @@ __Vectors DCD __initial_sp ; Top of Stack
|
||||
DCD DMA1_Channel7_IRQHandler ; DMA1 Channel 7
|
||||
DCD ADC1_2_IRQHandler ; ADC1 & ADC2
|
||||
DCD USBFS_H_CAN1_TX_IRQHandler ; USB High Priority or CAN1 TX
|
||||
DCD USBFS_L_CAN1_RX0_IRQHandler ; USB Low Priority or CAN1 RX0
|
||||
DCD CAN1_RX1_IRQHandler ; CAN1 RX1
|
||||
DCD USBFS_L_CAN1_RX0_IRQHandler ; USB Low Priority or CAN1 RX0
|
||||
DCD CAN1_RX1_IRQHandler ; CAN1 RX1
|
||||
DCD CAN1_SE_IRQHandler ; CAN1 SE
|
||||
DCD EXINT9_5_IRQHandler ; EXINT Line [9:5]
|
||||
DCD TMR1_BRK_TMR9_IRQHandler ; TMR1 Brake and TMR9
|
||||
@@ -126,8 +125,8 @@ __Vectors DCD __initial_sp ; Top of Stack
|
||||
DCD 0 ; Reserved
|
||||
DCD 0 ; Reserved
|
||||
DCD CAN2_TX_IRQHandler ; CAN2 TX
|
||||
DCD CAN2_RX0_IRQHandler ; CAN2 RX0
|
||||
DCD CAN2_RX1_IRQHandler ; CAN2 RX1
|
||||
DCD CAN2_RX0_IRQHandler ; CAN2 RX0
|
||||
DCD CAN2_RX1_IRQHandler ; CAN2 RX1
|
||||
DCD CAN2_SE_IRQHandler ; CAN2 SE
|
||||
DCD ACC_IRQHandler ; ACC
|
||||
DCD USBFS_MAPH_IRQHandler ; USB Map High
|
||||
@@ -299,8 +298,8 @@ DMA1_Channel6_IRQHandler
|
||||
DMA1_Channel7_IRQHandler
|
||||
ADC1_2_IRQHandler
|
||||
USBFS_H_CAN1_TX_IRQHandler
|
||||
USBFS_L_CAN1_RX0_IRQHandler
|
||||
CAN1_RX1_IRQHandler
|
||||
USBFS_L_CAN1_RX0_IRQHandler
|
||||
CAN1_RX1_IRQHandler
|
||||
CAN1_SE_IRQHandler
|
||||
EXINT9_5_IRQHandler
|
||||
TMR1_BRK_TMR9_IRQHandler
|
||||
@@ -344,8 +343,8 @@ I2C3_EVT_IRQHandler
|
||||
I2C3_ERR_IRQHandler
|
||||
SPI4_IRQHandler
|
||||
CAN2_TX_IRQHandler
|
||||
CAN2_RX0_IRQHandler
|
||||
CAN2_RX1_IRQHandler
|
||||
CAN2_RX0_IRQHandler
|
||||
CAN2_RX1_IRQHandler
|
||||
CAN2_SE_IRQHandler
|
||||
ACC_IRQHandler
|
||||
USBFS_MAPH_IRQHandler
|
||||
|
||||
@@ -1,17 +1,15 @@
|
||||
/**
|
||||
**************************************************************************
|
||||
* @file system_at32f403a_407.c
|
||||
* @version v2.0.4
|
||||
* @date 2021-11-26
|
||||
* @brief contains all the functions for cmsis cortex-m4 system source file
|
||||
**************************************************************************
|
||||
* Copyright notice & Disclaimer
|
||||
*
|
||||
* The software Board Support Package (BSP) that is made available to
|
||||
* download from Artery official website is the copyrighted work of Artery.
|
||||
* Artery authorizes customers to use, copy, and distribute the BSP
|
||||
* software and its related documentation for the purpose of design and
|
||||
* development in conjunction with Artery microcontrollers. Use of the
|
||||
* The software Board Support Package (BSP) that is made available to
|
||||
* download from Artery official website is the copyrighted work of Artery.
|
||||
* Artery authorizes customers to use, copy, and distribute the BSP
|
||||
* software and its related documentation for the purpose of design and
|
||||
* development in conjunction with Artery microcontrollers. Use of the
|
||||
* software is governed by this copyright notice and the following disclaimer.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED ON "AS IS" BASIS WITHOUT WARRANTIES,
|
||||
@@ -31,7 +29,7 @@
|
||||
/** @addtogroup AT32F403A_407_system
|
||||
* @{
|
||||
*/
|
||||
|
||||
|
||||
#include "at32f403a_407.h"
|
||||
|
||||
/** @addtogroup AT32F403A_407_system_private_defines
|
||||
@@ -81,13 +79,13 @@ void SystemInit (void)
|
||||
/* wait sclk switch status */
|
||||
while(CRM->cfg_bit.sclksts != CRM_SCLK_HICK);
|
||||
|
||||
/* reset cfg register, include sclk switch, ahbdiv, apb1div, apb2div, adcdiv,
|
||||
clkout pllrcs, pllhextdiv, pllmult, usbdiv and pllrange bits */
|
||||
CRM->cfg = 0;
|
||||
|
||||
/* reset hexten, hextbyps, cfden and pllen bits */
|
||||
CRM->ctrl &= ~(0x010D0000U);
|
||||
|
||||
/* reset cfg register, include sclk switch, ahbdiv, apb1div, apb2div, adcdiv,
|
||||
clkout pllrcs, pllhextdiv, pllmult, usbdiv and pllrange bits */
|
||||
CRM->cfg = 0;
|
||||
|
||||
/* reset clkout[3], usbbufs, hickdiv, clkoutdiv */
|
||||
CRM->misc1 = 0;
|
||||
|
||||
@@ -182,7 +180,7 @@ void system_core_clock_update(void)
|
||||
/**
|
||||
* @}
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @}
|
||||
*/
|
||||
|
||||
@@ -1,17 +1,15 @@
|
||||
/**
|
||||
**************************************************************************
|
||||
* @file system_at32f403a_407.h
|
||||
* @version v2.0.4
|
||||
* @date 2021-11-26
|
||||
* @brief cmsis cortex-m4 system header file.
|
||||
**************************************************************************
|
||||
* Copyright notice & Disclaimer
|
||||
*
|
||||
* The software Board Support Package (BSP) that is made available to
|
||||
* download from Artery official website is the copyrighted work of Artery.
|
||||
* Artery authorizes customers to use, copy, and distribute the BSP
|
||||
* software and its related documentation for the purpose of design and
|
||||
* development in conjunction with Artery microcontrollers. Use of the
|
||||
* The software Board Support Package (BSP) that is made available to
|
||||
* download from Artery official website is the copyrighted work of Artery.
|
||||
* Artery authorizes customers to use, copy, and distribute the BSP
|
||||
* software and its related documentation for the purpose of design and
|
||||
* development in conjunction with Artery microcontrollers. Use of the
|
||||
* software is governed by this copyright notice and the following disclaimer.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED ON "AS IS" BASIS WITHOUT WARRANTIES,
|
||||
@@ -39,18 +37,19 @@ extern "C" {
|
||||
* @{
|
||||
*/
|
||||
|
||||
/** @defgroup AT32F403A_407_system_clock_stable_definition
|
||||
/** @defgroup AT32F403A_407_system_clock_stable_definition
|
||||
* @{
|
||||
*/
|
||||
|
||||
#define HEXT_STABLE_DELAY (5000u)
|
||||
#define PLL_STABLE_DELAY (500u)
|
||||
#define SystemCoreClock system_core_clock
|
||||
|
||||
/**
|
||||
* @}
|
||||
*/
|
||||
|
||||
/** @defgroup AT32F403A_407_system_exported_variables
|
||||
/** @defgroup AT32F403A_407_system_exported_variables
|
||||
* @{
|
||||
*/
|
||||
|
||||
@@ -60,10 +59,10 @@ extern unsigned int system_core_clock; /*!< system clock frequency (core clock)
|
||||
* @}
|
||||
*/
|
||||
|
||||
/** @defgroup AT32F403A_407_system_exported_functions
|
||||
/** @defgroup AT32F403A_407_system_exported_functions
|
||||
* @{
|
||||
*/
|
||||
|
||||
|
||||
extern void SystemInit(void);
|
||||
extern void system_core_clock_update(void);
|
||||
|
||||
|
||||
414
libraries/cmsis/dsp/ComputeLibrary/Include/NEMath.h
Normal file
414
libraries/cmsis/dsp/ComputeLibrary/Include/NEMath.h
Normal file
@@ -0,0 +1,414 @@
|
||||
/*
|
||||
* Copyright (c) 2016, 2019 ARM Limited.
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal in the Software without restriction, including without limitation the
|
||||
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
* sell copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
#ifndef __ARM_COMPUTE_NEMATH_H__
|
||||
#define __ARM_COMPUTE_NEMATH_H__
|
||||
|
||||
|
||||
#if defined(ARM_MATH_NEON)
|
||||
/** Calculate floor of a vector.
|
||||
*
|
||||
* @param[in] val Input vector value in F32 format.
|
||||
*
|
||||
* @return The calculated floor vector.
|
||||
*/
|
||||
static inline float32x4_t vfloorq_f32(float32x4_t val);
|
||||
|
||||
/** Calculate inverse square root.
|
||||
*
|
||||
* @param[in] x Input value.
|
||||
*
|
||||
* @return The calculated inverse square root.
|
||||
*/
|
||||
static inline float32x2_t vinvsqrt_f32(float32x2_t x);
|
||||
|
||||
/** Calculate inverse square root.
|
||||
*
|
||||
* @param[in] x Input value.
|
||||
*
|
||||
* @return The calculated inverse square root.
|
||||
*/
|
||||
static inline float32x4_t vinvsqrtq_f32(float32x4_t x);
|
||||
|
||||
/** Calculate reciprocal.
|
||||
*
|
||||
* @param[in] x Input value.
|
||||
*
|
||||
* @return The calculated reciprocal.
|
||||
*/
|
||||
static inline float32x2_t vinv_f32(float32x2_t x);
|
||||
|
||||
/** Calculate reciprocal.
|
||||
*
|
||||
* @param[in] x Input value.
|
||||
*
|
||||
* @return The calculated reciprocal.
|
||||
*/
|
||||
static inline float32x4_t vinvq_f32(float32x4_t x);
|
||||
|
||||
/** Perform a 7th degree polynomial approximation using Estrin's method.
|
||||
*
|
||||
* @param[in] x Input vector value in F32 format.
|
||||
* @param[in] coeffs Polynomial coefficients table. (array of flattened float32x4_t vectors)
|
||||
*
|
||||
* @return The calculated approximation.
|
||||
*/
|
||||
static inline float32x4_t vtaylor_polyq_f32(float32x4_t x, const float32_t *coeffs);
|
||||
|
||||
/** Calculate exponential
|
||||
*
|
||||
* @param[in] x Input vector value in F32 format.
|
||||
*
|
||||
* @return The calculated exponential.
|
||||
*/
|
||||
static inline float32x4_t vexpq_f32(float32x4_t x);
|
||||
|
||||
/** Calculate logarithm
|
||||
*
|
||||
* @param[in] x Input vector value in F32 format.
|
||||
*
|
||||
* @return The calculated logarithm.
|
||||
*/
|
||||
static inline float32x4_t vlogq_f32(float32x4_t x);
|
||||
|
||||
/** Calculate hyperbolic tangent.
|
||||
*
|
||||
* tanh(x) = (e^2x - 1)/(e^2x + 1)
|
||||
*
|
||||
* @note We clamp x to [-10,10] to avoid overflow issues.
|
||||
*
|
||||
* @param[in] val Input vector value in F32 format.
|
||||
*
|
||||
* @return The calculated Hyperbolic Tangent.
|
||||
*/
|
||||
static inline float32x4_t vtanhq_f32(float32x4_t val);
|
||||
|
||||
/** Calculate n power of a number.
|
||||
*
|
||||
* pow(x,n) = e^(n*log(x))
|
||||
*
|
||||
* @param[in] val Input vector value in F32 format.
|
||||
* @param[in] n Powers to raise the input to.
|
||||
*
|
||||
* @return The calculated power.
|
||||
*/
|
||||
static inline float32x4_t vpowq_f32(float32x4_t val, float32x4_t n);
|
||||
|
||||
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
|
||||
/** Calculate hyperbolic tangent.
|
||||
*
|
||||
* tanh(x) = (e^2x - 1)/(e^2x + 1)
|
||||
*
|
||||
* @note We clamp x to [-5,5] to avoid overflowing issues.
|
||||
*
|
||||
* @param[in] val Input vector value in F16 format.
|
||||
*
|
||||
* @return The calculated Hyperbolic Tangent.
|
||||
*/
|
||||
static inline float16x8_t vtanhq_f16(float16x8_t val);
|
||||
|
||||
/** Calculate reciprocal.
|
||||
*
|
||||
* @param[in] x Input value.
|
||||
*
|
||||
* @return The calculated reciprocal.
|
||||
*/
|
||||
static inline float16x4_t vinv_f16(float16x4_t x);
|
||||
|
||||
/** Calculate reciprocal.
|
||||
*
|
||||
* @param[in] x Input value.
|
||||
*
|
||||
* @return The calculated reciprocal.
|
||||
*/
|
||||
static inline float16x8_t vinvq_f16(float16x8_t x);
|
||||
|
||||
/** Calculate inverse square root.
|
||||
*
|
||||
* @param[in] x Input value.
|
||||
*
|
||||
* @return The calculated inverse square root.
|
||||
*/
|
||||
static inline float16x4_t vinvsqrt_f16(float16x4_t x);
|
||||
|
||||
/** Calculate inverse square root.
|
||||
*
|
||||
* @param[in] x Input value.
|
||||
*
|
||||
* @return The calculated inverse square root.
|
||||
*/
|
||||
static inline float16x8_t vinvsqrtq_f16(float16x8_t x);
|
||||
|
||||
/** Calculate exponential
|
||||
*
|
||||
* @param[in] x Input vector value in F16 format.
|
||||
*
|
||||
* @return The calculated exponential.
|
||||
*/
|
||||
static inline float16x8_t vexpq_f16(float16x8_t x);
|
||||
|
||||
/** Calculate n power of a number.
|
||||
*
|
||||
* pow(x,n) = e^(n*log(x))
|
||||
*
|
||||
* @param[in] val Input vector value in F16 format.
|
||||
* @param[in] n Powers to raise the input to.
|
||||
*
|
||||
* @return The calculated power.
|
||||
*/
|
||||
static inline float16x8_t vpowq_f16(float16x8_t val, float16x8_t n);
|
||||
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
|
||||
|
||||
/** Exponent polynomial coefficients */
|
||||
extern const float32_t exp_tab[4*8];
|
||||
|
||||
|
||||
/** Logarithm polynomial coefficients */
|
||||
extern const float32_t log_tab[4*8];
|
||||
|
||||
#ifndef DOXYGEN_SKIP_THIS
|
||||
inline float32x4_t vfloorq_f32(float32x4_t val)
|
||||
{
|
||||
static const float32_t CONST_1[4] = {1.f,1.f,1.f,1.f};
|
||||
|
||||
const int32x4_t z = vcvtq_s32_f32(val);
|
||||
const float32x4_t r = vcvtq_f32_s32(z);
|
||||
|
||||
return vbslq_f32(vcgtq_f32(r, val), vsubq_f32(r, vld1q_f32(CONST_1)), r);
|
||||
}
|
||||
|
||||
inline float32x2_t vinvsqrt_f32(float32x2_t x)
|
||||
{
|
||||
float32x2_t sqrt_reciprocal = vrsqrte_f32(x);
|
||||
sqrt_reciprocal = vmul_f32(vrsqrts_f32(vmul_f32(x, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal);
|
||||
sqrt_reciprocal = vmul_f32(vrsqrts_f32(vmul_f32(x, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal);
|
||||
|
||||
return sqrt_reciprocal;
|
||||
}
|
||||
|
||||
inline float32x4_t vinvsqrtq_f32(float32x4_t x)
|
||||
{
|
||||
float32x4_t sqrt_reciprocal = vrsqrteq_f32(x);
|
||||
sqrt_reciprocal = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal);
|
||||
sqrt_reciprocal = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal);
|
||||
|
||||
return sqrt_reciprocal;
|
||||
}
|
||||
|
||||
inline float32x2_t vinv_f32(float32x2_t x)
|
||||
{
|
||||
float32x2_t recip = vrecpe_f32(x);
|
||||
recip = vmul_f32(vrecps_f32(x, recip), recip);
|
||||
recip = vmul_f32(vrecps_f32(x, recip), recip);
|
||||
return recip;
|
||||
}
|
||||
|
||||
inline float32x4_t vinvq_f32(float32x4_t x)
|
||||
{
|
||||
float32x4_t recip = vrecpeq_f32(x);
|
||||
recip = vmulq_f32(vrecpsq_f32(x, recip), recip);
|
||||
recip = vmulq_f32(vrecpsq_f32(x, recip), recip);
|
||||
return recip;
|
||||
}
|
||||
|
||||
inline float32x4_t vtaylor_polyq_f32(float32x4_t x, const float32_t *coeffs)
|
||||
{
|
||||
float32x4_t A = vmlaq_f32(vld1q_f32(&coeffs[4*0]), vld1q_f32(&coeffs[4*4]), x);
|
||||
float32x4_t B = vmlaq_f32(vld1q_f32(&coeffs[4*2]), vld1q_f32(&coeffs[4*6]), x);
|
||||
float32x4_t C = vmlaq_f32(vld1q_f32(&coeffs[4*1]), vld1q_f32(&coeffs[4*5]), x);
|
||||
float32x4_t D = vmlaq_f32(vld1q_f32(&coeffs[4*3]), vld1q_f32(&coeffs[4*7]), x);
|
||||
float32x4_t x2 = vmulq_f32(x, x);
|
||||
float32x4_t x4 = vmulq_f32(x2, x2);
|
||||
float32x4_t res = vmlaq_f32(vmlaq_f32(A, B, x2), vmlaq_f32(C, D, x2), x4);
|
||||
return res;
|
||||
}
|
||||
|
||||
inline float32x4_t vexpq_f32(float32x4_t x)
|
||||
{
|
||||
static const float32_t CONST_LN2[4] = {0.6931471805f,0.6931471805f,0.6931471805f,0.6931471805f}; // ln(2)
|
||||
static const float32_t CONST_INV_LN2[4] = {1.4426950408f,1.4426950408f,1.4426950408f,1.4426950408f}; // 1/ln(2)
|
||||
static const float32_t CONST_0[4] = {0.f,0.f,0.f,0.f};
|
||||
static const int32_t CONST_NEGATIVE_126[4] = {-126,-126,-126,-126};
|
||||
|
||||
// Perform range reduction [-log(2),log(2)]
|
||||
int32x4_t m = vcvtq_s32_f32(vmulq_f32(x, vld1q_f32(CONST_INV_LN2)));
|
||||
float32x4_t val = vmlsq_f32(x, vcvtq_f32_s32(m), vld1q_f32(CONST_LN2));
|
||||
|
||||
// Polynomial Approximation
|
||||
float32x4_t poly = vtaylor_polyq_f32(val, exp_tab);
|
||||
|
||||
// Reconstruct
|
||||
poly = vreinterpretq_f32_s32(vqaddq_s32(vreinterpretq_s32_f32(poly), vqshlq_n_s32(m, 23)));
|
||||
poly = vbslq_f32(vcltq_s32(m, vld1q_s32(CONST_NEGATIVE_126)), vld1q_f32(CONST_0), poly);
|
||||
|
||||
return poly;
|
||||
}
|
||||
|
||||
/**
 * Per-lane single-precision natural logarithm.
 *
 * The raw bits of x are shifted right by 23 and rebased by 127 to obtain an
 * integer m; (m << 23) is then subtracted from the raw bits so that the
 * remaining value carries a rebased exponent. The polynomial in log_tab is
 * evaluated on that value and m*ln(2) is added back.
 *
 * NOTE(review): no special handling of zero, negative or non-finite inputs
 * is visible in this function.
 *
 * @param x  input vector
 * @return   vector holding the approximation of ln(x) for each lane
 */
inline float32x4_t vlogq_f32(float32x4_t x)
{
    static const int32_t   BIAS_LANES[4] = {127,127,127,127}; // 127
    static const float32_t LN2_LANES[4]  = {0.6931471805f,0.6931471805f,0.6931471805f,0.6931471805f}; // ln(2)

    // Extract exponent
    const uint32x4_t  shifted   = vshrq_n_u32(vreinterpretq_u32_f32(x), 23);
    const int32x4_t   m         = vsubq_s32(vreinterpretq_s32_u32(shifted), vld1q_s32(BIAS_LANES));
    const int32x4_t   mant_bits = vsubq_s32(vreinterpretq_s32_f32(x), vshlq_n_s32(m, 23));
    const float32x4_t mantissa  = vreinterpretq_f32_s32(mant_bits);

    // Polynomial approximation on the rebased value
    const float32x4_t log_mant = vtaylor_polyq_f32(mantissa, log_tab);

    // Reconstruct: log_mant + m * ln(2)
    return vmlaq_f32(log_mant, vcvtq_f32_s32(m), vld1q_f32(LN2_LANES));
}
|
||||
|
||||
/**
 * Per-lane single-precision hyperbolic tangent.
 *
 * The input is clamped to [-10, 10] and the result is computed as
 * (exp(2x) - 1) / (exp(2x) + 1), with the division done through vinvq_f32.
 *
 * @param val  input vector
 * @return     vector holding the approximation of tanh(val) for each lane
 */
inline float32x4_t vtanhq_f32(float32x4_t val)
{
    static const float32_t ONE_LANES[4]   = {1.f,1.f,1.f,1.f};
    static const float32_t TWO_LANES[4]   = {2.f,2.f,2.f,2.f};
    static const float32_t LOWER_LANES[4] = {-10.f,-10.f,-10.f,-10.f};
    static const float32_t UPPER_LANES[4] = {10.f,10.f,10.f,10.f};

    // Clamp the argument to [-10, 10]
    float32x4_t clamped = vmaxq_f32(val, vld1q_f32(LOWER_LANES));
    clamped = vminq_f32(clamped, vld1q_f32(UPPER_LANES));

    float32x4_t e2x         = vexpq_f32(vmulq_f32(vld1q_f32(TWO_LANES), clamped));
    float32x4_t numerator   = vsubq_f32(e2x, vld1q_f32(ONE_LANES));
    float32x4_t denominator = vaddq_f32(e2x, vld1q_f32(ONE_LANES));

    return vmulq_f32(numerator, vinvq_f32(denominator));
}
|
||||
|
||||
/**
 * Per-lane power function computed as exp(n * ln(val)).
 *
 * @param val  base vector
 * @param n    exponent vector
 * @return     vector holding the approximation of val^n for each lane
 */
inline float32x4_t vpowq_f32(float32x4_t val, float32x4_t n)
{
    const float32x4_t log_val = vlogq_f32(val);
    const float32x4_t scaled  = vmulq_f32(n, log_val);

    return vexpq_f32(scaled);
}
|
||||
#endif /* DOXYGEN_SKIP_THIS */
|
||||
|
||||
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
|
||||
/** Exponent polynomial coefficients */
|
||||
/** Logarithm polynomial coefficients */
|
||||
#ifndef DOXYGEN_SKIP_THIS
|
||||
/**
 * Per-lane floor for half-precision vectors.
 *
 * Each lane is converted to a 16-bit integer and back; where the converted
 * value is greater than the input, 1 is subtracted.
 *
 * @param val  input vector
 * @return     vector holding the floor of each lane
 */
inline float16x8_t vfloorq_f16(float16x8_t val)
{
    static const float16_t ONE_LANES[8] = {1.f,1.f,1.f,1.f,1.f,1.f,1.f,1.f};

    const int16x8_t   as_int     = vcvtq_s16_f16(val);
    const float16x8_t round_trip = vcvtq_f16_s16(as_int);

    // Lanes where the round trip went above the input need to step down by one
    const uint16x8_t  overshoot    = vcgtq_f16(round_trip, val);
    const float16x8_t stepped_down = vsubq_f16(round_trip, vld1q_f16(ONE_LANES));

    return vbslq_f16(overshoot, stepped_down, round_trip);
}
|
||||
/**
 * Per-lane reciprocal square root for 4-lane half-precision vectors.
 *
 * Starts from the vrsqrte_f16 estimate and applies two refinement steps
 * based on vrsqrts_f16.
 *
 * @param x  input vector
 * @return   vector holding the approximation of 1/sqrt(x) for each lane
 */
inline float16x4_t vinvsqrt_f16(float16x4_t x)
{
    float16x4_t estimate = vrsqrte_f16(x);

    for (int step = 0; step < 2; ++step)
    {
        const float16x4_t x_est = vmul_f16(x, estimate);
        estimate = vmul_f16(vrsqrts_f16(x_est, estimate), estimate);
    }

    return estimate;
}
|
||||
|
||||
/**
 * Per-lane reciprocal square root for 8-lane half-precision vectors.
 *
 * Starts from the vrsqrteq_f16 estimate and applies two refinement steps
 * based on vrsqrtsq_f16.
 *
 * @param x  input vector
 * @return   vector holding the approximation of 1/sqrt(x) for each lane
 */
inline float16x8_t vinvsqrtq_f16(float16x8_t x)
{
    float16x8_t estimate = vrsqrteq_f16(x);

    for (int step = 0; step < 2; ++step)
    {
        const float16x8_t x_est = vmulq_f16(x, estimate);
        estimate = vmulq_f16(vrsqrtsq_f16(x_est, estimate), estimate);
    }

    return estimate;
}
|
||||
|
||||
/**
 * Per-lane reciprocal for 4-lane half-precision vectors.
 *
 * Starts from the vrecpe_f16 estimate and applies two refinement steps
 * based on vrecps_f16.
 *
 * @param x  input vector
 * @return   vector holding the approximation of 1/x for each lane
 */
inline float16x4_t vinv_f16(float16x4_t x)
{
    float16x4_t estimate = vrecpe_f16(x);

    for (int step = 0; step < 2; ++step)
    {
        estimate = vmul_f16(vrecps_f16(x, estimate), estimate);
    }

    return estimate;
}
|
||||
|
||||
/**
 * Per-lane reciprocal for 8-lane half-precision vectors.
 *
 * Starts from the vrecpeq_f16 estimate and applies two refinement steps
 * based on vrecpsq_f16.
 *
 * @param x  input vector
 * @return   vector holding the approximation of 1/x for each lane
 */
inline float16x8_t vinvq_f16(float16x8_t x)
{
    float16x8_t estimate = vrecpeq_f16(x);

    for (int step = 0; step < 2; ++step)
    {
        estimate = vmulq_f16(vrecpsq_f16(x, estimate), estimate);
    }

    return estimate;
}
|
||||
|
||||
/**
 * Per-lane half-precision hyperbolic tangent.
 *
 * The input is clamped to [-10, 10] and the result is computed as
 * (exp(2x) - 1) / (exp(2x) + 1), with the division done through vinvq_f16.
 *
 * @param val  input vector
 * @return     vector holding the approximation of tanh(val) for each lane
 */
inline float16x8_t vtanhq_f16(float16x8_t val)
{
    const float16_t ONE_LANES[8]   = {1.f,1.f,1.f,1.f,1.f,1.f,1.f,1.f};
    const float16_t TWO_LANES[8]   = {2.f,2.f,2.f,2.f,2.f,2.f,2.f,2.f};
    const float16_t LOWER_LANES[8] = {-10.f,-10.f,-10.f,-10.f,-10.f,-10.f,-10.f,-10.f};
    const float16_t UPPER_LANES[8] = {10.f,10.f,10.f,10.f,10.f,10.f,10.f,10.f};

    // Clamp the argument to [-10, 10]
    const float16x8_t lower_bounded = vmaxq_f16(val, vld1q_f16(LOWER_LANES));
    const float16x8_t clamped       = vminq_f16(lower_bounded, vld1q_f16(UPPER_LANES));

    const float16x8_t e2x         = vexpq_f16(vmulq_f16(vld1q_f16(TWO_LANES), clamped));
    const float16x8_t numerator   = vsubq_f16(e2x, vld1q_f16(ONE_LANES));
    const float16x8_t denominator = vaddq_f16(e2x, vld1q_f16(ONE_LANES));

    return vmulq_f16(numerator, vinvq_f16(denominator));
}
|
||||
|
||||
/**
 * Evaluates a degree-7 polynomial on each lane of a half-precision vector.
 *
 * The coefficient table holds 8 rows of 8 lanes each (row k starts at
 * coeffs[8*k]). The rows are combined as
 *
 *   A = row0 + row4*x,  B = row2 + row6*x,
 *   C = row1 + row5*x,  D = row3 + row7*x,
 *   result = (A + B*x^2) + (C + D*x^2)*x^4
 *
 * Each row is loaded with vld1q_f16 before use: the arithmetic intrinsics
 * operate on float16x8_t values, not on addresses into the table.
 *
 * @param x       input vector
 * @param coeffs  pointer to 8*8 half-precision coefficients laid out as above
 * @return        vector holding the polynomial value for each lane
 */
inline float16x8_t vtaylor_polyq_f16(float16x8_t x, const float16_t *coeffs)
{
    const float16x8_t c0 = vld1q_f16(&coeffs[8*0]);
    const float16x8_t c1 = vld1q_f16(&coeffs[8*1]);
    const float16x8_t c2 = vld1q_f16(&coeffs[8*2]);
    const float16x8_t c3 = vld1q_f16(&coeffs[8*3]);
    const float16x8_t c4 = vld1q_f16(&coeffs[8*4]);
    const float16x8_t c5 = vld1q_f16(&coeffs[8*5]);
    const float16x8_t c6 = vld1q_f16(&coeffs[8*6]);
    const float16x8_t c7 = vld1q_f16(&coeffs[8*7]);

    const float16x8_t A   = vaddq_f16(c0, vmulq_f16(c4, x));
    const float16x8_t B   = vaddq_f16(c2, vmulq_f16(c6, x));
    const float16x8_t C   = vaddq_f16(c1, vmulq_f16(c5, x));
    const float16x8_t D   = vaddq_f16(c3, vmulq_f16(c7, x));
    const float16x8_t x2  = vmulq_f16(x, x);
    const float16x8_t x4  = vmulq_f16(x2, x2);
    const float16x8_t res = vaddq_f16(vaddq_f16(A, vmulq_f16(B, x2)), vmulq_f16(vaddq_f16(C, vmulq_f16(D, x2)), x4));
    return res;
}
|
||||
|
||||
/**
 * Per-lane half-precision exponential.
 *
 * The input is widened to two float32x4_t halves, vexpq_f32 is applied to
 * each, and the results are narrowed and recombined. The halves are packed
 * with vcombine_f16, the same way vpowq_f16 does it, rather than with
 * vcvt_high_f16_f32, which the ACLE lists as an AArch64-only intrinsic.
 *
 * @param x  input vector
 * @return   vector holding the approximation of exp(x) for each lane
 */
inline float16x8_t vexpq_f16(float16x8_t x)
{
    // TODO (COMPMID-1535) : Revisit FP16 approximations
    const float32x4_t x_high = vcvt_f32_f16(vget_high_f16(x));
    const float32x4_t x_low  = vcvt_f32_f16(vget_low_f16(x));

    const float16x4_t res_low  = vcvt_f16_f32(vexpq_f32(x_low));
    const float16x4_t res_high = vcvt_f16_f32(vexpq_f32(x_high));

    return vcombine_f16(res_low, res_high);
}
|
||||
|
||||
/**
 * Per-lane half-precision natural logarithm.
 *
 * The input is widened to two float32x4_t halves, vlogq_f32 is applied to
 * each, and the results are narrowed and recombined. The halves are packed
 * with vcombine_f16, the same way vpowq_f16 does it, rather than with
 * vcvt_high_f16_f32, which the ACLE lists as an AArch64-only intrinsic.
 *
 * @param x  input vector
 * @return   vector holding the approximation of ln(x) for each lane
 */
inline float16x8_t vlogq_f16(float16x8_t x)
{
    // TODO (COMPMID-1535) : Revisit FP16 approximations
    const float32x4_t x_high = vcvt_f32_f16(vget_high_f16(x));
    const float32x4_t x_low  = vcvt_f32_f16(vget_low_f16(x));

    const float16x4_t res_low  = vcvt_f16_f32(vlogq_f32(x_low));
    const float16x4_t res_high = vcvt_f16_f32(vlogq_f32(x_high));

    return vcombine_f16(res_low, res_high);
}
|
||||
|
||||
/**
 * Per-lane half-precision power function.
 *
 * Both operands are widened to float32x4_t halves, exp(n * ln(val)) is
 * evaluated in single precision on each half, and the two results are
 * narrowed and combined (low half first).
 *
 * @param val  base vector
 * @param n    exponent vector
 * @return     vector holding the approximation of val^n for each lane
 */
inline float16x8_t vpowq_f16(float16x8_t val, float16x8_t n)
{
    // TODO (giaiod01) - COMPMID-1535
    const float32x4_t exp_lo  = vcvt_f32_f16(vget_low_f16(n));
    const float32x4_t exp_hi  = vcvt_f32_f16(vget_high_f16(n));
    const float32x4_t base_lo = vcvt_f32_f16(vget_low_f16(val));
    const float32x4_t base_hi = vcvt_f32_f16(vget_high_f16(val));

    const float32x4_t pow_lo = vexpq_f32(vmulq_f32(exp_lo, vlogq_f32(base_lo)));
    const float32x4_t pow_hi = vexpq_f32(vmulq_f32(exp_hi, vlogq_f32(base_hi)));

    return vcombine_f16(vcvt_f16_f32(pow_lo), vcvt_f16_f32(pow_hi));
}
|
||||
#endif /* DOXYGEN_SKIP_THIS */
|
||||
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
|
||||
#endif
|
||||
#endif /* __ARM_COMPUTE_NEMATH_H__ */
|
||||
21
libraries/cmsis/dsp/ComputeLibrary/LICENSE.txt
Normal file
21
libraries/cmsis/dsp/ComputeLibrary/LICENSE.txt
Normal file
@@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2017-2019 ARM Software
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
19
libraries/cmsis/dsp/ComputeLibrary/README.md
Normal file
19
libraries/cmsis/dsp/ComputeLibrary/README.md
Normal file
@@ -0,0 +1,19 @@
|
||||
README
|
||||
======
|
||||
|
||||
This folder contains two files imported from the ComputeLibrary and slightly modified:
|
||||
|
||||
NEMath.h and arm_cl_tables.c
|
||||
|
||||
In the original compute library, there are instead two other files:
|
||||
|
||||
NEMath.h and NEMath.inl
|
||||
|
||||
NEMath.inl is included from NEMath.h whereas in this CMSIS DSP implementation, there is no NEMath.inl and its content is copied into NEMath.h
|
||||
|
||||
The tables contained in NEMath.inl have been moved to arm_cl_tables.c. Finally, the files are written in C for the CMSIS DSP library, whereas they are written in C++ in the original Compute Library.
|
||||
|
||||
Otherwise, the features and implementations are the same : a few optimized Neon functions.
|
||||
|
||||
The license covering those files is different: it is an MIT license.
|
||||
Other parts of the CMSIS-DSP are covered with an Apache-2.0 license.
|
||||
55
libraries/cmsis/dsp/ComputeLibrary/Source/arm_cl_tables.c
Normal file
55
libraries/cmsis/dsp/ComputeLibrary/Source/arm_cl_tables.c
Normal file
@@ -0,0 +1,55 @@
|
||||
/*
|
||||
* Copyright (c) 2016, 2019 ARM Limited.
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal in the Software without restriction, including without limitation the
|
||||
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
* sell copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
#include "arm_math.h"
|
||||
#include "NEMath.h"
|
||||
|
||||
#if defined(ARM_MATH_NEON)
|
||||
|
||||
/** Exponent polynomial coefficients */
|
||||
/*
 * Each row holds one coefficient replicated over 4 lanes. Rows are stored in
 * the order in which vtaylor_polyq_f32 (NEMath.h) loads them, so the power of
 * x each row multiplies is: row0 x^0, row1 x^4, row2 x^2, row3 x^6,
 * row4 x^1, row5 x^5, row6 x^3, row7 x^7.
 */
#define NEMATH_EXP_ROW(c) (c), (c), (c), (c)
const float32_t exp_tab[4*8] =
{
    NEMATH_EXP_ROW(1.f),
    NEMATH_EXP_ROW(0.0416598916054f),
    NEMATH_EXP_ROW(0.500000596046f),
    NEMATH_EXP_ROW(0.0014122662833f),
    NEMATH_EXP_ROW(1.00000011921f),
    NEMATH_EXP_ROW(0.00833693705499f),
    NEMATH_EXP_ROW(0.166665703058f),
    NEMATH_EXP_ROW(0.000195780929062f)
};
#undef NEMATH_EXP_ROW
|
||||
|
||||
/** Logarithm polynomial coefficients */
|
||||
/*
 * Each row holds one coefficient replicated over 4 lanes. Rows are stored in
 * the order in which vtaylor_polyq_f32 (NEMath.h) loads them, so the power of
 * x each row multiplies is: row0 x^0, row1 x^4, row2 x^2, row3 x^6,
 * row4 x^1, row5 x^5, row6 x^3, row7 x^7.
 */
#define NEMATH_LOG_ROW(c) (c), (c), (c), (c)
const float32_t log_tab[4*8] =
{
    NEMATH_LOG_ROW(-2.29561495781f),
    NEMATH_LOG_ROW(-2.47071170807f),
    NEMATH_LOG_ROW(-5.68692588806f),
    NEMATH_LOG_ROW(-0.165253549814f),
    NEMATH_LOG_ROW(5.17591238022f),
    NEMATH_LOG_ROW(0.844007015228f),
    NEMATH_LOG_ROW(4.58445882797f),
    NEMATH_LOG_ROW(0.0141278216615f)
};
#undef NEMATH_LOG_ROW
|
||||
|
||||
#endif
|
||||
200
libraries/cmsis/dsp/PrivateInclude/arm_sorting.h
Normal file
200
libraries/cmsis/dsp/PrivateInclude/arm_sorting.h
Normal file
@@ -0,0 +1,200 @@
|
||||
/******************************************************************************
|
||||
* @file arm_sorting.h
|
||||
* @brief Private header file for CMSIS DSP Library
|
||||
* @version V1.7.0
|
||||
* @date 2019
|
||||
******************************************************************************/
|
||||
/*
|
||||
* Copyright (c) 2010-2019 Arm Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef _ARM_SORTING_H_
|
||||
#define _ARM_SORTING_H_
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @param[in] S points to an instance of the sorting structure.
|
||||
* @param[in] pSrc points to the block of input data.
|
||||
* @param[out] pDst points to the block of output data.
|
||||
* @param[in] blockSize number of samples to process.
|
||||
*/
|
||||
void arm_bubble_sort_f32(
|
||||
const arm_sort_instance_f32 * S,
|
||||
float32_t * pSrc,
|
||||
float32_t * pDst,
|
||||
uint32_t blockSize);
|
||||
|
||||
/**
|
||||
* @param[in] S points to an instance of the sorting structure.
|
||||
* @param[in] pSrc points to the block of input data.
|
||||
* @param[out] pDst points to the block of output data.
|
||||
* @param[in] blockSize number of samples to process.
|
||||
*/
|
||||
void arm_heap_sort_f32(
|
||||
const arm_sort_instance_f32 * S,
|
||||
float32_t * pSrc,
|
||||
float32_t * pDst,
|
||||
uint32_t blockSize);
|
||||
|
||||
/**
|
||||
* @param[in] S points to an instance of the sorting structure.
|
||||
* @param[in] pSrc points to the block of input data.
|
||||
* @param[out] pDst points to the block of output data.
|
||||
* @param[in] blockSize number of samples to process.
|
||||
*/
|
||||
void arm_insertion_sort_f32(
|
||||
const arm_sort_instance_f32 * S,
|
||||
float32_t *pSrc,
|
||||
float32_t* pDst,
|
||||
uint32_t blockSize);
|
||||
|
||||
/**
|
||||
* @param[in] S points to an instance of the sorting structure.
|
||||
* @param[in] pSrc points to the block of input data.
|
||||
* @param[out] pDst points to the block of output data
|
||||
* @param[in] blockSize number of samples to process.
|
||||
*/
|
||||
void arm_quick_sort_f32(
|
||||
const arm_sort_instance_f32 * S,
|
||||
float32_t * pSrc,
|
||||
float32_t * pDst,
|
||||
uint32_t blockSize);
|
||||
|
||||
/**
|
||||
* @param[in] S points to an instance of the sorting structure.
|
||||
* @param[in] pSrc points to the block of input data.
|
||||
* @param[out] pDst points to the block of output data
|
||||
* @param[in] blockSize number of samples to process.
|
||||
*/
|
||||
void arm_selection_sort_f32(
|
||||
const arm_sort_instance_f32 * S,
|
||||
float32_t * pSrc,
|
||||
float32_t * pDst,
|
||||
uint32_t blockSize);
|
||||
|
||||
/**
|
||||
* @param[in] S points to an instance of the sorting structure.
|
||||
* @param[in] pSrc points to the block of input data.
|
||||
* @param[out] pDst points to the block of output data
|
||||
* @param[in] blockSize number of samples to process.
|
||||
*/
|
||||
void arm_bitonic_sort_f32(
|
||||
const arm_sort_instance_f32 * S,
|
||||
float32_t * pSrc,
|
||||
float32_t * pDst,
|
||||
uint32_t blockSize);
|
||||
|
||||
#if defined(ARM_MATH_NEON)
|
||||
|
||||
/*
 * NEON helper macros for the sorting kernels.
 *
 * Arguments named a/b must be modifiable lvalues: float32x4_t for the
 * "128" variants and float32x4x2_t for the "256" variants. Every argument is
 * parenthesized in the expansions and the temporaries carry macro-specific
 * names so that they cannot capture a caller variable.
 */

/* Swaps a.val[1] with b.val[0]. */
#define vtrn256_128q(a, b)                                    \
    do {                                                      \
        float32x4_t vtrn128_temp = (a).val[1];                \
        (a).val[1] = (b).val[0];                              \
        (b).val[0] = vtrn128_temp;                            \
    } while (0)

/* a <- {low(a), low(b)}, b <- {high(a), high(b)}. */
#define vtrn128_64q(a, b)                                     \
    do {                                                      \
        float32x2_t vtrn64_a_lo = vget_low_f32(a);            \
        float32x2_t vtrn64_b_lo = vget_low_f32(b);            \
        float32x2_t vtrn64_a_hi = vget_high_f32(a);           \
        float32x2_t vtrn64_b_hi = vget_high_f32(b);           \
        (a) = vcombine_f32(vtrn64_a_lo, vtrn64_b_lo);         \
        (b) = vcombine_f32(vtrn64_a_hi, vtrn64_b_hi);         \
    } while (0)

/* Same 64-bit exchange as vtrn128_64q, applied to both halves of a and b. */
#define vtrn256_64q(a, b)                                     \
    do {                                                      \
        float32x2_t vtrn64_a0_lo = vget_low_f32((a).val[0]);  \
        float32x2_t vtrn64_a0_hi = vget_high_f32((a).val[0]); \
        float32x2_t vtrn64_a1_lo = vget_low_f32((a).val[1]);  \
        float32x2_t vtrn64_a1_hi = vget_high_f32((a).val[1]); \
        float32x2_t vtrn64_b0_lo = vget_low_f32((b).val[0]);  \
        float32x2_t vtrn64_b0_hi = vget_high_f32((b).val[0]); \
        float32x2_t vtrn64_b1_lo = vget_low_f32((b).val[1]);  \
        float32x2_t vtrn64_b1_hi = vget_high_f32((b).val[1]); \
        (a).val[0] = vcombine_f32(vtrn64_a0_lo, vtrn64_b0_lo); \
        (a).val[1] = vcombine_f32(vtrn64_a1_lo, vtrn64_b1_lo); \
        (b).val[0] = vcombine_f32(vtrn64_a0_hi, vtrn64_b0_hi); \
        (b).val[1] = vcombine_f32(vtrn64_a1_hi, vtrn64_b1_hi); \
    } while (0)

/* Replaces a and b with the two results of vtrnq_f32(a, b). */
#define vtrn128_32q(a, b)                                     \
    do {                                                      \
        float32x4x2_t vtrn32_tmp = vtrnq_f32((a), (b));       \
        (a) = vtrn32_tmp.val[0];                              \
        (b) = vtrn32_tmp.val[1];                              \
    } while (0)

/* vtrn128_32q applied to both halves of a and b. */
#define vtrn256_32q(a, b)                                     \
    do {                                                      \
        float32x4x2_t vtrn32_tmp_1 = vtrnq_f32((a).val[0], (b).val[0]); \
        float32x4x2_t vtrn32_tmp_2 = vtrnq_f32((a).val[1], (b).val[1]); \
        (a).val[0] = vtrn32_tmp_1.val[0];                     \
        (a).val[1] = vtrn32_tmp_2.val[0];                     \
        (b).val[0] = vtrn32_tmp_1.val[1];                     \
        (b).val[1] = vtrn32_tmp_2.val[1];                     \
    } while (0)

/* a <- lane-wise min(a, b), b <- lane-wise max(a, b). */
#define vminmaxq(a, b)                                        \
    do {                                                      \
        float32x4_t minmax_tmp = (a);                         \
        (a) = vminq_f32((a), (b));                            \
        (b) = vmaxq_f32(minmax_tmp, (b));                     \
    } while (0)

/* vminmaxq applied to both halves of a and b. */
#define vminmax256q(a, b)                                     \
    do {                                                      \
        float32x4x2_t minmax256_tmp = (a);                    \
        (a).val[0] = vminq_f32((a).val[0], (b).val[0]);       \
        (a).val[1] = vminq_f32((a).val[1], (b).val[1]);       \
        (b).val[0] = vmaxq_f32(minmax256_tmp.val[0], (b).val[0]); \
        (b).val[1] = vmaxq_f32(minmax256_tmp.val[1], (b).val[1]); \
    } while (0)

/* Expression yielding a with its four lanes in reverse order.
   Note: the argument is evaluated twice. */
#define vrev128q_f32(a)                                       \
    vcombine_f32(vrev64_f32(vget_high_f32(a)), vrev64_f32(vget_low_f32(a)))

/* Reverses the eight lanes held in a.val[0] and a.val[1] in place. */
#define vrev256q_f32(a)                                       \
    do {                                                      \
        float32x4_t rev_tmp = vcombine_f32(vrev64_f32(vget_high_f32((a).val[0])), vrev64_f32(vget_low_f32((a).val[0]))); \
        (a).val[0] = vcombine_f32(vrev64_f32(vget_high_f32((a).val[1])), vrev64_f32(vget_low_f32((a).val[1]))); \
        (a).val[1] = rev_tmp;                                 \
    } while (0)

/* Loads four floats from p into a, then reverses their lane order. */
#define vldrev128q_f32(a, p)                                  \
    do {                                                      \
        (a) = vld1q_f32(p);                                   \
        (a) = vrev128q_f32(a);                                \
    } while (0)
|
||||
|
||||
#endif /* ARM_MATH_NEON */
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _ARM_SORTING_H_ */
|
||||
58
libraries/cmsis/dsp/PrivateInclude/arm_vec_fft.h
Normal file
58
libraries/cmsis/dsp/PrivateInclude/arm_vec_fft.h
Normal file
@@ -0,0 +1,58 @@
|
||||
/******************************************************************************
|
||||
* @file arm_vec_fft.h
|
||||
* @brief Private header file for CMSIS DSP Library
|
||||
* @version V1.7.0
|
||||
* @date 07. January 2020
|
||||
******************************************************************************/
|
||||
/*
|
||||
* Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef _ARM_VEC_FFT_H_
|
||||
#define _ARM_VEC_FFT_H_
|
||||
|
||||
#include "arm_math.h"
|
||||
#include "arm_helium_utils.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
#if (defined(ARM_MATH_MVEF) || defined(ARM_MATH_MVEI) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
|
||||
/*
 * Complex-arithmetic helpers built on MVE intrinsics. All of them expand to
 * a single expression; none carries a trailing semicolon, so each can be
 * used inside a larger expression or as a statement terminated by the
 * caller's own ';'.
 */

/* Floating-point helpers */
#define MVE_CMPLX_ADD_A_ixB(A, B)           vcaddq_rot90(A,B)
#define MVE_CMPLX_SUB_A_ixB(A,B)            vcaddq_rot270(A,B)
#define MVE_CMPLX_MULT_FLT_AxB(A,B)         vcmlaq_rot90(vcmulq(A, B), A, B)
#define MVE_CMPLX_MULT_FLT_Conj_AxB(A,B)    vcmlaq_rot270(vcmulq(A, B), A, B)

/* Fixed-point multiplies; the accumulator operand of the inner intrinsic is
   an uninitialized vector cast to the type of A */
#define MVE_CMPLX_MULT_FX_AxB(A,B)          vqdmladhxq(vqdmlsdhq((__typeof(A))vuninitializedq_s32(), A, B), A, B)
#define MVE_CMPLX_MULT_FX_AxConjB(A,B)      vqdmladhq(vqdmlsdhxq((__typeof(A))vuninitializedq_s32(), A, B), A, B)

/* Fixed-point helpers using the halving complex-add intrinsics */
#define MVE_CMPLX_ADD_FX_A_ixB(A, B)        vhcaddq_rot90(A,B)
#define MVE_CMPLX_SUB_FX_A_ixB(A,B)         vhcaddq_rot270(A,B)
|
||||
|
||||
|
||||
#endif /* (defined(ARM_MATH_MVEF) || defined(ARM_MATH_MVEI) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE) */
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* _ARM_VEC_FFT_H_ */
|
||||
1661
libraries/cmsis/dsp/PrivateInclude/arm_vec_filtering.h
Normal file
1661
libraries/cmsis/dsp/PrivateInclude/arm_vec_filtering.h
Normal file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,75 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: BasicMathFunctions.c
|
||||
* Description: Combination of all basic math function source files.
|
||||
*
|
||||
* $Date: 16. March 2020
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2019-2020 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "arm_abs_f32.c"
|
||||
#include "arm_abs_q15.c"
|
||||
#include "arm_abs_q31.c"
|
||||
#include "arm_abs_q7.c"
|
||||
#include "arm_add_f32.c"
|
||||
#include "arm_add_q15.c"
|
||||
#include "arm_add_q31.c"
|
||||
#include "arm_add_q7.c"
|
||||
#include "arm_and_u16.c"
|
||||
#include "arm_and_u32.c"
|
||||
#include "arm_and_u8.c"
|
||||
#include "arm_dot_prod_f32.c"
|
||||
#include "arm_dot_prod_q15.c"
|
||||
#include "arm_dot_prod_q31.c"
|
||||
#include "arm_dot_prod_q7.c"
|
||||
#include "arm_mult_f32.c"
|
||||
#include "arm_mult_q15.c"
|
||||
#include "arm_mult_q31.c"
|
||||
#include "arm_mult_q7.c"
|
||||
#include "arm_negate_f32.c"
|
||||
#include "arm_negate_q15.c"
|
||||
#include "arm_negate_q31.c"
|
||||
#include "arm_negate_q7.c"
|
||||
#include "arm_not_u16.c"
|
||||
#include "arm_not_u32.c"
|
||||
#include "arm_not_u8.c"
|
||||
#include "arm_offset_f32.c"
|
||||
#include "arm_offset_q15.c"
|
||||
#include "arm_offset_q31.c"
|
||||
#include "arm_offset_q7.c"
|
||||
#include "arm_or_u16.c"
|
||||
#include "arm_or_u32.c"
|
||||
#include "arm_or_u8.c"
|
||||
#include "arm_scale_f32.c"
|
||||
#include "arm_scale_q15.c"
|
||||
#include "arm_scale_q31.c"
|
||||
#include "arm_scale_q7.c"
|
||||
#include "arm_shift_q15.c"
|
||||
#include "arm_shift_q31.c"
|
||||
#include "arm_shift_q7.c"
|
||||
#include "arm_sub_f32.c"
|
||||
#include "arm_sub_q15.c"
|
||||
#include "arm_sub_q31.c"
|
||||
#include "arm_sub_q7.c"
|
||||
#include "arm_xor_u16.c"
|
||||
#include "arm_xor_u32.c"
|
||||
#include "arm_xor_u8.c"
|
||||
19
libraries/cmsis/dsp/Source/BasicMathFunctions/CMakeLists.txt
Normal file
19
libraries/cmsis/dsp/Source/BasicMathFunctions/CMakeLists.txt
Normal file
@@ -0,0 +1,19 @@
|
||||
# Builds the basic-math functions as the static library CMSISDSPBasicMath.
cmake_minimum_required (VERSION 3.6)

project(CMSISDSPBasicMath)

include(configLib)
include(configDsp)

# Only sources whose name contains an underscore are collected, so the
# aggregation file BasicMathFunctions.c (which #includes the individual
# arm_*.c files) does not match this pattern.
file(GLOB SRC "./*_*.c")

add_library(CMSISDSPBasicMath STATIC ${SRC})

# NOTE(review): configLib/configDsp come from the included modules above;
# their effect is not visible from this file.
configLib(CMSISDSPBasicMath ${ROOT})
configDsp(CMSISDSPBasicMath ${ROOT})

### Includes
target_include_directories(CMSISDSPBasicMath PUBLIC "${DSP}/Include")
|
||||
|
||||
|
||||
|
||||
196
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_abs_f32.c
Normal file
196
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_abs_f32.c
Normal file
@@ -0,0 +1,196 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_abs_f32.c
|
||||
* Description: Floating-point vector absolute value
|
||||
*
|
||||
* $Date: 18. March 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "arm_math.h"
|
||||
#include <math.h>
|
||||
|
||||
/**
|
||||
@ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
@defgroup BasicAbs Vector Absolute Value
|
||||
|
||||
Computes the absolute value of a vector on an element-by-element basis.
|
||||
|
||||
<pre>
|
||||
pDst[n] = abs(pSrc[n]), 0 <= n < blockSize.
|
||||
</pre>
|
||||
|
||||
The functions support in-place computation allowing the source and
|
||||
destination pointers to reference the same memory buffer.
|
||||
There are separate functions for floating-point, Q7, Q15, and Q31 data types.
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup BasicAbs
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Floating-point vector absolute value.
|
||||
@param[in] pSrc points to the input vector
|
||||
@param[out] pDst points to the output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
*/
|
||||
|
||||
|
||||
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
|
||||
#include "arm_helium_utils.h"
|
||||
|
||||
void arm_abs_f32(
|
||||
const float32_t * pSrc,
|
||||
float32_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
f32x4_t vec1;
|
||||
f32x4_t res;
|
||||
|
||||
|
||||
/* Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = |A| */
|
||||
|
||||
/* Calculate absolute values and then store the results in the destination buffer. */
|
||||
vec1 = vld1q(pSrc);
|
||||
res = vabsq(vec1);
|
||||
vst1q(pDst, res);
|
||||
|
||||
/* Increment pointers */
|
||||
pSrc += 4;
|
||||
pDst += 4;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Tail */
|
||||
blkCnt = blockSize & 0x3;
|
||||
|
||||
|
||||
if (blkCnt > 0U)
|
||||
{
|
||||
/* C = |A| */
|
||||
mve_pred16_t p0 = vctp32q(blkCnt);
|
||||
vec1 = vld1q(pSrc);
|
||||
vstrwq_p(pDst, vabsq(vec1), p0);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#else
|
||||
/*
 * Generic implementation: pDst[n] = |pSrc[n]| for 0 <= n < blockSize.
 *
 * Three compile-time variants share the final scalar loop:
 *  - ARM_MATH_NEON (without ARM_MATH_AUTOVECTORIZE): groups of 4 samples are
 *    processed with NEON intrinsics, the scalar loop handles the remainder;
 *  - ARM_MATH_LOOPUNROLL (without ARM_MATH_AUTOVECTORIZE): groups of 4
 *    samples are processed by an unrolled scalar loop, the final loop
 *    handles the remainder;
 *  - otherwise: the final loop processes all blockSize samples.
 */
void arm_abs_f32(
  const float32_t * pSrc,
        float32_t * pDst,
        uint32_t blockSize)
{
        uint32_t blkCnt;                               /* Loop counter */

#if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
    f32x4_t vec1;
    f32x4_t res;

    /* Compute 4 outputs at a time */
    blkCnt = blockSize >> 2U;

    while (blkCnt > 0U)
    {
        /* C = |A| */

        /* Calculate absolute values and then store the results in the destination buffer. */
        vec1 = vld1q_f32(pSrc);
        res = vabsq_f32(vec1);
        vst1q_f32(pDst, res);

        /* Increment pointers */
        pSrc += 4;
        pDst += 4;

        /* Decrement the loop counter */
        blkCnt--;
    }

    /* Tail: 0..3 samples left for the scalar loop below */
    blkCnt = blockSize & 0x3;

#else
#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)

  /* Loop unrolling: Compute 4 outputs at a time */
  blkCnt = blockSize >> 2U;

  while (blkCnt > 0U)
  {
    /* C = |A| */

    /* Calculate absolute and store result in destination buffer. */
    *pDst++ = fabsf(*pSrc++);

    *pDst++ = fabsf(*pSrc++);

    *pDst++ = fabsf(*pSrc++);

    *pDst++ = fabsf(*pSrc++);

    /* Decrement loop counter */
    blkCnt--;
  }

  /* Loop unrolling: Compute remaining outputs */
  blkCnt = blockSize % 0x4U;

#else

  /* Initialize blkCnt with number of samples */
  blkCnt = blockSize;

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
#endif /* #if defined(ARM_MATH_NEON) */

  /* Scalar loop: processes whatever count the selected variant left in blkCnt */
  while (blkCnt > 0U)
  {
    /* C = |A| */

    /* Calculate absolute and store result in destination buffer. */
    *pDst++ = fabsf(*pSrc++);

    /* Decrement loop counter */
    blkCnt--;
  }

}
|
||||
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
|
||||
/**
|
||||
@} end of BasicAbs group
|
||||
*/
|
||||
178
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_abs_q15.c
Normal file
178
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_abs_q15.c
Normal file
@@ -0,0 +1,178 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_abs_q15.c
|
||||
* Description: Q15 vector absolute value
|
||||
*
|
||||
* $Date: 18. March 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
@ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup BasicAbs
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Q15 vector absolute value.
|
||||
@param[in] pSrc points to the input vector
|
||||
@param[out] pDst points to the output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
|
||||
@par Scaling and Overflow Behavior
|
||||
The function uses saturating arithmetic.
|
||||
The Q15 value -1 (0x8000) will be saturated to the maximum allowable positive value 0x7FFF.
|
||||
*/
|
||||
|
||||
#if defined(ARM_MATH_MVEI)
|
||||
|
||||
#include "arm_helium_utils.h"
|
||||
|
||||
void arm_abs_q15(
|
||||
const q15_t * pSrc,
|
||||
q15_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counters */
|
||||
q15x8_t vecSrc;
|
||||
|
||||
/* Compute 8 outputs at a time */
|
||||
blkCnt = blockSize >> 3;
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/*
|
||||
* C = |A|
|
||||
* Calculate absolute and then store the results in the destination buffer.
|
||||
*/
|
||||
vecSrc = vld1q(pSrc);
|
||||
vst1q(pDst, vqabsq(vecSrc));
|
||||
/*
|
||||
* Decrement the blockSize loop counter
|
||||
*/
|
||||
blkCnt--;
|
||||
/*
|
||||
* advance vector source and destination pointers
|
||||
*/
|
||||
pSrc += 8;
|
||||
pDst += 8;
|
||||
}
|
||||
/*
|
||||
* tail
|
||||
*/
|
||||
blkCnt = blockSize & 7;
|
||||
if (blkCnt > 0U)
|
||||
{
|
||||
mve_pred16_t p0 = vctp16q(blkCnt);
|
||||
vecSrc = vld1q(pSrc);
|
||||
vstrhq_p(pDst, vqabsq(vecSrc), p0);
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
void arm_abs_q15(
|
||||
const q15_t * pSrc,
|
||||
q15_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
q15_t in; /* Temporary input variable */
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = |A| */
|
||||
|
||||
/* Calculate absolute of input (if -1 then saturated to 0x7fff) and store result in destination buffer. */
|
||||
in = *pSrc++;
|
||||
#if defined (ARM_MATH_DSP)
|
||||
*pDst++ = (in > 0) ? in : (q15_t)__QSUB16(0, in);
|
||||
#else
|
||||
*pDst++ = (in > 0) ? in : ((in == (q15_t) 0x8000) ? 0x7fff : -in);
|
||||
#endif
|
||||
|
||||
in = *pSrc++;
|
||||
#if defined (ARM_MATH_DSP)
|
||||
*pDst++ = (in > 0) ? in : (q15_t)__QSUB16(0, in);
|
||||
#else
|
||||
*pDst++ = (in > 0) ? in : ((in == (q15_t) 0x8000) ? 0x7fff : -in);
|
||||
#endif
|
||||
|
||||
in = *pSrc++;
|
||||
#if defined (ARM_MATH_DSP)
|
||||
*pDst++ = (in > 0) ? in : (q15_t)__QSUB16(0, in);
|
||||
#else
|
||||
*pDst++ = (in > 0) ? in : ((in == (q15_t) 0x8000) ? 0x7fff : -in);
|
||||
#endif
|
||||
|
||||
in = *pSrc++;
|
||||
#if defined (ARM_MATH_DSP)
|
||||
*pDst++ = (in > 0) ? in : (q15_t)__QSUB16(0, in);
|
||||
#else
|
||||
*pDst++ = (in > 0) ? in : ((in == (q15_t) 0x8000) ? 0x7fff : -in);
|
||||
#endif
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = |A| */
|
||||
|
||||
/* Calculate absolute of input (if -1 then saturated to 0x7fff) and store result in destination buffer. */
|
||||
in = *pSrc++;
|
||||
#if defined (ARM_MATH_DSP)
|
||||
*pDst++ = (in > 0) ? in : (q15_t)__QSUB16(0, in);
|
||||
#else
|
||||
*pDst++ = (in > 0) ? in : ((in == (q15_t) 0x8000) ? 0x7fff : -in);
|
||||
#endif
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* defined(ARM_MATH_MVEI) */
|
||||
|
||||
/**
|
||||
@} end of BasicAbs group
|
||||
*/
|
||||
208
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_abs_q31.c
Normal file
208
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_abs_q31.c
Normal file
@@ -0,0 +1,208 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_abs_q31.c
|
||||
* Description: Q31 vector absolute value
|
||||
*
|
||||
* $Date: 18. March 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
@ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup BasicAbs
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Q31 vector absolute value.
|
||||
@param[in] pSrc points to the input vector
|
||||
@param[out] pDst points to the output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
|
||||
@par Scaling and Overflow Behavior
|
||||
The function uses saturating arithmetic.
|
||||
The Q31 value -1 (0x80000000) will be saturated to the maximum allowable positive value 0x7FFFFFFF.
|
||||
*/
|
||||
|
||||
#if defined(ARM_MATH_MVEI)
|
||||
|
||||
#include "arm_helium_utils.h"
|
||||
|
||||
void arm_abs_q31(
|
||||
const q31_t * pSrc,
|
||||
q31_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counters */
|
||||
q31x4_t vecSrc;
|
||||
|
||||
/* Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/*
|
||||
* C = |A|
|
||||
* Calculate absolute and then store the results in the destination buffer.
|
||||
*/
|
||||
vecSrc = vld1q(pSrc);
|
||||
vst1q(pDst, vqabsq(vecSrc));
|
||||
/*
|
||||
* Decrement the blockSize loop counter
|
||||
*/
|
||||
blkCnt--;
|
||||
/*
|
||||
* Advance vector source and destination pointers
|
||||
*/
|
||||
pSrc += 4;
|
||||
pDst += 4;
|
||||
}
|
||||
/*
|
||||
* Tail
|
||||
*/
|
||||
blkCnt = blockSize & 3;
|
||||
|
||||
if (blkCnt > 0U)
|
||||
{
|
||||
mve_pred16_t p0 = vctp32q(blkCnt);
|
||||
vecSrc = vld1q(pSrc);
|
||||
vstrwq_p(pDst, vqabsq(vecSrc), p0);
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
void arm_abs_q31(
|
||||
const q31_t * pSrc,
|
||||
q31_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
q31_t in; /* Temporary variable */
|
||||
|
||||
#if defined(ARM_MATH_NEON)
|
||||
int32x4_t vec1;
|
||||
int32x4_t res;
|
||||
|
||||
/* Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = |A| */
|
||||
/* Calculate absolute and then store the results in the destination buffer. */
|
||||
|
||||
vec1 = vld1q_s32(pSrc);
|
||||
res = vqabsq_s32(vec1);
|
||||
vst1q_s32(pDst, res);
|
||||
|
||||
/* Increment pointers */
|
||||
pSrc += 4;
|
||||
pDst += 4;
|
||||
|
||||
/* Decrement the blockSize loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Tail */
|
||||
blkCnt = blockSize & 0x3;
|
||||
|
||||
#else
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = |A| */
|
||||
|
||||
/* Calculate absolute of input (if -1 then saturated to 0x7fffffff) and store result in destination buffer. */
|
||||
in = *pSrc++;
|
||||
#if defined (ARM_MATH_DSP)
|
||||
*pDst++ = (in > 0) ? in : (q31_t)__QSUB(0, in);
|
||||
#else
|
||||
*pDst++ = (in > 0) ? in : ((in == INT32_MIN) ? INT32_MAX : -in);
|
||||
#endif
|
||||
|
||||
in = *pSrc++;
|
||||
#if defined (ARM_MATH_DSP)
|
||||
*pDst++ = (in > 0) ? in : (q31_t)__QSUB(0, in);
|
||||
#else
|
||||
*pDst++ = (in > 0) ? in : ((in == INT32_MIN) ? INT32_MAX : -in);
|
||||
#endif
|
||||
|
||||
in = *pSrc++;
|
||||
#if defined (ARM_MATH_DSP)
|
||||
*pDst++ = (in > 0) ? in : (q31_t)__QSUB(0, in);
|
||||
#else
|
||||
*pDst++ = (in > 0) ? in : ((in == INT32_MIN) ? INT32_MAX : -in);
|
||||
#endif
|
||||
|
||||
in = *pSrc++;
|
||||
#if defined (ARM_MATH_DSP)
|
||||
*pDst++ = (in > 0) ? in : (q31_t)__QSUB(0, in);
|
||||
#else
|
||||
*pDst++ = (in > 0) ? in : ((in == INT32_MIN) ? INT32_MAX : -in);
|
||||
#endif
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
#endif /* #if defined (ARM_MATH_NEON) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = |A| */
|
||||
|
||||
/* Calculate absolute of input (if -1 then saturated to 0x7fffffff) and store result in destination buffer. */
|
||||
in = *pSrc++;
|
||||
#if defined (ARM_MATH_DSP)
|
||||
*pDst++ = (in > 0) ? in : (q31_t)__QSUB(0, in);
|
||||
#else
|
||||
*pDst++ = (in > 0) ? in : ((in == INT32_MIN) ? INT32_MAX : -in);
|
||||
#endif
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* #if defined (ARM_MATH_MVEI) */
|
||||
/**
|
||||
@} end of BasicAbs group
|
||||
*/
|
||||
180
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_abs_q7.c
Normal file
180
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_abs_q7.c
Normal file
@@ -0,0 +1,180 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_abs_q7.c
|
||||
* Description: Q7 vector absolute value
|
||||
*
|
||||
* $Date: 18. March 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
@ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup BasicAbs
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Q7 vector absolute value.
|
||||
@param[in] pSrc points to the input vector
|
||||
@param[out] pDst points to the output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
|
||||
@par Conditions for optimum performance
|
||||
Input and output buffers should be 32-bit aligned.
|
||||
@par Scaling and Overflow Behavior
|
||||
The function uses saturating arithmetic.
|
||||
The Q7 value -1 (0x80) will be saturated to the maximum allowable positive value 0x7F.
|
||||
*/
|
||||
|
||||
#if defined(ARM_MATH_MVEI)
|
||||
|
||||
#include "arm_helium_utils.h"
|
||||
|
||||
void arm_abs_q7(
|
||||
const q7_t * pSrc,
|
||||
q7_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counters */
|
||||
q7x16_t vecSrc;
|
||||
|
||||
/* Compute 16 outputs at a time */
|
||||
blkCnt = blockSize >> 4;
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/*
|
||||
* C = |A|
|
||||
* Calculate absolute and then store the results in the destination buffer.
|
||||
*/
|
||||
vecSrc = vld1q(pSrc);
|
||||
vst1q(pDst, vqabsq(vecSrc));
|
||||
/*
|
||||
* Decrement the blockSize loop counter
|
||||
*/
|
||||
blkCnt--;
|
||||
/*
|
||||
* advance vector source and destination pointers
|
||||
*/
|
||||
pSrc += 16;
|
||||
pDst += 16;
|
||||
}
|
||||
/*
|
||||
* tail
|
||||
*/
|
||||
blkCnt = blockSize & 0xF;
|
||||
if (blkCnt > 0U)
|
||||
{
|
||||
mve_pred16_t p0 = vctp8q(blkCnt);
|
||||
vecSrc = vld1q(pSrc);
|
||||
vstrbq_p(pDst, vqabsq(vecSrc), p0);
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
void arm_abs_q7(
|
||||
const q7_t * pSrc,
|
||||
q7_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
q7_t in; /* Temporary input variable */
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = |A| */
|
||||
|
||||
/* Calculate absolute of input (if -1 then saturated to 0x7f) and store result in destination buffer. */
|
||||
in = *pSrc++;
|
||||
#if defined (ARM_MATH_DSP)
|
||||
*pDst++ = (in > 0) ? in : (q7_t)__QSUB8(0, in);
|
||||
#else
|
||||
*pDst++ = (in > 0) ? in : ((in == (q7_t) 0x80) ? (q7_t) 0x7f : -in);
|
||||
#endif
|
||||
|
||||
in = *pSrc++;
|
||||
#if defined (ARM_MATH_DSP)
|
||||
*pDst++ = (in > 0) ? in : (q7_t)__QSUB8(0, in);
|
||||
#else
|
||||
*pDst++ = (in > 0) ? in : ((in == (q7_t) 0x80) ? (q7_t) 0x7f : -in);
|
||||
#endif
|
||||
|
||||
in = *pSrc++;
|
||||
#if defined (ARM_MATH_DSP)
|
||||
*pDst++ = (in > 0) ? in : (q7_t)__QSUB8(0, in);
|
||||
#else
|
||||
*pDst++ = (in > 0) ? in : ((in == (q7_t) 0x80) ? (q7_t) 0x7f : -in);
|
||||
#endif
|
||||
|
||||
in = *pSrc++;
|
||||
#if defined (ARM_MATH_DSP)
|
||||
*pDst++ = (in > 0) ? in : (q7_t)__QSUB8(0, in);
|
||||
#else
|
||||
*pDst++ = (in > 0) ? in : ((in == (q7_t) 0x80) ? (q7_t) 0x7f : -in);
|
||||
#endif
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = |A| */
|
||||
|
||||
/* Calculate absolute of input (if -1 then saturated to 0x7f) and store result in destination buffer. */
|
||||
in = *pSrc++;
|
||||
#if defined (ARM_MATH_DSP)
|
||||
*pDst++ = (in > 0) ? in : (q7_t) __QSUB8(0, in);
|
||||
#else
|
||||
*pDst++ = (in > 0) ? in : ((in == (q7_t) 0x80) ? (q7_t) 0x7f : -in);
|
||||
#endif
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* defined(ARM_MATH_MVEI) */
|
||||
|
||||
/**
|
||||
@} end of BasicAbs group
|
||||
*/
|
||||
199
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_add_f32.c
Normal file
199
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_add_f32.c
Normal file
@@ -0,0 +1,199 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_add_f32.c
|
||||
* Description: Floating-point vector addition
|
||||
*
|
||||
* $Date: 18. March 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
@ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
@defgroup BasicAdd Vector Addition
|
||||
|
||||
Element-by-element addition of two vectors.
|
||||
|
||||
<pre>
|
||||
pDst[n] = pSrcA[n] + pSrcB[n], 0 <= n < blockSize.
|
||||
</pre>
|
||||
|
||||
There are separate functions for floating-point, Q7, Q15, and Q31 data types.
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup BasicAdd
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Floating-point vector addition.
|
||||
@param[in] pSrcA points to first input vector
|
||||
@param[in] pSrcB points to second input vector
|
||||
@param[out] pDst points to output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
*/
|
||||
|
||||
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
|
||||
#include "arm_helium_utils.h"
|
||||
|
||||
void arm_add_f32(
|
||||
const float32_t * pSrcA,
|
||||
const float32_t * pSrcB,
|
||||
float32_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
|
||||
f32x4_t vec1;
|
||||
f32x4_t vec2;
|
||||
f32x4_t res;
|
||||
|
||||
/* Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A + B */
|
||||
|
||||
/* Add and then store the results in the destination buffer. */
|
||||
vec1 = vld1q(pSrcA);
|
||||
vec2 = vld1q(pSrcB);
|
||||
res = vaddq(vec1, vec2);
|
||||
vst1q(pDst, res);
|
||||
|
||||
/* Increment pointers */
|
||||
pSrcA += 4;
|
||||
pSrcB += 4;
|
||||
pDst += 4;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Tail */
|
||||
blkCnt = blockSize & 0x3;
|
||||
|
||||
if (blkCnt > 0U)
|
||||
{
|
||||
/* C = A + B */
|
||||
mve_pred16_t p0 = vctp32q(blkCnt);
|
||||
vec1 = vld1q(pSrcA);
|
||||
vec2 = vld1q(pSrcB);
|
||||
vstrwq_p(pDst, vaddq(vec1,vec2), p0);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#else
|
||||
void arm_add_f32(
|
||||
const float32_t * pSrcA,
|
||||
const float32_t * pSrcB,
|
||||
float32_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
|
||||
#if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
f32x4_t vec1;
|
||||
f32x4_t vec2;
|
||||
f32x4_t res;
|
||||
|
||||
/* Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A + B */
|
||||
|
||||
/* Add and then store the results in the destination buffer. */
|
||||
vec1 = vld1q_f32(pSrcA);
|
||||
vec2 = vld1q_f32(pSrcB);
|
||||
res = vaddq_f32(vec1, vec2);
|
||||
vst1q_f32(pDst, res);
|
||||
|
||||
/* Increment pointers */
|
||||
pSrcA += 4;
|
||||
pSrcB += 4;
|
||||
pDst += 4;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Tail */
|
||||
blkCnt = blockSize & 0x3;
|
||||
|
||||
#else
|
||||
#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A + B */
|
||||
|
||||
/* Add and store result in destination buffer. */
|
||||
*pDst++ = (*pSrcA++) + (*pSrcB++);
|
||||
*pDst++ = (*pSrcA++) + (*pSrcB++);
|
||||
*pDst++ = (*pSrcA++) + (*pSrcB++);
|
||||
*pDst++ = (*pSrcA++) + (*pSrcB++);
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
#endif /* #if defined(ARM_MATH_NEON) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A + B */
|
||||
|
||||
/* Add and store result in destination buffer. */
|
||||
*pDst++ = (*pSrcA++) + (*pSrcB++);
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
|
||||
|
||||
/**
|
||||
@} end of BasicAdd group
|
||||
*/
|
||||
176
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_add_q15.c
Normal file
176
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_add_q15.c
Normal file
@@ -0,0 +1,176 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_add_q15.c
|
||||
* Description: Q15 vector addition
|
||||
*
|
||||
* $Date: 18. March 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
@ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup BasicAdd
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Q15 vector addition.
|
||||
@param[in] pSrcA points to the first input vector
|
||||
@param[in] pSrcB points to the second input vector
|
||||
@param[out] pDst points to the output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
|
||||
@par Scaling and Overflow Behavior
|
||||
The function uses saturating arithmetic.
|
||||
Results outside of the allowable Q15 range [0x8000 0x7FFF] are saturated.
|
||||
*/
|
||||
|
||||
#if defined(ARM_MATH_MVEI)
|
||||
|
||||
#include "arm_helium_utils.h"
|
||||
|
||||
void arm_add_q15(
|
||||
const q15_t * pSrcA,
|
||||
const q15_t * pSrcB,
|
||||
q15_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counters */
|
||||
q15x8_t vecA;
|
||||
q15x8_t vecB;
|
||||
|
||||
/* Compute 8 outputs at a time */
|
||||
blkCnt = blockSize >> 3;
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/*
|
||||
* C = A + B
|
||||
* Add and then store the results in the destination buffer.
|
||||
*/
|
||||
vecA = vld1q(pSrcA);
|
||||
vecB = vld1q(pSrcB);
|
||||
vst1q(pDst, vqaddq(vecA, vecB));
|
||||
/*
|
||||
* Decrement the blockSize loop counter
|
||||
*/
|
||||
blkCnt--;
|
||||
/*
|
||||
* advance vector source and destination pointers
|
||||
*/
|
||||
pSrcA += 8;
|
||||
pSrcB += 8;
|
||||
pDst += 8;
|
||||
}
|
||||
/*
|
||||
* tail
|
||||
*/
|
||||
blkCnt = blockSize & 7;
|
||||
if (blkCnt > 0U)
|
||||
{
|
||||
mve_pred16_t p0 = vctp16q(blkCnt);
|
||||
vecA = vld1q(pSrcA);
|
||||
vecB = vld1q(pSrcB);
|
||||
vstrhq_p(pDst, vqaddq(vecA, vecB), p0);
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
void arm_add_q15(
|
||||
const q15_t * pSrcA,
|
||||
const q15_t * pSrcB,
|
||||
q15_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
#if defined (ARM_MATH_DSP)
|
||||
q31_t inA1, inA2;
|
||||
q31_t inB1, inB2;
|
||||
#endif
|
||||
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A + B */
|
||||
|
||||
#if defined (ARM_MATH_DSP)
|
||||
/* read 2 times 2 samples at a time from sourceA */
|
||||
inA1 = read_q15x2_ia ((q15_t **) &pSrcA);
|
||||
inA2 = read_q15x2_ia ((q15_t **) &pSrcA);
|
||||
/* read 2 times 2 samples at a time from sourceB */
|
||||
inB1 = read_q15x2_ia ((q15_t **) &pSrcB);
|
||||
inB2 = read_q15x2_ia ((q15_t **) &pSrcB);
|
||||
|
||||
/* Add and store 2 times 2 samples at a time */
|
||||
write_q15x2_ia (&pDst, __QADD16(inA1, inB1));
|
||||
write_q15x2_ia (&pDst, __QADD16(inA2, inB2));
|
||||
#else
|
||||
*pDst++ = (q15_t) __SSAT(((q31_t) *pSrcA++ + *pSrcB++), 16);
|
||||
*pDst++ = (q15_t) __SSAT(((q31_t) *pSrcA++ + *pSrcB++), 16);
|
||||
*pDst++ = (q15_t) __SSAT(((q31_t) *pSrcA++ + *pSrcB++), 16);
|
||||
*pDst++ = (q15_t) __SSAT(((q31_t) *pSrcA++ + *pSrcB++), 16);
|
||||
#endif
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A + B */
|
||||
|
||||
/* Add and store result in destination buffer. */
|
||||
#if defined (ARM_MATH_DSP)
|
||||
*pDst++ = (q15_t) __QADD16(*pSrcA++, *pSrcB++);
|
||||
#else
|
||||
*pDst++ = (q15_t) __SSAT(((q31_t) *pSrcA++ + *pSrcB++), 16);
|
||||
#endif
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* defined(ARM_MATH_MVEI) */
|
||||
/**
|
||||
@} end of BasicAdd group
|
||||
*/
|
||||
159
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_add_q31.c
Normal file
159
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_add_q31.c
Normal file
@@ -0,0 +1,159 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_add_q31.c
|
||||
* Description: Q31 vector addition
|
||||
*
|
||||
* $Date: 18. March 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
@ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup BasicAdd
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Q31 vector addition.
|
||||
@param[in] pSrcA points to the first input vector
|
||||
@param[in] pSrcB points to the second input vector
|
||||
@param[out] pDst points to the output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
|
||||
@par Scaling and Overflow Behavior
|
||||
The function uses saturating arithmetic.
|
||||
Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] are saturated.
|
||||
*/
|
||||
|
||||
#if defined(ARM_MATH_MVEI)
|
||||
|
||||
#include "arm_helium_utils.h"
|
||||
|
||||
void arm_add_q31(
|
||||
const q31_t * pSrcA,
|
||||
const q31_t * pSrcB,
|
||||
q31_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt;
|
||||
q31x4_t vecA;
|
||||
q31x4_t vecB;
|
||||
|
||||
/* Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2;
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/*
|
||||
* C = A + B
|
||||
* Add and then store the results in the destination buffer.
|
||||
*/
|
||||
vecA = vld1q(pSrcA);
|
||||
vecB = vld1q(pSrcB);
|
||||
vst1q(pDst, vqaddq(vecA, vecB));
|
||||
/*
|
||||
* Decrement the blockSize loop counter
|
||||
*/
|
||||
blkCnt--;
|
||||
/*
|
||||
* advance vector source and destination pointers
|
||||
*/
|
||||
pSrcA += 4;
|
||||
pSrcB += 4;
|
||||
pDst += 4;
|
||||
}
|
||||
/*
|
||||
* tail
|
||||
*/
|
||||
blkCnt = blockSize & 3;
|
||||
if (blkCnt > 0U)
|
||||
{
|
||||
mve_pred16_t p0 = vctp32q(blkCnt);
|
||||
vecA = vld1q(pSrcA);
|
||||
vecB = vld1q(pSrcB);
|
||||
vstrwq_p(pDst, vqaddq(vecA, vecB), p0);
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
void arm_add_q31(
|
||||
const q31_t * pSrcA,
|
||||
const q31_t * pSrcB,
|
||||
q31_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A + B */
|
||||
|
||||
/* Add and store result in destination buffer. */
|
||||
*pDst++ = __QADD(*pSrcA++, *pSrcB++);
|
||||
|
||||
*pDst++ = __QADD(*pSrcA++, *pSrcB++);
|
||||
|
||||
*pDst++ = __QADD(*pSrcA++, *pSrcB++);
|
||||
|
||||
*pDst++ = __QADD(*pSrcA++, *pSrcB++);
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A + B */
|
||||
|
||||
/* Add and store result in destination buffer. */
|
||||
*pDst++ = __QADD(*pSrcA++, *pSrcB++);
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif /* defined(ARM_MATH_MVEI) */
|
||||
/**
|
||||
@} end of BasicAdd group
|
||||
*/
|
||||
158
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_add_q7.c
Normal file
158
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_add_q7.c
Normal file
@@ -0,0 +1,158 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_add_q7.c
|
||||
* Description: Q7 vector addition
|
||||
*
|
||||
* $Date: 18. March 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
@ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup BasicAdd
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Q7 vector addition.
|
||||
@param[in] pSrcA points to the first input vector
|
||||
@param[in] pSrcB points to the second input vector
|
||||
@param[out] pDst points to the output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
|
||||
@par Scaling and Overflow Behavior
|
||||
The function uses saturating arithmetic.
|
||||
Results outside of the allowable Q7 range [0x80 0x7F] are saturated.
|
||||
*/
|
||||
|
||||
#if defined(ARM_MATH_MVEI)
|
||||
|
||||
#include "arm_helium_utils.h"
|
||||
|
||||
void arm_add_q7(
|
||||
const q7_t * pSrcA,
|
||||
const q7_t * pSrcB,
|
||||
q7_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counters */
|
||||
q7x16_t vecA;
|
||||
q7x16_t vecB;
|
||||
|
||||
/* Compute 16 outputs at a time */
|
||||
blkCnt = blockSize >> 4;
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/*
|
||||
* C = A + B
|
||||
* Add and then store the results in the destination buffer.
|
||||
*/
|
||||
vecA = vld1q(pSrcA);
|
||||
vecB = vld1q(pSrcB);
|
||||
vst1q(pDst, vqaddq(vecA, vecB));
|
||||
/*
|
||||
* Decrement the blockSize loop counter
|
||||
*/
|
||||
blkCnt--;
|
||||
/*
|
||||
* advance vector source and destination pointers
|
||||
*/
|
||||
pSrcA += 16;
|
||||
pSrcB += 16;
|
||||
pDst += 16;
|
||||
}
|
||||
/*
|
||||
* tail
|
||||
*/
|
||||
blkCnt = blockSize & 0xF;
|
||||
if (blkCnt > 0U)
|
||||
{
|
||||
mve_pred16_t p0 = vctp8q(blkCnt);
|
||||
vecA = vld1q(pSrcA);
|
||||
vecB = vld1q(pSrcB);
|
||||
vstrbq_p(pDst, vqaddq(vecA, vecB), p0);
|
||||
}
|
||||
}
|
||||
#else
|
||||
void arm_add_q7(
|
||||
const q7_t * pSrcA,
|
||||
const q7_t * pSrcB,
|
||||
q7_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A + B */
|
||||
|
||||
#if defined (ARM_MATH_DSP)
|
||||
/* Add and store result in destination buffer (4 samples at a time). */
|
||||
write_q7x4_ia (&pDst, __QADD8 (read_q7x4_ia ((q7_t **) &pSrcA), read_q7x4_ia ((q7_t **) &pSrcB)));
|
||||
#else
|
||||
*pDst++ = (q7_t) __SSAT ((q15_t) *pSrcA++ + *pSrcB++, 8);
|
||||
*pDst++ = (q7_t) __SSAT ((q15_t) *pSrcA++ + *pSrcB++, 8);
|
||||
*pDst++ = (q7_t) __SSAT ((q15_t) *pSrcA++ + *pSrcB++, 8);
|
||||
*pDst++ = (q7_t) __SSAT ((q15_t) *pSrcA++ + *pSrcB++, 8);
|
||||
#endif
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A + B */
|
||||
|
||||
/* Add and store result in destination buffer. */
|
||||
*pDst++ = (q7_t) __SSAT((q15_t) *pSrcA++ + *pSrcB++, 8);
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* defined(ARM_MATH_MVEI) */
|
||||
/**
|
||||
@} end of BasicAdd group
|
||||
*/
|
||||
137
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_and_u16.c
Normal file
137
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_and_u16.c
Normal file
@@ -0,0 +1,137 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_and_u16.c
|
||||
* Description: uint16_t bitwise AND
|
||||
*
|
||||
* $Date: 14 November 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
@ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
@defgroup And Vector bitwise AND
|
||||
|
||||
Compute the logical bitwise AND.
|
||||
|
||||
  There are separate functions for uint32_t, uint16_t, and uint8_t data types.
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup And
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Compute the logical bitwise AND of two fixed-point vectors.
|
||||
@param[in] pSrcA points to input vector A
|
||||
@param[in] pSrcB points to input vector B
|
||||
@param[out] pDst points to output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
*/
|
||||
|
||||
void arm_and_u16(
    const uint16_t * pSrcA,
    const uint16_t * pSrcB,
          uint16_t * pDst,
          uint32_t blockSize)
{
    /*
     * Element-wise bitwise AND: pDst[i] = pSrcA[i] & pSrcB[i] for
     * i in [0, blockSize). Nothing is written when blockSize is 0.
     */
    uint32_t blkCnt;      /* Loop counter */

#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
    /*
     * Unsigned vector type: matches the uint16_t pointers passed to
     * vld1q/vst1q and the operand type of vandq_u16, so no implicit
     * signed/unsigned vector conversion is needed.
     */
    uint16x8_t vecSrcA, vecSrcB;

    /* Compute 8 outputs at a time */
    blkCnt = blockSize >> 3;

    while (blkCnt > 0U)
    {
        vecSrcA = vld1q(pSrcA);
        vecSrcB = vld1q(pSrcB);

        vst1q(pDst, vandq_u16(vecSrcA, vecSrcB) );

        pSrcA += 8;
        pSrcB += 8;
        pDst  += 8;

        /* Decrement the loop counter */
        blkCnt--;
    }

    /* Tail */
    blkCnt = blockSize & 7;

    if (blkCnt > 0U)
    {
        /* Only the store is predicated; the two loads fetch a full vector. */
        mve_pred16_t p0 = vctp16q(blkCnt);
        vecSrcA = vld1q(pSrcA);
        vecSrcB = vld1q(pSrcB);
        vstrhq_p(pDst, vandq_u16(vecSrcA, vecSrcB), p0);
    }
#else
#if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
    uint16x8_t vecA, vecB;

    /* Compute 8 outputs at a time */
    blkCnt = blockSize >> 3U;

    while (blkCnt > 0U)
    {
        vecA = vld1q_u16(pSrcA);
        vecB = vld1q_u16(pSrcB);

        vst1q_u16(pDst, vandq_u16(vecA, vecB) );

        pSrcA += 8;
        pSrcB += 8;
        pDst  += 8;

        /* Decrement the loop counter */
        blkCnt--;
    }

    /* Tail: leftover samples are handled by the scalar loop below */
    blkCnt = blockSize & 7;
#else
    /* Initialize blkCnt with number of samples */
    blkCnt = blockSize;
#endif

    while (blkCnt > 0U)
    {
        *pDst++ = (uint16_t) ((*pSrcA++) & (*pSrcB++));

        /* Decrement the loop counter */
        blkCnt--;
    }
#endif /* if defined(ARM_MATH_MVEI) */
}
|
||||
|
||||
/**
|
||||
@} end of And group
|
||||
*/
|
||||
129
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_and_u32.c
Normal file
129
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_and_u32.c
Normal file
@@ -0,0 +1,129 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_and_u32.c
|
||||
* Description: uint32_t bitwise AND
|
||||
*
|
||||
* $Date: 14 November 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
@ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup And
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Compute the logical bitwise AND of two fixed-point vectors.
|
||||
@param[in] pSrcA points to input vector A
|
||||
@param[in] pSrcB points to input vector B
|
||||
@param[out] pDst points to output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
*/
|
||||
|
||||
void arm_and_u32(
    const uint32_t * pSrcA,
    const uint32_t * pSrcB,
          uint32_t * pDst,
          uint32_t blockSize)
{
    /*
     * Element-wise bitwise AND: pDst[i] = pSrcA[i] & pSrcB[i] for
     * i in [0, blockSize). Nothing is written when blockSize is 0.
     */
    uint32_t blkCnt;      /* Loop counter */

#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
    /*
     * Unsigned vector type: matches the uint32_t pointers passed to
     * vld1q/vst1q and the operand type of vandq_u32, so no implicit
     * signed/unsigned vector conversion is needed.
     */
    uint32x4_t vecSrcA, vecSrcB;

    /* Compute 4 outputs at a time */
    blkCnt = blockSize >> 2;

    while (blkCnt > 0U)
    {
        vecSrcA = vld1q(pSrcA);
        vecSrcB = vld1q(pSrcB);

        vst1q(pDst, vandq_u32(vecSrcA, vecSrcB) );

        pSrcA += 4;
        pSrcB += 4;
        pDst  += 4;

        /* Decrement the loop counter */
        blkCnt--;
    }

    /* Tail */
    blkCnt = blockSize & 3;

    if (blkCnt > 0U)
    {
        /* Only the store is predicated; the two loads fetch a full vector. */
        mve_pred16_t p0 = vctp32q(blkCnt);
        vecSrcA = vld1q(pSrcA);
        vecSrcB = vld1q(pSrcB);
        vstrwq_p(pDst, vandq_u32(vecSrcA, vecSrcB), p0);
    }
#else
#if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
    uint32x4_t vecA, vecB;

    /* Compute 4 outputs at a time */
    blkCnt = blockSize >> 2U;

    while (blkCnt > 0U)
    {
        vecA = vld1q_u32(pSrcA);
        vecB = vld1q_u32(pSrcB);

        vst1q_u32(pDst, vandq_u32(vecA, vecB) );

        pSrcA += 4;
        pSrcB += 4;
        pDst  += 4;

        /* Decrement the loop counter */
        blkCnt--;
    }

    /* Tail: leftover samples are handled by the scalar loop below */
    blkCnt = blockSize & 3;
#else
    /* Initialize blkCnt with number of samples */
    blkCnt = blockSize;
#endif

    while (blkCnt > 0U)
    {
        *pDst++ = (*pSrcA++) & (*pSrcB++);

        /* Decrement the loop counter */
        blkCnt--;
    }
#endif /* if defined(ARM_MATH_MVEI) */
}
|
||||
|
||||
/**
|
||||
@} end of And group
|
||||
*/
|
||||
130
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_and_u8.c
Normal file
130
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_and_u8.c
Normal file
@@ -0,0 +1,130 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_and_u8.c
|
||||
* Description: uint8_t bitwise AND
|
||||
*
|
||||
* $Date: 14 November 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
@ingroup groupMath
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
@addtogroup And
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Compute the logical bitwise AND of two fixed-point vectors.
|
||||
@param[in] pSrcA points to input vector A
|
||||
@param[in] pSrcB points to input vector B
|
||||
@param[out] pDst points to output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
*/
|
||||
|
||||
void arm_and_u8(
    const uint8_t * pSrcA,
    const uint8_t * pSrcB,
          uint8_t * pDst,
          uint32_t blockSize)
{
    /*
     * Element-wise bitwise AND: pDst[i] = pSrcA[i] & pSrcB[i] for
     * i in [0, blockSize). Nothing is written when blockSize is 0.
     */
    uint32_t blkCnt;      /* Loop counter */

#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
    /*
     * Unsigned vector type: matches the uint8_t pointers passed to
     * vld1q/vst1q and the operand type of vandq_u8, so no implicit
     * signed/unsigned vector conversion is needed.
     */
    uint8x16_t vecSrcA, vecSrcB;

    /* Compute 16 outputs at a time */
    blkCnt = blockSize >> 4;

    while (blkCnt > 0U)
    {
        vecSrcA = vld1q(pSrcA);
        vecSrcB = vld1q(pSrcB);

        vst1q(pDst, vandq_u8(vecSrcA, vecSrcB) );

        pSrcA += 16;
        pSrcB += 16;
        pDst  += 16;

        /* Decrement the loop counter */
        blkCnt--;
    }

    /* Tail */
    blkCnt = blockSize & 0xF;

    if (blkCnt > 0U)
    {
        /* Only the store is predicated; the two loads fetch a full vector. */
        mve_pred16_t p0 = vctp8q(blkCnt);
        vecSrcA = vld1q(pSrcA);
        vecSrcB = vld1q(pSrcB);
        vstrbq_p(pDst, vandq_u8(vecSrcA, vecSrcB), p0);
    }
#else
#if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
    uint8x16_t vecA, vecB;

    /* Compute 16 outputs at a time */
    blkCnt = blockSize >> 4U;

    while (blkCnt > 0U)
    {
        vecA = vld1q_u8(pSrcA);
        vecB = vld1q_u8(pSrcB);

        vst1q_u8(pDst, vandq_u8(vecA, vecB) );

        pSrcA += 16;
        pSrcB += 16;
        pDst  += 16;

        /* Decrement the loop counter */
        blkCnt--;
    }

    /* Tail: leftover samples are handled by the scalar loop below */
    blkCnt = blockSize & 0xF;
#else
    /* Initialize blkCnt with number of samples */
    blkCnt = blockSize;
#endif

    while (blkCnt > 0U)
    {
        *pDst++ = (uint8_t) ((*pSrcA++) & (*pSrcB++));

        /* Decrement the loop counter */
        blkCnt--;
    }
#endif /* if defined(ARM_MATH_MVEI) */
}
|
||||
|
||||
/**
|
||||
@} end of And group
|
||||
*/
|
||||
226
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_dot_prod_f32.c
Normal file
226
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_dot_prod_f32.c
Normal file
@@ -0,0 +1,226 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_dot_prod_f32.c
|
||||
* Description: Floating-point dot product
|
||||
*
|
||||
* $Date: 18. March 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
@ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
@defgroup BasicDotProd Vector Dot Product
|
||||
|
||||
Computes the dot product of two vectors.
|
||||
The vectors are multiplied element-by-element and then summed.
|
||||
|
||||
<pre>
|
||||
sum = pSrcA[0]*pSrcB[0] + pSrcA[1]*pSrcB[1] + ... + pSrcA[blockSize-1]*pSrcB[blockSize-1]
|
||||
</pre>
|
||||
|
||||
There are separate functions for floating-point, Q7, Q15, and Q31 data types.
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup BasicDotProd
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Dot product of floating-point vectors.
|
||||
@param[in] pSrcA points to the first input vector.
|
||||
@param[in] pSrcB points to the second input vector.
|
||||
@param[in] blockSize number of samples in each vector.
|
||||
@param[out] result output result returned here.
|
||||
@return none
|
||||
*/
|
||||
|
||||
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
|
||||
#include "arm_helium_utils.h"
|
||||
|
||||
|
||||
void arm_dot_prod_f32(
|
||||
const float32_t * pSrcA,
|
||||
const float32_t * pSrcB,
|
||||
uint32_t blockSize,
|
||||
float32_t * result)
|
||||
{
|
||||
f32x4_t vecA, vecB;
|
||||
f32x4_t vecSum;
|
||||
uint32_t blkCnt;
|
||||
float32_t sum = 0.0f;
|
||||
vecSum = vdupq_n_f32(0.0f);
|
||||
|
||||
/* Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/*
|
||||
* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1]
|
||||
* Calculate dot product and then store the result in a temporary buffer.
|
||||
* and advance vector source and destination pointers
|
||||
*/
|
||||
vecA = vld1q(pSrcA);
|
||||
pSrcA += 4;
|
||||
|
||||
vecB = vld1q(pSrcB);
|
||||
pSrcB += 4;
|
||||
|
||||
vecSum = vfmaq(vecSum, vecA, vecB);
|
||||
/*
|
||||
* Decrement the blockSize loop counter
|
||||
*/
|
||||
blkCnt --;
|
||||
}
|
||||
|
||||
|
||||
blkCnt = blockSize & 3;
|
||||
if (blkCnt > 0U)
|
||||
{
|
||||
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
|
||||
|
||||
mve_pred16_t p0 = vctp32q(blkCnt);
|
||||
vecA = vld1q(pSrcA);
|
||||
vecB = vld1q(pSrcB);
|
||||
vecSum = vfmaq_m(vecSum, vecA, vecB, p0);
|
||||
}
|
||||
|
||||
sum = vecAddAcrossF32Mve(vecSum);
|
||||
|
||||
/* Store result in destination buffer */
|
||||
*result = sum;
|
||||
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
void arm_dot_prod_f32(
|
||||
const float32_t * pSrcA,
|
||||
const float32_t * pSrcB,
|
||||
uint32_t blockSize,
|
||||
float32_t * result)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
float32_t sum = 0.0f; /* Temporary return variable */
|
||||
|
||||
#if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
f32x4_t vec1;
|
||||
f32x4_t vec2;
|
||||
f32x4_t accum = vdupq_n_f32(0);
|
||||
f32x2_t tmp = vdup_n_f32(0);
|
||||
|
||||
/* Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
vec1 = vld1q_f32(pSrcA);
|
||||
vec2 = vld1q_f32(pSrcB);
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A[0]*B[0] + A[1]*B[1] + A[2]*B[2] + ... + A[blockSize-1]*B[blockSize-1] */
|
||||
/* Calculate dot product and then store the result in a temporary buffer. */
|
||||
|
||||
accum = vmlaq_f32(accum, vec1, vec2);
|
||||
|
||||
/* Increment pointers */
|
||||
pSrcA += 4;
|
||||
pSrcB += 4;
|
||||
|
||||
vec1 = vld1q_f32(pSrcA);
|
||||
vec2 = vld1q_f32(pSrcB);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#if __aarch64__
|
||||
sum = vpadds_f32(vpadd_f32(vget_low_f32(accum), vget_high_f32(accum)));
|
||||
#else
|
||||
tmp = vpadd_f32(vget_low_f32(accum), vget_high_f32(accum));
|
||||
sum = vget_lane_f32(tmp, 0) + vget_lane_f32(tmp, 1);
|
||||
|
||||
#endif
|
||||
|
||||
/* Tail */
|
||||
blkCnt = blockSize & 0x3;
|
||||
|
||||
#else
|
||||
#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
|
||||
|
||||
/* Calculate dot product and store result in a temporary buffer. */
|
||||
sum += (*pSrcA++) * (*pSrcB++);
|
||||
|
||||
sum += (*pSrcA++) * (*pSrcB++);
|
||||
|
||||
sum += (*pSrcA++) * (*pSrcB++);
|
||||
|
||||
sum += (*pSrcA++) * (*pSrcB++);
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
#endif /* #if defined(ARM_MATH_NEON) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
|
||||
|
||||
/* Calculate dot product and store result in a temporary buffer. */
|
||||
sum += (*pSrcA++) * (*pSrcB++);
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Store result in destination buffer */
|
||||
*result = sum;
|
||||
}
|
||||
|
||||
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
|
||||
/**
|
||||
@} end of BasicDotProd group
|
||||
*/
|
||||
172
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_dot_prod_q15.c
Normal file
172
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_dot_prod_q15.c
Normal file
@@ -0,0 +1,172 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_dot_prod_q15.c
|
||||
* Description: Q15 dot product
|
||||
*
|
||||
* $Date: 18. March 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
@ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup BasicDotProd
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Dot product of Q15 vectors.
|
||||
@param[in] pSrcA points to the first input vector
|
||||
@param[in] pSrcB points to the second input vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@param[out] result output result returned here
|
||||
@return none
|
||||
|
||||
@par Scaling and Overflow Behavior
|
||||
The intermediate multiplications are in 1.15 x 1.15 = 2.30 format and these
|
||||
results are added to a 64-bit accumulator in 34.30 format.
|
||||
Nonsaturating additions are used and given that there are 33 guard bits in the accumulator
|
||||
there is no risk of overflow.
|
||||
The return result is in 34.30 format.
|
||||
*/
|
||||
#if defined(ARM_MATH_MVEI)
|
||||
|
||||
#include "arm_helium_utils.h"
|
||||
|
||||
void arm_dot_prod_q15(
|
||||
const q15_t * pSrcA,
|
||||
const q15_t * pSrcB,
|
||||
uint32_t blockSize,
|
||||
q63_t * result)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counters */
|
||||
q15x8_t vecA;
|
||||
q15x8_t vecB;
|
||||
q63_t sum = 0LL;
|
||||
|
||||
/* Compute 8 outputs at a time */
|
||||
blkCnt = blockSize >> 3;
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/*
|
||||
* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1]
|
||||
* Calculate dot product and then store the result in a temporary buffer.
|
||||
*/
|
||||
vecA = vld1q(pSrcA);
|
||||
vecB = vld1q(pSrcB);
|
||||
sum = vmlaldavaq(sum, vecA, vecB);
|
||||
/*
|
||||
* Decrement the blockSize loop counter
|
||||
*/
|
||||
blkCnt--;
|
||||
/*
|
||||
* advance vector source and destination pointers
|
||||
*/
|
||||
pSrcA += 8;
|
||||
pSrcB += 8;
|
||||
}
|
||||
/*
|
||||
* tail
|
||||
*/
|
||||
blkCnt = blockSize & 7;
|
||||
if (blkCnt > 0U)
|
||||
{
|
||||
mve_pred16_t p0 = vctp16q(blkCnt);
|
||||
vecA = vld1q(pSrcA);
|
||||
vecB = vld1q(pSrcB);
|
||||
sum = vmlaldavaq_p(sum, vecA, vecB, p0);
|
||||
}
|
||||
|
||||
*result = sum;
|
||||
}
|
||||
|
||||
#else
|
||||
void arm_dot_prod_q15(
|
||||
const q15_t * pSrcA,
|
||||
const q15_t * pSrcB,
|
||||
uint32_t blockSize,
|
||||
q63_t * result)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
q63_t sum = 0; /* Temporary return variable */
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
|
||||
|
||||
#if defined (ARM_MATH_DSP)
|
||||
/* Calculate dot product and store result in a temporary buffer. */
|
||||
sum = __SMLALD(read_q15x2_ia ((q15_t **) &pSrcA), read_q15x2_ia ((q15_t **) &pSrcB), sum);
|
||||
sum = __SMLALD(read_q15x2_ia ((q15_t **) &pSrcA), read_q15x2_ia ((q15_t **) &pSrcB), sum);
|
||||
#else
|
||||
sum += (q63_t)((q31_t) *pSrcA++ * *pSrcB++);
|
||||
sum += (q63_t)((q31_t) *pSrcA++ * *pSrcB++);
|
||||
sum += (q63_t)((q31_t) *pSrcA++ * *pSrcB++);
|
||||
sum += (q63_t)((q31_t) *pSrcA++ * *pSrcB++);
|
||||
#endif
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
|
||||
|
||||
/* Calculate dot product and store result in a temporary buffer. */
|
||||
//#if defined (ARM_MATH_DSP)
|
||||
// sum = __SMLALD(*pSrcA++, *pSrcB++, sum);
|
||||
//#else
|
||||
sum += (q63_t)((q31_t) *pSrcA++ * *pSrcB++);
|
||||
//#endif
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Store result in destination buffer in 34.30 format */
|
||||
*result = sum;
|
||||
}
|
||||
#endif /* defined(ARM_MATH_MVEI) */
|
||||
|
||||
/**
|
||||
@} end of BasicDotProd group
|
||||
*/
|
||||
174
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_dot_prod_q31.c
Normal file
174
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_dot_prod_q31.c
Normal file
@@ -0,0 +1,174 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_dot_prod_q31.c
|
||||
* Description: Q31 dot product
|
||||
*
|
||||
* $Date: 18. March 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
@ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup BasicDotProd
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Dot product of Q31 vectors.
|
||||
@param[in] pSrcA points to the first input vector.
|
||||
@param[in] pSrcB points to the second input vector.
|
||||
@param[in] blockSize number of samples in each vector.
|
||||
@param[out] result output result returned here.
|
||||
@return none
|
||||
|
||||
@par Scaling and Overflow Behavior
|
||||
The intermediate multiplications are in 1.31 x 1.31 = 2.62 format and these
|
||||
are truncated to 2.48 format by discarding the lower 14 bits.
|
||||
The 2.48 result is then added without saturation to a 64-bit accumulator in 16.48 format.
|
||||
There are 15 guard bits in the accumulator and there is no risk of overflow as long as
|
||||
the length of the vectors is less than 2^16 elements.
|
||||
The return result is in 16.48 format.
|
||||
*/
|
||||
|
||||
#if defined(ARM_MATH_MVEI)
|
||||
|
||||
#include "arm_helium_utils.h"
|
||||
|
||||
void arm_dot_prod_q31(
|
||||
const q31_t * pSrcA,
|
||||
const q31_t * pSrcB,
|
||||
uint32_t blockSize,
|
||||
q63_t * result)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counters */
|
||||
q31x4_t vecA;
|
||||
q31x4_t vecB;
|
||||
q63_t sum = 0LL;
|
||||
|
||||
/* Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2;
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/*
|
||||
* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1]
|
||||
* Calculate dot product and then store the result in a temporary buffer.
|
||||
*/
|
||||
vecA = vld1q(pSrcA);
|
||||
vecB = vld1q(pSrcB);
|
||||
sum = vrmlaldavhaq(sum, vecA, vecB);
|
||||
/*
|
||||
* Decrement the blockSize loop counter
|
||||
*/
|
||||
blkCnt--;
|
||||
/*
|
||||
* advance vector source and destination pointers
|
||||
*/
|
||||
pSrcA += 4;
|
||||
pSrcB += 4;
|
||||
}
|
||||
/*
|
||||
* tail
|
||||
*/
|
||||
blkCnt = blockSize & 3;
|
||||
if (blkCnt > 0U)
|
||||
{
|
||||
mve_pred16_t p0 = vctp32q(blkCnt);
|
||||
vecA = vld1q(pSrcA);
|
||||
vecB = vld1q(pSrcB);
|
||||
sum = vrmlaldavhaq_p(sum, vecA, vecB, p0);
|
||||
}
|
||||
|
||||
/*
|
||||
* vrmlaldavhaq provides extra intermediate accumulator headroom.
|
||||
* limiting the need of intermediate scaling
|
||||
* Scalar variant uses 2.48 accu format by right shifting accumulators by 14.
|
||||
* 16.48 output conversion is performed outside the loop by scaling accu. by 6
|
||||
*/
|
||||
*result = asrl(sum, (14 - 8));
|
||||
}
|
||||
|
||||
#else
|
||||
void arm_dot_prod_q31(
|
||||
const q31_t * pSrcA,
|
||||
const q31_t * pSrcB,
|
||||
uint32_t blockSize,
|
||||
q63_t * result)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
q63_t sum = 0; /* Temporary return variable */
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
|
||||
|
||||
/* Calculate dot product and store result in a temporary buffer. */
|
||||
sum += ((q63_t) *pSrcA++ * *pSrcB++) >> 14U;
|
||||
|
||||
sum += ((q63_t) *pSrcA++ * *pSrcB++) >> 14U;
|
||||
|
||||
sum += ((q63_t) *pSrcA++ * *pSrcB++) >> 14U;
|
||||
|
||||
sum += ((q63_t) *pSrcA++ * *pSrcB++) >> 14U;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
|
||||
|
||||
/* Calculate dot product and store result in a temporary buffer. */
|
||||
sum += ((q63_t) *pSrcA++ * *pSrcB++) >> 14U;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Store result in destination buffer in 16.48 format */
|
||||
*result = sum;
|
||||
}
|
||||
#endif /* defined(ARM_MATH_MVEI) */
|
||||
|
||||
/**
|
||||
@} end of BasicDotProd group
|
||||
*/
|
||||
191
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_dot_prod_q7.c
Normal file
191
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_dot_prod_q7.c
Normal file
@@ -0,0 +1,191 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_dot_prod_q7.c
|
||||
* Description: Q7 dot product
|
||||
*
|
||||
* $Date: 18. March 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
@ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup BasicDotProd
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Dot product of Q7 vectors.
|
||||
@param[in] pSrcA points to the first input vector
|
||||
@param[in] pSrcB points to the second input vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@param[out] result output result returned here
|
||||
@return none
|
||||
|
||||
@par Scaling and Overflow Behavior
|
||||
The intermediate multiplications are in 1.7 x 1.7 = 2.14 format and these
|
||||
results are added to an accumulator in 18.14 format.
|
||||
Nonsaturating additions are used and there is no danger of wrap around as long as
|
||||
the vectors are less than 2^18 elements long.
|
||||
The return result is in 18.14 format.
|
||||
*/
|
||||
|
||||
#if defined(ARM_MATH_MVEI)
|
||||
|
||||
#include "arm_helium_utils.h"
|
||||
|
||||
void arm_dot_prod_q7(
|
||||
const q7_t * pSrcA,
|
||||
const q7_t * pSrcB,
|
||||
uint32_t blockSize,
|
||||
q31_t * result)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counters */
|
||||
q7x16_t vecA;
|
||||
q7x16_t vecB;
|
||||
q31_t sum = 0;
|
||||
|
||||
/* Compute 16 outputs at a time */
|
||||
blkCnt = blockSize >> 4;
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/*
|
||||
* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1]
|
||||
* Calculate dot product and then store the result in a temporary buffer.
|
||||
*/
|
||||
vecA = vld1q(pSrcA);
|
||||
vecB = vld1q(pSrcB);
|
||||
sum = vmladavaq(sum, vecA, vecB);
|
||||
/*
|
||||
* Decrement the blockSize loop counter
|
||||
*/
|
||||
blkCnt--;
|
||||
/*
|
||||
* advance vector source and destination pointers
|
||||
*/
|
||||
pSrcA += 16;
|
||||
pSrcB += 16;
|
||||
}
|
||||
/*
|
||||
* tail
|
||||
*/
|
||||
blkCnt = blockSize & 0xF;
|
||||
if (blkCnt > 0U)
|
||||
{
|
||||
mve_pred16_t p0 = vctp8q(blkCnt);
|
||||
vecA = vld1q(pSrcA);
|
||||
vecB = vld1q(pSrcB);
|
||||
sum = vmladavaq_p(sum, vecA, vecB, p0);
|
||||
}
|
||||
|
||||
*result = sum;
|
||||
}
|
||||
#else
|
||||
void arm_dot_prod_q7(
|
||||
const q7_t * pSrcA,
|
||||
const q7_t * pSrcB,
|
||||
uint32_t blockSize,
|
||||
q31_t * result)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
q31_t sum = 0; /* Temporary return variable */
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
#if defined (ARM_MATH_DSP)
|
||||
q31_t input1, input2; /* Temporary variables */
|
||||
q31_t inA1, inA2, inB1, inB2; /* Temporary variables */
|
||||
#endif
|
||||
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
|
||||
|
||||
#if defined (ARM_MATH_DSP)
|
||||
/* read 4 samples at a time from sourceA */
|
||||
input1 = read_q7x4_ia ((q7_t **) &pSrcA);
|
||||
/* read 4 samples at a time from sourceB */
|
||||
input2 = read_q7x4_ia ((q7_t **) &pSrcB);
|
||||
|
||||
/* extract two q7_t samples to q15_t samples */
|
||||
inA1 = __SXTB16(__ROR(input1, 8));
|
||||
/* extract reminaing two samples */
|
||||
inA2 = __SXTB16(input1);
|
||||
/* extract two q7_t samples to q15_t samples */
|
||||
inB1 = __SXTB16(__ROR(input2, 8));
|
||||
/* extract reminaing two samples */
|
||||
inB2 = __SXTB16(input2);
|
||||
|
||||
/* multiply and accumulate two samples at a time */
|
||||
sum = __SMLAD(inA1, inB1, sum);
|
||||
sum = __SMLAD(inA2, inB2, sum);
|
||||
#else
|
||||
sum += (q31_t) ((q15_t) *pSrcA++ * *pSrcB++);
|
||||
sum += (q31_t) ((q15_t) *pSrcA++ * *pSrcB++);
|
||||
sum += (q31_t) ((q15_t) *pSrcA++ * *pSrcB++);
|
||||
sum += (q31_t) ((q15_t) *pSrcA++ * *pSrcB++);
|
||||
#endif
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
|
||||
|
||||
/* Calculate dot product and store result in a temporary buffer. */
|
||||
//#if defined (ARM_MATH_DSP)
|
||||
// sum = __SMLAD(*pSrcA++, *pSrcB++, sum);
|
||||
//#else
|
||||
sum += (q31_t) ((q15_t) *pSrcA++ * *pSrcB++);
|
||||
//#endif
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Store result in destination buffer in 18.14 format */
|
||||
*result = sum;
|
||||
}
|
||||
#endif /* defined(ARM_MATH_MVEI) */
|
||||
|
||||
/**
|
||||
@} end of BasicDotProd group
|
||||
*/
|
||||
200
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_mult_f32.c
Normal file
200
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_mult_f32.c
Normal file
@@ -0,0 +1,200 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_mult_f32.c
|
||||
* Description: Floating-point vector multiplication
|
||||
*
|
||||
* $Date: 18. March 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
@ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
@defgroup BasicMult Vector Multiplication
|
||||
|
||||
Element-by-element multiplication of two vectors.
|
||||
|
||||
<pre>
|
||||
pDst[n] = pSrcA[n] * pSrcB[n], 0 <= n < blockSize.
|
||||
</pre>
|
||||
|
||||
There are separate functions for floating-point, Q7, Q15, and Q31 data types.
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup BasicMult
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Floating-point vector multiplication.
|
||||
@param[in] pSrcA points to the first input vector.
|
||||
@param[in] pSrcB points to the second input vector.
|
||||
@param[out] pDst points to the output vector.
|
||||
@param[in] blockSize number of samples in each vector.
|
||||
@return none
|
||||
*/
|
||||
|
||||
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
|
||||
#include "arm_helium_utils.h"
|
||||
|
||||
void arm_mult_f32(
|
||||
const float32_t * pSrcA,
|
||||
const float32_t * pSrcB,
|
||||
float32_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
|
||||
f32x4_t vec1;
|
||||
f32x4_t vec2;
|
||||
f32x4_t res;
|
||||
|
||||
/* Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A + B */
|
||||
|
||||
/* Add and then store the results in the destination buffer. */
|
||||
vec1 = vld1q(pSrcA);
|
||||
vec2 = vld1q(pSrcB);
|
||||
res = vmulq(vec1, vec2);
|
||||
vst1q(pDst, res);
|
||||
|
||||
/* Increment pointers */
|
||||
pSrcA += 4;
|
||||
pSrcB += 4;
|
||||
pDst += 4;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Tail */
|
||||
blkCnt = blockSize & 0x3;
|
||||
if (blkCnt > 0U)
|
||||
{
|
||||
/* C = A + B */
|
||||
mve_pred16_t p0 = vctp32q(blkCnt);
|
||||
vec1 = vld1q(pSrcA);
|
||||
vec2 = vld1q(pSrcB);
|
||||
vstrwq_p(pDst, vmulq(vec1,vec2), p0);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#else
|
||||
void arm_mult_f32(
|
||||
const float32_t * pSrcA,
|
||||
const float32_t * pSrcB,
|
||||
float32_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
|
||||
#if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
f32x4_t vec1;
|
||||
f32x4_t vec2;
|
||||
f32x4_t res;
|
||||
|
||||
/* Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A * B */
|
||||
|
||||
/* Multiply the inputs and then store the results in the destination buffer. */
|
||||
vec1 = vld1q_f32(pSrcA);
|
||||
vec2 = vld1q_f32(pSrcB);
|
||||
res = vmulq_f32(vec1, vec2);
|
||||
vst1q_f32(pDst, res);
|
||||
|
||||
/* Increment pointers */
|
||||
pSrcA += 4;
|
||||
pSrcB += 4;
|
||||
pDst += 4;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Tail */
|
||||
blkCnt = blockSize & 0x3;
|
||||
|
||||
#else
|
||||
#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A * B */
|
||||
|
||||
/* Multiply inputs and store result in destination buffer. */
|
||||
*pDst++ = (*pSrcA++) * (*pSrcB++);
|
||||
|
||||
*pDst++ = (*pSrcA++) * (*pSrcB++);
|
||||
|
||||
*pDst++ = (*pSrcA++) * (*pSrcB++);
|
||||
|
||||
*pDst++ = (*pSrcA++) * (*pSrcB++);
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
#endif /* #if defined(ARM_MATH_NEON) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A * B */
|
||||
|
||||
/* Multiply input and store result in destination buffer. */
|
||||
*pDst++ = (*pSrcA++) * (*pSrcB++);
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
|
||||
|
||||
/**
|
||||
@} end of BasicMult group
|
||||
*/
|
||||
192
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_mult_q15.c
Normal file
192
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_mult_q15.c
Normal file
@@ -0,0 +1,192 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_mult_q15.c
|
||||
* Description: Q15 vector multiplication
|
||||
*
|
||||
* $Date: 18. March 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
@ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup BasicMult
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Q15 vector multiplication
|
||||
@param[in] pSrcA points to first input vector
|
||||
@param[in] pSrcB points to second input vector
|
||||
@param[out] pDst points to output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
|
||||
@par Scaling and Overflow Behavior
|
||||
The function uses saturating arithmetic.
|
||||
Results outside of the allowable Q15 range [0x8000 0x7FFF] are saturated.
|
||||
*/
|
||||
#if defined(ARM_MATH_MVEI)
|
||||
|
||||
#include "arm_helium_utils.h"
|
||||
|
||||
void arm_mult_q15(
|
||||
const q15_t * pSrcA,
|
||||
const q15_t * pSrcB,
|
||||
q15_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counters */
|
||||
q15x8_t vecA, vecB;
|
||||
|
||||
/* Compute 8 outputs at a time */
|
||||
blkCnt = blockSize >> 3;
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/*
|
||||
* C = A * B
|
||||
* Multiply the inputs and then store the results in the destination buffer.
|
||||
*/
|
||||
vecA = vld1q(pSrcA);
|
||||
vecB = vld1q(pSrcB);
|
||||
vst1q(pDst, vqdmulhq(vecA, vecB));
|
||||
/*
|
||||
* Decrement the blockSize loop counter
|
||||
*/
|
||||
blkCnt--;
|
||||
/*
|
||||
* advance vector source and destination pointers
|
||||
*/
|
||||
pSrcA += 8;
|
||||
pSrcB += 8;
|
||||
pDst += 8;
|
||||
}
|
||||
/*
|
||||
* tail
|
||||
*/
|
||||
blkCnt = blockSize & 7;
|
||||
if (blkCnt > 0U)
|
||||
{
|
||||
mve_pred16_t p0 = vctp16q(blkCnt);
|
||||
vecA = vld1q(pSrcA);
|
||||
vecB = vld1q(pSrcB);
|
||||
vstrhq_p(pDst, vqdmulhq(vecA, vecB), p0);
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
void arm_mult_q15(
|
||||
const q15_t * pSrcA,
|
||||
const q15_t * pSrcB,
|
||||
q15_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
#if defined (ARM_MATH_DSP)
|
||||
q31_t inA1, inA2, inB1, inB2; /* Temporary input variables */
|
||||
q15_t out1, out2, out3, out4; /* Temporary output variables */
|
||||
q31_t mul1, mul2, mul3, mul4; /* Temporary variables */
|
||||
#endif
|
||||
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A * B */
|
||||
|
||||
#if defined (ARM_MATH_DSP)
|
||||
/* read 2 samples at a time from sourceA */
|
||||
inA1 = read_q15x2_ia ((q15_t **) &pSrcA);
|
||||
/* read 2 samples at a time from sourceB */
|
||||
inB1 = read_q15x2_ia ((q15_t **) &pSrcB);
|
||||
/* read 2 samples at a time from sourceA */
|
||||
inA2 = read_q15x2_ia ((q15_t **) &pSrcA);
|
||||
/* read 2 samples at a time from sourceB */
|
||||
inB2 = read_q15x2_ia ((q15_t **) &pSrcB);
|
||||
|
||||
/* multiply mul = sourceA * sourceB */
|
||||
mul1 = (q31_t) ((q15_t) (inA1 >> 16) * (q15_t) (inB1 >> 16));
|
||||
mul2 = (q31_t) ((q15_t) (inA1 ) * (q15_t) (inB1 ));
|
||||
mul3 = (q31_t) ((q15_t) (inA2 >> 16) * (q15_t) (inB2 >> 16));
|
||||
mul4 = (q31_t) ((q15_t) (inA2 ) * (q15_t) (inB2 ));
|
||||
|
||||
/* saturate result to 16 bit */
|
||||
out1 = (q15_t) __SSAT(mul1 >> 15, 16);
|
||||
out2 = (q15_t) __SSAT(mul2 >> 15, 16);
|
||||
out3 = (q15_t) __SSAT(mul3 >> 15, 16);
|
||||
out4 = (q15_t) __SSAT(mul4 >> 15, 16);
|
||||
|
||||
/* store result to destination */
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
write_q15x2_ia (&pDst, __PKHBT(out2, out1, 16));
|
||||
write_q15x2_ia (&pDst, __PKHBT(out4, out3, 16));
|
||||
#else
|
||||
write_q15x2_ia (&pDst, __PKHBT(out1, out2, 16));
|
||||
write_q15x2_ia (&pDst, __PKHBT(out3, out4, 16));
|
||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||
|
||||
#else
|
||||
*pDst++ = (q15_t) __SSAT((((q31_t) (*pSrcA++) * (*pSrcB++)) >> 15), 16);
|
||||
*pDst++ = (q15_t) __SSAT((((q31_t) (*pSrcA++) * (*pSrcB++)) >> 15), 16);
|
||||
*pDst++ = (q15_t) __SSAT((((q31_t) (*pSrcA++) * (*pSrcB++)) >> 15), 16);
|
||||
*pDst++ = (q15_t) __SSAT((((q31_t) (*pSrcA++) * (*pSrcB++)) >> 15), 16);
|
||||
#endif
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A * B */
|
||||
|
||||
/* Multiply inputs and store result in destination buffer. */
|
||||
*pDst++ = (q15_t) __SSAT((((q31_t) (*pSrcA++) * (*pSrcB++)) >> 15), 16);
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* defined(ARM_MATH_MVEI) */
|
||||
|
||||
/**
|
||||
@} end of BasicMult group
|
||||
*/
|
||||
168
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_mult_q31.c
Normal file
168
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_mult_q31.c
Normal file
@@ -0,0 +1,168 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_mult_q31.c
|
||||
* Description: Q31 vector multiplication
|
||||
*
|
||||
* $Date: 18. March 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
@ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup BasicMult
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Q31 vector multiplication.
|
||||
@param[in] pSrcA points to the first input vector.
|
||||
@param[in] pSrcB points to the second input vector.
|
||||
@param[out] pDst points to the output vector.
|
||||
@param[in] blockSize number of samples in each vector.
|
||||
@return none
|
||||
|
||||
@par Scaling and Overflow Behavior
|
||||
The function uses saturating arithmetic.
|
||||
Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] are saturated.
|
||||
*/
|
||||
#if defined(ARM_MATH_MVEI)
|
||||
|
||||
#include "arm_helium_utils.h"
|
||||
|
||||
void arm_mult_q31(
|
||||
const q31_t * pSrcA,
|
||||
const q31_t * pSrcB,
|
||||
q31_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counters */
|
||||
q31x4_t vecA, vecB;
|
||||
|
||||
/* Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2;
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/*
|
||||
* C = A * B
|
||||
* Multiply the inputs and then store the results in the destination buffer.
|
||||
*/
|
||||
vecA = vld1q(pSrcA);
|
||||
vecB = vld1q(pSrcB);
|
||||
vst1q(pDst, vqdmulhq(vecA, vecB));
|
||||
/*
|
||||
* Decrement the blockSize loop counter
|
||||
*/
|
||||
blkCnt--;
|
||||
/*
|
||||
* advance vector source and destination pointers
|
||||
*/
|
||||
pSrcA += 4;
|
||||
pSrcB += 4;
|
||||
pDst += 4;
|
||||
}
|
||||
/*
|
||||
* tail
|
||||
*/
|
||||
blkCnt = blockSize & 3;
|
||||
if (blkCnt > 0U)
|
||||
{
|
||||
mve_pred16_t p0 = vctp32q(blkCnt);
|
||||
vecA = vld1q(pSrcA);
|
||||
vecB = vld1q(pSrcB);
|
||||
vstrwq_p(pDst, vqdmulhq(vecA, vecB), p0);
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
void arm_mult_q31(
|
||||
const q31_t * pSrcA,
|
||||
const q31_t * pSrcB,
|
||||
q31_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
q31_t out; /* Temporary output variable */
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A * B */
|
||||
|
||||
/* Multiply inputs and store result in destination buffer. */
|
||||
out = ((q63_t) *pSrcA++ * *pSrcB++) >> 32;
|
||||
out = __SSAT(out, 31);
|
||||
*pDst++ = out << 1U;
|
||||
|
||||
out = ((q63_t) *pSrcA++ * *pSrcB++) >> 32;
|
||||
out = __SSAT(out, 31);
|
||||
*pDst++ = out << 1U;
|
||||
|
||||
out = ((q63_t) *pSrcA++ * *pSrcB++) >> 32;
|
||||
out = __SSAT(out, 31);
|
||||
*pDst++ = out << 1U;
|
||||
|
||||
out = ((q63_t) *pSrcA++ * *pSrcB++) >> 32;
|
||||
out = __SSAT(out, 31);
|
||||
*pDst++ = out << 1U;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A * B */
|
||||
|
||||
/* Multiply inputs and store result in destination buffer. */
|
||||
out = ((q63_t) *pSrcA++ * *pSrcB++) >> 32;
|
||||
out = __SSAT(out, 31);
|
||||
*pDst++ = out << 1U;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* defined(ARM_MATH_MVEI) */
|
||||
|
||||
/**
|
||||
@} end of BasicMult group
|
||||
*/
|
||||
168
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_mult_q7.c
Normal file
168
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_mult_q7.c
Normal file
@@ -0,0 +1,168 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_mult_q7.c
|
||||
* Description: Q7 vector multiplication
|
||||
*
|
||||
* $Date: 18. March 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
@ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup BasicMult
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Q7 vector multiplication
|
||||
@param[in] pSrcA points to the first input vector
|
||||
@param[in] pSrcB points to the second input vector
|
||||
@param[out] pDst points to the output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
|
||||
@par Scaling and Overflow Behavior
|
||||
The function uses saturating arithmetic.
|
||||
Results outside of the allowable Q7 range [0x80 0x7F] are saturated.
|
||||
*/
|
||||
#if defined(ARM_MATH_MVEI)
|
||||
|
||||
#include "arm_helium_utils.h"
|
||||
|
||||
void arm_mult_q7(
|
||||
const q7_t * pSrcA,
|
||||
const q7_t * pSrcB,
|
||||
q7_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counters */
|
||||
q7x16_t vecA, vecB;
|
||||
|
||||
/* Compute 16 outputs at a time */
|
||||
blkCnt = blockSize >> 4;
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/*
|
||||
* C = A * B
|
||||
* Multiply the inputs and then store the results in the destination buffer.
|
||||
*/
|
||||
vecA = vld1q(pSrcA);
|
||||
vecB = vld1q(pSrcB);
|
||||
vst1q(pDst, vqdmulhq(vecA, vecB));
|
||||
/*
|
||||
* Decrement the blockSize loop counter
|
||||
*/
|
||||
blkCnt--;
|
||||
/*
|
||||
* advance vector source and destination pointers
|
||||
*/
|
||||
pSrcA += 16;
|
||||
pSrcB += 16;
|
||||
pDst += 16;
|
||||
}
|
||||
/*
|
||||
* tail
|
||||
*/
|
||||
blkCnt = blockSize & 0xF;
|
||||
if (blkCnt > 0U)
|
||||
{
|
||||
mve_pred16_t p0 = vctp8q(blkCnt);
|
||||
vecA = vld1q(pSrcA);
|
||||
vecB = vld1q(pSrcB);
|
||||
vstrbq_p(pDst, vqdmulhq(vecA, vecB), p0);
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
void arm_mult_q7(
|
||||
const q7_t * pSrcA,
|
||||
const q7_t * pSrcB,
|
||||
q7_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
#if defined (ARM_MATH_DSP)
|
||||
q7_t out1, out2, out3, out4; /* Temporary output variables */
|
||||
#endif
|
||||
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A * B */
|
||||
|
||||
#if defined (ARM_MATH_DSP)
|
||||
/* Multiply inputs and store results in temporary variables */
|
||||
out1 = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8);
|
||||
out2 = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8);
|
||||
out3 = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8);
|
||||
out4 = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8);
|
||||
|
||||
/* Pack and store result in destination buffer (in single write) */
|
||||
write_q7x4_ia (&pDst, __PACKq7(out1, out2, out3, out4));
|
||||
#else
|
||||
*pDst++ = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8);
|
||||
*pDst++ = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8);
|
||||
*pDst++ = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8);
|
||||
*pDst++ = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8);
|
||||
#endif
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A * B */
|
||||
|
||||
/* Multiply input and store result in destination buffer. */
|
||||
*pDst++ = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8);
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* defined(ARM_MATH_MVEI) */
|
||||
|
||||
/**
|
||||
@} end of BasicMult group
|
||||
*/
|
||||
192
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_negate_f32.c
Normal file
192
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_negate_f32.c
Normal file
@@ -0,0 +1,192 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_negate_f32.c
|
||||
* Description: Negates floating-point vectors
|
||||
*
|
||||
* $Date: 18. March 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
@ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
@defgroup BasicNegate Vector Negate
|
||||
|
||||
Negates the elements of a vector.
|
||||
|
||||
<pre>
|
||||
pDst[n] = -pSrc[n], 0 <= n < blockSize.
|
||||
</pre>
|
||||
|
||||
The functions support in-place computation allowing the source and
|
||||
destination pointers to reference the same memory buffer.
|
||||
There are separate functions for floating-point, Q7, Q15, and Q31 data types.
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup BasicNegate
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Negates the elements of a floating-point vector.
|
||||
@param[in] pSrc points to input vector.
|
||||
@param[out] pDst points to output vector.
|
||||
@param[in] blockSize number of samples in each vector.
|
||||
@return none
|
||||
*/
|
||||
|
||||
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
|
||||
#include "arm_helium_utils.h"
|
||||
|
||||
void arm_negate_f32(
|
||||
const float32_t * pSrc,
|
||||
float32_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
f32x4_t vec1;
|
||||
f32x4_t res;
|
||||
|
||||
|
||||
/* Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = |A| */
|
||||
|
||||
/* Calculate absolute values and then store the results in the destination buffer. */
|
||||
vec1 = vld1q(pSrc);
|
||||
res = vnegq(vec1);
|
||||
vst1q(pDst, res);
|
||||
|
||||
/* Increment pointers */
|
||||
pSrc += 4;
|
||||
pDst += 4;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Tail */
|
||||
blkCnt = blockSize & 0x3;
|
||||
if (blkCnt > 0U)
|
||||
{
|
||||
/* C = |A| */
|
||||
mve_pred16_t p0 = vctp32q(blkCnt);
|
||||
vec1 = vld1q((float32_t const *) pSrc);
|
||||
vstrwq_p(pDst, vnegq(vec1), p0);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#else
|
||||
void arm_negate_f32(
|
||||
const float32_t * pSrc,
|
||||
float32_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
|
||||
#if defined(ARM_MATH_NEON_EXPERIMENTAL) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
f32x4_t vec1;
|
||||
f32x4_t res;
|
||||
|
||||
/* Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = -A */
|
||||
|
||||
/* Negate and then store the results in the destination buffer. */
|
||||
vec1 = vld1q_f32(pSrc);
|
||||
res = vnegq_f32(vec1);
|
||||
vst1q_f32(pDst, res);
|
||||
|
||||
/* Increment pointers */
|
||||
pSrc += 4;
|
||||
pDst += 4;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Tail */
|
||||
blkCnt = blockSize & 0x3;
|
||||
|
||||
#else
|
||||
#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = -A */
|
||||
|
||||
/* Negate and store result in destination buffer. */
|
||||
*pDst++ = -*pSrc++;
|
||||
|
||||
*pDst++ = -*pSrc++;
|
||||
|
||||
*pDst++ = -*pSrc++;
|
||||
|
||||
*pDst++ = -*pSrc++;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
#endif /* #if defined(ARM_MATH_NEON_EXPERIMENTAL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = -A */
|
||||
|
||||
/* Negate and store result in destination buffer. */
|
||||
*pDst++ = -*pSrc++;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
|
||||
|
||||
/**
|
||||
@} end of BasicNegate group
|
||||
*/
|
||||
171
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_negate_q15.c
Normal file
171
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_negate_q15.c
Normal file
@@ -0,0 +1,171 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_negate_q15.c
|
||||
* Description: Negates Q15 vectors
|
||||
*
|
||||
* $Date: 18. March 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
@ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup BasicNegate
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Negates the elements of a Q15 vector.
|
||||
@param[in] pSrc points to the input vector.
|
||||
@param[out] pDst points to the output vector.
|
||||
@param[in] blockSize number of samples in each vector.
|
||||
@return none
|
||||
|
||||
@par Conditions for optimum performance
|
||||
Input and output buffers should be aligned by 32-bit
|
||||
@par Scaling and Overflow Behavior
|
||||
The function uses saturating arithmetic.
|
||||
The Q15 value -1 (0x8000) is saturated to the maximum allowable positive value 0x7FFF.
|
||||
*/
|
||||
#if defined(ARM_MATH_MVEI)
|
||||
|
||||
#include "arm_helium_utils.h"
|
||||
|
||||
void arm_negate_q15(
|
||||
const q15_t * pSrc,
|
||||
q15_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counters */
|
||||
q15x8_t vecSrc;
|
||||
|
||||
/* Compute 8 outputs at a time */
|
||||
blkCnt = blockSize >> 3;
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/*
|
||||
* C = -A
|
||||
* Negate and then store the results in the destination buffer.
|
||||
*/
|
||||
vecSrc = vld1q(pSrc);
|
||||
vst1q(pDst, vqnegq(vecSrc));
|
||||
/*
|
||||
* Decrement the blockSize loop counter
|
||||
*/
|
||||
blkCnt--;
|
||||
/*
|
||||
* advance vector source and destination pointers
|
||||
*/
|
||||
pSrc += 8;
|
||||
pDst += 8;
|
||||
}
|
||||
/*
|
||||
* tail
|
||||
*/
|
||||
blkCnt = blockSize & 7;
|
||||
if (blkCnt > 0U)
|
||||
{
|
||||
mve_pred16_t p0 = vctp16q(blkCnt);
|
||||
vecSrc = vld1q(pSrc);
|
||||
vstrhq_p(pDst, vqnegq(vecSrc), p0);
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
void arm_negate_q15(
|
||||
const q15_t * pSrc,
|
||||
q15_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
q15_t in; /* Temporary input variable */
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
#if defined (ARM_MATH_DSP)
|
||||
q31_t in1; /* Temporary input variables */
|
||||
#endif
|
||||
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = -A */
|
||||
|
||||
#if defined (ARM_MATH_DSP)
|
||||
/* Negate and store result in destination buffer (2 samples at a time). */
|
||||
in1 = read_q15x2_ia ((q15_t **) &pSrc);
|
||||
write_q15x2_ia (&pDst, __QSUB16(0, in1));
|
||||
|
||||
in1 = read_q15x2_ia ((q15_t **) &pSrc);
|
||||
write_q15x2_ia (&pDst, __QSUB16(0, in1));
|
||||
#else
|
||||
in = *pSrc++;
|
||||
*pDst++ = (in == (q15_t) 0x8000) ? (q15_t) 0x7fff : -in;
|
||||
|
||||
in = *pSrc++;
|
||||
*pDst++ = (in == (q15_t) 0x8000) ? (q15_t) 0x7fff : -in;
|
||||
|
||||
in = *pSrc++;
|
||||
*pDst++ = (in == (q15_t) 0x8000) ? (q15_t) 0x7fff : -in;
|
||||
|
||||
in = *pSrc++;
|
||||
*pDst++ = (in == (q15_t) 0x8000) ? (q15_t) 0x7fff : -in;
|
||||
#endif
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = -A */
|
||||
|
||||
/* Negate and store result in destination buffer. */
|
||||
in = *pSrc++;
|
||||
*pDst++ = (in == (q15_t) 0x8000) ? (q15_t) 0x7fff : -in;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* defined(ARM_MATH_MVEI) */
|
||||
|
||||
/**
|
||||
@} end of BasicNegate group
|
||||
*/
|
||||
178
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_negate_q31.c
Normal file
178
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_negate_q31.c
Normal file
@@ -0,0 +1,178 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_negate_q31.c
|
||||
* Description: Negates Q31 vectors
|
||||
*
|
||||
* $Date: 18. March 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
@ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup BasicNegate
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Negates the elements of a Q31 vector.
|
||||
@param[in] pSrc points to the input vector.
|
||||
@param[out] pDst points to the output vector.
|
||||
@param[in] blockSize number of samples in each vector.
|
||||
@return none
|
||||
|
||||
@par Scaling and Overflow Behavior
|
||||
The function uses saturating arithmetic.
|
||||
The Q31 value -1 (0x80000000) is saturated to the maximum allowable positive value 0x7FFFFFFF.
|
||||
*/
|
||||
|
||||
#if defined(ARM_MATH_MVEI)
|
||||
|
||||
#include "arm_helium_utils.h"
|
||||
|
||||
void arm_negate_q31(
|
||||
const q31_t * pSrc,
|
||||
q31_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counters */
|
||||
q31x4_t vecSrc;
|
||||
|
||||
/* Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2;
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/*
|
||||
* C = -A
|
||||
* Negate and then store the results in the destination buffer.
|
||||
*/
|
||||
vecSrc = vld1q(pSrc);
|
||||
vst1q(pDst, vqnegq(vecSrc));
|
||||
/*
|
||||
* Decrement the blockSize loop counter
|
||||
*/
|
||||
blkCnt--;
|
||||
/*
|
||||
* advance vector source and destination pointers
|
||||
*/
|
||||
pSrc += 4;
|
||||
pDst += 4;
|
||||
}
|
||||
/*
|
||||
* tail
|
||||
*/
|
||||
blkCnt = blockSize & 3;
|
||||
if (blkCnt > 0U)
|
||||
{
|
||||
mve_pred16_t p0 = vctp32q(blkCnt);
|
||||
vecSrc = vld1q(pSrc);
|
||||
vstrwq_p(pDst, vqnegq(vecSrc), p0);
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
void arm_negate_q31(
|
||||
const q31_t * pSrc,
|
||||
q31_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
q31_t in; /* Temporary input variable */
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = -A */
|
||||
|
||||
/* Negate and store result in destination buffer. */
|
||||
in = *pSrc++;
|
||||
#if defined (ARM_MATH_DSP)
|
||||
*pDst++ = __QSUB(0, in);
|
||||
#else
|
||||
*pDst++ = (in == INT32_MIN) ? INT32_MAX : -in;
|
||||
#endif
|
||||
|
||||
in = *pSrc++;
|
||||
#if defined (ARM_MATH_DSP)
|
||||
*pDst++ = __QSUB(0, in);
|
||||
#else
|
||||
*pDst++ = (in == INT32_MIN) ? INT32_MAX : -in;
|
||||
#endif
|
||||
|
||||
in = *pSrc++;
|
||||
#if defined (ARM_MATH_DSP)
|
||||
*pDst++ = __QSUB(0, in);
|
||||
#else
|
||||
*pDst++ = (in == INT32_MIN) ? INT32_MAX : -in;
|
||||
#endif
|
||||
|
||||
in = *pSrc++;
|
||||
#if defined (ARM_MATH_DSP)
|
||||
*pDst++ = __QSUB(0, in);
|
||||
#else
|
||||
*pDst++ = (in == INT32_MIN) ? INT32_MAX : -in;
|
||||
#endif
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = -A */
|
||||
|
||||
/* Negate and store result in destination buffer. */
|
||||
in = *pSrc++;
|
||||
#if defined (ARM_MATH_DSP)
|
||||
*pDst++ = __QSUB(0, in);
|
||||
#else
|
||||
*pDst++ = (in == INT32_MIN) ? INT32_MAX : -in;
|
||||
#endif
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* defined(ARM_MATH_MVEI) */
|
||||
|
||||
/**
|
||||
@} end of BasicNegate group
|
||||
*/
|
||||
171
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_negate_q7.c
Normal file
171
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_negate_q7.c
Normal file
@@ -0,0 +1,171 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_negate_q7.c
|
||||
* Description: Negates Q7 vectors
|
||||
*
|
||||
* $Date: 18. March 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
@ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup BasicNegate
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Negates the elements of a Q7 vector.
|
||||
@param[in] pSrc points to the input vector.
|
||||
@param[out] pDst points to the output vector.
|
||||
@param[in] blockSize number of samples in each vector.
|
||||
@return none
|
||||
|
||||
@par Scaling and Overflow Behavior
|
||||
The function uses saturating arithmetic.
|
||||
The Q7 value -1 (0x80) is saturated to the maximum allowable positive value 0x7F.
|
||||
*/
|
||||
#if defined(ARM_MATH_MVEI)
|
||||
|
||||
#include "arm_helium_utils.h"
|
||||
|
||||
void arm_negate_q7(
|
||||
const q7_t * pSrc,
|
||||
q7_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counters */
|
||||
q7x16_t vecSrc;
|
||||
|
||||
/* Compute 16 outputs at a time */
|
||||
blkCnt = blockSize >> 4;
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/*
|
||||
* C = -A
|
||||
* Negate and then store the results in the destination buffer.
|
||||
*/
|
||||
vecSrc = vld1q(pSrc);
|
||||
vst1q(pDst, vqnegq(vecSrc));
|
||||
/*
|
||||
* Decrement the blockSize loop counter
|
||||
*/
|
||||
blkCnt--;
|
||||
/*
|
||||
* advance vector source and destination pointers
|
||||
*/
|
||||
pSrc += 16;
|
||||
pDst += 16;
|
||||
}
|
||||
/*
|
||||
* tail
|
||||
*/
|
||||
blkCnt = blockSize & 0xF;
|
||||
if (blkCnt > 0U)
|
||||
{
|
||||
mve_pred16_t p0 = vctp8q(blkCnt);
|
||||
vecSrc = vld1q(pSrc);
|
||||
vstrbq_p(pDst, vqnegq(vecSrc), p0);
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
void arm_negate_q7(
|
||||
const q7_t * pSrc,
|
||||
q7_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
q7_t in; /* Temporary input variable */
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
#if defined (ARM_MATH_DSP)
|
||||
q31_t in1; /* Temporary input variable */
|
||||
#endif
|
||||
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = -A */
|
||||
|
||||
#if defined (ARM_MATH_DSP)
|
||||
/* Negate and store result in destination buffer (4 samples at a time). */
|
||||
in1 = read_q7x4_ia ((q7_t **) &pSrc);
|
||||
write_q7x4_ia (&pDst, __QSUB8(0, in1));
|
||||
#else
|
||||
in = *pSrc++;
|
||||
*pDst++ = (in == (q7_t) 0x80) ? (q7_t) 0x7f : -in;
|
||||
|
||||
in = *pSrc++;
|
||||
*pDst++ = (in == (q7_t) 0x80) ? (q7_t) 0x7f : -in;
|
||||
|
||||
in = *pSrc++;
|
||||
*pDst++ = (in == (q7_t) 0x80) ? (q7_t) 0x7f : -in;
|
||||
|
||||
in = *pSrc++;
|
||||
*pDst++ = (in == (q7_t) 0x80) ? (q7_t) 0x7f : -in;
|
||||
#endif
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = -A */
|
||||
|
||||
/* Negate and store result in destination buffer. */
|
||||
in = *pSrc++;
|
||||
|
||||
#if defined (ARM_MATH_DSP)
|
||||
*pDst++ = (q7_t) __QSUB8(0, in);
|
||||
#else
|
||||
*pDst++ = (in == (q7_t) 0x80) ? (q7_t) 0x7f : -in;
|
||||
#endif
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* defined(ARM_MATH_MVEI) */
|
||||
|
||||
/**
|
||||
@} end of BasicNegate group
|
||||
*/
|
||||
130
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_not_u16.c
Normal file
130
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_not_u16.c
Normal file
@@ -0,0 +1,130 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_not_u16.c
|
||||
* Description: uint16_t bitwise NOT
|
||||
*
|
||||
* $Date: 14 November 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
@ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
@defgroup Not Vector bitwise NOT
|
||||
|
||||
Compute the logical bitwise NOT.
|
||||
|
||||
There are separate functions for uint32_t, uint16_t, and uint8_t data types.
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup Not
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Compute the logical bitwise NOT of a fixed-point vector.
|
||||
@param[in] pSrc points to input vector
|
||||
@param[out] pDst points to output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
*/
|
||||
|
||||
/*
 * Writes the bitwise complement of each of the blockSize 16-bit words in pSrc
 * to pDst. The vector path (MVE or NEON) is chosen at compile time; the plain
 * C loop handles everything when neither is enabled, and the NEON remainder.
 */
void arm_not_u16(
    const uint16_t * pSrc,
          uint16_t * pDst,
          uint32_t blockSize)
{
    uint32_t blkCnt;      /* Loop counter */

#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
    /* Unsigned vector type: matches the uint16_t data and vmvnq_u16. */
    uint16x8_t vecSrc;

    /* Compute 8 outputs at a time */
    blkCnt = blockSize >> 3;

    while (blkCnt > 0U)
    {
        vecSrc = vld1q(pSrc);

        vst1q(pDst, vmvnq_u16(vecSrc) );

        pSrc += 8;
        pDst += 8;

        /* Decrement the loop counter */
        blkCnt--;
    }

    /* Tail */
    blkCnt = blockSize & 7;

    if (blkCnt > 0U)
    {
        mve_pred16_t p0 = vctp16q(blkCnt);

        /* Predicated (zeroing) load: only the first blkCnt lanes are fetched,
           so nothing past the end of the source buffer is read. */
        vecSrc = vld1q_z(pSrc, p0);
        vstrhq_p(pDst, vmvnq_u16(vecSrc), p0);
    }
#else
#if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
    uint16x8_t inV;

    /* Compute 8 outputs at a time */
    blkCnt = blockSize >> 3U;

    while (blkCnt > 0U)
    {
        inV = vld1q_u16(pSrc);

        vst1q_u16(pDst, vmvnq_u16(inV) );

        pSrc += 8;
        pDst += 8;

        /* Decrement the loop counter */
        blkCnt--;
    }

    /* Tail */
    blkCnt = blockSize & 7;
#else
    /* Initialize blkCnt with number of samples */
    blkCnt = blockSize;
#endif

    while (blkCnt > 0U)
    {
        /* ~ promotes to int; the cast makes the narrowing back explicit. */
        *pDst++ = (uint16_t) ~(*pSrc++);

        /* Decrement the loop counter */
        blkCnt--;
    }
#endif /* if defined(ARM_MATH_MVEI) */
}
|
||||
|
||||
/**
|
||||
@} end of Not group
|
||||
*/
|
||||
122
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_not_u32.c
Normal file
122
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_not_u32.c
Normal file
@@ -0,0 +1,122 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_not_u32.c
|
||||
* Description: uint32_t bitwise NOT
|
||||
*
|
||||
* $Date: 14 November 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
@ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup Not
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Compute the logical bitwise NOT of a fixed-point vector.
|
||||
@param[in] pSrc points to input vector
|
||||
@param[out] pDst points to output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
*/
|
||||
|
||||
/*
 * Writes the bitwise complement of each of the blockSize 32-bit words in pSrc
 * to pDst. The vector path (MVE or NEON) is chosen at compile time; the plain
 * C loop handles everything when neither is enabled, and the NEON remainder.
 */
void arm_not_u32(
    const uint32_t * pSrc,
          uint32_t * pDst,
          uint32_t blockSize)
{
    uint32_t blkCnt;      /* Loop counter */

#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
    /* Unsigned vector type: matches the uint32_t data and vmvnq_u32. */
    uint32x4_t vecSrc;

    /* Compute 4 outputs at a time */
    blkCnt = blockSize >> 2;

    while (blkCnt > 0U)
    {
        vecSrc = vld1q(pSrc);

        vst1q(pDst, vmvnq_u32(vecSrc) );

        pSrc += 4;
        pDst += 4;

        /* Decrement the loop counter */
        blkCnt--;
    }

    /* Tail */
    blkCnt = blockSize & 3;

    if (blkCnt > 0U)
    {
        mve_pred16_t p0 = vctp32q(blkCnt);

        /* Predicated (zeroing) load: only the first blkCnt lanes are fetched,
           so nothing past the end of the source buffer is read. */
        vecSrc = vld1q_z(pSrc, p0);
        vstrwq_p(pDst, vmvnq_u32(vecSrc), p0);
    }
#else
#if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
    uint32x4_t inV;

    /* Compute 4 outputs at a time */
    blkCnt = blockSize >> 2U;

    while (blkCnt > 0U)
    {
        inV = vld1q_u32(pSrc);

        vst1q_u32(pDst, vmvnq_u32(inV) );

        pSrc += 4;
        pDst += 4;

        /* Decrement the loop counter */
        blkCnt--;
    }

    /* Tail */
    blkCnt = blockSize & 3;
#else
    /* Initialize blkCnt with number of samples */
    blkCnt = blockSize;
#endif

    while (blkCnt > 0U)
    {
        *pDst++ = ~(*pSrc++);

        /* Decrement the loop counter */
        blkCnt--;
    }
#endif /* if defined(ARM_MATH_MVEI) */
}
|
||||
|
||||
/**
|
||||
@} end of Not group
|
||||
*/
|
||||
122
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_not_u8.c
Normal file
122
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_not_u8.c
Normal file
@@ -0,0 +1,122 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_not_u8.c
|
||||
* Description: uint8_t bitwise NOT
|
||||
*
|
||||
* $Date: 14 November 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
@ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup Not
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Compute the logical bitwise NOT of a fixed-point vector.
|
||||
@param[in] pSrc points to input vector
|
||||
@param[out] pDst points to output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
*/
|
||||
|
||||
/*
 * Writes the bitwise complement of each of the blockSize bytes in pSrc to
 * pDst. The vector path (MVE or NEON) is chosen at compile time; the plain
 * C loop handles everything when neither is enabled, and the NEON remainder.
 */
void arm_not_u8(
    const uint8_t * pSrc,
          uint8_t * pDst,
          uint32_t blockSize)
{
    uint32_t blkCnt;      /* Loop counter */

#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
    /* Unsigned vector type: matches the uint8_t data and vmvnq_u8. */
    uint8x16_t vecSrc;

    /* Compute 16 outputs at a time */
    blkCnt = blockSize >> 4;

    while (blkCnt > 0U)
    {
        vecSrc = vld1q(pSrc);

        vst1q(pDst, vmvnq_u8(vecSrc) );

        pSrc += 16;
        pDst += 16;

        /* Decrement the loop counter */
        blkCnt--;
    }

    /* Tail */
    blkCnt = blockSize & 0xF;

    if (blkCnt > 0U)
    {
        mve_pred16_t p0 = vctp8q(blkCnt);

        /* Predicated (zeroing) load: only the first blkCnt lanes are fetched,
           so nothing past the end of the source buffer is read. */
        vecSrc = vld1q_z(pSrc, p0);
        vstrbq_p(pDst, vmvnq_u8(vecSrc), p0);
    }
#else
#if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
    uint8x16_t inV;

    /* Compute 16 outputs at a time */
    blkCnt = blockSize >> 4U;

    while (blkCnt > 0U)
    {
        inV = vld1q_u8(pSrc);

        vst1q_u8(pDst, vmvnq_u8(inV) );

        pSrc += 16;
        pDst += 16;

        /* Decrement the loop counter */
        blkCnt--;
    }

    /* Tail */
    blkCnt = blockSize & 0xF;
#else
    /* Initialize blkCnt with number of samples */
    blkCnt = blockSize;
#endif

    while (blkCnt > 0U)
    {
        /* ~ promotes to int; the cast makes the narrowing back explicit. */
        *pDst++ = (uint8_t) ~(*pSrc++);

        /* Decrement the loop counter */
        blkCnt--;
    }
#endif /* if defined(ARM_MATH_MVEI) */
}
|
||||
|
||||
/**
|
||||
@} end of Not group
|
||||
*/
|
||||
196
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_offset_f32.c
Normal file
196
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_offset_f32.c
Normal file
@@ -0,0 +1,196 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_offset_f32.c
|
||||
* Description: Floating-point vector offset
|
||||
*
|
||||
* $Date: 18. March 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
@ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
@defgroup BasicOffset Vector Offset
|
||||
|
||||
Adds a constant offset to each element of a vector.
|
||||
|
||||
<pre>
|
||||
pDst[n] = pSrc[n] + offset, 0 <= n < blockSize.
|
||||
</pre>
|
||||
|
||||
The functions support in-place computation allowing the source and
|
||||
destination pointers to reference the same memory buffer.
|
||||
There are separate functions for floating-point, Q7, Q15, and Q31 data types.
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup BasicOffset
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Adds a constant offset to a floating-point vector.
|
||||
@param[in] pSrc points to the input vector
|
||||
@param[in] offset is the offset to be added
|
||||
@param[out] pDst points to the output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
*/
|
||||
|
||||
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
|
||||
#include "arm_helium_utils.h"
|
||||
|
||||
void arm_offset_f32(
|
||||
const float32_t * pSrc,
|
||||
float32_t offset,
|
||||
float32_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
|
||||
f32x4_t vec1;
|
||||
f32x4_t res;
|
||||
|
||||
/* Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A + offset */
|
||||
|
||||
/* Add offset and then store the results in the destination buffer. */
|
||||
vec1 = vld1q(pSrc);
|
||||
res = vaddq(vec1,offset);
|
||||
vst1q(pDst, res);
|
||||
|
||||
/* Increment pointers */
|
||||
pSrc += 4;
|
||||
pDst += 4;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Tail */
|
||||
blkCnt = blockSize & 0x3;
|
||||
|
||||
if (blkCnt > 0U)
|
||||
{
|
||||
mve_pred16_t p0 = vctp32q(blkCnt);
|
||||
vec1 = vld1q((float32_t const *) pSrc);
|
||||
vstrwq_p(pDst, vaddq(vec1, offset), p0);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
#else
|
||||
void arm_offset_f32(
|
||||
const float32_t * pSrc,
|
||||
float32_t offset,
|
||||
float32_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
|
||||
#if defined(ARM_MATH_NEON_EXPERIMENTAL) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
f32x4_t vec1;
|
||||
f32x4_t res;
|
||||
|
||||
/* Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A + offset */
|
||||
|
||||
/* Add offset and then store the results in the destination buffer. */
|
||||
vec1 = vld1q_f32(pSrc);
|
||||
res = vaddq_f32(vec1,vdupq_n_f32(offset));
|
||||
vst1q_f32(pDst, res);
|
||||
|
||||
/* Increment pointers */
|
||||
pSrc += 4;
|
||||
pDst += 4;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Tail */
|
||||
blkCnt = blockSize & 0x3;
|
||||
|
||||
#else
|
||||
#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A + offset */
|
||||
|
||||
/* Add offset and store result in destination buffer. */
|
||||
*pDst++ = (*pSrc++) + offset;
|
||||
|
||||
*pDst++ = (*pSrc++) + offset;
|
||||
|
||||
*pDst++ = (*pSrc++) + offset;
|
||||
|
||||
*pDst++ = (*pSrc++) + offset;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
#endif /* #if defined(ARM_MATH_NEON_EXPERIMENTAL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A + offset */
|
||||
|
||||
/* Add offset and store result in destination buffer. */
|
||||
*pDst++ = (*pSrc++) + offset;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
|
||||
|
||||
/**
|
||||
@} end of BasicOffset group
|
||||
*/
|
||||
168
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_offset_q15.c
Normal file
168
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_offset_q15.c
Normal file
@@ -0,0 +1,168 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_offset_q15.c
|
||||
* Description: Q15 vector offset
|
||||
*
|
||||
* $Date: 18. March 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
@ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup BasicOffset
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Adds a constant offset to a Q15 vector.
|
||||
@param[in] pSrc points to the input vector
|
||||
@param[in] offset is the offset to be added
|
||||
@param[out] pDst points to the output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
|
||||
@par Scaling and Overflow Behavior
|
||||
The function uses saturating arithmetic.
|
||||
Results outside of the allowable Q15 range [0x8000 0x7FFF] are saturated.
|
||||
*/
|
||||
#if defined(ARM_MATH_MVEI)
|
||||
|
||||
#include "arm_helium_utils.h"
|
||||
|
||||
void arm_offset_q15(
|
||||
const q15_t * pSrc,
|
||||
q15_t offset,
|
||||
q15_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counters */
|
||||
q15x8_t vecSrc;
|
||||
|
||||
/* Compute 8 outputs at a time */
|
||||
blkCnt = blockSize >> 3;
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/*
|
||||
* C = A + offset
|
||||
* Add offset and then store the result in the destination buffer.
|
||||
*/
|
||||
vecSrc = vld1q(pSrc);
|
||||
vst1q(pDst, vqaddq(vecSrc, offset));
|
||||
/*
|
||||
* Decrement the blockSize loop counter
|
||||
*/
|
||||
blkCnt--;
|
||||
/*
|
||||
* advance vector source and destination pointers
|
||||
*/
|
||||
pSrc += 8;
|
||||
pDst += 8;
|
||||
}
|
||||
/*
|
||||
* tail
|
||||
*/
|
||||
blkCnt = blockSize & 7;
|
||||
if (blkCnt > 0U)
|
||||
{
|
||||
mve_pred16_t p0 = vctp16q(blkCnt);
|
||||
vecSrc = vld1q(pSrc);
|
||||
vstrhq_p(pDst, vqaddq(vecSrc, offset), p0);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#else
|
||||
void arm_offset_q15(
|
||||
const q15_t * pSrc,
|
||||
q15_t offset,
|
||||
q15_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
#if defined (ARM_MATH_DSP)
|
||||
q31_t offset_packed; /* Offset packed to 32 bit */
|
||||
|
||||
/* Offset is packed to 32 bit in order to use SIMD32 for addition */
|
||||
offset_packed = __PKHBT(offset, offset, 16);
|
||||
#endif
|
||||
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A + offset */
|
||||
|
||||
#if defined (ARM_MATH_DSP)
|
||||
/* Add offset and store result in destination buffer (2 samples at a time). */
|
||||
write_q15x2_ia (&pDst, __QADD16(read_q15x2_ia ((q15_t **) &pSrc), offset_packed));
|
||||
write_q15x2_ia (&pDst, __QADD16(read_q15x2_ia ((q15_t **) &pSrc), offset_packed));
|
||||
#else
|
||||
*pDst++ = (q15_t) __SSAT(((q31_t) *pSrc++ + offset), 16);
|
||||
*pDst++ = (q15_t) __SSAT(((q31_t) *pSrc++ + offset), 16);
|
||||
*pDst++ = (q15_t) __SSAT(((q31_t) *pSrc++ + offset), 16);
|
||||
*pDst++ = (q15_t) __SSAT(((q31_t) *pSrc++ + offset), 16);
|
||||
#endif
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A + offset */
|
||||
|
||||
/* Add offset and store result in destination buffer. */
|
||||
#if defined (ARM_MATH_DSP)
|
||||
*pDst++ = (q15_t) __QADD16(*pSrc++, offset);
|
||||
#else
|
||||
*pDst++ = (q15_t) __SSAT(((q31_t) *pSrc++ + offset), 16);
|
||||
#endif
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* defined(ARM_MATH_MVEI) */
|
||||
|
||||
/**
|
||||
@} end of BasicOffset group
|
||||
*/
|
||||
175
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_offset_q31.c
Normal file
175
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_offset_q31.c
Normal file
@@ -0,0 +1,175 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_offset_q31.c
|
||||
* Description: Q31 vector offset
|
||||
*
|
||||
* $Date: 18. March 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
@ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup BasicOffset
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Adds a constant offset to a Q31 vector.
|
||||
@param[in] pSrc points to the input vector
|
||||
@param[in] offset is the offset to be added
|
||||
@param[out] pDst points to the output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
|
||||
@par Scaling and Overflow Behavior
|
||||
The function uses saturating arithmetic.
|
||||
Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] are saturated.
|
||||
*/
|
||||
|
||||
#if defined(ARM_MATH_MVEI)
|
||||
|
||||
#include "arm_helium_utils.h"
|
||||
|
||||
void arm_offset_q31(
|
||||
const q31_t * pSrc,
|
||||
q31_t offset,
|
||||
q31_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counters */
|
||||
q31x4_t vecSrc;
|
||||
|
||||
/* Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2;
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/*
|
||||
* C = A + offset
|
||||
* Add offset and then store the result in the destination buffer.
|
||||
*/
|
||||
vecSrc = vld1q(pSrc);
|
||||
vst1q(pDst, vqaddq(vecSrc, offset));
|
||||
/*
|
||||
* Decrement the blockSize loop counter
|
||||
*/
|
||||
blkCnt--;
|
||||
/*
|
||||
* advance vector source and destination pointers
|
||||
*/
|
||||
pSrc += 4;
|
||||
pDst += 4;
|
||||
}
|
||||
/*
|
||||
* tail
|
||||
*/
|
||||
blkCnt = blockSize & 3;
|
||||
if (blkCnt > 0U)
|
||||
{
|
||||
mve_pred16_t p0 = vctp32q(blkCnt);
|
||||
vecSrc = vld1q(pSrc);
|
||||
vstrwq_p(pDst, vqaddq(vecSrc, offset), p0);
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
void arm_offset_q31(
|
||||
const q31_t * pSrc,
|
||||
q31_t offset,
|
||||
q31_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A + offset */
|
||||
|
||||
/* Add offset and store result in destination buffer. */
|
||||
#if defined (ARM_MATH_DSP)
|
||||
*pDst++ = __QADD(*pSrc++, offset);
|
||||
#else
|
||||
*pDst++ = (q31_t) clip_q63_to_q31((q63_t) * pSrc++ + offset);
|
||||
#endif
|
||||
|
||||
#if defined (ARM_MATH_DSP)
|
||||
*pDst++ = __QADD(*pSrc++, offset);
|
||||
#else
|
||||
*pDst++ = (q31_t) clip_q63_to_q31((q63_t) * pSrc++ + offset);
|
||||
#endif
|
||||
|
||||
#if defined (ARM_MATH_DSP)
|
||||
*pDst++ = __QADD(*pSrc++, offset);
|
||||
#else
|
||||
*pDst++ = (q31_t) clip_q63_to_q31((q63_t) * pSrc++ + offset);
|
||||
#endif
|
||||
|
||||
#if defined (ARM_MATH_DSP)
|
||||
*pDst++ = __QADD(*pSrc++, offset);
|
||||
#else
|
||||
*pDst++ = (q31_t) clip_q63_to_q31((q63_t) * pSrc++ + offset);
|
||||
#endif
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A + offset */
|
||||
|
||||
/* Add offset and store result in destination buffer. */
|
||||
#if defined (ARM_MATH_DSP)
|
||||
*pDst++ = __QADD(*pSrc++, offset);
|
||||
#else
|
||||
*pDst++ = (q31_t) clip_q63_to_q31((q63_t) * pSrc++ + offset);
|
||||
#endif
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* defined(ARM_MATH_MVEI) */
|
||||
|
||||
/**
|
||||
@} end of BasicOffset group
|
||||
*/
|
||||
162
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_offset_q7.c
Normal file
162
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_offset_q7.c
Normal file
@@ -0,0 +1,162 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_offset_q7.c
|
||||
* Description: Q7 vector offset
|
||||
*
|
||||
* $Date: 18. March 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
@ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup BasicOffset
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Adds a constant offset to a Q7 vector.
|
||||
@param[in] pSrc points to the input vector
|
||||
@param[in] offset is the offset to be added
|
||||
@param[out] pDst points to the output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
|
||||
@par Scaling and Overflow Behavior
|
||||
The function uses saturating arithmetic.
|
||||
Results outside of the allowable Q7 range [0x80 0x7F] are saturated.
|
||||
*/
|
||||
#if defined(ARM_MATH_MVEI)
|
||||
|
||||
#include "arm_helium_utils.h"
|
||||
|
||||
void arm_offset_q7(
|
||||
const q7_t * pSrc,
|
||||
q7_t offset,
|
||||
q7_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counters */
|
||||
q7x16_t vecSrc;
|
||||
|
||||
/* Compute 16 outputs at a time */
|
||||
blkCnt = blockSize >> 4;
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/*
|
||||
* C = A + offset
|
||||
* Add offset and then store the result in the destination buffer.
|
||||
*/
|
||||
vecSrc = vld1q(pSrc);
|
||||
vst1q(pDst, vqaddq(vecSrc, offset));
|
||||
/*
|
||||
* Decrement the blockSize loop counter
|
||||
*/
|
||||
blkCnt--;
|
||||
/*
|
||||
* advance vector source and destination pointers
|
||||
*/
|
||||
pSrc += 16;
|
||||
pDst += 16;
|
||||
}
|
||||
/*
|
||||
* tail
|
||||
*/
|
||||
blkCnt = blockSize & 0xF;
|
||||
if (blkCnt > 0U)
|
||||
{
|
||||
mve_pred16_t p0 = vctp8q(blkCnt);
|
||||
vecSrc = vld1q(pSrc);
|
||||
vstrbq_p(pDst, vqaddq(vecSrc, offset), p0);
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
void arm_offset_q7(
|
||||
const q7_t * pSrc,
|
||||
q7_t offset,
|
||||
q7_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
#if defined (ARM_MATH_DSP)
|
||||
q31_t offset_packed; /* Offset packed to 32 bit */
|
||||
|
||||
/* Offset is packed to 32 bit in order to use SIMD32 for addition */
|
||||
offset_packed = __PACKq7(offset, offset, offset, offset);
|
||||
#endif
|
||||
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A + offset */
|
||||
|
||||
#if defined (ARM_MATH_DSP)
|
||||
/* Add offset and store result in destination buffer (4 samples at a time). */
|
||||
write_q7x4_ia (&pDst, __QADD8(read_q7x4_ia ((q7_t **) &pSrc), offset_packed));
|
||||
#else
|
||||
*pDst++ = (q7_t) __SSAT(*pSrc++ + offset, 8);
|
||||
*pDst++ = (q7_t) __SSAT(*pSrc++ + offset, 8);
|
||||
*pDst++ = (q7_t) __SSAT(*pSrc++ + offset, 8);
|
||||
*pDst++ = (q7_t) __SSAT(*pSrc++ + offset, 8);
|
||||
#endif
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A + offset */
|
||||
|
||||
/* Add offset and store result in destination buffer. */
|
||||
*pDst++ = (q7_t) __SSAT((q15_t) *pSrc++ + offset, 8);
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* defined(ARM_MATH_MVEI) */
|
||||
|
||||
/**
|
||||
@} end of BasicOffset group
|
||||
*/
|
||||
137
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_or_u16.c
Normal file
137
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_or_u16.c
Normal file
@@ -0,0 +1,137 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_or_u16.c
|
||||
* Description: uint16_t bitwise inclusive OR
|
||||
*
|
||||
* $Date: 14 November 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
@ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
@defgroup Or Vector bitwise inclusive OR
|
||||
|
||||
Compute the logical bitwise OR.
|
||||
|
||||
There are separate functions for uint32_t, uint16_t, and uint8_t data types.
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup Or
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Compute the logical bitwise OR of two fixed-point vectors.
|
||||
@param[in] pSrcA points to input vector A
|
||||
@param[in] pSrcB points to input vector B
|
||||
@param[out] pDst points to output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
*/
|
||||
|
||||
/*
 * Element-wise bitwise inclusive OR of two uint16_t vectors:
 * pDst[n] = pSrcA[n] | pSrcB[n] for 0 <= n < blockSize.
 *
 *   pSrcA      input vector A
 *   pSrcB      input vector B
 *   pDst       output vector
 *   blockSize  number of samples in each vector
 */
void arm_or_u16(
    const uint16_t * pSrcA,
    const uint16_t * pSrcB,
          uint16_t * pDst,
          uint32_t blockSize)
{
    uint32_t blkCnt;      /* Loop counter */

#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
    /* Unsigned vectors: the element type of the uint16_t sources and the
       operand type implied by vorrq_u16 (the signed q15x8_t did not match). */
    uint16x8_t vecSrcA, vecSrcB;

    /* Compute 8 outputs at a time */
    blkCnt = blockSize >> 3;

    while (blkCnt > 0U)
    {
        vecSrcA = vld1q(pSrcA);
        vecSrcB = vld1q(pSrcB);

        vst1q(pDst, vorrq_u16(vecSrcA, vecSrcB) );

        pSrcA += 8;
        pSrcB += 8;
        pDst  += 8;

        /* Decrement the loop counter */
        blkCnt--;
    }

    /* Tail: 1..7 leftover samples */
    blkCnt = blockSize & 7;

    if (blkCnt > 0U)
    {
        mve_pred16_t p0 = vctp16q(blkCnt);

        /* Predicated (zeroing) loads fetch only the active lanes, so the
           sources are not read past their last sample. */
        vecSrcA = vld1q_z(pSrcA, p0);
        vecSrcB = vld1q_z(pSrcB, p0);
        vstrhq_p(pDst, vorrq_u16(vecSrcA, vecSrcB), p0);
    }
#else
#if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
    uint16x8_t vecA, vecB;

    /* Compute 8 outputs at a time */
    blkCnt = blockSize >> 3U;

    while (blkCnt > 0U)
    {
        vecA = vld1q_u16(pSrcA);
        vecB = vld1q_u16(pSrcB);

        vst1q_u16(pDst, vorrq_u16(vecA, vecB) );

        pSrcA += 8;
        pSrcB += 8;
        pDst  += 8;

        /* Decrement the loop counter */
        blkCnt--;
    }

    /* Tail: handled by the scalar loop below */
    blkCnt = blockSize & 7;
#else
    /* Initialize blkCnt with number of samples */
    blkCnt = blockSize;
#endif

    while (blkCnt > 0U)
    {
        *pDst++ = (*pSrcA++)|(*pSrcB++);

        /* Decrement the loop counter */
        blkCnt--;
    }
#endif /* if defined(ARM_MATH_MVEI) */
}
|
||||
|
||||
/**
|
||||
@} end of Or group
|
||||
*/
|
||||
128
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_or_u32.c
Normal file
128
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_or_u32.c
Normal file
@@ -0,0 +1,128 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_or_u32.c
|
||||
* Description: uint32_t bitwise inclusive OR
|
||||
*
|
||||
* $Date: 14 November 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
@ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup Or
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Compute the logical bitwise OR of two fixed-point vectors.
|
||||
@param[in] pSrcA points to input vector A
|
||||
@param[in] pSrcB points to input vector B
|
||||
@param[out] pDst points to output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
*/
|
||||
|
||||
/*
 * Element-wise bitwise inclusive OR of two uint32_t vectors:
 * pDst[n] = pSrcA[n] | pSrcB[n] for 0 <= n < blockSize.
 *
 *   pSrcA      input vector A
 *   pSrcB      input vector B
 *   pDst       output vector
 *   blockSize  number of samples in each vector
 */
void arm_or_u32(
    const uint32_t * pSrcA,
    const uint32_t * pSrcB,
          uint32_t * pDst,
          uint32_t blockSize)
{
    uint32_t blkCnt;      /* Loop counter */

#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
    /* Unsigned vectors: the element type of the uint32_t sources and the
       operand type implied by vorrq_u32 (the signed q31x4_t did not match). */
    uint32x4_t vecSrcA, vecSrcB;

    /* Compute 4 outputs at a time */
    blkCnt = blockSize >> 2;

    while (blkCnt > 0U)
    {
        vecSrcA = vld1q(pSrcA);
        vecSrcB = vld1q(pSrcB);

        vst1q(pDst, vorrq_u32(vecSrcA, vecSrcB) );

        pSrcA += 4;
        pSrcB += 4;
        pDst  += 4;

        /* Decrement the loop counter */
        blkCnt--;
    }

    /* Tail: 1..3 leftover samples */
    blkCnt = blockSize & 3;

    if (blkCnt > 0U)
    {
        mve_pred16_t p0 = vctp32q(blkCnt);

        /* Predicated (zeroing) loads fetch only the active lanes, so the
           sources are not read past their last sample. */
        vecSrcA = vld1q_z(pSrcA, p0);
        vecSrcB = vld1q_z(pSrcB, p0);
        vstrwq_p(pDst, vorrq_u32(vecSrcA, vecSrcB), p0);
    }
#else
#if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
    uint32x4_t vecA, vecB;

    /* Compute 4 outputs at a time */
    blkCnt = blockSize >> 2U;

    while (blkCnt > 0U)
    {
        vecA = vld1q_u32(pSrcA);
        vecB = vld1q_u32(pSrcB);

        vst1q_u32(pDst, vorrq_u32(vecA, vecB) );

        pSrcA += 4;
        pSrcB += 4;
        pDst  += 4;

        /* Decrement the loop counter */
        blkCnt--;
    }

    /* Tail: handled by the scalar loop below */
    blkCnt = blockSize & 3;
#else
    /* Initialize blkCnt with number of samples */
    blkCnt = blockSize;
#endif

    while (blkCnt > 0U)
    {
        *pDst++ = (*pSrcA++)|(*pSrcB++);

        /* Decrement the loop counter */
        blkCnt--;
    }
#endif /* if defined(ARM_MATH_MVEI) */
}
|
||||
/**
|
||||
@} end of Or group
|
||||
*/
|
||||
128
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_or_u8.c
Normal file
128
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_or_u8.c
Normal file
@@ -0,0 +1,128 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_or_u8.c
|
||||
* Description: uint8_t bitwise inclusive OR
|
||||
*
|
||||
* $Date: 14 November 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
@ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup Or
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Compute the logical bitwise OR of two fixed-point vectors.
|
||||
@param[in] pSrcA points to input vector A
|
||||
@param[in] pSrcB points to input vector B
|
||||
@param[out] pDst points to output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
*/
|
||||
|
||||
/*
 * Element-wise bitwise inclusive OR of two uint8_t vectors:
 * pDst[n] = pSrcA[n] | pSrcB[n] for 0 <= n < blockSize.
 *
 *   pSrcA      input vector A
 *   pSrcB      input vector B
 *   pDst       output vector
 *   blockSize  number of samples in each vector
 */
void arm_or_u8(
    const uint8_t * pSrcA,
    const uint8_t * pSrcB,
          uint8_t * pDst,
          uint32_t blockSize)
{
    uint32_t blkCnt;      /* Loop counter */

#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
    /* Unsigned vectors: the element type of the uint8_t sources and the
       operand type implied by vorrq_u8 (the signed q7x16_t did not match). */
    uint8x16_t vecSrcA, vecSrcB;

    /* Compute 16 outputs at a time */
    blkCnt = blockSize >> 4;

    while (blkCnt > 0U)
    {
        vecSrcA = vld1q(pSrcA);
        vecSrcB = vld1q(pSrcB);

        vst1q(pDst, vorrq_u8(vecSrcA, vecSrcB) );

        pSrcA += 16;
        pSrcB += 16;
        pDst  += 16;

        /* Decrement the loop counter */
        blkCnt--;
    }

    /* Tail: 1..15 leftover samples */
    blkCnt = blockSize & 0xF;

    if (blkCnt > 0U)
    {
        mve_pred16_t p0 = vctp8q(blkCnt);

        /* Predicated (zeroing) loads fetch only the active lanes, so the
           sources are not read past their last sample. */
        vecSrcA = vld1q_z(pSrcA, p0);
        vecSrcB = vld1q_z(pSrcB, p0);
        vstrbq_p(pDst, vorrq_u8(vecSrcA, vecSrcB), p0);
    }
#else
#if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
    uint8x16_t vecA, vecB;

    /* Compute 16 outputs at a time */
    blkCnt = blockSize >> 4U;

    while (blkCnt > 0U)
    {
        vecA = vld1q_u8(pSrcA);
        vecB = vld1q_u8(pSrcB);

        vst1q_u8(pDst, vorrq_u8(vecA, vecB) );

        pSrcA += 16;
        pSrcB += 16;
        pDst  += 16;

        /* Decrement the loop counter */
        blkCnt--;
    }

    /* Tail: handled by the scalar loop below */
    blkCnt = blockSize & 0xF;
#else
    /* Initialize blkCnt with number of samples */
    blkCnt = blockSize;
#endif

    while (blkCnt > 0U)
    {
        *pDst++ = (*pSrcA++)|(*pSrcB++);

        /* Decrement the loop counter */
        blkCnt--;
    }
#endif /* if defined(ARM_MATH_MVEI) */
}
|
||||
/**
|
||||
@} end of Or group
|
||||
*/
|
||||
216
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_scale_f32.c
Normal file
216
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_scale_f32.c
Normal file
@@ -0,0 +1,216 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_scale_f32.c
|
||||
* Description: Multiplies a floating-point vector by a scalar
|
||||
*
|
||||
* $Date: 18. March 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
@ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
@defgroup BasicScale Vector Scale
|
||||
|
||||
Multiply a vector by a scalar value. For floating-point data, the algorithm used is:
|
||||
|
||||
<pre>
|
||||
pDst[n] = pSrc[n] * scale, 0 <= n < blockSize.
|
||||
</pre>
|
||||
|
||||
In the fixed-point Q7, Q15, and Q31 functions, <code>scale</code> is represented by
|
||||
a fractional multiplication <code>scaleFract</code> and an arithmetic shift <code>shift</code>.
|
||||
The shift allows the gain of the scaling operation to exceed 1.0.
|
||||
The algorithm used with fixed-point data is:
|
||||
|
||||
<pre>
|
||||
pDst[n] = (pSrc[n] * scaleFract) << shift, 0 <= n < blockSize.
|
||||
</pre>
|
||||
|
||||
The overall scale factor applied to the fixed-point data is
|
||||
<pre>
|
||||
scale = scaleFract * 2^shift.
|
||||
</pre>
|
||||
|
||||
The functions support in-place computation allowing the source and destination
|
||||
pointers to reference the same memory buffer.
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup BasicScale
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Multiplies a floating-point vector by a scalar.
|
||||
@param[in] pSrc points to the input vector
|
||||
@param[in] scale scale factor to be applied
|
||||
@param[out] pDst points to the output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
*/
|
||||
|
||||
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
|
||||
#include "arm_helium_utils.h"
|
||||
|
||||
void arm_scale_f32(
|
||||
const float32_t * pSrc,
|
||||
float32_t scale,
|
||||
float32_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
|
||||
f32x4_t vec1;
|
||||
f32x4_t res;
|
||||
|
||||
/* Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A + offset */
|
||||
|
||||
/* Add offset and then store the results in the destination buffer. */
|
||||
vec1 = vld1q(pSrc);
|
||||
res = vmulq(vec1,scale);
|
||||
vst1q(pDst, res);
|
||||
|
||||
/* Increment pointers */
|
||||
pSrc += 4;
|
||||
pDst += 4;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Tail */
|
||||
blkCnt = blockSize & 0x3;
|
||||
|
||||
if (blkCnt > 0U)
|
||||
{
|
||||
mve_pred16_t p0 = vctp32q(blkCnt);
|
||||
vec1 = vld1q((float32_t const *) pSrc);
|
||||
vstrwq_p(pDst, vmulq(vec1, scale), p0);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
#else
|
||||
void arm_scale_f32(
|
||||
const float32_t *pSrc,
|
||||
float32_t scale,
|
||||
float32_t *pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
#if defined(ARM_MATH_NEON_EXPERIMENTAL)
|
||||
f32x4_t vec1;
|
||||
f32x4_t res;
|
||||
|
||||
/* Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A * scale */
|
||||
|
||||
/* Scale the input and then store the results in the destination buffer. */
|
||||
vec1 = vld1q_f32(pSrc);
|
||||
res = vmulq_f32(vec1, vdupq_n_f32(scale));
|
||||
vst1q_f32(pDst, res);
|
||||
|
||||
/* Increment pointers */
|
||||
pSrc += 4;
|
||||
pDst += 4;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Tail */
|
||||
blkCnt = blockSize & 0x3;
|
||||
|
||||
#else
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
float32_t in1, in2, in3, in4;
|
||||
|
||||
/* C = A * scale */
|
||||
|
||||
/* Scale input and store result in destination buffer. */
|
||||
in1 = (*pSrc++) * scale;
|
||||
|
||||
in2 = (*pSrc++) * scale;
|
||||
|
||||
in3 = (*pSrc++) * scale;
|
||||
|
||||
in4 = (*pSrc++) * scale;
|
||||
|
||||
*pDst++ = in1;
|
||||
*pDst++ = in2;
|
||||
*pDst++ = in3;
|
||||
*pDst++ = in4;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
#endif /* #if defined(ARM_MATH_NEON_EXPERIMENTAL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A * scale */
|
||||
|
||||
/* Scale input and store result in destination buffer. */
|
||||
*pDst++ = (*pSrc++) * scale;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
|
||||
|
||||
/**
|
||||
@} end of BasicScale group
|
||||
*/
|
||||
201
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_scale_q15.c
Normal file
201
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_scale_q15.c
Normal file
@@ -0,0 +1,201 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_scale_q15.c
|
||||
* Description: Multiplies a Q15 vector by a scalar
|
||||
*
|
||||
* $Date: 18. March 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
@ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup BasicScale
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Multiplies a Q15 vector by a scalar.
|
||||
@param[in] pSrc points to the input vector
|
||||
@param[in] scaleFract fractional portion of the scale value
|
||||
@param[in] shift number of bits to shift the result by
|
||||
@param[out] pDst points to the output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
|
||||
@par Scaling and Overflow Behavior
|
||||
The input data <code>*pSrc</code> and <code>scaleFract</code> are in 1.15 format.
|
||||
These are multiplied to yield a 2.30 intermediate result and this is shifted with saturation to 1.15 format.
|
||||
*/
|
||||
|
||||
#if defined(ARM_MATH_MVEI)
|
||||
|
||||
#include "arm_helium_utils.h"
|
||||
|
||||
void arm_scale_q15(
|
||||
const q15_t * pSrc,
|
||||
q15_t scaleFract,
|
||||
int8_t shift,
|
||||
q15_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counters */
|
||||
q15x8_t vecSrc;
|
||||
q15x8_t vecDst;
|
||||
|
||||
|
||||
/* Compute 8 outputs at a time */
|
||||
blkCnt = blockSize >> 3;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/*
|
||||
* C = A * scale
|
||||
* Scale the input and then store the result in the destination buffer.
|
||||
*/
|
||||
vecSrc = vld1q(pSrc);
|
||||
vecDst = vmulhq(vecSrc, vdupq_n_s16(scaleFract));
|
||||
vecDst = vqshlq_r(vecDst, shift + 1);
|
||||
vst1q(pDst, vecDst);
|
||||
/*
|
||||
* Decrement the blockSize loop counter
|
||||
*/
|
||||
blkCnt--;
|
||||
/*
|
||||
* advance vector source and destination pointers
|
||||
*/
|
||||
pSrc += 8;
|
||||
pDst += 8;
|
||||
}
|
||||
/*
|
||||
* tail
|
||||
*/
|
||||
blkCnt = blockSize & 7;
|
||||
if (blkCnt > 0U)
|
||||
{
|
||||
mve_pred16_t p0 = vctp16q(blkCnt);;
|
||||
vecSrc = vld1q(pSrc);
|
||||
vecDst = vmulhq(vecSrc, vdupq_n_s16(scaleFract));
|
||||
vecDst = vqshlq_r(vecDst, shift + 1);
|
||||
vstrhq_p(pDst, vecDst, p0);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
#else
|
||||
void arm_scale_q15(
|
||||
const q15_t *pSrc,
|
||||
q15_t scaleFract,
|
||||
int8_t shift,
|
||||
q15_t *pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
int8_t kShift = 15 - shift; /* Shift to apply after scaling */
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
#if defined (ARM_MATH_DSP)
|
||||
q31_t inA1, inA2;
|
||||
q31_t out1, out2, out3, out4; /* Temporary output variables */
|
||||
q15_t in1, in2, in3, in4; /* Temporary input variables */
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A * scale */
|
||||
|
||||
#if defined (ARM_MATH_DSP)
|
||||
/* read 2 times 2 samples at a time from source */
|
||||
inA1 = read_q15x2_ia ((q15_t **) &pSrc);
|
||||
inA2 = read_q15x2_ia ((q15_t **) &pSrc);
|
||||
|
||||
/* Scale inputs and store result in temporary variables
|
||||
* in single cycle by packing the outputs */
|
||||
out1 = (q31_t) ((q15_t) (inA1 >> 16) * scaleFract);
|
||||
out2 = (q31_t) ((q15_t) (inA1 ) * scaleFract);
|
||||
out3 = (q31_t) ((q15_t) (inA2 >> 16) * scaleFract);
|
||||
out4 = (q31_t) ((q15_t) (inA2 ) * scaleFract);
|
||||
|
||||
/* apply shifting */
|
||||
out1 = out1 >> kShift;
|
||||
out2 = out2 >> kShift;
|
||||
out3 = out3 >> kShift;
|
||||
out4 = out4 >> kShift;
|
||||
|
||||
/* saturate the output */
|
||||
in1 = (q15_t) (__SSAT(out1, 16));
|
||||
in2 = (q15_t) (__SSAT(out2, 16));
|
||||
in3 = (q15_t) (__SSAT(out3, 16));
|
||||
in4 = (q15_t) (__SSAT(out4, 16));
|
||||
|
||||
/* store result to destination */
|
||||
write_q15x2_ia (&pDst, __PKHBT(in2, in1, 16));
|
||||
write_q15x2_ia (&pDst, __PKHBT(in4, in3, 16));
|
||||
#else
|
||||
*pDst++ = (q15_t) (__SSAT(((q31_t) *pSrc++ * scaleFract) >> kShift, 16));
|
||||
*pDst++ = (q15_t) (__SSAT(((q31_t) *pSrc++ * scaleFract) >> kShift, 16));
|
||||
*pDst++ = (q15_t) (__SSAT(((q31_t) *pSrc++ * scaleFract) >> kShift, 16));
|
||||
*pDst++ = (q15_t) (__SSAT(((q31_t) *pSrc++ * scaleFract) >> kShift, 16));
|
||||
#endif
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A * scale */
|
||||
|
||||
/* Scale input and store result in destination buffer. */
|
||||
*pDst++ = (q15_t) (__SSAT(((q31_t) *pSrc++ * scaleFract) >> kShift, 16));
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* defined(ARM_MATH_MVEI) */
|
||||
|
||||
/**
|
||||
@} end of BasicScale group
|
||||
*/
|
||||
244
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_scale_q31.c
Normal file
244
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_scale_q31.c
Normal file
@@ -0,0 +1,244 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_scale_q31.c
|
||||
* Description: Multiplies a Q31 vector by a scalar
|
||||
*
|
||||
* $Date: 18. March 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
@ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup BasicScale
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Multiplies a Q31 vector by a scalar.
|
||||
@param[in] pSrc points to the input vector
|
||||
@param[in] scaleFract fractional portion of the scale value
|
||||
@param[in] shift number of bits to shift the result by
|
||||
@param[out] pDst points to the output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
|
||||
@par Scaling and Overflow Behavior
|
||||
The input data <code>*pSrc</code> and <code>scaleFract</code> are in 1.31 format.
|
||||
These are multiplied to yield a 2.62 intermediate result and this is shifted with saturation to 1.31 format.
|
||||
*/
|
||||
|
||||
#if defined(ARM_MATH_MVEI)
|
||||
|
||||
#include "arm_helium_utils.h"
|
||||
|
||||
void arm_scale_q31(
|
||||
const q31_t * pSrc,
|
||||
q31_t scaleFract,
|
||||
int8_t shift,
|
||||
q31_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counters */
|
||||
q31x4_t vecSrc;
|
||||
q31x4_t vecDst;
|
||||
|
||||
/* Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2;
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/*
|
||||
* C = A * scale
|
||||
* Scale the input and then store the result in the destination buffer.
|
||||
*/
|
||||
vecSrc = vld1q(pSrc);
|
||||
vecDst = vmulhq(vecSrc, vdupq_n_s32(scaleFract));
|
||||
vecDst = vqshlq_r(vecDst, shift + 1);
|
||||
vst1q(pDst, vecDst);
|
||||
/*
|
||||
* Decrement the blockSize loop counter
|
||||
*/
|
||||
blkCnt--;
|
||||
/*
|
||||
* advance vector source and destination pointers
|
||||
*/
|
||||
pSrc += 4;
|
||||
pDst += 4;
|
||||
}
|
||||
/*
|
||||
* tail
|
||||
*/
|
||||
blkCnt = blockSize & 3;
|
||||
if (blkCnt > 0U)
|
||||
{
|
||||
mve_pred16_t p0 = vctp32q(blkCnt);
|
||||
vecSrc = vld1q(pSrc);
|
||||
vecDst = vmulhq(vecSrc, vdupq_n_s32(scaleFract));
|
||||
vecDst = vqshlq_r(vecDst, shift + 1);
|
||||
vstrwq_p(pDst, vecDst, p0);
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
void arm_scale_q31(
|
||||
const q31_t *pSrc,
|
||||
q31_t scaleFract,
|
||||
int8_t shift,
|
||||
q31_t *pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
q31_t in, out; /* Temporary variables */
|
||||
int8_t kShift = shift + 1; /* Shift to apply after scaling */
|
||||
int8_t sign = (kShift & 0x80);
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
if (sign == 0U)
|
||||
{
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A * scale */
|
||||
|
||||
/* Scale input and store result in destination buffer. */
|
||||
in = *pSrc++; /* read input from source */
|
||||
in = ((q63_t) in * scaleFract) >> 32; /* multiply input with scaler value */
|
||||
out = in << kShift; /* apply shifting */
|
||||
if (in != (out >> kShift)) /* saturate the result */
|
||||
out = 0x7FFFFFFF ^ (in >> 31);
|
||||
*pDst++ = out; /* Store result destination */
|
||||
|
||||
in = *pSrc++;
|
||||
in = ((q63_t) in * scaleFract) >> 32;
|
||||
out = in << kShift;
|
||||
if (in != (out >> kShift))
|
||||
out = 0x7FFFFFFF ^ (in >> 31);
|
||||
*pDst++ = out;
|
||||
|
||||
in = *pSrc++;
|
||||
in = ((q63_t) in * scaleFract) >> 32;
|
||||
out = in << kShift;
|
||||
if (in != (out >> kShift))
|
||||
out = 0x7FFFFFFF ^ (in >> 31);
|
||||
*pDst++ = out;
|
||||
|
||||
in = *pSrc++;
|
||||
in = ((q63_t) in * scaleFract) >> 32;
|
||||
out = in << kShift;
|
||||
if (in != (out >> kShift))
|
||||
out = 0x7FFFFFFF ^ (in >> 31);
|
||||
*pDst++ = out;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A * scale */
|
||||
|
||||
/* Scale input and store result in destination buffer. */
|
||||
in = *pSrc++; /* read four inputs from source */
|
||||
in = ((q63_t) in * scaleFract) >> 32; /* multiply input with scaler value */
|
||||
out = in >> -kShift; /* apply shifting */
|
||||
*pDst++ = out; /* Store result destination */
|
||||
|
||||
in = *pSrc++;
|
||||
in = ((q63_t) in * scaleFract) >> 32;
|
||||
out = in >> -kShift;
|
||||
*pDst++ = out;
|
||||
|
||||
in = *pSrc++;
|
||||
in = ((q63_t) in * scaleFract) >> 32;
|
||||
out = in >> -kShift;
|
||||
*pDst++ = out;
|
||||
|
||||
in = *pSrc++;
|
||||
in = ((q63_t) in * scaleFract) >> 32;
|
||||
out = in >> -kShift;
|
||||
*pDst++ = out;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
if (sign == 0U)
|
||||
{
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A * scale */
|
||||
|
||||
/* Scale input and store result in destination buffer. */
|
||||
in = *pSrc++;
|
||||
in = ((q63_t) in * scaleFract) >> 32;
|
||||
out = in << kShift;
|
||||
if (in != (out >> kShift))
|
||||
out = 0x7FFFFFFF ^ (in >> 31);
|
||||
*pDst++ = out;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A * scale */
|
||||
|
||||
/* Scale input and store result in destination buffer. */
|
||||
in = *pSrc++;
|
||||
in = ((q63_t) in * scaleFract) >> 32;
|
||||
out = in >> -kShift;
|
||||
*pDst++ = out;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* defined(ARM_MATH_MVEI) */
|
||||
|
||||
/**
|
||||
@} end of BasicScale group
|
||||
*/
|
||||
186
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_scale_q7.c
Normal file
186
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_scale_q7.c
Normal file
@@ -0,0 +1,186 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_scale_q7.c
|
||||
* Description: Multiplies a Q7 vector by a scalar
|
||||
*
|
||||
* $Date: 18. March 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
@ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup BasicScale
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Multiplies a Q7 vector by a scalar.
|
||||
@param[in] pSrc points to the input vector
|
||||
@param[in] scaleFract fractional portion of the scale value
|
||||
@param[in] shift number of bits to shift the result by
|
||||
@param[out] pDst points to the output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
|
||||
@par Scaling and Overflow Behavior
|
||||
The input data <code>*pSrc</code> and <code>scaleFract</code> are in 1.7 format.
|
||||
These are multiplied to yield a 2.14 intermediate result and this is shifted with saturation to 1.7 format.
|
||||
*/
|
||||
|
||||
#if defined(ARM_MATH_MVEI)
|
||||
|
||||
#include "arm_helium_utils.h"
|
||||
|
||||
|
||||
void arm_scale_q7(
|
||||
const q7_t * pSrc,
|
||||
q7_t scaleFract,
|
||||
int8_t shift,
|
||||
q7_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counters */
|
||||
q7x16_t vecSrc;
|
||||
q7x16_t vecDst;
|
||||
|
||||
|
||||
/* Compute 16 outputs at a time */
|
||||
blkCnt = blockSize >> 4;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/*
|
||||
* C = A * scale
|
||||
* Scale the input and then store the result in the destination buffer.
|
||||
*/
|
||||
vecSrc = vld1q(pSrc);
|
||||
vecDst = vmulhq(vecSrc, vdupq_n_s8(scaleFract));
|
||||
vecDst = vqshlq_r(vecDst, shift + 1);
|
||||
vst1q(pDst, vecDst);
|
||||
/*
|
||||
* Decrement the blockSize loop counter
|
||||
*/
|
||||
blkCnt--;
|
||||
/*
|
||||
* advance vector source and destination pointers
|
||||
*/
|
||||
pSrc += 16;
|
||||
pDst += 16;
|
||||
}
|
||||
/*
|
||||
* tail
|
||||
*/
|
||||
blkCnt = blockSize & 0xF;
|
||||
if (blkCnt > 0U)
|
||||
{
|
||||
mve_pred16_t p0 = vctp8q(blkCnt);
|
||||
vecSrc = vld1q(pSrc);
|
||||
vecDst = vmulhq(vecSrc, vdupq_n_s8(scaleFract));
|
||||
vecDst = vqshlq_r(vecDst, shift + 1);
|
||||
vstrbq_p(pDst, vecDst, p0);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#else
|
||||
void arm_scale_q7(
|
||||
const q7_t * pSrc,
|
||||
q7_t scaleFract,
|
||||
int8_t shift,
|
||||
q7_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
int8_t kShift = 7 - shift; /* Shift to apply after scaling */
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
#if defined (ARM_MATH_DSP)
|
||||
q7_t in1, in2, in3, in4; /* Temporary input variables */
|
||||
q7_t out1, out2, out3, out4; /* Temporary output variables */
|
||||
#endif
|
||||
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A * scale */
|
||||
|
||||
#if defined (ARM_MATH_DSP)
|
||||
/* Reading 4 inputs from memory */
|
||||
in1 = *pSrc++;
|
||||
in2 = *pSrc++;
|
||||
in3 = *pSrc++;
|
||||
in4 = *pSrc++;
|
||||
|
||||
/* Scale inputs and store result in the temporary variable. */
|
||||
out1 = (q7_t) (__SSAT(((in1) * scaleFract) >> kShift, 8));
|
||||
out2 = (q7_t) (__SSAT(((in2) * scaleFract) >> kShift, 8));
|
||||
out3 = (q7_t) (__SSAT(((in3) * scaleFract) >> kShift, 8));
|
||||
out4 = (q7_t) (__SSAT(((in4) * scaleFract) >> kShift, 8));
|
||||
|
||||
/* Pack and store result in destination buffer (in single write) */
|
||||
write_q7x4_ia (&pDst, __PACKq7(out1, out2, out3, out4));
|
||||
#else
|
||||
*pDst++ = (q7_t) (__SSAT((((q15_t) *pSrc++ * scaleFract) >> kShift), 8));
|
||||
*pDst++ = (q7_t) (__SSAT((((q15_t) *pSrc++ * scaleFract) >> kShift), 8));
|
||||
*pDst++ = (q7_t) (__SSAT((((q15_t) *pSrc++ * scaleFract) >> kShift), 8));
|
||||
*pDst++ = (q7_t) (__SSAT((((q15_t) *pSrc++ * scaleFract) >> kShift), 8));
|
||||
#endif
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A * scale */
|
||||
|
||||
/* Scale input and store result in destination buffer. */
|
||||
*pDst++ = (q7_t) (__SSAT((((q15_t) *pSrc++ * scaleFract) >> kShift), 8));
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* defined(ARM_MATH_MVEI) */
|
||||
|
||||
/**
|
||||
@} end of BasicScale group
|
||||
*/
|
||||
251
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_shift_q15.c
Normal file
251
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_shift_q15.c
Normal file
@@ -0,0 +1,251 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_shift_q15.c
|
||||
* Description: Shifts the elements of a Q15 vector by a specified number of bits
|
||||
*
|
||||
* $Date: 18. March 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
@ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup BasicShift
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Shifts the elements of a Q15 vector a specified number of bits
|
||||
@param[in] pSrc points to the input vector
|
||||
@param[in] shiftBits number of bits to shift. A positive value shifts left; a negative value shifts right.
|
||||
@param[out] pDst points to the output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
|
||||
@par Scaling and Overflow Behavior
|
||||
The function uses saturating arithmetic.
|
||||
Results outside of the allowable Q15 range [0x8000 0x7FFF] are saturated.
|
||||
*/
|
||||
|
||||
#if defined(ARM_MATH_MVEI)
|
||||
|
||||
#include "arm_helium_utils.h"
|
||||
|
||||
void arm_shift_q15(
|
||||
const q15_t * pSrc,
|
||||
int8_t shiftBits,
|
||||
q15_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counters */
|
||||
q15x8_t vecSrc;
|
||||
q15x8_t vecDst;
|
||||
|
||||
/* Compute 8 outputs at a time */
|
||||
blkCnt = blockSize >> 3;
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/*
|
||||
* C = A (>> or <<) shiftBits
|
||||
* Shift the input and then store the result in the destination buffer.
|
||||
*/
|
||||
vecSrc = vld1q(pSrc);
|
||||
vecDst = vqshlq_r(vecSrc, shiftBits);
|
||||
vst1q(pDst, vecDst);
|
||||
/*
|
||||
* Decrement the blockSize loop counter
|
||||
*/
|
||||
blkCnt--;
|
||||
/*
|
||||
* advance vector source and destination pointers
|
||||
*/
|
||||
pSrc += 8;
|
||||
pDst += 8;
|
||||
}
|
||||
/*
|
||||
* tail
|
||||
*/
|
||||
blkCnt = blockSize & 7;
|
||||
if (blkCnt > 0U)
|
||||
{
|
||||
mve_pred16_t p0 = vctp16q(blkCnt);
|
||||
vecSrc = vld1q(pSrc);
|
||||
vecDst = vqshlq_r(vecSrc, shiftBits);
|
||||
vstrhq_p(pDst, vecDst, p0);
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
void arm_shift_q15(
|
||||
const q15_t * pSrc,
|
||||
int8_t shiftBits,
|
||||
q15_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
uint8_t sign = (shiftBits & 0x80); /* Sign of shiftBits */
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
#if defined (ARM_MATH_DSP)
|
||||
q15_t in1, in2; /* Temporary input variables */
|
||||
#endif
|
||||
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
/* If the shift value is positive then do right shift else left shift */
|
||||
if (sign == 0U)
|
||||
{
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A << shiftBits */
|
||||
|
||||
#if defined (ARM_MATH_DSP)
|
||||
/* read 2 samples from source */
|
||||
in1 = *pSrc++;
|
||||
in2 = *pSrc++;
|
||||
|
||||
/* Shift the inputs and then store the results in the destination buffer. */
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
write_q15x2_ia (&pDst, __PKHBT(__SSAT((in1 << shiftBits), 16),
|
||||
__SSAT((in2 << shiftBits), 16), 16));
|
||||
#else
|
||||
write_q15x2_ia (&pDst, __PKHBT(__SSAT((in2 << shiftBits), 16),
|
||||
__SSAT((in1 << shiftBits), 16), 16));
|
||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||
|
||||
/* read 2 samples from source */
|
||||
in1 = *pSrc++;
|
||||
in2 = *pSrc++;
|
||||
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
write_q15x2_ia (&pDst, __PKHBT(__SSAT((in1 << shiftBits), 16),
|
||||
__SSAT((in2 << shiftBits), 16), 16));
|
||||
#else
|
||||
write_q15x2_ia (&pDst, __PKHBT(__SSAT((in2 << shiftBits), 16),
|
||||
__SSAT((in1 << shiftBits), 16), 16));
|
||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||
|
||||
#else
|
||||
*pDst++ = __SSAT(((q31_t) *pSrc++ << shiftBits), 16);
|
||||
*pDst++ = __SSAT(((q31_t) *pSrc++ << shiftBits), 16);
|
||||
*pDst++ = __SSAT(((q31_t) *pSrc++ << shiftBits), 16);
|
||||
*pDst++ = __SSAT(((q31_t) *pSrc++ << shiftBits), 16);
|
||||
#endif
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A >> shiftBits */
|
||||
|
||||
#if defined (ARM_MATH_DSP)
|
||||
/* read 2 samples from source */
|
||||
in1 = *pSrc++;
|
||||
in2 = *pSrc++;
|
||||
|
||||
/* Shift the inputs and then store the results in the destination buffer. */
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
write_q15x2_ia (&pDst, __PKHBT((in1 >> -shiftBits),
|
||||
(in2 >> -shiftBits), 16));
|
||||
#else
|
||||
write_q15x2_ia (&pDst, __PKHBT((in2 >> -shiftBits),
|
||||
(in1 >> -shiftBits), 16));
|
||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||
|
||||
/* read 2 samples from source */
|
||||
in1 = *pSrc++;
|
||||
in2 = *pSrc++;
|
||||
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
write_q15x2_ia (&pDst, __PKHBT((in1 >> -shiftBits),
|
||||
(in2 >> -shiftBits), 16));
|
||||
#else
|
||||
write_q15x2_ia (&pDst, __PKHBT((in2 >> -shiftBits),
|
||||
(in1 >> -shiftBits), 16));
|
||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||
|
||||
#else
|
||||
*pDst++ = (*pSrc++ >> -shiftBits);
|
||||
*pDst++ = (*pSrc++ >> -shiftBits);
|
||||
*pDst++ = (*pSrc++ >> -shiftBits);
|
||||
*pDst++ = (*pSrc++ >> -shiftBits);
|
||||
#endif
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
/* If the shift value is positive then do right shift else left shift */
|
||||
if (sign == 0U)
|
||||
{
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A << shiftBits */
|
||||
|
||||
/* Shift input and store result in destination buffer. */
|
||||
*pDst++ = __SSAT(((q31_t) *pSrc++ << shiftBits), 16);
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A >> shiftBits */
|
||||
|
||||
/* Shift input and store result in destination buffer. */
|
||||
*pDst++ = (*pSrc++ >> -shiftBits);
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* defined(ARM_MATH_MVEI) */
|
||||
|
||||
/**
|
||||
@} end of BasicShift group
|
||||
*/
|
||||
232
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_shift_q31.c
Normal file
232
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_shift_q31.c
Normal file
@@ -0,0 +1,232 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_shift_q31.c
|
||||
* Description: Shifts the elements of a Q31 vector by a specified number of bits
|
||||
*
|
||||
* $Date: 18. March 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
@ingroup groupMath
|
||||
*/
|
||||
/**
|
||||
@defgroup BasicShift Vector Shift
|
||||
|
||||
Shifts the elements of a fixed-point vector by a specified number of bits.
|
||||
There are separate functions for Q7, Q15, and Q31 data types.
|
||||
The underlying algorithm used is:
|
||||
|
||||
<pre>
|
||||
pDst[n] = pSrc[n] << shift, 0 <= n < blockSize.
|
||||
</pre>
|
||||
|
||||
If <code>shift</code> is positive then the elements of the vector are shifted to the left.
|
||||
If <code>shift</code> is negative then the elements of the vector are shifted to the right.
|
||||
|
||||
The functions support in-place computation allowing the source and destination
|
||||
pointers to reference the same memory buffer.
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup BasicShift
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Shifts the elements of a Q31 vector a specified number of bits.
|
||||
@param[in] pSrc points to the input vector
|
||||
@param[in] shiftBits number of bits to shift. A positive value shifts left; a negative value shifts right.
|
||||
@param[out] pDst points to the output vector
|
||||
@param[in] blockSize number of samples in the vector
|
||||
@return none
|
||||
|
||||
@par Scaling and Overflow Behavior
|
||||
The function uses saturating arithmetic.
|
||||
Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] are saturated.
|
||||
*/
|
||||
|
||||
#if defined(ARM_MATH_MVEI)
|
||||
|
||||
#include "arm_helium_utils.h"
|
||||
|
||||
void arm_shift_q31(
|
||||
const q31_t * pSrc,
|
||||
int8_t shiftBits,
|
||||
q31_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counters */
|
||||
q31x4_t vecSrc;
|
||||
q31x4_t vecDst;
|
||||
|
||||
/* Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2;
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/*
|
||||
* C = A (>> or <<) shiftBits
|
||||
* Shift the input and then store the result in the destination buffer.
|
||||
*/
|
||||
vecSrc = vld1q((q31_t const *) pSrc);
|
||||
vecDst = vqshlq_r(vecSrc, shiftBits);
|
||||
vst1q(pDst, vecDst);
|
||||
/*
|
||||
* Decrement the blockSize loop counter
|
||||
*/
|
||||
blkCnt--;
|
||||
/*
|
||||
* advance vector source and destination pointers
|
||||
*/
|
||||
pSrc += 4;
|
||||
pDst += 4;
|
||||
}
|
||||
/*
|
||||
* tail
|
||||
*/
|
||||
blkCnt = blockSize & 3;
|
||||
if (blkCnt > 0U)
|
||||
{
|
||||
mve_pred16_t p0 = vctp32q(blkCnt);
|
||||
vecSrc = vld1q((q31_t const *) pSrc);
|
||||
vecDst = vqshlq_r(vecSrc, shiftBits);
|
||||
vstrwq_p(pDst, vecDst, p0);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#else
|
||||
void arm_shift_q31(
|
||||
const q31_t * pSrc,
|
||||
int8_t shiftBits,
|
||||
q31_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
uint8_t sign = (shiftBits & 0x80); /* Sign of shiftBits */
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
q31_t in, out; /* Temporary variables */
|
||||
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
/* If the shift value is positive then do right shift else left shift */
|
||||
if (sign == 0U)
|
||||
{
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A << shiftBits */
|
||||
|
||||
/* Shift input and store result in destination buffer. */
|
||||
in = *pSrc++;
|
||||
out = in << shiftBits;
|
||||
if (in != (out >> shiftBits))
|
||||
out = 0x7FFFFFFF ^ (in >> 31);
|
||||
*pDst++ = out;
|
||||
|
||||
in = *pSrc++;
|
||||
out = in << shiftBits;
|
||||
if (in != (out >> shiftBits))
|
||||
out = 0x7FFFFFFF ^ (in >> 31);
|
||||
*pDst++ = out;
|
||||
|
||||
in = *pSrc++;
|
||||
out = in << shiftBits;
|
||||
if (in != (out >> shiftBits))
|
||||
out = 0x7FFFFFFF ^ (in >> 31);
|
||||
*pDst++ = out;
|
||||
|
||||
in = *pSrc++;
|
||||
out = in << shiftBits;
|
||||
if (in != (out >> shiftBits))
|
||||
out = 0x7FFFFFFF ^ (in >> 31);
|
||||
*pDst++ = out;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A >> shiftBits */
|
||||
|
||||
/* Shift input and store results in destination buffer. */
|
||||
*pDst++ = (*pSrc++ >> -shiftBits);
|
||||
*pDst++ = (*pSrc++ >> -shiftBits);
|
||||
*pDst++ = (*pSrc++ >> -shiftBits);
|
||||
*pDst++ = (*pSrc++ >> -shiftBits);
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
/* If the shift value is positive then do right shift else left shift */
|
||||
if (sign == 0U)
|
||||
{
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A << shiftBits */
|
||||
|
||||
/* Shift input and store result in destination buffer. */
|
||||
*pDst++ = clip_q63_to_q31((q63_t) *pSrc++ << shiftBits);
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A >> shiftBits */
|
||||
|
||||
/* Shift input and store result in destination buffer. */
|
||||
*pDst++ = (*pSrc++ >> -shiftBits);
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* defined(ARM_MATH_MVEI) */
|
||||
|
||||
/**
|
||||
@} end of BasicShift group
|
||||
*/
|
||||
225
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_shift_q7.c
Normal file
225
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_shift_q7.c
Normal file
@@ -0,0 +1,225 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_shift_q7.c
|
||||
* Description: Processing function for the Q7 Shifting
|
||||
*
|
||||
* $Date: 18. March 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
@ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup BasicShift
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Shifts the elements of a Q7 vector a specified number of bits
|
||||
@param[in] pSrc points to the input vector
|
||||
@param[in] shiftBits number of bits to shift. A positive value shifts left; a negative value shifts right.
|
||||
@param[out] pDst points to the output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
|
||||
@par Conditions for optimum performance
|
||||
Input and output buffers should be aligned by 32-bit
|
||||
@par Scaling and Overflow Behavior
|
||||
The function uses saturating arithmetic.
|
||||
Results outside of the allowable Q7 range [0x80 0x7F] are saturated.
|
||||
*/
|
||||
|
||||
#if defined(ARM_MATH_MVEI)
|
||||
|
||||
#include "arm_helium_utils.h"
|
||||
|
||||
void arm_shift_q7(
|
||||
const q7_t * pSrc,
|
||||
int8_t shiftBits,
|
||||
q7_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counters */
|
||||
q7x16_t vecSrc;
|
||||
q7x16_t vecDst;
|
||||
|
||||
/* Compute 16 outputs at a time */
|
||||
blkCnt = blockSize >> 4;
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/*
|
||||
* C = A (>> or <<) shiftBits
|
||||
* Shift the input and then store the result in the destination buffer.
|
||||
*/
|
||||
vecSrc = vld1q(pSrc);
|
||||
vecDst = vqshlq_r(vecSrc, shiftBits);
|
||||
vst1q(pDst, vecDst);
|
||||
/*
|
||||
* Decrement the blockSize loop counter
|
||||
*/
|
||||
blkCnt--;
|
||||
/*
|
||||
* advance vector source and destination pointers
|
||||
*/
|
||||
pSrc += 16;
|
||||
pDst += 16;
|
||||
}
|
||||
/*
|
||||
* tail
|
||||
*/
|
||||
blkCnt = blockSize & 0xF;
|
||||
if (blkCnt > 0U)
|
||||
{
|
||||
mve_pred16_t p0 = vctp8q(blkCnt);
|
||||
vecSrc = vld1q(pSrc);
|
||||
vecDst = vqshlq_r(vecSrc, shiftBits);
|
||||
vstrbq_p(pDst, vecDst, p0);
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
void arm_shift_q7(
|
||||
const q7_t * pSrc,
|
||||
int8_t shiftBits,
|
||||
q7_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
uint8_t sign = (shiftBits & 0x80); /* Sign of shiftBits */
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
#if defined (ARM_MATH_DSP)
|
||||
q7_t in1, in2, in3, in4; /* Temporary input variables */
|
||||
#endif
|
||||
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
/* If the shift value is positive then do right shift else left shift */
|
||||
if (sign == 0U)
|
||||
{
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A << shiftBits */
|
||||
|
||||
#if defined (ARM_MATH_DSP)
|
||||
/* Read 4 inputs */
|
||||
in1 = *pSrc++;
|
||||
in2 = *pSrc++;
|
||||
in3 = *pSrc++;
|
||||
in4 = *pSrc++;
|
||||
|
||||
/* Pack and store result in destination buffer (in single write) */
|
||||
write_q7x4_ia (&pDst, __PACKq7(__SSAT((in1 << shiftBits), 8),
|
||||
__SSAT((in2 << shiftBits), 8),
|
||||
__SSAT((in3 << shiftBits), 8),
|
||||
__SSAT((in4 << shiftBits), 8) ));
|
||||
#else
|
||||
*pDst++ = (q7_t) __SSAT(((q15_t) *pSrc++ << shiftBits), 8);
|
||||
*pDst++ = (q7_t) __SSAT(((q15_t) *pSrc++ << shiftBits), 8);
|
||||
*pDst++ = (q7_t) __SSAT(((q15_t) *pSrc++ << shiftBits), 8);
|
||||
*pDst++ = (q7_t) __SSAT(((q15_t) *pSrc++ << shiftBits), 8);
|
||||
#endif
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A >> shiftBits */
|
||||
|
||||
#if defined (ARM_MATH_DSP)
|
||||
/* Read 4 inputs */
|
||||
in1 = *pSrc++;
|
||||
in2 = *pSrc++;
|
||||
in3 = *pSrc++;
|
||||
in4 = *pSrc++;
|
||||
|
||||
/* Pack and store result in destination buffer (in single write) */
|
||||
write_q7x4_ia (&pDst, __PACKq7((in1 >> -shiftBits),
|
||||
(in2 >> -shiftBits),
|
||||
(in3 >> -shiftBits),
|
||||
(in4 >> -shiftBits) ));
|
||||
#else
|
||||
*pDst++ = (*pSrc++ >> -shiftBits);
|
||||
*pDst++ = (*pSrc++ >> -shiftBits);
|
||||
*pDst++ = (*pSrc++ >> -shiftBits);
|
||||
*pDst++ = (*pSrc++ >> -shiftBits);
|
||||
#endif
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
/* If the shift value is positive then do right shift else left shift */
|
||||
if (sign == 0U)
|
||||
{
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A << shiftBits */
|
||||
|
||||
/* Shift input and store result in destination buffer. */
|
||||
*pDst++ = (q7_t) __SSAT(((q15_t) *pSrc++ << shiftBits), 8);
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A >> shiftBits */
|
||||
|
||||
/* Shift input and store result in destination buffer. */
|
||||
*pDst++ = (*pSrc++ >> -shiftBits);
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* defined(ARM_MATH_MVEI) */
|
||||
|
||||
/**
|
||||
@} end of BasicShift group
|
||||
*/
|
||||
202
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_sub_f32.c
Normal file
202
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_sub_f32.c
Normal file
@@ -0,0 +1,202 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_sub_f32.c
|
||||
* Description: Floating-point vector subtraction
|
||||
*
|
||||
* $Date: 18. March 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
@ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
@defgroup BasicSub Vector Subtraction
|
||||
|
||||
Element-by-element subtraction of two vectors.
|
||||
|
||||
<pre>
|
||||
pDst[n] = pSrcA[n] - pSrcB[n], 0 <= n < blockSize.
|
||||
</pre>
|
||||
|
||||
There are separate functions for floating-point, Q7, Q15, and Q31 data types.
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup BasicSub
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Floating-point vector subtraction.
|
||||
@param[in] pSrcA points to the first input vector
|
||||
@param[in] pSrcB points to the second input vector
|
||||
@param[out] pDst points to the output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
*/
|
||||
|
||||
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
|
||||
#include "arm_helium_utils.h"
|
||||
|
||||
void arm_sub_f32(
|
||||
const float32_t * pSrcA,
|
||||
const float32_t * pSrcB,
|
||||
float32_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
|
||||
f32x4_t vec1;
|
||||
f32x4_t vec2;
|
||||
f32x4_t res;
|
||||
|
||||
/* Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A - B */
|
||||
|
||||
/* Subtract and then store the results in the destination buffer. */
|
||||
vec1 = vld1q(pSrcA);
|
||||
vec2 = vld1q(pSrcB);
|
||||
res = vsubq(vec1, vec2);
|
||||
vst1q(pDst, res);
|
||||
|
||||
/* Increment pointers */
|
||||
pSrcA += 4;
|
||||
pSrcB += 4;
|
||||
pDst += 4;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Tail */
|
||||
blkCnt = blockSize & 0x3;
|
||||
|
||||
if (blkCnt > 0U)
|
||||
{
|
||||
/* C = A - B */
|
||||
mve_pred16_t p0 = vctp32q(blkCnt);
|
||||
vec1 = vld1q(pSrcA);
|
||||
vec2 = vld1q(pSrcB);
|
||||
vstrwq_p(pDst, vsubq(vec1,vec2), p0);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#else
|
||||
void arm_sub_f32(
|
||||
const float32_t * pSrcA,
|
||||
const float32_t * pSrcB,
|
||||
float32_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
|
||||
#if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
f32x4_t vec1;
|
||||
f32x4_t vec2;
|
||||
f32x4_t res;
|
||||
|
||||
/* Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A - B */
|
||||
|
||||
/* Subtract and then store the results in the destination buffer. */
|
||||
vec1 = vld1q_f32(pSrcA);
|
||||
vec2 = vld1q_f32(pSrcB);
|
||||
res = vsubq_f32(vec1, vec2);
|
||||
vst1q_f32(pDst, res);
|
||||
|
||||
/* Increment pointers */
|
||||
pSrcA += 4;
|
||||
pSrcB += 4;
|
||||
pDst += 4;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Tail */
|
||||
blkCnt = blockSize & 0x3;
|
||||
|
||||
#else
|
||||
#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A - B */
|
||||
|
||||
/* Subtract and store result in destination buffer. */
|
||||
*pDst++ = (*pSrcA++) - (*pSrcB++);
|
||||
|
||||
*pDst++ = (*pSrcA++) - (*pSrcB++);
|
||||
|
||||
*pDst++ = (*pSrcA++) - (*pSrcB++);
|
||||
|
||||
*pDst++ = (*pSrcA++) - (*pSrcB++);
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
#endif /* #if defined(ARM_MATH_NEON) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A - B */
|
||||
|
||||
/* Subtract and store result in destination buffer. */
|
||||
*pDst++ = (*pSrcA++) - (*pSrcB++);
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
|
||||
|
||||
/**
|
||||
@} end of BasicSub group
|
||||
*/
|
||||
178
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_sub_q15.c
Normal file
178
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_sub_q15.c
Normal file
@@ -0,0 +1,178 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_sub_q15.c
|
||||
* Description: Q15 vector subtraction
|
||||
*
|
||||
* $Date: 18. March 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
@ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup BasicSub
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Q15 vector subtraction.
|
||||
@param[in] pSrcA points to the first input vector
|
||||
@param[in] pSrcB points to the second input vector
|
||||
@param[out] pDst points to the output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
|
||||
@par Scaling and Overflow Behavior
|
||||
The function uses saturating arithmetic.
|
||||
Results outside of the allowable Q15 range [0x8000 0x7FFF] are saturated.
|
||||
*/
|
||||
|
||||
#if defined(ARM_MATH_MVEI)
|
||||
|
||||
#include "arm_helium_utils.h"
|
||||
|
||||
void arm_sub_q15(
|
||||
const q15_t * pSrcA,
|
||||
const q15_t * pSrcB,
|
||||
q15_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counters */
|
||||
q15x8_t vecA;
|
||||
q15x8_t vecB;
|
||||
|
||||
/* Compute 8 outputs at a time */
|
||||
blkCnt = blockSize >> 3;
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/*
|
||||
* C = A - B
|
||||
* Subtract and then store the results in the destination buffer.
|
||||
*/
|
||||
vecA = vld1q(pSrcA);
|
||||
vecB = vld1q(pSrcB);
|
||||
vst1q(pDst, vqsubq(vecA, vecB));
|
||||
/*
|
||||
* Decrement the blockSize loop counter
|
||||
*/
|
||||
blkCnt--;
|
||||
/*
|
||||
* advance vector source and destination pointers
|
||||
*/
|
||||
pSrcA += 8;
|
||||
pSrcB += 8;
|
||||
pDst += 8;
|
||||
}
|
||||
/*
|
||||
* tail
|
||||
*/
|
||||
blkCnt = blockSize & 7;
|
||||
if (blkCnt > 0U)
|
||||
{
|
||||
mve_pred16_t p0 = vctp16q(blkCnt);
|
||||
vecA = vld1q(pSrcA);
|
||||
vecB = vld1q(pSrcB);
|
||||
vstrhq_p(pDst, vqsubq(vecA, vecB), p0);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#else
|
||||
void arm_sub_q15(
|
||||
const q15_t * pSrcA,
|
||||
const q15_t * pSrcB,
|
||||
q15_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
#if defined (ARM_MATH_DSP)
|
||||
q31_t inA1, inA2;
|
||||
q31_t inB1, inB2;
|
||||
#endif
|
||||
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A - B */
|
||||
|
||||
#if defined (ARM_MATH_DSP)
|
||||
/* read 2 times 2 samples at a time from sourceA */
|
||||
inA1 = read_q15x2_ia ((q15_t **) &pSrcA);
|
||||
inA2 = read_q15x2_ia ((q15_t **) &pSrcA);
|
||||
/* read 2 times 2 samples at a time from sourceB */
|
||||
inB1 = read_q15x2_ia ((q15_t **) &pSrcB);
|
||||
inB2 = read_q15x2_ia ((q15_t **) &pSrcB);
|
||||
|
||||
/* Subtract and store 2 times 2 samples at a time */
|
||||
write_q15x2_ia (&pDst, __QSUB16(inA1, inB1));
|
||||
write_q15x2_ia (&pDst, __QSUB16(inA2, inB2));
|
||||
#else
|
||||
*pDst++ = (q15_t) __SSAT(((q31_t) *pSrcA++ - *pSrcB++), 16);
|
||||
*pDst++ = (q15_t) __SSAT(((q31_t) *pSrcA++ - *pSrcB++), 16);
|
||||
*pDst++ = (q15_t) __SSAT(((q31_t) *pSrcA++ - *pSrcB++), 16);
|
||||
*pDst++ = (q15_t) __SSAT(((q31_t) *pSrcA++ - *pSrcB++), 16);
|
||||
#endif
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A - B */
|
||||
|
||||
/* Subtract and store result in destination buffer. */
|
||||
#if defined (ARM_MATH_DSP)
|
||||
*pDst++ = (q15_t) __QSUB16(*pSrcA++, *pSrcB++);
|
||||
#else
|
||||
*pDst++ = (q15_t) __SSAT(((q31_t) *pSrcA++ - *pSrcB++), 16);
|
||||
#endif
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* defined(ARM_MATH_MVEI) */
|
||||
|
||||
/**
|
||||
@} end of BasicSub group
|
||||
*/
|
||||
159
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_sub_q31.c
Normal file
159
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_sub_q31.c
Normal file
@@ -0,0 +1,159 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_sub_q31.c
|
||||
* Description: Q31 vector subtraction
|
||||
*
|
||||
* $Date: 18. March 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
@ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup BasicSub
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Q31 vector subtraction.
|
||||
@param[in] pSrcA points to the first input vector
|
||||
@param[in] pSrcB points to the second input vector
|
||||
@param[out] pDst points to the output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
|
||||
@par Scaling and Overflow Behavior
|
||||
The function uses saturating arithmetic.
|
||||
Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] are saturated.
|
||||
*/
|
||||
|
||||
#if defined(ARM_MATH_MVEI)
|
||||
|
||||
#include "arm_helium_utils.h"
|
||||
|
||||
void arm_sub_q31(
|
||||
const q31_t * pSrcA,
|
||||
const q31_t * pSrcB,
|
||||
q31_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt;
|
||||
q31x4_t vecA;
|
||||
q31x4_t vecB;
|
||||
|
||||
/* Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2;
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/*
|
||||
* C = A - B
|
||||
* Subtract and then store the results in the destination buffer.
|
||||
*/
|
||||
vecA = vld1q(pSrcA);
|
||||
vecB = vld1q(pSrcB);
|
||||
vst1q(pDst, vqsubq(vecA, vecB));
|
||||
/*
|
||||
* Decrement the blockSize loop counter
|
||||
*/
|
||||
blkCnt--;
|
||||
/*
|
||||
* advance vector source and destination pointers
|
||||
*/
|
||||
pSrcA += 4;
|
||||
pSrcB += 4;
|
||||
pDst += 4;
|
||||
}
|
||||
/*
|
||||
* tail
|
||||
*/
|
||||
blkCnt = blockSize & 3;
|
||||
if (blkCnt > 0U)
|
||||
{
|
||||
mve_pred16_t p0 = vctp32q(blkCnt);
|
||||
vecA = vld1q(pSrcA);
|
||||
vecB = vld1q(pSrcB);
|
||||
vstrwq_p(pDst, vqsubq(vecA, vecB), p0);
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
void arm_sub_q31(
|
||||
const q31_t * pSrcA,
|
||||
const q31_t * pSrcB,
|
||||
q31_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A - B */
|
||||
|
||||
/* Subtract and store result in destination buffer. */
|
||||
*pDst++ = __QSUB(*pSrcA++, *pSrcB++);
|
||||
|
||||
*pDst++ = __QSUB(*pSrcA++, *pSrcB++);
|
||||
|
||||
*pDst++ = __QSUB(*pSrcA++, *pSrcB++);
|
||||
|
||||
*pDst++ = __QSUB(*pSrcA++, *pSrcB++);
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A - B */
|
||||
|
||||
/* Subtract and store result in destination buffer. */
|
||||
*pDst++ = __QSUB(*pSrcA++, *pSrcB++);
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* defined(ARM_MATH_MVEI) */
|
||||
|
||||
/**
|
||||
@} end of BasicSub group
|
||||
*/
|
||||
158
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_sub_q7.c
Normal file
158
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_sub_q7.c
Normal file
@@ -0,0 +1,158 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_sub_q7.c
|
||||
* Description: Q7 vector subtraction
|
||||
*
|
||||
* $Date: 18. March 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
@ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup BasicSub
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Q7 vector subtraction.
|
||||
@param[in] pSrcA points to the first input vector
|
||||
@param[in] pSrcB points to the second input vector
|
||||
@param[out] pDst points to the output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
|
||||
@par Scaling and Overflow Behavior
|
||||
The function uses saturating arithmetic.
|
||||
Results outside of the allowable Q7 range [0x80 0x7F] will be saturated.
|
||||
*/
|
||||
#if defined(ARM_MATH_MVEI)
|
||||
|
||||
#include "arm_helium_utils.h"
|
||||
|
||||
void arm_sub_q7(
|
||||
const q7_t * pSrcA,
|
||||
const q7_t * pSrcB,
|
||||
q7_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counters */
|
||||
q7x16_t vecA;
|
||||
q7x16_t vecB;
|
||||
|
||||
/* Compute 16 outputs at a time */
|
||||
blkCnt = blockSize >> 4;
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/*
|
||||
* C = A - B
|
||||
* Subtract and then store the results in the destination buffer.
|
||||
*/
|
||||
vecA = vld1q(pSrcA);
|
||||
vecB = vld1q(pSrcB);
|
||||
vst1q(pDst, vqsubq(vecA, vecB));
|
||||
/*
|
||||
* Decrement the blockSize loop counter
|
||||
*/
|
||||
blkCnt--;
|
||||
/*
|
||||
* advance vector source and destination pointers
|
||||
*/
|
||||
pSrcA += 16;
|
||||
pSrcB += 16;
|
||||
pDst += 16;
|
||||
}
|
||||
/*
|
||||
* tail
|
||||
*/
|
||||
blkCnt = blockSize & 0xF;
|
||||
if (blkCnt > 0U)
|
||||
{
|
||||
mve_pred16_t p0 = vctp8q(blkCnt);
|
||||
vecA = vld1q(pSrcA);
|
||||
vecB = vld1q(pSrcB);
|
||||
vstrbq_p(pDst, vqsubq(vecA, vecB), p0);
|
||||
}
|
||||
}
|
||||
#else
|
||||
void arm_sub_q7(
|
||||
const q7_t * pSrcA,
|
||||
const q7_t * pSrcB,
|
||||
q7_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A - B */
|
||||
|
||||
#if defined (ARM_MATH_DSP)
|
||||
/* Subtract and store result in destination buffer (4 samples at a time). */
|
||||
write_q7x4_ia (&pDst, __QSUB8(read_q7x4_ia ((q7_t **) &pSrcA), read_q7x4_ia ((q7_t **) &pSrcB)));
|
||||
#else
|
||||
*pDst++ = (q7_t) __SSAT((q15_t) *pSrcA++ - *pSrcB++, 8);
|
||||
*pDst++ = (q7_t) __SSAT((q15_t) *pSrcA++ - *pSrcB++, 8);
|
||||
*pDst++ = (q7_t) __SSAT((q15_t) *pSrcA++ - *pSrcB++, 8);
|
||||
*pDst++ = (q7_t) __SSAT((q15_t) *pSrcA++ - *pSrcB++, 8);
|
||||
#endif
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A - B */
|
||||
|
||||
/* Subtract and store result in destination buffer. */
|
||||
*pDst++ = (q7_t) __SSAT((q15_t) *pSrcA++ - *pSrcB++, 8);
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* defined(ARM_MATH_MVEI) */
|
||||
|
||||
/**
|
||||
@} end of BasicSub group
|
||||
*/
|
||||
137
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_xor_u16.c
Normal file
137
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_xor_u16.c
Normal file
@@ -0,0 +1,137 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_xor_u16.c
|
||||
* Description: uint16_t bitwise exclusive OR
|
||||
*
|
||||
* $Date: 14 November 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
@ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
@defgroup Xor Vector bitwise exclusive OR
|
||||
|
||||
Compute the logical bitwise XOR.
|
||||
|
||||
There are separate functions for uint32_t, uint16_t, and uint8_t data types.
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup Xor
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
  @brief         Compute the bitwise XOR of two uint16_t vectors.
  @param[in]     pSrcA      points to input vector A
  @param[in]     pSrcB      points to input vector B
  @param[out]    pDst       points to output vector
  @param[in]     blockSize  number of samples in each vector
  @return        none

  @par           Details
                   For every n in [0, blockSize): pDst[n] = pSrcA[n] ^ pSrcB[n].
                   A blockSize of 0 performs no memory access.
                   The Helium (MVE) and Neon paths process 8 samples per
                   iteration; the remaining (blockSize & 7) samples are
                   handled by a predicated vector operation (MVE) or by the
                   scalar loop (Neon).
 */

void arm_xor_u16(
    const uint16_t * pSrcA,
    const uint16_t * pSrcB,
          uint16_t * pDst,
          uint32_t blockSize)
{
    uint32_t blkCnt;      /* Loop counter */

#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
    /* Unsigned lanes: they match the uint16_t buffers and the operand type
       of veorq_u16, so no implicit signed/unsigned vector conversion is
       needed. */
    uint16x8_t vecSrcA, vecSrcB;

    /* Compute 8 outputs at a time */
    blkCnt = blockSize >> 3;

    while (blkCnt > 0U)
    {
        vecSrcA = vld1q(pSrcA);
        vecSrcB = vld1q(pSrcB);

        vst1q(pDst, veorq_u16(vecSrcA, vecSrcB) );

        pSrcA += 8;
        pSrcB += 8;
        pDst  += 8;

        /* Decrement the loop counter */
        blkCnt--;
    }

    /* Tail */
    blkCnt = blockSize & 7;

    if (blkCnt > 0U)
    {
        mve_pred16_t p0 = vctp16q(blkCnt);

        /* Predicated (zeroing) loads: only the blkCnt remaining lanes are
           fetched, so nothing beyond the end of the inputs is read. */
        vecSrcA = vld1q_z_u16(pSrcA, p0);
        vecSrcB = vld1q_z_u16(pSrcB, p0);
        vstrhq_p(pDst, veorq_u16(vecSrcA, vecSrcB), p0);
    }
#else
#if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
    uint16x8_t vecA, vecB;

    /* Compute 8 outputs at a time */
    blkCnt = blockSize >> 3U;

    while (blkCnt > 0U)
    {
        vecA = vld1q_u16(pSrcA);
        vecB = vld1q_u16(pSrcB);

        vst1q_u16(pDst, veorq_u16(vecA, vecB) );

        pSrcA += 8;
        pSrcB += 8;
        pDst  += 8;

        /* Decrement the loop counter */
        blkCnt--;
    }

    /* Tail: leftover samples go through the scalar loop below */
    blkCnt = blockSize & 7;
#else
    /* Initialize blkCnt with number of samples */
    blkCnt = blockSize;
#endif

    while (blkCnt > 0U)
    {
        /* The operands are promoted to int for '^'; narrow back explicitly */
        *pDst++ = (uint16_t)((*pSrcA++) ^ (*pSrcB++));

        /* Decrement the loop counter */
        blkCnt--;
    }
#endif /* if defined(ARM_MATH_MVEI) */
}
|
||||
|
||||
/**
|
||||
@} end of Xor group
|
||||
*/
|
||||
129
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_xor_u32.c
Normal file
129
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_xor_u32.c
Normal file
@@ -0,0 +1,129 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_xor_u32.c
|
||||
* Description: uint32_t bitwise exclusive OR
|
||||
*
|
||||
* $Date: 14 November 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
@ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup Xor
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
  @brief         Compute the bitwise XOR of two uint32_t vectors.
  @param[in]     pSrcA      points to input vector A
  @param[in]     pSrcB      points to input vector B
  @param[out]    pDst       points to output vector
  @param[in]     blockSize  number of samples in each vector
  @return        none

  @par           Details
                   For every n in [0, blockSize): pDst[n] = pSrcA[n] ^ pSrcB[n].
                   A blockSize of 0 performs no memory access.
                   The Helium (MVE) and Neon paths process 4 samples per
                   iteration; the remaining (blockSize & 3) samples are
                   handled by a predicated vector operation (MVE) or by the
                   scalar loop (Neon).
 */

void arm_xor_u32(
    const uint32_t * pSrcA,
    const uint32_t * pSrcB,
          uint32_t * pDst,
          uint32_t blockSize)
{
    uint32_t blkCnt;      /* Loop counter */

#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
    /* Unsigned lanes: they match the uint32_t buffers and the operand type
       of veorq_u32, so no implicit signed/unsigned vector conversion is
       needed. */
    uint32x4_t vecSrcA, vecSrcB;

    /* Compute 4 outputs at a time */
    blkCnt = blockSize >> 2;

    while (blkCnt > 0U)
    {
        vecSrcA = vld1q(pSrcA);
        vecSrcB = vld1q(pSrcB);

        vst1q(pDst, veorq_u32(vecSrcA, vecSrcB) );

        pSrcA += 4;
        pSrcB += 4;
        pDst  += 4;

        /* Decrement the loop counter */
        blkCnt--;
    }

    /* Tail */
    blkCnt = blockSize & 3;

    if (blkCnt > 0U)
    {
        mve_pred16_t p0 = vctp32q(blkCnt);

        /* Predicated (zeroing) loads: only the blkCnt remaining lanes are
           fetched, so nothing beyond the end of the inputs is read. */
        vecSrcA = vld1q_z_u32(pSrcA, p0);
        vecSrcB = vld1q_z_u32(pSrcB, p0);
        vstrwq_p(pDst, veorq_u32(vecSrcA, vecSrcB), p0);
    }
#else
#if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
    uint32x4_t vecA, vecB;

    /* Compute 4 outputs at a time */
    blkCnt = blockSize >> 2U;

    while (blkCnt > 0U)
    {
        vecA = vld1q_u32(pSrcA);
        vecB = vld1q_u32(pSrcB);

        vst1q_u32(pDst, veorq_u32(vecA, vecB) );

        pSrcA += 4;
        pSrcB += 4;
        pDst  += 4;

        /* Decrement the loop counter */
        blkCnt--;
    }

    /* Tail: leftover samples go through the scalar loop below */
    blkCnt = blockSize & 3;
#else
    /* Initialize blkCnt with number of samples */
    blkCnt = blockSize;
#endif

    while (blkCnt > 0U)
    {
        *pDst++ = (*pSrcA++) ^ (*pSrcB++);

        /* Decrement the loop counter */
        blkCnt--;
    }
#endif /* if defined(ARM_MATH_MVEI) */
}
|
||||
|
||||
/**
|
||||
@} end of Xor group
|
||||
*/
|
||||
129
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_xor_u8.c
Normal file
129
libraries/cmsis/dsp/Source/BasicMathFunctions/arm_xor_u8.c
Normal file
@@ -0,0 +1,129 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_xor_u8.c
|
||||
* Description: uint8_t bitwise exclusive OR
|
||||
*
|
||||
* $Date: 14 November 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
@ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup Xor
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
  @brief         Compute the bitwise XOR of two uint8_t vectors.
  @param[in]     pSrcA      points to input vector A
  @param[in]     pSrcB      points to input vector B
  @param[out]    pDst       points to output vector
  @param[in]     blockSize  number of samples in each vector
  @return        none

  @par           Details
                   For every n in [0, blockSize): pDst[n] = pSrcA[n] ^ pSrcB[n].
                   A blockSize of 0 performs no memory access.
                   The Helium (MVE) and Neon paths process 16 samples per
                   iteration; the remaining (blockSize & 0xF) samples are
                   handled by a predicated vector operation (MVE) or by the
                   scalar loop (Neon).
 */

void arm_xor_u8(
    const uint8_t * pSrcA,
    const uint8_t * pSrcB,
          uint8_t * pDst,
          uint32_t blockSize)
{
    uint32_t blkCnt;      /* Loop counter */

#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
    /* Unsigned lanes: they match the uint8_t buffers and the operand type
       of veorq_u8, so no implicit signed/unsigned vector conversion is
       needed. */
    uint8x16_t vecSrcA, vecSrcB;

    /* Compute 16 outputs at a time */
    blkCnt = blockSize >> 4;

    while (blkCnt > 0U)
    {
        vecSrcA = vld1q(pSrcA);
        vecSrcB = vld1q(pSrcB);

        vst1q(pDst, veorq_u8(vecSrcA, vecSrcB) );

        pSrcA += 16;
        pSrcB += 16;
        pDst  += 16;

        /* Decrement the loop counter */
        blkCnt--;
    }

    /* Tail */
    blkCnt = blockSize & 0xF;

    if (blkCnt > 0U)
    {
        mve_pred16_t p0 = vctp8q(blkCnt);

        /* Predicated (zeroing) loads: only the blkCnt remaining lanes are
           fetched, so nothing beyond the end of the inputs is read. */
        vecSrcA = vld1q_z_u8(pSrcA, p0);
        vecSrcB = vld1q_z_u8(pSrcB, p0);
        vstrbq_p(pDst, veorq_u8(vecSrcA, vecSrcB), p0);
    }
#else
#if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
    uint8x16_t vecA, vecB;

    /* Compute 16 outputs at a time */
    blkCnt = blockSize >> 4U;

    while (blkCnt > 0U)
    {
        vecA = vld1q_u8(pSrcA);
        vecB = vld1q_u8(pSrcB);

        vst1q_u8(pDst, veorq_u8(vecA, vecB) );

        pSrcA += 16;
        pSrcB += 16;
        pDst  += 16;

        /* Decrement the loop counter */
        blkCnt--;
    }

    /* Tail: leftover samples go through the scalar loop below */
    blkCnt = blockSize & 0xF;
#else
    /* Initialize blkCnt with number of samples */
    blkCnt = blockSize;
#endif

    while (blkCnt > 0U)
    {
        /* The operands are promoted to int for '^'; narrow back explicitly */
        *pDst++ = (uint8_t)((*pSrcA++) ^ (*pSrcB++));

        /* Decrement the loop counter */
        blkCnt--;
    }
#endif /* if defined(ARM_MATH_MVEI) */
}
|
||||
|
||||
/**
|
||||
@} end of Xor group
|
||||
*/
|
||||
29
libraries/cmsis/dsp/Source/BayesFunctions/BayesFunctions.c
Normal file
29
libraries/cmsis/dsp/Source/BayesFunctions/BayesFunctions.c
Normal file
@@ -0,0 +1,29 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: BayesFunctions.c
|
||||
* Description: Combination of all bayes function source files.
|
||||
*
|
||||
* $Date: 16. March 2020
|
||||
* $Revision: V1.0.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2020 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "arm_gaussian_naive_bayes_predict_f32.c"
|
||||
19
libraries/cmsis/dsp/Source/BayesFunctions/CMakeLists.txt
Normal file
19
libraries/cmsis/dsp/Source/BayesFunctions/CMakeLists.txt
Normal file
@@ -0,0 +1,19 @@
|
||||
cmake_minimum_required (VERSION 3.6)
|
||||
|
||||
project(CMSISDSPBayes)
|
||||
|
||||
include(configLib)
|
||||
include(configDsp)
|
||||
|
||||
file(GLOB SRC "./*_*.c")
|
||||
|
||||
add_library(CMSISDSPBayes STATIC ${SRC})
|
||||
|
||||
configLib(CMSISDSPBayes ${ROOT})
|
||||
configDsp(CMSISDSPBayes ${ROOT})
|
||||
|
||||
### Includes
|
||||
target_include_directories(CMSISDSPBayes PUBLIC "${DSP}/Include")
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,397 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
 * Title:        arm_gaussian_naive_bayes_predict_f32.c
|
||||
* Description: Naive Gaussian Bayesian Estimator
|
||||
*
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "arm_math.h"
|
||||
#include <limits.h>
|
||||
#include <math.h>
|
||||
|
||||
#define PI_F 3.1415926535897932384626433832795f
|
||||
#define DPI_F (2.0f*3.1415926535897932384626433832795f)
|
||||
|
||||
/**
|
||||
* @addtogroup groupBayes
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Naive Gaussian Bayesian Estimator
|
||||
*
|
||||
* @param[in] *S points to a naive bayes instance structure
|
||||
* @param[in] *in points to the elements of the input vector.
|
||||
* @param[in] *pBuffer points to a buffer of length numberOfClasses
|
||||
* @return The predicted class
|
||||
*
|
||||
* @par If the number of classes is big, MVE version will consume lot of
|
||||
* stack since the log prior are computed on the stack.
|
||||
*
|
||||
*/
|
||||
|
||||
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
|
||||
#include "arm_helium_utils.h"
|
||||
#include "arm_vec_math.h"
|
||||
|
||||
uint32_t arm_gaussian_naive_bayes_predict_f32(const arm_gaussian_naive_bayes_instance_f32 *S,
|
||||
const float32_t * in,
|
||||
float32_t *pBuffer)
|
||||
{
|
||||
uint32_t nbClass;
|
||||
const float32_t *pTheta = S->theta;
|
||||
const float32_t *pSigma = S->sigma;
|
||||
float32_t *buffer = pBuffer;
|
||||
const float32_t *pIn = in;
|
||||
float32_t result;
|
||||
f32x4_t vsigma;
|
||||
float32_t tmp;
|
||||
f32x4_t vacc1, vacc2;
|
||||
uint32_t index;
|
||||
float32_t logclassPriors[S->numberOfClasses];
|
||||
float32_t *pLogPrior = logclassPriors;
|
||||
|
||||
arm_vlog_f32((float32_t *) S->classPriors, logclassPriors, S->numberOfClasses);
|
||||
|
||||
pTheta = S->theta;
|
||||
pSigma = S->sigma;
|
||||
|
||||
for (nbClass = 0; nbClass < S->numberOfClasses; nbClass++) {
|
||||
pIn = in;
|
||||
|
||||
vacc1 = vdupq_n_f32(0);
|
||||
vacc2 = vdupq_n_f32(0);
|
||||
|
||||
uint32_t blkCnt =S->vectorDimension >> 2;
|
||||
while (blkCnt > 0U) {
|
||||
f32x4_t vinvSigma, vtmp;
|
||||
|
||||
vsigma = vaddq_n_f32(vld1q(pSigma), S->epsilon);
|
||||
vacc1 = vaddq(vacc1, vlogq_f32(vmulq_n_f32(vsigma, 2.0f * PI)));
|
||||
|
||||
vinvSigma = vrecip_medprec_f32(vsigma);
|
||||
|
||||
vtmp = vsubq(vld1q(pIn), vld1q(pTheta));
|
||||
/* squaring */
|
||||
vtmp = vmulq(vtmp, vtmp);
|
||||
|
||||
vacc2 = vfmaq(vacc2, vtmp, vinvSigma);
|
||||
|
||||
pIn += 4;
|
||||
pTheta += 4;
|
||||
pSigma += 4;
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
blkCnt = S->vectorDimension & 3;
|
||||
if (blkCnt > 0U) {
|
||||
mve_pred16_t p0 = vctp32q(blkCnt);
|
||||
f32x4_t vinvSigma, vtmp;
|
||||
|
||||
vsigma = vaddq_n_f32(vld1q(pSigma), S->epsilon);
|
||||
vacc1 =
|
||||
vaddq_m_f32(vacc1, vacc1, vlogq_f32(vmulq_n_f32(vsigma, 2.0f * PI)), p0);
|
||||
|
||||
vinvSigma = vrecip_medprec_f32(vsigma);
|
||||
|
||||
vtmp = vsubq(vld1q(pIn), vld1q(pTheta));
|
||||
/* squaring */
|
||||
vtmp = vmulq(vtmp, vtmp);
|
||||
|
||||
vacc2 = vfmaq_m_f32(vacc2, vtmp, vinvSigma, p0);
|
||||
|
||||
pTheta += blkCnt;
|
||||
pSigma += blkCnt;
|
||||
}
|
||||
|
||||
tmp = -0.5f * vecAddAcrossF32Mve(vacc1);
|
||||
tmp -= 0.5f * vecAddAcrossF32Mve(vacc2);
|
||||
|
||||
*buffer = tmp + *pLogPrior++;
|
||||
buffer++;
|
||||
}
|
||||
|
||||
arm_max_f32(pBuffer, S->numberOfClasses, &result, &index);
|
||||
|
||||
return (index);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#if defined(ARM_MATH_NEON)
|
||||
|
||||
#include "NEMath.h"
|
||||
|
||||
|
||||
|
||||
uint32_t arm_gaussian_naive_bayes_predict_f32(const arm_gaussian_naive_bayes_instance_f32 *S,
|
||||
const float32_t * in,
|
||||
float32_t *pBuffer)
|
||||
{
|
||||
|
||||
const float32_t *pPrior = S->classPriors;
|
||||
|
||||
const float32_t *pTheta = S->theta;
|
||||
const float32_t *pSigma = S->sigma;
|
||||
|
||||
const float32_t *pTheta1 = S->theta + S->vectorDimension;
|
||||
const float32_t *pSigma1 = S->sigma + S->vectorDimension;
|
||||
|
||||
float32_t *buffer = pBuffer;
|
||||
const float32_t *pIn=in;
|
||||
|
||||
float32_t result;
|
||||
float32_t sigma,sigma1;
|
||||
float32_t tmp,tmp1;
|
||||
uint32_t index;
|
||||
uint32_t vecBlkCnt;
|
||||
uint32_t classBlkCnt;
|
||||
float32x4_t epsilonV;
|
||||
float32x4_t sigmaV,sigmaV1;
|
||||
float32x4_t tmpV,tmpVb,tmpV1;
|
||||
float32x2_t tmpV2;
|
||||
float32x4_t thetaV,thetaV1;
|
||||
float32x4_t inV;
|
||||
|
||||
epsilonV = vdupq_n_f32(S->epsilon);
|
||||
|
||||
classBlkCnt = S->numberOfClasses >> 1;
|
||||
while(classBlkCnt > 0)
|
||||
{
|
||||
|
||||
|
||||
pIn = in;
|
||||
|
||||
tmp = logf(*pPrior++);
|
||||
tmp1 = logf(*pPrior++);
|
||||
tmpV = vdupq_n_f32(0.0f);
|
||||
tmpV1 = vdupq_n_f32(0.0f);
|
||||
|
||||
vecBlkCnt = S->vectorDimension >> 2;
|
||||
while(vecBlkCnt > 0)
|
||||
{
|
||||
sigmaV = vld1q_f32(pSigma);
|
||||
thetaV = vld1q_f32(pTheta);
|
||||
|
||||
sigmaV1 = vld1q_f32(pSigma1);
|
||||
thetaV1 = vld1q_f32(pTheta1);
|
||||
|
||||
inV = vld1q_f32(pIn);
|
||||
|
||||
sigmaV = vaddq_f32(sigmaV, epsilonV);
|
||||
sigmaV1 = vaddq_f32(sigmaV1, epsilonV);
|
||||
|
||||
tmpVb = vmulq_n_f32(sigmaV,DPI_F);
|
||||
tmpVb = vlogq_f32(tmpVb);
|
||||
tmpV = vmlsq_n_f32(tmpV,tmpVb,0.5f);
|
||||
|
||||
tmpVb = vmulq_n_f32(sigmaV1,DPI_F);
|
||||
tmpVb = vlogq_f32(tmpVb);
|
||||
tmpV1 = vmlsq_n_f32(tmpV1,tmpVb,0.5f);
|
||||
|
||||
tmpVb = vsubq_f32(inV,thetaV);
|
||||
tmpVb = vmulq_f32(tmpVb,tmpVb);
|
||||
tmpVb = vmulq_f32(tmpVb, vinvq_f32(sigmaV));
|
||||
tmpV = vmlsq_n_f32(tmpV,tmpVb,0.5f);
|
||||
|
||||
tmpVb = vsubq_f32(inV,thetaV1);
|
||||
tmpVb = vmulq_f32(tmpVb,tmpVb);
|
||||
tmpVb = vmulq_f32(tmpVb, vinvq_f32(sigmaV1));
|
||||
tmpV1 = vmlsq_n_f32(tmpV1,tmpVb,0.5f);
|
||||
|
||||
pIn += 4;
|
||||
pTheta += 4;
|
||||
pSigma += 4;
|
||||
pTheta1 += 4;
|
||||
pSigma1 += 4;
|
||||
|
||||
vecBlkCnt--;
|
||||
}
|
||||
tmpV2 = vpadd_f32(vget_low_f32(tmpV),vget_high_f32(tmpV));
|
||||
tmp += vget_lane_f32(tmpV2, 0) + vget_lane_f32(tmpV2, 1);
|
||||
|
||||
tmpV2 = vpadd_f32(vget_low_f32(tmpV1),vget_high_f32(tmpV1));
|
||||
tmp1 += vget_lane_f32(tmpV2, 0) + vget_lane_f32(tmpV2, 1);
|
||||
|
||||
vecBlkCnt = S->vectorDimension & 3;
|
||||
while(vecBlkCnt > 0)
|
||||
{
|
||||
sigma = *pSigma + S->epsilon;
|
||||
sigma1 = *pSigma1 + S->epsilon;
|
||||
|
||||
tmp -= 0.5f*logf(2.0f * PI_F * sigma);
|
||||
tmp -= 0.5f*(*pIn - *pTheta) * (*pIn - *pTheta) / sigma;
|
||||
|
||||
tmp1 -= 0.5f*logf(2.0f * PI_F * sigma1);
|
||||
tmp1 -= 0.5f*(*pIn - *pTheta1) * (*pIn - *pTheta1) / sigma1;
|
||||
|
||||
pIn++;
|
||||
pTheta++;
|
||||
pSigma++;
|
||||
pTheta1++;
|
||||
pSigma1++;
|
||||
vecBlkCnt--;
|
||||
}
|
||||
|
||||
*buffer++ = tmp;
|
||||
*buffer++ = tmp1;
|
||||
|
||||
pSigma += S->vectorDimension;
|
||||
pTheta += S->vectorDimension;
|
||||
pSigma1 += S->vectorDimension;
|
||||
pTheta1 += S->vectorDimension;
|
||||
|
||||
classBlkCnt--;
|
||||
}
|
||||
|
||||
classBlkCnt = S->numberOfClasses & 1;
|
||||
|
||||
while(classBlkCnt > 0)
|
||||
{
|
||||
|
||||
|
||||
pIn = in;
|
||||
|
||||
tmp = logf(*pPrior++);
|
||||
tmpV = vdupq_n_f32(0.0f);
|
||||
|
||||
vecBlkCnt = S->vectorDimension >> 2;
|
||||
while(vecBlkCnt > 0)
|
||||
{
|
||||
sigmaV = vld1q_f32(pSigma);
|
||||
thetaV = vld1q_f32(pTheta);
|
||||
inV = vld1q_f32(pIn);
|
||||
|
||||
sigmaV = vaddq_f32(sigmaV, epsilonV);
|
||||
|
||||
tmpVb = vmulq_n_f32(sigmaV,DPI_F);
|
||||
tmpVb = vlogq_f32(tmpVb);
|
||||
tmpV = vmlsq_n_f32(tmpV,tmpVb,0.5f);
|
||||
|
||||
tmpVb = vsubq_f32(inV,thetaV);
|
||||
tmpVb = vmulq_f32(tmpVb,tmpVb);
|
||||
tmpVb = vmulq_f32(tmpVb, vinvq_f32(sigmaV));
|
||||
tmpV = vmlsq_n_f32(tmpV,tmpVb,0.5f);
|
||||
|
||||
pIn += 4;
|
||||
pTheta += 4;
|
||||
pSigma += 4;
|
||||
|
||||
vecBlkCnt--;
|
||||
}
|
||||
tmpV2 = vpadd_f32(vget_low_f32(tmpV),vget_high_f32(tmpV));
|
||||
tmp += vget_lane_f32(tmpV2, 0) + vget_lane_f32(tmpV2, 1);
|
||||
|
||||
vecBlkCnt = S->vectorDimension & 3;
|
||||
while(vecBlkCnt > 0)
|
||||
{
|
||||
sigma = *pSigma + S->epsilon;
|
||||
tmp -= 0.5f*logf(2.0f * PI_F * sigma);
|
||||
tmp -= 0.5f*(*pIn - *pTheta) * (*pIn - *pTheta) / sigma;
|
||||
|
||||
pIn++;
|
||||
pTheta++;
|
||||
pSigma++;
|
||||
vecBlkCnt--;
|
||||
}
|
||||
|
||||
*buffer++ = tmp;
|
||||
|
||||
classBlkCnt--;
|
||||
}
|
||||
|
||||
arm_max_f32(pBuffer,S->numberOfClasses,&result,&index);
|
||||
|
||||
return(index);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/**
|
||||
* @brief Naive Gaussian Bayesian Estimator
|
||||
*
|
||||
* @param[in] *S points to a naive bayes instance structure
|
||||
* @param[in] *in points to the elements of the input vector.
|
||||
* @param[in] *pBuffer points to a buffer of length numberOfClasses
|
||||
* @return The predicted class
|
||||
*
|
||||
*/
|
||||
uint32_t arm_gaussian_naive_bayes_predict_f32(const arm_gaussian_naive_bayes_instance_f32 *S,
|
||||
const float32_t * in,
|
||||
float32_t *pBuffer)
|
||||
{
|
||||
uint32_t nbClass;
|
||||
uint32_t nbDim;
|
||||
const float32_t *pPrior = S->classPriors;
|
||||
const float32_t *pTheta = S->theta;
|
||||
const float32_t *pSigma = S->sigma;
|
||||
float32_t *buffer = pBuffer;
|
||||
const float32_t *pIn=in;
|
||||
float32_t result;
|
||||
float32_t sigma;
|
||||
float32_t tmp;
|
||||
float32_t acc1,acc2;
|
||||
uint32_t index;
|
||||
|
||||
pTheta=S->theta;
|
||||
pSigma=S->sigma;
|
||||
|
||||
for(nbClass = 0; nbClass < S->numberOfClasses; nbClass++)
|
||||
{
|
||||
|
||||
|
||||
pIn = in;
|
||||
|
||||
tmp = 0.0;
|
||||
acc1 = 0.0f;
|
||||
acc2 = 0.0f;
|
||||
for(nbDim = 0; nbDim < S->vectorDimension; nbDim++)
|
||||
{
|
||||
sigma = *pSigma + S->epsilon;
|
||||
acc1 += logf(2.0f * PI_F * sigma);
|
||||
acc2 += (*pIn - *pTheta) * (*pIn - *pTheta) / sigma;
|
||||
|
||||
pIn++;
|
||||
pTheta++;
|
||||
pSigma++;
|
||||
}
|
||||
|
||||
tmp = -0.5f * acc1;
|
||||
tmp -= 0.5f * acc2;
|
||||
|
||||
|
||||
*buffer = tmp + logf(*pPrior++);
|
||||
buffer++;
|
||||
}
|
||||
|
||||
arm_max_f32(pBuffer,S->numberOfClasses,&result,&index);
|
||||
|
||||
return(index);
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
|
||||
|
||||
/**
|
||||
* @} end of groupBayes group
|
||||
*/
|
||||
280
libraries/cmsis/dsp/Source/CMakeLists.txt
Normal file
280
libraries/cmsis/dsp/Source/CMakeLists.txt
Normal file
@@ -0,0 +1,280 @@
|
||||
cmake_minimum_required (VERSION 3.6)
# CMP0077 (option() honours existing normal variables) was introduced in
# CMake 3.13. Setting an unknown policy is a hard error, so only request it
# where it exists; this keeps the declared 3.6 minimum usable.
if (POLICY CMP0077)
  cmake_policy(SET CMP0077 NEW)
endif()
project(CMSISDSP)
|
||||
|
||||
# DSP Sources
|
||||
SET(DSP ${ROOT}/CMSIS/DSP)
|
||||
|
||||
list(APPEND CMAKE_MODULE_PATH ${DSP}/Source)
|
||||
list(APPEND CMAKE_MODULE_PATH ${DSP})
|
||||
|
||||
|
||||
include(configLib)
|
||||
|
||||
|
||||
option(NEON "Neon acceleration" OFF)
|
||||
option(NEONEXPERIMENTAL "Neon experimental acceleration" OFF)
|
||||
option(LOOPUNROLL "Loop unrolling" ON)
|
||||
option(ROUNDING "Rounding" OFF)
|
||||
option(MATRIXCHECK "Matrix Checks" OFF)
|
||||
option(HELIUM "Helium acceleration (MVEF and MVEI supported)" OFF)
|
||||
option(MVEF "MVEF intrinsics supported" OFF)
|
||||
option(MVEI "MVEI intrinsics supported" OFF)
|
||||
|
||||
# Select which parts of the CMSIS-DSP must be compiled.
|
||||
# There are some dependencies between the parts but they are not tracked
|
||||
# by this cmake. So, enabling some functions may require to enable some
|
||||
# other ones.
|
||||
option(BASICMATH "Basic Math Functions" ON)
|
||||
option(COMPLEXMATH "Complex Math Functions" ON)
|
||||
option(CONTROLLER "Controller Functions" ON)
|
||||
option(FASTMATH "Fast Math Functions" ON)
|
||||
option(FILTERING "Filtering Functions" ON)
|
||||
option(MATRIX "Matrix Functions" ON)
|
||||
option(STATISTICS "Statistics Functions" ON)
|
||||
option(SUPPORT "Support Functions" ON)
|
||||
option(TRANSFORM "Transform Functions" ON)
|
||||
option(SVM "Support Vector Machine Functions" ON)
|
||||
option(BAYES "Bayesian Estimators" ON)
|
||||
option(DISTANCE "Distance Functions" ON)
|
||||
|
||||
# When OFF it is the default behavior : all tables are included.
|
||||
option(CONFIGTABLE "Configuration of table allowed" OFF)
|
||||
|
||||
# When CONFIGTABLE is ON, select if all interpolation tables must be included
|
||||
option(ALLFAST "All interpolation tables included" OFF)
|
||||
# When CONFIGTABLE is ON, select if all FFT tables must be included
|
||||
option(ALLFFT "All fft tables included" OFF)
|
||||
|
||||
# Features which require inclusion of a data table.
|
||||
# Since some tables may be big, the corresponding feature can be
|
||||
# disabled.
|
||||
# Those options are taken into account only when CONFIGTABLE is ON
|
||||
option(ARM_COS_F32 "cos f32" OFF)
|
||||
option(ARM_COS_Q31 "cos q31" OFF)
|
||||
option(ARM_COS_Q15 "cos q15" OFF)
|
||||
option(ARM_SIN_F32 "sin f32" OFF)
|
||||
option(ARM_SIN_Q31 "sin q31" OFF)
|
||||
option(ARM_SIN_Q15 "sin q15" OFF)
|
||||
option(ARM_SIN_COS_F32 "sin cos f32" OFF)
|
||||
option(ARM_SIN_COS_Q31 "sin cos q31" OFF)
|
||||
|
||||
option(ARM_LMS_NORM_Q31 "lms norm q31" OFF)
|
||||
option(ARM_LMS_NORM_Q15 "lms norm q15" OFF)
|
||||
|
||||
option(CFFT_F64_16 "cfft f64 16" OFF)
|
||||
option(CFFT_F64_32 "cfft f64 32" OFF)
|
||||
option(CFFT_F64_64 "cfft f64 64" OFF)
|
||||
option(CFFT_F64_128 "cfft f64 128" OFF)
|
||||
option(CFFT_F64_256 "cfft f64 256" OFF)
|
||||
option(CFFT_F64_512 "cfft f64 512" OFF)
|
||||
option(CFFT_F64_1024 "cfft f64 1024" OFF)
|
||||
option(CFFT_F64_2048 "cfft f64 2048" OFF)
|
||||
option(CFFT_F64_4096 "cfft f64 4096" OFF)
|
||||
|
||||
option(CFFT_F32_16 "cfft f32 16" OFF)
|
||||
option(CFFT_F32_32 "cfft f32 32" OFF)
|
||||
option(CFFT_F32_64 "cfft f32 64" OFF)
|
||||
option(CFFT_F32_128 "cfft f32 128" OFF)
|
||||
option(CFFT_F32_256 "cfft f32 256" OFF)
|
||||
option(CFFT_F32_512 "cfft f32 512" OFF)
|
||||
option(CFFT_F32_1024 "cfft f32 1024" OFF)
|
||||
option(CFFT_F32_2048 "cfft f32 2048" OFF)
|
||||
option(CFFT_F32_4096 "cfft f32 4096" OFF)
|
||||
|
||||
option(CFFT_Q31_16 "cfft q31 16" OFF)
|
||||
option(CFFT_Q31_32 "cfft q31 32" OFF)
|
||||
option(CFFT_Q31_64 "cfft q31 64" OFF)
|
||||
option(CFFT_Q31_128 "cfft q31 128" OFF)
|
||||
option(CFFT_Q31_256 "cfft q31 256" OFF)
|
||||
option(CFFT_Q31_512 "cfft q31 512" OFF)
|
||||
option(CFFT_Q31_1024 "cfft q31 1024" OFF)
|
||||
option(CFFT_Q31_2048 "cfft q31 2048" OFF)
|
||||
option(CFFT_Q31_4096 "cfft q31 4096" OFF)
|
||||
|
||||
option(CFFT_Q15_16 "cfft q15 16" OFF)
|
||||
option(CFFT_Q15_32 "cfft q15 32" OFF)
|
||||
option(CFFT_Q15_64 "cfft q15 64" OFF)
|
||||
option(CFFT_Q15_128 "cfft q15 128" OFF)
|
||||
option(CFFT_Q15_256 "cfft q15 256" OFF)
|
||||
option(CFFT_Q15_512 "cfft q15 512" OFF)
|
||||
option(CFFT_Q15_1024 "cfft q15 1024" OFF)
|
||||
option(CFFT_Q15_2048 "cfft q15 2048" OFF)
|
||||
option(CFFT_Q15_4096 "cfft q15 4096" OFF)
|
||||
|
||||
option(RFFT_FAST_F32_32 "rfft fast f32 32" OFF)
|
||||
option(RFFT_FAST_F32_64 "rfft fast f32 64" OFF)
|
||||
option(RFFT_FAST_F32_128 "rfft fast f32 128" OFF)
|
||||
option(RFFT_FAST_F32_256 "rfft fast f32 256" OFF)
|
||||
option(RFFT_FAST_F32_512 "rfft fast f32 512" OFF)
|
||||
option(RFFT_FAST_F32_1024 "rfft fast f32 1024" OFF)
|
||||
option(RFFT_FAST_F32_2048 "rfft fast f32 2048" OFF)
|
||||
option(RFFT_FAST_F32_4096 "rfft fast f32 4096" OFF)
|
||||
|
||||
|
||||
option(RFFT_F32_128 "rfft f32 128" OFF)
|
||||
option(RFFT_F32_512 "rfft f32 512" OFF)
|
||||
option(RFFT_F32_2048 "rfft f32 2048" OFF)
|
||||
option(RFFT_F32_8192 "rfft f32 8192" OFF)
|
||||
|
||||
option(RFFT_FAST_F64_32 "rfft fast f64 32" OFF)
|
||||
option(RFFT_FAST_F64_64 "rfft fast f64 64" OFF)
|
||||
option(RFFT_FAST_F64_128 "rfft fast f64 128" OFF)
|
||||
option(RFFT_FAST_F64_256 "rfft fast f64 256" OFF)
|
||||
option(RFFT_FAST_F64_512 "rfft fast f64 512" OFF)
|
||||
option(RFFT_FAST_F64_1024 "rfft fast f64 1024" OFF)
|
||||
option(RFFT_FAST_F64_2048 "rfft fast f64 2048" OFF)
|
||||
option(RFFT_FAST_F64_4096 "rfft fast f64 4096" OFF)
|
||||
|
||||
|
||||
option(RFFT_F64_128 "rfft f64 128" OFF)
|
||||
option(RFFT_F64_512 "rfft f64 512" OFF)
|
||||
option(RFFT_F64_2048 "rfft f64 2048" OFF)
|
||||
option(RFFT_F64_8192 "rfft f64 8192" OFF)
|
||||
|
||||
option(RFFT_Q31_32 "rfft q31 32" OFF)
|
||||
option(RFFT_Q31_64 "rfft q31 64" OFF)
|
||||
option(RFFT_Q31_128 "rfft q31 128" OFF)
|
||||
option(RFFT_Q31_256 "rfft q31 256" OFF)
|
||||
option(RFFT_Q31_512 "rfft q31 512" OFF)
|
||||
option(RFFT_Q31_1024 "rfft q31 1024" OFF)
|
||||
option(RFFT_Q31_2048 "rfft q31 2048" OFF)
|
||||
option(RFFT_Q31_4096 "rfft q31 4096" OFF)
|
||||
option(RFFT_Q31_8192 "rfft q31 8192" OFF)
|
||||
|
||||
option(RFFT_Q15_32 "rfft q15 32" OFF)
|
||||
option(RFFT_Q15_64 "rfft q15 64" OFF)
|
||||
option(RFFT_Q15_128 "rfft q15 128" OFF)
|
||||
option(RFFT_Q15_256 "rfft q15 256" OFF)
|
||||
option(RFFT_Q15_512 "rfft q15 512" OFF)
|
||||
option(RFFT_Q15_1024 "rfft q15 1024" OFF)
|
||||
option(RFFT_Q15_2048 "rfft q15 2048" OFF)
|
||||
option(RFFT_Q15_4096 "rfft q15 4096" OFF)
|
||||
option(RFFT_Q15_8192 "rfft q15 8192" OFF)
|
||||
|
||||
option(DCT4_F32_128 "dct4 f32 128" OFF)
|
||||
option(DCT4_F32_512 "dct4 f32 512" OFF)
|
||||
option(DCT4_F32_2048 "dct4 f32 2048" OFF)
|
||||
option(DCT4_F32_8192 "dct4 f32 8192" OFF)
|
||||
|
||||
option(DCT4_Q31_128 "dct4 q31 128" OFF)
|
||||
option(DCT4_Q31_512 "dct4 q31 512" OFF)
|
||||
option(DCT4_Q31_2048 "dct4 q31 2048" OFF)
|
||||
option(DCT4_Q31_8192 "dct4 q31 8192" OFF)
|
||||
|
||||
option(DCT4_Q15_128 "dct4 q15 128" OFF)
|
||||
option(DCT4_Q15_512 "dct4 q15 512" OFF)
|
||||
option(DCT4_Q15_2048 "dct4 q15 2048" OFF)
|
||||
option(DCT4_Q15_8192 "dct4 q15 8192" OFF)
|
||||
|
||||
|
||||
###########################
|
||||
#
|
||||
# CMSIS DSP
|
||||
#
|
||||
###########################
|
||||
|
||||
|
||||
|
||||
add_library(CMSISDSP INTERFACE)
|
||||
|
||||
if (BASICMATH)
|
||||
add_subdirectory(BasicMathFunctions)
|
||||
target_link_libraries(CMSISDSP INTERFACE CMSISDSPBasicMath)
|
||||
endif()
|
||||
|
||||
if (COMPLEXMATH)
|
||||
add_subdirectory(ComplexMathFunctions)
|
||||
target_link_libraries(CMSISDSP INTERFACE CMSISDSPComplexMath)
|
||||
endif()
|
||||
|
||||
if (CONTROLLER)
|
||||
add_subdirectory(ControllerFunctions)
|
||||
# Fast tables inclusion is allowed
|
||||
if (CONFIGTABLE)
|
||||
target_compile_definitions(CMSISDSPController PUBLIC ARM_FAST_ALLOW_TABLES)
|
||||
endif()
|
||||
target_link_libraries(CMSISDSP INTERFACE CMSISDSPController)
|
||||
endif()
|
||||
|
||||
if (FASTMATH)
|
||||
add_subdirectory(FastMathFunctions)
|
||||
# Fast tables inclusion is allowed
|
||||
if (CONFIGTABLE)
|
||||
target_compile_definitions(CMSISDSPFastMath PUBLIC ARM_FAST_ALLOW_TABLES)
|
||||
endif()
|
||||
target_link_libraries(CMSISDSP INTERFACE CMSISDSPFastMath)
|
||||
endif()
|
||||
|
||||
if (FILTERING)
|
||||
add_subdirectory(FilteringFunctions)
|
||||
# Fast tables inclusion is allowed
|
||||
if (CONFIGTABLE)
|
||||
target_compile_definitions(CMSISDSPFiltering PUBLIC ARM_FAST_ALLOW_TABLES)
|
||||
endif()
|
||||
target_link_libraries(CMSISDSP INTERFACE CMSISDSPFiltering)
|
||||
endif()
|
||||
|
||||
if (MATRIX)
|
||||
add_subdirectory(MatrixFunctions)
|
||||
target_link_libraries(CMSISDSP INTERFACE CMSISDSPMatrix)
|
||||
endif()
|
||||
|
||||
if (STATISTICS)
|
||||
add_subdirectory(StatisticsFunctions)
|
||||
target_link_libraries(CMSISDSP INTERFACE CMSISDSPStatistics)
|
||||
endif()
|
||||
|
||||
if (SUPPORT)
|
||||
add_subdirectory(SupportFunctions)
|
||||
target_link_libraries(CMSISDSP INTERFACE CMSISDSPSupport)
|
||||
endif()
|
||||
|
||||
if (TRANSFORM)
|
||||
add_subdirectory(TransformFunctions)
|
||||
# FFT tables inclusion is allowed
|
||||
if (CONFIGTABLE)
|
||||
target_compile_definitions(CMSISDSPTransform PUBLIC ARM_FFT_ALLOW_TABLES)
|
||||
endif()
|
||||
target_link_libraries(CMSISDSP INTERFACE CMSISDSPTransform)
|
||||
endif()
|
||||
|
||||
if (FILTERING OR CONTROLLER OR FASTMATH OR TRANSFORM OR SVM OR DISTANCE)
|
||||
add_subdirectory(CommonTables)
|
||||
if (TRANSFORM)
|
||||
# FFT tables inclusion is allowed
|
||||
if (CONFIGTABLE)
|
||||
target_compile_definitions(CMSISDSPCommon PUBLIC ARM_FFT_ALLOW_TABLES)
|
||||
endif()
|
||||
endif()
|
||||
if (FILTERING OR CONTROLLER OR FASTMATH)
|
||||
# Select which tables to include
|
||||
if (CONFIGTABLE)
|
||||
target_compile_definitions(CMSISDSPCommon PUBLIC ARM_FAST_ALLOW_TABLES)
|
||||
endif()
|
||||
endif()
|
||||
target_link_libraries(CMSISDSP INTERFACE CMSISDSPCommon)
|
||||
# Common project is adding ComputeLibrary tables used by SVM and Distance
|
||||
# when NEON is ON.
|
||||
endif()
|
||||
|
||||
if (SVM)
|
||||
add_subdirectory(SVMFunctions)
|
||||
target_link_libraries(CMSISDSP INTERFACE CMSISDSPSVM)
|
||||
endif()
|
||||
|
||||
if (BAYES)
|
||||
add_subdirectory(BayesFunctions)
|
||||
target_link_libraries(CMSISDSP INTERFACE CMSISDSPBayes)
|
||||
endif()
|
||||
|
||||
if (DISTANCE)
|
||||
add_subdirectory(DistanceFunctions)
|
||||
target_link_libraries(CMSISDSP INTERFACE CMSISDSPDistance)
|
||||
endif()
|
||||
|
||||
### Includes
|
||||
target_include_directories(CMSISDSP INTERFACE "${DSP}/Include")
|
||||
|
||||
|
||||
|
||||
41
libraries/cmsis/dsp/Source/CommonTables/CMakeLists.txt
Normal file
41
libraries/cmsis/dsp/Source/CommonTables/CMakeLists.txt
Normal file
@@ -0,0 +1,41 @@
|
||||
cmake_minimum_required (VERSION 3.6)
|
||||
|
||||
project(CMSISDSPCommon)
|
||||
|
||||
include(configLib)
|
||||
include(configDsp)
|
||||
|
||||
add_library(CMSISDSPCommon STATIC arm_common_tables.c)
|
||||
|
||||
configLib(CMSISDSPCommon ${ROOT})
|
||||
configDsp(CMSISDSPCommon ${ROOT})
|
||||
|
||||
if (CONFIGTABLE AND ALLFFT)
|
||||
target_compile_definitions(CMSISDSPCommon PUBLIC ARM_ALL_FFT_TABLES)
|
||||
endif()
|
||||
|
||||
if (CONFIGTABLE AND ALLFAST)
|
||||
target_compile_definitions(CMSISDSPCommon PUBLIC ARM_ALL_FAST_TABLES)
|
||||
endif()
|
||||
|
||||
include(fft)
|
||||
fft(CMSISDSPCommon)
|
||||
|
||||
include(interpol)
|
||||
interpol(CMSISDSPCommon)
|
||||
|
||||
target_sources(CMSISDSPCommon PRIVATE arm_const_structs.c)
|
||||
|
||||
|
||||
### Includes
|
||||
target_include_directories(CMSISDSPCommon PUBLIC "${DSP}/Include")
|
||||
|
||||
if (NEON OR NEONEXPERIMENTAL)
|
||||
target_sources(CMSISDSPCommon PRIVATE "${DSP}/ComputeLibrary/Source/arm_cl_tables.c")
|
||||
endif()
|
||||
|
||||
if (HELIUM OR MVEF)
|
||||
target_sources(CMSISDSPCommon PRIVATE "${DSP}/Source/CommonTables/arm_mve_tables.c")
|
||||
endif()
|
||||
|
||||
|
||||
31
libraries/cmsis/dsp/Source/CommonTables/CommonTables.c
Normal file
31
libraries/cmsis/dsp/Source/CommonTables/CommonTables.c
Normal file
@@ -0,0 +1,31 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: CommonTables.c
|
||||
* Description: Combination of all common table source files.
|
||||
*
|
||||
* $Date: 08. January 2020
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2019-2020 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "arm_common_tables.c"
|
||||
#include "arm_const_structs.c"
|
||||
#include "arm_mve_tables.c"
|
||||
70552
libraries/cmsis/dsp/Source/CommonTables/arm_common_tables.c
Normal file
70552
libraries/cmsis/dsp/Source/CommonTables/arm_common_tables.c
Normal file
File diff suppressed because it is too large
Load Diff
663
libraries/cmsis/dsp/Source/CommonTables/arm_const_structs.c
Normal file
663
libraries/cmsis/dsp/Source/CommonTables/arm_const_structs.c
Normal file
@@ -0,0 +1,663 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_const_structs.c
|
||||
* Description: Constant structs that are initialized for user convenience.
|
||||
* For example, some can be given as arguments to the arm_cfft_f32() or arm_rfft_f32() functions.
|
||||
*
|
||||
* $Date: 27. January 2017
|
||||
* $Revision: V.1.5.1
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "arm_math.h"
|
||||
#include "arm_const_structs.h"
|
||||
|
||||
/*
|
||||
ALLOW TABLE is true when config table is enabled and the Transform folder is included
|
||||
for compilation.
|
||||
*/
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES)
|
||||
|
||||
/* Double-precision floating-point structs */
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F64_16) && defined(ARM_TABLE_BITREVIDX_FLT64_16))
|
||||
const arm_cfft_instance_f64 arm_cfft_sR_f64_len16 = {
|
||||
16, (const float64_t *)twiddleCoefF64_16, armBitRevIndexTableF64_16, ARMBITREVINDEXTABLEF64_16_TABLE_LENGTH
|
||||
};
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F64_32) && defined(ARM_TABLE_BITREVIDX_FLT64_32))
|
||||
const arm_cfft_instance_f64 arm_cfft_sR_f64_len32 = {
|
||||
32, (const float64_t *)twiddleCoefF64_32, armBitRevIndexTableF64_32, ARMBITREVINDEXTABLEF64_32_TABLE_LENGTH
|
||||
};
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F64_64) && defined(ARM_TABLE_BITREVIDX_FLT64_64))
|
||||
const arm_cfft_instance_f64 arm_cfft_sR_f64_len64 = {
|
||||
64, (const float64_t *)twiddleCoefF64_64, armBitRevIndexTableF64_64, ARMBITREVINDEXTABLEF64_64_TABLE_LENGTH
|
||||
};
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F64_128) && defined(ARM_TABLE_BITREVIDX_FLT64_128))
|
||||
const arm_cfft_instance_f64 arm_cfft_sR_f64_len128 = {
|
||||
128, (const float64_t *)twiddleCoefF64_128, armBitRevIndexTableF64_128, ARMBITREVINDEXTABLEF64_128_TABLE_LENGTH
|
||||
};
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F64_256) && defined(ARM_TABLE_BITREVIDX_FLT64_256))
|
||||
const arm_cfft_instance_f64 arm_cfft_sR_f64_len256 = {
|
||||
256, (const float64_t *)twiddleCoefF64_256, armBitRevIndexTableF64_256, ARMBITREVINDEXTABLEF64_256_TABLE_LENGTH
|
||||
};
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F64_512) && defined(ARM_TABLE_BITREVIDX_FLT64_512))
|
||||
const arm_cfft_instance_f64 arm_cfft_sR_f64_len512 = {
|
||||
512, (const float64_t *)twiddleCoefF64_512, armBitRevIndexTableF64_512, ARMBITREVINDEXTABLEF64_512_TABLE_LENGTH
|
||||
};
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F64_1024) && defined(ARM_TABLE_BITREVIDX_FLT64_1024))
|
||||
const arm_cfft_instance_f64 arm_cfft_sR_f64_len1024 = {
|
||||
1024, (const float64_t *)twiddleCoefF64_1024, armBitRevIndexTableF64_1024, ARMBITREVINDEXTABLEF64_1024_TABLE_LENGTH
|
||||
};
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F64_2048) && defined(ARM_TABLE_BITREVIDX_FLT64_2048))
|
||||
const arm_cfft_instance_f64 arm_cfft_sR_f64_len2048 = {
|
||||
2048, (const float64_t *)twiddleCoefF64_2048, armBitRevIndexTableF64_2048, ARMBITREVINDEXTABLEF64_2048_TABLE_LENGTH
|
||||
};
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F64_4096) && defined(ARM_TABLE_BITREVIDX_FLT64_4096))
|
||||
const arm_cfft_instance_f64 arm_cfft_sR_f64_len4096 = {
|
||||
4096, (const float64_t *)twiddleCoefF64_4096, armBitRevIndexTableF64_4096, ARMBITREVINDEXTABLEF64_4096_TABLE_LENGTH
|
||||
};
|
||||
#endif
|
||||
|
||||
/* Single-precision floating-point structs */
|
||||
#if !defined(ARM_MATH_MVEF) || defined(ARM_MATH_AUTOVECTORIZE)
|
||||
|
||||
/*
|
||||
|
||||
Those structures cannot be used to initialize the MVE version of the FFT F32 instances.
|
||||
So they are not compiled when MVE is defined.
|
||||
|
||||
For the MVE version, the new arm_cfft_init_f32 must be used.
|
||||
|
||||
|
||||
*/
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_16) && defined(ARM_TABLE_BITREVIDX_FLT_16))
|
||||
const arm_cfft_instance_f32 arm_cfft_sR_f32_len16 = {
|
||||
16, twiddleCoef_16, armBitRevIndexTable16, ARMBITREVINDEXTABLE_16_TABLE_LENGTH
|
||||
};
|
||||
#endif
|
||||
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_32) && defined(ARM_TABLE_BITREVIDX_FLT_32))
|
||||
const arm_cfft_instance_f32 arm_cfft_sR_f32_len32 = {
|
||||
32, twiddleCoef_32, armBitRevIndexTable32, ARMBITREVINDEXTABLE_32_TABLE_LENGTH
|
||||
};
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_64) && defined(ARM_TABLE_BITREVIDX_FLT_64))
|
||||
const arm_cfft_instance_f32 arm_cfft_sR_f32_len64 = {
|
||||
64, twiddleCoef_64, armBitRevIndexTable64, ARMBITREVINDEXTABLE_64_TABLE_LENGTH
|
||||
};
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_128) && defined(ARM_TABLE_BITREVIDX_FLT_128))
|
||||
const arm_cfft_instance_f32 arm_cfft_sR_f32_len128 = {
|
||||
128, twiddleCoef_128, armBitRevIndexTable128, ARMBITREVINDEXTABLE_128_TABLE_LENGTH
|
||||
};
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_256) && defined(ARM_TABLE_BITREVIDX_FLT_256))
|
||||
const arm_cfft_instance_f32 arm_cfft_sR_f32_len256 = {
|
||||
256, twiddleCoef_256, armBitRevIndexTable256, ARMBITREVINDEXTABLE_256_TABLE_LENGTH
|
||||
};
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_512) && defined(ARM_TABLE_BITREVIDX_FLT_512))
|
||||
const arm_cfft_instance_f32 arm_cfft_sR_f32_len512 = {
|
||||
512, twiddleCoef_512, armBitRevIndexTable512, ARMBITREVINDEXTABLE_512_TABLE_LENGTH
|
||||
};
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_1024) && defined(ARM_TABLE_BITREVIDX_FLT_1024))
|
||||
const arm_cfft_instance_f32 arm_cfft_sR_f32_len1024 = {
|
||||
1024, twiddleCoef_1024, armBitRevIndexTable1024, ARMBITREVINDEXTABLE_1024_TABLE_LENGTH
|
||||
};
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_2048) && defined(ARM_TABLE_BITREVIDX_FLT_2048))
|
||||
const arm_cfft_instance_f32 arm_cfft_sR_f32_len2048 = {
|
||||
2048, twiddleCoef_2048, armBitRevIndexTable2048, ARMBITREVINDEXTABLE_2048_TABLE_LENGTH
|
||||
};
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_4096) && defined(ARM_TABLE_BITREVIDX_FLT_4096))
|
||||
const arm_cfft_instance_f32 arm_cfft_sR_f32_len4096 = {
|
||||
4096, twiddleCoef_4096, armBitRevIndexTable4096, ARMBITREVINDEXTABLE_4096_TABLE_LENGTH
|
||||
};
|
||||
#endif
|
||||
|
||||
#endif /* !defined(ARM_MATH_MVEF) || defined(ARM_MATH_AUTOVECTORIZE) */
|
||||
|
||||
/* Fixed-point structs */
|
||||
|
||||
#if !defined(ARM_MATH_MVEI)
|
||||
|
||||
/*
|
||||
|
||||
Those structures cannot be used to initialize the MVE version of the FFT Q31 and Q15 instances.
|
||||
So they are not compiled when MVE is defined.
|
||||
|
||||
For the MVE version, arm_cfft_init_q31 or arm_cfft_init_q15 must be used.
|
||||
|
||||
|
||||
*/
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_16) && defined(ARM_TABLE_BITREVIDX_FXT_16))
|
||||
const arm_cfft_instance_q31 arm_cfft_sR_q31_len16 = {
|
||||
16, twiddleCoef_16_q31, armBitRevIndexTable_fixed_16, ARMBITREVINDEXTABLE_FIXED_16_TABLE_LENGTH
|
||||
};
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_32) && defined(ARM_TABLE_BITREVIDX_FXT_32))
|
||||
const arm_cfft_instance_q31 arm_cfft_sR_q31_len32 = {
|
||||
32, twiddleCoef_32_q31, armBitRevIndexTable_fixed_32, ARMBITREVINDEXTABLE_FIXED_32_TABLE_LENGTH
|
||||
};
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_64) && defined(ARM_TABLE_BITREVIDX_FXT_64))
|
||||
const arm_cfft_instance_q31 arm_cfft_sR_q31_len64 = {
|
||||
64, twiddleCoef_64_q31, armBitRevIndexTable_fixed_64, ARMBITREVINDEXTABLE_FIXED_64_TABLE_LENGTH
|
||||
};
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_128) && defined(ARM_TABLE_BITREVIDX_FXT_128))
|
||||
const arm_cfft_instance_q31 arm_cfft_sR_q31_len128 = {
|
||||
128, twiddleCoef_128_q31, armBitRevIndexTable_fixed_128, ARMBITREVINDEXTABLE_FIXED_128_TABLE_LENGTH
|
||||
};
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_256) && defined(ARM_TABLE_BITREVIDX_FXT_256))
|
||||
const arm_cfft_instance_q31 arm_cfft_sR_q31_len256 = {
|
||||
256, twiddleCoef_256_q31, armBitRevIndexTable_fixed_256, ARMBITREVINDEXTABLE_FIXED_256_TABLE_LENGTH
|
||||
};
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_512) && defined(ARM_TABLE_BITREVIDX_FXT_512))
|
||||
const arm_cfft_instance_q31 arm_cfft_sR_q31_len512 = {
|
||||
512, twiddleCoef_512_q31, armBitRevIndexTable_fixed_512, ARMBITREVINDEXTABLE_FIXED_512_TABLE_LENGTH
|
||||
};
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_1024) && defined(ARM_TABLE_BITREVIDX_FXT_1024))
|
||||
const arm_cfft_instance_q31 arm_cfft_sR_q31_len1024 = {
|
||||
1024, twiddleCoef_1024_q31, armBitRevIndexTable_fixed_1024, ARMBITREVINDEXTABLE_FIXED_1024_TABLE_LENGTH
|
||||
};
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_2048) && defined(ARM_TABLE_BITREVIDX_FXT_2048))
|
||||
const arm_cfft_instance_q31 arm_cfft_sR_q31_len2048 = {
|
||||
2048, twiddleCoef_2048_q31, armBitRevIndexTable_fixed_2048, ARMBITREVINDEXTABLE_FIXED_2048_TABLE_LENGTH
|
||||
};
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_4096) && defined(ARM_TABLE_BITREVIDX_FXT_4096))
|
||||
const arm_cfft_instance_q31 arm_cfft_sR_q31_len4096 = {
|
||||
4096, twiddleCoef_4096_q31, armBitRevIndexTable_fixed_4096, ARMBITREVINDEXTABLE_FIXED_4096_TABLE_LENGTH
|
||||
};
|
||||
#endif
|
||||
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_16) && defined(ARM_TABLE_BITREVIDX_FXT_16))
|
||||
const arm_cfft_instance_q15 arm_cfft_sR_q15_len16 = {
|
||||
16, twiddleCoef_16_q15, armBitRevIndexTable_fixed_16, ARMBITREVINDEXTABLE_FIXED_16_TABLE_LENGTH
|
||||
};
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_32) && defined(ARM_TABLE_BITREVIDX_FXT_32))
|
||||
const arm_cfft_instance_q15 arm_cfft_sR_q15_len32 = {
|
||||
32, twiddleCoef_32_q15, armBitRevIndexTable_fixed_32, ARMBITREVINDEXTABLE_FIXED_32_TABLE_LENGTH
|
||||
};
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_64) && defined(ARM_TABLE_BITREVIDX_FXT_64))
|
||||
const arm_cfft_instance_q15 arm_cfft_sR_q15_len64 = {
|
||||
64, twiddleCoef_64_q15, armBitRevIndexTable_fixed_64, ARMBITREVINDEXTABLE_FIXED_64_TABLE_LENGTH
|
||||
};
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_128) && defined(ARM_TABLE_BITREVIDX_FXT_128))
|
||||
const arm_cfft_instance_q15 arm_cfft_sR_q15_len128 = {
|
||||
128, twiddleCoef_128_q15, armBitRevIndexTable_fixed_128, ARMBITREVINDEXTABLE_FIXED_128_TABLE_LENGTH
|
||||
};
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_256) && defined(ARM_TABLE_BITREVIDX_FXT_256))
|
||||
const arm_cfft_instance_q15 arm_cfft_sR_q15_len256 = {
|
||||
256, twiddleCoef_256_q15, armBitRevIndexTable_fixed_256, ARMBITREVINDEXTABLE_FIXED_256_TABLE_LENGTH
|
||||
};
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_512) && defined(ARM_TABLE_BITREVIDX_FXT_512))
|
||||
const arm_cfft_instance_q15 arm_cfft_sR_q15_len512 = {
|
||||
512, twiddleCoef_512_q15, armBitRevIndexTable_fixed_512, ARMBITREVINDEXTABLE_FIXED_512_TABLE_LENGTH
|
||||
};
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_1024) && defined(ARM_TABLE_BITREVIDX_FXT_1024))
|
||||
const arm_cfft_instance_q15 arm_cfft_sR_q15_len1024 = {
|
||||
1024, twiddleCoef_1024_q15, armBitRevIndexTable_fixed_1024, ARMBITREVINDEXTABLE_FIXED_1024_TABLE_LENGTH
|
||||
};
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_2048) && defined(ARM_TABLE_BITREVIDX_FXT_2048))
|
||||
const arm_cfft_instance_q15 arm_cfft_sR_q15_len2048 = {
|
||||
2048, twiddleCoef_2048_q15, armBitRevIndexTable_fixed_2048, ARMBITREVINDEXTABLE_FIXED_2048_TABLE_LENGTH
|
||||
};
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_4096) && defined(ARM_TABLE_BITREVIDX_FXT_4096))
|
||||
const arm_cfft_instance_q15 arm_cfft_sR_q15_len4096 = {
|
||||
4096, twiddleCoef_4096_q15, armBitRevIndexTable_fixed_4096, ARMBITREVINDEXTABLE_FIXED_4096_TABLE_LENGTH
|
||||
};
|
||||
#endif
|
||||
|
||||
#endif /* !defined(ARM_MATH_MVEI) */
|
||||
|
||||
/* Structure for real-value inputs */
|
||||
/* Double precision structs */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F64_32) && defined(ARM_TABLE_BITREVIDX_FLT64_32) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F64_32))
|
||||
const arm_rfft_fast_instance_f64 arm_rfft_fast_sR_f64_len32 = {
|
||||
{ 16, (const float64_t *)twiddleCoefF64_16, armBitRevIndexTableF64_16, ARMBITREVINDEXTABLEF64_16_TABLE_LENGTH },
|
||||
32U,
|
||||
(float64_t *)twiddleCoefF64_rfft_32
|
||||
};
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F64_64) && defined(ARM_TABLE_BITREVIDX_FLT64_64) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F64_64))
|
||||
const arm_rfft_fast_instance_f64 arm_rfft_fast_sR_f64_len64 = {
|
||||
{ 32, (const float64_t *)twiddleCoefF64_32, armBitRevIndexTableF64_32, ARMBITREVINDEXTABLEF64_32_TABLE_LENGTH },
|
||||
64U,
|
||||
(float64_t *)twiddleCoefF64_rfft_64
|
||||
};
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F64_128) && defined(ARM_TABLE_BITREVIDX_FLT64_128) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F64_128))
|
||||
const arm_rfft_fast_instance_f64 arm_rfft_fast_sR_f64_len128 = {
|
||||
{ 64, (const float64_t *)twiddleCoefF64_64, armBitRevIndexTableF64_64, ARMBITREVINDEXTABLEF64_64_TABLE_LENGTH },
|
||||
128U,
|
||||
(float64_t *)twiddleCoefF64_rfft_128
|
||||
};
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F64_256) && defined(ARM_TABLE_BITREVIDX_FLT64_256) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F64_256))
|
||||
const arm_rfft_fast_instance_f64 arm_rfft_fast_sR_f64_len256 = {
|
||||
{ 128, (const float64_t *)twiddleCoefF64_128, armBitRevIndexTableF64_128, ARMBITREVINDEXTABLEF64_128_TABLE_LENGTH },
|
||||
256U,
|
||||
(float64_t *)twiddleCoefF64_rfft_256
|
||||
};
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F64_512) && defined(ARM_TABLE_BITREVIDX_FLT64_512) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F64_512))
|
||||
const arm_rfft_fast_instance_f64 arm_rfft_fast_sR_f64_len512 = {
|
||||
{ 256, (const float64_t *)twiddleCoefF64_256, armBitRevIndexTableF64_256, ARMBITREVINDEXTABLEF64_256_TABLE_LENGTH },
|
||||
512U,
|
||||
(float64_t *)twiddleCoefF64_rfft_512
|
||||
};
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F64_1024) && defined(ARM_TABLE_BITREVIDX_FLT64_1024) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F64_1024))
|
||||
const arm_rfft_fast_instance_f64 arm_rfft_fast_sR_f64_len1024 = {
|
||||
{ 512, (const float64_t *)twiddleCoefF64_512, armBitRevIndexTableF64_512, ARMBITREVINDEXTABLEF64_512_TABLE_LENGTH },
|
||||
1024U,
|
||||
(float64_t *)twiddleCoefF64_rfft_1024
|
||||
};
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F64_2048) && defined(ARM_TABLE_BITREVIDX_FLT64_2048) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F64_2048))
|
||||
const arm_rfft_fast_instance_f64 arm_rfft_fast_sR_f64_len2048 = {
|
||||
{ 1024, (const float64_t *)twiddleCoefF64_1024, armBitRevIndexTableF64_1024, ARMBITREVINDEXTABLEF64_1024_TABLE_LENGTH },
|
||||
2048U,
|
||||
(float64_t *)twiddleCoefF64_rfft_2048
|
||||
};
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F64_4096) && defined(ARM_TABLE_BITREVIDX_FLT64_4096) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F64_4096))
|
||||
const arm_rfft_fast_instance_f64 arm_rfft_fast_sR_f64_len4096 = {
|
||||
{ 2048, (const float64_t *)twiddleCoefF64_2048, armBitRevIndexTableF64_2048, ARMBITREVINDEXTABLEF64_2048_TABLE_LENGTH },
|
||||
4096U,
|
||||
(float64_t *)twiddleCoefF64_rfft_4096
|
||||
};
|
||||
#endif
|
||||
|
||||
/* Floating-point structs */
|
||||
|
||||
#if !defined(ARM_MATH_MVEF) || defined(ARM_MATH_AUTOVECTORIZE)
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_32) && defined(ARM_TABLE_BITREVIDX_FLT_32) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_32))
|
||||
const arm_rfft_fast_instance_f32 arm_rfft_fast_sR_f32_len32 = {
|
||||
{ 16, twiddleCoef_16, armBitRevIndexTable16, ARMBITREVINDEXTABLE_16_TABLE_LENGTH },
|
||||
32U,
|
||||
(float32_t *)twiddleCoef_rfft_32
|
||||
};
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_64) && defined(ARM_TABLE_BITREVIDX_FLT_64) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_64))
|
||||
const arm_rfft_fast_instance_f32 arm_rfft_fast_sR_f32_len64 = {
|
||||
{ 32, twiddleCoef_32, armBitRevIndexTable32, ARMBITREVINDEXTABLE_32_TABLE_LENGTH },
|
||||
64U,
|
||||
(float32_t *)twiddleCoef_rfft_64
|
||||
};
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_128) && defined(ARM_TABLE_BITREVIDX_FLT_128) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_128))
|
||||
const arm_rfft_fast_instance_f32 arm_rfft_fast_sR_f32_len128 = {
|
||||
{ 64, twiddleCoef_64, armBitRevIndexTable64, ARMBITREVINDEXTABLE_64_TABLE_LENGTH },
|
||||
128U,
|
||||
(float32_t *)twiddleCoef_rfft_128
|
||||
};
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_256) && defined(ARM_TABLE_BITREVIDX_FLT_256) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_256))
|
||||
const arm_rfft_fast_instance_f32 arm_rfft_fast_sR_f32_len256 = {
|
||||
{ 128, twiddleCoef_128, armBitRevIndexTable128, ARMBITREVINDEXTABLE_128_TABLE_LENGTH },
|
||||
256U,
|
||||
(float32_t *)twiddleCoef_rfft_256
|
||||
};
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_512) && defined(ARM_TABLE_BITREVIDX_FLT_512) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_512))
|
||||
const arm_rfft_fast_instance_f32 arm_rfft_fast_sR_f32_len512 = {
|
||||
{ 256, twiddleCoef_256, armBitRevIndexTable256, ARMBITREVINDEXTABLE_256_TABLE_LENGTH },
|
||||
512U,
|
||||
(float32_t *)twiddleCoef_rfft_512
|
||||
};
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_1024) && defined(ARM_TABLE_BITREVIDX_FLT_1024) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_1024))
|
||||
const arm_rfft_fast_instance_f32 arm_rfft_fast_sR_f32_len1024 = {
|
||||
{ 512, twiddleCoef_512, armBitRevIndexTable512, ARMBITREVINDEXTABLE_512_TABLE_LENGTH },
|
||||
1024U,
|
||||
(float32_t *)twiddleCoef_rfft_1024
|
||||
};
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_2048) && defined(ARM_TABLE_BITREVIDX_FLT_2048) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_2048))
|
||||
const arm_rfft_fast_instance_f32 arm_rfft_fast_sR_f32_len2048 = {
|
||||
{ 1024, twiddleCoef_1024, armBitRevIndexTable1024, ARMBITREVINDEXTABLE_1024_TABLE_LENGTH },
|
||||
2048U,
|
||||
(float32_t *)twiddleCoef_rfft_2048
|
||||
};
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_4096) && defined(ARM_TABLE_BITREVIDX_FLT_4096) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_4096))
|
||||
const arm_rfft_fast_instance_f32 arm_rfft_fast_sR_f32_len4096 = {
|
||||
{ 2048, twiddleCoef_2048, armBitRevIndexTable2048, ARMBITREVINDEXTABLE_2048_TABLE_LENGTH },
|
||||
4096U,
|
||||
(float32_t *)twiddleCoef_rfft_4096
|
||||
};
|
||||
#endif
|
||||
|
||||
#endif /* #if !defined(ARM_MATH_MVEF) || defined(ARM_MATH_AUTOVECTORIZE) */
|
||||
|
||||
/* Fixed-point structs */
|
||||
/* q31_t */
|
||||
|
||||
#if !defined(ARM_MATH_MVEI)
|
||||
|
||||
/*
|
||||
|
||||
Those structures cannot be used to initialize the MVE version of the FFT Q31 and Q15 instances.
|
||||
So they are not compiled when MVE is defined.
|
||||
|
||||
For the MVE version, arm_rfft_init_q31 or arm_rfft_init_q15 must be used.
|
||||
|
||||
|
||||
*/
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_REALCOEF_Q31) && defined(ARM_TABLE_TWIDDLECOEF_Q31_16) && defined(ARM_TABLE_BITREVIDX_FXT_16))
|
||||
const arm_rfft_instance_q31 arm_rfft_sR_q31_len32 = {
|
||||
32U,
|
||||
0,
|
||||
1,
|
||||
256U,
|
||||
(q31_t*)realCoefAQ31,
|
||||
(q31_t*)realCoefBQ31,
|
||||
&arm_cfft_sR_q31_len16
|
||||
};
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_REALCOEF_Q31) && defined(ARM_TABLE_TWIDDLECOEF_Q31_32) && defined(ARM_TABLE_BITREVIDX_FXT_32))
|
||||
const arm_rfft_instance_q31 arm_rfft_sR_q31_len64 = {
|
||||
64U,
|
||||
0,
|
||||
1,
|
||||
128U,
|
||||
(q31_t*)realCoefAQ31,
|
||||
(q31_t*)realCoefBQ31,
|
||||
&arm_cfft_sR_q31_len32
|
||||
};
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_REALCOEF_Q31) && defined(ARM_TABLE_TWIDDLECOEF_Q31_64) && defined(ARM_TABLE_BITREVIDX_FXT_64))
|
||||
const arm_rfft_instance_q31 arm_rfft_sR_q31_len128 = {
|
||||
128U,
|
||||
0,
|
||||
1,
|
||||
64U,
|
||||
(q31_t*)realCoefAQ31,
|
||||
(q31_t*)realCoefBQ31,
|
||||
&arm_cfft_sR_q31_len64
|
||||
};
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_REALCOEF_Q31) && defined(ARM_TABLE_TWIDDLECOEF_Q31_128) && defined(ARM_TABLE_BITREVIDX_FXT_128))
|
||||
const arm_rfft_instance_q31 arm_rfft_sR_q31_len256 = {
|
||||
256U,
|
||||
0,
|
||||
1,
|
||||
32U,
|
||||
(q31_t*)realCoefAQ31,
|
||||
(q31_t*)realCoefBQ31,
|
||||
&arm_cfft_sR_q31_len128
|
||||
};
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_REALCOEF_Q31) && defined(ARM_TABLE_TWIDDLECOEF_Q31_256) && defined(ARM_TABLE_BITREVIDX_FXT_256))
|
||||
const arm_rfft_instance_q31 arm_rfft_sR_q31_len512 = {
|
||||
512U,
|
||||
0,
|
||||
1,
|
||||
16U,
|
||||
(q31_t*)realCoefAQ31,
|
||||
(q31_t*)realCoefBQ31,
|
||||
&arm_cfft_sR_q31_len256
|
||||
};
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_REALCOEF_Q31) && defined(ARM_TABLE_TWIDDLECOEF_Q31_512) && defined(ARM_TABLE_BITREVIDX_FXT_512))
|
||||
const arm_rfft_instance_q31 arm_rfft_sR_q31_len1024 = {
|
||||
1024U,
|
||||
0,
|
||||
1,
|
||||
8U,
|
||||
(q31_t*)realCoefAQ31,
|
||||
(q31_t*)realCoefBQ31,
|
||||
&arm_cfft_sR_q31_len512
|
||||
};
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_REALCOEF_Q31) && defined(ARM_TABLE_TWIDDLECOEF_Q31_1024) && defined(ARM_TABLE_BITREVIDX_FXT_1024))
|
||||
const arm_rfft_instance_q31 arm_rfft_sR_q31_len2048 = {
|
||||
2048U,
|
||||
0,
|
||||
1,
|
||||
4U,
|
||||
(q31_t*)realCoefAQ31,
|
||||
(q31_t*)realCoefBQ31,
|
||||
&arm_cfft_sR_q31_len1024
|
||||
};
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_REALCOEF_Q31) && defined(ARM_TABLE_TWIDDLECOEF_Q31_2048) && defined(ARM_TABLE_BITREVIDX_FXT_2048))
|
||||
const arm_rfft_instance_q31 arm_rfft_sR_q31_len4096 = {
|
||||
4096U,
|
||||
0,
|
||||
1,
|
||||
2U,
|
||||
(q31_t*)realCoefAQ31,
|
||||
(q31_t*)realCoefBQ31,
|
||||
&arm_cfft_sR_q31_len2048
|
||||
};
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_REALCOEF_Q31) && defined(ARM_TABLE_TWIDDLECOEF_Q31_4096) && defined(ARM_TABLE_BITREVIDX_FXT_4096))
|
||||
const arm_rfft_instance_q31 arm_rfft_sR_q31_len8192 = {
|
||||
8192U,
|
||||
0,
|
||||
1,
|
||||
1U,
|
||||
(q31_t*)realCoefAQ31,
|
||||
(q31_t*)realCoefBQ31,
|
||||
&arm_cfft_sR_q31_len4096
|
||||
};
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
/* q15_t */
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_REALCOEF_Q15) && defined(ARM_TABLE_TWIDDLECOEF_Q15_16) && defined(ARM_TABLE_BITREVIDX_FXT_16))
|
||||
const arm_rfft_instance_q15 arm_rfft_sR_q15_len32 = {
|
||||
32U,
|
||||
0,
|
||||
1,
|
||||
256U,
|
||||
(q15_t*)realCoefAQ15,
|
||||
(q15_t*)realCoefBQ15,
|
||||
&arm_cfft_sR_q15_len16
|
||||
};
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_REALCOEF_Q15) && defined(ARM_TABLE_TWIDDLECOEF_Q15_32) && defined(ARM_TABLE_BITREVIDX_FXT_32))
|
||||
const arm_rfft_instance_q15 arm_rfft_sR_q15_len64 = {
|
||||
64U,
|
||||
0,
|
||||
1,
|
||||
128U,
|
||||
(q15_t*)realCoefAQ15,
|
||||
(q15_t*)realCoefBQ15,
|
||||
&arm_cfft_sR_q15_len32
|
||||
};
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_REALCOEF_Q15) && defined(ARM_TABLE_TWIDDLECOEF_Q15_64) && defined(ARM_TABLE_BITREVIDX_FXT_64))
|
||||
const arm_rfft_instance_q15 arm_rfft_sR_q15_len128 = {
|
||||
128U,
|
||||
0,
|
||||
1,
|
||||
64U,
|
||||
(q15_t*)realCoefAQ15,
|
||||
(q15_t*)realCoefBQ15,
|
||||
&arm_cfft_sR_q15_len64
|
||||
};
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_REALCOEF_Q15) && defined(ARM_TABLE_TWIDDLECOEF_Q15_128) && defined(ARM_TABLE_BITREVIDX_FXT_128))
|
||||
const arm_rfft_instance_q15 arm_rfft_sR_q15_len256 = {
|
||||
256U,
|
||||
0,
|
||||
1,
|
||||
32U,
|
||||
(q15_t*)realCoefAQ15,
|
||||
(q15_t*)realCoefBQ15,
|
||||
&arm_cfft_sR_q15_len128
|
||||
};
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_REALCOEF_Q15) && defined(ARM_TABLE_TWIDDLECOEF_Q15_256) && defined(ARM_TABLE_BITREVIDX_FXT_256))
|
||||
const arm_rfft_instance_q15 arm_rfft_sR_q15_len512 = {
|
||||
512U,
|
||||
0,
|
||||
1,
|
||||
16U,
|
||||
(q15_t*)realCoefAQ15,
|
||||
(q15_t*)realCoefBQ15,
|
||||
&arm_cfft_sR_q15_len256
|
||||
};
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_REALCOEF_Q15) && defined(ARM_TABLE_TWIDDLECOEF_Q15_512) && defined(ARM_TABLE_BITREVIDX_FXT_512))
|
||||
const arm_rfft_instance_q15 arm_rfft_sR_q15_len1024 = {
|
||||
1024U,
|
||||
0,
|
||||
1,
|
||||
8U,
|
||||
(q15_t*)realCoefAQ15,
|
||||
(q15_t*)realCoefBQ15,
|
||||
&arm_cfft_sR_q15_len512
|
||||
};
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_REALCOEF_Q15) && defined(ARM_TABLE_TWIDDLECOEF_Q15_1024) && defined(ARM_TABLE_BITREVIDX_FXT_1024))
|
||||
const arm_rfft_instance_q15 arm_rfft_sR_q15_len2048 = {
|
||||
2048U,
|
||||
0,
|
||||
1,
|
||||
4U,
|
||||
(q15_t*)realCoefAQ15,
|
||||
(q15_t*)realCoefBQ15,
|
||||
&arm_cfft_sR_q15_len1024
|
||||
};
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_REALCOEF_Q15) && defined(ARM_TABLE_TWIDDLECOEF_Q15_2048) && defined(ARM_TABLE_BITREVIDX_FXT_2048))
|
||||
const arm_rfft_instance_q15 arm_rfft_sR_q15_len4096 = {
|
||||
4096U,
|
||||
0,
|
||||
1,
|
||||
2U,
|
||||
(q15_t*)realCoefAQ15,
|
||||
(q15_t*)realCoefBQ15,
|
||||
&arm_cfft_sR_q15_len2048
|
||||
};
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_REALCOEF_Q15) && defined(ARM_TABLE_TWIDDLECOEF_Q15_4096) && defined(ARM_TABLE_BITREVIDX_FXT_4096))
|
||||
const arm_rfft_instance_q15 arm_rfft_sR_q15_len8192 = {
|
||||
8192U,
|
||||
0,
|
||||
1,
|
||||
1U,
|
||||
(q15_t*)realCoefAQ15,
|
||||
(q15_t*)realCoefBQ15,
|
||||
&arm_cfft_sR_q15_len4096
|
||||
};
|
||||
#endif
|
||||
|
||||
#endif /* !defined(ARM_MATH_MVEI) */
|
||||
|
||||
|
||||
#endif
|
||||
6542
libraries/cmsis/dsp/Source/CommonTables/arm_mve_tables.c
Normal file
6542
libraries/cmsis/dsp/Source/CommonTables/arm_mve_tables.c
Normal file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,53 @@
|
||||
# Build script for the CMSIS-DSP complex math static library.
cmake_minimum_required (VERSION 3.6)

project(CMSISDSPComplexMath)

include(configLib)
include(configDsp)

file(GLOB SRC "./*_*.c")

add_library(CMSISDSPComplexMath STATIC)

configLib(CMSISDSPComplexMath ${ROOT})
configDsp(CMSISDSPComplexMath ${ROOT})

include(interpol)
# Configure this library's own target. The call previously named
# CMSISDSPFastMath, a target this script does not create, while every other
# command here operates on CMSISDSPComplexMath.
# NOTE(review): interpol() is defined outside this file -- confirm it only
# configures the target it is given.
interpol(CMSISDSPComplexMath)

if (CONFIGTABLE AND ALLFAST)
  target_compile_definitions(CMSISDSPComplexMath PUBLIC ARM_ALL_FAST_TABLES)
endif()

# MVE code is using a table for computing the fast sqrt arm_cmplx_mag_q31
# There is the possibility of not compiling this function and not including
# the table.
if (NOT CONFIGTABLE OR ALLFAST OR ARM_CMPLX_MAG_Q31 OR (NOT HELIUM AND NOT MVEI))
  target_sources(CMSISDSPComplexMath PRIVATE arm_cmplx_mag_q31.c)
endif()

if (NOT CONFIGTABLE OR ALLFAST OR ARM_CMPLX_MAG_Q15 OR (NOT HELIUM AND NOT MVEI))
  target_sources(CMSISDSPComplexMath PRIVATE arm_cmplx_mag_q15.c)
endif()

# Sources that are always part of the library.
target_sources(CMSISDSPComplexMath PRIVATE
  arm_cmplx_conj_f32.c
  arm_cmplx_conj_q15.c
  arm_cmplx_conj_q31.c
  arm_cmplx_dot_prod_f32.c
  arm_cmplx_dot_prod_q15.c
  arm_cmplx_dot_prod_q31.c
  arm_cmplx_mag_f32.c
  arm_cmplx_mag_squared_f32.c
  arm_cmplx_mag_squared_q15.c
  arm_cmplx_mag_squared_q31.c
  arm_cmplx_mult_cmplx_f32.c
  arm_cmplx_mult_cmplx_q15.c
  arm_cmplx_mult_cmplx_q31.c
  arm_cmplx_mult_real_f32.c
  arm_cmplx_mult_real_q15.c
  arm_cmplx_mult_real_q31.c)

### Includes
target_include_directories(CMSISDSPComplexMath PUBLIC "${DSP}/Include")
|
||||
@@ -0,0 +1,46 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: ComplexMathFunctions.c
|
||||
* Description: Combination of all complex math function source files.
|
||||
*
|
||||
* $Date: 18. March 2019
|
||||
* $Revision: V1.0.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "arm_cmplx_conj_f32.c"
|
||||
#include "arm_cmplx_conj_q15.c"
|
||||
#include "arm_cmplx_conj_q31.c"
|
||||
#include "arm_cmplx_dot_prod_f32.c"
|
||||
#include "arm_cmplx_dot_prod_q15.c"
|
||||
#include "arm_cmplx_dot_prod_q31.c"
|
||||
#include "arm_cmplx_mag_f32.c"
|
||||
#include "arm_cmplx_mag_q15.c"
|
||||
#include "arm_cmplx_mag_q31.c"
|
||||
#include "arm_cmplx_mag_squared_f32.c"
|
||||
#include "arm_cmplx_mag_squared_q15.c"
|
||||
#include "arm_cmplx_mag_squared_q31.c"
|
||||
#include "arm_cmplx_mult_cmplx_f32.c"
|
||||
#include "arm_cmplx_mult_cmplx_q15.c"
|
||||
#include "arm_cmplx_mult_cmplx_q31.c"
|
||||
#include "arm_cmplx_mult_real_f32.c"
|
||||
#include "arm_cmplx_mult_real_q15.c"
|
||||
#include "arm_cmplx_mult_real_q31.c"
|
||||
@@ -0,0 +1,213 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_cmplx_conj_f32.c
|
||||
* Description: Floating-point complex conjugate
|
||||
*
|
||||
* $Date: 18. March 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
@ingroup groupCmplxMath
|
||||
*/
|
||||
|
||||
/**
|
||||
@defgroup cmplx_conj Complex Conjugate
|
||||
|
||||
Conjugates the elements of a complex data vector.
|
||||
|
||||
The <code>pSrc</code> points to the source data and
|
||||
<code>pDst</code> points to the destination data where the result should be written.
|
||||
<code>numSamples</code> specifies the number of complex samples
|
||||
and the data in each array is stored in an interleaved fashion
|
||||
(real, imag, real, imag, ...).
|
||||
Each array has a total of <code>2*numSamples</code> values.
|
||||
|
||||
The following underlying algorithm is used:
|
||||
<pre>
|
||||
for (n = 0; n < numSamples; n++) {
|
||||
pDst[(2*n) ] = pSrc[(2*n) ]; // real part
|
||||
pDst[(2*n)+1] = -pSrc[(2*n)+1]; // imag part
|
||||
}
|
||||
</pre>
|
||||
|
||||
There are separate functions for floating-point, Q15, and Q31 data types.
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup cmplx_conj
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Floating-point complex conjugate.
|
||||
@param[in] pSrc points to the input vector
|
||||
@param[out] pDst points to the output vector
|
||||
@param[in] numSamples number of samples in each vector
|
||||
@return none
|
||||
*/
|
||||
|
||||
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
|
||||
void arm_cmplx_conj_f32(
|
||||
const float32_t * pSrc,
|
||||
float32_t * pDst,
|
||||
uint32_t numSamples)
|
||||
{
|
||||
static const float32_t cmplx_conj_sign[4] = { 1.0f, -1.0f, 1.0f, -1.0f };
|
||||
uint32_t blockSize = numSamples * CMPLX_DIM; /* loop counters */
|
||||
uint32_t blkCnt;
|
||||
f32x4_t vecSrc;
|
||||
f32x4_t vecSign;
|
||||
|
||||
/*
|
||||
* load sign vector
|
||||
*/
|
||||
vecSign = *(f32x4_t *) cmplx_conj_sign;
|
||||
|
||||
/* Compute 4 real samples at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
vecSrc = vld1q(pSrc);
|
||||
vst1q(pDst,vmulq(vecSrc, vecSign));
|
||||
/*
|
||||
* Decrement the blkCnt loop counter
|
||||
* Advance vector source and destination pointers
|
||||
*/
|
||||
pSrc += 4;
|
||||
pDst += 4;
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Tail */
|
||||
blkCnt = (blockSize & 0x3) >> 1;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C[0] + jC[1] = A[0]+ j(-1)A[1] */
|
||||
|
||||
/* Calculate Complex Conjugate and store result in destination buffer. */
|
||||
*pDst++ = *pSrc++;
|
||||
*pDst++ = -*pSrc++;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#else
|
||||
void arm_cmplx_conj_f32(
|
||||
const float32_t * pSrc,
|
||||
float32_t * pDst,
|
||||
uint32_t numSamples)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
|
||||
#if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
float32x4_t zero;
|
||||
float32x4x2_t vec;
|
||||
|
||||
zero = vdupq_n_f32(0.0f);
|
||||
|
||||
/* Compute 4 outputs at a time */
|
||||
blkCnt = numSamples >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C[0]+jC[1] = A[0]+(-1)*jA[1] */
|
||||
/* Calculate Complex Conjugate and then store the results in the destination buffer. */
|
||||
vec = vld2q_f32(pSrc);
|
||||
vec.val[1] = vsubq_f32(zero,vec.val[1]);
|
||||
vst2q_f32(pDst,vec);
|
||||
|
||||
/* Increment pointers */
|
||||
pSrc += 8;
|
||||
pDst += 8;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Tail */
|
||||
blkCnt = numSamples & 0x3;
|
||||
|
||||
#else
|
||||
#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = numSamples >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C[0] + jC[1] = A[0]+ j(-1)A[1] */
|
||||
|
||||
/* Calculate Complex Conjugate and store result in destination buffer. */
|
||||
*pDst++ = *pSrc++;
|
||||
*pDst++ = -*pSrc++;
|
||||
|
||||
*pDst++ = *pSrc++;
|
||||
*pDst++ = -*pSrc++;
|
||||
|
||||
*pDst++ = *pSrc++;
|
||||
*pDst++ = -*pSrc++;
|
||||
|
||||
*pDst++ = *pSrc++;
|
||||
*pDst++ = -*pSrc++;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = numSamples % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = numSamples;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
#endif /* #if defined (ARM_MATH_NEON) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C[0] + jC[1] = A[0]+ j(-1)A[1] */
|
||||
|
||||
/* Calculate Complex Conjugate and store result in destination buffer. */
|
||||
*pDst++ = *pSrc++;
|
||||
*pDst++ = -*pSrc++;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
|
||||
|
||||
/**
|
||||
@} end of cmplx_conj group
|
||||
*/
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user