diff --git a/src/ppl/kernel/x86/fp32/abs/abs_fp32_avx.cpp b/src/ppl/kernel/x86/fp32/abs/abs_fp32_avx.cpp index 92b62ea8..a927c468 100644 --- a/src/ppl/kernel/x86/fp32/abs/abs_fp32_avx.cpp +++ b/src/ppl/kernel/x86/fp32/abs/abs_fp32_avx.cpp @@ -37,8 +37,8 @@ ppl::common::RetCode abs_fp32_avx( for (int64_t i = 0; i < unroll_body; i += unroll_n) { __m256 src0 = _mm256_loadu_ps(x + i + 0 * V_REG_ELTS); __m256 src1 = _mm256_loadu_ps(x + i + 1 * V_REG_ELTS); - __m256 dst0 = _mm256_andnot_ps(src0, vsignbit); - __m256 dst1 = _mm256_andnot_ps(src1, vsignbit); + __m256 dst0 = _mm256_andnot_ps(vsignbit, src0); + __m256 dst1 = _mm256_andnot_ps(vsignbit, src1); _mm256_storeu_ps(y + i + 0 * V_REG_ELTS, dst0); _mm256_storeu_ps(y + i + 1 * V_REG_ELTS, dst1); } @@ -49,4 +49,4 @@ ppl::common::RetCode abs_fp32_avx( return ppl::common::RC_SUCCESS; } -}}}; // namespace ppl::kernel::x86 \ No newline at end of file +}}}; // namespace ppl::kernel::x86 diff --git a/src/ppl/kernel/x86/fp32/abs/abs_fp32_sse.cpp b/src/ppl/kernel/x86/fp32/abs/abs_fp32_sse.cpp index 1132d722..32e91253 100644 --- a/src/ppl/kernel/x86/fp32/abs/abs_fp32_sse.cpp +++ b/src/ppl/kernel/x86/fp32/abs/abs_fp32_sse.cpp @@ -39,10 +39,10 @@ ppl::common::RetCode abs_fp32_sse( __m128 src1 = _mm_loadu_ps(x + i + 1 * V_REG_ELTS); __m128 src2 = _mm_loadu_ps(x + i + 2 * V_REG_ELTS); __m128 src3 = _mm_loadu_ps(x + i + 3 * V_REG_ELTS); - __m128 dst0 = _mm_andnot_ps(src0, vsignbit); - __m128 dst1 = _mm_andnot_ps(src1, vsignbit); - __m128 dst2 = _mm_andnot_ps(src2, vsignbit); - __m128 dst3 = _mm_andnot_ps(src3, vsignbit); + __m128 dst0 = _mm_andnot_ps(vsignbit, src0); + __m128 dst1 = _mm_andnot_ps(vsignbit, src1); + __m128 dst2 = _mm_andnot_ps(vsignbit, src2); + __m128 dst3 = _mm_andnot_ps(vsignbit, src3); _mm_storeu_ps(y + i + 0 * V_REG_ELTS, dst0); _mm_storeu_ps(y + i + 1 * V_REG_ELTS, dst1); _mm_storeu_ps(y + i + 2 * V_REG_ELTS, dst2); @@ -55,4 +55,4 @@ ppl::common::RetCode abs_fp32_sse( return ppl::common::RC_SUCCESS; } -}}}; // namespace ppl::kernel::x86 \ No newline at end of file +}}}; // namespace ppl::kernel::x86