#define OVERRIDE_INNER_PRODUCT_SINGLE
/*
 * Single-precision dot product of two float arrays using SSE.
 *
 * a, b : input arrays; both are read with unaligned loads.
 * len  : number of elements to process.
 *
 * Returns a[0]*b[0] + a[1]*b[1] + ... accumulated in four parallel
 * single-precision lanes and then reduced to one scalar.
 *
 * NOTE(review): the loop header, the local declarations and the return
 * statement were missing from the listing this block was recovered from.
 * They are reconstructed from the visible access pattern (two 4-wide loads
 * at a+i and a+i+4, hence a stride of 8) -- confirm against the upstream
 * file. With this stride a len that is not a multiple of 8 reads past
 * a[len-1] / b[len-1]; callers are presumably expected to pad accordingly.
 */
static inline float inner_product_single(const float *a, const float *b, unsigned int len)
{
   unsigned int i;
   float ret;
   __m128 sum = _mm_setzero_ps();

   for (i = 0; i < len; i += 8)
   {
      sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a + i), _mm_loadu_ps(b + i)));
      sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a + i + 4), _mm_loadu_ps(b + i + 4)));
   }

   /* Horizontal reduction: fold lanes 2,3 onto lanes 0,1 ... */
   sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum));
   /* ... then add lane 1 into lane 0 (0x55 selects lane 1 for every slot). */
   sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55));
   _mm_store_ss(&ret, sum);
   return ret;
}
 
 
   56#define OVERRIDE_INTERPOLATE_PRODUCT_SINGLE 
   60  __m128 sum = _mm_setzero_ps();
 
   61  __m128 f = _mm_loadu_ps(frac);
 
   64    sum = _mm_add_ps(sum, _mm_mul_ps(_mm_load1_ps(
a+i), _mm_loadu_ps(
b+i*oversample)));
 
   65    sum = _mm_add_ps(sum, _mm_mul_ps(_mm_load1_ps(
a+i+1), _mm_loadu_ps(
b+(i+1)*oversample)));
 
   67   sum = _mm_mul_ps(f, sum);
 
   68   sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum));
 
   69   sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55));
 
   70   _mm_store_ss(&ret, sum);
 
 
   76#define OVERRIDE_INNER_PRODUCT_DOUBLE 
   78static inline double inner_product_double(
const float *
a, 
const float *
b, 
unsigned int len)
 
   82   __m128d sum = _mm_setzero_pd();
 
   86      t = _mm_mul_ps(_mm_loadu_ps(
a+i), _mm_loadu_ps(
b+i));
 
   87      sum = _mm_add_pd(sum, _mm_cvtps_pd(t));
 
   88      sum = _mm_add_pd(sum, _mm_cvtps_pd(_mm_movehl_ps(t, t)));
 
   90      t = _mm_mul_ps(_mm_loadu_ps(
a+i+4), _mm_loadu_ps(
b+i+4));
 
   91      sum = _mm_add_pd(sum, _mm_cvtps_pd(t));
 
   92      sum = _mm_add_pd(sum, _mm_cvtps_pd(_mm_movehl_ps(t, t)));
 
   94   sum = _mm_add_sd(sum, (__m128d) _mm_movehl_ps((__m128) sum, (__m128) sum));
 
   95   _mm_store_sd(&ret, sum);
 
   99#define OVERRIDE_INTERPOLATE_PRODUCT_DOUBLE 
  100static inline double interpolate_product_double(
const float *
a, 
const float *
b, 
unsigned int len, 
const spx_uint32_t oversample, 
float *frac) {
 
  104  __m128d sum1 = _mm_setzero_pd();
 
  105  __m128d sum2 = _mm_setzero_pd();
 
  106  __m128 f = _mm_loadu_ps(frac);
 
  107  __m128d f1 = _mm_cvtps_pd(f);
 
  108  __m128d f2 = _mm_cvtps_pd(_mm_movehl_ps(f,f));
 
  112    t = _mm_mul_ps(_mm_load1_ps(
a+i), _mm_loadu_ps(
b+i*oversample));
 
  113    sum1 = _mm_add_pd(sum1, _mm_cvtps_pd(t));
 
  114    sum2 = _mm_add_pd(sum2, _mm_cvtps_pd(_mm_movehl_ps(t, t)));
 
  116    t = _mm_mul_ps(_mm_load1_ps(
a+i+1), _mm_loadu_ps(
b+(i+1)*oversample));
 
  117    sum1 = _mm_add_pd(sum1, _mm_cvtps_pd(t));
 
  118    sum2 = _mm_add_pd(sum2, _mm_cvtps_pd(_mm_movehl_ps(t, t)));
 
  120  sum1 = _mm_mul_pd(f1, sum1);
 
  121  sum2 = _mm_mul_pd(f2, sum2);
 
  122  sum = _mm_add_pd(sum1, sum2);
 
  123  sum = _mm_add_sd(sum, (__m128d) _mm_movehl_ps((__m128) sum, (__m128) sum));
 
  124  _mm_store_sd(&ret, sum);
 
static int len(struct ast_channel *chan, const char *cmd, char *data, char *buf, size_t buflen)
static float interpolate_product_single(const float *a, const float *b, unsigned int len, const spx_uint32_t oversample, float *frac)
static float inner_product_single(const float *a, const float *b, unsigned int len)