Thursday, June 19, 2014

SSE horizontal minimum and maximum

 /*
  * Horizontal minimum: returns the smallest of the four floats packed in p.
  *
  * The reduction takes two steps: first lanes {0,1} are folded against
  * lanes {2,3}, then the two surviving candidates are folded together.
  */
 static inline float sseHorizontalMin(const __m128 &p)
 {
     /* Bring lanes 2 and 3 down alongside lanes 0 and 1: [2, 3, 2, 3]. */
     const __m128 upperPair = _mm_movehl_ps(p, p);

     /* Lane 0 = min(2, 0), lane 1 = min(3, 1); lanes 2 and 3 are not read below. */
     const __m128 pairMin = _mm_min_ps(upperPair, p);

     /* Copy lane 1 into every lane so it lines up with lane 0. */
     const __m128 lane1 = _mm_shuffle_ps(pairMin, pairMin, _MM_SHUFFLE(1, 1, 1, 1));

     /* Scalar min of lane 0 against lane 1 is the overall minimum. */
     return _mm_cvtss_f32(_mm_min_ss(pairMin, lane1));
 }
 /*
  * Horizontal maximum: returns the largest of the four floats packed in p.
  *
  * The reduction takes two steps: first lanes {0,1} are folded against
  * lanes {2,3}, then the two surviving candidates are folded together.
  */
 static inline float sseHorizontalMax(const __m128 &p)
 {
     /* Bring lanes 2 and 3 down alongside lanes 0 and 1: [2, 3, 2, 3]. */
     const __m128 upperPair = _mm_movehl_ps(p, p);

     /* Lane 0 = max(2, 0), lane 1 = max(3, 1); lanes 2 and 3 are not read below. */
     const __m128 pairMax = _mm_max_ps(upperPair, p);

     /* Copy lane 1 into every lane so it lines up with lane 0. */
     const __m128 lane1 = _mm_shuffle_ps(pairMax, pairMax, _MM_SHUFFLE(1, 1, 1, 1));

     /* Scalar max of lane 0 against lane 1 is the overall maximum. */
     return _mm_cvtss_f32(_mm_max_ss(pairMax, lane1));
 }


Follow the project on Facebook: https://www.facebook.com/immersionengine
Follow me on Twitter: twitter.com/lefebv_l