Kaynağa Gözat

libswscale: Test AV_CPU_FLAG_SLOW_GATHER for hscale functions.

This check is made in addition to EXTERNAL_AVX2_FAST, so that the AVX2 hscale
functions are only used on CPUs where they are actually faster.
Alan Kelly 3 yıl önce
ebeveyn
işleme
eebe406c80
3 değiştirilmiş dosya ile 3 ekleme ve 3 silme
  1. 1 1
      libswscale/utils.c
  2. 1 1
      libswscale/x86/swscale.c
  3. 1 1
      tests/checkasm/sw_scale.c

+ 1 - 1
libswscale/utils.c

@@ -282,7 +282,7 @@ void ff_shuffle_filter_coefficients(SwsContext *c, int *filterPos, int filterSiz
 #if ARCH_X86_64
     int i, j, k, l;
     int cpu_flags = av_get_cpu_flags();
-    if (EXTERNAL_AVX2_FAST(cpu_flags)){
+    if (EXTERNAL_AVX2_FAST(cpu_flags) && !(cpu_flags & AV_CPU_FLAG_SLOW_GATHER)) {
         if ((c->srcBpc == 8) && (c->dstBpc <= 14)){
             if (dstW % 16 == 0){
                 if (filter != NULL){

+ 1 - 1
libswscale/x86/swscale.c

@@ -578,7 +578,7 @@ switch(c->dstBpc){ \
              break; \
     }
 
-    if (EXTERNAL_AVX2_FAST(cpu_flags)) {
+    if (EXTERNAL_AVX2_FAST(cpu_flags) && !(cpu_flags & AV_CPU_FLAG_SLOW_GATHER)) {
         if ((c->srcBpc == 8) && (c->dstBpc <= 14)) {
             if (c->chrDstW % 16 == 0)
                 ASSIGN_AVX2_SCALE_FUNC(c->hcScale, c->hChrFilterSize);

+ 1 - 1
tests/checkasm/sw_scale.c

@@ -217,7 +217,7 @@ static void check_hscale(void)
             }
             ff_sws_init_scale(ctx);
             memcpy(filterAvx2, filter, sizeof(uint16_t) * (SRC_PIXELS * MAX_FILTER_WIDTH + MAX_FILTER_WIDTH));
-            if (cpu_flags & AV_CPU_FLAG_AVX2)
+            if ((cpu_flags & AV_CPU_FLAG_AVX2) && !(cpu_flags & AV_CPU_FLAG_SLOW_GATHER))
                 ff_shuffle_filter_coefficients(ctx, filterPosAvx, width, filterAvx2, SRC_PIXELS);
 
             if (check_func(ctx->hcScale, "hscale_%d_to_%d_width%d", ctx->srcBpc, ctx->dstBpc + 1, width)) {