x86: replace explicit REP_RETs with RETs

From x86inc:
> On AMD cpus <=K10, an ordinary ret is slow if it immediately follows either
> a branch or a branch target. So switch to a 2-byte form of ret in that case.
> We can automatically detect "follows a branch", but not a branch target.
> (SSSE3 is a sufficient condition to know that your cpu doesn't have this problem.)
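
For reference, the two return forms differ only by a prefix byte that ret ignores; a minimal sketch of the encodings (byte values from the x86 ISA, not from this commit):

    ret      ; C3     (1 byte)  - ordinary near return
    rep ret  ; F3 C3  (2 bytes) - the F3 (rep) prefix is architecturally ignored
             ;                    on ret, but sidesteps the stall on AMD CPUs
             ;                    up to K10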

x86inc can automatically determine whether to use REP_RET rather than
RET in most of these cases, so the impact is minimal. Additionally, a few
REP_RETs were used unnecessarily, despite the return being nowhere near a
branch.
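
The automatic detection works roughly as follows: x86inc wraps every
conditional-jump mnemonic in a macro that records the address just past the
jump, and RET emits the rep prefix only when the current address matches it.
A paraphrased sketch, with the SSSE3 cpuflag guard, epilogue handling, and
function-size annotation omitted (x86inc.asm is the authoritative version):

    %define last_branch_adr $$          ; sentinel: start of the section

    %macro jl 1                         ; every jump mnemonic gets a wrapper
        jl %1                           ; emits the real instruction (NASM does
                                        ; not re-expand a macro inside itself)
        %xdefine last_branch_adr $      ; remember where the branch ended
    %endmacro

    %macro AUTO_REP_RET 0               ; what RET boils down to pre-SSSE3
        ; "times 1" iff $ == last_branch_adr, otherwise "times 0"
        times ((last_branch_adr-$)>>31)+1 rep
        ret
    %endmacro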

The only CPUs affected were AMD K10s, manufactured between 2007 and 2011,
i.e. 16 and 12 years ago, respectively.

In the future, everyone involved with x86inc should consider dropping
REP_RETs altogether.
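
If that happens, one minimal-churn option (a hypothetical sketch, not
something this commit does) would be to keep REP_RET as a plain alias so any
remaining users still assemble:

    ; hypothetical compatibility shim, not part of this commit
    %macro REP_RET 0
        RET          ; RET already picks "rep ret" automatically where it matters
    %endmacro
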
Lynne, 2 years ago
commit bbe95f7353
61 files changed, 223 additions and 223 deletions
   1. libavcodec/x86/aacpsdsp.asm (+5 -5)
   2. libavcodec/x86/ac3dsp.asm (+3 -3)
   3. libavcodec/x86/alacdsp.asm (+2 -2)
   4. libavcodec/x86/audiodsp.asm (+1 -1)
   5. libavcodec/x86/dirac_dwt.asm (+7 -7)
   6. libavcodec/x86/fft.asm (+4 -4)
   7. libavcodec/x86/flacdsp.asm (+4 -4)
   8. libavcodec/x86/h264_chromamc.asm (+9 -9)
   9. libavcodec/x86/h264_chromamc_10bit.asm (+5 -5)
  10. libavcodec/x86/h264_deblock_10bit.asm (+3 -3)
  11. libavcodec/x86/h264_idct.asm (+5 -5)
  12. libavcodec/x86/h264_idct_10bit.asm (+4 -4)
  13. libavcodec/x86/h264_intrapred.asm (+12 -12)
  14. libavcodec/x86/h264_intrapred_10bit.asm (+8 -8)
  15. libavcodec/x86/h264_qpel_10bit.asm (+1 -1)
  16. libavcodec/x86/h264_qpel_8bit.asm (+13 -13)
  17. libavcodec/x86/h264_weight.asm (+6 -6)
  18. libavcodec/x86/h264_weight_10bit.asm (+6 -6)
  19. libavcodec/x86/hevc_sao.asm (+1 -1)
  20. libavcodec/x86/hevc_sao_10bit.asm (+1 -1)
  21. libavcodec/x86/hpeldsp.asm (+10 -10)
  22. libavcodec/x86/hpeldsp_vp3.asm (+2 -2)
  23. libavcodec/x86/huffyuvdsp.asm (+1 -1)
  24. libavcodec/x86/jpeg2000dsp.asm (+2 -2)
  25. libavcodec/x86/lossless_videodsp.asm (+1 -1)
  26. libavcodec/x86/lossless_videoencdsp.asm (+1 -1)
  27. libavcodec/x86/me_cmp.asm (+1 -1)
  28. libavcodec/x86/pngdsp.asm (+1 -1)
  29. libavcodec/x86/qpel.asm (+3 -3)
  30. libavcodec/x86/qpeldsp.asm (+6 -6)
  31. libavcodec/x86/rv34dsp.asm (+1 -1)
  32. libavcodec/x86/rv40dsp.asm (+5 -5)
  33. libavcodec/x86/sbrdsp.asm (+6 -6)
  34. libavcodec/x86/takdsp.asm (+4 -4)
  35. libavcodec/x86/utvideodsp.asm (+2 -2)
  36. libavcodec/x86/v210.asm (+1 -1)
  37. libavcodec/x86/vc1dsp_mc.asm (+1 -1)
  38. libavcodec/x86/videodsp.asm (+1 -1)
  39. libavcodec/x86/vp8dsp.asm (+14 -14)
  40. libavfilter/x86/af_volume.asm (+3 -3)
  41. libavfilter/x86/avf_showcqt.asm (+2 -2)
  42. libavfilter/x86/scene_sad.asm (+1 -1)
  43. libavfilter/x86/vf_blend.asm (+1 -1)
  44. libavfilter/x86/vf_framerate.asm (+1 -1)
  45. libavfilter/x86/vf_gradfun.asm (+3 -3)
  46. libavfilter/x86/vf_hqdn3d.asm (+1 -1)
  47. libavfilter/x86/vf_interlace.asm (+3 -3)
  48. libavfilter/x86/vf_maskedmerge.asm (+1 -1)
  49. libavfilter/x86/vf_stereo3d.asm (+1 -1)
  50. libavfilter/x86/vf_w3fdif.asm (+5 -5)
  51. libavutil/x86/float_dsp.asm (+9 -9)
  52. libavutil/x86/lls.asm (+2 -2)
  53. libswresample/x86/audio_convert.asm (+6 -6)
  54. libswresample/x86/rematrix.asm (+4 -4)
  55. libswscale/x86/input.asm (+7 -7)
  56. libswscale/x86/output.asm (+5 -5)
  57. libswscale/x86/scale.asm (+1 -1)
  58. libswscale/x86/scale_avx2.asm (+1 -1)
  59. libswscale/x86/yuv2yuvX.asm (+1 -1)
  60. libswscale/x86/yuv_2_rgb.asm (+1 -1)
  61. tests/checkasm/x86/checkasm.asm (+1 -1)

+ 5 - 5
libavcodec/x86/aacpsdsp.asm

@@ -49,7 +49,7 @@ align 16
     add  dstq, mmsize
     add    nq, mmsize*2
     jl .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM sse
@@ -83,7 +83,7 @@ align 16
     add   src2q, mmsize
     add      nq, mmsize*2
     jl .loop
-    REP_RET
+    RET
 
 ;***********************************************************************
 ;void ff_ps_stereo_interpolate_sse3(float (*l)[2], float (*r)[2],
@@ -116,7 +116,7 @@ align 16
     movhps [rq+nq], m2
     add      nq, 8
     jl .loop
-    REP_RET
+    RET
 
 ;***************************************************************************
 ;void ps_stereo_interpolate_ipdopd_sse3(float (*l)[2], float (*r)[2],
@@ -164,7 +164,7 @@ align 16
     movhps [rq+nq], m2
     add      nq, 8
     jl .loop
-    REP_RET
+    RET
 
 ;**********************************************************
 ;void ps_hybrid_analysis_ileave_sse(float out[2][38][64],
@@ -484,7 +484,7 @@ align 16
     add    outq, strideq
     add      nq, 64
     jl .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM sse

+ 3 - 3
libavcodec/x86/ac3dsp.asm

@@ -60,7 +60,7 @@ cglobal ac3_exponent_min, 3, 4, 2, exp, reuse_blks, expn, offset
     sub        expnq, mmsize
     jg .nextexp
 .end:
-    REP_RET
+    RET
 %endmacro
 
 %define LOOP_ALIGN ALIGN 16
@@ -126,7 +126,7 @@ cglobal float_to_fixed24, 3, 3, 9, dst, src, len
     sub      lenq, 16
 %endif
     ja .loop
-    REP_RET
+    RET
 
 ;------------------------------------------------------------------------------
 ; int ff_ac3_compute_mantissa_size(uint16_t mant_cnt[6][16])
@@ -220,7 +220,7 @@ cglobal ac3_extract_exponents, 3, 3, 4, exp, coef, len
 
     add     lenq, 4
     jl .loop
-    REP_RET
+    RET
 %endmacro
 
 %if HAVE_SSE2_EXTERNAL

+ 2 - 2
libavcodec/x86/alacdsp.asm

@@ -100,7 +100,7 @@ align 16
 
     add     lenq, mmsize*2
     jl .loop
-    REP_RET
+    RET
 
 %if ARCH_X86_64
 cglobal alac_append_extra_bits_mono, 2, 5, 3, buf, exbuf, exbits, ch, len
@@ -130,4 +130,4 @@ align 16
 
     add     lenq, mmsize*2
     jl .loop
-    REP_RET
+    RET

+ 1 - 1
libavcodec/x86/audiodsp.asm

@@ -123,7 +123,7 @@ cglobal vector_clip_int32%5, 5,5,%1, dst, src, min, max, len
     add     dstq, mmsize*4*(%2+%3)
     sub     lend, mmsize*(%2+%3)
     jg .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM sse2

+ 7 - 7
libavcodec/x86/dirac_dwt.asm

@@ -75,7 +75,7 @@ cglobal vertical_compose53iL0_%1, 4,4,1, b0, b1, b2, width
     COMPOSE_53iL0 m0, m1, [b2q+2*widthq], m2
     mova    [b1q+2*widthq], m0
     jg      .loop
-    REP_RET
+    RET
 
 ; void vertical_compose_dirac53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
 ;                                  int width)
@@ -93,7 +93,7 @@ cglobal vertical_compose_dirac53iH0_%1, 4,4,1, b0, b1, b2, width
     paddw   m0, [b1q+2*widthq]
     mova    [b1q+2*widthq], m0
     jg      .loop
-    REP_RET
+    RET
 
 ; void vertical_compose_dd97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
 ;                               IDWTELEM *b3, IDWTELEM *b4, int width)
@@ -110,7 +110,7 @@ cglobal vertical_compose_dd97iH0_%1, 6,6,5, b0, b1, b2, b3, b4, width
     COMPOSE_DD97iH0 [b2q+2*widthq], [b3q+2*widthq], [b4q+2*widthq]
     mova    [b2q+2*widthq], m1
     jg      .loop
-    REP_RET
+    RET
 
 ; void vertical_compose_dd137iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
 ;                                IDWTELEM *b3, IDWTELEM *b4, int width)
@@ -139,7 +139,7 @@ cglobal vertical_compose_dd137iL0_%1, 6,6,6, b0, b1, b2, b3, b4, width
     psubw   m5, m1
     mova    [b2q+2*widthq], m5
     jg      .loop
-    REP_RET
+    RET
 
 ; void vertical_compose_haar(IDWTELEM *b0, IDWTELEM *b1, int width)
 cglobal vertical_compose_haar_%1, 3,4,3, b0, b1, width
@@ -159,7 +159,7 @@ cglobal vertical_compose_haar_%1, 3,4,3, b0, b1, width
     paddw   m2, m0
     mova    [b1q+2*widthq], m2
     jg      .loop
-    REP_RET
+    RET
 %endmacro
 
 ; extend the left and right edges of the tmp array by %1 and %2 respectively
@@ -225,7 +225,7 @@ cglobal horizontal_compose_haar%2i_%1, 3,6,4, b, tmp, w, x, w2, b_w2
     cmp     xq, w2q
     jl      .highpass_loop
 .end:
-    REP_RET
+    RET
 %endmacro
 
 
@@ -290,7 +290,7 @@ cglobal horizontal_compose_dd97i_ssse3, 3,6,8, b, tmp, w, x, w2, b_w2
     cmp     xd, w2d
     jl      .highpass_loop
 .end:
-    REP_RET
+    RET
 
 
 INIT_XMM

+ 4 - 4
libavcodec/x86/fft.asm

@@ -475,7 +475,7 @@ cglobal fft_calc, 2,5,8
     mov     r0, r1
     mov     r1, r3
     FFT_DISPATCH _interleave %+ SUFFIX, r1
-    REP_RET
+    RET
 
 %endif
 
@@ -510,7 +510,7 @@ cglobal fft_calc, 2,5,8
     add      r2, mmsize*2
     jl       .loop
 .end:
-    REP_RET
+    RET
 
 cglobal fft_permute, 2,7,1
     mov     r4,  [r0 + FFTContext.revtab]
@@ -543,7 +543,7 @@ cglobal fft_permute, 2,7,1
     movaps  [r1 + r2 + 16], xmm1
     add     r2, 32
     jl      .loopcopy
-    REP_RET
+    RET
 
 INIT_XMM sse
 cglobal imdct_calc, 3,5,3
@@ -583,7 +583,7 @@ cglobal imdct_calc, 3,5,3
     sub     r3, mmsize
     add     r2, mmsize
     jl      .loop
-    REP_RET
+    RET
 
 %ifdef PIC
 %define SECTION_REL - $$

+ 4 - 4
libavcodec/x86/flacdsp.asm

@@ -79,7 +79,7 @@ ALIGN 16
     movd   [decodedq+4], m1
     jg .loop_sample
 .ret:
-    REP_RET
+    RET
 %endmacro
 
 %if HAVE_XOP_EXTERNAL
@@ -133,7 +133,7 @@ align 16
     mova [outq + lenq], m%2
     add      lenq, 16
     jl .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM sse2
@@ -177,7 +177,7 @@ align 16
     add      outq, mmsize*2
     sub      lend, mmsize/4
     jg .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM sse2
@@ -302,7 +302,7 @@ align 16
     add      outq, mmsize*REPCOUNT
     sub      lend, mmsize/4
     jg .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM ssse3

+ 9 - 9
libavcodec/x86/h264_chromamc.asm

@@ -112,7 +112,7 @@ cglobal %1_%2_chroma_mc8%3, 6, 7 + extra_regs, 0
     jne .at_least_one_non_zero
     ; mx == 0 AND my == 0 - no filter needed
     mv0_pixels_mc8
-    REP_RET
+    RET
 
 .at_least_one_non_zero:
 %ifidn %2, rv40
@@ -192,7 +192,7 @@ cglobal %1_%2_chroma_mc8%3, 6, 7 + extra_regs, 0
     add           r1, r2
     dec           r3d
     jne .next1drow
-    REP_RET
+    RET
 
 .both_non_zero: ; general case, bilinear
     movd          m4, r4d         ; x
@@ -365,7 +365,7 @@ cglobal %1_%2_chroma_mc4, 6, 6 + extra_regs, 0
     add           r0, r2
     sub          r3d, 2
     jnz .next2rows
-    REP_RET
+    RET
 %endmacro
 
 %macro chroma_mc2_mmx_func 2
@@ -407,7 +407,7 @@ cglobal %1_%2_chroma_mc2, 6, 7, 0
     add           r0, r2
     sub          r3d, 1
     jnz .nextrow
-    REP_RET
+    RET
 %endmacro
 
 %define rnd_1d_h264 pw_4
@@ -453,7 +453,7 @@ cglobal %1_%2_chroma_mc8%3, 6, 7, 8
     jne .at_least_one_non_zero
     ; mx == 0 AND my == 0 - no filter needed
     mv0_pixels_mc8
-    REP_RET
+    RET
 
 .at_least_one_non_zero:
     test         r5d, r5d
@@ -514,7 +514,7 @@ cglobal %1_%2_chroma_mc8%3, 6, 7, 8
     sub          r3d, 2
     lea           r0, [r0+r2*2]
     jg .next2rows
-    REP_RET
+    RET
 
 .my_is_zero:
     mov          r5d, r4d
@@ -551,7 +551,7 @@ cglobal %1_%2_chroma_mc8%3, 6, 7, 8
     lea           r0, [r0+r2*2]
     lea           r1, [r1+r2*2]
     jg .next2xrows
-    REP_RET
+    RET
 
 .mx_is_zero:
     mov          r4d, r5d
@@ -588,7 +588,7 @@ cglobal %1_%2_chroma_mc8%3, 6, 7, 8
     sub          r3d, 2
     lea           r0, [r0+r2*2]
     jg .next2yrows
-    REP_RET
+    RET
 %endmacro
 
 %macro chroma_mc4_ssse3_func 2
@@ -638,7 +638,7 @@ cglobal %1_%2_chroma_mc4, 6, 7, 0
     sub          r3d, 2
     lea           r0, [r0+r2*2]
     jg .next2rows
-    REP_RET
+    RET
 %endmacro
 
 %define CHROMAMC_AVG NOTHING

+ 5 - 5
libavcodec/x86/h264_chromamc_10bit.asm

@@ -67,7 +67,7 @@ cglobal %1_h264_chroma_mc8_10, 6,7,8
     jne .at_least_one_non_zero
     ; mx == 0 AND my == 0 - no filter needed
     MV0_PIXELS_MC8
-    REP_RET
+    RET
 
 .at_least_one_non_zero:
     mov          r6d, 2
@@ -102,7 +102,7 @@ cglobal %1_h264_chroma_mc8_10, 6,7,8
     add           r1, r2
     dec           r3d
     jne .next1drow
-    REP_RET
+    RET
 
 .xy_interpolation: ; general case, bilinear
     movd          m4, r4m         ; x
@@ -144,7 +144,7 @@ cglobal %1_h264_chroma_mc8_10, 6,7,8
     add           r0, r2
     dec          r3d
     jne .next2drow
-    REP_RET
+    RET
 %endmacro
 
 ;-----------------------------------------------------------------------------
@@ -194,7 +194,7 @@ cglobal %1_h264_chroma_mc4_10, 6,6,7
     MC4_OP m6, m0
     sub   r3d, 2
     jnz .next2rows
-    REP_RET
+    RET
 %endmacro
 
 ;-----------------------------------------------------------------------------
@@ -234,7 +234,7 @@ cglobal %1_h264_chroma_mc2_10, 6,7
     add           r0, r2
     dec          r3d
     jnz .nextrow
-    REP_RET
+    RET
 %endmacro
 
 %macro NOTHING 2-3

+ 3 - 3
libavcodec/x86/h264_deblock_10bit.asm

@@ -372,7 +372,7 @@ cglobal deblock_v_luma_10, 5,5,15
     add         r4, 2
     dec         r3
     jg .loop
-    REP_RET
+    RET
 
 cglobal deblock_h_luma_10, 5,7,15
     shl        r2d, 2
@@ -411,7 +411,7 @@ cglobal deblock_h_luma_10, 5,7,15
     lea         r5, [r5+r1*8]
     dec         r6
     jg .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM sse2
@@ -648,7 +648,7 @@ cglobal deblock_v_luma_intra_10, 4,7,16
     add     r4, mmsize
     dec     r6
     jg .loop
-    REP_RET
+    RET
 
 ;-----------------------------------------------------------------------------
 ; void ff_deblock_h_luma_intra_10(uint16_t *pix, int stride, int alpha,

+ 5 - 5
libavcodec/x86/h264_idct.asm

@@ -354,7 +354,7 @@ INIT_MMX cpuname
     add          r2, 128
     cmp          r5, 16
     jl .nextblock
-    REP_RET
+    RET
 .no_dc:
 INIT_XMM cpuname
     mov       dst2d, dword [r1+r5*4]
@@ -368,7 +368,7 @@ INIT_XMM cpuname
     add          r2, 128
     cmp          r5, 16
     jl .nextblock
-    REP_RET
+    RET
 
 INIT_MMX mmx
 h264_idct_add8_mmx_plane:
@@ -508,7 +508,7 @@ cglobal h264_idct_add16_8, 5, 5 + ARCH_X86_64, 8
     add16_sse2_cycle 5, 0x24
     add16_sse2_cycle 6, 0x1e
     add16_sse2_cycle 7, 0x26
-REP_RET
+RET
 
 %macro add16intra_sse2_cycle 2
     movzx       r0, word [r4+%2]
@@ -555,7 +555,7 @@ cglobal h264_idct_add16intra_8, 5, 7 + ARCH_X86_64, 8
     add16intra_sse2_cycle 5, 0x24
     add16intra_sse2_cycle 6, 0x1e
     add16intra_sse2_cycle 7, 0x26
-REP_RET
+RET
 
 %macro add8_sse2_cycle 2
     movzx       r0, word [r4+%2]
@@ -610,7 +610,7 @@ cglobal h264_idct_add8_8, 5, 7 + ARCH_X86_64, 8
 %endif
     add8_sse2_cycle 2, 0x5c
     add8_sse2_cycle 3, 0x64
-REP_RET
+RET
 
 ;void ff_h264_luma_dc_dequant_idct_mmx(int16_t *output, int16_t *input, int qmul)
 

+ 4 - 4
libavcodec/x86/h264_idct_10bit.asm

@@ -155,7 +155,7 @@ cglobal h264_idct_add16_10, 5,6
     ADD16_OP 13, 7+3*8
     ADD16_OP 14, 6+4*8
     ADD16_OP 15, 7+4*8
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM sse2
@@ -292,7 +292,7 @@ cglobal h264_idct_add16intra_10,5,7,8
     ADD16_OP_INTRA 10, 4+4*8
     ADD16_OP_INTRA 12, 6+3*8
     ADD16_OP_INTRA 14, 6+4*8
-    REP_RET
+    RET
     AC 8
     AC 10
     AC 12
@@ -335,7 +335,7 @@ cglobal h264_idct_add8_10,5,8,7
 %endif
     ADD16_OP_INTRA 32, 4+11*8
     ADD16_OP_INTRA 34, 4+12*8
-    REP_RET
+    RET
     AC 16
     AC 18
     AC 32
@@ -384,7 +384,7 @@ cglobal h264_idct_add8_422_10, 5, 8, 7
     ADD16_OP_INTRA 34, 4+12*8
     ADD16_OP_INTRA 40, 4+13*8 ; i+4
     ADD16_OP_INTRA 42, 4+14*8 ; i+4
-REP_RET
+RET
     AC 16
     AC 18
     AC 24 ; i+4

+ 12 - 12
libavcodec/x86/h264_intrapred.asm

@@ -62,7 +62,7 @@ cglobal pred16x16_vertical_8, 2,3
     lea   r0, [r0+r1*2]
     dec   r2
     jg .loop
-    REP_RET
+    RET
 
 ;-----------------------------------------------------------------------------
 ; void ff_pred16x16_horizontal_8(uint8_t *src, ptrdiff_t stride)
@@ -95,7 +95,7 @@ cglobal pred16x16_horizontal_8, 2,3
     lea       r0, [r0+r1*2]
     dec       r2
     jg .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_MMX mmxext
@@ -146,7 +146,7 @@ cglobal pred16x16_dc_8, 2,7
     lea   r4, [r4+r1*2]
     dec   r3d
     jg .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM sse2
@@ -192,7 +192,7 @@ cglobal pred16x16_tm_vp8_8, 2,6,6
     lea          r0, [r0+r1*2]
     dec         r5d
     jg .loop
-    REP_RET
+    RET
 
 %if HAVE_AVX2_EXTERNAL
 INIT_YMM avx2
@@ -228,7 +228,7 @@ cglobal pred16x16_tm_vp8_8, 2, 4, 5, dst, stride, stride3, iteration
     lea                       dstq, [dstq+strideq*4]
     dec                 iterationd
     jg .loop
-    REP_RET
+    RET
 %endif
 
 ;-----------------------------------------------------------------------------
@@ -427,7 +427,7 @@ cglobal pred16x16_plane_%1_8, 2,9,7
     lea          r0, [r0+r2*2]
     dec          r4
     jg .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM sse2
@@ -556,7 +556,7 @@ ALIGN 16
     lea          r0, [r0+r2*2]
     dec          r4
     jg .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM sse2
@@ -599,7 +599,7 @@ cglobal pred8x8_horizontal_8, 2,3
     lea       r0, [r0+r1*2]
     dec       r2
     jg .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_MMX mmxext
@@ -737,7 +737,7 @@ cglobal pred8x8_dc_rv40_8, 2,7
     lea   r4, [r4+r1*2]
     dec   r3d
     jg .loop
-    REP_RET
+    RET
 
 ;-----------------------------------------------------------------------------
 ; void ff_pred8x8_tm_vp8_8(uint8_t *src, ptrdiff_t stride)
@@ -770,7 +770,7 @@ cglobal pred8x8_tm_vp8_8, 2,6,4
     lea          r0, [r0+r1*2]
     dec         r5d
     jg .loop
-    REP_RET
+    RET
 
 INIT_XMM ssse3
 cglobal pred8x8_tm_vp8_8, 2,3,6
@@ -797,7 +797,7 @@ cglobal pred8x8_tm_vp8_8, 2,3,6
     lea          r0, [r0+r1*2]
     dec         r2d
     jg .loop
-    REP_RET
+    RET
 
 ; dest, left, right, src, tmp
 ; output: %1 = (t[n-1] + t[n]*2 + t[n+1] + 2) >> 2
@@ -1802,7 +1802,7 @@ cglobal pred4x4_tm_vp8_8, 3,6
     lea        r0, [r0+r2*2]
     dec       r5d
     jg .loop
-    REP_RET
+    RET
 
 INIT_XMM ssse3
 cglobal pred4x4_tm_vp8_8, 3,3

+ 8 - 8
libavcodec/x86/h264_intrapred_10bit.asm

@@ -327,7 +327,7 @@ cglobal pred8x8_horizontal_10, 2, 3
     lea          r0, [r0+r1*2]
     dec          r2d
     jg .loop
-    REP_RET
+    RET
 
 ;-----------------------------------------------------------------------------
 ; void ff_predict_8x8_dc_10(pixel *src, ptrdiff_t stride)
@@ -481,7 +481,7 @@ cglobal pred8x8_plane_10, 2, 7, 7
     add       r0, r1
     dec r2d
     jg .loop
-    REP_RET
+    RET
 
 
 ;-----------------------------------------------------------------------------
@@ -994,7 +994,7 @@ cglobal pred16x16_vertical_10, 2, 3
     lea   r0, [r0+r1*2]
     dec   r2d
     jg .loop
-    REP_RET
+    RET
 
 ;-----------------------------------------------------------------------------
 ; void ff_pred16x16_horizontal_10(pixel *src, ptrdiff_t stride)
@@ -1012,7 +1012,7 @@ cglobal pred16x16_horizontal_10, 2, 3
     lea    r0, [r0+r1*2]
     dec    r2d
     jg .vloop
-    REP_RET
+    RET
 
 ;-----------------------------------------------------------------------------
 ; void ff_pred16x16_dc_10(pixel *src, ptrdiff_t stride)
@@ -1048,7 +1048,7 @@ cglobal pred16x16_dc_10, 2, 6
     lea        r5, [r5+r1*2]
     dec       r3d
     jg .loop
-    REP_RET
+    RET
 
 ;-----------------------------------------------------------------------------
 ; void ff_pred16x16_top_dc_10(pixel *src, ptrdiff_t stride)
@@ -1070,7 +1070,7 @@ cglobal pred16x16_top_dc_10, 2, 3
     lea        r0, [r0+r1*2]
     dec       r2d
     jg .loop
-    REP_RET
+    RET
 
 ;-----------------------------------------------------------------------------
 ; void ff_pred16x16_left_dc_10(pixel *src, ptrdiff_t stride)
@@ -1101,7 +1101,7 @@ cglobal pred16x16_left_dc_10, 2, 6
     lea        r5, [r5+r1*2]
     dec       r3d
     jg .loop
-    REP_RET
+    RET
 
 ;-----------------------------------------------------------------------------
 ; void ff_pred16x16_128_dc_10(pixel *src, ptrdiff_t stride)
@@ -1116,4 +1116,4 @@ cglobal pred16x16_128_dc_10, 2,3
     lea        r0, [r0+r1*2]
     dec       r2d
     jg .loop
-    REP_RET
+    RET

+ 1 - 1
libavcodec/x86/h264_qpel_10bit.asm

@@ -211,7 +211,7 @@ cglobal %1_h264_qpel16_mc00_10, 3,4
     lea            r1, [r1+r2*2]
     dec r3d
     jg .loop
-    REP_RET
+    RET
 %endmacro
 
 %define OP_MOV mova

+ 13 - 13
libavcodec/x86/h264_qpel_8bit.asm

@@ -89,7 +89,7 @@ cglobal %1_h264_qpel4_h_lowpass, 4,5 ; dst, src, dstStride, srcStride
     add           r1, r3
     dec          r4d
     jg         .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_MMX mmxext
@@ -149,7 +149,7 @@ cglobal %1_h264_qpel8_h_lowpass, 4,5 ; dst, src, dstStride, srcStride
     add           r1, r3
     dec          r4d
     jg         .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_MMX mmxext
@@ -192,7 +192,7 @@ cglobal %1_h264_qpel8_h_lowpass, 4,5,8 ; dst, src, dstStride, srcStride
     add           r0, r2
     dec          r4d
     jne        .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM ssse3
@@ -239,7 +239,7 @@ cglobal %1_h264_qpel4_h_lowpass_l2, 5,6 ; dst, src, src2, dstStride, srcStride
     add           r2, r4
     dec          r5d
     jg         .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_MMX mmxext
@@ -303,7 +303,7 @@ cglobal %1_h264_qpel8_h_lowpass_l2, 5,6 ; dst, src, src2, dstStride, srcStride
     add           r2, r4
     dec          r5d
     jg         .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_MMX mmxext
@@ -350,7 +350,7 @@ cglobal %1_h264_qpel8_h_lowpass_l2, 5,6,8 ; dst, src, src2, dstStride, src2Strid
     add           r2, r4
     dec          r5d
     jg         .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM ssse3
@@ -458,7 +458,7 @@ cglobal %1_h264_qpel8or16_v_lowpass_op, 5,5,8 ; dst, src, dstStride, srcStride,
     FILT_V        %1
     FILT_V        %1
 .end:
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM sse2
@@ -531,7 +531,7 @@ cglobal %1_h264_qpel4_hv_lowpass_h, 3,4 ; tmp, dst, dstStride
     add           r1, r2
     dec          r3d
     jnz        .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_MMX mmxext
@@ -574,7 +574,7 @@ cglobal %1_h264_qpel8or16_hv1_lowpass_op, 4,4,8 ; src, tmp, srcStride, size
     FILT_HV    14*48
     FILT_HV    15*48
 .end:
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM sse2
@@ -619,7 +619,7 @@ cglobal %1_h264_qpel8or16_hv2_lowpass_op, 5,5 ; dst, tmp, dstStride, unused, h
     add           r0, r2
     dec          r4d
     jne        .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_MMX mmxext
@@ -710,7 +710,7 @@ cglobal %1_h264_qpel8or16_hv2_lowpass, 5,5,8 ; dst, tmp, dstStride, tmpStride, s
     dec          r4d
     jne        .op16
 .done:
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM ssse3
@@ -776,7 +776,7 @@ cglobal %1_pixels8_l2_shift5, 6, 6 ; dst, src16, src8, dstStride, src8Stride, h
     lea           r0, [r0+2*r3]
     sub          r5d, 2
     jne        .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_MMX mmxext
@@ -845,7 +845,7 @@ cglobal %1_h264_qpel16_h_lowpass_l2, 5, 6, 16 ; dst, src, src2, dstStride, src2S
     add           r2, r4
     dec          r5d
     jg         .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM ssse3

+ 6 - 6
libavcodec/x86/h264_weight.asm

@@ -79,7 +79,7 @@ cglobal h264_weight_%1, 6, 6, %2
     add        r0, r1
     dec        r2d
     jnz .nextrow
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM sse2
@@ -102,7 +102,7 @@ cglobal h264_weight_%1, 6, 6, %2
     add        r0, r3
     dec        r2d
     jnz .nextrow
-    REP_RET
+    RET
 %endmacro
 
 INIT_MMX mmxext
@@ -196,7 +196,7 @@ cglobal h264_biweight_%1, 7, 8, %2
     add        r1, r2
     dec        r3d
     jnz .nextrow
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM sse2
@@ -223,7 +223,7 @@ cglobal h264_biweight_%1, 7, 8, %2
     add        r1, r4
     dec        r3d
     jnz .nextrow
-    REP_RET
+    RET
 %endmacro
 
 INIT_MMX mmxext
@@ -258,7 +258,7 @@ cglobal h264_biweight_16, 7, 8, 8
     add        r1, r2
     dec        r3d
     jnz .nextrow
-    REP_RET
+    RET
 
 INIT_XMM ssse3
 cglobal h264_biweight_8, 7, 8, 8
@@ -281,4 +281,4 @@ cglobal h264_biweight_8, 7, 8, 8
     add        r1, r4
     dec        r3d
     jnz .nextrow
-    REP_RET
+    RET

+ 6 - 6
libavcodec/x86/h264_weight_10bit.asm

@@ -101,7 +101,7 @@ cglobal h264_weight_16_10
     add       r0, r1
     dec       r2d
     jnz .nextrow
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM sse2
@@ -120,7 +120,7 @@ cglobal h264_weight_8_10
     add        r0, r1
     dec        r2d
     jnz .nextrow
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM sse2
@@ -142,7 +142,7 @@ cglobal h264_weight_4_10
     add         r0, r3
     dec         r2d
     jnz .nextrow
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM sse2
@@ -234,7 +234,7 @@ cglobal h264_biweight_16_10
     add       r1, r2
     dec       r3d
     jnz .nextrow
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM sse2
@@ -253,7 +253,7 @@ cglobal h264_biweight_8_10
     add      r1, r2
     dec      r3d
     jnz .nextrow
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM sse2
@@ -275,7 +275,7 @@ cglobal h264_biweight_4_10
     add         r1, r4
     dec         r3d
     jnz .nextrow
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM sse2

+ 1 - 1
libavcodec/x86/hevc_sao.asm

@@ -166,7 +166,7 @@ INIT_YMM cpuname
     add             srcq, srcstrideq             ; src += srcstride
     dec          heightd                         ; cmp height
     jnz               .loop                      ; height loop
-    REP_RET
+    RET
 %endmacro
 
 

+ 1 - 1
libavcodec/x86/hevc_sao_10bit.asm

@@ -145,7 +145,7 @@ align 16
     add             srcq, srcstrideq
     dec          heightd
     jg .loop
-    REP_RET
+    RET
 %endmacro
 
 %macro HEVC_SAO_BAND_FILTER_FUNCS 0

+ 10 - 10
libavcodec/x86/hpeldsp.asm

@@ -78,7 +78,7 @@ cglobal put_pixels8_x2, 4,5
     add          r0, r4
     sub         r3d, 4
     jne .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_MMX mmxext
@@ -120,7 +120,7 @@ cglobal put_pixels16_x2, 4,5
     add          r0, r4
     sub         r3d, 4
     jne .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_MMX mmxext
@@ -162,7 +162,7 @@ cglobal put_no_rnd_pixels8_x2, 4,5
     add          r0, r4
     sub         r3d, 4
     jne .loop
-    REP_RET
+    RET
 
 
 ; void ff_put_pixels8_y2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
@@ -194,7 +194,7 @@ cglobal put_pixels8_y2, 4,5
     add          r0, r4
     sub         r3d, 4
     jne .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_MMX mmxext
@@ -232,7 +232,7 @@ cglobal put_no_rnd_pixels8_y2, 4,5
     add          r0, r4
     sub         r3d, 4
     jne .loop
-    REP_RET
+    RET
 
 
 ; void ff_avg_pixels8_x2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
@@ -280,7 +280,7 @@ cglobal avg_pixels8_x2, 4,5
     add          r0, r4
     sub         r3d, 4
     jne .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_MMX mmxext
@@ -323,7 +323,7 @@ cglobal avg_pixels8_y2, 4,5
     add          r0, r4
     sub         r3d, 4
     jne .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_MMX mmxext
@@ -370,7 +370,7 @@ cglobal avg_approx_pixels8_xy2, 4,5
     add          r0, r4
     sub         r3d, 4
     jne .loop
-    REP_RET
+    RET
 
 
 ; void ff_avg_pixels16_xy2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
@@ -448,7 +448,7 @@ cglobal %1_pixels8_xy2, 4,5
     add         r4, r2
     sub        r3d, 2
     jnz .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_MMX mmxext
@@ -514,7 +514,7 @@ cglobal %1_pixels8_xy2, 4,5
     add         r4, r2
     sub        r3d, 2
     jnz .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_MMX ssse3

+ 2 - 2
libavcodec/x86/hpeldsp_vp3.asm

@@ -60,7 +60,7 @@ cglobal put_no_rnd_pixels8_x2_exact, 4,5
     lea          r0, [r0+r2*4]
     sub         r3d, 4
     jg .loop
-    REP_RET
+    RET
 
 
 ; void ff_put_no_rnd_pixels8_y2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
@@ -96,4 +96,4 @@ cglobal put_no_rnd_pixels8_y2_exact, 4,5
     lea          r0, [r0+r2*4]
     sub         r3d, 4
     jg .loop
-    REP_RET
+    RET

+ 1 - 1
libavcodec/x86/huffyuvdsp.asm

@@ -74,7 +74,7 @@ cglobal add_hfyu_left_pred_bgr32, 4,4,3, dst, src, w, left
     jl         .loop
     movd          m0, [dstq-4]
     movd     [leftq], m0
-    REP_RET
+    RET
 
 
 ; void add_hfyu_median_prediction_mmxext(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int mask, int w, int *left, int *left_top)

+ 2 - 2
libavcodec/x86/jpeg2000dsp.asm

@@ -113,7 +113,7 @@ align 16
     movaps   [src1q+csizeq], m5
     add  csizeq, mmsize
     jl .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM sse
@@ -153,7 +153,7 @@ align 16
     mova   [src0q+csizeq], m2
     add  csizeq, mmsize
     jl .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM sse2

+ 1 - 1
libavcodec/x86/lossless_videodsp.asm

@@ -229,7 +229,7 @@ cglobal add_bytes, 3,4,2, dst, src, w, size
     inc     wq
     jl .3
 .end:
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM sse2

+ 1 - 1
libavcodec/x86/lossless_videoencdsp.asm

@@ -110,7 +110,7 @@ cglobal diff_bytes, 4,5,2, dst, src1, src2, w
     inc               wq
         jl .loop_gpr_%1%2
 .end_%1%2:
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM sse2

+ 1 - 1
libavcodec/x86/me_cmp.asm

@@ -458,7 +458,7 @@ cglobal hf_noise%1, 3,3,0, pix1, lsize, h
     psrlq      m6, 32
     paddd      m0, m6
     movd      eax, m0   ; eax = result of hf_noise8;
-    REP_RET                 ; return eax;
+    RET                 ; return eax;
 %endmacro
 
 INIT_MMX mmx

+ 1 - 1
libavcodec/x86/pngdsp.asm

@@ -75,7 +75,7 @@ cglobal add_bytes_l2, 4, 6, 2, dst, src1, src2, wa, w, i
 .end_s:
     cmp                 iq, wq
     jl .loop_s
-    REP_RET
+    RET
 
 %macro ADD_PAETH_PRED_FN 1
 cglobal add_png_paeth_prediction, 5, 7, %1, dst, src, top, w, bpp, end, cntr

+ 3 - 3
libavcodec/x86/qpel.asm

@@ -81,7 +81,7 @@ cglobal %1_pixels4_l2, 6,6
     add          r2, 16
     sub         r5d, 4
     jne       .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_MMX mmxext
@@ -125,7 +125,7 @@ cglobal %1_pixels8_l2, 6,6
     add          r2, 32
     sub         r5d, 4
     jne       .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_MMX mmxext
@@ -171,7 +171,7 @@ cglobal %1_pixels16_l2, 6,6
     add          r2, 32
     sub         r5d, 2
     jne       .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_MMX mmxext

+ 6 - 6
libavcodec/x86/qpeldsp.asm

@@ -92,7 +92,7 @@ cglobal put_no_rnd_pixels8_l2, 6,6
     add          r2, 32
     sub         r5d, 4
     jne .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_MMX mmxext
@@ -161,7 +161,7 @@ cglobal put_no_rnd_pixels16_l2, 6,6
     add          r2, 32
     sub         r5d, 2
     jne .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_MMX mmxext
@@ -274,7 +274,7 @@ cglobal %1_mpeg4_qpel16_h_lowpass, 5, 5, 0, 16
     add          r0, r2
     dec r4d
     jne .loop
-    REP_RET
+    RET
 %endmacro
 
 %macro PUT_OP 2-3
@@ -357,7 +357,7 @@ cglobal %1_mpeg4_qpel8_h_lowpass, 5, 5, 0, 8
     add          r0, r2
     dec r4d
     jne .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_MMX mmxext
@@ -466,7 +466,7 @@ cglobal %1_mpeg4_qpel16_v_lowpass, 4, 6, 0, 544
     add    r0, r1
     dec r4d
     jne .loopv
-    REP_RET
+    RET
 %endmacro
 
 %macro PUT_OPH 2-3
@@ -543,7 +543,7 @@ cglobal %1_mpeg4_qpel8_v_lowpass, 4, 6, 0, 288
     add    r0, r1
     dec r4d
     jne .loopv
-    REP_RET
+    RET
 %endmacro
 
 INIT_MMX mmxext

+ 1 - 1
libavcodec/x86/rv34dsp.asm

@@ -54,7 +54,7 @@ cglobal rv34_idct_dc_noround, 1, 2, 0
     movq    [r0+ 8], m0
     movq    [r0+16], m0
     movq    [r0+24], m0
-    REP_RET
+    RET
 
 ; Load coeffs and perform row transform
 ; Output: coeffs in mm[0467], rounder in mm5

+ 5 - 5
libavcodec/x86/rv40dsp.asm

@@ -170,7 +170,7 @@ cglobal %1_rv40_qpel_v, 6,6+npicregs,12, dst, dststride, src, srcstride, height,
     add     srcq, srcstrideq
     dec  heightd                           ; next row
     jg .nextrow
-    REP_RET
+    RET
 %endmacro
 
 %macro FILTER_H  1
@@ -227,7 +227,7 @@ cglobal %1_rv40_qpel_h, 6, 6+npicregs, 12, dst, dststride, src, srcstride, heigh
     add     srcq, srcstrideq
     dec  heightd            ; next row
     jg .nextrow
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM  sse2
@@ -280,7 +280,7 @@ cglobal %1_rv40_qpel_v, 6,6+npicregs,8, dst, dststride, src, srcstride, height,
     add     srcq, srcstrideq
     dec       heightd                          ; next row
     jg       .nextrow
-    REP_RET
+    RET
 
 cglobal %1_rv40_qpel_h, 6,6+npicregs,8, dst, dststride, src, srcstride, height, mx, picreg
 %ifdef PIC
@@ -313,7 +313,7 @@ cglobal %1_rv40_qpel_h, 6,6+npicregs,8, dst, dststride, src, srcstride, height,
     add     srcq, srcstrideq
     dec  heightd            ; next row
     jg .nextrow
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM ssse3
@@ -464,7 +464,7 @@ cglobal rv40_weight_func_%1_%2, 6, 7, 8
 .loop:
     MAIN_LOOP  %2, RND
     jnz        .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM sse2

+ 6 - 6
libavcodec/x86/sbrdsp.asm

@@ -208,7 +208,7 @@ cglobal sbr_sum64x5, 1,2,4,z
     add     zq, 32
     cmp     zq, r1q
     jne  .loop
-    REP_RET
+    RET
 
 INIT_XMM sse
 cglobal sbr_qmf_post_shuffle, 2,3,4,W,z
@@ -227,7 +227,7 @@ cglobal sbr_qmf_post_shuffle, 2,3,4,W,z
     add               zq, 16
     cmp               zq, r2q
     jl             .loop
-    REP_RET
+    RET
 
 INIT_XMM sse
 cglobal sbr_neg_odd_64, 1,2,4,z
@@ -248,7 +248,7 @@ cglobal sbr_neg_odd_64, 1,2,4,z
     add         zq, 64
     cmp         zq, r1q
     jne      .loop
-    REP_RET
+    RET
 
 ; void ff_sbr_qmf_deint_bfly_sse2(float *v, const float *src0, const float *src1)
 INIT_XMM sse2
@@ -276,7 +276,7 @@ cglobal sbr_qmf_deint_bfly, 3,5,8, v,src0,src1,vrev,c
     add            vrevq, 2*mmsize
     sub               cq, 2*mmsize
     jge            .loop
-    REP_RET
+    RET
 
 INIT_XMM sse2
 cglobal sbr_qmf_pre_shuffle, 1,4,6,z
@@ -306,7 +306,7 @@ cglobal sbr_qmf_pre_shuffle, 1,4,6,z
     jge      .loop
     movq       m2, [zq]
     movq    [r2q], m2
-    REP_RET
+    RET
 
 %ifdef PIC
 %define NREGS 1
@@ -432,7 +432,7 @@ cglobal sbr_qmf_deint_neg, 2,4,4,v,src,vrev,c
     sub        vq, mmsize
     add        cq, mmsize
     jl      .loop
-    REP_RET
+    RET
 
 %macro SBR_AUTOCORRELATE 0
 cglobal sbr_autocorrelate, 2,3,8,32, x, phi, cnt

+ 4 - 4
libavcodec/x86/takdsp.asm

@@ -43,7 +43,7 @@ cglobal tak_decorrelate_ls, 3, 3, 2, p1, p2, length
     mova     [p2q+lengthq+mmsize*1], m1
     add                     lengthq, mmsize*2
     jl .loop
-    REP_RET
+    RET
 
 cglobal tak_decorrelate_sr, 3, 3, 2, p1, p2, length
     shl                     lengthd, 2
@@ -60,7 +60,7 @@ cglobal tak_decorrelate_sr, 3, 3, 2, p1, p2, length
     mova     [p1q+lengthq+mmsize*1], m1
     add                     lengthq, mmsize*2
     jl .loop
-    REP_RET
+    RET
 
 cglobal tak_decorrelate_sm, 3, 3, 6, p1, p2, length
     shl                     lengthd, 2
@@ -87,7 +87,7 @@ cglobal tak_decorrelate_sm, 3, 3, 6, p1, p2, length
     mova       [p2q+lengthq+mmsize], m4
     add                     lengthq, mmsize*2
     jl .loop
-    REP_RET
+    RET
 
 INIT_XMM sse4
 cglobal tak_decorrelate_sf, 3, 3, 5, p1, p2, length, dshift, dfactor
@@ -113,4 +113,4 @@ cglobal tak_decorrelate_sf, 3, 3, 5, p1, p2, length, dshift, dfactor
     mova      [p1q+lengthq], m1
     add             lengthq, mmsize
     jl .loop
-    REP_RET
+    RET

+ 2 - 2
libavcodec/x86/utvideodsp.asm

@@ -69,7 +69,7 @@ DEFINE_ARGS src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, x
     add        src_bq, linesize_bq
     sub        hd, 1
     jg .nextrow
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM sse2
@@ -125,7 +125,7 @@ DEFINE_ARGS src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, x
     add        src_bq, linesize_bq
     sub        hd, 1
     jg .nextrow
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM sse2

+ 1 - 1
libavcodec/x86/v210.asm

@@ -116,7 +116,7 @@ cglobal v210_planar_unpack_%1, 5, 5, 6 + 2 * cpuflag(avx2), src, y, u, v, w
     add wq, (mmsize*3)/8
     jl  .loop
 
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM ssse3

+ 1 - 1
libavcodec/x86/vc1dsp_mc.asm

@@ -139,7 +139,7 @@ cglobal vc1_put_ver_16b_shift2, 4,7,0, dst, src, stride
     add              dstq, 8
     dec                 i
         jnz         .loop
-    REP_RET
+    RET
 %undef rnd
 %undef shift
 %undef stride_neg2

+ 1 - 1
libavcodec/x86/videodsp.asm

@@ -433,4 +433,4 @@ cglobal prefetch, 3, 3, 0, buf, stride, h
     add      bufq, strideq
     dec        hd
     jg .loop
-    REP_RET
+    RET

+ 14 - 14
libavcodec/x86/vp8dsp.asm

@@ -200,7 +200,7 @@ cglobal put_vp8_epel%1_h6, 6, 6 + npicregs, 8, dst, dststride, src, srcstride, h
     add     srcq, srcstrideq
     dec  heightd            ; next row
     jg .nextrow
-    REP_RET
+    RET
 
 cglobal put_vp8_epel%1_h4, 6, 6 + npicregs, 7, dst, dststride, src, srcstride, height, mx, picreg
     shl      mxd, 4
@@ -230,7 +230,7 @@ cglobal put_vp8_epel%1_h4, 6, 6 + npicregs, 7, dst, dststride, src, srcstride, h
     add     srcq, srcstrideq
     dec  heightd            ; next row
     jg .nextrow
-    REP_RET
+    RET
 
 cglobal put_vp8_epel%1_v4, 7, 7, 8, dst, dststride, src, srcstride, height, picreg, my
     shl      myd, 4
@@ -268,7 +268,7 @@ cglobal put_vp8_epel%1_v4, 7, 7, 8, dst, dststride, src, srcstride, height, picr
     add      srcq, srcstrideq
     dec   heightd                          ; next row
     jg .nextrow
-    REP_RET
+    RET
 
 cglobal put_vp8_epel%1_v6, 7, 7, 8, dst, dststride, src, srcstride, height, picreg, my
     lea      myd, [myq*3]
@@ -314,7 +314,7 @@ cglobal put_vp8_epel%1_v6, 7, 7, 8, dst, dststride, src, srcstride, height, picr
     add      srcq, srcstrideq
     dec   heightd                          ; next row
     jg .nextrow
-    REP_RET
+    RET
 %endmacro
 
 INIT_MMX ssse3
@@ -368,7 +368,7 @@ cglobal put_vp8_epel4_h4, 6, 6 + npicregs, 0, dst, dststride, src, srcstride, he
     add      srcq, srcstrideq
     dec   heightd                          ; next row
     jg .nextrow
-    REP_RET
+    RET
 
 ; 4x4 block, H-only 6-tap filter
 INIT_MMX mmxext
@@ -426,7 +426,7 @@ cglobal put_vp8_epel4_h6, 6, 6 + npicregs, 0, dst, dststride, src, srcstride, he
     add      srcq, srcstrideq
     dec   heightd                          ; next row
     jg .nextrow
-    REP_RET
+    RET
 
 INIT_XMM sse2
 cglobal put_vp8_epel8_h4, 6, 6 + npicregs, 10, dst, dststride, src, srcstride, height, mx, picreg
@@ -474,7 +474,7 @@ cglobal put_vp8_epel8_h4, 6, 6 + npicregs, 10, dst, dststride, src, srcstride, h
     add     srcq, srcstrideq
     dec  heightd            ; next row
     jg .nextrow
-    REP_RET
+    RET
 
 INIT_XMM sse2
 cglobal put_vp8_epel8_h6, 6, 6 + npicregs, 14, dst, dststride, src, srcstride, height, mx, picreg
@@ -537,7 +537,7 @@ cglobal put_vp8_epel8_h6, 6, 6 + npicregs, 14, dst, dststride, src, srcstride, h
     add     srcq, srcstrideq
     dec  heightd            ; next row
     jg .nextrow
-    REP_RET
+    RET
 
 %macro FILTER_V 1
 ; 4x4 block, V-only 4-tap filter
@@ -590,7 +590,7 @@ cglobal put_vp8_epel%1_v4, 7, 7, 8, dst, dststride, src, srcstride, height, picr
     add     srcq, srcstrideq
     dec  heightd                           ; next row
     jg .nextrow
-    REP_RET
+    RET
 
 
 ; 4x4 block, V-only 6-tap filter
@@ -655,7 +655,7 @@ cglobal put_vp8_epel%1_v6, 7, 7, 8, dst, dststride, src, srcstride, height, picr
     add     srcq, srcstrideq
     dec  heightd                           ; next row
     jg .nextrow
-    REP_RET
+    RET
 %endmacro
 
 INIT_MMX mmxext
@@ -738,7 +738,7 @@ cglobal put_vp8_bilinear%1_v, 7, 7, 7, dst, dststride, src, srcstride, height, p
     lea     srcq, [srcq+srcstrideq*2]
     sub  heightd, 2
     jg .nextrow
-    REP_RET
+    RET
 
 %if cpuflag(ssse3)
 cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 5, dst, dststride, src, srcstride, height, mx, picreg
@@ -815,7 +815,7 @@ cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 7, dst, dststride, src, srcstride
     lea     srcq, [srcq+srcstrideq*2]
     sub  heightd, 2
     jg .nextrow
-    REP_RET
+    RET
 %endmacro
 
 INIT_MMX mmxext
@@ -838,7 +838,7 @@ cglobal put_vp8_pixels8, 5, 5, 0, dst, dststride, src, srcstride, height
     lea    dstq, [dstq+dststrideq*2]
     sub heightd, 2
     jg .nextrow
-    REP_RET
+    RET
 
 INIT_XMM sse
 cglobal put_vp8_pixels16, 5, 5, 2, dst, dststride, src, srcstride, height
@@ -851,7 +851,7 @@ cglobal put_vp8_pixels16, 5, 5, 2, dst, dststride, src, srcstride, height
     lea    dstq, [dstq+dststrideq*2]
     sub heightd, 2
     jg .nextrow
-    REP_RET
+    RET
 
 ;-----------------------------------------------------------------------------
 ; void ff_vp8_idct_dc_add_<opt>(uint8_t *dst, int16_t block[16], ptrdiff_t stride);

+ 3 - 3
libavfilter/x86/af_volume.asm

@@ -56,7 +56,7 @@ cglobal scale_samples_s16, 4,4,4, dst, src, len, volume
     mova  [dstq+lenq], m3
     sub       lenq, mmsize
     jge .loop
-    REP_RET
+    RET
 
 ;------------------------------------------------------------------------------
 ; void ff_scale_samples_s32(uint8_t *dst, const uint8_t *src, int len,
@@ -93,7 +93,7 @@ cglobal scale_samples_s32, 4,4,4, dst, src, len, volume
 %endif
     sub            lenq, mmsize
     jge .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM sse2
@@ -137,4 +137,4 @@ cglobal scale_samples_s32, 4,4,8, dst, src, len, volume
     mova  [dstq+lenq], m0
     sub       lenq, mmsize
     jge .loop
-    REP_RET
+    RET

+ 2 - 2
libavfilter/x86/avf_showcqt.asm

@@ -127,7 +127,7 @@ cglobal showcqt_cqt_calc, 5, 10, 12, dst, src, coeffs, len, fft_len, x, coeffs_v
         lea     dstq, [dstq + 16]
         lea     coeffsq, [coeffsq + 2*Coeffs.sizeof]
         jnz     .loop_k
-        REP_RET
+        RET
         align   16
         .check_loop_a:
         cmp     xd, [coeffsq + Coeffs.len]
@@ -170,7 +170,7 @@ cglobal showcqt_cqt_calc, 4, 7, 8, dst, src, coeffs, len, x, coeffs_val, i
         lea     dstq, [dstq + 8]
         lea     coeffsq, [coeffsq + Coeffs.sizeof]
         jnz     .loop_k
-        REP_RET
+        RET
 %endif ; ARCH_X86_64
 %endmacro ; DECLARE_CQT_CALC
 

+ 1 - 1
libavfilter/x86/scene_sad.asm

@@ -53,7 +53,7 @@ cglobal scene_sad, 6, 7, 2, src1, stride1, src2, stride2, width, end, x
 
     mov         r0q, r6mp
     movu      [r0q], m1      ; sum
-REP_RET
+RET
 %endmacro
 
 

+ 1 - 1
libavfilter/x86/vf_blend.asm

@@ -63,7 +63,7 @@ cglobal blend_%1, 5, 7, %2, top, top_linesize, bottom, bottom_linesize, dst, end
     add          dstq, dst_linesizeq
     sub          endd, 1
     jg .nextrow
-REP_RET
+RET
 %endmacro
 
 %macro BLEND_SIMPLE 2-3 0

+ 1 - 1
libavfilter/x86/vf_framerate.asm

@@ -84,7 +84,7 @@ cglobal blend_frames%1, 5, 7, 5, src1, src1_linesize, src2, src2_linesize, dst,
     add      dstq, dst_linesizeq
     sub      endd, 1
     jg .nextrow
-REP_RET
+RET
 %endmacro
 
 

+ 3 - 3
libavfilter/x86/vf_gradfun.asm

@@ -64,7 +64,7 @@ cglobal gradfun_filter_line, 6, 6
     add       r0, 4
     jl .loop
 .end:
-    REP_RET
+    RET
 
 INIT_XMM ssse3
 cglobal gradfun_filter_line, 6, 6, 8
@@ -78,7 +78,7 @@ cglobal gradfun_filter_line, 6, 6, 8
     FILTER_LINE m4
     add        r0, 8
     jl .loop
-    REP_RET
+    RET
 
 %macro BLUR_LINE 1
 cglobal gradfun_blur_line_%1, 6, 6, 8
@@ -102,7 +102,7 @@ cglobal gradfun_blur_line_%1, 6, 6, 8
     mova   [r3+r0], m0
     add         r0, 16
     jl .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM sse2

+ 1 - 1
libavfilter/x86/vf_hqdn3d.asm

@@ -97,7 +97,7 @@ ALIGN 16
     inc    xq
     jl .loop
     je .loop2
-    REP_RET
+    RET
 %endmacro ; HQDN3D_ROW
 
 HQDN3D_ROW 8

+ 3 - 3
libavfilter/x86/vf_interlace.asm

@@ -73,7 +73,7 @@ SECTION .text
     jl .loop
 
 .end:
-    REP_RET
+    RET
 %endmacro
 
 %macro LOWPASS_LINE 0
@@ -146,7 +146,7 @@ cglobal lowpass_line_complex, 5, 5, 8, dst, h, src, mref, pref
     add srcq, mmsize
     sub hd, mmsize
     jg .loop
-REP_RET
+RET
 
 cglobal lowpass_line_complex_12, 5, 5, 8, 16, dst, h, src, mref, pref, clip_max
     movd m7, DWORD clip_maxm
@@ -208,7 +208,7 @@ cglobal lowpass_line_complex_12, 5, 5, 8, 16, dst, h, src, mref, pref, clip_max
     add srcq, 2*mmsize
     sub hd, mmsize
     jg .loop
-REP_RET
+RET
 %endmacro
 
 INIT_XMM sse2

+ 1 - 1
libavfilter/x86/vf_maskedmerge.asm

@@ -81,4 +81,4 @@ cglobal maskedmerge8, 5, 7, 8, bsrc, osrc, msrc, dst, blinesize, w, x
     add          dstq, dlinesizeq
     sub         hd, 1
     jg .nextrow
-REP_RET
+RET

+ 1 - 1
libavfilter/x86/vf_stereo3d.asm

@@ -213,4 +213,4 @@ cglobal anaglyph, 3, 6, 8, 2*9*mmsize, dst, lsrc, rsrc, dst_linesize, o, cnt
     add         rsrcq, r_linesizeq
     sub       heightd, 1
     jg .nextrow
-REP_RET
+RET

+ 5 - 5
libavfilter/x86/vf_w3fdif.asm

@@ -38,7 +38,7 @@ cglobal w3fdif_scale, 3, 3, 2, 0, out_pixel, work_pixel, linesize
     add                 work_pixelq, mmsize*2
     sub                   linesized, mmsize/2
     jg .loop
-REP_RET
+RET
 
 cglobal w3fdif_simple_low, 4, 5, 6, 0, work_line, in_lines_cur0, coef, linesize, offset
     movd                  m1, [coefq]
@@ -63,7 +63,7 @@ cglobal w3fdif_simple_low, 4, 5, 6, 0, work_line, in_lines_cur0, coef, linesize,
     add                               offsetq, mmsize/2
     sub                             linesized, mmsize/2
     jg .loop
-REP_RET
+RET
 
 cglobal w3fdif_complex_low, 4, 7, 8, 0, work_line, in_lines_cur0, coef, linesize
     movq                  m0, [coefq]
@@ -99,7 +99,7 @@ cglobal w3fdif_complex_low, 4, 7, 8, 0, work_line, in_lines_cur0, coef, linesize
     add                               offsetq, mmsize/2
     sub                             linesized, mmsize/2
     jg .loop
-REP_RET
+RET
 
 %if ARCH_X86_64
 cglobal w3fdif_simple_high, 5, 9, 8, 0, work_line, in_lines_cur0, in_lines_adj0, coef, linesize
@@ -179,7 +179,7 @@ cglobal w3fdif_simple_high, 4, 7, 8, 0, work_line, in_lines_cur0, in_lines_adj0,
     add                               offsetq, mmsize/2
     sub                             linesized, mmsize/2
     jg .loop
-REP_RET
+RET
 
 %if ARCH_X86_64
 
@@ -254,6 +254,6 @@ cglobal w3fdif_complex_high, 5, 13, 10, 0, work_line, in_lines_cur0, in_lines_ad
     add                               offsetq, mmsize/2
     sub                             linesized, mmsize/2
     jg .loop
-REP_RET
+RET
 
 %endif

+ 9 - 9
libavutil/x86/float_dsp.asm

@@ -48,7 +48,7 @@ ALIGN 16
 
     sub       lenq, 64
     jge       .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM sse
@@ -141,7 +141,7 @@ cglobal vector_fmac_scalar, 4,4,5, dst, src, mul, len
 %endif ; mmsize
     sub    lenq, 64
     jge .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM sse
@@ -178,7 +178,7 @@ cglobal vector_fmul_scalar, 4,4,3, dst, src, mul, len
     mova  [dstq+lenq], m1
     sub    lenq, mmsize
     jge .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM sse
@@ -233,7 +233,7 @@ cglobal vector_dmac_scalar, 4,4,5, dst, src, mul, len
     movaps [dstq+lenq+3*mmsize], m4
     sub    lenq, mmsize*4
     jge .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM sse2
@@ -280,7 +280,7 @@ cglobal vector_dmul_scalar, 4,4,3, dst, src, mul, len
     movaps [dstq+lenq+mmsize], m2
     sub          lenq, 2*mmsize
     jge .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM sse2
@@ -323,7 +323,7 @@ cglobal vector_fmul_window, 5, 6, 6, dst, src0, src1, win, len, len1
     sub       len1q, mmsize
     add       lenq,  mmsize
     jl .loop
-    REP_RET
+    RET
 
 ;-----------------------------------------------------------------------------
 ; vector_fmul_add(float *dst, const float *src0, const float *src1,
@@ -352,7 +352,7 @@ ALIGN 16
 
     sub     lenq,   2*mmsize
     jge     .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM sse
@@ -401,7 +401,7 @@ ALIGN 16
     add     src1q, 2*mmsize
     sub     lenq,  2*mmsize
     jge     .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM sse
@@ -585,4 +585,4 @@ cglobal butterflies_float, 3,3,3, src0, src1, len
     mova        [src0q + lenq], m0
     add       lenq, mmsize
     jl .loop
-    REP_RET
+    RET

+ 2 - 2
libavutil/x86/lls.asm

@@ -123,7 +123,7 @@ cglobal update_lls, 2,5,8, ctx, var, i, j, covar2
     test    id, id
     jle .loop2x1
 .ret:
-    REP_RET
+    RET
 
 %macro UPDATE_LLS 0
 cglobal update_lls, 3,6,8, ctx, var, count, i, j, count2
@@ -240,7 +240,7 @@ cglobal update_lls, 3,6,8, ctx, var, count, i, j, count2
     cmp     id, countd
     jle .loop2x1
 .ret:
-    REP_RET
+    RET
 %endmacro ; UPDATE_LLS
 
 %if HAVE_AVX_EXTERNAL

+ 6 - 6
libswresample/x86/audio_convert.asm

@@ -85,7 +85,7 @@ pack_2ch_%2_to_%1_u_int %+ SUFFIX:
     add lenq, 2*mmsize/(2<<%4)
 %endif
         jl .next
-    REP_RET
+    RET
 %endmacro
 
 %macro UNPACK_2CH 5-7
@@ -157,7 +157,7 @@ unpack_2ch_%2_to_%1_u_int %+ SUFFIX:
     add lenq, mmsize/(1<<%4)
 %endif
         jl .next
-    REP_RET
+    RET
 %endmacro
 
 %macro CONV 5-7
@@ -198,7 +198,7 @@ cglobal %2_to_%1_%3, 3, 3, 6, dst, src, len
     emms
     RET
 %else
-    REP_RET
+    RET
 %endif
 %endmacro
 
@@ -301,7 +301,7 @@ pack_6ch_%2_to_%1_u_int %+ SUFFIX:
     emms
     RET
 %else
-    REP_RET
+    RET
 %endif
 %endmacro
 
@@ -375,7 +375,7 @@ unpack_6ch_%2_to_%1_u_int %+ SUFFIX:
     add      dstq, mmsize
     sub      lend, mmsize/4
     jg .loop
-    REP_RET
+    RET
 %endmacro
 
 %define PACK_8CH_GPRS (10 * ARCH_X86_64) + ((6 + HAVE_ALIGNED_STACK) * ARCH_X86_32)
@@ -525,7 +525,7 @@ pack_8ch_%2_to_%1_u_int %+ SUFFIX:
 %endif
     sub      lend, mmsize/4
     jg .loop
-    REP_RET
+    RET
 %endmacro
 
 %macro INT16_TO_INT32_N 6

+ 4 - 4
libswresample/x86/rematrix.asm

@@ -68,7 +68,7 @@ mix_2_1_float_u_int %+ SUFFIX:
     mov%1  [outq + lenq + mmsize], m2
     add        lenq, mmsize*2
         jl .next
-    REP_RET
+    RET
 %endmacro
 
 %macro MIX1_FLT 1
@@ -100,7 +100,7 @@ mix_1_1_float_u_int %+ SUFFIX:
     mov%1  [outq + lenq + mmsize], m1
     add        lenq, mmsize*2
         jl .next
-    REP_RET
+    RET
 %endmacro
 
 %macro MIX1_INT16 1
@@ -152,7 +152,7 @@ mix_1_1_int16_u_int %+ SUFFIX:
     emms
     RET
 %else
-    REP_RET
+    RET
 %endif
 %endmacro
 
@@ -218,7 +218,7 @@ mix_2_1_int16_u_int %+ SUFFIX:
     emms
     RET
 %else
-    REP_RET
+    RET
 %endif
 %endmacro
 

+ 7 - 7
libswscale/x86/input.asm

@@ -207,7 +207,7 @@ cglobal %2 %+ 24ToY, 6, 6, %1, dst, src, u1, u2, w, table
     mova    [dstq+wq], m0
     add            wq, mmsize
     jl .loop
-    REP_RET
+    RET
 %endif ; ARCH_X86_64 && %0 == 3
 %endmacro
 
@@ -313,7 +313,7 @@ cglobal %2 %+ 24ToUV, 7, 7, %1, dstU, dstV, u1, src, u2, w, table
     mova   [dstVq+wq], m2
     add            wq, mmsize
     jl .loop
-    REP_RET
+    RET
 %endif ; ARCH_X86_64 && %0 == 3
 %endmacro
 
@@ -394,7 +394,7 @@ cglobal %2%3%4%5 %+ ToY, 6, 6, %1, dst, src, u1, u2, w, table
     add            wq, 2
     jl .loop2
 .end:
-    REP_RET
+    RET
 %endif ; %0 == 3
 %endmacro
 
@@ -491,7 +491,7 @@ cglobal %2%3%4%5 %+ ToUV, 7, 7, %1, dstU, dstV, u1, src, u2, w, table
     add            wq, 2
     jl .loop2
 .end:
-    REP_RET
+    RET
 %endif ; ARCH_X86_64 && %0 == 3
 %endmacro
 
@@ -543,7 +543,7 @@ RGB32_FUNCS 8, 12
     mova    [dstq+wq], m0
     add            wq, mmsize
     jl .loop_%1
-    REP_RET
+    RET
 %endmacro
 
 ; %1 = nr. of XMM registers
@@ -599,7 +599,7 @@ cglobal %2ToY, 5, 5, %1, dst, unused0, unused1, src, w
     movhps [dstVq+wq], m1
     add            wq, mmsize / 2
     jl .loop_%1
-    REP_RET
+    RET
 %endmacro
 
 ; %1 = nr. of XMM registers
@@ -657,7 +657,7 @@ cglobal %2ToUV, 4, 5, %1, dstU, dstV, unused, src, w
 %endif ; nv12/21
     add            wq, mmsize
     jl .loop_%1
-    REP_RET
+    RET
 %endmacro
 
 ; %1 = nr. of XMM registers

+ 5 - 5
libswscale/x86/output.asm

@@ -297,7 +297,7 @@ cglobal yuv2planeX_%1, %3, 8, %2, filter, fltsize, src, dst, w, dither, offset
     test          dstq, 15
     jnz .unaligned
     yuv2planeX_mainloop %1, a
-    REP_RET
+    RET
 .unaligned:
     yuv2planeX_mainloop %1, u
 %endif ; mmsize == 8/16
@@ -307,10 +307,10 @@ cglobal yuv2planeX_%1, %3, 8, %2, filter, fltsize, src, dst, w, dither, offset
     ADD             rsp, pad
     RET
 %else ; x86-64
-    REP_RET
+    RET
 %endif ; x86-32/64
 %else ; %1 == 9/10/16
-    REP_RET
+    RET
 %endif ; %1 == 8/9/10/16
 %endmacro
 
@@ -433,10 +433,10 @@ cglobal yuv2plane1_%1, %3, %3, %2, src, dst, w, dither, offset
     test          dstq, 15
     jnz .unaligned
     yuv2plane1_mainloop %1, a
-    REP_RET
+    RET
 .unaligned:
     yuv2plane1_mainloop %1, u
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM sse2

+ 1 - 1
libswscale/x86/scale.asm

@@ -357,7 +357,7 @@ cglobal hscale%1to%2_%4, %5, 10, %6, pos0, dst, w, srcmem, filter, fltpos, fltsi
     add           wq, 2
 %endif ; %3 ==/!= X
     jl .loop
-    REP_RET
+    RET
 %endmacro
 
 ; SCALE_FUNCS source_width, intermediate_nbits, n_xmm

+ 1 - 1
libswscale/x86/scale_avx2.asm

@@ -144,7 +144,7 @@ cglobal hscale8to15_%1, 7, 9, 16, pos0, dst, w, srcmem, filter, fltpos, fltsize,
     cmp countq, wq
     jl .tail_loop
 .end:
-REP_RET
+RET
 %endmacro
 
 %if ARCH_X86_64

+ 1 - 1
libswscale/x86/yuv2yuvX.asm

@@ -121,7 +121,7 @@ cglobal yuv2yuvX, 7, 7, 8, filter, filterSize, src, dest, dstW, dither, offset
     mov                  filterSizeq, filterq
     cmp                  offsetq, dstWq
     jb                  .outerloop
-    REP_RET
+    RET
 %endmacro
 
 INIT_MMX mmxext

+ 1 - 1
libswscale/x86/yuv_2_rgb.asm

@@ -354,7 +354,7 @@ add imageq, 8 * depth * time_num
 add indexq, 4 * time_num
 js .loop0
 
-REP_RET
+RET
 
 %endmacro
 

+ 1 - 1
tests/checkasm/x86/checkasm.asm

@@ -234,7 +234,7 @@ cglobal checked_call%1, 1,7
 .emms_ok:
 %endif
     add  esp, max_args*4
-    REP_RET
+    RET
 %endmacro
 
 %endif ; ARCH_X86_64