diff --git a/libavcodec/x86/mathops.h b/libavcodec/x86/mathops.h
index 5949dfe..29ea584 100644
--- a/libavcodec/x86/mathops.h
+++ b/libavcodec/x86/mathops.h
@@ -28,10 +28,18 @@
 #if ARCH_X86_32
 #define MULL(ra, rb, shift) \
-        ({ int rt, dummy; __asm__ (\
+        ({ int rt, dummy; \
+         if (__builtin_constant_p(shift)) \
+             __asm__ (\
             "imull %3               \n\t"\
             "shrdl %4, %%edx, %%eax \n\t"\
             : "=a"(rt), "=d"(dummy)\
-            : "a" ((int)(ra)), "rm" ((int)(rb)), "i"(shift));\
+            : "a" ((int)(ra)), "rm" ((int)(rb)), "i"((shift) & 0x1F));\
+         else \
+             __asm__ (\
+                 "imull %3               \n\t"\
+                 "shrdl %4, %%edx, %%eax \n\t"\
+                 : "=a"(rt), "=d"(dummy)\
+                 : "a" ((int)(ra)), "rm" ((int)(rb)), "c"((uint8_t)(shift)));\
          rt; })
 
 #define MULH(ra, rb) \
@@ -81,19 +89,31 @@ __asm__ volatile(\
 // avoid +32 for shift optimization (gcc should do that ...)
 #define NEG_SSR32 NEG_SSR32
 static inline  int32_t NEG_SSR32( int32_t a, int8_t s){
+    if (__builtin_constant_p(s))
     __asm__ ("sarl %1, %0\n\t"
          : "+r" (a)
-         : "ic" ((uint8_t)(-s))
+         : "i" (-s & 0x1F)
     );
+    else
+        __asm__ ("sarl %1, %0\n\t"
+             : "+r" (a)
+             : "c" ((uint8_t)(-s))
+        );
     return a;
 }
 
 #define NEG_USR32 NEG_USR32
 static inline uint32_t NEG_USR32(uint32_t a, int8_t s){
+    if (__builtin_constant_p(s))
     __asm__ ("shrl %1, %0\n\t"
          : "+r" (a)
-         : "ic" ((uint8_t)(-s))
+         : "i" (-s & 0x1F)
     );
+    else
+        __asm__ ("shrl %1, %0\n\t"
+             : "+r" (a)
+             : "c" ((uint8_t)(-s))
+        );
     return a;
 }
 
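
Note (not part of the patch): the constant/variable split exists because x86 shift
instructions take the count either as an immediate or in %cl. GCC's "i" constraint
requires a compile-time constant, while "c" pins the operand to ecx/cl. Masking the
immediate with 0x1F mirrors the hardware, which only uses the low 5 bits of a 32-bit
shift count, and keeps the encoded value in the 0..31 range assemblers accept. Below
is a minimal standalone sketch of the same pattern, assuming GCC or Clang on x86 with
optimizations enabled (the constant path is only selected once inlining lets
__builtin_constant_p() see a literal); neg_usr32() is a hypothetical stand-in for the
NEG_USR32 above, not code from the patch.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for NEG_USR32: computes a >> (32 - s) for 0 < s < 32,
 * since (-s) & 0x1F == 32 - s in that range. */
static inline uint32_t neg_usr32(uint32_t a, int8_t s)
{
    if (__builtin_constant_p(s))
        /* Immediate form: "i" demands a compile-time constant; & 0x1F keeps
         * the encoded count in 0..31, matching how the CPU masks 32-bit
         * shift counts anyway. */
        __asm__ ("shrl %1, %0" : "+r"(a) : "i"(-s & 0x1F));
    else
        /* Variable form: a non-constant count must live in %cl, which the
         * "c" constraint (plus the uint8_t cast) ensures. */
        __asm__ ("shrl %1, %0" : "+r"(a) : "c"((uint8_t)(-s)));
    return a;
}

int main(void)
{
    volatile int8_t n = 4;                      /* defeats constant folding */
    printf("%u\n", neg_usr32(0xF0000000u, 4));  /* constant path: prints 15 */
    printf("%u\n", neg_usr32(0xF0000000u, n));  /* variable path: prints 15 */
    return 0;
}

Both calls shift 0xF0000000 right by (-4) & 0x1F = 28 and print 15; the first goes
through the "$28" immediate encoding, the second through %cl, where the hardware
masks the 252 stored there down to the same 28.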