diff -urN x264-snapshot-20160712-2245/common/bitstream.c x264-snapshot-20160712-2245.x32/common/bitstream.c --- x264-snapshot-20160712-2245/common/bitstream.c 2016-07-12 20:45:04.000000000 +0000 +++ x264-snapshot-20160712-2245.x32/common/bitstream.c 2017-03-03 19:15:37.167990122 +0000 @@ -116,7 +116,7 @@ pf->nal_escape = x264_nal_escape_c; #if HAVE_MMX -#if ARCH_X86_64 +#if (ARCH_X86_64 || ARCH_X86_64_32) pf->cabac_block_residual_internal = x264_cabac_block_residual_internal_sse2; pf->cabac_block_residual_rd_internal = x264_cabac_block_residual_rd_internal_sse2; pf->cabac_block_residual_8x8_rd_internal = x264_cabac_block_residual_8x8_rd_internal_sse2; @@ -126,7 +126,7 @@ pf->nal_escape = x264_nal_escape_mmx2; if( cpu&X264_CPU_SSE2 ) { -#if ARCH_X86_64 +#if (ARCH_X86_64 || ARCH_X86_64_32) if( cpu&X264_CPU_LZCNT ) { pf->cabac_block_residual_internal = x264_cabac_block_residual_internal_sse2_lzcnt; @@ -137,7 +137,7 @@ if( cpu&X264_CPU_SSE2_IS_FAST ) pf->nal_escape = x264_nal_escape_sse2; } -#if ARCH_X86_64 +#if (ARCH_X86_64 || ARCH_X86_64_32) if( cpu&X264_CPU_SSSE3 ) { pf->cabac_block_residual_rd_internal = x264_cabac_block_residual_rd_internal_ssse3; diff -urN x264-snapshot-20160712-2245/common/common.h x264-snapshot-20160712-2245.x32/common/common.h --- x264-snapshot-20160712-2245/common/common.h 2016-07-12 20:45:04.000000000 +0000 +++ x264-snapshot-20160712-2245.x32/common/common.h 2017-03-03 19:02:22.011993565 +0000 @@ -1015,7 +1015,7 @@ return cnt; } -#if ARCH_X86 || ARCH_X86_64 +#if ARCH_X86 || ARCH_X86_64 || ARCH_X86_64_32 #include "x86/util.h" #endif diff -urN x264-snapshot-20160712-2245/common/cpu.c x264-snapshot-20160712-2245.x32/common/cpu.c --- x264-snapshot-20160712-2245/common/cpu.c 2016-07-12 20:45:04.000000000 +0000 +++ x264-snapshot-20160712-2245.x32/common/cpu.c 2017-03-03 19:02:22.014993565 +0000 @@ -128,7 +128,7 @@ uint32_t max_extended_cap, max_basic_cap; int cache; -#if !ARCH_X86_64 +#if !ARCH_X86_64 && !ARCH_X86_64_32 if( !x264_cpu_cpuid_test() ) return 0; #endif diff -urN x264-snapshot-20160712-2245/common/dct.c x264-snapshot-20160712-2245.x32/common/dct.c --- x264-snapshot-20160712-2245/common/dct.c 2016-07-12 20:45:04.000000000 +0000 +++ x264-snapshot-20160712-2245.x32/common/dct.c 2017-03-03 20:59:13.722994658 +0000 @@ -619,7 +619,7 @@ dctf->idct4x4dc = x264_idct4x4dc_mmx; dctf->sub8x8_dct_dc = x264_sub8x8_dct_dc_mmx2; -#if !ARCH_X86_64 +#if !ARCH_X86_64 && !ARCH_X86_64_32 dctf->sub8x8_dct = x264_sub8x8_dct_mmx; dctf->sub16x16_dct = x264_sub16x16_dct_mmx; dctf->add8x8_idct = x264_add8x8_idct_mmx; @@ -707,7 +707,7 @@ dctf->sub8x8_dct = x264_sub8x8_dct_avx2; dctf->sub16x16_dct = x264_sub16x16_dct_avx2; dctf->add16x16_idct_dc = x264_add16x16_idct_dc_avx2; -#if ARCH_X86_64 +#if ARCH_X86_64 || ARCH_X86_64_32 dctf->sub16x16_dct8 = x264_sub16x16_dct8_avx2; #endif } @@ -976,13 +976,13 @@ pf_interlaced->scan_8x8 = x264_zigzag_scan_8x8_field_sse4; if( cpu&X264_CPU_AVX ) pf_interlaced->scan_8x8 = x264_zigzag_scan_8x8_field_avx; -#if ARCH_X86_64 +#if ARCH_X86_64 || ARCH_X86_64_32 if( cpu&X264_CPU_AVX ) { pf_progressive->scan_4x4 = x264_zigzag_scan_4x4_frame_avx; pf_progressive->scan_8x8 = x264_zigzag_scan_8x8_frame_avx; } -#endif // ARCH_X86_64 +#endif // ARCH_X86_64 || ARCH_X86_64_32 #endif // HAVE_MMX #else #if HAVE_MMX @@ -1010,7 +1010,7 @@ { pf_interlaced->sub_4x4 = x264_zigzag_sub_4x4_field_avx; pf_progressive->sub_4x4 = x264_zigzag_sub_4x4_frame_avx; -#if ARCH_X86_64 +#if ARCH_X86_64 || ARCH_X86_64_32 pf_interlaced->sub_4x4ac = x264_zigzag_sub_4x4ac_field_avx; 
pf_progressive->sub_4x4ac= x264_zigzag_sub_4x4ac_frame_avx; #endif diff -urN x264-snapshot-20160712-2245/common/frame.c x264-snapshot-20160712-2245.x32/common/frame.c --- x264-snapshot-20160712-2245/common/frame.c 2016-07-12 20:45:04.000000000 +0000 +++ x264-snapshot-20160712-2245.x32/common/frame.c 2017-03-03 19:02:22.017993565 +0000 @@ -75,7 +75,7 @@ int i_stride, i_width, i_lines, luma_plane_count; int i_padv = PADV << PARAM_INTERLACED; int align = 16; -#if ARCH_X86 || ARCH_X86_64 +#if ARCH_X86 || ARCH_X86_64 || ARCH_X86_64_32 if( h->param.cpu&X264_CPU_CACHELINE_64 ) align = 64; else if( h->param.cpu&X264_CPU_CACHELINE_32 || h->param.cpu&X264_CPU_AVX ) diff -urN x264-snapshot-20160712-2245/common/osdep.h x264-snapshot-20160712-2245.x32/common/osdep.h --- x264-snapshot-20160712-2245/common/osdep.h 2016-07-12 20:45:04.000000000 +0000 +++ x264-snapshot-20160712-2245.x32/common/osdep.h 2017-03-03 19:15:11.214990235 +0000 @@ -147,7 +147,7 @@ #define ALIGNED_ARRAY_64( ... ) EXPAND( ALIGNED_ARRAY_EMU( 63, __VA_ARGS__ ) ) /* For AVX2 */ -#if ARCH_X86 || ARCH_X86_64 +#if ARCH_X86 || ARCH_X86_64 || ARCH_X86_64_32 #define NATIVE_ALIGN 32 #define ALIGNED_N ALIGNED_32 #define ALIGNED_ARRAY_N ALIGNED_ARRAY_32 @@ -293,7 +293,7 @@ return (x<<24) + ((x<<8)&0xff0000) + ((x>>8)&0xff00) + (x>>24); } #endif -#if HAVE_X86_INLINE_ASM && ARCH_X86_64 +#if HAVE_X86_INLINE_ASM && (ARCH_X86_64 || ARCH_X86_64_32) static ALWAYS_INLINE uint64_t endian_fix64( uint64_t x ) { asm("bswap %0":"+r"(x)); @@ -361,7 +361,7 @@ /* We require that prefetch not fault on invalid reads, so we only enable it on * known architectures. */ #elif defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 1) &&\ - (ARCH_X86 || ARCH_X86_64 || ARCH_ARM || ARCH_PPC) + (ARCH_X86 || ARCH_X86_64 || ARCH_X86_64_32 || ARCH_ARM || ARCH_PPC) #define x264_prefetch(x) __builtin_prefetch(x) #else #define x264_prefetch(x) diff -urN x264-snapshot-20160712-2245/common/pixel.c x264-snapshot-20160712-2245.x32/common/pixel.c --- x264-snapshot-20160712-2245/common/pixel.c 2016-07-12 20:45:04.000000000 +0000 +++ x264-snapshot-20160712-2245.x32/common/pixel.c 2017-03-03 20:59:53.069994488 +0000 @@ -911,7 +911,7 @@ pixf->sa8d[PIXEL_16x16] = x264_pixel_sa8d_16x16_sse2; pixf->sa8d[PIXEL_8x8] = x264_pixel_sa8d_8x8_sse2; -#if ARCH_X86_64 +#if ARCH_X86_64 || ARCH_X86_64_32 pixf->intra_sa8d_x3_8x8 = x264_intra_sa8d_x3_8x8_sse2; pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_sse2; #endif @@ -975,7 +975,7 @@ pixf->intra_sad_x3_4x4 = x264_intra_sad_x3_4x4_ssse3; pixf->sa8d[PIXEL_16x16]= x264_pixel_sa8d_16x16_ssse3; pixf->sa8d[PIXEL_8x8] = x264_pixel_sa8d_8x8_ssse3; -#if ARCH_X86_64 +#if ARCH_X86_64 || ARCH_X86_64_32 pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_ssse3; #endif pixf->intra_sad_x3_4x4 = x264_intra_sad_x3_4x4_ssse3; @@ -995,7 +995,7 @@ } pixf->sa8d[PIXEL_16x16]= x264_pixel_sa8d_16x16_sse4; pixf->sa8d[PIXEL_8x8] = x264_pixel_sa8d_8x8_sse4; -#if ARCH_X86_64 +#if ARCH_X86_64 || ARCH_X86_64_32 pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_sse4; #endif pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_sse4; @@ -1018,7 +1018,7 @@ pixf->ssd_nv12_core = x264_pixel_ssd_nv12_core_avx; pixf->ssim_4x4x2_core = x264_pixel_ssim_4x4x2_core_avx; pixf->ssim_end4 = x264_pixel_ssim_end4_avx; -#if ARCH_X86_64 +#if ARCH_X86_64 || ARCH_X86_64_32 pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_avx; #endif pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_avx; @@ -1032,7 +1032,7 @@ pixf->var[PIXEL_8x8] = 
x264_pixel_var_8x8_xop; pixf->vsad = x264_pixel_vsad_xop; pixf->asd8 = x264_pixel_asd8_xop; -#if ARCH_X86_64 +#if ARCH_X86_64 || ARCH_X86_64_32 pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_xop; #endif } @@ -1125,7 +1125,7 @@ pixf->ssim_end4 = x264_pixel_ssim_end4_sse2; pixf->sa8d[PIXEL_16x16] = x264_pixel_sa8d_16x16_sse2; pixf->sa8d[PIXEL_8x8] = x264_pixel_sa8d_8x8_sse2; -#if ARCH_X86_64 +#if ARCH_X86_64 || ARCH_X86_64_32 pixf->intra_sa8d_x3_8x8 = x264_intra_sa8d_x3_8x8_sse2; pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_sse2; #endif @@ -1194,7 +1194,7 @@ pixf->intra_sad_x9_4x4 = x264_intra_sad_x9_4x4_ssse3; pixf->intra_satd_x9_4x4 = x264_intra_satd_x9_4x4_ssse3; pixf->intra_sad_x9_8x8 = x264_intra_sad_x9_8x8_ssse3; -#if ARCH_X86_64 +#if ARCH_X86_64 || ARCH_X86_64_32 pixf->intra_sa8d_x9_8x8 = x264_intra_sa8d_x9_8x8_ssse3; #endif } @@ -1208,7 +1208,7 @@ INIT6( satd_x3, _ssse3_atom ); INIT6( satd_x4, _ssse3_atom ); INIT4( hadamard_ac, _ssse3_atom ); -#if ARCH_X86_64 +#if ARCH_X86_64 || ARCH_X86_64_32 pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_ssse3_atom; #endif } @@ -1220,7 +1220,7 @@ INIT8( satd, _ssse3 ); INIT7( satd_x3, _ssse3 ); INIT7( satd_x4, _ssse3 ); -#if ARCH_X86_64 +#if ARCH_X86_64 || ARCH_X86_64_32 pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_ssse3; #endif } @@ -1261,14 +1261,14 @@ pixf->intra_sad_x9_4x4 = x264_intra_sad_x9_4x4_sse4; pixf->intra_satd_x9_4x4 = x264_intra_satd_x9_4x4_sse4; pixf->intra_sad_x9_8x8 = x264_intra_sad_x9_8x8_sse4; -#if ARCH_X86_64 +#if ARCH_X86_64 || ARCH_X86_64_32 pixf->intra_sa8d_x9_8x8 = x264_intra_sa8d_x9_8x8_sse4; #endif } pixf->sa8d[PIXEL_16x16]= x264_pixel_sa8d_16x16_sse4; pixf->sa8d[PIXEL_8x8] = x264_pixel_sa8d_8x8_sse4; pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_sse4; -#if ARCH_X86_64 +#if ARCH_X86_64 || ARCH_X86_64_32 pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_sse4; #endif } @@ -1288,7 +1288,7 @@ pixf->intra_sad_x9_4x4 = x264_intra_sad_x9_4x4_avx; pixf->intra_satd_x9_4x4 = x264_intra_satd_x9_4x4_avx; pixf->intra_sad_x9_8x8 = x264_intra_sad_x9_8x8_avx; -#if ARCH_X86_64 +#if ARCH_X86_64 || ARCH_X86_64_32 pixf->intra_sa8d_x9_8x8 = x264_intra_sa8d_x9_8x8_avx; #endif } @@ -1302,7 +1302,7 @@ pixf->var[PIXEL_8x8] = x264_pixel_var_8x8_avx; pixf->ssim_4x4x2_core = x264_pixel_ssim_4x4x2_core_avx; pixf->ssim_end4 = x264_pixel_ssim_end4_avx; -#if ARCH_X86_64 +#if ARCH_X86_64 || ARCH_X86_64_32 pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_avx; #endif } @@ -1327,7 +1327,7 @@ pixf->var[PIXEL_8x8] = x264_pixel_var_8x8_xop; pixf->var2[PIXEL_8x8] = x264_pixel_var2_8x8_xop; pixf->var2[PIXEL_8x16] = x264_pixel_var2_8x16_xop; -#if ARCH_X86_64 +#if ARCH_X86_64 || ARCH_X86_64_32 pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_xop; #endif } @@ -1348,7 +1348,7 @@ pixf->intra_sad_x9_8x8 = x264_intra_sad_x9_8x8_avx2; pixf->intra_sad_x3_8x8c = x264_intra_sad_x3_8x8c_avx2; pixf->ssd_nv12_core = x264_pixel_ssd_nv12_core_avx2; -#if ARCH_X86_64 +#if ARCH_X86_64 || ARCH_X86_64_32 pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_avx2; #endif } diff -urN x264-snapshot-20160712-2245/common/x86/bitstream-a.asm x264-snapshot-20160712-2245.x32/common/x86/bitstream-a.asm --- x264-snapshot-20160712-2245/common/x86/bitstream-a.asm 2016-07-12 20:45:04.000000000 +0000 +++ x264-snapshot-20160712-2245.x32/common/x86/bitstream-a.asm 2017-03-03 20:46:59.649997837 +0000 @@ -130,7 +130,7 @@ NAL_ESCAPE INIT_XMM sse2 NAL_ESCAPE -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 INIT_YMM 
avx2 NAL_ESCAPE %endif diff -urN x264-snapshot-20160712-2245/common/x86/cabac-a.asm x264-snapshot-20160712-2245.x32/common/x86/cabac-a.asm --- x264-snapshot-20160712-2245/common/x86/cabac-a.asm 2016-07-12 20:45:04.000000000 +0000 +++ x264-snapshot-20160712-2245.x32/common/x86/cabac-a.asm 2017-03-03 20:46:59.626997837 +0000 @@ -35,7 +35,7 @@ coeff_abs_level_transition: db 1, 2, 3, 3, 4, 5, 6, 7 db 4, 4, 4, 4, 5, 6, 7, 7 -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 %macro COEFF_LAST_TABLE 17 %define funccpu1 %1 %define funccpu2 %2 @@ -86,7 +86,7 @@ cextern count_cat_m1 cextern cabac_encode_ue_bypass -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 %define pointer resq %else %define pointer resd @@ -122,7 +122,7 @@ ; t3 must be ecx, since it's used for shift. %if WIN64 DECLARE_REG_TMP 3,1,2,0,5,6,4,4 -%elif ARCH_X86_64 +%elif ARCH_X86_64 || ARCH_X86_64_32 DECLARE_REG_TMP 0,1,2,3,4,5,6,6 %else DECLARE_REG_TMP 0,4,2,1,3,5,6,2 @@ -193,7 +193,7 @@ mov [t0+cb.low], t7d mov [t0+cb.queue], t3d RET -%if ARCH_X86_64 == 0 +%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0 .putbyte: PROLOGUE 0,7 movifnidn t6d, t7d @@ -525,7 +525,7 @@ RET %endmacro -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 INIT_XMM sse2 CABAC_RESIDUAL_RD 0, coeff_last_sse2 CABAC_RESIDUAL_RD 1, coeff_last_sse2 @@ -746,7 +746,7 @@ RET %endmacro -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 INIT_XMM sse2 CABAC_RESIDUAL coeff_last_sse2 INIT_XMM sse2,lzcnt diff -urN x264-snapshot-20160712-2245/common/x86/cpu-a.asm x264-snapshot-20160712-2245.x32/common/x86/cpu-a.asm --- x264-snapshot-20160712-2245/common/x86/cpu-a.asm 2016-07-12 20:45:04.000000000 +0000 +++ x264-snapshot-20160712-2245.x32/common/x86/cpu-a.asm 2017-03-03 20:46:59.575997837 +0000 @@ -66,7 +66,7 @@ mov [r4], edx RET -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 ;----------------------------------------------------------------------------- ; void stack_align( void (*func)(void*), void *arg ); diff -urN x264-snapshot-20160712-2245/common/x86/dct-a.asm x264-snapshot-20160712-2245.x32/common/x86/dct-a.asm --- x264-snapshot-20160712-2245/common/x86/dct-a.asm 2016-07-12 20:45:04.000000000 +0000 +++ x264-snapshot-20160712-2245.x32/common/x86/dct-a.asm 2017-03-03 20:46:59.603997837 +0000 @@ -661,7 +661,7 @@ SUB_NxN_DCT sub16x16_dct8_sse4, sub8x8_dct8_sse4, 256, 16, 0, 0, 14 SUB_NxN_DCT sub16x16_dct8_avx, sub8x8_dct8_avx, 256, 16, 0, 0, 14 %else ; !HIGH_BIT_DEPTH -%if ARCH_X86_64 == 0 +%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0 INIT_MMX SUB_NxN_DCT sub8x8_dct_mmx, sub4x4_dct_mmx, 32, 4, 0, 0, 0 ADD_NxN_IDCT add8x8_idct_mmx, add4x4_idct_mmx, 32, 4, 0, 0 diff -urN x264-snapshot-20160712-2245/common/x86/deblock-a.asm x264-snapshot-20160712-2245.x32/common/x86/deblock-a.asm --- x264-snapshot-20160712-2245/common/x86/deblock-a.asm 2016-07-12 20:45:04.000000000 +0000 +++ x264-snapshot-20160712-2245.x32/common/x86/deblock-a.asm 2017-03-03 20:53:14.125996215 +0000 @@ -303,7 +303,7 @@ RET %endmacro -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 ; in: m0=p1, m1=p0, m2=q0, m3=q1, m8=p2, m9=q2 ; m12=alpha, m13=beta ; out: m0=p1', m3=q1', m1=p0', m2=q0' @@ -434,7 +434,7 @@ ; %1=p0 %2=p1 %3=p2 %4=p3 %5=q0 %6=q1 %7=mask0 ; %8=mask1p %9=2 %10=p0' %11=p1' %12=p2' %macro LUMA_INTRA_P012 12 ; p0..p3 in memory -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 paddw t0, %3, %2 mova t2, %4 paddw t2, %3 @@ -499,7 +499,7 @@ LOAD_AB t0, t1, r2d, r3d mova %1, t0 LOAD_MASK m0, m1, m2, m3, %1, t1, t0, t2, t3 -%if
ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 mova %2, t0 ; mask0 psrlw t3, %1, 2 %else @@ -596,7 +596,7 @@ %endif %endmacro -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 ;----------------------------------------------------------------------------- ; void deblock_v_luma_intra( uint16_t *pix, intptr_t stride, int alpha, int beta ) ;----------------------------------------------------------------------------- @@ -782,7 +782,7 @@ RET %endmacro -%if ARCH_X86_64 == 0 +%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0 INIT_MMX mmx2 DEBLOCK_LUMA DEBLOCK_LUMA_INTRA @@ -1204,7 +1204,7 @@ mova %4, %2 %endmacro -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 ;----------------------------------------------------------------------------- ; void deblock_v_luma( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ) ;----------------------------------------------------------------------------- @@ -1471,7 +1471,7 @@ %macro LUMA_INTRA_P012 4 ; p0..p3 in memory -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 pavgb t0, p2, p1 pavgb t1, p0, q0 %else @@ -1482,7 +1482,7 @@ %endif pavgb t0, t1 ; ((p2+p1+1)/2 + (p0+q0+1)/2 + 1)/2 mova t5, t1 -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 paddb t2, p2, p1 paddb t3, p0, q0 %else @@ -1500,7 +1500,7 @@ pand t2, mpb_1 psubb t0, t2 ; p1' = (p2+p1+p0+q0+2)/4; -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 pavgb t1, p2, q1 psubb t2, p2, q1 %else @@ -1575,7 +1575,7 @@ %define t1 m5 %define t2 m6 %define t3 m7 -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 %define p2 m8 %define q2 m9 %define t4 m10 @@ -1614,7 +1614,7 @@ mova p0, [r4+r5] mova q0, [r0] mova q1, [r0+r1] -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 pxor mpb_0, mpb_0 mova mpb_1, [pb_1] LOAD_MASK r2d, r3d, t5 ; m5=beta-1, t5=alpha-1, m7=mask0 @@ -1657,7 +1657,7 @@ %else INIT_MMX cpuname %endif -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 ;----------------------------------------------------------------------------- ; void deblock_h_luma_intra( uint8_t *pix, intptr_t stride, int alpha, int beta ) ;----------------------------------------------------------------------------- @@ -1727,14 +1727,14 @@ lea r2, [r2+r1*8] TRANSPOSE8x8_MEM PASS8ROWS(pix_tmp+8, pix_tmp+0x38, 0x10, 0x30), PASS8ROWS(r0, r2, r1, r3) RET -%endif ; ARCH_X86_64 +%endif ; ARCH_X86_64 || ARCH_X86_64_32 %endmacro ; DEBLOCK_LUMA_INTRA INIT_XMM sse2 DEBLOCK_LUMA_INTRA v INIT_XMM avx DEBLOCK_LUMA_INTRA v -%if ARCH_X86_64 == 0 +%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0 INIT_MMX mmx2 DEBLOCK_LUMA_INTRA v8 %endif @@ -2014,7 +2014,7 @@ RET %endmacro ; DEBLOCK_CHROMA -%if ARCH_X86_64 == 0 +%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0 INIT_MMX mmx2 DEBLOCK_CHROMA %endif @@ -2114,7 +2114,7 @@ DEBLOCK_CHROMA INIT_XMM avx DEBLOCK_CHROMA -%if ARCH_X86_64 == 0 +%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0 INIT_MMX mmx2 DEBLOCK_CHROMA %endif @@ -2137,14 +2137,14 @@ INIT_XMM sse2 DEBLOCK_H_CHROMA_420_MBAFF -%if ARCH_X86_64 == 0 +%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0 INIT_MMX mmx2 DEBLOCK_H_CHROMA_420_MBAFF %endif %macro DEBLOCK_H_CHROMA_422 0 cglobal deblock_h_chroma_422, 5,8,8 -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 %define cntr r7 %else %define cntr dword r0m @@ -2262,7 +2262,7 @@ DEBLOCK_CHROMA_INTRA INIT_MMX mmx2 DEBLOCK_CHROMA_INTRA_BODY -%if ARCH_X86_64 == 0 +%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0 DEBLOCK_CHROMA_INTRA %endif diff -urN x264-snapshot-20160712-2245/common/x86/mc-a.asm x264-snapshot-20160712-2245.x32/common/x86/mc-a.asm --- x264-snapshot-20160712-2245/common/x86/mc-a.asm 
2016-07-12 20:45:04.000000000 +0000 +++ x264-snapshot-20160712-2245.x32/common/x86/mc-a.asm 2017-03-03 20:46:59.737997836 +0000 @@ -1167,7 +1167,7 @@ %endif %if 0 ; or %1==8 - but the extra branch seems too expensive ja cachesplit -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 test r4b, 1 %else test byte r4m, 1 @@ -1189,7 +1189,7 @@ INIT_MMX AVG_CACHELINE_CHECK 8, 64, mmx2 AVG_CACHELINE_CHECK 12, 64, mmx2 -%if ARCH_X86_64 == 0 +%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0 AVG_CACHELINE_CHECK 16, 64, mmx2 AVG_CACHELINE_CHECK 20, 64, mmx2 AVG_CACHELINE_CHECK 8, 32, mmx2 @@ -1381,7 +1381,7 @@ ;----------------------------------------------------------------------------- %macro PREFETCH_FENC 1 -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 cglobal prefetch_fenc_%1, 5,5 FIX_STRIDES r1, r3 and r4d, 3 @@ -1435,7 +1435,7 @@ prefetcht0 [r0+r1] %endif ret -%endif ; ARCH_X86_64 +%endif ; ARCH_X86_64 || ARCH_X86_64_32 %endmacro INIT_MMX mmx2 @@ -1469,14 +1469,14 @@ ; chroma MC ;============================================================================= -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 DECLARE_REG_TMP 6,7,8 %else DECLARE_REG_TMP 0,1,2 %endif %macro MC_CHROMA_START 1 -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 PROLOGUE 0,9,%1 %else PROLOGUE 0,6,%1 @@ -1533,11 +1533,11 @@ MC_CHROMA_START 0 FIX_STRIDES r4 and r5d, 7 -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 jz .mc1dy %endif and t2d, 7 -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 jz .mc1dx %endif shl r5d, 16 @@ -1638,7 +1638,7 @@ %if mmsize==8 .width4: -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 mov t0, r0 mov t1, r1 mov t2, r3 @@ -1655,7 +1655,7 @@ %endif %else .width8: -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 %define multy0 m8 SWAP 8, 5 %else @@ -1764,7 +1764,7 @@ jg .width8 RET .width8: -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 lea r3, [t2+8*SIZEOF_PIXEL] lea r0, [t0+4*SIZEOF_PIXEL] lea r1, [t1+4*SIZEOF_PIXEL] @@ -1780,7 +1780,7 @@ jmp .loopx %endif -%if ARCH_X86_64 ; too many regs for x86_32 +%if ARCH_X86_64 || ARCH_X86_64_32; too many regs for x86_32 RESET_MM_PERMUTATION %if WIN64 %assign stack_offset stack_offset - stack_size_padded @@ -1907,7 +1907,7 @@ shl r5d, 1 %endif jmp .loop1d_w4 -%endif ; ARCH_X86_64 +%endif ; ARCH_X86_64 || ARCH_X86_64_32 %endmacro ; MC_CHROMA %macro MC_CHROMA_SSSE3 0 @@ -1950,7 +1950,7 @@ SPLATW m6, m6 SPLATW m7, m7 %endif -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 %define shiftround m8 mova m8, [pw_512] %else @@ -2057,7 +2057,7 @@ pshufb m0, m5 movu m1, [r3+8] pshufb m1, m5 -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 SWAP 9, 6 %define mult1 m9 %else diff -urN x264-snapshot-20160712-2245/common/x86/mc-a2.asm x264-snapshot-20160712-2245.x32/common/x86/mc-a2.asm --- x264-snapshot-20160712-2245/common/x86/mc-a2.asm 2016-07-12 20:45:04.000000000 +0000 +++ x264-snapshot-20160712-2245.x32/common/x86/mc-a2.asm 2017-03-03 20:56:32.751995355 +0000 @@ -499,7 +499,7 @@ mova m7, [pw_32] %endif %define pw_rnd m7 -%elif ARCH_X86_64 +%elif ARCH_X86_64 || ARCH_X86_64_32 mova m8, [pw_32] %define pw_rnd m8 %else @@ -654,7 +654,7 @@ HPEL_V 0 INIT_XMM sse2 HPEL_V 8 -%if ARCH_X86_64 == 0 +%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0 INIT_XMM sse2 HPEL_C INIT_XMM ssse3 @@ -706,7 +706,7 @@ RET %endif -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 %macro DO_FILT_V 5 ;The optimum prefetch distance is difficult to determine in checkasm: ;any prefetch seems slower than not prefetching. 
@@ -915,7 +915,7 @@ HPEL INIT_YMM avx2 HPEL -%endif ; ARCH_X86_64 +%endif ; ARCH_X86_64 || ARCH_X86_64_32 %undef movntq %undef movntps @@ -1107,7 +1107,7 @@ lea r0, [r0+r6*2] add r2, r6 add r4, r6 -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 DECLARE_REG_TMP 7,8 %else DECLARE_REG_TMP 1,3 @@ -1304,7 +1304,7 @@ ; pixel *dstc, intptr_t i_dstc, ; pixel *src, intptr_t i_src, int pw, int w, int h ) ;----------------------------------------------------------------------------- -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 cglobal plane_copy_deinterleave_rgb, 8,12 %define %%args r1, r3, r5, r7, r8, r9, r10, r11 mov r8d, r9m @@ -1350,7 +1350,7 @@ ; uint16_t *dstc, intptr_t i_dstc, ; uint32_t *src, intptr_t i_src, int w, int h ) ;----------------------------------------------------------------------------- -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 cglobal plane_copy_deinterleave_v210, 8,10,7 %define src r8 %define org_w r9 @@ -2003,7 +2003,7 @@ INIT_MMX mmx2 FRAME_INIT_LOWRES -%if ARCH_X86_64 == 0 +%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0 INIT_MMX cache32, mmx2 FRAME_INIT_LOWRES %endif diff -urN x264-snapshot-20160712-2245/common/x86/mc-c.c x264-snapshot-20160712-2245.x32/common/x86/mc-c.c --- x264-snapshot-20160712-2245/common/x86/mc-c.c 2016-07-12 20:45:04.000000000 +0000 +++ x264-snapshot-20160712-2245.x32/common/x86/mc-c.c 2017-03-03 21:05:00.079993158 +0000 @@ -480,7 +480,7 @@ HPEL(16, sse2, sse2, sse2, sse2) #else // !HIGH_BIT_DEPTH HPEL(16, sse2_amd, mmx2, mmx2, sse2) -#if ARCH_X86_64 +#if ARCH_X86_64 || ARCH_X86_64_32 void x264_hpel_filter_sse2 ( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *src, intptr_t stride, int width, int height, int16_t *buf ); void x264_hpel_filter_ssse3( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *src, intptr_t stride, int width, int height, int16_t *buf ); void x264_hpel_filter_avx ( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *src, intptr_t stride, int width, int height, int16_t *buf ); @@ -855,7 +855,7 @@ if( !(cpu&X264_CPU_SLOW_PALIGNR) ) { -#if ARCH_X86_64 +#if ARCH_X86_64 || ARCH_X86_64_32 if( !(cpu&X264_CPU_SLOW_ATOM) ) /* The 64-bit version is slower, but the 32-bit version is faster? 
*/ #endif pf->hpel_filter = x264_hpel_filter_ssse3; diff -urN x264-snapshot-20160712-2245/common/x86/pixel-a.asm x264-snapshot-20160712-2245.x32/common/x86/pixel-a.asm --- x264-snapshot-20160712-2245/common/x86/pixel-a.asm 2016-07-12 20:45:04.000000000 +0000 +++ x264-snapshot-20160712-2245.x32/common/x86/pixel-a.asm 2017-03-03 20:52:18.149996458 +0000 @@ -422,7 +422,7 @@ %else .startloop: -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 DECLARE_REG_TMP 0,1,2,3 PROLOGUE 0,0,8 %else @@ -733,7 +733,7 @@ HADDW m5, m2 %endif HADDD m6, m1 -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 punpckldq m5, m6 movq rax, m5 %else @@ -923,7 +923,7 @@ paddd xm6, xm1 HADDW xm5, xm2 HADDD xm6, xm1 -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 punpckldq xm5, xm6 movq rax, xm5 %else @@ -983,7 +983,7 @@ VAR2_END %2, m5, m6 %endmacro -%if ARCH_X86_64 == 0 +%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0 INIT_MMX mmx2 VAR2_8x8_MMX 8, 6 VAR2_8x8_MMX 16, 7 @@ -1502,7 +1502,7 @@ %endmacro %macro BACKUP_POINTERS 0 -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 %if WIN64 PUSH r7 %endif @@ -1512,7 +1512,7 @@ %endmacro %macro RESTORE_AND_INC_POINTERS 0 -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 lea r0, [r6+8*SIZEOF_PIXEL] lea r2, [r7+8*SIZEOF_PIXEL] %if WIN64 @@ -1718,7 +1718,7 @@ %endmacro ; SATDS_SSE2 %macro SA8D_INTER 0 -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 %define lh m10 %define rh m0 %else @@ -1737,7 +1737,7 @@ ; sse2 doesn't seem to like the horizontal way of doing things %define vertical ((notcpuflag(ssse3) || cpuflag(atom)) || HIGH_BIT_DEPTH) -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 ;----------------------------------------------------------------------------- ; int pixel_sa8d_8x8( uint8_t *, intptr_t, uint8_t *, intptr_t ) ;----------------------------------------------------------------------------- @@ -1938,7 +1938,7 @@ shr eax, 1 mov esp, r6 RET -%endif ; !ARCH_X86_64 +%endif ; !ARCH_X86_64 || ARCH_X86_64_32 %endmacro ; SA8D ;============================================================================= @@ -2121,7 +2121,7 @@ ; intra_sa8d_x3_8x8 and intra_satd_x3_4x4 are obsoleted by x9 on ssse3+, ; and are only retained for old cpus. 
%macro INTRA_SA8D_SSE2 0 -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 ;----------------------------------------------------------------------------- ; void intra_sa8d_x3_8x8( uint8_t *fenc, uint8_t edge[36], int *res ) ;----------------------------------------------------------------------------- @@ -2219,7 +2219,7 @@ psrldq m0, 8 movd [r2+8], m0 ; i8x8_dc RET -%endif ; ARCH_X86_64 +%endif ; ARCH_X86_64 || ARCH_X86_64_32 %endmacro ; INTRA_SA8D_SSE2 ; in: r0 = fenc @@ -2491,7 +2491,7 @@ ADD rsp, stack_pad RET -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 %define t0 r6 %else %define t0 r2 @@ -2798,7 +2798,7 @@ %assign pad 0xc0-gprsize-(stack_offset&15) %define pred_buf rsp sub rsp, pad -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 INTRA_X9_PRED intrax9a, m8 %else INTRA_X9_PRED intrax9a, [rsp+0xa0] @@ -2833,7 +2833,7 @@ paddd m2, m3 paddd m4, m5 paddd m6, m7 -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 SWAP 7, 8 pxor m8, m8 %define %%zero m8 @@ -2873,7 +2873,7 @@ RET %endif ; cpuflag -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 ;----------------------------------------------------------------------------- ; int intra_satd_x9_4x4( uint8_t *fenc, uint8_t *fdec, uint16_t *bitcosts ) ;----------------------------------------------------------------------------- @@ -2960,7 +2960,7 @@ paddd xmm0, m0, m1 ; consistent location of return value. only the avx version of hadamard permutes m0, so 3arg is free ret -%else ; !ARCH_X86_64 +%else ; !ARCH_X86_64 || ARCH_X86_64_32 cglobal intra_satd_x9_4x4, 3,4,8 %assign pad 0x120-gprsize-(stack_offset&15) %define fenc_buf rsp @@ -3075,7 +3075,7 @@ %define fenc13 m5 %define fenc46 m6 %define fenc57 m7 -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 %define tmp m8 %assign padbase 0x0 %else @@ -3431,7 +3431,7 @@ ADD rsp, pad RET -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 ;----------------------------------------------------------------------------- ; int intra_sa8d_x9_8x8( uint8_t *fenc, uint8_t *fdec, uint8_t edge[36], uint16_t *bitcosts, uint16_t *satds ) ;----------------------------------------------------------------------------- @@ -3725,7 +3725,7 @@ paddw m0, m2 paddw mret, m0, m3 ret -%endif ; ARCH_X86_64 +%endif ; ARCH_X86_64 || ARCH_X86_64_32 %endmacro ; INTRA8_X9 ; in: r0=pix, r1=stride, r2=stride*3, r3=tmp, m6=mask_ac4, m7=0 @@ -3937,7 +3937,7 @@ movd edx, m0 movd eax, m1 shr edx, 1 -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 shl rdx, 32 add rax, rdx %endif @@ -3986,7 +3986,7 @@ ; in: r0=pix, r1=stride, r2=stride*3 ; out: [esp+16]=sa8d, [esp+32]=satd, r0+=stride*4 cglobal hadamard_ac_8x8 -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 %define spill0 m8 %define spill1 m9 %define spill2 m10 @@ -4172,7 +4172,7 @@ movd eax, xm1 shr edx, 2 - (%1*%2*16/mmsize >> 8) shr eax, 1 -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 shl rdx, 32 add rax, rdx %endif @@ -4182,7 +4182,7 @@ ; instantiate satds -%if ARCH_X86_64 == 0 +%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0 cextern pixel_sa8d_8x8_internal_mmx2 INIT_MMX mmx2 SA8D @@ -4199,7 +4199,7 @@ INIT_XMM sse2 SA8D SATDS_SSE2 -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 SA8D_SATD %endif %if HIGH_BIT_DEPTH == 0 @@ -4215,7 +4215,7 @@ SATDS_SSE2 SA8D HADAMARD_AC_SSE2 -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 SA8D_SATD %endif %endif @@ -4231,7 +4231,7 @@ SATDS_SSE2 SA8D HADAMARD_AC_SSE2 -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 SA8D_SATD %endif %if HIGH_BIT_DEPTH == 0 @@ -4252,7 +4252,7 @@ SATDS_SSE2 SA8D HADAMARD_AC_SSE2 -%if 
ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 SA8D_SATD %endif %if HIGH_BIT_DEPTH == 0 @@ -4266,7 +4266,7 @@ INIT_XMM avx SATDS_SSE2 SA8D -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 SA8D_SATD %endif %if HIGH_BIT_DEPTH == 0 @@ -4279,7 +4279,7 @@ INIT_XMM xop SATDS_SSE2 SA8D -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 SA8D_SATD %endif %if HIGH_BIT_DEPTH == 0 @@ -4295,7 +4295,7 @@ %define TRANS TRANS_SSE4 INIT_YMM avx2 HADAMARD_AC_SSE2 -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 SA8D_SATD %endif @@ -4770,7 +4770,7 @@ pshuflw m4, m0, q0032 %endif addss m0, m4 -%if ARCH_X86_64 == 0 +%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0 movss r0m, m0 fld dword r0m %endif @@ -5162,7 +5162,7 @@ jge .end .loopi: mov r2, [r6+r1] -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 test r2, r2 %else mov r3, r2 @@ -5174,7 +5174,7 @@ TEST 1 TEST 2 TEST 3 -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 shr r2, 32 %else mov r2d, [r6+r1] diff -urN x264-snapshot-20160712-2245/common/x86/predict-a.asm x264-snapshot-20160712-2245.x32/common/x86/predict-a.asm --- x264-snapshot-20160712-2245/common/x86/predict-a.asm 2016-07-12 20:45:04.000000000 +0000 +++ x264-snapshot-20160712-2245.x32/common/x86/predict-a.asm 2017-03-03 20:56:01.527995490 +0000 @@ -640,7 +640,7 @@ cglobal predict_8x8_filter, 4,6,6 add r0, 0x58*SIZEOF_PIXEL %define src r0-0x58*SIZEOF_PIXEL -%if ARCH_X86_64 == 0 +%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0 mov r4, r1 %define t1 r4 %define t4 r1 @@ -942,7 +942,7 @@ PREDICT_8x8_DDLR INIT_XMM ssse3, cache64 PREDICT_8x8_DDLR -%elif ARCH_X86_64 == 0 +%elif ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0 INIT_MMX mmx2 PREDICT_8x8_DDLR %endif @@ -1014,7 +1014,7 @@ PREDICT_8x8_HU d, wd INIT_XMM avx PREDICT_8x8_HU d, wd -%elif ARCH_X86_64 == 0 +%elif ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0 INIT_MMX mmx2 PREDICT_8x8_HU w, bw %endif @@ -1063,13 +1063,13 @@ PREDICT_8x8_VR w INIT_XMM avx PREDICT_8x8_VR w -%elif ARCH_X86_64 == 0 +%elif ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0 INIT_MMX mmx2 PREDICT_8x8_VR b %endif %macro LOAD_PLANE_ARGS 0 -%if cpuflag(avx2) && ARCH_X86_64 == 0 +%if cpuflag(avx2) && ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0 vpbroadcastw m0, r1m vpbroadcastw m2, r2m vpbroadcastw m4, r3m @@ -1090,7 +1090,7 @@ ;----------------------------------------------------------------------------- ; void predict_8x8c_p_core( uint8_t *src, int i00, int b, int c ) ;----------------------------------------------------------------------------- -%if ARCH_X86_64 == 0 && HIGH_BIT_DEPTH == 0 +%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0 && HIGH_BIT_DEPTH == 0 %macro PREDICT_CHROMA_P_MMX 1 cglobal predict_8x%1c_p_core, 1,2 LOAD_PLANE_ARGS @@ -1210,7 +1210,7 @@ ;----------------------------------------------------------------------------- ; void predict_16x16_p_core( uint8_t *src, int i00, int b, int c ) ;----------------------------------------------------------------------------- -%if HIGH_BIT_DEPTH == 0 && ARCH_X86_64 == 0 +%if HIGH_BIT_DEPTH == 0 && ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0 INIT_MMX mmx2 cglobal predict_16x16_p_core, 1,2 LOAD_PLANE_ARGS @@ -1250,7 +1250,7 @@ dec r1d jg .loop RET -%endif ; !HIGH_BIT_DEPTH && !ARCH_X86_64 +%endif ; !HIGH_BIT_DEPTH && !ARCH_X86_64 || ARCH_X86_64_32 %macro PREDICT_16x16_P 0 cglobal predict_16x16_p_core, 1,2,8 @@ -2121,7 +2121,7 @@ INIT_MMX mmx2 cglobal predict_16x16_dc_core, 1,2 -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 movd m6, r1d PRED16x16_DC_MMX m6, 5 %else diff -urN x264-snapshot-20160712-2245/common/x86/predict-c.c 
x264-snapshot-20160712-2245.x32/common/x86/predict-c.c --- x264-snapshot-20160712-2245/common/x86/predict-c.c 2016-07-12 20:45:04.000000000 +0000 +++ x264-snapshot-20160712-2245.x32/common/x86/predict-c.c 2017-03-03 21:04:29.507993291 +0000 @@ -172,9 +172,9 @@ #if HIGH_BIT_DEPTH PREDICT_16x16_P_INLINE( sse2, sse2 ) #else // !HIGH_BIT_DEPTH -#if !ARCH_X86_64 +#if !ARCH_X86_64 && !ARCH_X86_64_32 PREDICT_16x16_P( mmx2, mmx2 ) -#endif // !ARCH_X86_64 +#endif // !ARCH_X86_64 && !ARCH_X86_64_32 PREDICT_16x16_P( sse2, sse2 ) #if HAVE_X86_INLINE_ASM PREDICT_16x16_P_INLINE( ssse3, sse2 ) @@ -212,9 +212,9 @@ PREDICT_8x16C_P_END(name)\ } -#if !ARCH_X86_64 && !HIGH_BIT_DEPTH +#if !ARCH_X86_64 && !ARCH_X86_64_32 && !HIGH_BIT_DEPTH PREDICT_8x16C_P( mmx2 ) -#endif // !ARCH_X86_64 && !HIGH_BIT_DEPTH +#endif // !ARCH_X86_64 && !ARCH_X86_64_32 && !HIGH_BIT_DEPTH PREDICT_8x16C_P( sse2 ) PREDICT_8x16C_P( avx ) PREDICT_8x16C_P( avx2 ) @@ -301,9 +301,9 @@ #if HIGH_BIT_DEPTH PREDICT_8x8C_P_INLINE( sse2, sse2 ) #else //!HIGH_BIT_DEPTH -#if !ARCH_X86_64 +#if !ARCH_X86_64 && !ARCH_X86_64_32 PREDICT_8x8C_P( mmx2, mmx2 ) -#endif // !ARCH_X86_64 +#endif // !ARCH_X86_64 && !ARCH_X86_64_32 PREDICT_8x8C_P( sse2, sse2 ) #if HAVE_X86_INLINE_ASM PREDICT_8x8C_P_INLINE( ssse3, sse2 ) @@ -312,7 +312,7 @@ PREDICT_8x8C_P_INLINE( avx, avx ) PREDICT_8x8C_P_INLINE( avx2, avx2 ) -#if ARCH_X86_64 && !HIGH_BIT_DEPTH +#if (ARCH_X86_64 || ARCH_X86_64_32) && !HIGH_BIT_DEPTH static void x264_predict_8x8c_dc_left( uint8_t *src ) { int y; @@ -338,7 +338,7 @@ src += FDEC_STRIDE; } } -#endif // ARCH_X86_64 && !HIGH_BIT_DEPTH +#endif // (ARCH_X86_64 || ARCH_X86_64_32) && !HIGH_BIT_DEPTH /**************************************************************************** * Exported functions: @@ -370,7 +370,7 @@ return; pf[I_PRED_16x16_H] = x264_predict_16x16_h_avx2; #else -#if !ARCH_X86_64 +#if !ARCH_X86_64 && !ARCH_X86_64_32 pf[I_PRED_16x16_P] = x264_predict_16x16_p_mmx2; #endif if( !(cpu&X264_CPU_SSE) ) @@ -431,7 +431,7 @@ return; pf[I_PRED_CHROMA_H] = x264_predict_8x8c_h_avx2; #else -#if ARCH_X86_64 +#if ARCH_X86_64 || ARCH_X86_64_32 pf[I_PRED_CHROMA_DC_LEFT] = x264_predict_8x8c_dc_left; #endif pf[I_PRED_CHROMA_V] = x264_predict_8x8c_v_mmx; @@ -439,7 +439,7 @@ return; pf[I_PRED_CHROMA_DC_TOP] = x264_predict_8x8c_dc_top_mmx2; pf[I_PRED_CHROMA_H] = x264_predict_8x8c_h_mmx2; -#if !ARCH_X86_64 +#if !ARCH_X86_64 && !ARCH_X86_64_32 pf[I_PRED_CHROMA_P] = x264_predict_8x8c_p_mmx2; #endif pf[I_PRED_CHROMA_DC] = x264_predict_8x8c_dc_mmx2; @@ -494,7 +494,7 @@ pf[I_PRED_CHROMA_DC_TOP] = x264_predict_8x16c_dc_top_mmx2; pf[I_PRED_CHROMA_DC] = x264_predict_8x16c_dc_mmx2; pf[I_PRED_CHROMA_H] = x264_predict_8x16c_h_mmx2; -#if !ARCH_X86_64 +#if !ARCH_X86_64 && !ARCH_X86_64_32 pf[I_PRED_CHROMA_P] = x264_predict_8x16c_p_mmx2; #endif if( !(cpu&X264_CPU_SSE2) ) diff -urN x264-snapshot-20160712-2245/common/x86/quant-a.asm x264-snapshot-20160712-2245.x32/common/x86/quant-a.asm --- x264-snapshot-20160712-2245/common/x86/quant-a.asm 2016-07-12 20:45:04.000000000 +0000 +++ x264-snapshot-20160712-2245.x32/common/x86/quant-a.asm 2017-03-03 20:50:50.744996836 +0000 @@ -131,7 +131,7 @@ %if cpuflag(sse4) ptest m5, m5 %else ; !sse4 -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 %if mmsize == 16 packsswb m5, m5 %endif @@ -451,7 +451,7 @@ INIT_MMX mmx2 QUANT_DC quant_2x2_dc, 1 -%if ARCH_X86_64 == 0 ; not needed because sse2 is faster +%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0 ; not needed because sse2 is faster QUANT_DC quant_4x4_dc, 4 INIT_MMX mmx2 QUANT_AC quant_4x4, 4 @@
-607,7 +607,7 @@ %endrep %endmacro -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 DECLARE_REG_TMP 6,3,2 %else DECLARE_REG_TMP 2,0,1 @@ -621,7 +621,7 @@ sub t2d, t0d sub t2d, t1d ; i_mf = i_qp % 6 shl t2d, %1 -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 add r1, t2 ; dequant_mf[i_mf] %else add r1, r1mp ; dequant_mf[i_mf] @@ -724,7 +724,7 @@ DEQUANT 4, 4, 4 DEQUANT 8, 6, 4 %else -%if ARCH_X86_64 == 0 +%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0 INIT_MMX mmx DEQUANT 4, 4, 1 DEQUANT 8, 6, 1 @@ -817,7 +817,7 @@ INIT_YMM avx2 DEQUANT_DC d, pmaddwd %else -%if ARCH_X86_64 == 0 +%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0 INIT_MMX mmx2 DEQUANT_DC w, pmullw %endif @@ -857,7 +857,7 @@ %define %%args dct, dct4x4, dmf, qp %endif -%if ARCH_X86_64 == 0 +%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0 DECLARE_REG_TMP 2,0,1 %endif @@ -869,7 +869,7 @@ sub t2d, t0d sub t2d, t1d ; qp % 6 shl t2d, 6 ; 16 * sizeof(int) -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 imul t2d, [dmfq+t2], -0xffff ; (-dmf) << 16 | dmf %else mov dctq, dctmp @@ -974,7 +974,7 @@ DEQUANT_2x4_DC dconly ; t4 is eax for return value. -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 DECLARE_REG_TMP 0,1,2,3,6,4 ; Identical for both Windows and *NIX %else DECLARE_REG_TMP 4,1,2,3,0,5 @@ -1120,7 +1120,7 @@ RET %endmacro -%if ARCH_X86_64 == 0 +%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0 INIT_MMX mmx DENOISE_DCT %endif @@ -1170,7 +1170,7 @@ RET %endmacro -%if ARCH_X86_64 == 0 +%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0 INIT_MMX mmx DENOISE_DCT %endif @@ -1306,7 +1306,7 @@ %endmacro -%if ARCH_X86_64 == 0 +%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0 INIT_MMX mmx2 DECIMATE4x4 15 DECIMATE4x4 16 @@ -1343,7 +1343,7 @@ %macro DECIMATE8x8 0 -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 cglobal decimate_score64, 1,5 %ifdef PIC lea r4, [decimate_table8] @@ -1462,7 +1462,7 @@ %endmacro -%if ARCH_X86_64 == 0 +%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0 INIT_MMX mmx2 DECIMATE8x8 %endif @@ -1573,7 +1573,7 @@ RET %endmacro -%if ARCH_X86_64 == 0 +%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0 INIT_MMX mmx2 COEFF_LAST8 %endif @@ -1613,7 +1613,7 @@ %endmacro %macro COEFF_LAST48 0 -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 cglobal coeff_last4, 1,1 BSR rax, [r0], 0x3f shr eax, 4 @@ -1662,7 +1662,7 @@ BSR eax, r1d, 0x1f RET -%if ARCH_X86_64 == 0 +%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0 cglobal coeff_last64, 1, 4-mmsize/16 pxor m2, m2 LAST_MASK 16, r1d, r0+SIZEOF_DCTCOEF* 32, r3d @@ -1701,7 +1701,7 @@ %endif %endmacro -%if ARCH_X86_64 == 0 +%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0 INIT_MMX mmx2 COEFF_LAST %endif @@ -1728,7 +1728,7 @@ pmovmskb %1, m0 %endmacro -%if ARCH_X86_64 == 0 +%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0 INIT_YMM avx2,lzcnt cglobal coeff_last64, 1,2 pxor m2, m2 @@ -1770,7 +1770,7 @@ ; t6 = eax for return, t3 = ecx for shift, t[01] = r[01] for x86_64 args %if WIN64 DECLARE_REG_TMP 3,1,2,0,4,5,6 -%elif ARCH_X86_64 +%elif ARCH_X86_64 || ARCH_X86_64_32 DECLARE_REG_TMP 0,1,2,3,4,5,6 %else DECLARE_REG_TMP 6,3,2,1,4,5,0 @@ -1821,7 +1821,7 @@ %endmacro INIT_MMX mmx2 -%if ARCH_X86_64 == 0 +%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0 COEFF_LEVELRUN 15 COEFF_LEVELRUN 16 %endif @@ -1885,7 +1885,7 @@ add eax, eax %endif %if %1 > 8 -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 mov r4d, eax shr r4d, 8 %else diff -urN x264-snapshot-20160712-2245/common/x86/sad-a.asm x264-snapshot-20160712-2245.x32/common/x86/sad-a.asm --- x264-snapshot-20160712-2245/common/x86/sad-a.asm 2016-07-12 20:45:04.000000000 +0000 +++ 
x264-snapshot-20160712-2245.x32/common/x86/sad-a.asm 2017-03-03 20:57:08.876995199 +0000 @@ -265,7 +265,7 @@ ; void pixel_vsad( pixel *src, intptr_t stride ); ;----------------------------------------------------------------------------- -%if ARCH_X86_64 == 0 +%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0 INIT_MMX cglobal pixel_vsad_mmx2, 3,3 mova m0, [r0] @@ -1042,7 +1042,7 @@ paddw m2, m3 %endmacro -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 DECLARE_REG_TMP 6 %else DECLARE_REG_TMP 5 @@ -1733,7 +1733,7 @@ CHECK_SPLIT r3m, %1, %3 jmp pixel_sad_x3_%1x%2_%4 .split: -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 PROLOGUE 6,9 push r3 push r2 @@ -1799,7 +1799,7 @@ CHECK_SPLIT r4m, %1, %3 jmp pixel_sad_x4_%1x%2_%4 .split: -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 PROLOGUE 6,9 mov r8, r6mp push r4 @@ -1878,7 +1878,7 @@ ; instantiate the aligned sads INIT_MMX -%if ARCH_X86_64 == 0 +%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0 SAD16_CACHELINE_FUNC_MMX2 8, 32 SAD16_CACHELINE_FUNC_MMX2 16, 32 SAD8_CACHELINE_FUNC_MMX2 4, 32 @@ -1886,23 +1886,23 @@ SAD8_CACHELINE_FUNC_MMX2 16, 32 SAD16_CACHELINE_FUNC_MMX2 8, 64 SAD16_CACHELINE_FUNC_MMX2 16, 64 -%endif ; !ARCH_X86_64 +%endif ; !ARCH_X86_64 || ARCH_X86_64_32 SAD8_CACHELINE_FUNC_MMX2 4, 64 SAD8_CACHELINE_FUNC_MMX2 8, 64 SAD8_CACHELINE_FUNC_MMX2 16, 64 -%if ARCH_X86_64 == 0 +%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0 SADX34_CACHELINE_FUNC 16, 16, 32, mmx2, mmx2, mmx2 SADX34_CACHELINE_FUNC 16, 8, 32, mmx2, mmx2, mmx2 SADX34_CACHELINE_FUNC 8, 16, 32, mmx2, mmx2, mmx2 SADX34_CACHELINE_FUNC 8, 8, 32, mmx2, mmx2, mmx2 SADX34_CACHELINE_FUNC 16, 16, 64, mmx2, mmx2, mmx2 SADX34_CACHELINE_FUNC 16, 8, 64, mmx2, mmx2, mmx2 -%endif ; !ARCH_X86_64 +%endif ; !ARCH_X86_64 || ARCH_X86_64_32 SADX34_CACHELINE_FUNC 8, 16, 64, mmx2, mmx2, mmx2 SADX34_CACHELINE_FUNC 8, 8, 64, mmx2, mmx2, mmx2 -%if ARCH_X86_64 == 0 +%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0 SAD16_CACHELINE_FUNC sse2, 8 SAD16_CACHELINE_FUNC sse2, 16 %assign i 1 @@ -1912,7 +1912,7 @@ %endrep SADX34_CACHELINE_FUNC 16, 16, 64, sse2, sse2, sse2 SADX34_CACHELINE_FUNC 16, 8, 64, sse2, sse2, sse2 -%endif ; !ARCH_X86_64 +%endif ; !ARCH_X86_64 || ARCH_X86_64_32 SADX34_CACHELINE_FUNC 8, 16, 64, sse2, mmx2, sse2 SAD16_CACHELINE_FUNC ssse3, 8 diff -urN x264-snapshot-20160712-2245/common/x86/x86inc.asm x264-snapshot-20160712-2245.x32/common/x86/x86inc.asm --- x264-snapshot-20160712-2245/common/x86/x86inc.asm 2016-07-12 20:45:04.000000000 +0000 +++ x264-snapshot-20160712-2245.x32/common/x86/x86inc.asm 2017-03-03 20:54:08.102995981 +0000 @@ -43,7 +43,7 @@ %endif %ifndef STACK_ALIGNMENT - %if ARCH_X86_64 + %if ARCH_X86_64 || ARCH_X86_64_32 %define STACK_ALIGNMENT 16 %else %define STACK_ALIGNMENT 4 @@ -52,7 +52,7 @@ %define WIN64 0 %define UNIX64 0 -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 %ifidn __OUTPUT_FORMAT__,win32 %define WIN64 1 %elifidn __OUTPUT_FORMAT__,win64 @@ -85,7 +85,7 @@ %if WIN64 %define PIC -%elif ARCH_X86_64 == 0 +%elif ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0 ; x86_32 doesn't require PIC. ; Some distros prefer shared objects to be PIC, but nothing breaks if ; the code contains a few textrels, so we'll skip that complexity. 
@@ -171,7 +171,7 @@ %define e%1h %3 %define r%1b %2 %define e%1b %2 - %if ARCH_X86_64 == 0 + %if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0 %define r%1 e%1 %endif %endmacro @@ -208,7 +208,7 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14 -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 %define gprsize 8 %else %define gprsize 4 @@ -882,7 +882,7 @@ %define RESET_MM_PERMUTATION INIT_XMM %1 %define mmsize 16 %define num_mmregs 8 - %if ARCH_X86_64 + %if ARCH_X86_64 || ARCH_X86_64_32 %define num_mmregs 16 %endif %define mova movdqa @@ -903,7 +903,7 @@ %define RESET_MM_PERMUTATION INIT_YMM %1 %define mmsize 32 %define num_mmregs 8 - %if ARCH_X86_64 + %if ARCH_X86_64 || ARCH_X86_64_32 %define num_mmregs 16 %endif %define mova movdqa @@ -1523,7 +1523,7 @@ ; workaround: vpbroadcastq is broken in x86_32 due to a yasm bug (fixed in 1.3.0) %ifdef __YASM_VER__ - %if __YASM_VERSION_ID__ < 0x01030000 && ARCH_X86_64 == 0 + %if __YASM_VERSION_ID__ < 0x01030000 && ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0 %macro vpbroadcastq 2 %if sizeof%1 == 16 movddup %1, %2 diff -urN x264-snapshot-20160712-2245/common/x86/x86util.asm x264-snapshot-20160712-2245.x32/common/x86/x86util.asm --- x264-snapshot-20160712-2245/common/x86/x86util.asm 2016-07-12 20:45:04.000000000 +0000 +++ x264-snapshot-20160712-2245.x32/common/x86/x86util.asm 2017-03-03 20:46:59.699997837 +0000 @@ -102,7 +102,7 @@ %endmacro %macro TRANSPOSE8x8W 9-11 -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 SBUTTERFLY wd, %1, %2, %9 SBUTTERFLY wd, %3, %4, %9 SBUTTERFLY wd, %5, %6, %9 diff -urN x264-snapshot-20160712-2245/configure x264-snapshot-20160712-2245.x32/configure --- x264-snapshot-20160712-2245/configure 2016-07-12 20:45:04.000000000 +0000 +++ x264-snapshot-20160712-2245.x32/configure 2017-03-03 19:07:55.239992122 +0000 @@ -698,31 +698,36 @@ fi ;; x86_64) - ARCH="X86_64" AS="${AS-yasm}" AS_EXT=".asm" - ASFLAGS="$ASFLAGS -DARCH_X86_64=1 -I\$(SRCPATH)/common/x86/" stack_alignment=16 - [ $compiler = GNU ] && CFLAGS="-m64 $CFLAGS" && LDFLAGS="-m64 $LDFLAGS" - if [ "$SYS" = MACOSX ]; then - ASFLAGS="$ASFLAGS -f macho64 -DPIC -DPREFIX" - if cc_check '' "-arch x86_64"; then - CFLAGS="$CFLAGS -arch x86_64" - LDFLAGS="$LDFLAGS -arch x86_64" - fi - elif [ "$SYS" = WINDOWS -o "$SYS" = CYGWIN ]; then - ASFLAGS="$ASFLAGS -f win64" - if [ $compiler = GNU ]; then - # only the GNU toolchain is inconsistent in prefixing function names with _ - cc_check "" "-S" && grep -q "_main:" conftest && ASFLAGS="$ASFLAGS -DPREFIX" - cc_check "" "-Wl,--high-entropy-va" && LDFLAGS="$LDFLAGS -Wl,--high-entropy-va" - LDFLAGS="$LDFLAGS -Wl,--dynamicbase,--nxcompat,--tsaware" - LDFLAGSCLI="$LDFLAGSCLI -Wl,--image-base,0x140000000" - SOFLAGS="$SOFLAGS -Wl,--image-base,0x180000000" - RCFLAGS="--target=pe-x86-64 $RCFLAGS" - fi + if [[ $host_os = *x32 ]]; then + ARCH="X86_64_32" + ASFLAGS="$ASFLAGS -DARCH_X86_64_32=1 -I\$(SRCPATH)/common/x86/ -f elfx32" else - ASFLAGS="$ASFLAGS -f elf64" + ARCH="X86_64" + ASFLAGS="$ASFLAGS -DARCH_X86_64=1 -I\$(SRCPATH)/common/x86/" + [ $compiler = GNU ] && CFLAGS="-m64 $CFLAGS" && LDFLAGS="-m64 $LDFLAGS" + if [ "$SYS" = MACOSX ]; then + ASFLAGS="$ASFLAGS -f macho64 -DPIC -DPREFIX" + if cc_check '' "-arch x86_64"; then + CFLAGS="$CFLAGS -arch x86_64" + LDFLAGS="$LDFLAGS -arch x86_64" + fi + elif [ "$SYS" = WINDOWS -o "$SYS" = CYGWIN ]; then + ASFLAGS="$ASFLAGS -f win64" + if [ $compiler = GNU ]; then + # only the GNU toolchain is inconsistent in prefixing function names with _ + cc_check "" "-S" && grep -q "_main:" conftest && 
ASFLAGS="$ASFLAGS -DPREFIX" + cc_check "" "-Wl,--high-entropy-va" && LDFLAGS="$LDFLAGS -Wl,--high-entropy-va" + LDFLAGS="$LDFLAGS -Wl,--dynamicbase,--nxcompat,--tsaware" + LDFLAGSCLI="$LDFLAGSCLI -Wl,--image-base,0x140000000" + SOFLAGS="$SOFLAGS -Wl,--image-base,0x180000000" + RCFLAGS="--target=pe-x86-64 $RCFLAGS" + fi + else + ASFLAGS="$ASFLAGS -f elf64" + fi fi ;; powerpc*) @@ -1201,7 +1206,7 @@ fi [ "$lto" = "auto" ] && lto="no" -if cc_check '' -fno-tree-vectorize ; then +if cc_check '' -fno-tree-vectorize && ! [[ $host_os = *x32 ]]; then CFLAGS="$CFLAGS -fno-tree-vectorize" fi diff -urN x264-snapshot-20160712-2245/encoder/cabac.c x264-snapshot-20160712-2245.x32/encoder/cabac.c --- x264-snapshot-20160712-2245/encoder/cabac.c 2016-07-12 20:45:04.000000000 +0000 +++ x264-snapshot-20160712-2245.x32/encoder/cabac.c 2017-03-03 19:12:59.160990806 +0000 @@ -801,7 +801,7 @@ static void ALWAYS_INLINE x264_cabac_block_residual( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l ) { -#if ARCH_X86_64 && HAVE_MMX +#if (ARCH_X86_64 || ARCH_X86_64_32) && HAVE_MMX h->bsf.cabac_block_residual_internal( l, MB_INTERLACED, ctx_block_cat, cb ); #else x264_cabac_block_residual_c( h, cb, ctx_block_cat, l ); @@ -915,7 +915,7 @@ static ALWAYS_INLINE void x264_cabac_block_residual_8x8( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l ) { -#if ARCH_X86_64 && HAVE_MMX +#if (ARCH_X86_64 || ARCH_X86_64_32) && HAVE_MMX h->bsf.cabac_block_residual_8x8_rd_internal( l, MB_INTERLACED, ctx_block_cat, cb ); #else x264_cabac_block_residual_8x8_rd_c( h, cb, ctx_block_cat, l ); @@ -923,7 +923,7 @@ } static ALWAYS_INLINE void x264_cabac_block_residual( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l ) { -#if ARCH_X86_64 && HAVE_MMX +#if (ARCH_X86_64 || ARCH_X86_64_32) && HAVE_MMX h->bsf.cabac_block_residual_rd_internal( l, MB_INTERLACED, ctx_block_cat, cb ); #else x264_cabac_block_residual_rd_c( h, cb, ctx_block_cat, l ); diff -urN x264-snapshot-20160712-2245/encoder/encoder.c x264-snapshot-20160712-2245.x32/encoder/encoder.c --- x264-snapshot-20160712-2245/encoder/encoder.c 2016-07-12 20:45:04.000000000 +0000 +++ x264-snapshot-20160712-2245.x32/encoder/encoder.c 2017-03-03 19:02:22.231993564 +0000 @@ -1593,7 +1593,7 @@ if( x264_clz( temp ) != 23 ) { x264_log( h, X264_LOG_ERROR, "CLZ test failed: x264 has been miscompiled!\n" ); -#if ARCH_X86 || ARCH_X86_64 +#if ARCH_X86 || ARCH_X86_64 || ARCH_X86_64_32 x264_log( h, X264_LOG_ERROR, "Are you attempting to run an SSE4a/LZCNT-targeted build on a CPU that\n" ); x264_log( h, X264_LOG_ERROR, "doesn't support it?\n" ); #endif diff -urN x264-snapshot-20160712-2245/encoder/rdo.c x264-snapshot-20160712-2245.x32/encoder/rdo.c --- x264-snapshot-20160712-2245/encoder/rdo.c 2016-07-12 20:45:04.000000000 +0000 +++ x264-snapshot-20160712-2245.x32/encoder/rdo.c 2017-03-03 19:13:40.791990626 +0000 @@ -695,7 +695,7 @@ return !!dct[0]; } -#if HAVE_MMX && ARCH_X86_64 +#if HAVE_MMX && (ARCH_X86_64 || ARCH_X86_64_32) #define TRELLIS_ARGS unquant_mf, zigzag, lambda2, last_nnz, orig_coefs, quant_coefs, dct,\ cabac_state_sig, cabac_state_last, M64(cabac_state), M16(cabac_state+8) if( num_coefs == 16 && !dc ) diff -urN x264-snapshot-20160712-2245/tools/checkasm-a.asm x264-snapshot-20160712-2245.x32/tools/checkasm-a.asm --- x264-snapshot-20160712-2245/tools/checkasm-a.asm 2016-07-12 20:45:04.000000000 +0000 +++ x264-snapshot-20160712-2245.x32/tools/checkasm-a.asm 2017-03-03 20:46:59.288997838 +0000 @@ -30,7 +30,7 @@ error_message: db "failed to preserve register", 0 -%if 
ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 ; just random numbers to reduce the chance of incidental match ALIGN 16 x6: dq 0x1a1b2550a612b48c,0x79445c159ce79064 @@ -61,7 +61,7 @@ ; (max_args % 4) must equal 3 for stack alignment %define max_args 15 -%if ARCH_X86_64 +%if ARCH_X86_64 || ARCH_X86_64_32 ;----------------------------------------------------------------------------- ; void x264_checkasm_stack_clobber( uint64_t clobber, ... ) @@ -203,7 +203,7 @@ .ok: REP_RET -%endif ; ARCH_X86_64 +%endif ; ARCH_X86_64 || ARCH_X86_64_32 ;----------------------------------------------------------------------------- ; int x264_stack_pagealign( int (*func)(), int align ) diff -urN x264-snapshot-20160712-2245/tools/checkasm.c x264-snapshot-20160712-2245.x32/tools/checkasm.c --- x264-snapshot-20160712-2245/tools/checkasm.c 2016-07-12 20:45:04.000000000 +0000 +++ x264-snapshot-20160712-2245.x32/tools/checkasm.c 2017-03-03 19:02:22.239993564 +0000 @@ -217,7 +217,7 @@ } } -#if ARCH_X86 || ARCH_X86_64 +#if ARCH_X86 || ARCH_X86_64 || ARCH_X86_64_32 int x264_stack_pagealign( int (*func)(), int align ); /* detect when callee-saved regs aren't saved @@ -254,7 +254,7 @@ uint64_t r = (rand() & 0xffff) * 0x0001000100010001ULL; \ x264_checkasm_stack_clobber( r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r ); /* max_args+6 */ \ x264_checkasm_call(( intptr_t(*)())func, &ok, 0, 0, 0, 0, __VA_ARGS__ ); }) -#elif ARCH_X86 || (ARCH_AARCH64 && !defined(__APPLE__)) || ARCH_ARM +#elif ARCH_X86 || ARCH_X86_64_32 || (ARCH_AARCH64 && !defined(__APPLE__)) || ARCH_ARM #define call_a1(func,...) x264_checkasm_call( (intptr_t(*)())func, &ok, __VA_ARGS__ ) #else #define call_a1 call_c1 @@ -2884,7 +2884,7 @@ if( argc > 1 && !strncmp( argv[1], "--bench", 7 ) ) { -#if !ARCH_X86 && !ARCH_X86_64 && !ARCH_PPC && !ARCH_ARM && !ARCH_AARCH64 && !ARCH_MIPS +#if !ARCH_X86 && !ARCH_X86_64 && !ARCH_X86_64_32 && !ARCH_PPC && !ARCH_ARM && !ARCH_AARCH64 && !ARCH_MIPS fprintf( stderr, "no --bench for your cpu until you port rdtsc\n" ); return 1; #endif