Bug 420241: x264-snapshot-20160712-2245, accept ARCH_X86_64_32 wherever ARCH_X86_64 is tested (diff view of the attached patch)
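The patch below extends the ARCH_X86_64 tests in x264's C sources and x86 assembly so that they also fire when ARCH_X86_64_32 is set, and disables the 32-bit-only fallbacks in that case; the name suggests an x32-style target (64-bit instruction set with 32-bit pointers). The ARCH_X86_64_32 macro itself is not defined anywhere in this diff and is presumably supplied by the build system. As a rough sketch only, not part of the patch, such a define could be derived from compiler macros; GCC and Clang set both __x86_64__ and __ILP32__ when targeting x32:

/* Hypothetical sketch: derive ARCH_X86_64_32 from compiler-provided macros.
 * Not part of the attached patch; x264 normally receives its ARCH_* defines
 * from configure, and the assembly would additionally need the flag passed
 * to the assembler (e.g. -DARCH_X86_64_32=1) for the %if checks to see it. */
#if defined(__x86_64__) && defined(__ILP32__)
#  define ARCH_X86_64_32 1
#else
#  define ARCH_X86_64_32 0
#endif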

(-)x264-snapshot-20160712-2245/common/bitstream.c (-3 / +3 lines)
Lines 116-122 Link Here
116
116
117
    pf->nal_escape = x264_nal_escape_c;
117
    pf->nal_escape = x264_nal_escape_c;
118
#if HAVE_MMX
118
#if HAVE_MMX
119
#if ARCH_X86_64
119
#if (ARCH_X86_64 || ARCH_X86_64_32)
120
    pf->cabac_block_residual_internal = x264_cabac_block_residual_internal_sse2;
120
    pf->cabac_block_residual_internal = x264_cabac_block_residual_internal_sse2;
121
    pf->cabac_block_residual_rd_internal = x264_cabac_block_residual_rd_internal_sse2;
121
    pf->cabac_block_residual_rd_internal = x264_cabac_block_residual_rd_internal_sse2;
122
    pf->cabac_block_residual_8x8_rd_internal = x264_cabac_block_residual_8x8_rd_internal_sse2;
122
    pf->cabac_block_residual_8x8_rd_internal = x264_cabac_block_residual_8x8_rd_internal_sse2;
Lines 126-132 Link Here
126
        pf->nal_escape = x264_nal_escape_mmx2;
126
        pf->nal_escape = x264_nal_escape_mmx2;
127
    if( cpu&X264_CPU_SSE2 )
127
    if( cpu&X264_CPU_SSE2 )
128
    {
128
    {
129
#if ARCH_X86_64
129
#if (ARCH_X86_64 || ARCH_X86_64_32)
130
        if( cpu&X264_CPU_LZCNT )
130
        if( cpu&X264_CPU_LZCNT )
131
        {
131
        {
132
            pf->cabac_block_residual_internal = x264_cabac_block_residual_internal_sse2_lzcnt;
132
            pf->cabac_block_residual_internal = x264_cabac_block_residual_internal_sse2_lzcnt;
Lines 137-143 Link Here
137
        if( cpu&X264_CPU_SSE2_IS_FAST )
137
        if( cpu&X264_CPU_SSE2_IS_FAST )
138
            pf->nal_escape = x264_nal_escape_sse2;
138
            pf->nal_escape = x264_nal_escape_sse2;
139
    }
139
    }
140
#if ARCH_X86_64
140
#if (ARCH_X86_64 || ARCH_X86_64_32)
141
    if( cpu&X264_CPU_SSSE3 )
141
    if( cpu&X264_CPU_SSSE3 )
142
    {
142
    {
143
        pf->cabac_block_residual_rd_internal = x264_cabac_block_residual_rd_internal_ssse3;
143
        pf->cabac_block_residual_rd_internal = x264_cabac_block_residual_rd_internal_ssse3;
(-)x264-snapshot-20160712-2245/common/common.h (-1 / +1 lines)
Lines 1015-1021 Link Here
1015
    return cnt;
1015
    return cnt;
1016
}
1016
}
1017
1017
1018
#if ARCH_X86 || ARCH_X86_64
1018
#if ARCH_X86 || ARCH_X86_64 || ARCH_X86_64_32
1019
#include "x86/util.h"
1019
#include "x86/util.h"
1020
#endif
1020
#endif
1021
1021
(-)x264-snapshot-20160712-2245/common/cpu.c (-1 / +1 lines)
Lines 128-134 Link Here
128
    uint32_t max_extended_cap, max_basic_cap;
128
    uint32_t max_extended_cap, max_basic_cap;
129
    int cache;
129
    int cache;
130
130
131
#if !ARCH_X86_64
131
#if !ARCH_X86_64 && !ARCH_X86_64_32
132
    if( !x264_cpu_cpuid_test() )
132
    if( !x264_cpu_cpuid_test() )
133
        return 0;
133
        return 0;
134
#endif
134
#endif
(-)x264-snapshot-20160712-2245/common/dct.c (-5 / +5 lines)
Lines 619-625 Link Here
619
        dctf->idct4x4dc     = x264_idct4x4dc_mmx;
619
        dctf->idct4x4dc     = x264_idct4x4dc_mmx;
620
        dctf->sub8x8_dct_dc = x264_sub8x8_dct_dc_mmx2;
620
        dctf->sub8x8_dct_dc = x264_sub8x8_dct_dc_mmx2;
621
621
622
#if !ARCH_X86_64
622
#if !ARCH_X86_64 && !ARCH_X86_64_32
623
        dctf->sub8x8_dct    = x264_sub8x8_dct_mmx;
623
        dctf->sub8x8_dct    = x264_sub8x8_dct_mmx;
624
        dctf->sub16x16_dct  = x264_sub16x16_dct_mmx;
624
        dctf->sub16x16_dct  = x264_sub16x16_dct_mmx;
625
        dctf->add8x8_idct   = x264_add8x8_idct_mmx;
625
        dctf->add8x8_idct   = x264_add8x8_idct_mmx;
Lines 707-713 Link Here
707
        dctf->sub8x8_dct       = x264_sub8x8_dct_avx2;
707
        dctf->sub8x8_dct       = x264_sub8x8_dct_avx2;
708
        dctf->sub16x16_dct     = x264_sub16x16_dct_avx2;
708
        dctf->sub16x16_dct     = x264_sub16x16_dct_avx2;
709
        dctf->add16x16_idct_dc = x264_add16x16_idct_dc_avx2;
709
        dctf->add16x16_idct_dc = x264_add16x16_idct_dc_avx2;
710
#if ARCH_X86_64
710
#if ARCH_X86_64 || ARCH_X86_64_32
711
        dctf->sub16x16_dct8    = x264_sub16x16_dct8_avx2;
711
        dctf->sub16x16_dct8    = x264_sub16x16_dct8_avx2;
712
#endif
712
#endif
713
    }
713
    }
Lines 976-988 Link Here
976
        pf_interlaced->scan_8x8 = x264_zigzag_scan_8x8_field_sse4;
976
        pf_interlaced->scan_8x8 = x264_zigzag_scan_8x8_field_sse4;
977
    if( cpu&X264_CPU_AVX )
977
    if( cpu&X264_CPU_AVX )
978
        pf_interlaced->scan_8x8 = x264_zigzag_scan_8x8_field_avx;
978
        pf_interlaced->scan_8x8 = x264_zigzag_scan_8x8_field_avx;
979
#if ARCH_X86_64
979
#if ARCH_X86_64 || ARCH_X86_64_32
980
    if( cpu&X264_CPU_AVX )
980
    if( cpu&X264_CPU_AVX )
981
    {
981
    {
982
        pf_progressive->scan_4x4 = x264_zigzag_scan_4x4_frame_avx;
982
        pf_progressive->scan_4x4 = x264_zigzag_scan_4x4_frame_avx;
983
        pf_progressive->scan_8x8 = x264_zigzag_scan_8x8_frame_avx;
983
        pf_progressive->scan_8x8 = x264_zigzag_scan_8x8_frame_avx;
984
    }
984
    }
985
#endif // ARCH_X86_64
985
#endif // ARCH_X86_64 || ARCH_X86_64_32
986
#endif // HAVE_MMX
986
#endif // HAVE_MMX
987
#else
987
#else
988
#if HAVE_MMX
988
#if HAVE_MMX
Lines 1010-1016 Link Here
1010
    {
1010
    {
1011
        pf_interlaced->sub_4x4   = x264_zigzag_sub_4x4_field_avx;
1011
        pf_interlaced->sub_4x4   = x264_zigzag_sub_4x4_field_avx;
1012
        pf_progressive->sub_4x4  = x264_zigzag_sub_4x4_frame_avx;
1012
        pf_progressive->sub_4x4  = x264_zigzag_sub_4x4_frame_avx;
1013
#if ARCH_X86_64
1013
#if ARCH_X86_64 || ARCH_X86_64_32
1014
        pf_interlaced->sub_4x4ac = x264_zigzag_sub_4x4ac_field_avx;
1014
        pf_interlaced->sub_4x4ac = x264_zigzag_sub_4x4ac_field_avx;
1015
        pf_progressive->sub_4x4ac= x264_zigzag_sub_4x4ac_frame_avx;
1015
        pf_progressive->sub_4x4ac= x264_zigzag_sub_4x4ac_frame_avx;
1016
#endif
1016
#endif
(-)x264-snapshot-20160712-2245/common/frame.c (-1 / +1 lines)
Lines 75-81 Link Here
75
    int i_stride, i_width, i_lines, luma_plane_count;
75
    int i_stride, i_width, i_lines, luma_plane_count;
76
    int i_padv = PADV << PARAM_INTERLACED;
76
    int i_padv = PADV << PARAM_INTERLACED;
77
    int align = 16;
77
    int align = 16;
78
#if ARCH_X86 || ARCH_X86_64
78
#if ARCH_X86 || ARCH_X86_64 || ARCH_X86_64_32
79
    if( h->param.cpu&X264_CPU_CACHELINE_64 )
79
    if( h->param.cpu&X264_CPU_CACHELINE_64 )
80
        align = 64;
80
        align = 64;
81
    else if( h->param.cpu&X264_CPU_CACHELINE_32 || h->param.cpu&X264_CPU_AVX )
81
    else if( h->param.cpu&X264_CPU_CACHELINE_32 || h->param.cpu&X264_CPU_AVX )
(-)x264-snapshot-20160712-2245/common/osdep.h (-3 / +3 lines)
Lines 147-153 Link Here
147
#define ALIGNED_ARRAY_64( ... ) EXPAND( ALIGNED_ARRAY_EMU( 63, __VA_ARGS__ ) )
147
#define ALIGNED_ARRAY_64( ... ) EXPAND( ALIGNED_ARRAY_EMU( 63, __VA_ARGS__ ) )
148
148
149
/* For AVX2 */
149
/* For AVX2 */
150
#if ARCH_X86 || ARCH_X86_64
150
#if ARCH_X86 || ARCH_X86_64 || ARCH_X86_64_32
151
#define NATIVE_ALIGN 32
151
#define NATIVE_ALIGN 32
152
#define ALIGNED_N ALIGNED_32
152
#define ALIGNED_N ALIGNED_32
153
#define ALIGNED_ARRAY_N ALIGNED_ARRAY_32
153
#define ALIGNED_ARRAY_N ALIGNED_ARRAY_32
Lines 293-299 Link Here
293
    return (x<<24) + ((x<<8)&0xff0000) + ((x>>8)&0xff00) + (x>>24);
293
    return (x<<24) + ((x<<8)&0xff0000) + ((x>>8)&0xff00) + (x>>24);
294
}
294
}
295
#endif
295
#endif
296
#if HAVE_X86_INLINE_ASM && ARCH_X86_64
296
#if HAVE_X86_INLINE_ASM && (ARCH_X86_64 || ARCH_X86_64_32)
297
static ALWAYS_INLINE uint64_t endian_fix64( uint64_t x )
297
static ALWAYS_INLINE uint64_t endian_fix64( uint64_t x )
298
{
298
{
299
    asm("bswap %0":"+r"(x));
299
    asm("bswap %0":"+r"(x));
Lines 361-367 Link Here
361
/* We require that prefetch not fault on invalid reads, so we only enable it on
361
/* We require that prefetch not fault on invalid reads, so we only enable it on
362
 * known architectures. */
362
 * known architectures. */
363
#elif defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 1) &&\
363
#elif defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 1) &&\
364
      (ARCH_X86 || ARCH_X86_64 || ARCH_ARM || ARCH_PPC)
364
      (ARCH_X86 || ARCH_X86_64 || ARCH_X86_64_32 || ARCH_ARM || ARCH_PPC)
365
#define x264_prefetch(x) __builtin_prefetch(x)
365
#define x264_prefetch(x) __builtin_prefetch(x)
366
#else
366
#else
367
#define x264_prefetch(x)
367
#define x264_prefetch(x)
(-)x264-snapshot-20160712-2245/common/pixel.c (-15 / +15 lines)
Lines 911-917 Link Here
911
911
912
        pixf->sa8d[PIXEL_16x16] = x264_pixel_sa8d_16x16_sse2;
912
        pixf->sa8d[PIXEL_16x16] = x264_pixel_sa8d_16x16_sse2;
913
        pixf->sa8d[PIXEL_8x8]   = x264_pixel_sa8d_8x8_sse2;
913
        pixf->sa8d[PIXEL_8x8]   = x264_pixel_sa8d_8x8_sse2;
914
#if ARCH_X86_64
914
#if ARCH_X86_64 || ARCH_X86_64_32
915
        pixf->intra_sa8d_x3_8x8 = x264_intra_sa8d_x3_8x8_sse2;
915
        pixf->intra_sa8d_x3_8x8 = x264_intra_sa8d_x3_8x8_sse2;
916
        pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_sse2;
916
        pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_sse2;
917
#endif
917
#endif
Lines 975-981 Link Here
975
        pixf->intra_sad_x3_4x4  = x264_intra_sad_x3_4x4_ssse3;
975
        pixf->intra_sad_x3_4x4  = x264_intra_sad_x3_4x4_ssse3;
976
        pixf->sa8d[PIXEL_16x16]= x264_pixel_sa8d_16x16_ssse3;
976
        pixf->sa8d[PIXEL_16x16]= x264_pixel_sa8d_16x16_ssse3;
977
        pixf->sa8d[PIXEL_8x8]  = x264_pixel_sa8d_8x8_ssse3;
977
        pixf->sa8d[PIXEL_8x8]  = x264_pixel_sa8d_8x8_ssse3;
978
#if ARCH_X86_64
978
#if ARCH_X86_64 || ARCH_X86_64_32
979
        pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_ssse3;
979
        pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_ssse3;
980
#endif
980
#endif
981
        pixf->intra_sad_x3_4x4    = x264_intra_sad_x3_4x4_ssse3;
981
        pixf->intra_sad_x3_4x4    = x264_intra_sad_x3_4x4_ssse3;
Lines 995-1001 Link Here
995
        }
995
        }
996
        pixf->sa8d[PIXEL_16x16]= x264_pixel_sa8d_16x16_sse4;
996
        pixf->sa8d[PIXEL_16x16]= x264_pixel_sa8d_16x16_sse4;
997
        pixf->sa8d[PIXEL_8x8]  = x264_pixel_sa8d_8x8_sse4;
997
        pixf->sa8d[PIXEL_8x8]  = x264_pixel_sa8d_8x8_sse4;
998
#if ARCH_X86_64
998
#if ARCH_X86_64 || ARCH_X86_64_32
999
        pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_sse4;
999
        pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_sse4;
1000
#endif
1000
#endif
1001
        pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_sse4;
1001
        pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_sse4;
Lines 1018-1024 Link Here
1018
        pixf->ssd_nv12_core    = x264_pixel_ssd_nv12_core_avx;
1018
        pixf->ssd_nv12_core    = x264_pixel_ssd_nv12_core_avx;
1019
        pixf->ssim_4x4x2_core  = x264_pixel_ssim_4x4x2_core_avx;
1019
        pixf->ssim_4x4x2_core  = x264_pixel_ssim_4x4x2_core_avx;
1020
        pixf->ssim_end4        = x264_pixel_ssim_end4_avx;
1020
        pixf->ssim_end4        = x264_pixel_ssim_end4_avx;
1021
#if ARCH_X86_64
1021
#if ARCH_X86_64 || ARCH_X86_64_32
1022
        pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_avx;
1022
        pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_avx;
1023
#endif
1023
#endif
1024
        pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_avx;
1024
        pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_avx;
Lines 1032-1038 Link Here
1032
        pixf->var[PIXEL_8x8]   = x264_pixel_var_8x8_xop;
1032
        pixf->var[PIXEL_8x8]   = x264_pixel_var_8x8_xop;
1033
        pixf->vsad = x264_pixel_vsad_xop;
1033
        pixf->vsad = x264_pixel_vsad_xop;
1034
        pixf->asd8 = x264_pixel_asd8_xop;
1034
        pixf->asd8 = x264_pixel_asd8_xop;
1035
#if ARCH_X86_64
1035
#if ARCH_X86_64 || ARCH_X86_64_32
1036
        pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_xop;
1036
        pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_xop;
1037
#endif
1037
#endif
1038
    }
1038
    }
Lines 1125-1131 Link Here
1125
        pixf->ssim_end4        = x264_pixel_ssim_end4_sse2;
1125
        pixf->ssim_end4        = x264_pixel_ssim_end4_sse2;
1126
        pixf->sa8d[PIXEL_16x16] = x264_pixel_sa8d_16x16_sse2;
1126
        pixf->sa8d[PIXEL_16x16] = x264_pixel_sa8d_16x16_sse2;
1127
        pixf->sa8d[PIXEL_8x8]   = x264_pixel_sa8d_8x8_sse2;
1127
        pixf->sa8d[PIXEL_8x8]   = x264_pixel_sa8d_8x8_sse2;
1128
#if ARCH_X86_64
1128
#if ARCH_X86_64 || ARCH_X86_64_32
1129
        pixf->intra_sa8d_x3_8x8 = x264_intra_sa8d_x3_8x8_sse2;
1129
        pixf->intra_sa8d_x3_8x8 = x264_intra_sa8d_x3_8x8_sse2;
1130
        pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_sse2;
1130
        pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_sse2;
1131
#endif
1131
#endif
Lines 1194-1200 Link Here
1194
            pixf->intra_sad_x9_4x4  = x264_intra_sad_x9_4x4_ssse3;
1194
            pixf->intra_sad_x9_4x4  = x264_intra_sad_x9_4x4_ssse3;
1195
            pixf->intra_satd_x9_4x4 = x264_intra_satd_x9_4x4_ssse3;
1195
            pixf->intra_satd_x9_4x4 = x264_intra_satd_x9_4x4_ssse3;
1196
            pixf->intra_sad_x9_8x8  = x264_intra_sad_x9_8x8_ssse3;
1196
            pixf->intra_sad_x9_8x8  = x264_intra_sad_x9_8x8_ssse3;
1197
#if ARCH_X86_64
1197
#if ARCH_X86_64 || ARCH_X86_64_32
1198
            pixf->intra_sa8d_x9_8x8 = x264_intra_sa8d_x9_8x8_ssse3;
1198
            pixf->intra_sa8d_x9_8x8 = x264_intra_sa8d_x9_8x8_ssse3;
1199
#endif
1199
#endif
1200
        }
1200
        }
Lines 1208-1214 Link Here
1208
            INIT6( satd_x3, _ssse3_atom );
1208
            INIT6( satd_x3, _ssse3_atom );
1209
            INIT6( satd_x4, _ssse3_atom );
1209
            INIT6( satd_x4, _ssse3_atom );
1210
            INIT4( hadamard_ac, _ssse3_atom );
1210
            INIT4( hadamard_ac, _ssse3_atom );
1211
#if ARCH_X86_64
1211
#if ARCH_X86_64 || ARCH_X86_64_32
1212
            pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_ssse3_atom;
1212
            pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_ssse3_atom;
1213
#endif
1213
#endif
1214
        }
1214
        }
Lines 1220-1226 Link Here
1220
            INIT8( satd, _ssse3 );
1220
            INIT8( satd, _ssse3 );
1221
            INIT7( satd_x3, _ssse3 );
1221
            INIT7( satd_x3, _ssse3 );
1222
            INIT7( satd_x4, _ssse3 );
1222
            INIT7( satd_x4, _ssse3 );
1223
#if ARCH_X86_64
1223
#if ARCH_X86_64 || ARCH_X86_64_32
1224
            pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_ssse3;
1224
            pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_ssse3;
1225
#endif
1225
#endif
1226
        }
1226
        }
Lines 1261-1274 Link Here
1261
            pixf->intra_sad_x9_4x4  = x264_intra_sad_x9_4x4_sse4;
1261
            pixf->intra_sad_x9_4x4  = x264_intra_sad_x9_4x4_sse4;
1262
            pixf->intra_satd_x9_4x4 = x264_intra_satd_x9_4x4_sse4;
1262
            pixf->intra_satd_x9_4x4 = x264_intra_satd_x9_4x4_sse4;
1263
            pixf->intra_sad_x9_8x8  = x264_intra_sad_x9_8x8_sse4;
1263
            pixf->intra_sad_x9_8x8  = x264_intra_sad_x9_8x8_sse4;
1264
#if ARCH_X86_64
1264
#if ARCH_X86_64 || ARCH_X86_64_32
1265
            pixf->intra_sa8d_x9_8x8 = x264_intra_sa8d_x9_8x8_sse4;
1265
            pixf->intra_sa8d_x9_8x8 = x264_intra_sa8d_x9_8x8_sse4;
1266
#endif
1266
#endif
1267
        }
1267
        }
1268
        pixf->sa8d[PIXEL_16x16]= x264_pixel_sa8d_16x16_sse4;
1268
        pixf->sa8d[PIXEL_16x16]= x264_pixel_sa8d_16x16_sse4;
1269
        pixf->sa8d[PIXEL_8x8]  = x264_pixel_sa8d_8x8_sse4;
1269
        pixf->sa8d[PIXEL_8x8]  = x264_pixel_sa8d_8x8_sse4;
1270
        pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_sse4;
1270
        pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_sse4;
1271
#if ARCH_X86_64
1271
#if ARCH_X86_64 || ARCH_X86_64_32
1272
        pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_sse4;
1272
        pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_sse4;
1273
#endif
1273
#endif
1274
    }
1274
    }
Lines 1288-1294 Link Here
1288
            pixf->intra_sad_x9_4x4  = x264_intra_sad_x9_4x4_avx;
1288
            pixf->intra_sad_x9_4x4  = x264_intra_sad_x9_4x4_avx;
1289
            pixf->intra_satd_x9_4x4 = x264_intra_satd_x9_4x4_avx;
1289
            pixf->intra_satd_x9_4x4 = x264_intra_satd_x9_4x4_avx;
1290
            pixf->intra_sad_x9_8x8  = x264_intra_sad_x9_8x8_avx;
1290
            pixf->intra_sad_x9_8x8  = x264_intra_sad_x9_8x8_avx;
1291
#if ARCH_X86_64
1291
#if ARCH_X86_64 || ARCH_X86_64_32
1292
            pixf->intra_sa8d_x9_8x8 = x264_intra_sa8d_x9_8x8_avx;
1292
            pixf->intra_sa8d_x9_8x8 = x264_intra_sa8d_x9_8x8_avx;
1293
#endif
1293
#endif
1294
        }
1294
        }
Lines 1302-1308 Link Here
1302
        pixf->var[PIXEL_8x8]   = x264_pixel_var_8x8_avx;
1302
        pixf->var[PIXEL_8x8]   = x264_pixel_var_8x8_avx;
1303
        pixf->ssim_4x4x2_core  = x264_pixel_ssim_4x4x2_core_avx;
1303
        pixf->ssim_4x4x2_core  = x264_pixel_ssim_4x4x2_core_avx;
1304
        pixf->ssim_end4        = x264_pixel_ssim_end4_avx;
1304
        pixf->ssim_end4        = x264_pixel_ssim_end4_avx;
1305
#if ARCH_X86_64
1305
#if ARCH_X86_64 || ARCH_X86_64_32
1306
        pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_avx;
1306
        pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_avx;
1307
#endif
1307
#endif
1308
    }
1308
    }
Lines 1327-1333 Link Here
1327
        pixf->var[PIXEL_8x8]   = x264_pixel_var_8x8_xop;
1327
        pixf->var[PIXEL_8x8]   = x264_pixel_var_8x8_xop;
1328
        pixf->var2[PIXEL_8x8] = x264_pixel_var2_8x8_xop;
1328
        pixf->var2[PIXEL_8x8] = x264_pixel_var2_8x8_xop;
1329
        pixf->var2[PIXEL_8x16] = x264_pixel_var2_8x16_xop;
1329
        pixf->var2[PIXEL_8x16] = x264_pixel_var2_8x16_xop;
1330
#if ARCH_X86_64
1330
#if ARCH_X86_64 || ARCH_X86_64_32
1331
        pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_xop;
1331
        pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_xop;
1332
#endif
1332
#endif
1333
    }
1333
    }
Lines 1348-1354 Link Here
1348
        pixf->intra_sad_x9_8x8  = x264_intra_sad_x9_8x8_avx2;
1348
        pixf->intra_sad_x9_8x8  = x264_intra_sad_x9_8x8_avx2;
1349
        pixf->intra_sad_x3_8x8c = x264_intra_sad_x3_8x8c_avx2;
1349
        pixf->intra_sad_x3_8x8c = x264_intra_sad_x3_8x8c_avx2;
1350
        pixf->ssd_nv12_core = x264_pixel_ssd_nv12_core_avx2;
1350
        pixf->ssd_nv12_core = x264_pixel_ssd_nv12_core_avx2;
1351
#if ARCH_X86_64
1351
#if ARCH_X86_64 || ARCH_X86_64_32
1352
        pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_avx2;
1352
        pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_avx2;
1353
#endif
1353
#endif
1354
    }
1354
    }
(-)x264-snapshot-20160712-2245/common/x86/bitstream-a.asm (-1 / +1 lines)
Lines 130-136 Link Here
130
NAL_ESCAPE
130
NAL_ESCAPE
131
INIT_XMM sse2
131
INIT_XMM sse2
132
NAL_ESCAPE
132
NAL_ESCAPE
133
%if ARCH_X86_64
133
%if ARCH_X86_64 || ARCH_X86_64_32
134
INIT_YMM avx2
134
INIT_YMM avx2
135
NAL_ESCAPE
135
NAL_ESCAPE
136
%endif
136
%endif
(-)x264-snapshot-20160712-2245/common/x86/cabac-a.asm (-6 / +6 lines)
Lines 35-41 Link Here
35
coeff_abs_level_transition: db 1, 2, 3, 3, 4, 5, 6, 7
35
coeff_abs_level_transition: db 1, 2, 3, 3, 4, 5, 6, 7
36
                            db 4, 4, 4, 4, 5, 6, 7, 7
36
                            db 4, 4, 4, 4, 5, 6, 7, 7
37
37
38
%if ARCH_X86_64
38
%if ARCH_X86_64 || ARCH_X86_64_32
39
%macro COEFF_LAST_TABLE 17
39
%macro COEFF_LAST_TABLE 17
40
    %define funccpu1 %1
40
    %define funccpu1 %1
41
    %define funccpu2 %2
41
    %define funccpu2 %2
Lines 86-92 Link Here
86
cextern count_cat_m1
86
cextern count_cat_m1
87
cextern cabac_encode_ue_bypass
87
cextern cabac_encode_ue_bypass
88
88
89
%if ARCH_X86_64
89
%if ARCH_X86_64 || ARCH_X86_64_32
90
    %define pointer resq
90
    %define pointer resq
91
%else
91
%else
92
    %define pointer resd
92
    %define pointer resd
Lines 122-128 Link Here
122
; t3 must be ecx, since it's used for shift.
122
; t3 must be ecx, since it's used for shift.
123
%if WIN64
123
%if WIN64
124
    DECLARE_REG_TMP 3,1,2,0,5,6,4,4
124
    DECLARE_REG_TMP 3,1,2,0,5,6,4,4
125
%elif ARCH_X86_64
125
%elif ARCH_X86_64 || ARCH_X86_64_32
126
    DECLARE_REG_TMP 0,1,2,3,4,5,6,6
126
    DECLARE_REG_TMP 0,1,2,3,4,5,6,6
127
%else
127
%else
128
    DECLARE_REG_TMP 0,4,2,1,3,5,6,2
128
    DECLARE_REG_TMP 0,4,2,1,3,5,6,2
Lines 193-199 Link Here
193
    mov   [t0+cb.low], t7d
193
    mov   [t0+cb.low], t7d
194
    mov   [t0+cb.queue], t3d
194
    mov   [t0+cb.queue], t3d
195
    RET
195
    RET
196
%if ARCH_X86_64 == 0
196
%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0
197
.putbyte:
197
.putbyte:
198
    PROLOGUE 0,7
198
    PROLOGUE 0,7
199
    movifnidn t6d, t7d
199
    movifnidn t6d, t7d
Lines 525-531 Link Here
525
    RET
525
    RET
526
%endmacro
526
%endmacro
527
527
528
%if ARCH_X86_64
528
%if ARCH_X86_64 || ARCH_X86_64_32
529
INIT_XMM sse2
529
INIT_XMM sse2
530
CABAC_RESIDUAL_RD 0, coeff_last_sse2
530
CABAC_RESIDUAL_RD 0, coeff_last_sse2
531
CABAC_RESIDUAL_RD 1, coeff_last_sse2
531
CABAC_RESIDUAL_RD 1, coeff_last_sse2
Lines 746-752 Link Here
746
    RET
746
    RET
747
%endmacro
747
%endmacro
748
748
749
%if ARCH_X86_64
749
%if ARCH_X86_64 || ARCH_X86_64_32
750
INIT_XMM sse2
750
INIT_XMM sse2
751
CABAC_RESIDUAL coeff_last_sse2
751
CABAC_RESIDUAL coeff_last_sse2
752
INIT_XMM sse2,lzcnt
752
INIT_XMM sse2,lzcnt
(-)x264-snapshot-20160712-2245/common/x86/cpu-a.asm (-1 / +1 lines)
Lines 66-72 Link Here
66
    mov [r4], edx
66
    mov [r4], edx
67
    RET
67
    RET
68
68
69
%if ARCH_X86_64
69
%if ARCH_X86_64 || ARCH_X86_64_32
70
70
71
;-----------------------------------------------------------------------------
71
;-----------------------------------------------------------------------------
72
; void stack_align( void (*func)(void*), void *arg );
72
; void stack_align( void (*func)(void*), void *arg );
(-)x264-snapshot-20160712-2245/common/x86/dct-a.asm (-1 / +1 lines)
Lines 661-667 Link Here
661
SUB_NxN_DCT  sub16x16_dct8_sse4, sub8x8_dct8_sse4, 256, 16, 0, 0, 14
661
SUB_NxN_DCT  sub16x16_dct8_sse4, sub8x8_dct8_sse4, 256, 16, 0, 0, 14
662
SUB_NxN_DCT  sub16x16_dct8_avx,  sub8x8_dct8_avx,  256, 16, 0, 0, 14
662
SUB_NxN_DCT  sub16x16_dct8_avx,  sub8x8_dct8_avx,  256, 16, 0, 0, 14
663
%else ; !HIGH_BIT_DEPTH
663
%else ; !HIGH_BIT_DEPTH
664
%if ARCH_X86_64 == 0
664
%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0
665
INIT_MMX
665
INIT_MMX
666
SUB_NxN_DCT  sub8x8_dct_mmx,     sub4x4_dct_mmx,   32, 4, 0, 0, 0
666
SUB_NxN_DCT  sub8x8_dct_mmx,     sub4x4_dct_mmx,   32, 4, 0, 0, 0
667
ADD_NxN_IDCT add8x8_idct_mmx,    add4x4_idct_mmx,  32, 4, 0, 0
667
ADD_NxN_IDCT add8x8_idct_mmx,    add4x4_idct_mmx,  32, 4, 0, 0
(-)x264-snapshot-20160712-2245/common/x86/deblock-a.asm (-19 / +19 lines)
Lines 303-309 Link Here
303
    RET
303
    RET
304
%endmacro
304
%endmacro
305
305
306
%if ARCH_X86_64
306
%if ARCH_X86_64 || ARCH_X86_64_32
307
; in:  m0=p1, m1=p0, m2=q0, m3=q1, m8=p2, m9=q2
307
; in:  m0=p1, m1=p0, m2=q0, m3=q1, m8=p2, m9=q2
308
;      m12=alpha, m13=beta
308
;      m12=alpha, m13=beta
309
; out: m0=p1', m3=q1', m1=p0', m2=q0'
309
; out: m0=p1', m3=q1', m1=p0', m2=q0'
Lines 434-440 Link Here
434
;     %1=p0 %2=p1 %3=p2 %4=p3 %5=q0 %6=q1 %7=mask0
434
;     %1=p0 %2=p1 %3=p2 %4=p3 %5=q0 %6=q1 %7=mask0
435
;     %8=mask1p %9=2 %10=p0' %11=p1' %12=p2'
435
;     %8=mask1p %9=2 %10=p0' %11=p1' %12=p2'
436
%macro LUMA_INTRA_P012 12 ; p0..p3 in memory
436
%macro LUMA_INTRA_P012 12 ; p0..p3 in memory
437
%if ARCH_X86_64
437
%if ARCH_X86_64 || ARCH_X86_64_32
438
    paddw     t0, %3, %2
438
    paddw     t0, %3, %2
439
    mova      t2, %4
439
    mova      t2, %4
440
    paddw     t2, %3
440
    paddw     t2, %3
Lines 499-505 Link Here
499
    LOAD_AB t0, t1, r2d, r3d
499
    LOAD_AB t0, t1, r2d, r3d
500
    mova    %1, t0
500
    mova    %1, t0
501
    LOAD_MASK m0, m1, m2, m3, %1, t1, t0, t2, t3
501
    LOAD_MASK m0, m1, m2, m3, %1, t1, t0, t2, t3
502
%if ARCH_X86_64
502
%if ARCH_X86_64 || ARCH_X86_64_32
503
    mova    %2, t0        ; mask0
503
    mova    %2, t0        ; mask0
504
    psrlw   t3, %1, 2
504
    psrlw   t3, %1, 2
505
%else
505
%else
Lines 596-602 Link Here
596
%endif
596
%endif
597
%endmacro
597
%endmacro
598
598
599
%if ARCH_X86_64
599
%if ARCH_X86_64 || ARCH_X86_64_32
600
;-----------------------------------------------------------------------------
600
;-----------------------------------------------------------------------------
601
; void deblock_v_luma_intra( uint16_t *pix, intptr_t stride, int alpha, int beta )
601
; void deblock_v_luma_intra( uint16_t *pix, intptr_t stride, int alpha, int beta )
602
;-----------------------------------------------------------------------------
602
;-----------------------------------------------------------------------------
Lines 782-788 Link Here
782
    RET
782
    RET
783
%endmacro
783
%endmacro
784
784
785
%if ARCH_X86_64 == 0
785
%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0
786
INIT_MMX mmx2
786
INIT_MMX mmx2
787
DEBLOCK_LUMA
787
DEBLOCK_LUMA
788
DEBLOCK_LUMA_INTRA
788
DEBLOCK_LUMA_INTRA
Lines 1204-1210 Link Here
1204
    mova    %4, %2
1204
    mova    %4, %2
1205
%endmacro
1205
%endmacro
1206
1206
1207
%if ARCH_X86_64
1207
%if ARCH_X86_64 || ARCH_X86_64_32
1208
;-----------------------------------------------------------------------------
1208
;-----------------------------------------------------------------------------
1209
; void deblock_v_luma( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 )
1209
; void deblock_v_luma( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 )
1210
;-----------------------------------------------------------------------------
1210
;-----------------------------------------------------------------------------
Lines 1471-1477 Link Here
1471
1471
1472
1472
1473
%macro LUMA_INTRA_P012 4 ; p0..p3 in memory
1473
%macro LUMA_INTRA_P012 4 ; p0..p3 in memory
1474
%if ARCH_X86_64
1474
%if ARCH_X86_64 || ARCH_X86_64_32
1475
    pavgb t0, p2, p1
1475
    pavgb t0, p2, p1
1476
    pavgb t1, p0, q0
1476
    pavgb t1, p0, q0
1477
%else
1477
%else
Lines 1482-1488 Link Here
1482
%endif
1482
%endif
1483
    pavgb t0, t1 ; ((p2+p1+1)/2 + (p0+q0+1)/2 + 1)/2
1483
    pavgb t0, t1 ; ((p2+p1+1)/2 + (p0+q0+1)/2 + 1)/2
1484
    mova  t5, t1
1484
    mova  t5, t1
1485
%if ARCH_X86_64
1485
%if ARCH_X86_64 || ARCH_X86_64_32
1486
    paddb t2, p2, p1
1486
    paddb t2, p2, p1
1487
    paddb t3, p0, q0
1487
    paddb t3, p0, q0
1488
%else
1488
%else
Lines 1500-1506 Link Here
1500
    pand  t2, mpb_1
1500
    pand  t2, mpb_1
1501
    psubb t0, t2 ; p1' = (p2+p1+p0+q0+2)/4;
1501
    psubb t0, t2 ; p1' = (p2+p1+p0+q0+2)/4;
1502
1502
1503
%if ARCH_X86_64
1503
%if ARCH_X86_64 || ARCH_X86_64_32
1504
    pavgb t1, p2, q1
1504
    pavgb t1, p2, q1
1505
    psubb t2, p2, q1
1505
    psubb t2, p2, q1
1506
%else
1506
%else
Lines 1575-1581 Link Here
1575
    %define t1 m5
1575
    %define t1 m5
1576
    %define t2 m6
1576
    %define t2 m6
1577
    %define t3 m7
1577
    %define t3 m7
1578
%if ARCH_X86_64
1578
%if ARCH_X86_64 || ARCH_X86_64_32
1579
    %define p2 m8
1579
    %define p2 m8
1580
    %define q2 m9
1580
    %define q2 m9
1581
    %define t4 m10
1581
    %define t4 m10
Lines 1614-1620 Link Here
1614
    mova    p0, [r4+r5]
1614
    mova    p0, [r4+r5]
1615
    mova    q0, [r0]
1615
    mova    q0, [r0]
1616
    mova    q1, [r0+r1]
1616
    mova    q1, [r0+r1]
1617
%if ARCH_X86_64
1617
%if ARCH_X86_64 || ARCH_X86_64_32
1618
    pxor    mpb_0, mpb_0
1618
    pxor    mpb_0, mpb_0
1619
    mova    mpb_1, [pb_1]
1619
    mova    mpb_1, [pb_1]
1620
    LOAD_MASK r2d, r3d, t5 ; m5=beta-1, t5=alpha-1, m7=mask0
1620
    LOAD_MASK r2d, r3d, t5 ; m5=beta-1, t5=alpha-1, m7=mask0
Lines 1657-1663 Link Here
1657
%else
1657
%else
1658
INIT_MMX cpuname
1658
INIT_MMX cpuname
1659
%endif
1659
%endif
1660
%if ARCH_X86_64
1660
%if ARCH_X86_64 || ARCH_X86_64_32
1661
;-----------------------------------------------------------------------------
1661
;-----------------------------------------------------------------------------
1662
; void deblock_h_luma_intra( uint8_t *pix, intptr_t stride, int alpha, int beta )
1662
; void deblock_h_luma_intra( uint8_t *pix, intptr_t stride, int alpha, int beta )
1663
;-----------------------------------------------------------------------------
1663
;-----------------------------------------------------------------------------
Lines 1727-1740 Link Here
1727
    lea    r2,  [r2+r1*8]
1727
    lea    r2,  [r2+r1*8]
1728
    TRANSPOSE8x8_MEM  PASS8ROWS(pix_tmp+8, pix_tmp+0x38, 0x10, 0x30), PASS8ROWS(r0, r2, r1, r3)
1728
    TRANSPOSE8x8_MEM  PASS8ROWS(pix_tmp+8, pix_tmp+0x38, 0x10, 0x30), PASS8ROWS(r0, r2, r1, r3)
1729
    RET
1729
    RET
1730
%endif ; ARCH_X86_64
1730
%endif ; ARCH_X86_64 || ARCH_X86_64_32
1731
%endmacro ; DEBLOCK_LUMA_INTRA
1731
%endmacro ; DEBLOCK_LUMA_INTRA
1732
1732
1733
INIT_XMM sse2
1733
INIT_XMM sse2
1734
DEBLOCK_LUMA_INTRA v
1734
DEBLOCK_LUMA_INTRA v
1735
INIT_XMM avx
1735
INIT_XMM avx
1736
DEBLOCK_LUMA_INTRA v
1736
DEBLOCK_LUMA_INTRA v
1737
%if ARCH_X86_64 == 0
1737
%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0
1738
INIT_MMX mmx2
1738
INIT_MMX mmx2
1739
DEBLOCK_LUMA_INTRA v8
1739
DEBLOCK_LUMA_INTRA v8
1740
%endif
1740
%endif
Lines 2014-2020 Link Here
2014
    RET
2014
    RET
2015
%endmacro ; DEBLOCK_CHROMA
2015
%endmacro ; DEBLOCK_CHROMA
2016
2016
2017
%if ARCH_X86_64 == 0
2017
%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0
2018
INIT_MMX mmx2
2018
INIT_MMX mmx2
2019
DEBLOCK_CHROMA
2019
DEBLOCK_CHROMA
2020
%endif
2020
%endif
Lines 2114-2120 Link Here
2114
DEBLOCK_CHROMA
2114
DEBLOCK_CHROMA
2115
INIT_XMM avx
2115
INIT_XMM avx
2116
DEBLOCK_CHROMA
2116
DEBLOCK_CHROMA
2117
%if ARCH_X86_64 == 0
2117
%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0
2118
INIT_MMX mmx2
2118
INIT_MMX mmx2
2119
DEBLOCK_CHROMA
2119
DEBLOCK_CHROMA
2120
%endif
2120
%endif
Lines 2137-2150 Link Here
2137
2137
2138
INIT_XMM sse2
2138
INIT_XMM sse2
2139
DEBLOCK_H_CHROMA_420_MBAFF
2139
DEBLOCK_H_CHROMA_420_MBAFF
2140
%if ARCH_X86_64 == 0
2140
%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0
2141
INIT_MMX mmx2
2141
INIT_MMX mmx2
2142
DEBLOCK_H_CHROMA_420_MBAFF
2142
DEBLOCK_H_CHROMA_420_MBAFF
2143
%endif
2143
%endif
2144
2144
2145
%macro DEBLOCK_H_CHROMA_422 0
2145
%macro DEBLOCK_H_CHROMA_422 0
2146
cglobal deblock_h_chroma_422, 5,8,8
2146
cglobal deblock_h_chroma_422, 5,8,8
2147
%if ARCH_X86_64
2147
%if ARCH_X86_64 || ARCH_X86_64_32
2148
    %define cntr r7
2148
    %define cntr r7
2149
%else
2149
%else
2150
    %define cntr dword r0m
2150
    %define cntr dword r0m
Lines 2262-2268 Link Here
2262
DEBLOCK_CHROMA_INTRA
2262
DEBLOCK_CHROMA_INTRA
2263
INIT_MMX mmx2
2263
INIT_MMX mmx2
2264
DEBLOCK_CHROMA_INTRA_BODY
2264
DEBLOCK_CHROMA_INTRA_BODY
2265
%if ARCH_X86_64 == 0
2265
%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0
2266
DEBLOCK_CHROMA_INTRA
2266
DEBLOCK_CHROMA_INTRA
2267
%endif
2267
%endif
2268
2268
(-)x264-snapshot-20160712-2245/common/x86/mc-a.asm (-15 / +15 lines)
Lines 1167-1173 Link Here
1167
%endif
1167
%endif
1168
%if 0 ; or %1==8 - but the extra branch seems too expensive
1168
%if 0 ; or %1==8 - but the extra branch seems too expensive
1169
    ja cachesplit
1169
    ja cachesplit
1170
%if ARCH_X86_64
1170
%if ARCH_X86_64 || ARCH_X86_64_32
1171
    test      r4b, 1
1171
    test      r4b, 1
1172
%else
1172
%else
1173
    test byte r4m, 1
1173
    test byte r4m, 1
Lines 1189-1195 Link Here
1189
INIT_MMX
1189
INIT_MMX
1190
AVG_CACHELINE_CHECK  8, 64, mmx2
1190
AVG_CACHELINE_CHECK  8, 64, mmx2
1191
AVG_CACHELINE_CHECK 12, 64, mmx2
1191
AVG_CACHELINE_CHECK 12, 64, mmx2
1192
%if ARCH_X86_64 == 0
1192
%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0
1193
AVG_CACHELINE_CHECK 16, 64, mmx2
1193
AVG_CACHELINE_CHECK 16, 64, mmx2
1194
AVG_CACHELINE_CHECK 20, 64, mmx2
1194
AVG_CACHELINE_CHECK 20, 64, mmx2
1195
AVG_CACHELINE_CHECK  8, 32, mmx2
1195
AVG_CACHELINE_CHECK  8, 32, mmx2
Lines 1381-1387 Link Here
1381
;-----------------------------------------------------------------------------
1381
;-----------------------------------------------------------------------------
1382
1382
1383
%macro PREFETCH_FENC 1
1383
%macro PREFETCH_FENC 1
1384
%if ARCH_X86_64
1384
%if ARCH_X86_64 || ARCH_X86_64_32
1385
cglobal prefetch_fenc_%1, 5,5
1385
cglobal prefetch_fenc_%1, 5,5
1386
    FIX_STRIDES r1, r3
1386
    FIX_STRIDES r1, r3
1387
    and    r4d, 3
1387
    and    r4d, 3
Lines 1435-1441 Link Here
1435
    prefetcht0  [r0+r1]
1435
    prefetcht0  [r0+r1]
1436
%endif
1436
%endif
1437
    ret
1437
    ret
1438
%endif ; ARCH_X86_64
1438
%endif ; ARCH_X86_64 || ARCH_X86_64_32
1439
%endmacro
1439
%endmacro
1440
1440
1441
INIT_MMX mmx2
1441
INIT_MMX mmx2
Lines 1469-1482 Link Here
1469
; chroma MC
1469
; chroma MC
1470
;=============================================================================
1470
;=============================================================================
1471
1471
1472
%if ARCH_X86_64
1472
%if ARCH_X86_64 || ARCH_X86_64_32
1473
    DECLARE_REG_TMP 6,7,8
1473
    DECLARE_REG_TMP 6,7,8
1474
%else
1474
%else
1475
    DECLARE_REG_TMP 0,1,2
1475
    DECLARE_REG_TMP 0,1,2
1476
%endif
1476
%endif
1477
1477
1478
%macro MC_CHROMA_START 1
1478
%macro MC_CHROMA_START 1
1479
%if ARCH_X86_64
1479
%if ARCH_X86_64 || ARCH_X86_64_32
1480
    PROLOGUE 0,9,%1
1480
    PROLOGUE 0,9,%1
1481
%else
1481
%else
1482
    PROLOGUE 0,6,%1
1482
    PROLOGUE 0,6,%1
Lines 1533-1543 Link Here
1533
    MC_CHROMA_START 0
1533
    MC_CHROMA_START 0
1534
    FIX_STRIDES r4
1534
    FIX_STRIDES r4
1535
    and       r5d, 7
1535
    and       r5d, 7
1536
%if ARCH_X86_64
1536
%if ARCH_X86_64 || ARCH_X86_64_32
1537
    jz .mc1dy
1537
    jz .mc1dy
1538
%endif
1538
%endif
1539
    and       t2d, 7
1539
    and       t2d, 7
1540
%if ARCH_X86_64
1540
%if ARCH_X86_64 || ARCH_X86_64_32
1541
    jz .mc1dx
1541
    jz .mc1dx
1542
%endif
1542
%endif
1543
    shl       r5d, 16
1543
    shl       r5d, 16
Lines 1638-1644 Link Here
1638
1638
1639
%if mmsize==8
1639
%if mmsize==8
1640
.width4:
1640
.width4:
1641
%if ARCH_X86_64
1641
%if ARCH_X86_64 || ARCH_X86_64_32
1642
    mov        t0, r0
1642
    mov        t0, r0
1643
    mov        t1, r1
1643
    mov        t1, r1
1644
    mov        t2, r3
1644
    mov        t2, r3
Lines 1655-1661 Link Here
1655
%endif
1655
%endif
1656
%else
1656
%else
1657
.width8:
1657
.width8:
1658
%if ARCH_X86_64
1658
%if ARCH_X86_64 || ARCH_X86_64_32
1659
    %define multy0 m8
1659
    %define multy0 m8
1660
    SWAP        8, 5
1660
    SWAP        8, 5
1661
%else
1661
%else
Lines 1764-1770 Link Here
1764
    jg .width8
1764
    jg .width8
1765
    RET
1765
    RET
1766
.width8:
1766
.width8:
1767
%if ARCH_X86_64
1767
%if ARCH_X86_64 || ARCH_X86_64_32
1768
    lea        r3, [t2+8*SIZEOF_PIXEL]
1768
    lea        r3, [t2+8*SIZEOF_PIXEL]
1769
    lea        r0, [t0+4*SIZEOF_PIXEL]
1769
    lea        r0, [t0+4*SIZEOF_PIXEL]
1770
    lea        r1, [t1+4*SIZEOF_PIXEL]
1770
    lea        r1, [t1+4*SIZEOF_PIXEL]
Lines 1780-1786 Link Here
1780
    jmp .loopx
1780
    jmp .loopx
1781
%endif
1781
%endif
1782
1782
1783
%if ARCH_X86_64 ; too many regs for x86_32
1783
%if ARCH_X86_64 || ARCH_X86_64_32 ; too many regs for x86_32
1784
    RESET_MM_PERMUTATION
1784
    RESET_MM_PERMUTATION
1785
%if WIN64
1785
%if WIN64
1786
    %assign stack_offset stack_offset - stack_size_padded
1786
    %assign stack_offset stack_offset - stack_size_padded
Lines 1907-1913 Link Here
1907
    shl       r5d, 1
1907
    shl       r5d, 1
1908
%endif
1908
%endif
1909
    jmp .loop1d_w4
1909
    jmp .loop1d_w4
1910
%endif ; ARCH_X86_64
1910
%endif ; ARCH_X86_64 || ARCH_X86_64_32
1911
%endmacro ; MC_CHROMA
1911
%endmacro ; MC_CHROMA
1912
1912
1913
%macro MC_CHROMA_SSSE3 0
1913
%macro MC_CHROMA_SSSE3 0
Lines 1950-1956 Link Here
1950
    SPLATW     m6, m6
1950
    SPLATW     m6, m6
1951
    SPLATW     m7, m7
1951
    SPLATW     m7, m7
1952
%endif
1952
%endif
1953
%if ARCH_X86_64
1953
%if ARCH_X86_64 || ARCH_X86_64_32
1954
    %define shiftround m8
1954
    %define shiftround m8
1955
    mova       m8, [pw_512]
1955
    mova       m8, [pw_512]
1956
%else
1956
%else
Lines 2057-2063 Link Here
2057
    pshufb     m0, m5
2057
    pshufb     m0, m5
2058
    movu       m1, [r3+8]
2058
    movu       m1, [r3+8]
2059
    pshufb     m1, m5
2059
    pshufb     m1, m5
2060
%if ARCH_X86_64
2060
%if ARCH_X86_64 || ARCH_X86_64_32
2061
    SWAP        9, 6
2061
    SWAP        9, 6
2062
    %define  mult1 m9
2062
    %define  mult1 m9
2063
%else
2063
%else
(-)x264-snapshot-20160712-2245/common/x86/mc-a2.asm (-8 / +8 lines)
Lines 499-505 Link Here
499
    mova    m7, [pw_32]
499
    mova    m7, [pw_32]
500
%endif
500
%endif
501
    %define pw_rnd m7
501
    %define pw_rnd m7
502
%elif ARCH_X86_64
502
%elif ARCH_X86_64 || ARCH_X86_64_32
503
    mova    m8, [pw_32]
503
    mova    m8, [pw_32]
504
    %define pw_rnd m8
504
    %define pw_rnd m8
505
%else
505
%else
Lines 654-660 Link Here
654
HPEL_V 0
654
HPEL_V 0
655
INIT_XMM sse2
655
INIT_XMM sse2
656
HPEL_V 8
656
HPEL_V 8
657
%if ARCH_X86_64 == 0
657
%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0
658
INIT_XMM sse2
658
INIT_XMM sse2
659
HPEL_C
659
HPEL_C
660
INIT_XMM ssse3
660
INIT_XMM ssse3
Lines 706-712 Link Here
706
    RET
706
    RET
707
%endif
707
%endif
708
708
709
%if ARCH_X86_64
709
%if ARCH_X86_64 || ARCH_X86_64_32
710
%macro DO_FILT_V 5
710
%macro DO_FILT_V 5
711
    ;The optimum prefetch distance is difficult to determine in checkasm:
711
    ;The optimum prefetch distance is difficult to determine in checkasm:
712
    ;any prefetch seems slower than not prefetching.
712
    ;any prefetch seems slower than not prefetching.
Lines 915-921 Link Here
915
HPEL
915
HPEL
916
INIT_YMM avx2
916
INIT_YMM avx2
917
HPEL
917
HPEL
918
%endif ; ARCH_X86_64
918
%endif ; ARCH_X86_64 || ARCH_X86_64_32
919
919
920
%undef movntq
920
%undef movntq
921
%undef movntps
921
%undef movntps
Lines 1107-1113 Link Here
1107
    lea    r0, [r0+r6*2]
1107
    lea    r0, [r0+r6*2]
1108
    add    r2,  r6
1108
    add    r2,  r6
1109
    add    r4,  r6
1109
    add    r4,  r6
1110
%if ARCH_X86_64
1110
%if ARCH_X86_64 || ARCH_X86_64_32
1111
    DECLARE_REG_TMP 7,8
1111
    DECLARE_REG_TMP 7,8
1112
%else
1112
%else
1113
    DECLARE_REG_TMP 1,3
1113
    DECLARE_REG_TMP 1,3
Lines 1304-1310 Link Here
1304
;                                        pixel *dstc, intptr_t i_dstc,
1304
;                                        pixel *dstc, intptr_t i_dstc,
1305
;                                        pixel *src,  intptr_t i_src, int pw, int w, int h )
1305
;                                        pixel *src,  intptr_t i_src, int pw, int w, int h )
1306
;-----------------------------------------------------------------------------
1306
;-----------------------------------------------------------------------------
1307
%if ARCH_X86_64
1307
%if ARCH_X86_64 || ARCH_X86_64_32
1308
cglobal plane_copy_deinterleave_rgb, 8,12
1308
cglobal plane_copy_deinterleave_rgb, 8,12
1309
    %define %%args r1, r3, r5, r7, r8, r9, r10, r11
1309
    %define %%args r1, r3, r5, r7, r8, r9, r10, r11
1310
    mov        r8d, r9m
1310
    mov        r8d, r9m
Lines 1350-1356 Link Here
1350
;                                         uint16_t *dstc, intptr_t i_dstc,
1350
;                                         uint16_t *dstc, intptr_t i_dstc,
1351
;                                         uint32_t *src, intptr_t i_src, int w, int h )
1351
;                                         uint32_t *src, intptr_t i_src, int w, int h )
1352
;-----------------------------------------------------------------------------
1352
;-----------------------------------------------------------------------------
1353
%if ARCH_X86_64
1353
%if ARCH_X86_64 || ARCH_X86_64_32
1354
cglobal plane_copy_deinterleave_v210, 8,10,7
1354
cglobal plane_copy_deinterleave_v210, 8,10,7
1355
%define src   r8
1355
%define src   r8
1356
%define org_w r9
1356
%define org_w r9
Lines 2003-2009 Link Here
2003
2003
2004
INIT_MMX mmx2
2004
INIT_MMX mmx2
2005
FRAME_INIT_LOWRES
2005
FRAME_INIT_LOWRES
2006
%if ARCH_X86_64 == 0
2006
%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0
2007
INIT_MMX cache32, mmx2
2007
INIT_MMX cache32, mmx2
2008
FRAME_INIT_LOWRES
2008
FRAME_INIT_LOWRES
2009
%endif
2009
%endif
(-)x264-snapshot-20160712-2245/common/x86/mc-c.c (-2 / +2 lines)
Lines 480-486 Link Here
480
HPEL(16, sse2, sse2, sse2, sse2)
480
HPEL(16, sse2, sse2, sse2, sse2)
481
#else // !HIGH_BIT_DEPTH
481
#else // !HIGH_BIT_DEPTH
482
HPEL(16, sse2_amd, mmx2, mmx2, sse2)
482
HPEL(16, sse2_amd, mmx2, mmx2, sse2)
483
#if ARCH_X86_64
483
#if ARCH_X86_64 || ARCH_X86_64_32
484
void x264_hpel_filter_sse2 ( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *src, intptr_t stride, int width, int height, int16_t *buf );
484
void x264_hpel_filter_sse2 ( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *src, intptr_t stride, int width, int height, int16_t *buf );
485
void x264_hpel_filter_ssse3( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *src, intptr_t stride, int width, int height, int16_t *buf );
485
void x264_hpel_filter_ssse3( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *src, intptr_t stride, int width, int height, int16_t *buf );
486
void x264_hpel_filter_avx  ( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *src, intptr_t stride, int width, int height, int16_t *buf );
486
void x264_hpel_filter_avx  ( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *src, intptr_t stride, int width, int height, int16_t *buf );
Lines 855-861 Link Here
855
855
856
    if( !(cpu&X264_CPU_SLOW_PALIGNR) )
856
    if( !(cpu&X264_CPU_SLOW_PALIGNR) )
857
    {
857
    {
858
#if ARCH_X86_64
858
#if ARCH_X86_64 || ARCH_X86_64_32
859
        if( !(cpu&X264_CPU_SLOW_ATOM) ) /* The 64-bit version is slower, but the 32-bit version is faster? */
859
        if( !(cpu&X264_CPU_SLOW_ATOM) ) /* The 64-bit version is slower, but the 32-bit version is faster? */
860
#endif
860
#endif
861
            pf->hpel_filter = x264_hpel_filter_ssse3;
861
            pf->hpel_filter = x264_hpel_filter_ssse3;
(-)x264-snapshot-20160712-2245/common/x86/pixel-a.asm (-33 / +33 lines)
Lines 422-428 Link Here
422
%else
422
%else
423
423
424
.startloop:
424
.startloop:
425
%if ARCH_X86_64
425
%if ARCH_X86_64 || ARCH_X86_64_32
426
    DECLARE_REG_TMP 0,1,2,3
426
    DECLARE_REG_TMP 0,1,2,3
427
    PROLOGUE 0,0,8
427
    PROLOGUE 0,0,8
428
%else
428
%else
Lines 733-739 Link Here
733
    HADDW   m5, m2
733
    HADDW   m5, m2
734
%endif
734
%endif
735
    HADDD   m6, m1
735
    HADDD   m6, m1
736
%if ARCH_X86_64
736
%if ARCH_X86_64 || ARCH_X86_64_32
737
    punpckldq m5, m6
737
    punpckldq m5, m6
738
    movq   rax, m5
738
    movq   rax, m5
739
%else
739
%else
Lines 923-929 Link Here
923
    paddd  xm6, xm1
923
    paddd  xm6, xm1
924
    HADDW  xm5, xm2
924
    HADDW  xm5, xm2
925
    HADDD  xm6, xm1
925
    HADDD  xm6, xm1
926
%if ARCH_X86_64
926
%if ARCH_X86_64 || ARCH_X86_64_32
927
    punpckldq xm5, xm6
927
    punpckldq xm5, xm6
928
    movq   rax, xm5
928
    movq   rax, xm5
929
%else
929
%else
Lines 983-989 Link Here
983
    VAR2_END %2, m5, m6
983
    VAR2_END %2, m5, m6
984
%endmacro
984
%endmacro
985
985
986
%if ARCH_X86_64 == 0
986
%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0
987
INIT_MMX mmx2
987
INIT_MMX mmx2
988
VAR2_8x8_MMX  8, 6
988
VAR2_8x8_MMX  8, 6
989
VAR2_8x8_MMX 16, 7
989
VAR2_8x8_MMX 16, 7
Lines 1502-1508 Link Here
1502
%endmacro
1502
%endmacro
1503
1503
1504
%macro BACKUP_POINTERS 0
1504
%macro BACKUP_POINTERS 0
1505
%if ARCH_X86_64
1505
%if ARCH_X86_64 || ARCH_X86_64_32
1506
%if WIN64
1506
%if WIN64
1507
    PUSH r7
1507
    PUSH r7
1508
%endif
1508
%endif
Lines 1512-1518 Link Here
1512
%endmacro
1512
%endmacro
1513
1513
1514
%macro RESTORE_AND_INC_POINTERS 0
1514
%macro RESTORE_AND_INC_POINTERS 0
1515
%if ARCH_X86_64
1515
%if ARCH_X86_64 || ARCH_X86_64_32
1516
    lea     r0, [r6+8*SIZEOF_PIXEL]
1516
    lea     r0, [r6+8*SIZEOF_PIXEL]
1517
    lea     r2, [r7+8*SIZEOF_PIXEL]
1517
    lea     r2, [r7+8*SIZEOF_PIXEL]
1518
%if WIN64
1518
%if WIN64
Lines 1718-1724 Link Here
1718
%endmacro ; SATDS_SSE2
1718
%endmacro ; SATDS_SSE2
1719
1719
1720
%macro SA8D_INTER 0
1720
%macro SA8D_INTER 0
1721
%if ARCH_X86_64
1721
%if ARCH_X86_64 || ARCH_X86_64_32
1722
    %define lh m10
1722
    %define lh m10
1723
    %define rh m0
1723
    %define rh m0
1724
%else
1724
%else
Lines 1737-1743 Link Here
1737
; sse2 doesn't seem to like the horizontal way of doing things
1737
; sse2 doesn't seem to like the horizontal way of doing things
1738
%define vertical ((notcpuflag(ssse3) || cpuflag(atom)) || HIGH_BIT_DEPTH)
1738
%define vertical ((notcpuflag(ssse3) || cpuflag(atom)) || HIGH_BIT_DEPTH)
1739
1739
1740
%if ARCH_X86_64
1740
%if ARCH_X86_64 || ARCH_X86_64_32
1741
;-----------------------------------------------------------------------------
1741
;-----------------------------------------------------------------------------
1742
; int pixel_sa8d_8x8( uint8_t *, intptr_t, uint8_t *, intptr_t )
1742
; int pixel_sa8d_8x8( uint8_t *, intptr_t, uint8_t *, intptr_t )
1743
;-----------------------------------------------------------------------------
1743
;-----------------------------------------------------------------------------
Lines 1938-1944 Link Here
1938
    shr  eax, 1
1938
    shr  eax, 1
1939
    mov  esp, r6
1939
    mov  esp, r6
1940
    RET
1940
    RET
1941
%endif ; !ARCH_X86_64
1941
%endif ; !ARCH_X86_64 && !ARCH_X86_64_32
1942
%endmacro ; SA8D
1942
%endmacro ; SA8D
1943
1943
1944
;=============================================================================
1944
;=============================================================================
Lines 2121-2127 Link Here
2121
; intra_sa8d_x3_8x8 and intra_satd_x3_4x4 are obsoleted by x9 on ssse3+,
2121
; intra_sa8d_x3_8x8 and intra_satd_x3_4x4 are obsoleted by x9 on ssse3+,
2122
; and are only retained for old cpus.
2122
; and are only retained for old cpus.
2123
%macro INTRA_SA8D_SSE2 0
2123
%macro INTRA_SA8D_SSE2 0
2124
%if ARCH_X86_64
2124
%if ARCH_X86_64 || ARCH_X86_64_32
2125
;-----------------------------------------------------------------------------
2125
;-----------------------------------------------------------------------------
2126
; void intra_sa8d_x3_8x8( uint8_t *fenc, uint8_t edge[36], int *res )
2126
; void intra_sa8d_x3_8x8( uint8_t *fenc, uint8_t edge[36], int *res )
2127
;-----------------------------------------------------------------------------
2127
;-----------------------------------------------------------------------------
Lines 2219-2225 Link Here
2219
    psrldq      m0, 8
2219
    psrldq      m0, 8
2220
    movd    [r2+8], m0 ; i8x8_dc
2220
    movd    [r2+8], m0 ; i8x8_dc
2221
    RET
2221
    RET
2222
%endif ; ARCH_X86_64
2222
%endif ; ARCH_X86_64 || ARCH_X86_64_32
2223
%endmacro ; INTRA_SA8D_SSE2
2223
%endmacro ; INTRA_SA8D_SSE2
2224
2224
2225
; in: r0 = fenc
2225
; in: r0 = fenc
Lines 2491-2497 Link Here
2491
    ADD        rsp, stack_pad
2491
    ADD        rsp, stack_pad
2492
    RET
2492
    RET
2493
2493
2494
%if ARCH_X86_64
2494
%if ARCH_X86_64 || ARCH_X86_64_32
2495
    %define  t0 r6
2495
    %define  t0 r6
2496
%else
2496
%else
2497
    %define  t0 r2
2497
    %define  t0 r2
Lines 2798-2804 Link Here
2798
    %assign pad 0xc0-gprsize-(stack_offset&15)
2798
    %assign pad 0xc0-gprsize-(stack_offset&15)
2799
    %define pred_buf rsp
2799
    %define pred_buf rsp
2800
    sub       rsp, pad
2800
    sub       rsp, pad
2801
%if ARCH_X86_64
2801
%if ARCH_X86_64 || ARCH_X86_64_32
2802
    INTRA_X9_PRED intrax9a, m8
2802
    INTRA_X9_PRED intrax9a, m8
2803
%else
2803
%else
2804
    INTRA_X9_PRED intrax9a, [rsp+0xa0]
2804
    INTRA_X9_PRED intrax9a, [rsp+0xa0]
Lines 2833-2839 Link Here
2833
    paddd      m2, m3
2833
    paddd      m2, m3
2834
    paddd      m4, m5
2834
    paddd      m4, m5
2835
    paddd      m6, m7
2835
    paddd      m6, m7
2836
%if ARCH_X86_64
2836
%if ARCH_X86_64 || ARCH_X86_64_32
2837
    SWAP        7, 8
2837
    SWAP        7, 8
2838
    pxor       m8, m8
2838
    pxor       m8, m8
2839
    %define %%zero m8
2839
    %define %%zero m8
Lines 2873-2879 Link Here
2873
    RET
2873
    RET
2874
%endif ; cpuflag
2874
%endif ; cpuflag
2875
2875
2876
%if ARCH_X86_64
2876
%if ARCH_X86_64 || ARCH_X86_64_32
2877
;-----------------------------------------------------------------------------
2877
;-----------------------------------------------------------------------------
2878
; int intra_satd_x9_4x4( uint8_t *fenc, uint8_t *fdec, uint16_t *bitcosts )
2878
; int intra_satd_x9_4x4( uint8_t *fenc, uint8_t *fdec, uint16_t *bitcosts )
2879
;-----------------------------------------------------------------------------
2879
;-----------------------------------------------------------------------------
Lines 2960-2966 Link Here
2960
    paddd    xmm0, m0, m1 ; consistent location of return value. only the avx version of hadamard permutes m0, so 3arg is free
2960
    paddd    xmm0, m0, m1 ; consistent location of return value. only the avx version of hadamard permutes m0, so 3arg is free
2961
    ret
2961
    ret
2962
2962
2963
%else ; !ARCH_X86_64
2963
%else ; !ARCH_X86_64 && !ARCH_X86_64_32
2964
cglobal intra_satd_x9_4x4, 3,4,8
2964
cglobal intra_satd_x9_4x4, 3,4,8
2965
    %assign pad 0x120-gprsize-(stack_offset&15)
2965
    %assign pad 0x120-gprsize-(stack_offset&15)
2966
    %define fenc_buf rsp
2966
    %define fenc_buf rsp
Lines 3075-3081 Link Here
3075
    %define fenc13 m5
3075
    %define fenc13 m5
3076
    %define fenc46 m6
3076
    %define fenc46 m6
3077
    %define fenc57 m7
3077
    %define fenc57 m7
3078
%if ARCH_X86_64
3078
%if ARCH_X86_64 || ARCH_X86_64_32
3079
    %define tmp m8
3079
    %define tmp m8
3080
    %assign padbase 0x0
3080
    %assign padbase 0x0
3081
%else
3081
%else
Lines 3431-3437 Link Here
3431
    ADD       rsp, pad
3431
    ADD       rsp, pad
3432
    RET
3432
    RET
3433
3433
3434
%if ARCH_X86_64
3434
%if ARCH_X86_64 || ARCH_X86_64_32
3435
;-----------------------------------------------------------------------------
3435
;-----------------------------------------------------------------------------
3436
; int intra_sa8d_x9_8x8( uint8_t *fenc, uint8_t *fdec, uint8_t edge[36], uint16_t *bitcosts, uint16_t *satds )
3436
; int intra_sa8d_x9_8x8( uint8_t *fenc, uint8_t *fdec, uint8_t edge[36], uint16_t *bitcosts, uint16_t *satds )
3437
;-----------------------------------------------------------------------------
3437
;-----------------------------------------------------------------------------
Lines 3725-3731 Link Here
3725
    paddw       m0, m2
3725
    paddw       m0, m2
3726
    paddw mret, m0, m3
3726
    paddw mret, m0, m3
3727
    ret
3727
    ret
3728
%endif ; ARCH_X86_64
3728
%endif ; ARCH_X86_64 || ARCH_X86_64_32
3729
%endmacro ; INTRA8_X9
3729
%endmacro ; INTRA8_X9
3730
3730
3731
; in:  r0=pix, r1=stride, r2=stride*3, r3=tmp, m6=mask_ac4, m7=0
3731
; in:  r0=pix, r1=stride, r2=stride*3, r3=tmp, m6=mask_ac4, m7=0
Lines 3937-3943 Link Here
3937
    movd edx, m0
3937
    movd edx, m0
3938
    movd eax, m1
3938
    movd eax, m1
3939
    shr  edx, 1
3939
    shr  edx, 1
3940
%if ARCH_X86_64
3940
%if ARCH_X86_64 || ARCH_X86_64_32
3941
    shl  rdx, 32
3941
    shl  rdx, 32
3942
    add  rax, rdx
3942
    add  rax, rdx
3943
%endif
3943
%endif
Lines 3986-3992 Link Here
3986
; in:  r0=pix, r1=stride, r2=stride*3
3986
; in:  r0=pix, r1=stride, r2=stride*3
3987
; out: [esp+16]=sa8d, [esp+32]=satd, r0+=stride*4
3987
; out: [esp+16]=sa8d, [esp+32]=satd, r0+=stride*4
3988
cglobal hadamard_ac_8x8
3988
cglobal hadamard_ac_8x8
3989
%if ARCH_X86_64
3989
%if ARCH_X86_64 || ARCH_X86_64_32
3990
    %define spill0 m8
3990
    %define spill0 m8
3991
    %define spill1 m9
3991
    %define spill1 m9
3992
    %define spill2 m10
3992
    %define spill2 m10
Lines 4172-4178 Link Here
4172
    movd eax, xm1
4172
    movd eax, xm1
4173
    shr  edx, 2 - (%1*%2*16/mmsize >> 8)
4173
    shr  edx, 2 - (%1*%2*16/mmsize >> 8)
4174
    shr  eax, 1
4174
    shr  eax, 1
4175
%if ARCH_X86_64
4175
%if ARCH_X86_64 || ARCH_X86_64_32
4176
    shl  rdx, 32
4176
    shl  rdx, 32
4177
    add  rax, rdx
4177
    add  rax, rdx
4178
%endif
4178
%endif
Lines 4182-4188 Link Here
4182
4182
4183
; instantiate satds
4183
; instantiate satds
4184
4184
4185
%if ARCH_X86_64 == 0
4185
%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0
4186
cextern pixel_sa8d_8x8_internal_mmx2
4186
cextern pixel_sa8d_8x8_internal_mmx2
4187
INIT_MMX mmx2
4187
INIT_MMX mmx2
4188
SA8D
4188
SA8D
Lines 4199-4205 Link Here
4199
INIT_XMM sse2
4199
INIT_XMM sse2
4200
SA8D
4200
SA8D
4201
SATDS_SSE2
4201
SATDS_SSE2
4202
%if ARCH_X86_64
4202
%if ARCH_X86_64 || ARCH_X86_64_32
4203
SA8D_SATD
4203
SA8D_SATD
4204
%endif
4204
%endif
4205
%if HIGH_BIT_DEPTH == 0
4205
%if HIGH_BIT_DEPTH == 0
Lines 4215-4221 Link Here
4215
SATDS_SSE2
4215
SATDS_SSE2
4216
SA8D
4216
SA8D
4217
HADAMARD_AC_SSE2
4217
HADAMARD_AC_SSE2
4218
%if ARCH_X86_64
4218
%if ARCH_X86_64 || ARCH_X86_64_32
4219
SA8D_SATD
4219
SA8D_SATD
4220
%endif
4220
%endif
4221
%endif
4221
%endif
Lines 4231-4237 Link Here
4231
SATDS_SSE2
4231
SATDS_SSE2
4232
SA8D
4232
SA8D
4233
HADAMARD_AC_SSE2
4233
HADAMARD_AC_SSE2
4234
%if ARCH_X86_64
4234
%if ARCH_X86_64 || ARCH_X86_64_32
4235
SA8D_SATD
4235
SA8D_SATD
4236
%endif
4236
%endif
4237
%if HIGH_BIT_DEPTH == 0
4237
%if HIGH_BIT_DEPTH == 0
Lines 4252-4258 Link Here
4252
SATDS_SSE2
4252
SATDS_SSE2
4253
SA8D
4253
SA8D
4254
HADAMARD_AC_SSE2
4254
HADAMARD_AC_SSE2
4255
%if ARCH_X86_64
4255
%if ARCH_X86_64 || ARCH_X86_64_32
4256
SA8D_SATD
4256
SA8D_SATD
4257
%endif
4257
%endif
4258
%if HIGH_BIT_DEPTH == 0
4258
%if HIGH_BIT_DEPTH == 0
Lines 4266-4272 Link Here
4266
INIT_XMM avx
4266
INIT_XMM avx
4267
SATDS_SSE2
4267
SATDS_SSE2
4268
SA8D
4268
SA8D
4269
%if ARCH_X86_64
4269
%if ARCH_X86_64 || ARCH_X86_64_32
4270
SA8D_SATD
4270
SA8D_SATD
4271
%endif
4271
%endif
4272
%if HIGH_BIT_DEPTH == 0
4272
%if HIGH_BIT_DEPTH == 0
Lines 4279-4285 Link Here
4279
INIT_XMM xop
4279
INIT_XMM xop
4280
SATDS_SSE2
4280
SATDS_SSE2
4281
SA8D
4281
SA8D
4282
%if ARCH_X86_64
4282
%if ARCH_X86_64 || ARCH_X86_64_32
4283
SA8D_SATD
4283
SA8D_SATD
4284
%endif
4284
%endif
4285
%if HIGH_BIT_DEPTH == 0
4285
%if HIGH_BIT_DEPTH == 0
Lines 4295-4301 Link Here
4295
%define TRANS TRANS_SSE4
4295
%define TRANS TRANS_SSE4
4296
INIT_YMM avx2
4296
INIT_YMM avx2
4297
HADAMARD_AC_SSE2
4297
HADAMARD_AC_SSE2
4298
%if ARCH_X86_64
4298
%if ARCH_X86_64 || ARCH_X86_64_32
4299
SA8D_SATD
4299
SA8D_SATD
4300
%endif
4300
%endif
4301
4301
Lines 4770-4776 Link Here
4770
    pshuflw   m4, m0, q0032
4770
    pshuflw   m4, m0, q0032
4771
%endif
4771
%endif
4772
    addss     m0, m4
4772
    addss     m0, m4
4773
%if ARCH_X86_64 == 0
4773
%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0
4774
    movss    r0m, m0
4774
    movss    r0m, m0
4775
    fld     dword r0m
4775
    fld     dword r0m
4776
%endif
4776
%endif
Lines 5162-5168 Link Here
5162
    jge .end
5162
    jge .end
5163
.loopi:
5163
.loopi:
5164
    mov     r2,  [r6+r1]
5164
    mov     r2,  [r6+r1]
5165
%if ARCH_X86_64
5165
%if ARCH_X86_64 || ARCH_X86_64_32
5166
    test    r2,  r2
5166
    test    r2,  r2
5167
%else
5167
%else
5168
    mov     r3,  r2
5168
    mov     r3,  r2
Lines 5174-5180 Link Here
5174
    TEST 1
5174
    TEST 1
5175
    TEST 2
5175
    TEST 2
5176
    TEST 3
5176
    TEST 3
5177
%if ARCH_X86_64
5177
%if ARCH_X86_64 || ARCH_X86_64_32
5178
    shr     r2,  32
5178
    shr     r2,  32
5179
%else
5179
%else
5180
    mov     r2d, [r6+r1]
5180
    mov     r2d, [r6+r1]
(-)x264-snapshot-20160712-2245/common/x86/predict-a.asm (-9 / +9 lines)
Lines 640-646 Link Here
640
cglobal predict_8x8_filter, 4,6,6
640
cglobal predict_8x8_filter, 4,6,6
641
    add          r0, 0x58*SIZEOF_PIXEL
641
    add          r0, 0x58*SIZEOF_PIXEL
642
%define src r0-0x58*SIZEOF_PIXEL
642
%define src r0-0x58*SIZEOF_PIXEL
643
%if ARCH_X86_64 == 0
643
%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0
644
    mov          r4, r1
644
    mov          r4, r1
645
%define t1 r4
645
%define t1 r4
646
%define t4 r1
646
%define t4 r1
Lines 942-948 Link Here
942
PREDICT_8x8_DDLR
942
PREDICT_8x8_DDLR
943
INIT_XMM ssse3, cache64
943
INIT_XMM ssse3, cache64
944
PREDICT_8x8_DDLR
944
PREDICT_8x8_DDLR
945
%elif ARCH_X86_64 == 0
945
%elif ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0
946
INIT_MMX mmx2
946
INIT_MMX mmx2
947
PREDICT_8x8_DDLR
947
PREDICT_8x8_DDLR
948
%endif
948
%endif
Lines 1014-1020 Link Here
1014
PREDICT_8x8_HU d, wd
1014
PREDICT_8x8_HU d, wd
1015
INIT_XMM avx
1015
INIT_XMM avx
1016
PREDICT_8x8_HU d, wd
1016
PREDICT_8x8_HU d, wd
1017
%elif ARCH_X86_64 == 0
1017
%elif ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0
1018
INIT_MMX mmx2
1018
INIT_MMX mmx2
1019
PREDICT_8x8_HU w, bw
1019
PREDICT_8x8_HU w, bw
1020
%endif
1020
%endif
Lines 1063-1075 Link Here
1063
PREDICT_8x8_VR w
1063
PREDICT_8x8_VR w
1064
INIT_XMM avx
1064
INIT_XMM avx
1065
PREDICT_8x8_VR w
1065
PREDICT_8x8_VR w
1066
%elif ARCH_X86_64 == 0
1066
%elif ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0
1067
INIT_MMX mmx2
1067
INIT_MMX mmx2
1068
PREDICT_8x8_VR b
1068
PREDICT_8x8_VR b
1069
%endif
1069
%endif
1070
1070
1071
%macro LOAD_PLANE_ARGS 0
1071
%macro LOAD_PLANE_ARGS 0
1072
%if cpuflag(avx2) && ARCH_X86_64 == 0
1072
%if cpuflag(avx2) && ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0
1073
    vpbroadcastw m0, r1m
1073
    vpbroadcastw m0, r1m
1074
    vpbroadcastw m2, r2m
1074
    vpbroadcastw m2, r2m
1075
    vpbroadcastw m4, r3m
1075
    vpbroadcastw m4, r3m
Lines 1090-1096 Link Here
1090
;-----------------------------------------------------------------------------
1090
;-----------------------------------------------------------------------------
1091
; void predict_8x8c_p_core( uint8_t *src, int i00, int b, int c )
1091
; void predict_8x8c_p_core( uint8_t *src, int i00, int b, int c )
1092
;-----------------------------------------------------------------------------
1092
;-----------------------------------------------------------------------------
1093
%if ARCH_X86_64 == 0 && HIGH_BIT_DEPTH == 0
1093
%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0 && HIGH_BIT_DEPTH == 0
1094
%macro PREDICT_CHROMA_P_MMX 1
1094
%macro PREDICT_CHROMA_P_MMX 1
1095
cglobal predict_8x%1c_p_core, 1,2
1095
cglobal predict_8x%1c_p_core, 1,2
1096
    LOAD_PLANE_ARGS
1096
    LOAD_PLANE_ARGS
Lines 1210-1216 Link Here
1210
;-----------------------------------------------------------------------------
1210
;-----------------------------------------------------------------------------
1211
; void predict_16x16_p_core( uint8_t *src, int i00, int b, int c )
1211
; void predict_16x16_p_core( uint8_t *src, int i00, int b, int c )
1212
;-----------------------------------------------------------------------------
1212
;-----------------------------------------------------------------------------
1213
%if HIGH_BIT_DEPTH == 0 && ARCH_X86_64 == 0
1213
%if HIGH_BIT_DEPTH == 0 && ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0
1214
INIT_MMX mmx2
1214
INIT_MMX mmx2
1215
cglobal predict_16x16_p_core, 1,2
1215
cglobal predict_16x16_p_core, 1,2
1216
    LOAD_PLANE_ARGS
1216
    LOAD_PLANE_ARGS
Lines 1250-1256 Link Here
1250
    dec         r1d
1250
    dec         r1d
1251
    jg          .loop
1251
    jg          .loop
1252
    RET
1252
    RET
1253
%endif ; !HIGH_BIT_DEPTH && !ARCH_X86_64
1253
%endif ; !HIGH_BIT_DEPTH && !ARCH_X86_64 && !ARCH_X86_64_32
1254
1254
1255
%macro PREDICT_16x16_P 0
1255
%macro PREDICT_16x16_P 0
1256
cglobal predict_16x16_p_core, 1,2,8
1256
cglobal predict_16x16_p_core, 1,2,8
Lines 2121-2127 Link Here
2121
2121
2122
INIT_MMX mmx2
2122
INIT_MMX mmx2
2123
cglobal predict_16x16_dc_core, 1,2
2123
cglobal predict_16x16_dc_core, 1,2
2124
%if ARCH_X86_64
2124
%if ARCH_X86_64 || ARCH_X86_64_32
2125
    movd         m6, r1d
2125
    movd         m6, r1d
2126
    PRED16x16_DC_MMX m6, 5
2126
    PRED16x16_DC_MMX m6, 5
2127
%else
2127
%else
(-)x264-snapshot-20160712-2245/common/x86/predict-c.c (-12 / +12 lines)
Lines 172-180 Link Here
172
#if HIGH_BIT_DEPTH
172
#if HIGH_BIT_DEPTH
173
PREDICT_16x16_P_INLINE( sse2, sse2 )
173
PREDICT_16x16_P_INLINE( sse2, sse2 )
174
#else // !HIGH_BIT_DEPTH
174
#else // !HIGH_BIT_DEPTH
175
#if !ARCH_X86_64
175
#if !ARCH_X86_64 && !ARCH_X86_64_32
176
PREDICT_16x16_P( mmx2, mmx2 )
176
PREDICT_16x16_P( mmx2, mmx2 )
177
#endif // !ARCH_X86_64
177
#endif // !ARCH_X86_64 && !ARCH_X86_64_32
178
PREDICT_16x16_P( sse2, sse2 )
178
PREDICT_16x16_P( sse2, sse2 )
179
#if HAVE_X86_INLINE_ASM
179
#if HAVE_X86_INLINE_ASM
180
PREDICT_16x16_P_INLINE( ssse3, sse2 )
180
PREDICT_16x16_P_INLINE( ssse3, sse2 )
Lines 212-220 Link Here
212
    PREDICT_8x16C_P_END(name)\
212
    PREDICT_8x16C_P_END(name)\
213
}
213
}
214
214
215
#if !ARCH_X86_64 && !HIGH_BIT_DEPTH
215
#if !ARCH_X86_64 && !ARCH_X86_64_32 && !HIGH_BIT_DEPTH
216
PREDICT_8x16C_P( mmx2 )
216
PREDICT_8x16C_P( mmx2 )
217
#endif // !ARCH_X86_64 && !HIGH_BIT_DEPTH
217
#endif // !ARCH_X86_64 && !ARCH_X86_64_32 && !HIGH_BIT_DEPTH
218
PREDICT_8x16C_P( sse2 )
218
PREDICT_8x16C_P( sse2 )
219
PREDICT_8x16C_P( avx )
219
PREDICT_8x16C_P( avx )
220
PREDICT_8x16C_P( avx2 )
220
PREDICT_8x16C_P( avx2 )
Lines 301-309 Link Here
301
#if HIGH_BIT_DEPTH
301
#if HIGH_BIT_DEPTH
302
PREDICT_8x8C_P_INLINE( sse2, sse2 )
302
PREDICT_8x8C_P_INLINE( sse2, sse2 )
303
#else  //!HIGH_BIT_DEPTH
303
#else  //!HIGH_BIT_DEPTH
304
#if !ARCH_X86_64
304
#if !ARCH_X86_64 && !ARCH_X86_64_32
305
PREDICT_8x8C_P( mmx2, mmx2 )
305
PREDICT_8x8C_P( mmx2, mmx2 )
306
#endif // !ARCH_X86_64
306
#endif // !ARCH_X86_64 && !ARCH_X86_64_32
307
PREDICT_8x8C_P( sse2, sse2 )
307
PREDICT_8x8C_P( sse2, sse2 )
308
#if HAVE_X86_INLINE_ASM
308
#if HAVE_X86_INLINE_ASM
309
PREDICT_8x8C_P_INLINE( ssse3, sse2 )
309
PREDICT_8x8C_P_INLINE( ssse3, sse2 )
Lines 312-318 Link Here
312
PREDICT_8x8C_P_INLINE( avx, avx )
312
PREDICT_8x8C_P_INLINE( avx, avx )
313
PREDICT_8x8C_P_INLINE( avx2, avx2 )
313
PREDICT_8x8C_P_INLINE( avx2, avx2 )
314
314
315
#if ARCH_X86_64 && !HIGH_BIT_DEPTH
315
#if (ARCH_X86_64 || ARCH_X86_64_32) && !HIGH_BIT_DEPTH
316
static void x264_predict_8x8c_dc_left( uint8_t *src )
316
static void x264_predict_8x8c_dc_left( uint8_t *src )
317
{
317
{
318
    int y;
318
    int y;
Lines 338-344 Link Here
338
        src += FDEC_STRIDE;
338
        src += FDEC_STRIDE;
339
    }
339
    }
340
}
340
}
341
#endif // ARCH_X86_64 && !HIGH_BIT_DEPTH
341
#endif // (ARCH_X86_64 || ARCH_X86_64_32) && !HIGH_BIT_DEPTH
342
342
343
/****************************************************************************
343
/****************************************************************************
344
 * Exported functions:
344
 * Exported functions:
Lines 370-376 Link Here
370
        return;
370
        return;
371
    pf[I_PRED_16x16_H]       = x264_predict_16x16_h_avx2;
371
    pf[I_PRED_16x16_H]       = x264_predict_16x16_h_avx2;
372
#else
372
#else
373
#if !ARCH_X86_64
373
#if !ARCH_X86_64 && !ARCH_X86_64_32
374
    pf[I_PRED_16x16_P]       = x264_predict_16x16_p_mmx2;
374
    pf[I_PRED_16x16_P]       = x264_predict_16x16_p_mmx2;
375
#endif
375
#endif
376
    if( !(cpu&X264_CPU_SSE) )
376
    if( !(cpu&X264_CPU_SSE) )
Lines 431-437 Link Here
431
        return;
431
        return;
432
    pf[I_PRED_CHROMA_H]   = x264_predict_8x8c_h_avx2;
432
    pf[I_PRED_CHROMA_H]   = x264_predict_8x8c_h_avx2;
433
#else
433
#else
434
#if ARCH_X86_64
434
#if ARCH_X86_64 || ARCH_X86_64_32
435
    pf[I_PRED_CHROMA_DC_LEFT] = x264_predict_8x8c_dc_left;
435
    pf[I_PRED_CHROMA_DC_LEFT] = x264_predict_8x8c_dc_left;
436
#endif
436
#endif
437
    pf[I_PRED_CHROMA_V]       = x264_predict_8x8c_v_mmx;
437
    pf[I_PRED_CHROMA_V]       = x264_predict_8x8c_v_mmx;
Lines 439-445 Link Here
439
        return;
439
        return;
440
    pf[I_PRED_CHROMA_DC_TOP]  = x264_predict_8x8c_dc_top_mmx2;
440
    pf[I_PRED_CHROMA_DC_TOP]  = x264_predict_8x8c_dc_top_mmx2;
441
    pf[I_PRED_CHROMA_H]       = x264_predict_8x8c_h_mmx2;
441
    pf[I_PRED_CHROMA_H]       = x264_predict_8x8c_h_mmx2;
442
#if !ARCH_X86_64
442
#if !ARCH_X86_64 && !ARCH_X86_64_32
443
    pf[I_PRED_CHROMA_P]       = x264_predict_8x8c_p_mmx2;
443
    pf[I_PRED_CHROMA_P]       = x264_predict_8x8c_p_mmx2;
444
#endif
444
#endif
445
    pf[I_PRED_CHROMA_DC]      = x264_predict_8x8c_dc_mmx2;
445
    pf[I_PRED_CHROMA_DC]      = x264_predict_8x8c_dc_mmx2;
Lines 494-500 Link Here
494
    pf[I_PRED_CHROMA_DC_TOP]  = x264_predict_8x16c_dc_top_mmx2;
494
    pf[I_PRED_CHROMA_DC_TOP]  = x264_predict_8x16c_dc_top_mmx2;
495
    pf[I_PRED_CHROMA_DC]      = x264_predict_8x16c_dc_mmx2;
495
    pf[I_PRED_CHROMA_DC]      = x264_predict_8x16c_dc_mmx2;
496
    pf[I_PRED_CHROMA_H]       = x264_predict_8x16c_h_mmx2;
496
    pf[I_PRED_CHROMA_H]       = x264_predict_8x16c_h_mmx2;
497
#if !ARCH_X86_64
497
#if !ARCH_X86_64 && !ARCH_X86_64_32
498
    pf[I_PRED_CHROMA_P]       = x264_predict_8x16c_p_mmx2;
498
    pf[I_PRED_CHROMA_P]       = x264_predict_8x16c_p_mmx2;
499
#endif
499
#endif
500
    if( !(cpu&X264_CPU_SSE2) )
500
    if( !(cpu&X264_CPU_SSE2) )
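
Note on the guard convention followed throughout the hunks above: 64-bit-only paths gain "|| ARCH_X86_64_32", while 32-bit-only fallbacks gain "&& !ARCH_X86_64_32" (or "&& ARCH_X86_64_32 == 0" in the yasm sources). A quick way to spot conditionals that drift from that pattern is sketched below with plain grep; the command is illustrative only and is not part of the patch.

    # hypothetical consistency check, run from the source root: list #if/%if
    # lines that mention ARCH_X86_64 but have no ARCH_X86_64_32 counterpart
    grep -rnE '(#|%)(el)?if.*ARCH_X86_64([^_]|$)' common encoder tools \
        | grep -v ARCH_X86_64_32
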
(-)x264-snapshot-20160712-2245/common/x86/quant-a.asm (-22 / +22 lines)
Lines 131-137 Link Here
131
%if cpuflag(sse4)
131
%if cpuflag(sse4)
132
    ptest     m5, m5
132
    ptest     m5, m5
133
%else ; !sse4
133
%else ; !sse4
134
%if ARCH_X86_64
134
%if ARCH_X86_64 || ARCH_X86_64_32
135
%if mmsize == 16
135
%if mmsize == 16
136
    packsswb  m5, m5
136
    packsswb  m5, m5
137
%endif
137
%endif
Lines 451-457 Link Here
451
451
452
INIT_MMX mmx2
452
INIT_MMX mmx2
453
QUANT_DC quant_2x2_dc, 1
453
QUANT_DC quant_2x2_dc, 1
454
%if ARCH_X86_64 == 0 ; not needed because sse2 is faster
454
%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0 ; not needed because sse2 is faster
455
QUANT_DC quant_4x4_dc, 4
455
QUANT_DC quant_4x4_dc, 4
456
INIT_MMX mmx2
456
INIT_MMX mmx2
457
QUANT_AC quant_4x4, 4
457
QUANT_AC quant_4x4, 4
Lines 607-613 Link Here
607
%endrep
607
%endrep
608
%endmacro
608
%endmacro
609
609
610
%if ARCH_X86_64
610
%if ARCH_X86_64 || ARCH_X86_64_32
611
    DECLARE_REG_TMP 6,3,2
611
    DECLARE_REG_TMP 6,3,2
612
%else
612
%else
613
    DECLARE_REG_TMP 2,0,1
613
    DECLARE_REG_TMP 2,0,1
Lines 621-627 Link Here
621
    sub  t2d, t0d
621
    sub  t2d, t0d
622
    sub  t2d, t1d   ; i_mf = i_qp % 6
622
    sub  t2d, t1d   ; i_mf = i_qp % 6
623
    shl  t2d, %1
623
    shl  t2d, %1
624
%if ARCH_X86_64
624
%if ARCH_X86_64 || ARCH_X86_64_32
625
    add  r1, t2     ; dequant_mf[i_mf]
625
    add  r1, t2     ; dequant_mf[i_mf]
626
%else
626
%else
627
    add  r1, r1mp   ; dequant_mf[i_mf]
627
    add  r1, r1mp   ; dequant_mf[i_mf]
Lines 724-730 Link Here
724
DEQUANT 4, 4, 4
724
DEQUANT 4, 4, 4
725
DEQUANT 8, 6, 4
725
DEQUANT 8, 6, 4
726
%else
726
%else
727
%if ARCH_X86_64 == 0
727
%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0
728
INIT_MMX mmx
728
INIT_MMX mmx
729
DEQUANT 4, 4, 1
729
DEQUANT 4, 4, 1
730
DEQUANT 8, 6, 1
730
DEQUANT 8, 6, 1
Lines 817-823 Link Here
817
INIT_YMM avx2
817
INIT_YMM avx2
818
DEQUANT_DC d, pmaddwd
818
DEQUANT_DC d, pmaddwd
819
%else
819
%else
820
%if ARCH_X86_64 == 0
820
%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0
821
INIT_MMX mmx2
821
INIT_MMX mmx2
822
DEQUANT_DC w, pmullw
822
DEQUANT_DC w, pmullw
823
%endif
823
%endif
Lines 857-863 Link Here
857
    %define %%args dct, dct4x4, dmf, qp
857
    %define %%args dct, dct4x4, dmf, qp
858
%endif
858
%endif
859
859
860
%if ARCH_X86_64 == 0
860
%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0
861
    DECLARE_REG_TMP 2,0,1
861
    DECLARE_REG_TMP 2,0,1
862
%endif
862
%endif
863
863
Lines 869-875 Link Here
869
    sub        t2d, t0d
869
    sub        t2d, t0d
870
    sub        t2d, t1d       ; qp % 6
870
    sub        t2d, t1d       ; qp % 6
871
    shl        t2d, 6         ; 16 * sizeof(int)
871
    shl        t2d, 6         ; 16 * sizeof(int)
872
%if ARCH_X86_64
872
%if ARCH_X86_64 || ARCH_X86_64_32
873
    imul       t2d, [dmfq+t2], -0xffff ; (-dmf) << 16 | dmf
873
    imul       t2d, [dmfq+t2], -0xffff ; (-dmf) << 16 | dmf
874
%else
874
%else
875
    mov       dctq, dctmp
875
    mov       dctq, dctmp
Lines 974-980 Link Here
974
DEQUANT_2x4_DC dconly
974
DEQUANT_2x4_DC dconly
975
975
976
; t4 is eax for return value.
976
; t4 is eax for return value.
977
%if ARCH_X86_64
977
%if ARCH_X86_64 || ARCH_X86_64_32
978
    DECLARE_REG_TMP 0,1,2,3,6,4  ; Identical for both Windows and *NIX
978
    DECLARE_REG_TMP 0,1,2,3,6,4  ; Identical for both Windows and *NIX
979
%else
979
%else
980
    DECLARE_REG_TMP 4,1,2,3,0,5
980
    DECLARE_REG_TMP 4,1,2,3,0,5
Lines 1120-1126 Link Here
1120
    RET
1120
    RET
1121
%endmacro
1121
%endmacro
1122
1122
1123
%if ARCH_X86_64 == 0
1123
%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0
1124
INIT_MMX mmx
1124
INIT_MMX mmx
1125
DENOISE_DCT
1125
DENOISE_DCT
1126
%endif
1126
%endif
Lines 1170-1176 Link Here
1170
    RET
1170
    RET
1171
%endmacro
1171
%endmacro
1172
1172
1173
%if ARCH_X86_64 == 0
1173
%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0
1174
INIT_MMX mmx
1174
INIT_MMX mmx
1175
DENOISE_DCT
1175
DENOISE_DCT
1176
%endif
1176
%endif
Lines 1306-1312 Link Here
1306
1306
1307
%endmacro
1307
%endmacro
1308
1308
1309
%if ARCH_X86_64 == 0
1309
%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0
1310
INIT_MMX mmx2
1310
INIT_MMX mmx2
1311
DECIMATE4x4 15
1311
DECIMATE4x4 15
1312
DECIMATE4x4 16
1312
DECIMATE4x4 16
Lines 1343-1349 Link Here
1343
1343
1344
%macro DECIMATE8x8 0
1344
%macro DECIMATE8x8 0
1345
1345
1346
%if ARCH_X86_64
1346
%if ARCH_X86_64 || ARCH_X86_64_32
1347
cglobal decimate_score64, 1,5
1347
cglobal decimate_score64, 1,5
1348
%ifdef PIC
1348
%ifdef PIC
1349
    lea r4, [decimate_table8]
1349
    lea r4, [decimate_table8]
Lines 1462-1468 Link Here
1462
1462
1463
%endmacro
1463
%endmacro
1464
1464
1465
%if ARCH_X86_64 == 0
1465
%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0
1466
INIT_MMX mmx2
1466
INIT_MMX mmx2
1467
DECIMATE8x8
1467
DECIMATE8x8
1468
%endif
1468
%endif
Lines 1573-1579 Link Here
1573
    RET
1573
    RET
1574
%endmacro
1574
%endmacro
1575
1575
1576
%if ARCH_X86_64 == 0
1576
%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0
1577
INIT_MMX mmx2
1577
INIT_MMX mmx2
1578
COEFF_LAST8
1578
COEFF_LAST8
1579
%endif
1579
%endif
Lines 1613-1619 Link Here
1613
%endmacro
1613
%endmacro
1614
1614
1615
%macro COEFF_LAST48 0
1615
%macro COEFF_LAST48 0
1616
%if ARCH_X86_64
1616
%if ARCH_X86_64 || ARCH_X86_64_32
1617
cglobal coeff_last4, 1,1
1617
cglobal coeff_last4, 1,1
1618
    BSR  rax, [r0], 0x3f
1618
    BSR  rax, [r0], 0x3f
1619
    shr  eax, 4
1619
    shr  eax, 4
Lines 1662-1668 Link Here
1662
    BSR eax, r1d, 0x1f
1662
    BSR eax, r1d, 0x1f
1663
    RET
1663
    RET
1664
1664
1665
%if ARCH_X86_64 == 0
1665
%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0
1666
cglobal coeff_last64, 1, 4-mmsize/16
1666
cglobal coeff_last64, 1, 4-mmsize/16
1667
    pxor m2, m2
1667
    pxor m2, m2
1668
    LAST_MASK 16, r1d, r0+SIZEOF_DCTCOEF* 32, r3d
1668
    LAST_MASK 16, r1d, r0+SIZEOF_DCTCOEF* 32, r3d
Lines 1701-1707 Link Here
1701
%endif
1701
%endif
1702
%endmacro
1702
%endmacro
1703
1703
1704
%if ARCH_X86_64 == 0
1704
%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0
1705
INIT_MMX mmx2
1705
INIT_MMX mmx2
1706
COEFF_LAST
1706
COEFF_LAST
1707
%endif
1707
%endif
Lines 1728-1734 Link Here
1728
    pmovmskb %1, m0
1728
    pmovmskb %1, m0
1729
%endmacro
1729
%endmacro
1730
1730
1731
%if ARCH_X86_64 == 0
1731
%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0
1732
INIT_YMM avx2,lzcnt
1732
INIT_YMM avx2,lzcnt
1733
cglobal coeff_last64, 1,2
1733
cglobal coeff_last64, 1,2
1734
    pxor m2, m2
1734
    pxor m2, m2
Lines 1770-1776 Link Here
1770
; t6 = eax for return, t3 = ecx for shift, t[01] = r[01] for x86_64 args
1770
; t6 = eax for return, t3 = ecx for shift, t[01] = r[01] for x86_64 args
1771
%if WIN64
1771
%if WIN64
1772
    DECLARE_REG_TMP 3,1,2,0,4,5,6
1772
    DECLARE_REG_TMP 3,1,2,0,4,5,6
1773
%elif ARCH_X86_64
1773
%elif ARCH_X86_64 || ARCH_X86_64_32
1774
    DECLARE_REG_TMP 0,1,2,3,4,5,6
1774
    DECLARE_REG_TMP 0,1,2,3,4,5,6
1775
%else
1775
%else
1776
    DECLARE_REG_TMP 6,3,2,1,4,5,0
1776
    DECLARE_REG_TMP 6,3,2,1,4,5,0
Lines 1821-1827 Link Here
1821
%endmacro
1821
%endmacro
1822
1822
1823
INIT_MMX mmx2
1823
INIT_MMX mmx2
1824
%if ARCH_X86_64 == 0
1824
%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0
1825
COEFF_LEVELRUN 15
1825
COEFF_LEVELRUN 15
1826
COEFF_LEVELRUN 16
1826
COEFF_LEVELRUN 16
1827
%endif
1827
%endif
Lines 1885-1891 Link Here
1885
    add     eax, eax
1885
    add     eax, eax
1886
%endif
1886
%endif
1887
%if %1 > 8
1887
%if %1 > 8
1888
%if ARCH_X86_64
1888
%if ARCH_X86_64 || ARCH_X86_64_32
1889
    mov     r4d, eax
1889
    mov     r4d, eax
1890
    shr     r4d, 8
1890
    shr     r4d, 8
1891
%else
1891
%else
(-)x264-snapshot-20160712-2245/common/x86/sad-a.asm (-10 / +10 lines)
Lines 265-271 Link Here
265
; void pixel_vsad( pixel *src, intptr_t stride );
265
; void pixel_vsad( pixel *src, intptr_t stride );
266
;-----------------------------------------------------------------------------
266
;-----------------------------------------------------------------------------
267
267
268
%if ARCH_X86_64 == 0
268
%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0
269
INIT_MMX
269
INIT_MMX
270
cglobal pixel_vsad_mmx2, 3,3
270
cglobal pixel_vsad_mmx2, 3,3
271
    mova      m0, [r0]
271
    mova      m0, [r0]
Lines 1042-1048 Link Here
1042
    paddw    m2, m3
1042
    paddw    m2, m3
1043
%endmacro
1043
%endmacro
1044
1044
1045
%if ARCH_X86_64
1045
%if ARCH_X86_64 || ARCH_X86_64_32
1046
    DECLARE_REG_TMP 6
1046
    DECLARE_REG_TMP 6
1047
%else
1047
%else
1048
    DECLARE_REG_TMP 5
1048
    DECLARE_REG_TMP 5
Lines 1733-1739 Link Here
1733
    CHECK_SPLIT r3m, %1, %3
1733
    CHECK_SPLIT r3m, %1, %3
1734
    jmp pixel_sad_x3_%1x%2_%4
1734
    jmp pixel_sad_x3_%1x%2_%4
1735
.split:
1735
.split:
1736
%if ARCH_X86_64
1736
%if ARCH_X86_64 || ARCH_X86_64_32
1737
    PROLOGUE 6,9
1737
    PROLOGUE 6,9
1738
    push r3
1738
    push r3
1739
    push r2
1739
    push r2
Lines 1799-1805 Link Here
1799
    CHECK_SPLIT r4m, %1, %3
1799
    CHECK_SPLIT r4m, %1, %3
1800
    jmp pixel_sad_x4_%1x%2_%4
1800
    jmp pixel_sad_x4_%1x%2_%4
1801
.split:
1801
.split:
1802
%if ARCH_X86_64
1802
%if ARCH_X86_64 || ARCH_X86_64_32
1803
    PROLOGUE 6,9
1803
    PROLOGUE 6,9
1804
    mov  r8,  r6mp
1804
    mov  r8,  r6mp
1805
    push r4
1805
    push r4
Lines 1878-1884 Link Here
1878
; instantiate the aligned sads
1878
; instantiate the aligned sads
1879
1879
1880
INIT_MMX
1880
INIT_MMX
1881
%if ARCH_X86_64 == 0
1881
%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0
1882
SAD16_CACHELINE_FUNC_MMX2  8, 32
1882
SAD16_CACHELINE_FUNC_MMX2  8, 32
1883
SAD16_CACHELINE_FUNC_MMX2 16, 32
1883
SAD16_CACHELINE_FUNC_MMX2 16, 32
1884
SAD8_CACHELINE_FUNC_MMX2   4, 32
1884
SAD8_CACHELINE_FUNC_MMX2   4, 32
Lines 1886-1908 Link Here
1886
SAD8_CACHELINE_FUNC_MMX2  16, 32
1886
SAD8_CACHELINE_FUNC_MMX2  16, 32
1887
SAD16_CACHELINE_FUNC_MMX2  8, 64
1887
SAD16_CACHELINE_FUNC_MMX2  8, 64
1888
SAD16_CACHELINE_FUNC_MMX2 16, 64
1888
SAD16_CACHELINE_FUNC_MMX2 16, 64
1889
%endif ; !ARCH_X86_64
1889
%endif ; !ARCH_X86_64 && !ARCH_X86_64_32
1890
SAD8_CACHELINE_FUNC_MMX2   4, 64
1890
SAD8_CACHELINE_FUNC_MMX2   4, 64
1891
SAD8_CACHELINE_FUNC_MMX2   8, 64
1891
SAD8_CACHELINE_FUNC_MMX2   8, 64
1892
SAD8_CACHELINE_FUNC_MMX2  16, 64
1892
SAD8_CACHELINE_FUNC_MMX2  16, 64
1893
1893
1894
%if ARCH_X86_64 == 0
1894
%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0
1895
SADX34_CACHELINE_FUNC 16, 16, 32, mmx2, mmx2, mmx2
1895
SADX34_CACHELINE_FUNC 16, 16, 32, mmx2, mmx2, mmx2
1896
SADX34_CACHELINE_FUNC 16,  8, 32, mmx2, mmx2, mmx2
1896
SADX34_CACHELINE_FUNC 16,  8, 32, mmx2, mmx2, mmx2
1897
SADX34_CACHELINE_FUNC  8, 16, 32, mmx2, mmx2, mmx2
1897
SADX34_CACHELINE_FUNC  8, 16, 32, mmx2, mmx2, mmx2
1898
SADX34_CACHELINE_FUNC  8,  8, 32, mmx2, mmx2, mmx2
1898
SADX34_CACHELINE_FUNC  8,  8, 32, mmx2, mmx2, mmx2
1899
SADX34_CACHELINE_FUNC 16, 16, 64, mmx2, mmx2, mmx2
1899
SADX34_CACHELINE_FUNC 16, 16, 64, mmx2, mmx2, mmx2
1900
SADX34_CACHELINE_FUNC 16,  8, 64, mmx2, mmx2, mmx2
1900
SADX34_CACHELINE_FUNC 16,  8, 64, mmx2, mmx2, mmx2
1901
%endif ; !ARCH_X86_64
1901
%endif ; !ARCH_X86_64 && !ARCH_X86_64_32
1902
SADX34_CACHELINE_FUNC  8, 16, 64, mmx2, mmx2, mmx2
1902
SADX34_CACHELINE_FUNC  8, 16, 64, mmx2, mmx2, mmx2
1903
SADX34_CACHELINE_FUNC  8,  8, 64, mmx2, mmx2, mmx2
1903
SADX34_CACHELINE_FUNC  8,  8, 64, mmx2, mmx2, mmx2
1904
1904
1905
%if ARCH_X86_64 == 0
1905
%if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0
1906
SAD16_CACHELINE_FUNC sse2, 8
1906
SAD16_CACHELINE_FUNC sse2, 8
1907
SAD16_CACHELINE_FUNC sse2, 16
1907
SAD16_CACHELINE_FUNC sse2, 16
1908
%assign i 1
1908
%assign i 1
Lines 1912-1918 Link Here
1912
%endrep
1912
%endrep
1913
SADX34_CACHELINE_FUNC 16, 16, 64, sse2, sse2, sse2
1913
SADX34_CACHELINE_FUNC 16, 16, 64, sse2, sse2, sse2
1914
SADX34_CACHELINE_FUNC 16,  8, 64, sse2, sse2, sse2
1914
SADX34_CACHELINE_FUNC 16,  8, 64, sse2, sse2, sse2
1915
%endif ; !ARCH_X86_64
1915
%endif ; !ARCH_X86_64 && !ARCH_X86_64_32
1916
SADX34_CACHELINE_FUNC  8, 16, 64, sse2, mmx2, sse2
1916
SADX34_CACHELINE_FUNC  8, 16, 64, sse2, mmx2, sse2
1917
1917
1918
SAD16_CACHELINE_FUNC ssse3, 8
1918
SAD16_CACHELINE_FUNC ssse3, 8
(-)x264-snapshot-20160712-2245/common/x86/x86inc.asm (-8 / +8 lines)
Lines 43-49 Link Here
43
%endif
43
%endif
44
44
45
%ifndef STACK_ALIGNMENT
45
%ifndef STACK_ALIGNMENT
46
    %if ARCH_X86_64
46
    %if ARCH_X86_64 || ARCH_X86_64_32
47
        %define STACK_ALIGNMENT 16
47
        %define STACK_ALIGNMENT 16
48
    %else
48
    %else
49
        %define STACK_ALIGNMENT 4
49
        %define STACK_ALIGNMENT 4
Lines 52-58 Link Here
52
52
53
%define WIN64  0
53
%define WIN64  0
54
%define UNIX64 0
54
%define UNIX64 0
55
%if ARCH_X86_64
55
%if ARCH_X86_64 || ARCH_X86_64_32
56
    %ifidn __OUTPUT_FORMAT__,win32
56
    %ifidn __OUTPUT_FORMAT__,win32
57
        %define WIN64  1
57
        %define WIN64  1
58
    %elifidn __OUTPUT_FORMAT__,win64
58
    %elifidn __OUTPUT_FORMAT__,win64
Lines 85-91 Link Here
85
85
86
%if WIN64
86
%if WIN64
87
    %define PIC
87
    %define PIC
88
%elif ARCH_X86_64 == 0
88
%elif ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0
89
; x86_32 doesn't require PIC.
89
; x86_32 doesn't require PIC.
90
; Some distros prefer shared objects to be PIC, but nothing breaks if
90
; Some distros prefer shared objects to be PIC, but nothing breaks if
91
; the code contains a few textrels, so we'll skip that complexity.
91
; the code contains a few textrels, so we'll skip that complexity.
Lines 171-177 Link Here
171
    %define e%1h %3
171
    %define e%1h %3
172
    %define r%1b %2
172
    %define r%1b %2
173
    %define e%1b %2
173
    %define e%1b %2
174
    %if ARCH_X86_64 == 0
174
    %if ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0
175
        %define r%1 e%1
175
        %define r%1 e%1
176
    %endif
176
    %endif
177
%endmacro
177
%endmacro
Lines 208-214 Link Here
208
208
209
DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
209
DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
210
210
211
%if ARCH_X86_64
211
%if ARCH_X86_64 || ARCH_X86_64_32
212
    %define gprsize 8
212
    %define gprsize 8
213
%else
213
%else
214
    %define gprsize 4
214
    %define gprsize 4
Lines 882-888 Link Here
882
    %define RESET_MM_PERMUTATION INIT_XMM %1
882
    %define RESET_MM_PERMUTATION INIT_XMM %1
883
    %define mmsize 16
883
    %define mmsize 16
884
    %define num_mmregs 8
884
    %define num_mmregs 8
885
    %if ARCH_X86_64
885
    %if ARCH_X86_64 || ARCH_X86_64_32
886
        %define num_mmregs 16
886
        %define num_mmregs 16
887
    %endif
887
    %endif
888
    %define mova movdqa
888
    %define mova movdqa
Lines 903-909 Link Here
903
    %define RESET_MM_PERMUTATION INIT_YMM %1
903
    %define RESET_MM_PERMUTATION INIT_YMM %1
904
    %define mmsize 32
904
    %define mmsize 32
905
    %define num_mmregs 8
905
    %define num_mmregs 8
906
    %if ARCH_X86_64
906
    %if ARCH_X86_64 || ARCH_X86_64_32
907
        %define num_mmregs 16
907
        %define num_mmregs 16
908
    %endif
908
    %endif
909
    %define mova movdqa
909
    %define mova movdqa
Lines 1523-1529 Link Here
1523
1523
1524
; workaround: vpbroadcastq is broken in x86_32 due to a yasm bug (fixed in 1.3.0)
1524
; workaround: vpbroadcastq is broken in x86_32 due to a yasm bug (fixed in 1.3.0)
1525
%ifdef __YASM_VER__
1525
%ifdef __YASM_VER__
1526
    %if __YASM_VERSION_ID__ < 0x01030000 && ARCH_X86_64 == 0
1526
    %if __YASM_VERSION_ID__ < 0x01030000 && ARCH_X86_64 == 0 && ARCH_X86_64_32 == 0
1527
        %macro vpbroadcastq 2
1527
        %macro vpbroadcastq 2
1528
            %if sizeof%1 == 16
1528
            %if sizeof%1 == 16
1529
                movddup %1, %2
1529
                movddup %1, %2
(-)x264-snapshot-20160712-2245/common/x86/x86util.asm (-1 / +1 lines)
Lines 102-108 Link Here
102
%endmacro
102
%endmacro
103
103
104
%macro TRANSPOSE8x8W 9-11
104
%macro TRANSPOSE8x8W 9-11
105
%if ARCH_X86_64
105
%if ARCH_X86_64 || ARCH_X86_64_32
106
    SBUTTERFLY wd,  %1, %2, %9
106
    SBUTTERFLY wd,  %1, %2, %9
107
    SBUTTERFLY wd,  %3, %4, %9
107
    SBUTTERFLY wd,  %3, %4, %9
108
    SBUTTERFLY wd,  %5, %6, %9
108
    SBUTTERFLY wd,  %5, %6, %9
(-)x264-snapshot-20160712-2245/configure (-22 / +27 lines)
Lines 698-728 Link Here
698
        fi
698
        fi
699
        ;;
699
        ;;
700
    x86_64)
700
    x86_64)
701
        ARCH="X86_64"
702
        AS="${AS-yasm}"
701
        AS="${AS-yasm}"
703
        AS_EXT=".asm"
702
        AS_EXT=".asm"
704
        ASFLAGS="$ASFLAGS -DARCH_X86_64=1 -I\$(SRCPATH)/common/x86/"
705
        stack_alignment=16
703
        stack_alignment=16
706
        [ $compiler = GNU ] && CFLAGS="-m64 $CFLAGS" && LDFLAGS="-m64 $LDFLAGS"
704
        if [[ $host_os = *x32  ]]; then
707
        if [ "$SYS" = MACOSX ]; then
705
            ARCH="X86_64_32"
708
            ASFLAGS="$ASFLAGS -f macho64 -DPIC -DPREFIX"
706
            ASFLAGS="$ASFLAGS -DARCH_X86_64_32=1 -I\$(SRCPATH)/common/x86/ -f elfx32"
709
            if cc_check '' "-arch x86_64"; then
710
                CFLAGS="$CFLAGS -arch x86_64"
711
                LDFLAGS="$LDFLAGS -arch x86_64"
712
            fi
713
        elif [ "$SYS" = WINDOWS -o "$SYS" = CYGWIN ]; then
714
            ASFLAGS="$ASFLAGS -f win64"
715
            if [ $compiler = GNU ]; then
716
                # only the GNU toolchain is inconsistent in prefixing function names with _
717
                cc_check "" "-S" && grep -q "_main:" conftest && ASFLAGS="$ASFLAGS -DPREFIX"
718
                cc_check "" "-Wl,--high-entropy-va" && LDFLAGS="$LDFLAGS -Wl,--high-entropy-va"
719
                LDFLAGS="$LDFLAGS -Wl,--dynamicbase,--nxcompat,--tsaware"
720
                LDFLAGSCLI="$LDFLAGSCLI -Wl,--image-base,0x140000000"
721
                SOFLAGS="$SOFLAGS -Wl,--image-base,0x180000000"
722
                RCFLAGS="--target=pe-x86-64 $RCFLAGS"
723
            fi
724
        else
707
        else
725
            ASFLAGS="$ASFLAGS -f elf64"
708
            ARCH="X86_64"
709
            ASFLAGS="$ASFLAGS -DARCH_X86_64=1 -I\$(SRCPATH)/common/x86/"
710
            [ $compiler = GNU ] && CFLAGS="-m64 $CFLAGS" && LDFLAGS="-m64 $LDFLAGS"
711
            if [ "$SYS" = MACOSX ]; then
712
                ASFLAGS="$ASFLAGS -f macho64 -DPIC -DPREFIX"
713
                if cc_check '' "-arch x86_64"; then
714
                    CFLAGS="$CFLAGS -arch x86_64"
715
                    LDFLAGS="$LDFLAGS -arch x86_64"
716
                fi
717
            elif [ "$SYS" = WINDOWS -o "$SYS" = CYGWIN ]; then
718
                ASFLAGS="$ASFLAGS -f win64"
719
                if [ $compiler = GNU ]; then
720
                    # only the GNU toolchain is inconsistent in prefixing function names with _
721
                    cc_check "" "-S" && grep -q "_main:" conftest && ASFLAGS="$ASFLAGS -DPREFIX"
722
                    cc_check "" "-Wl,--high-entropy-va" && LDFLAGS="$LDFLAGS -Wl,--high-entropy-va"
723
                    LDFLAGS="$LDFLAGS -Wl,--dynamicbase,--nxcompat,--tsaware"
724
                    LDFLAGSCLI="$LDFLAGSCLI -Wl,--image-base,0x140000000"
725
                    SOFLAGS="$SOFLAGS -Wl,--image-base,0x180000000"
726
                    RCFLAGS="--target=pe-x86-64 $RCFLAGS"
727
                fi
728
            else
729
                ASFLAGS="$ASFLAGS -f elf64"
730
            fi
726
        fi
731
        fi
727
        ;;
732
        ;;
728
    powerpc*)
733
    powerpc*)
Lines 1201-1207 Link Here
1201
fi
1206
fi
1202
[ "$lto" = "auto" ] && lto="no"
1207
[ "$lto" = "auto" ] && lto="no"
1203
1208
1204
if cc_check '' -fno-tree-vectorize ; then
1209
if cc_check '' -fno-tree-vectorize && ! [[ $host_os = *x32  ]]; then
1205
    CFLAGS="$CFLAGS -fno-tree-vectorize"
1210
    CFLAGS="$CFLAGS -fno-tree-vectorize"
1206
fi
1211
fi
1207
1212
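
The configure hunk above selects the new ARCH=X86_64_32 case whenever $host_os matches *x32 and switches yasm to the elfx32 object format (-DARCH_X86_64_32=1 -f elfx32). A minimal build sketch, not taken from the patch and assuming a GNU toolchain with x32 multilib support plus a yasm new enough to accept -f elfx32:

    # illustrative only; the host triplet and the -mx32 flags are assumptions
    cd x264-snapshot-20160712-2245
    ./configure --host=x86_64-pc-linux-gnux32 \
                --extra-cflags=-mx32 --extra-ldflags=-mx32
    make -j"$(nproc)"
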
(-)x264-snapshot-20160712-2245/encoder/cabac.c (-3 / +3 lines)
Lines 801-807 Link Here
801
801
802
static void ALWAYS_INLINE x264_cabac_block_residual( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l )
802
static void ALWAYS_INLINE x264_cabac_block_residual( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l )
803
{
803
{
804
#if ARCH_X86_64 && HAVE_MMX
804
#if (ARCH_X86_64 || ARCH_X86_64_32) && HAVE_MMX
805
    h->bsf.cabac_block_residual_internal( l, MB_INTERLACED, ctx_block_cat, cb );
805
    h->bsf.cabac_block_residual_internal( l, MB_INTERLACED, ctx_block_cat, cb );
806
#else
806
#else
807
    x264_cabac_block_residual_c( h, cb, ctx_block_cat, l );
807
    x264_cabac_block_residual_c( h, cb, ctx_block_cat, l );
Lines 915-921 Link Here
915
915
916
static ALWAYS_INLINE void x264_cabac_block_residual_8x8( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l )
916
static ALWAYS_INLINE void x264_cabac_block_residual_8x8( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l )
917
{
917
{
918
#if ARCH_X86_64 && HAVE_MMX
918
#if (ARCH_X86_64 || ARCH_X86_64_32) && HAVE_MMX
919
    h->bsf.cabac_block_residual_8x8_rd_internal( l, MB_INTERLACED, ctx_block_cat, cb );
919
    h->bsf.cabac_block_residual_8x8_rd_internal( l, MB_INTERLACED, ctx_block_cat, cb );
920
#else
920
#else
921
    x264_cabac_block_residual_8x8_rd_c( h, cb, ctx_block_cat, l );
921
    x264_cabac_block_residual_8x8_rd_c( h, cb, ctx_block_cat, l );
Lines 923-929 Link Here
923
}
923
}
924
static ALWAYS_INLINE void x264_cabac_block_residual( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l )
924
static ALWAYS_INLINE void x264_cabac_block_residual( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l )
925
{
925
{
926
#if ARCH_X86_64 && HAVE_MMX
926
#if (ARCH_X86_64 || ARCH_X86_64_32) && HAVE_MMX
927
    h->bsf.cabac_block_residual_rd_internal( l, MB_INTERLACED, ctx_block_cat, cb );
927
    h->bsf.cabac_block_residual_rd_internal( l, MB_INTERLACED, ctx_block_cat, cb );
928
#else
928
#else
929
    x264_cabac_block_residual_rd_c( h, cb, ctx_block_cat, l );
929
    x264_cabac_block_residual_rd_c( h, cb, ctx_block_cat, l );
(-)x264-snapshot-20160712-2245/encoder/encoder.c (-1 / +1 lines)
Lines 1593-1599 Link Here
1593
    if( x264_clz( temp ) != 23 )
1593
    if( x264_clz( temp ) != 23 )
1594
    {
1594
    {
1595
        x264_log( h, X264_LOG_ERROR, "CLZ test failed: x264 has been miscompiled!\n" );
1595
        x264_log( h, X264_LOG_ERROR, "CLZ test failed: x264 has been miscompiled!\n" );
1596
#if ARCH_X86 || ARCH_X86_64
1596
#if ARCH_X86 || ARCH_X86_64 || ARCH_X86_64_32
1597
        x264_log( h, X264_LOG_ERROR, "Are you attempting to run an SSE4a/LZCNT-targeted build on a CPU that\n" );
1597
        x264_log( h, X264_LOG_ERROR, "Are you attempting to run an SSE4a/LZCNT-targeted build on a CPU that\n" );
1598
        x264_log( h, X264_LOG_ERROR, "doesn't support it?\n" );
1598
        x264_log( h, X264_LOG_ERROR, "doesn't support it?\n" );
1599
#endif
1599
#endif
(-)x264-snapshot-20160712-2245/encoder/rdo.c (-1 / +1 lines)
Lines 695-701 Link Here
695
        return !!dct[0];
695
        return !!dct[0];
696
    }
696
    }
697
697
698
#if HAVE_MMX && ARCH_X86_64
698
#if HAVE_MMX && (ARCH_X86_64 || ARCH_X86_64_32)
699
#define TRELLIS_ARGS unquant_mf, zigzag, lambda2, last_nnz, orig_coefs, quant_coefs, dct,\
699
#define TRELLIS_ARGS unquant_mf, zigzag, lambda2, last_nnz, orig_coefs, quant_coefs, dct,\
700
                     cabac_state_sig, cabac_state_last, M64(cabac_state), M16(cabac_state+8)
700
                     cabac_state_sig, cabac_state_last, M64(cabac_state), M16(cabac_state+8)
701
    if( num_coefs == 16 && !dc )
701
    if( num_coefs == 16 && !dc )
(-)x264-snapshot-20160712-2245/tools/checkasm-a.asm (-3 / +3 lines)
Lines 30-36 Link Here
30
30
31
error_message: db "failed to preserve register", 0
31
error_message: db "failed to preserve register", 0
32
32
33
%if ARCH_X86_64
33
%if ARCH_X86_64 || ARCH_X86_64_32
34
; just random numbers to reduce the chance of incidental match
34
; just random numbers to reduce the chance of incidental match
35
ALIGN 16
35
ALIGN 16
36
x6:  dq 0x1a1b2550a612b48c,0x79445c159ce79064
36
x6:  dq 0x1a1b2550a612b48c,0x79445c159ce79064
Lines 61-67 Link Here
61
; (max_args % 4) must equal 3 for stack alignment
61
; (max_args % 4) must equal 3 for stack alignment
62
%define max_args 15
62
%define max_args 15
63
63
64
%if ARCH_X86_64
64
%if ARCH_X86_64 || ARCH_X86_64_32
65
65
66
;-----------------------------------------------------------------------------
66
;-----------------------------------------------------------------------------
67
; void x264_checkasm_stack_clobber( uint64_t clobber, ... )
67
; void x264_checkasm_stack_clobber( uint64_t clobber, ... )
Lines 203-209 Link Here
203
.ok:
203
.ok:
204
    REP_RET
204
    REP_RET
205
205
206
%endif ; ARCH_X86_64
206
%endif ; ARCH_X86_64 || ARCH_X86_64_32
207
207
208
;-----------------------------------------------------------------------------
208
;-----------------------------------------------------------------------------
209
; int x264_stack_pagealign( int (*func)(), int align )
209
; int x264_stack_pagealign( int (*func)(), int align )
(-)x264-snapshot-20160712-2245/tools/checkasm.c (-3 / +3 lines)
Lines 217-223 Link Here
217
        }
217
        }
218
}
218
}
219
219
220
#if ARCH_X86 || ARCH_X86_64
220
#if ARCH_X86 || ARCH_X86_64 || ARCH_X86_64_32
221
int x264_stack_pagealign( int (*func)(), int align );
221
int x264_stack_pagealign( int (*func)(), int align );
222
222
223
/* detect when callee-saved regs aren't saved
223
/* detect when callee-saved regs aren't saved
Lines 254-260 Link Here
254
    uint64_t r = (rand() & 0xffff) * 0x0001000100010001ULL; \
254
    uint64_t r = (rand() & 0xffff) * 0x0001000100010001ULL; \
255
    x264_checkasm_stack_clobber( r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r ); /* max_args+6 */ \
255
    x264_checkasm_stack_clobber( r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r ); /* max_args+6 */ \
256
    x264_checkasm_call(( intptr_t(*)())func, &ok, 0, 0, 0, 0, __VA_ARGS__ ); })
256
    x264_checkasm_call(( intptr_t(*)())func, &ok, 0, 0, 0, 0, __VA_ARGS__ ); })
257
#elif ARCH_X86 || (ARCH_AARCH64 && !defined(__APPLE__)) || ARCH_ARM
257
#elif ARCH_X86 || ARCH_X86_64_32 || (ARCH_AARCH64 && !defined(__APPLE__)) || ARCH_ARM
258
#define call_a1(func,...) x264_checkasm_call( (intptr_t(*)())func, &ok, __VA_ARGS__ )
258
#define call_a1(func,...) x264_checkasm_call( (intptr_t(*)())func, &ok, __VA_ARGS__ )
259
#else
259
#else
260
#define call_a1 call_c1
260
#define call_a1 call_c1
Lines 2884-2890 Link Here
2884
2884
2885
    if( argc > 1 && !strncmp( argv[1], "--bench", 7 ) )
2885
    if( argc > 1 && !strncmp( argv[1], "--bench", 7 ) )
2886
    {
2886
    {
2887
#if !ARCH_X86 && !ARCH_X86_64 && !ARCH_PPC && !ARCH_ARM && !ARCH_AARCH64 && !ARCH_MIPS
2887
#if !ARCH_X86 && !ARCH_X86_64 && !ARCH_X86_64_32 && !ARCH_PPC && !ARCH_ARM && !ARCH_AARCH64 && !ARCH_MIPS
2888
        fprintf( stderr, "no --bench for your cpu until you port rdtsc\n" );
2888
        fprintf( stderr, "no --bench for your cpu until you port rdtsc\n" );
2889
        return 1;
2889
        return 1;
2890
#endif
2890
#endif
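
Since nearly every hunk above touches hand-written asm or its guards, the bundled checkasm tool is the natural smoke test for an x32 build; --bench additionally exercises the timing path gated by the #if shown above. The target and binary name below are assumed from the stock Makefile of this snapshot and may differ, and the kernel must have x32 syscall support enabled to run the result.

    # hypothetical verification run on an x32-capable system
    make checkasm
    ./checkasm --bench
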
