Lines 121-131
Link Here
|
121 |
return amvd; |
121 |
return amvd; |
122 |
} |
122 |
} |
123 |
|
123 |
|
|
|
124 |
/* Some hardened x86 systems have only 4 registers available. If that's the case, we have to spill mvc_max to memory. */ |
125 |
#ifdef x264_ultrahardened |
126 |
#define mvc_max_mode "=m" |
127 |
#else |
128 |
#define mvc_max_mode "+r" |
129 |
#endif |
130 |
|
124 |
#define x264_predictor_clip x264_predictor_clip_mmx2 |
131 |
#define x264_predictor_clip x264_predictor_clip_mmx2 |
125 |
static int ALWAYS_INLINE x264_predictor_clip_mmx2( int16_t (*dst)[2], int16_t (*mvc)[2], int i_mvc, int16_t mv_limit[2][2], uint32_t pmv ) |
132 |
static int ALWAYS_INLINE x264_predictor_clip_mmx2( int16_t (*dst)[2], int16_t (*mvc)[2], int i_mvc, int16_t mv_limit[2][2], uint32_t pmv ) |
126 |
{ |
133 |
{ |
127 |
static const uint32_t pd_32 = 0x20; |
134 |
static const uint32_t pd_32 = 0x20; |
128 |
intptr_t tmp = (intptr_t)mv_limit, mvc_max = i_mvc, i = 0; |
135 |
intptr_t tmp = (intptr_t)mv_limit, mvc_max = ((uint32_t *)mvc)+i_mvc, i = 0; |
129 |
|
136 |
|
130 |
asm( |
137 |
asm( |
131 |
"movq (%2), %%mm5 \n" |
138 |
"movq (%2), %%mm5 \n" |
Lines 137-143
Link Here
|
137 |
"punpckldq %%mm3, %%mm3 \n" |
145 |
"punpckldq %%mm3, %%mm3 \n" |
138 |
"punpckldq %%mm5, %%mm5 \n" |
146 |
"punpckldq %%mm5, %%mm5 \n" |
139 |
"movd %7, %%mm4 \n" |
147 |
"movd %7, %%mm4 \n" |
140 |
"lea (%0,%3,4), %3 \n" |
|
|
141 |
"1: \n" |
148 |
"1: \n" |
142 |
"movq (%0), %%mm0 \n" |
149 |
"movq (%0), %%mm0 \n" |
143 |
"add $8, %0 \n" |
150 |
"add $8, %0 \n" |
Lines 176-182
Link Here
|
176 |
"and $1, %k2 \n" |
183 |
"and $1, %k2 \n" |
177 |
"sub %2, %4 \n" // output += !(mv == pmv || mv == 0) |
184 |
"sub %2, %4 \n" // output += !(mv == pmv || mv == 0) |
178 |
"3: \n" |
185 |
"3: \n" |
179 |
:"+r"(mvc), "=m"(M64( dst )), "+r"(tmp), "+r"(mvc_max), "+r"(i) |
186 |
:"+r"(mvc), "=m"(M64( dst )), "+r"(tmp), mvc_max_mode(mvc_max), "+r"(i) |
180 |
:"r"(dst), "g"(pmv), "m"(pd_32), "m"(M64( mvc )) |
187 |
:"r"(dst), "g"(pmv), "m"(pd_32), "m"(M64( mvc )) |
181 |
); |
188 |
); |
182 |
return i; |
189 |
return i; |
Lines 188-194
Link Here
|
188 |
{ |
195 |
{ |
189 |
static const uint64_t pw_2 = 0x0002000200020002ULL; |
196 |
static const uint64_t pw_2 = 0x0002000200020002ULL; |
190 |
static const uint32_t pd_32 = 0x20; |
197 |
static const uint32_t pd_32 = 0x20; |
191 |
intptr_t tmp = (intptr_t)mv_limit, mvc_max = i_mvc, i = 0; |
198 |
intptr_t tmp = (intptr_t)mv_limit, mvc_max = ((uint32_t *)mvc)+i_mvc, i = 0; |
192 |
|
199 |
|
193 |
asm( |
200 |
asm( |
194 |
"movq (%2), %%mm5 \n" |
201 |
"movq (%2), %%mm5 \n" |
Lines 200-206
Link Here
|
200 |
"punpckldq %%mm3, %%mm3 \n" |
207 |
"punpckldq %%mm3, %%mm3 \n" |
201 |
"punpckldq %%mm5, %%mm5 \n" |
208 |
"punpckldq %%mm5, %%mm5 \n" |
202 |
"movd %8, %%mm4 \n" |
209 |
"movd %8, %%mm4 \n" |
203 |
"lea (%0,%3,4), %3 \n" |
|
|
204 |
"1: \n" |
210 |
"1: \n" |
205 |
"movq (%0), %%mm0 \n" |
211 |
"movq (%0), %%mm0 \n" |
206 |
"add $8, %0 \n" |
212 |
"add $8, %0 \n" |
Lines 243-249
Link Here
|
243 |
"and $1, %k2 \n" |
249 |
"and $1, %k2 \n" |
244 |
"sub %2, %4 \n" |
250 |
"sub %2, %4 \n" |
245 |
"3: \n" |
251 |
"3: \n" |
246 |
:"+r"(mvc), "=m"(M64( dst )), "+r"(tmp), "+r"(mvc_max), "+r"(i) |
252 |
:"+r"(mvc), "=m"(M64( dst )), "+r"(tmp), mvc_max_mode(mvc_max), "+r"(i) |
247 |
:"r"(dst), "m"(pw_2), "g"(pmv), "m"(pd_32), "m"(M64( mvc )) |
253 |
:"r"(dst), "m"(pw_2), "g"(pmv), "m"(pd_32), "m"(M64( mvc )) |
248 |
); |
254 |
); |
249 |
return i; |
255 |
return i; |