Lines 146-157
Link Here
|
146 |
" pxor %%mm7 , %%mm7; /* clear mm7 */\n" |
146 |
" pxor %%mm7 , %%mm7; /* clear mm7 */\n" |
147 |
" /* */\n" |
147 |
" /* */\n" |
148 |
" movl %1 , %%eax; /* load frameadress into eax */\n" |
148 |
" movl %1 , %%eax; /* load frameadress into eax */\n" |
|
|
149 |
#ifdef __PIC__ |
150 |
" movl %2 , %%esi; /* load frameadress into esi */\n" |
151 |
#else |
149 |
" movl %2 , %%ebx; /* load frameadress into ebx */\n" |
152 |
" movl %2 , %%ebx; /* load frameadress into ebx */\n" |
|
|
153 |
#endif |
150 |
" movl %3 , %%ecx; /* load width into ecx */\n" |
154 |
" movl %3 , %%ecx; /* load width into ecx */\n" |
151 |
" /* */\n" |
155 |
" /* */\n" |
152 |
".rept 8 ; /* Loop for 8 lines */\n" |
156 |
".rept 8 ; /* Loop for 8 lines */\n" |
153 |
" movq (%%eax), %%mm1; /* 8 Pixels from filtered frame to mm1 */\n" |
157 |
" movq (%%eax), %%mm1; /* 8 Pixels from filtered frame to mm1 */\n" |
|
|
158 |
#ifdef __PIC__ |
159 |
" movq (%%esi), %%mm2; /* 8 Pixels from reference frame to mm2 */\n" |
160 |
#else |
154 |
" movq (%%ebx), %%mm2; /* 8 Pixels from reference frame to mm2 */\n" |
161 |
" movq (%%ebx), %%mm2; /* 8 Pixels from reference frame to mm2 */\n" |
|
|
162 |
#endif |
155 |
" movq %%mm2 , %%mm3; /* hold a copy of mm2 in mm3 */\n" |
163 |
" movq %%mm2 , %%mm3; /* hold a copy of mm2 in mm3 */\n" |
156 |
" psubusb %%mm1 , %%mm3; /* positive differences between mm2 and mm1 */\n" |
164 |
" psubusb %%mm1 , %%mm3; /* positive differences between mm2 and mm1 */\n" |
157 |
" psubusb %%mm2 , %%mm1; /* positive differences between mm1 and mm3 */\n" |
165 |
" psubusb %%mm2 , %%mm1; /* positive differences between mm1 and mm3 */\n" |
Lines 162-174
Link Here
|
162 |
" paddusw %%mm1 , %%mm0; /* add mm1 (stored in mm1 and mm2...) */\n" |
170 |
" paddusw %%mm1 , %%mm0; /* add mm1 (stored in mm1 and mm2...) */\n" |
163 |
" paddusw %%mm2 , %%mm0; /* to mm0 */\n" |
171 |
" paddusw %%mm2 , %%mm0; /* to mm0 */\n" |
164 |
" addl %%ecx , %%eax; /* add framewidth to frameaddress */\n" |
172 |
" addl %%ecx , %%eax; /* add framewidth to frameaddress */\n" |
|
|
173 |
#ifdef __PIC__ |
174 |
" addl %%ecx , %%esi; /* add framewidth to frameaddress */\n" |
175 |
#else |
165 |
" addl %%ecx , %%ebx; /* add framewidth to frameaddress */\n" |
176 |
" addl %%ecx , %%ebx; /* add framewidth to frameaddress */\n" |
|
|
177 |
#endif |
166 |
" .endr /* end loop */\n" |
178 |
" .endr /* end loop */\n" |
167 |
" /* */\n" |
179 |
" /* */\n" |
168 |
" movq %%mm0 , %0 ; /* make mm0 available to gcc ... */\n" |
180 |
" movq %%mm0 , %0 ; /* make mm0 available to gcc ... */\n" |
169 |
:"=m" (a) |
181 |
:"=m" (a) |
170 |
:"m" (frm), "m" (ref), "m" (denoiser.frame.w) |
182 |
:"m" (frm), "m" (ref), "m" (denoiser.frame.w) |
|
|
183 |
#ifdef __PIC__ |
184 |
:"%eax", "%esi", "%ecx" |
185 |
#else |
171 |
:"%eax", "%ebx", "%ecx" |
186 |
:"%eax", "%ebx", "%ecx" |
|
|
187 |
#endif |
172 |
); |
188 |
); |
173 |
#endif |
189 |
#endif |
174 |
|
190 |
|
Lines 191-211
Link Here
|
191 |
( |
207 |
( |
192 |
" pxor %%mm0 , %%mm0; /* clear mm0 */\n" |
208 |
" pxor %%mm0 , %%mm0; /* clear mm0 */\n" |
193 |
" movl %1 , %%eax; /* load frameadress into eax */\n" |
209 |
" movl %1 , %%eax; /* load frameadress into eax */\n" |
|
|
210 |
#ifdef __PIC__ |
211 |
" movl %2 , %%esi; /* load frameadress into esi */\n" |
212 |
#else |
194 |
" movl %2 , %%ebx; /* load frameadress into ebx */\n" |
213 |
" movl %2 , %%ebx; /* load frameadress into ebx */\n" |
|
|
214 |
#endif |
195 |
" movl %3 , %%ecx; /* load width into ecx */\n" |
215 |
" movl %3 , %%ecx; /* load width into ecx */\n" |
196 |
" ; /* */\n" |
216 |
" ; /* */\n" |
197 |
" .rept 8 ; /* */\n" |
217 |
" .rept 8 ; /* */\n" |
198 |
" movq (%%eax), %%mm1; /* 8 Pixels from filtered frame to mm1 */\n" |
218 |
" movq (%%eax), %%mm1; /* 8 Pixels from filtered frame to mm1 */\n" |
|
|
219 |
#ifdef __PIC__ |
220 |
" psadbw (%%esi), %%mm1; /* 8 Pixels difference to mm1 */\n" |
221 |
#else |
199 |
" psadbw (%%ebx), %%mm1; /* 8 Pixels difference to mm1 */\n" |
222 |
" psadbw (%%ebx), %%mm1; /* 8 Pixels difference to mm1 */\n" |
|
|
223 |
#endif |
200 |
" paddusw %%mm1 , %%mm0; /* add result to mm0 */\n" |
224 |
" paddusw %%mm1 , %%mm0; /* add result to mm0 */\n" |
201 |
" addl %%ecx , %%eax; /* add framewidth to frameaddress */\n" |
225 |
" addl %%ecx , %%eax; /* add framewidth to frameaddress */\n" |
|
|
226 |
#ifdef __PIC__ |
227 |
" addl %%ecx , %%esi; /* add framewidth to frameaddress */\n" |
228 |
#else |
202 |
" addl %%ecx , %%ebx; /* add framewidth to frameaddress */\n" |
229 |
" addl %%ecx , %%ebx; /* add framewidth to frameaddress */\n" |
|
|
230 |
#endif |
203 |
" .endr ; /* */\n" |
231 |
" .endr ; /* */\n" |
204 |
" /* */\n" |
232 |
" /* */\n" |
205 |
" movq %%mm0 , %0 ; /* make mm0 available to gcc ... */\n" |
233 |
" movq %%mm0 , %0 ; /* make mm0 available to gcc ... */\n" |
206 |
:"=m" (a) |
234 |
:"=m" (a) |
207 |
:"m" (frm), "m" (ref), "m" (denoiser.frame.w) |
235 |
:"m" (frm), "m" (ref), "m" (denoiser.frame.w) |
|
|
236 |
#ifdef __PIC__ |
237 |
:"%eax", "%esi", "%ecx" |
238 |
#else |
208 |
:"%eax", "%ebx", "%ecx" |
239 |
:"%eax", "%ebx", "%ecx" |
|
|
240 |
#endif |
209 |
); |
241 |
); |
210 |
#endif |
242 |
#endif |
211 |
return a; |
243 |
return a; |
Lines 254-265
Link Here
|
254 |
" pxor %%mm7 , %%mm7; /* clear mm7 */\n" |
286 |
" pxor %%mm7 , %%mm7; /* clear mm7 */\n" |
255 |
" /* */\n" |
287 |
" /* */\n" |
256 |
" movl %1 , %%eax; /* load frameadress into eax */\n" |
288 |
" movl %1 , %%eax; /* load frameadress into eax */\n" |
|
|
289 |
#ifdef __PIC__ |
290 |
" movl %2 , %%esi; /* load frameadress into esi */\n" |
291 |
#else |
257 |
" movl %2 , %%ebx; /* load frameadress into ebx */\n" |
292 |
" movl %2 , %%ebx; /* load frameadress into ebx */\n" |
|
|
293 |
#endif |
258 |
" movl %3 , %%ecx; /* load width into ecx */\n" |
294 |
" movl %3 , %%ecx; /* load width into ecx */\n" |
259 |
" /* */\n" |
295 |
" /* */\n" |
260 |
".rept 4 ; /* Loop for 4 lines */\n" |
296 |
".rept 4 ; /* Loop for 4 lines */\n" |
261 |
" movd (%%eax), %%mm1; /* 4 Pixels from filtered frame to mm1 */\n" |
297 |
" movd (%%eax), %%mm1; /* 4 Pixels from filtered frame to mm1 */\n" |
|
|
298 |
#ifdef __PIC__ |
299 |
" movd (%%esi), %%mm2; /* 4 Pixels from reference frame to mm2 */\n" |
300 |
#else |
262 |
" movd (%%ebx), %%mm2; /* 4 Pixels from reference frame to mm2 */\n" |
301 |
" movd (%%ebx), %%mm2; /* 4 Pixels from reference frame to mm2 */\n" |
|
|
302 |
#endif |
263 |
" movq %%mm2 , %%mm3; /* hold a copy of mm2 in mm3 */\n" |
303 |
" movq %%mm2 , %%mm3; /* hold a copy of mm2 in mm3 */\n" |
264 |
" psubusb %%mm1 , %%mm3; /* positive differences between mm2 and mm1 */\n" |
304 |
" psubusb %%mm1 , %%mm3; /* positive differences between mm2 and mm1 */\n" |
265 |
" psubusb %%mm2 , %%mm1; /* positive differences between mm1 and mm3 */\n" |
305 |
" psubusb %%mm2 , %%mm1; /* positive differences between mm1 and mm3 */\n" |
Lines 270-282
Link Here
|
270 |
" paddusw %%mm1 , %%mm2; /* add mm1 (stored in mm1 and mm2...) */\n" |
310 |
" paddusw %%mm1 , %%mm2; /* add mm1 (stored in mm1 and mm2...) */\n" |
271 |
" paddusw %%mm2 , %%mm0; /* to mm0 */\n" |
311 |
" paddusw %%mm2 , %%mm0; /* to mm0 */\n" |
272 |
" addl %%ecx , %%eax; /* add framewidth to frameaddress */\n" |
312 |
" addl %%ecx , %%eax; /* add framewidth to frameaddress */\n" |
|
|
313 |
#ifdef __PIC__ |
314 |
" addl %%ecx , %%esi; /* add framewidth to frameaddress */\n" |
315 |
#else |
273 |
" addl %%ecx , %%ebx; /* add framewidth to frameaddress */\n" |
316 |
" addl %%ecx , %%ebx; /* add framewidth to frameaddress */\n" |
|
|
317 |
#endif |
274 |
" .endr /* end loop */\n" |
318 |
" .endr /* end loop */\n" |
275 |
" /* */\n" |
319 |
" /* */\n" |
276 |
" movq %%mm0 , %0 ; /* make mm0 available to gcc ... */\n" |
320 |
" movq %%mm0 , %0 ; /* make mm0 available to gcc ... */\n" |
277 |
:"=m" (a) |
321 |
:"=m" (a) |
278 |
:"m" (frm), "m" (ref), "m" (halfwidth) |
322 |
:"m" (frm), "m" (ref), "m" (halfwidth) |
|
|
323 |
#ifdef __PIC__ |
324 |
:"%eax", "%esi", "%ecx" |
325 |
#else |
279 |
:"%eax", "%ebx", "%ecx" |
326 |
:"%eax", "%ebx", "%ecx" |
|
|
327 |
#endif |
280 |
); |
328 |
); |
281 |
#endif |
329 |
#endif |
282 |
return (uint32_t)(a[0]+a[1]+a[2]+a[3]); |
330 |
return (uint32_t)(a[0]+a[1]+a[2]+a[3]); |
Lines 300-312
Link Here
|
300 |
" pxor %%mm7 , %%mm7; /* clear mm7 */\n" |
348 |
" pxor %%mm7 , %%mm7; /* clear mm7 */\n" |
301 |
" /* */\n" |
349 |
" /* */\n" |
302 |
" movl %1 , %%eax; /* load frameadress into eax */\n" |
350 |
" movl %1 , %%eax; /* load frameadress into eax */\n" |
|
|
351 |
#ifdef __PIC__ |
352 |
" movl %2 , %%esi; /* load frameadress into esi */\n" |
353 |
#else |
303 |
" movl %2 , %%ebx; /* load frameadress into ebx */\n" |
354 |
" movl %2 , %%ebx; /* load frameadress into ebx */\n" |
|
|
355 |
#endif |
304 |
" movl %3 , %%ecx; /* load width into ecx */\n" |
356 |
" movl %3 , %%ecx; /* load width into ecx */\n" |
305 |
" /* */\n" |
357 |
" /* */\n" |
306 |
".rept 8 ; /* Loop for 8 lines */\n" |
358 |
".rept 8 ; /* Loop for 8 lines */\n" |
307 |
" movw (%%eax), %%dx; /* */\n" |
359 |
" movw (%%eax), %%dx; /* */\n" |
308 |
" movd %%edx , %%mm1; /* 2 Pixels from filtered frame to mm1 */\n" |
360 |
" movd %%edx , %%mm1; /* 2 Pixels from filtered frame to mm1 */\n" |
|
|
361 |
#ifdef __PIC__ |
362 |
" movw (%%esi), %%dx; /* */\n" |
363 |
#else |
309 |
" movw (%%ebx), %%dx; /* */\n" |
364 |
" movw (%%ebx), %%dx; /* */\n" |
|
|
365 |
#endif |
310 |
" movd %%edx , %%mm2; /* 2 Pixels from reference frame to mm2 */\n" |
366 |
" movd %%edx , %%mm2; /* 2 Pixels from reference frame to mm2 */\n" |
311 |
" movq %%mm2 , %%mm3; /* hold a copy of mm2 in mm3 */\n" |
367 |
" movq %%mm2 , %%mm3; /* hold a copy of mm2 in mm3 */\n" |
312 |
" psubusb %%mm1 , %%mm3; /* positive differences between mm2 and mm1 */\n" |
368 |
" psubusb %%mm1 , %%mm3; /* positive differences between mm2 and mm1 */\n" |
Lines 318-330
Link Here
|
318 |
" paddusw %%mm1 , %%mm2; /* add mm1 (stored in mm1 and mm2...) */\n" |
374 |
" paddusw %%mm1 , %%mm2; /* add mm1 (stored in mm1 and mm2...) */\n" |
319 |
" paddusw %%mm2 , %%mm0; /* to mm0 */\n" |
375 |
" paddusw %%mm2 , %%mm0; /* to mm0 */\n" |
320 |
" addl %%ecx , %%eax; /* add framewidth to frameaddress */\n" |
376 |
" addl %%ecx , %%eax; /* add framewidth to frameaddress */\n" |
|
|
377 |
#ifdef __PIC__ |
378 |
" addl %%ecx , %%esi; /* add framewidth to frameaddress */\n" |
379 |
#else |
321 |
" addl %%ecx , %%ebx; /* add framewidth to frameaddress */\n" |
380 |
" addl %%ecx , %%ebx; /* add framewidth to frameaddress */\n" |
|
|
381 |
#endif |
322 |
" .endr /* end loop */\n" |
382 |
" .endr /* end loop */\n" |
323 |
" /* */\n" |
383 |
" /* */\n" |
324 |
" movq %%mm0 , %0 ; /* make mm0 available to gcc ... */\n" |
384 |
" movq %%mm0 , %0 ; /* make mm0 available to gcc ... */\n" |
325 |
:"=m" (a) |
385 |
:"=m" (a) |
326 |
:"m" (frm), "m" (ref), "m" (W2) |
386 |
:"m" (frm), "m" (ref), "m" (W2) |
|
|
387 |
#ifdef __PIC__ |
388 |
:"%eax", "%esi", "%ecx" |
389 |
#else |
327 |
:"%eax", "%ebx", "%ecx" |
390 |
:"%eax", "%ebx", "%ecx" |
|
|
391 |
#endif |
328 |
); |
392 |
); |
329 |
#endif |
393 |
#endif |
330 |
return (uint32_t)(a[0]+a[1]+a[2]+a[3]+a[4]+a[5]+a[6]+a[7]); |
394 |
return (uint32_t)(a[0]+a[1]+a[2]+a[3]+a[4]+a[5]+a[6]+a[7]); |
Lines 348-369
Link Here
|
348 |
( |
412 |
( |
349 |
" pxor %%mm0 , %%mm0; /* clear mm0 */\n" |
413 |
" pxor %%mm0 , %%mm0; /* clear mm0 */\n" |
350 |
" movl %1 , %%eax; /* load frameadress into eax */\n" |
414 |
" movl %1 , %%eax; /* load frameadress into eax */\n" |
|
|
415 |
#ifdef __PIC__ |
416 |
" movl %2 , %%esi; /* load frameadress into esi */\n" |
417 |
#else |
351 |
" movl %2 , %%ebx; /* load frameadress into ebx */\n" |
418 |
" movl %2 , %%ebx; /* load frameadress into ebx */\n" |
|
|
419 |
#endif |
352 |
" movl %3 , %%ecx; /* load width into ecx */\n" |
420 |
" movl %3 , %%ecx; /* load width into ecx */\n" |
353 |
" ; /* */\n" |
421 |
" ; /* */\n" |
354 |
" .rept 4 ; /* */\n" |
422 |
" .rept 4 ; /* */\n" |
355 |
" movd (%%eax), %%mm1; /* 4 Pixels from filtered frame to mm1 */\n" |
423 |
" movd (%%eax), %%mm1; /* 4 Pixels from filtered frame to mm1 */\n" |
|
|
424 |
#ifdef __PIC__ |
425 |
" movd (%%esi), %%mm2; /* 4 Pixels from filtered frame to mm2 */\n" |
426 |
#else |
356 |
" movd (%%ebx), %%mm2; /* 4 Pixels from filtered frame to mm2 */\n" |
427 |
" movd (%%ebx), %%mm2; /* 4 Pixels from filtered frame to mm2 */\n" |
|
|
428 |
#endif |
357 |
" psadbw %%mm2 , %%mm1; /* 4 Pixels difference to mm1 */\n" |
429 |
" psadbw %%mm2 , %%mm1; /* 4 Pixels difference to mm1 */\n" |
358 |
" paddusw %%mm1 , %%mm0; /* add result to mm0 */\n" |
430 |
" paddusw %%mm1 , %%mm0; /* add result to mm0 */\n" |
359 |
" addl %%ecx , %%eax; /* add framewidth to frameaddress */\n" |
431 |
" addl %%ecx , %%eax; /* add framewidth to frameaddress */\n" |
|
|
432 |
#ifdef __PIC__ |
433 |
" addl %%ecx , %%esi; /* add framewidth to frameaddress */\n" |
434 |
#else |
360 |
" addl %%ecx , %%ebx; /* add framewidth to frameaddress */\n" |
435 |
" addl %%ecx , %%ebx; /* add framewidth to frameaddress */\n" |
|
|
436 |
#endif |
361 |
" .endr ; /* */\n" |
437 |
" .endr ; /* */\n" |
362 |
" /* */\n" |
438 |
" /* */\n" |
363 |
" movq %%mm0 , %0 ; /* make mm0 available to gcc ... */\n" |
439 |
" movq %%mm0 , %0 ; /* make mm0 available to gcc ... */\n" |
364 |
:"=m" (a) |
440 |
:"=m" (a) |
365 |
:"m" (frm), "m" (ref), "m" (halfwidth) |
441 |
:"m" (frm), "m" (ref), "m" (halfwidth) |
|
|
442 |
#ifdef __PIC__ |
443 |
:"%eax", "%esi", "%ecx" |
444 |
#else |
366 |
:"%eax", "%ebx", "%ecx" |
445 |
:"%eax", "%ebx", "%ecx" |
|
|
446 |
#endif |
367 |
); |
447 |
); |
368 |
#endif |
448 |
#endif |
369 |
return a; |
449 |
return a; |
Lines 385-408
Link Here
|
385 |
( |
465 |
( |
386 |
" pxor %%mm0 , %%mm0; /* clear mm0 */\n" |
466 |
" pxor %%mm0 , %%mm0; /* clear mm0 */\n" |
387 |
" movl %1 , %%eax; /* load frameadress into eax */\n" |
467 |
" movl %1 , %%eax; /* load frameadress into eax */\n" |
|
|
468 |
#ifdef __PIC__ |
469 |
" movl %2 , %%esi; /* load frameadress into esi */\n" |
470 |
#else |
388 |
" movl %2 , %%ebx; /* load frameadress into ebx */\n" |
471 |
" movl %2 , %%ebx; /* load frameadress into ebx */\n" |
|
|
472 |
#endif |
389 |
" movl %3 , %%ecx; /* load width into ecx */\n" |
473 |
" movl %3 , %%ecx; /* load width into ecx */\n" |
390 |
" ; /* */\n" |
474 |
" ; /* */\n" |
391 |
" .rept 8 ; /* */\n" |
475 |
" .rept 8 ; /* */\n" |
392 |
" movw (%%eax), %%dx; /* */\n" |
476 |
" movw (%%eax), %%dx; /* */\n" |
393 |
" movd %%edx , %%mm1; /* 2 Pixels from filtered frame to mm1 */\n" |
477 |
" movd %%edx , %%mm1; /* 2 Pixels from filtered frame to mm1 */\n" |
|
|
478 |
#ifdef __PIC__ |
479 |
" movw (%%esi), %%dx; /* */\n" |
480 |
#else |
394 |
" movw (%%ebx), %%dx; /* */\n" |
481 |
" movw (%%ebx), %%dx; /* */\n" |
|
|
482 |
#endif |
395 |
" movd %%edx , %%mm2; /* 2 Pixels from filtered frame to mm2 */\n" |
483 |
" movd %%edx , %%mm2; /* 2 Pixels from filtered frame to mm2 */\n" |
396 |
" psadbw %%mm2 , %%mm1; /* 2 Pixels difference to mm1 */\n" |
484 |
" psadbw %%mm2 , %%mm1; /* 2 Pixels difference to mm1 */\n" |
397 |
" paddusw %%mm1 , %%mm0; /* add result to mm0 */\n" |
485 |
" paddusw %%mm1 , %%mm0; /* add result to mm0 */\n" |
398 |
" addl %%ecx , %%eax; /* add framewidth to frameaddress */\n" |
486 |
" addl %%ecx , %%eax; /* add framewidth to frameaddress */\n" |
|
|
487 |
#ifdef __PIC__ |
488 |
" addl %%ecx , %%esi; /* add framewidth to frameaddress */\n" |
489 |
#else |
399 |
" addl %%ecx , %%ebx; /* add framewidth to frameaddress */\n" |
490 |
" addl %%ecx , %%ebx; /* add framewidth to frameaddress */\n" |
|
|
491 |
#endif |
400 |
" .endr ; /* */\n" |
492 |
" .endr ; /* */\n" |
401 |
" /* */\n" |
493 |
" /* */\n" |
402 |
" movq %%mm0 , %0 ; /* make mm0 available to gcc ... */\n" |
494 |
" movq %%mm0 , %0 ; /* make mm0 available to gcc ... */\n" |
403 |
:"=m" (a) |
495 |
:"=m" (a) |
404 |
:"m" (frm), "m" (ref), "m" (W2) |
496 |
:"m" (frm), "m" (ref), "m" (W2) |
|
|
497 |
#ifdef __PIC__ |
498 |
:"%eax", "%esi", "%ecx" |
499 |
#else |
405 |
:"%eax", "%ebx", "%ecx" |
500 |
:"%eax", "%ebx", "%ecx" |
|
|
501 |
#endif |
406 |
); |
502 |
); |
407 |
#endif |
503 |
#endif |
408 |
return a; |
504 |
return a; |
Lines 448-460
Link Here
|
448 |
( |
544 |
( |
449 |
" pxor %%mm0 , %%mm0; /* clear mm0 */" |
545 |
" pxor %%mm0 , %%mm0; /* clear mm0 */" |
450 |
" movl %1 , %%eax; /* load frameadress into eax */" |
546 |
" movl %1 , %%eax; /* load frameadress into eax */" |
|
|
547 |
#ifdef __PIC__ |
548 |
" movl %2 , %%esi; /* load frameadress into esi */" |
549 |
#else |
451 |
" movl %2 , %%ebx; /* load frameadress into ebx */" |
550 |
" movl %2 , %%ebx; /* load frameadress into ebx */" |
|
|
551 |
#endif |
452 |
" movl %3 , %%ecx; /* load frameadress into ecx */" |
552 |
" movl %3 , %%ecx; /* load frameadress into ecx */" |
453 |
" movl %4 , %%edx; /* load width into edx */" |
553 |
" movl %4 , %%edx; /* load width into edx */" |
454 |
" ; /* */" |
554 |
" ; /* */" |
455 |
" .rept 8 ; /* */" |
555 |
" .rept 8 ; /* */" |
456 |
" movq (%%eax), %%mm1; /* 8 Pixels from filtered frame to mm1 */" |
556 |
" movq (%%eax), %%mm1; /* 8 Pixels from filtered frame to mm1 */" |
|
|
557 |
#ifdef __PIC__ |
558 |
" movq (%%esi), %%mm2; /* 8 Pixels from filtered frame to mm2 (displaced) */" |
559 |
#else |
457 |
" movq (%%ebx), %%mm2; /* 8 Pixels from filtered frame to mm2 (displaced) */" |
560 |
" movq (%%ebx), %%mm2; /* 8 Pixels from filtered frame to mm2 (displaced) */" |
|
|
561 |
#endif |
458 |
" movq (%%ecx), %%mm3; /* reference to mm3 */" |
562 |
" movq (%%ecx), %%mm3; /* reference to mm3 */" |
459 |
" psrlq $1 , %%mm1; /* average source pixels */" |
563 |
" psrlq $1 , %%mm1; /* average source pixels */" |
460 |
" psrlq $1 , %%mm2; /* shift right by one (divide by two) */" |
564 |
" psrlq $1 , %%mm2; /* shift right by one (divide by two) */" |
Lines 468-481
Link Here
|
468 |
" paddusb %%mm3 , %%mm1; /* mm1 now contains abs(mm1-mm2) */" |
572 |
" paddusb %%mm3 , %%mm1; /* mm1 now contains abs(mm1-mm2) */" |
469 |
" paddusw %%mm1 , %%mm0; /* add result to mm0 */" |
573 |
" paddusw %%mm1 , %%mm0; /* add result to mm0 */" |
470 |
" addl %%edx , %%eax; /* add framewidth to frameaddress */" |
574 |
" addl %%edx , %%eax; /* add framewidth to frameaddress */" |
|
|
575 |
#ifdef __PIC__ |
576 |
" addl %%edx , %%esi; /* add framewidth to frameaddress */" |
577 |
#else |
471 |
" addl %%edx , %%ebx; /* add framewidth to frameaddress */" |
578 |
" addl %%edx , %%ebx; /* add framewidth to frameaddress */" |
|
|
579 |
#endif |
472 |
" addl %%edx , %%ecx; /* add framewidth to frameaddress */" |
580 |
" addl %%edx , %%ecx; /* add framewidth to frameaddress */" |
473 |
" .endr ; /* */" |
581 |
" .endr ; /* */" |
474 |
" /* */" |
582 |
" /* */" |
475 |
" movq %%mm0 , %0 ; /* make mm0 available to gcc ... */" |
583 |
" movq %%mm0 , %0 ; /* make mm0 available to gcc ... */" |
476 |
:"=m" (a) |
584 |
:"=m" (a) |
477 |
:"m" (frm1),"m" (frm2), "m" (ref), "m" (denoiser.frame.w), "m" (*bit_mask) |
585 |
:"m" (frm1),"m" (frm2), "m" (ref), "m" (denoiser.frame.w), "m" (*bit_mask) |
|
|
586 |
#ifdef __PIC__ |
587 |
:"%eax", "%esi", "%ecx", "%edx" |
588 |
#else |
478 |
:"%eax", "%ebx", "%ecx", "%edx" |
589 |
:"%eax", "%ebx", "%ecx", "%edx" |
|
|
590 |
#endif |
479 |
); |
591 |
); |
480 |
#endif |
592 |
#endif |
481 |
return a; |
593 |
return a; |
Lines 497-522
Link Here
|
497 |
( |
609 |
( |
498 |
" pxor %%mm0 , %%mm0; /* clear mm0 */\n" |
610 |
" pxor %%mm0 , %%mm0; /* clear mm0 */\n" |
499 |
" movl %1 , %%eax; /* load frameadress into eax */\n" |
611 |
" movl %1 , %%eax; /* load frameadress into eax */\n" |
|
|
612 |
#ifdef __PIC__ |
613 |
" movl %2 , %%esi; /* load frameadress into esi */\n" |
614 |
#else |
500 |
" movl %2 , %%ebx; /* load frameadress into ebx */\n" |
615 |
" movl %2 , %%ebx; /* load frameadress into ebx */\n" |
|
|
616 |
#endif |
501 |
" movl %3 , %%ecx; /* load frameadress into ecx */\n" |
617 |
" movl %3 , %%ecx; /* load frameadress into ecx */\n" |
502 |
" movl %4 , %%edx; /* load width into edx */\n" |
618 |
" movl %4 , %%edx; /* load width into edx */\n" |
503 |
" ; /* */\n" |
619 |
" ; /* */\n" |
504 |
" .rept 8 ; /* */\n" |
620 |
" .rept 8 ; /* */\n" |
505 |
" movq (%%eax), %%mm1; /* 8 Pixels from filtered frame to mm1 */\n" |
621 |
" movq (%%eax), %%mm1; /* 8 Pixels from filtered frame to mm1 */\n" |
|
|
622 |
#ifdef __PIC__ |
623 |
" movq (%%esi), %%mm2; /* 8 Pixels from filtered frame to mm2 (displaced) */\n" |
624 |
#else |
506 |
" movq (%%ebx), %%mm2; /* 8 Pixels from filtered frame to mm2 (displaced) */\n" |
625 |
" movq (%%ebx), %%mm2; /* 8 Pixels from filtered frame to mm2 (displaced) */\n" |
|
|
626 |
#endif |
507 |
" movq (%%ecx), %%mm3; /* 8 Pixels from reference frame to mm3 */\n" |
627 |
" movq (%%ecx), %%mm3; /* 8 Pixels from reference frame to mm3 */\n" |
508 |
" pavgb %%mm2 , %%mm1; /* average source pixels */\n" |
628 |
" pavgb %%mm2 , %%mm1; /* average source pixels */\n" |
509 |
" psadbw %%mm3 , %%mm1; /* 8 Pixels difference to mm1 */\n" |
629 |
" psadbw %%mm3 , %%mm1; /* 8 Pixels difference to mm1 */\n" |
510 |
" paddusw %%mm1 , %%mm0; /* add result to mm0 */\n" |
630 |
" paddusw %%mm1 , %%mm0; /* add result to mm0 */\n" |
511 |
" addl %%edx , %%eax; /* add framewidth to frameaddress */\n" |
631 |
" addl %%edx , %%eax; /* add framewidth to frameaddress */\n" |
|
|
632 |
#ifdef __PIC__ |
633 |
" addl %%edx , %%esi; /* add framewidth to frameaddress */\n" |
634 |
#else |
512 |
" addl %%edx , %%ebx; /* add framewidth to frameaddress */\n" |
635 |
" addl %%edx , %%ebx; /* add framewidth to frameaddress */\n" |
|
|
636 |
#endif |
513 |
" addl %%edx , %%ecx; /* add framewidth to frameaddress */\n" |
637 |
" addl %%edx , %%ecx; /* add framewidth to frameaddress */\n" |
514 |
" .endr ; /* */\n" |
638 |
" .endr ; /* */\n" |
515 |
" /* */\n" |
639 |
" /* */\n" |
516 |
" movq %%mm0 , %0 ; /* make mm0 available to gcc ... */\n" |
640 |
" movq %%mm0 , %0 ; /* make mm0 available to gcc ... */\n" |
517 |
:"=m" (a) |
641 |
:"=m" (a) |
518 |
:"m" (frm1),"m" (frm2), "m" (ref), "m" (denoiser.frame.w) |
642 |
:"m" (frm1),"m" (frm2), "m" (ref), "m" (denoiser.frame.w) |
|
|
643 |
#ifdef __PIC__ |
644 |
:"%eax", "%esi", "%ecx", "%edx" |
645 |
#else |
519 |
:"%eax", "%ebx", "%ecx", "%edx" |
646 |
:"%eax", "%ebx", "%ecx", "%edx" |
|
|
647 |
#endif |
520 |
); |
648 |
); |
521 |
#endif |
649 |
#endif |
522 |
return a; |
650 |
return a; |