Lines 146-157
Link Here
|
146 |
" pxor %%mm7 , %%mm7; /* clear mm7 */\n" |
146 |
" pxor %%mm7 , %%mm7; /* clear mm7 */\n" |
147 |
" /* */\n" |
147 |
" /* */\n" |
148 |
" movl %1 , %%eax; /* load frameadress into eax */\n" |
148 |
" movl %1 , %%eax; /* load frameadress into eax */\n" |
149 |
" movl %2 , %%ebx; /* load frameadress into ebx */\n" |
149 |
" movl %2 , %%esi; /* load frameadress into esi */\n" |
150 |
" movl %3 , %%ecx; /* load width into ecx */\n" |
150 |
" movl %3 , %%ecx; /* load width into ecx */\n" |
151 |
" /* */\n" |
151 |
" /* */\n" |
152 |
".rept 8 ; /* Loop for 8 lines */\n" |
152 |
".rept 8 ; /* Loop for 8 lines */\n" |
153 |
" movq (%%eax), %%mm1; /* 8 Pixels from filtered frame to mm1 */\n" |
153 |
" movq (%%eax), %%mm1; /* 8 Pixels from filtered frame to mm1 */\n" |
154 |
" movq (%%ebx), %%mm2; /* 8 Pixels from reference frame to mm2 */\n" |
154 |
" movq (%%esi), %%mm2; /* 8 Pixels from reference frame to mm2 */\n" |
155 |
" movq %%mm2 , %%mm3; /* hold a copy of mm2 in mm3 */\n" |
155 |
" movq %%mm2 , %%mm3; /* hold a copy of mm2 in mm3 */\n" |
156 |
" psubusb %%mm1 , %%mm3; /* positive differences between mm2 and mm1 */\n" |
156 |
" psubusb %%mm1 , %%mm3; /* positive differences between mm2 and mm1 */\n" |
157 |
" psubusb %%mm2 , %%mm1; /* positive differences between mm1 and mm3 */\n" |
157 |
" psubusb %%mm2 , %%mm1; /* positive differences between mm1 and mm3 */\n" |
Lines 162-174
Link Here
|
162 |
" paddusw %%mm1 , %%mm0; /* add mm1 (stored in mm1 and mm2...) */\n" |
162 |
" paddusw %%mm1 , %%mm0; /* add mm1 (stored in mm1 and mm2...) */\n" |
163 |
" paddusw %%mm2 , %%mm0; /* to mm0 */\n" |
163 |
" paddusw %%mm2 , %%mm0; /* to mm0 */\n" |
164 |
" addl %%ecx , %%eax; /* add framewidth to frameaddress */\n" |
164 |
" addl %%ecx , %%eax; /* add framewidth to frameaddress */\n" |
165 |
" addl %%ecx , %%ebx; /* add framewidth to frameaddress */\n" |
165 |
" addl %%ecx , %%esi; /* add framewidth to frameaddress */\n" |
166 |
" .endr /* end loop */\n" |
166 |
" .endr /* end loop */\n" |
167 |
" /* */\n" |
167 |
" /* */\n" |
168 |
" movq %%mm0 , %0 ; /* make mm0 available to gcc ... */\n" |
168 |
" movq %%mm0 , %0 ; /* make mm0 available to gcc ... */\n" |
169 |
:"=m" (a) |
169 |
:"=m" (a) |
170 |
:"m" (frm), "m" (ref), "m" (denoiser.frame.w) |
170 |
:"m" (frm), "m" (ref), "m" (denoiser.frame.w) |
171 |
:"%eax", "%ebx", "%ecx" |
171 |
:"%eax", "%esi", "%ecx" |
172 |
); |
172 |
); |
173 |
#endif |
173 |
#endif |
174 |
|
174 |
|
Lines 191-211
Link Here
|
191 |
( |
191 |
( |
192 |
" pxor %%mm0 , %%mm0; /* clear mm0 */\n" |
192 |
" pxor %%mm0 , %%mm0; /* clear mm0 */\n" |
193 |
" movl %1 , %%eax; /* load frameadress into eax */\n" |
193 |
" movl %1 , %%eax; /* load frameadress into eax */\n" |
194 |
" movl %2 , %%ebx; /* load frameadress into ebx */\n" |
194 |
" movl %2 , %%esi; /* load frameadress into esi */\n" |
195 |
" movl %3 , %%ecx; /* load width into ecx */\n" |
195 |
" movl %3 , %%ecx; /* load width into ecx */\n" |
196 |
" ; /* */\n" |
196 |
" ; /* */\n" |
197 |
" .rept 8 ; /* */\n" |
197 |
" .rept 8 ; /* */\n" |
198 |
" movq (%%eax), %%mm1; /* 8 Pixels from filtered frame to mm1 */\n" |
198 |
" movq (%%eax), %%mm1; /* 8 Pixels from filtered frame to mm1 */\n" |
199 |
" psadbw (%%ebx), %%mm1; /* 8 Pixels difference to mm1 */\n" |
199 |
" psadbw (%%esi), %%mm1; /* 8 Pixels difference to mm1 */\n" |
200 |
" paddusw %%mm1 , %%mm0; /* add result to mm0 */\n" |
200 |
" paddusw %%mm1 , %%mm0; /* add result to mm0 */\n" |
201 |
" addl %%ecx , %%eax; /* add framewidth to frameaddress */\n" |
201 |
" addl %%ecx , %%eax; /* add framewidth to frameaddress */\n" |
202 |
" addl %%ecx , %%ebx; /* add framewidth to frameaddress */\n" |
202 |
" addl %%ecx , %%esi; /* add framewidth to frameaddress */\n" |
203 |
" .endr ; /* */\n" |
203 |
" .endr ; /* */\n" |
204 |
" /* */\n" |
204 |
" /* */\n" |
205 |
" movq %%mm0 , %0 ; /* make mm0 available to gcc ... */\n" |
205 |
" movq %%mm0 , %0 ; /* make mm0 available to gcc ... */\n" |
206 |
:"=m" (a) |
206 |
:"=m" (a) |
207 |
:"m" (frm), "m" (ref), "m" (denoiser.frame.w) |
207 |
:"m" (frm), "m" (ref), "m" (denoiser.frame.w) |
208 |
:"%eax", "%ebx", "%ecx" |
208 |
:"%eax", "%esi", "%ecx" |
209 |
); |
209 |
); |
210 |
#endif |
210 |
#endif |
211 |
return a; |
211 |
return a; |
Lines 254-265
Link Here
|
254 |
" pxor %%mm7 , %%mm7; /* clear mm7 */\n" |
254 |
" pxor %%mm7 , %%mm7; /* clear mm7 */\n" |
255 |
" /* */\n" |
255 |
" /* */\n" |
256 |
" movl %1 , %%eax; /* load frameadress into eax */\n" |
256 |
" movl %1 , %%eax; /* load frameadress into eax */\n" |
257 |
" movl %2 , %%ebx; /* load frameadress into ebx */\n" |
257 |
" movl %2 , %%esi; /* load frameadress into esi */\n" |
258 |
" movl %3 , %%ecx; /* load width into ecx */\n" |
258 |
" movl %3 , %%ecx; /* load width into ecx */\n" |
259 |
" /* */\n" |
259 |
" /* */\n" |
260 |
".rept 4 ; /* Loop for 4 lines */\n" |
260 |
".rept 4 ; /* Loop for 4 lines */\n" |
261 |
" movd (%%eax), %%mm1; /* 4 Pixels from filtered frame to mm1 */\n" |
261 |
" movd (%%eax), %%mm1; /* 4 Pixels from filtered frame to mm1 */\n" |
262 |
" movd (%%ebx), %%mm2; /* 4 Pixels from reference frame to mm2 */\n" |
262 |
" movd (%%esi), %%mm2; /* 4 Pixels from reference frame to mm2 */\n" |
263 |
" movq %%mm2 , %%mm3; /* hold a copy of mm2 in mm3 */\n" |
263 |
" movq %%mm2 , %%mm3; /* hold a copy of mm2 in mm3 */\n" |
264 |
" psubusb %%mm1 , %%mm3; /* positive differences between mm2 and mm1 */\n" |
264 |
" psubusb %%mm1 , %%mm3; /* positive differences between mm2 and mm1 */\n" |
265 |
" psubusb %%mm2 , %%mm1; /* positive differences between mm1 and mm3 */\n" |
265 |
" psubusb %%mm2 , %%mm1; /* positive differences between mm1 and mm3 */\n" |
Lines 270-282
Link Here
|
270 |
" paddusw %%mm1 , %%mm2; /* add mm1 (stored in mm1 and mm2...) */\n" |
270 |
" paddusw %%mm1 , %%mm2; /* add mm1 (stored in mm1 and mm2...) */\n" |
271 |
" paddusw %%mm2 , %%mm0; /* to mm0 */\n" |
271 |
" paddusw %%mm2 , %%mm0; /* to mm0 */\n" |
272 |
" addl %%ecx , %%eax; /* add framewidth to frameaddress */\n" |
272 |
" addl %%ecx , %%eax; /* add framewidth to frameaddress */\n" |
273 |
" addl %%ecx , %%ebx; /* add framewidth to frameaddress */\n" |
273 |
" addl %%ecx , %%esi; /* add framewidth to frameaddress */\n" |
274 |
" .endr /* end loop */\n" |
274 |
" .endr /* end loop */\n" |
275 |
" /* */\n" |
275 |
" /* */\n" |
276 |
" movq %%mm0 , %0 ; /* make mm0 available to gcc ... */\n" |
276 |
" movq %%mm0 , %0 ; /* make mm0 available to gcc ... */\n" |
277 |
:"=m" (a) |
277 |
:"=m" (a) |
278 |
:"m" (frm), "m" (ref), "m" (halfwidth) |
278 |
:"m" (frm), "m" (ref), "m" (halfwidth) |
279 |
:"%eax", "%ebx", "%ecx" |
279 |
:"%eax", "%esi", "%ecx" |
280 |
); |
280 |
); |
281 |
#endif |
281 |
#endif |
282 |
return (uint32_t)(a[0]+a[1]+a[2]+a[3]); |
282 |
return (uint32_t)(a[0]+a[1]+a[2]+a[3]); |
Lines 300-312
Link Here
|
300 |
" pxor %%mm7 , %%mm7; /* clear mm7 */\n" |
300 |
" pxor %%mm7 , %%mm7; /* clear mm7 */\n" |
301 |
" /* */\n" |
301 |
" /* */\n" |
302 |
" movl %1 , %%eax; /* load frameadress into eax */\n" |
302 |
" movl %1 , %%eax; /* load frameadress into eax */\n" |
303 |
" movl %2 , %%ebx; /* load frameadress into ebx */\n" |
303 |
" movl %2 , %%esi; /* load frameadress into esi */\n" |
304 |
" movl %3 , %%ecx; /* load width into ecx */\n" |
304 |
" movl %3 , %%ecx; /* load width into ecx */\n" |
305 |
" /* */\n" |
305 |
" /* */\n" |
306 |
".rept 8 ; /* Loop for 8 lines */\n" |
306 |
".rept 8 ; /* Loop for 8 lines */\n" |
307 |
" movw (%%eax), %%dx; /* */\n" |
307 |
" movw (%%eax), %%dx; /* */\n" |
308 |
" movd %%edx , %%mm1; /* 2 Pixels from filtered frame to mm1 */\n" |
308 |
" movd %%edx , %%mm1; /* 2 Pixels from filtered frame to mm1 */\n" |
309 |
" movw (%%ebx), %%dx; /* */\n" |
309 |
" movw (%%esi), %%dx; /* */\n" |
310 |
" movd %%edx , %%mm2; /* 2 Pixels from reference frame to mm2 */\n" |
310 |
" movd %%edx , %%mm2; /* 2 Pixels from reference frame to mm2 */\n" |
311 |
" movq %%mm2 , %%mm3; /* hold a copy of mm2 in mm3 */\n" |
311 |
" movq %%mm2 , %%mm3; /* hold a copy of mm2 in mm3 */\n" |
312 |
" psubusb %%mm1 , %%mm3; /* positive differences between mm2 and mm1 */\n" |
312 |
" psubusb %%mm1 , %%mm3; /* positive differences between mm2 and mm1 */\n" |
Lines 318-330
Link Here
|
318 |
" paddusw %%mm1 , %%mm2; /* add mm1 (stored in mm1 and mm2...) */\n" |
318 |
" paddusw %%mm1 , %%mm2; /* add mm1 (stored in mm1 and mm2...) */\n" |
319 |
" paddusw %%mm2 , %%mm0; /* to mm0 */\n" |
319 |
" paddusw %%mm2 , %%mm0; /* to mm0 */\n" |
320 |
" addl %%ecx , %%eax; /* add framewidth to frameaddress */\n" |
320 |
" addl %%ecx , %%eax; /* add framewidth to frameaddress */\n" |
321 |
" addl %%ecx , %%ebx; /* add framewidth to frameaddress */\n" |
321 |
" addl %%ecx , %%esi; /* add framewidth to frameaddress */\n" |
322 |
" .endr /* end loop */\n" |
322 |
" .endr /* end loop */\n" |
323 |
" /* */\n" |
323 |
" /* */\n" |
324 |
" movq %%mm0 , %0 ; /* make mm0 available to gcc ... */\n" |
324 |
" movq %%mm0 , %0 ; /* make mm0 available to gcc ... */\n" |
325 |
:"=m" (a) |
325 |
:"=m" (a) |
326 |
:"m" (frm), "m" (ref), "m" (W2) |
326 |
:"m" (frm), "m" (ref), "m" (W2) |
327 |
:"%eax", "%ebx", "%ecx" |
327 |
:"%eax", "%esi", "%ecx" |
328 |
); |
328 |
); |
329 |
#endif |
329 |
#endif |
330 |
return (uint32_t)(a[0]+a[1]+a[2]+a[3]+a[4]+a[5]+a[6]+a[7]); |
330 |
return (uint32_t)(a[0]+a[1]+a[2]+a[3]+a[4]+a[5]+a[6]+a[7]); |
Lines 348-369
Link Here
|
348 |
( |
348 |
( |
349 |
" pxor %%mm0 , %%mm0; /* clear mm0 */\n" |
349 |
" pxor %%mm0 , %%mm0; /* clear mm0 */\n" |
350 |
" movl %1 , %%eax; /* load frameadress into eax */\n" |
350 |
" movl %1 , %%eax; /* load frameadress into eax */\n" |
351 |
" movl %2 , %%ebx; /* load frameadress into ebx */\n" |
351 |
" movl %2 , %%esi; /* load frameadress into esi */\n" |
352 |
" movl %3 , %%ecx; /* load width into ecx */\n" |
352 |
" movl %3 , %%ecx; /* load width into ecx */\n" |
353 |
" ; /* */\n" |
353 |
" ; /* */\n" |
354 |
" .rept 4 ; /* */\n" |
354 |
" .rept 4 ; /* */\n" |
355 |
" movd (%%eax), %%mm1; /* 4 Pixels from filtered frame to mm1 */\n" |
355 |
" movd (%%eax), %%mm1; /* 4 Pixels from filtered frame to mm1 */\n" |
356 |
" movd (%%ebx), %%mm2; /* 4 Pixels from filtered frame to mm2 */\n" |
356 |
" movd (%%esi), %%mm2; /* 4 Pixels from filtered frame to mm2 */\n" |
357 |
" psadbw %%mm2 , %%mm1; /* 4 Pixels difference to mm1 */\n" |
357 |
" psadbw %%mm2 , %%mm1; /* 4 Pixels difference to mm1 */\n" |
358 |
" paddusw %%mm1 , %%mm0; /* add result to mm0 */\n" |
358 |
" paddusw %%mm1 , %%mm0; /* add result to mm0 */\n" |
359 |
" addl %%ecx , %%eax; /* add framewidth to frameaddress */\n" |
359 |
" addl %%ecx , %%eax; /* add framewidth to frameaddress */\n" |
360 |
" addl %%ecx , %%ebx; /* add framewidth to frameaddress */\n" |
360 |
" addl %%ecx , %%esi; /* add framewidth to frameaddress */\n" |
361 |
" .endr ; /* */\n" |
361 |
" .endr ; /* */\n" |
362 |
" /* */\n" |
362 |
" /* */\n" |
363 |
" movq %%mm0 , %0 ; /* make mm0 available to gcc ... */\n" |
363 |
" movq %%mm0 , %0 ; /* make mm0 available to gcc ... */\n" |
364 |
:"=m" (a) |
364 |
:"=m" (a) |
365 |
:"m" (frm), "m" (ref), "m" (halfwidth) |
365 |
:"m" (frm), "m" (ref), "m" (halfwidth) |
366 |
:"%eax", "%ebx", "%ecx" |
366 |
:"%eax", "%esi", "%ecx" |
367 |
); |
367 |
); |
368 |
#endif |
368 |
#endif |
369 |
return a; |
369 |
return a; |
Lines 385-408
Link Here
|
385 |
( |
385 |
( |
386 |
" pxor %%mm0 , %%mm0; /* clear mm0 */\n" |
386 |
" pxor %%mm0 , %%mm0; /* clear mm0 */\n" |
387 |
" movl %1 , %%eax; /* load frameadress into eax */\n" |
387 |
" movl %1 , %%eax; /* load frameadress into eax */\n" |
388 |
" movl %2 , %%ebx; /* load frameadress into ebx */\n" |
388 |
" movl %2 , %%esi; /* load frameadress into esi */\n" |
389 |
" movl %3 , %%ecx; /* load width into ecx */\n" |
389 |
" movl %3 , %%ecx; /* load width into ecx */\n" |
390 |
" ; /* */\n" |
390 |
" ; /* */\n" |
391 |
" .rept 8 ; /* */\n" |
391 |
" .rept 8 ; /* */\n" |
392 |
" movw (%%eax), %%dx; /* */\n" |
392 |
" movw (%%eax), %%dx; /* */\n" |
393 |
" movd %%edx , %%mm1; /* 2 Pixels from filtered frame to mm1 */\n" |
393 |
" movd %%edx , %%mm1; /* 2 Pixels from filtered frame to mm1 */\n" |
394 |
" movw (%%ebx), %%dx; /* */\n" |
394 |
" movw (%%esi), %%dx; /* */\n" |
395 |
" movd %%edx , %%mm2; /* 2 Pixels from filtered frame to mm2 */\n" |
395 |
" movd %%edx , %%mm2; /* 2 Pixels from filtered frame to mm2 */\n" |
396 |
" psadbw %%mm2 , %%mm1; /* 2 Pixels difference to mm1 */\n" |
396 |
" psadbw %%mm2 , %%mm1; /* 2 Pixels difference to mm1 */\n" |
397 |
" paddusw %%mm1 , %%mm0; /* add result to mm0 */\n" |
397 |
" paddusw %%mm1 , %%mm0; /* add result to mm0 */\n" |
398 |
" addl %%ecx , %%eax; /* add framewidth to frameaddress */\n" |
398 |
" addl %%ecx , %%eax; /* add framewidth to frameaddress */\n" |
399 |
" addl %%ecx , %%ebx; /* add framewidth to frameaddress */\n" |
399 |
" addl %%ecx , %%esi; /* add framewidth to frameaddress */\n" |
400 |
" .endr ; /* */\n" |
400 |
" .endr ; /* */\n" |
401 |
" /* */\n" |
401 |
" /* */\n" |
402 |
" movq %%mm0 , %0 ; /* make mm0 available to gcc ... */\n" |
402 |
" movq %%mm0 , %0 ; /* make mm0 available to gcc ... */\n" |
403 |
:"=m" (a) |
403 |
:"=m" (a) |
404 |
:"m" (frm), "m" (ref), "m" (W2) |
404 |
:"m" (frm), "m" (ref), "m" (W2) |
405 |
:"%eax", "%ebx", "%ecx" |
405 |
:"%eax", "%esi", "%ecx" |
406 |
); |
406 |
); |
407 |
#endif |
407 |
#endif |
408 |
return a; |
408 |
return a; |
Lines 448-460
Link Here
|
448 |
( |
448 |
( |
449 |
" pxor %%mm0 , %%mm0; /* clear mm0 */" |
449 |
" pxor %%mm0 , %%mm0; /* clear mm0 */" |
450 |
" movl %1 , %%eax; /* load frameadress into eax */" |
450 |
" movl %1 , %%eax; /* load frameadress into eax */" |
451 |
" movl %2 , %%ebx; /* load frameadress into ebx */" |
451 |
" movl %2 , %%esi; /* load frameadress into esi */" |
452 |
" movl %3 , %%ecx; /* load frameadress into ecx */" |
452 |
" movl %3 , %%ecx; /* load frameadress into ecx */" |
453 |
" movl %4 , %%edx; /* load width into edx */" |
453 |
" movl %4 , %%edx; /* load width into edx */" |
454 |
" ; /* */" |
454 |
" ; /* */" |
455 |
" .rept 8 ; /* */" |
455 |
" .rept 8 ; /* */" |
456 |
" movq (%%eax), %%mm1; /* 8 Pixels from filtered frame to mm1 */" |
456 |
" movq (%%eax), %%mm1; /* 8 Pixels from filtered frame to mm1 */" |
457 |
" movq (%%ebx), %%mm2; /* 8 Pixels from filtered frame to mm2 (displaced) */" |
457 |
" movq (%%esi), %%mm2; /* 8 Pixels from filtered frame to mm2 (displaced) */" |
458 |
" movq (%%ecx), %%mm3; /* reference to mm3 */" |
458 |
" movq (%%ecx), %%mm3; /* reference to mm3 */" |
459 |
" psrlq $1 , %%mm1; /* average source pixels */" |
459 |
" psrlq $1 , %%mm1; /* average source pixels */" |
460 |
" psrlq $1 , %%mm2; /* shift right by one (divide by two) */" |
460 |
" psrlq $1 , %%mm2; /* shift right by one (divide by two) */" |
Lines 468-481
Link Here
|
468 |
" paddusb %%mm3 , %%mm1; /* mm1 now contains abs(mm1-mm2) */" |
468 |
" paddusb %%mm3 , %%mm1; /* mm1 now contains abs(mm1-mm2) */" |
469 |
" paddusw %%mm1 , %%mm0; /* add result to mm0 */" |
469 |
" paddusw %%mm1 , %%mm0; /* add result to mm0 */" |
470 |
" addl %%edx , %%eax; /* add framewidth to frameaddress */" |
470 |
" addl %%edx , %%eax; /* add framewidth to frameaddress */" |
471 |
" addl %%edx , %%ebx; /* add framewidth to frameaddress */" |
471 |
" addl %%edx , %%esi; /* add framewidth to frameaddress */" |
472 |
" addl %%edx , %%ecx; /* add framewidth to frameaddress */" |
472 |
" addl %%edx , %%ecx; /* add framewidth to frameaddress */" |
473 |
" .endr ; /* */" |
473 |
" .endr ; /* */" |
474 |
" /* */" |
474 |
" /* */" |
475 |
" movq %%mm0 , %0 ; /* make mm0 available to gcc ... */" |
475 |
" movq %%mm0 , %0 ; /* make mm0 available to gcc ... */" |
476 |
:"=m" (a) |
476 |
:"=m" (a) |
477 |
:"m" (frm1),"m" (frm2), "m" (ref), "m" (denoiser.frame.w), "m" (*bit_mask) |
477 |
:"m" (frm1),"m" (frm2), "m" (ref), "m" (denoiser.frame.w), "m" (*bit_mask) |
478 |
:"%eax", "%ebx", "%ecx", "%edx" |
478 |
:"%eax", "%esi", "%ecx", "%edx" |
479 |
); |
479 |
); |
480 |
#endif |
480 |
#endif |
481 |
return a; |
481 |
return a; |
Lines 497-522
Link Here
|
497 |
( |
497 |
( |
498 |
" pxor %%mm0 , %%mm0; /* clear mm0 */\n" |
498 |
" pxor %%mm0 , %%mm0; /* clear mm0 */\n" |
499 |
" movl %1 , %%eax; /* load frameadress into eax */\n" |
499 |
" movl %1 , %%eax; /* load frameadress into eax */\n" |
500 |
" movl %2 , %%ebx; /* load frameadress into ebx */\n" |
500 |
" movl %2 , %%esi; /* load frameadress into esi */\n" |
501 |
" movl %3 , %%ecx; /* load frameadress into ecx */\n" |
501 |
" movl %3 , %%ecx; /* load frameadress into ecx */\n" |
502 |
" movl %4 , %%edx; /* load width into edx */\n" |
502 |
" movl %4 , %%edx; /* load width into edx */\n" |
503 |
" ; /* */\n" |
503 |
" ; /* */\n" |
504 |
" .rept 8 ; /* */\n" |
504 |
" .rept 8 ; /* */\n" |
505 |
" movq (%%eax), %%mm1; /* 8 Pixels from filtered frame to mm1 */\n" |
505 |
" movq (%%eax), %%mm1; /* 8 Pixels from filtered frame to mm1 */\n" |
506 |
" movq (%%ebx), %%mm2; /* 8 Pixels from filtered frame to mm2 (displaced) */\n" |
506 |
" movq (%%esi), %%mm2; /* 8 Pixels from filtered frame to mm2 (displaced) */\n" |
507 |
" movq (%%ecx), %%mm3; /* 8 Pixels from reference frame to mm3 */\n" |
507 |
" movq (%%ecx), %%mm3; /* 8 Pixels from reference frame to mm3 */\n" |
508 |
" pavgb %%mm2 , %%mm1; /* average source pixels */\n" |
508 |
" pavgb %%mm2 , %%mm1; /* average source pixels */\n" |
509 |
" psadbw %%mm3 , %%mm1; /* 8 Pixels difference to mm1 */\n" |
509 |
" psadbw %%mm3 , %%mm1; /* 8 Pixels difference to mm1 */\n" |
510 |
" paddusw %%mm1 , %%mm0; /* add result to mm0 */\n" |
510 |
" paddusw %%mm1 , %%mm0; /* add result to mm0 */\n" |
511 |
" addl %%edx , %%eax; /* add framewidth to frameaddress */\n" |
511 |
" addl %%edx , %%eax; /* add framewidth to frameaddress */\n" |
512 |
" addl %%edx , %%ebx; /* add framewidth to frameaddress */\n" |
512 |
" addl %%edx , %%esi; /* add framewidth to frameaddress */\n" |
513 |
" addl %%edx , %%ecx; /* add framewidth to frameaddress */\n" |
513 |
" addl %%edx , %%ecx; /* add framewidth to frameaddress */\n" |
514 |
" .endr ; /* */\n" |
514 |
" .endr ; /* */\n" |
515 |
" /* */\n" |
515 |
" /* */\n" |
516 |
" movq %%mm0 , %0 ; /* make mm0 available to gcc ... */\n" |
516 |
" movq %%mm0 , %0 ; /* make mm0 available to gcc ... */\n" |
517 |
:"=m" (a) |
517 |
:"=m" (a) |
518 |
:"m" (frm1),"m" (frm2), "m" (ref), "m" (denoiser.frame.w) |
518 |
:"m" (frm1),"m" (frm2), "m" (ref), "m" (denoiser.frame.w) |
519 |
:"%eax", "%ebx", "%ecx", "%edx" |
519 |
:"%eax", "%esi", "%ecx", "%edx" |
520 |
); |
520 |
); |
521 |
#endif |
521 |
#endif |
522 |
return a; |
522 |
return a; |