|
Lines 261-298
inline void mpeg3_bgra32_mmx(unsigned lo
Link Here
|
| 261 |
unsigned long v, |
261 |
unsigned long v, |
| 262 |
unsigned long *output) |
262 |
unsigned long *output) |
| 263 |
{ |
263 |
{ |
| 264 |
asm(" |
264 |
asm("\n\ |
| 265 |
/* Output will be 0x00rrggbb with the 00 trailing so this can also be used */ |
265 |
/* Output will be 0x00rrggbb with the 00 trailing so this can also be used */\n\ |
| 266 |
/* for bgr24. */ |
266 |
/* for bgr24. */\n\ |
| 267 |
movd (%0), %%mm0; /* Load y 0x00000000000000yy */ |
267 |
movd (%0), %%mm0; /* Load y 0x00000000000000yy */\n\ |
| 268 |
movd (%1), %%mm1; /* Load u 0x00000000000000cr */ |
268 |
movd (%1), %%mm1; /* Load u 0x00000000000000cr */\n\ |
| 269 |
movq %%mm0, %%mm3; /* Copy y to temp */ |
269 |
movq %%mm0, %%mm3; /* Copy y to temp */\n\ |
| 270 |
psllq $16, %%mm1; /* Shift u 0x0000000000cr0000 */ |
270 |
psllq $16, %%mm1; /* Shift u 0x0000000000cr0000 */\n\ |
| 271 |
movd (%2), %%mm2; /* Load v 0x00000000000000cb */ |
271 |
movd (%2), %%mm2; /* Load v 0x00000000000000cb */\n\ |
| 272 |
psllq $16, %%mm3; /* Shift y */ |
272 |
psllq $16, %%mm3; /* Shift y */\n\ |
| 273 |
movq %%mm1, %%mm4; /* Copy u to temp */ |
273 |
movq %%mm1, %%mm4; /* Copy u to temp */\n\ |
| 274 |
por %%mm3, %%mm0; /* Overlay new y byte 0x0000000000yy00yy */ |
274 |
por %%mm3, %%mm0; /* Overlay new y byte 0x0000000000yy00yy */\n\ |
| 275 |
psllq $16, %%mm4; /* Shift u */ |
275 |
psllq $16, %%mm4; /* Shift u */\n\ |
| 276 |
movq %%mm2, %%mm5; /* Copy v to temp */ |
276 |
movq %%mm2, %%mm5; /* Copy v to temp */\n\ |
| 277 |
psllq $16, %%mm3; /* Shift y */ |
277 |
psllq $16, %%mm3; /* Shift y */\n\ |
| 278 |
por %%mm4, %%mm1; /* Overlay new u byte 0x000000cr00cr0000 */ |
278 |
por %%mm4, %%mm1; /* Overlay new u byte 0x000000cr00cr0000 */\n\ |
| 279 |
psllq $16, %%mm5; /* Shift v */ |
279 |
psllq $16, %%mm5; /* Shift v */\n\ |
| 280 |
por %%mm3, %%mm0; /* Overlay new y byte 0x000000yy00yy00yy */ |
280 |
por %%mm3, %%mm0; /* Overlay new y byte 0x000000yy00yy00yy */\n\ |
| 281 |
por %%mm5, %%mm2; /* Overlay new v byte 0x0000000000cb00cb */ |
281 |
por %%mm5, %%mm2; /* Overlay new v byte 0x0000000000cb00cb */\n\ |
| 282 |
|
282 |
\n\ |
| 283 |
/* mm0: 0x000000yy00yy00yy mm1: 0x000000uu00uu0000 mm2: 0x0000000000vv00vv */ |
283 |
/* mm0: 0x000000yy00yy00yy mm1: 0x000000uu00uu0000 mm2: 0x0000000000vv00vv */\n\ |
| 284 |
psubw _mpeg3_MMX_U_80, %%mm1; /* Subtract 128 from u 0x000000uu00uu0000 */ |
284 |
psubw _mpeg3_MMX_U_80, %%mm1; /* Subtract 128 from u 0x000000uu00uu0000 */\n\ |
| 285 |
pmullw _mpeg3_MMX_U_COEF, %%mm1; /* Multiply u coeffs 0x0000uuuuuuuu0000 */ |
285 |
pmullw _mpeg3_MMX_U_COEF, %%mm1; /* Multiply u coeffs 0x0000uuuuuuuu0000 */\n\ |
| 286 |
psllw $6, %%mm0; /* Shift y coeffs 0x0000yyy0yyy0yyy0 */ |
286 |
psllw $6, %%mm0; /* Shift y coeffs 0x0000yyy0yyy0yyy0 */\n\ |
| 287 |
psubw _mpeg3_MMX_V_80, %%mm2; /* Subtract 128 from v 0x0000000000cb00cb */ |
287 |
psubw _mpeg3_MMX_V_80, %%mm2; /* Subtract 128 from v 0x0000000000cb00cb */\n\ |
| 288 |
pmullw _mpeg3_MMX_V_COEF, %%mm2; /* Multiply v coeffs 0x0000crcrcrcrcrcr */ |
288 |
pmullw _mpeg3_MMX_V_COEF, %%mm2; /* Multiply v coeffs 0x0000crcrcrcrcrcr */\n\ |
| 289 |
|
289 |
\n\ |
| 290 |
/* mm0: 0x000000yy00yy00yy mm1: 0x0000uuuuuuuu0000 mm2: 0x00000000vvvvvvvv */ |
290 |
/* mm0: 0x000000yy00yy00yy mm1: 0x0000uuuuuuuu0000 mm2: 0x00000000vvvvvvvv */\n\ |
| 291 |
paddsw %%mm1, %%mm0; /* Add u to result */ |
291 |
paddsw %%mm1, %%mm0; /* Add u to result */\n\ |
| 292 |
paddsw %%mm2, %%mm0; /* Add v to result 0x0000rrrrggggbbbb */ |
292 |
paddsw %%mm2, %%mm0; /* Add v to result 0x0000rrrrggggbbbb */\n\ |
| 293 |
psraw $6, %%mm0; /* Demote precision */ |
293 |
psraw $6, %%mm0; /* Demote precision */\n\ |
| 294 |
packuswb %%mm0, %%mm0; /* Pack into ARGB 0x0000000000rrggbb */ |
294 |
packuswb %%mm0, %%mm0; /* Pack into ARGB 0x0000000000rrggbb */\n\ |
| 295 |
movd %%mm0, (%3); /* Store output */ |
295 |
movd %%mm0, (%3); /* Store output */\n\ |
| 296 |
" |
296 |
" |
| 297 |
: |
297 |
: |
| 298 |
: "r" (&y), "r" (&u), "r" (&v), "r" (output)); |
298 |
: "r" (&y), "r" (&u), "r" (&v), "r" (output)); |
|
Lines 303-341
inline void mpeg3_601_bgra32_mmx(unsigne
Link Here
|
| 303 |
unsigned long v, |
303 |
unsigned long v, |
| 304 |
unsigned long *output) |
304 |
unsigned long *output) |
| 305 |
{ |
305 |
{ |
| 306 |
asm(" |
306 |
asm("\n\ |
| 307 |
/* Output will be 0x00rrggbb with the 00 trailing so this can also be used */ |
307 |
/* Output will be 0x00rrggbb with the 00 trailing so this can also be used */\n\ |
| 308 |
/* for bgr24. */ |
308 |
/* for bgr24. */\n\ |
| 309 |
movd (%0), %%mm0; /* Load y 0x00000000000000yy */ |
309 |
movd (%0), %%mm0; /* Load y 0x00000000000000yy */\n\ |
| 310 |
psubsw _mpeg3_MMX_601_Y_DIFF, %%mm0; /* Subtract 16 from y */ |
310 |
psubsw _mpeg3_MMX_601_Y_DIFF, %%mm0; /* Subtract 16 from y */\n\ |
| 311 |
movd (%1), %%mm1; /* Load u 0x00000000000000cr */ |
311 |
movd (%1), %%mm1; /* Load u 0x00000000000000cr */\n\ |
| 312 |
movq %%mm0, %%mm3; /* Copy y to temp */ |
312 |
movq %%mm0, %%mm3; /* Copy y to temp */\n\ |
| 313 |
psllq $16, %%mm1; /* Shift u 0x0000000000cr0000 */ |
313 |
psllq $16, %%mm1; /* Shift u 0x0000000000cr0000 */\n\ |
| 314 |
movd (%2), %%mm2; /* Load v 0x00000000000000cb */ |
314 |
movd (%2), %%mm2; /* Load v 0x00000000000000cb */\n\ |
| 315 |
psllq $16, %%mm3; /* Shift y */ |
315 |
psllq $16, %%mm3; /* Shift y */\n\ |
| 316 |
movq %%mm1, %%mm4; /* Copy u to temp */ |
316 |
movq %%mm1, %%mm4; /* Copy u to temp */\n\ |
| 317 |
por %%mm3, %%mm0; /* Overlay new y byte 0x0000000000yy00yy */ |
317 |
por %%mm3, %%mm0; /* Overlay new y byte 0x0000000000yy00yy */\n\ |
| 318 |
psllq $16, %%mm4; /* Shift u */ |
318 |
psllq $16, %%mm4; /* Shift u */\n\ |
| 319 |
movq %%mm2, %%mm5; /* Copy v to temp */ |
319 |
movq %%mm2, %%mm5; /* Copy v to temp */\n\ |
| 320 |
psllq $16, %%mm3; /* Shift y */ |
320 |
psllq $16, %%mm3; /* Shift y */\n\ |
| 321 |
por %%mm4, %%mm1; /* Overlay new u byte 0x000000cr00cr0000 */ |
321 |
por %%mm4, %%mm1; /* Overlay new u byte 0x000000cr00cr0000 */\n\ |
| 322 |
psllq $16, %%mm5; /* Shift v */ |
322 |
psllq $16, %%mm5; /* Shift v */\n\ |
| 323 |
por %%mm3, %%mm0; /* Overlay new y byte 0x000000yy00yy00yy */ |
323 |
por %%mm3, %%mm0; /* Overlay new y byte 0x000000yy00yy00yy */\n\ |
| 324 |
por %%mm5, %%mm2; /* Overlay new v byte 0x0000000000cb00cb */ |
324 |
por %%mm5, %%mm2; /* Overlay new v byte 0x0000000000cb00cb */\n\ |
| 325 |
|
325 |
\n\ |
| 326 |
/* mm0: 0x000000yy00yy00yy mm1: 0x000000uu00uu0000 mm2: 0x0000000000vv00vv */ |
326 |
/* mm0: 0x000000yy00yy00yy mm1: 0x000000uu00uu0000 mm2: 0x0000000000vv00vv */\n\ |
| 327 |
pmullw _mpeg3_MMX_601_Y_COEF, %%mm0; /* Scale and shift y coeffs */ |
327 |
pmullw _mpeg3_MMX_601_Y_COEF, %%mm0; /* Scale and shift y coeffs */\n\ |
| 328 |
psubw _mpeg3_MMX_U_80, %%mm1; /* Subtract 128 from u 0x000000uu00uu0000 */ |
328 |
psubw _mpeg3_MMX_U_80, %%mm1; /* Subtract 128 from u 0x000000uu00uu0000 */\n\ |
| 329 |
pmullw _mpeg3_MMX_U_COEF, %%mm1; /* Multiply u coeffs 0x0000uuuuuuuu0000 */ |
329 |
pmullw _mpeg3_MMX_U_COEF, %%mm1; /* Multiply u coeffs 0x0000uuuuuuuu0000 */\n\ |
| 330 |
psubw _mpeg3_MMX_V_80, %%mm2; /* Subtract 128 from v 0x0000000000cb00cb */ |
330 |
psubw _mpeg3_MMX_V_80, %%mm2; /* Subtract 128 from v 0x0000000000cb00cb */\n\ |
| 331 |
pmullw _mpeg3_MMX_V_COEF, %%mm2; /* Multiply v coeffs 0x0000crcrcrcrcrcr */ |
331 |
pmullw _mpeg3_MMX_V_COEF, %%mm2; /* Multiply v coeffs 0x0000crcrcrcrcrcr */\n\ |
| 332 |
|
332 |
\n\ |
| 333 |
/* mm0: 0x000000yy00yy00yy mm1: 0x0000uuuuuuuu0000 mm2: 0x00000000vvvvvvvv */ |
333 |
/* mm0: 0x000000yy00yy00yy mm1: 0x0000uuuuuuuu0000 mm2: 0x00000000vvvvvvvv */\n\ |
| 334 |
paddsw %%mm1, %%mm0; /* Add u to result */ |
334 |
paddsw %%mm1, %%mm0; /* Add u to result */\n\ |
| 335 |
paddsw %%mm2, %%mm0; /* Add v to result 0x0000rrrrggggbbbb */ |
335 |
paddsw %%mm2, %%mm0; /* Add v to result 0x0000rrrrggggbbbb */\n\ |
| 336 |
psraw $6, %%mm0; /* Demote precision */ |
336 |
psraw $6, %%mm0; /* Demote precision */\n\ |
| 337 |
packuswb %%mm0, %%mm0; /* Pack into ARGB 0x0000000000rrggbb */ |
337 |
packuswb %%mm0, %%mm0; /* Pack into ARGB 0x0000000000rrggbb */\n\ |
| 338 |
movd %%mm0, (%3); /* Store output */ |
338 |
movd %%mm0, (%3); /* Store output */\n\ |
| 339 |
" |
339 |
" |
| 340 |
: |
340 |
: |
| 341 |
: "r" (&y), "r" (&u), "r" (&v), "r" (output)); |
341 |
: "r" (&y), "r" (&u), "r" (&v), "r" (output)); |
|
Lines 351-388
inline void mpeg3_rgba32_mmx(unsigned lo
Link Here
|
| 351 |
unsigned long v, |
351 |
unsigned long v, |
| 352 |
unsigned long *output) |
352 |
unsigned long *output) |
| 353 |
{ |
353 |
{ |
| 354 |
asm(" |
354 |
asm("\n\ |
| 355 |
/* Output will be 0x00bbggrr with the 00 trailing so this can also be used */ |
355 |
/* Output will be 0x00bbggrr with the 00 trailing so this can also be used */\n\ |
| 356 |
/* for rgb24. */ |
356 |
/* for rgb24. */\n\ |
| 357 |
movd (%0), %%mm0; /* Load y 0x00000000000000yy */ |
357 |
movd (%0), %%mm0; /* Load y 0x00000000000000yy */\n\ |
| 358 |
movd (%1), %%mm1; /* Load v 0x00000000000000vv */ |
358 |
movd (%1), %%mm1; /* Load v 0x00000000000000vv */\n\ |
| 359 |
movq %%mm0, %%mm3; /* Copy y to temp */ |
359 |
movq %%mm0, %%mm3; /* Copy y to temp */\n\ |
| 360 |
psllq $16, %%mm1; /* Shift v 0x0000000000vv0000 */ |
360 |
psllq $16, %%mm1; /* Shift v 0x0000000000vv0000 */\n\ |
| 361 |
movd (%2), %%mm2; /* Load u 0x00000000000000uu */ |
361 |
movd (%2), %%mm2; /* Load u 0x00000000000000uu */\n\ |
| 362 |
psllq $16, %%mm3; /* Shift y */ |
362 |
psllq $16, %%mm3; /* Shift y */\n\ |
| 363 |
movq %%mm1, %%mm4; /* Copy v to temp */ |
363 |
movq %%mm1, %%mm4; /* Copy v to temp */\n\ |
| 364 |
por %%mm3, %%mm0; /* Overlay new y byte 0x0000000000yy00yy */ |
364 |
por %%mm3, %%mm0; /* Overlay new y byte 0x0000000000yy00yy */\n\ |
| 365 |
psllq $16, %%mm4; /* Shift v */ |
365 |
psllq $16, %%mm4; /* Shift v */\n\ |
| 366 |
movq %%mm2, %%mm5; /* Copy u to temp */ |
366 |
movq %%mm2, %%mm5; /* Copy u to temp */\n\ |
| 367 |
psllq $16, %%mm3; /* Shift y */ |
367 |
psllq $16, %%mm3; /* Shift y */\n\ |
| 368 |
por %%mm4, %%mm1; /* Overlay new v byte 0x000000vv00vv0000 */ |
368 |
por %%mm4, %%mm1; /* Overlay new v byte 0x000000vv00vv0000 */\n\ |
| 369 |
psllq $16, %%mm5; /* Shift u */ |
369 |
psllq $16, %%mm5; /* Shift u */\n\ |
| 370 |
por %%mm3, %%mm0; /* Overlay new y byte 0x000000yy00yy00yy */ |
370 |
por %%mm3, %%mm0; /* Overlay new y byte 0x000000yy00yy00yy */\n\ |
| 371 |
por %%mm5, %%mm2; /* Overlay new u byte 0x0000000000uu00uu */ |
371 |
por %%mm5, %%mm2; /* Overlay new u byte 0x0000000000uu00uu */\n\ |
| 372 |
|
372 |
\n\ |
| 373 |
/* mm0: 0x000000yy00yy00yy mm1: 0x000000vv00vv0000 mm2: 0x0000000000uu00uu */ |
373 |
/* mm0: 0x000000yy00yy00yy mm1: 0x000000vv00vv0000 mm2: 0x0000000000uu00uu */\n\ |
| 374 |
psubw _mpeg3_MMX_V_80_RGB, %%mm1; /* Subtract 128 from v 0x000000vv00vv0000 */ |
374 |
psubw _mpeg3_MMX_V_80_RGB, %%mm1; /* Subtract 128 from v 0x000000vv00vv0000 */\n\ |
| 375 |
pmullw _mpeg3_MMX_V_COEF_RGB, %%mm1; /* Multiply v coeffs 0x0000vvvvvvvv0000 */ |
375 |
pmullw _mpeg3_MMX_V_COEF_RGB, %%mm1; /* Multiply v coeffs 0x0000vvvvvvvv0000 */\n\ |
| 376 |
psllw $6, %%mm0; /* Shift y coeffs 0x0000yyy0yyy0yyy0 */ |
376 |
psllw $6, %%mm0; /* Shift y coeffs 0x0000yyy0yyy0yyy0 */\n\ |
| 377 |
psubw _mpeg3_MMX_U_80_RGB, %%mm2; /* Subtract 128 from u 0x0000000000uu00uu */ |
377 |
psubw _mpeg3_MMX_U_80_RGB, %%mm2; /* Subtract 128 from u 0x0000000000uu00uu */\n\ |
| 378 |
pmullw _mpeg3_MMX_U_COEF_RGB, %%mm2; /* Multiply u coeffs 0x0000uuuuuuuuuuuu */ |
378 |
pmullw _mpeg3_MMX_U_COEF_RGB, %%mm2; /* Multiply u coeffs 0x0000uuuuuuuuuuuu */\n\ |
| 379 |
|
379 |
\n\ |
| 380 |
/* mm0: 0x000000yy00yy00yy mm1: 0x0000vvvvvvvv0000 mm2: 0x00000000uuuuuuuu */ |
380 |
/* mm0: 0x000000yy00yy00yy mm1: 0x0000vvvvvvvv0000 mm2: 0x00000000uuuuuuuu */\n\ |
| 381 |
paddsw %%mm1, %%mm0; /* Add v to result */ |
381 |
paddsw %%mm1, %%mm0; /* Add v to result */\n\ |
| 382 |
paddsw %%mm2, %%mm0; /* Add u to result 0x0000bbbbggggrrrr */ |
382 |
paddsw %%mm2, %%mm0; /* Add u to result 0x0000bbbbggggrrrr */\n\ |
| 383 |
psraw $6, %%mm0; /* Demote precision */ |
383 |
psraw $6, %%mm0; /* Demote precision */\n\ |
| 384 |
packuswb %%mm0, %%mm0; /* Pack into RGBA 0x0000000000bbggrr */ |
384 |
packuswb %%mm0, %%mm0; /* Pack into RGBA 0x0000000000bbggrr */\n\ |
| 385 |
movd %%mm0, (%3); /* Store output */ |
385 |
movd %%mm0, (%3); /* Store output */\n\ |
| 386 |
" |
386 |
" |
| 387 |
: |
387 |
: |
| 388 |
: "r" (&y), "r" (&v), "r" (&u), "r" (output)); |
388 |
: "r" (&y), "r" (&v), "r" (&u), "r" (output)); |
|
Lines 393-431
inline void mpeg3_601_rgba32_mmx(unsigne
Link Here
|
| 393 |
unsigned long v, |
393 |
unsigned long v, |
| 394 |
unsigned long *output) |
394 |
unsigned long *output) |
| 395 |
{ |
395 |
{ |
| 396 |
asm(" |
396 |
asm("\n\ |
| 397 |
/* Output will be 0x00bbggrr with the 00 trailing so this can also be used */ |
397 |
/* Output will be 0x00bbggrr with the 00 trailing so this can also be used */\n\ |
| 398 |
/* for rgb24. */ |
398 |
/* for rgb24. */\n\ |
| 399 |
movd (%0), %%mm0; /* Load y 0x00000000000000yy */ |
399 |
movd (%0), %%mm0; /* Load y 0x00000000000000yy */\n\ |
| 400 |
psubsw _mpeg3_MMX_601_Y_DIFF, %%mm0; /* Subtract 16 from y */ |
400 |
psubsw _mpeg3_MMX_601_Y_DIFF, %%mm0; /* Subtract 16 from y */\n\ |
| 401 |
movd (%1), %%mm1; /* Load v 0x00000000000000vv */ |
401 |
movd (%1), %%mm1; /* Load v 0x00000000000000vv */\n\ |
| 402 |
movq %%mm0, %%mm3; /* Copy y to temp */ |
402 |
movq %%mm0, %%mm3; /* Copy y to temp */\n\ |
| 403 |
psllq $16, %%mm1; /* Shift v 0x0000000000vv0000 */ |
403 |
psllq $16, %%mm1; /* Shift v 0x0000000000vv0000 */\n\ |
| 404 |
movd (%2), %%mm2; /* Load u 0x00000000000000uu */ |
404 |
movd (%2), %%mm2; /* Load u 0x00000000000000uu */\n\ |
| 405 |
psllq $16, %%mm3; /* Shift y */ |
405 |
psllq $16, %%mm3; /* Shift y */\n\ |
| 406 |
movq %%mm1, %%mm4; /* Copy v to temp */ |
406 |
movq %%mm1, %%mm4; /* Copy v to temp */\n\ |
| 407 |
por %%mm3, %%mm0; /* Overlay new y byte 0x0000000000yy00yy */ |
407 |
por %%mm3, %%mm0; /* Overlay new y byte 0x0000000000yy00yy */\n\ |
| 408 |
psllq $16, %%mm4; /* Shift v */ |
408 |
psllq $16, %%mm4; /* Shift v */\n\ |
| 409 |
movq %%mm2, %%mm5; /* Copy u to temp */ |
409 |
movq %%mm2, %%mm5; /* Copy u to temp */\n\ |
| 410 |
psllq $16, %%mm3; /* Shift y */ |
410 |
psllq $16, %%mm3; /* Shift y */\n\ |
| 411 |
por %%mm4, %%mm1; /* Overlay new v byte 0x000000vv00vv0000 */ |
411 |
por %%mm4, %%mm1; /* Overlay new v byte 0x000000vv00vv0000 */\n\ |
| 412 |
psllq $16, %%mm5; /* Shift u */ |
412 |
psllq $16, %%mm5; /* Shift u */\n\ |
| 413 |
por %%mm3, %%mm0; /* Overlay new y byte 0x000000yy00yy00yy */ |
413 |
por %%mm3, %%mm0; /* Overlay new y byte 0x000000yy00yy00yy */\n\ |
| 414 |
por %%mm5, %%mm2; /* Overlay new u byte 0x0000000000uu00uu */ |
414 |
por %%mm5, %%mm2; /* Overlay new u byte 0x0000000000uu00uu */\n\ |
| 415 |
|
415 |
\n\ |
| 416 |
/* mm0: 0x000000yy00yy00yy mm1: 0x000000vv00vv0000 mm2: 0x0000000000uu00uu */ |
416 |
/* mm0: 0x000000yy00yy00yy mm1: 0x000000vv00vv0000 mm2: 0x0000000000uu00uu */\n\ |
| 417 |
pmullw _mpeg3_MMX_601_Y_COEF, %%mm0; /* Scale y coeffs */ |
417 |
pmullw _mpeg3_MMX_601_Y_COEF, %%mm0; /* Scale y coeffs */\n\ |
| 418 |
psubw _mpeg3_MMX_V_80_RGB, %%mm1; /* Subtract 128 from v 0x000000vv00vv0000 */ |
418 |
psubw _mpeg3_MMX_V_80_RGB, %%mm1; /* Subtract 128 from v 0x000000vv00vv0000 */\n\ |
| 419 |
pmullw _mpeg3_MMX_V_COEF_RGB, %%mm1; /* Multiply v coeffs 0x0000vvvvvvvv0000 */ |
419 |
pmullw _mpeg3_MMX_V_COEF_RGB, %%mm1; /* Multiply v coeffs 0x0000vvvvvvvv0000 */\n\ |
| 420 |
psubw _mpeg3_MMX_U_80_RGB, %%mm2; /* Subtract 128 from u 0x0000000000uu00uu */ |
420 |
psubw _mpeg3_MMX_U_80_RGB, %%mm2; /* Subtract 128 from u 0x0000000000uu00uu */\n\ |
| 421 |
pmullw _mpeg3_MMX_U_COEF_RGB, %%mm2; /* Multiply u coeffs 0x0000uuuuuuuuuuuu */ |
421 |
pmullw _mpeg3_MMX_U_COEF_RGB, %%mm2; /* Multiply u coeffs 0x0000uuuuuuuuuuuu */\n\ |
| 422 |
|
422 |
\n\ |
| 423 |
/* mm0: 0x000000yy00yy00yy mm1: 0x0000vvvvvvvv0000 mm2: 0x00000000uuuuuuuu */ |
423 |
/* mm0: 0x000000yy00yy00yy mm1: 0x0000vvvvvvvv0000 mm2: 0x00000000uuuuuuuu */\n\ |
| 424 |
paddsw %%mm1, %%mm0; /* Add v to result */ |
424 |
paddsw %%mm1, %%mm0; /* Add v to result */\n\ |
| 425 |
paddsw %%mm2, %%mm0; /* Add u to result 0x0000bbbbggggrrrr */ |
425 |
paddsw %%mm2, %%mm0; /* Add u to result 0x0000bbbbggggrrrr */\n\ |
| 426 |
psraw $6, %%mm0; /* Demote precision */ |
426 |
psraw $6, %%mm0; /* Demote precision */\n\ |
| 427 |
packuswb %%mm0, %%mm0; /* Pack into RGBA 0x0000000000bbggrr */ |
427 |
packuswb %%mm0, %%mm0; /* Pack into RGBA 0x0000000000bbggrr */\n\ |
| 428 |
movd %%mm0, (%3); /* Store output */ |
428 |
movd %%mm0, (%3); /* Store output */\n\ |
| 429 |
" |
429 |
" |
| 430 |
: |
430 |
: |
| 431 |
: "r" (&y), "r" (&v), "r" (&u), "r" (output)); |
431 |
: "r" (&y), "r" (&v), "r" (&u), "r" (output)); |