Lines 619-628
Link Here
|
619 |
|
619 |
|
620 |
static inline void transpose4x4(uint8_t *dst, uint8_t *src, int dst_stride, int src_stride){ |
620 |
static inline void transpose4x4(uint8_t *dst, uint8_t *src, int dst_stride, int src_stride){ |
621 |
asm volatile( //FIXME could save 1 instruction if done as 8x4 ... |
621 |
asm volatile( //FIXME could save 1 instruction if done as 8x4 ... |
622 |
"movd %4, %%mm0 \n\t" |
622 |
"movd %0, %%mm0 \n\t" |
623 |
"movd %5, %%mm1 \n\t" |
623 |
"movd %1, %%mm1 \n\t" |
624 |
"movd %6, %%mm2 \n\t" |
624 |
"movd %2, %%mm2 \n\t" |
625 |
"movd %7, %%mm3 \n\t" |
625 |
|
|
|
626 |
: |
627 |
: "m" (*(uint32_t*)(src + 0*src_stride)), |
628 |
"m" (*(uint32_t*)(src + 1*src_stride)), |
629 |
"m" (*(uint32_t*)(src + 2*src_stride)) |
630 |
); |
631 |
asm volatile( //FIXME could save 1 instruction if done as 8x4 ... |
632 |
"movd %0, %%mm3 \n\t" |
633 |
|
634 |
: |
635 |
: "m" (*(uint32_t*)(src + 3*src_stride)) |
636 |
); |
637 |
asm volatile( //FIXME could save 1 instruction if done as 8x4 ... |
626 |
"punpcklbw %%mm1, %%mm0 \n\t" |
638 |
"punpcklbw %%mm1, %%mm0 \n\t" |
627 |
"punpcklbw %%mm3, %%mm2 \n\t" |
639 |
"punpcklbw %%mm3, %%mm2 \n\t" |
628 |
"movq %%mm0, %%mm1 \n\t" |
640 |
"movq %%mm0, %%mm1 \n\t" |
Lines 639-648
Link Here
|
639 |
"=m" (*(uint32_t*)(dst + 1*dst_stride)), |
651 |
"=m" (*(uint32_t*)(dst + 1*dst_stride)), |
640 |
"=m" (*(uint32_t*)(dst + 2*dst_stride)), |
652 |
"=m" (*(uint32_t*)(dst + 2*dst_stride)), |
641 |
"=m" (*(uint32_t*)(dst + 3*dst_stride)) |
653 |
"=m" (*(uint32_t*)(dst + 3*dst_stride)) |
642 |
: "m" (*(uint32_t*)(src + 0*src_stride)), |
|
|
643 |
"m" (*(uint32_t*)(src + 1*src_stride)), |
644 |
"m" (*(uint32_t*)(src + 2*src_stride)), |
645 |
"m" (*(uint32_t*)(src + 3*src_stride)) |
646 |
); |
654 |
); |
647 |
} |
655 |
} |
648 |
|
656 |
|