Lines 723-754
Link Here
|
723 |
} |
723 |
} |
724 |
} |
724 |
} |
725 |
|
725 |
|
/*
 * transpose4x4 -- shown here as a flattened side-by-side diff: every code row
 * is preceded by its line-number gutter ("NNN |"), and a row that exists in
 * both variants appears twice, first-variant text and then second-variant text.
 * NOTE(review): the first variant is presumably the pre-patch code and the
 * second the post-patch code -- confirm against the actual patch.
 *
 * Both variants load four 32-bit rows from src (src + 0..3 * src_stride) into
 * mm0..mm3, interleave them byte-wise and then word-wise, and store four
 * 32-bit rows to dst (dst + 0..3 * dst_stride), i.e. a 4x4 byte transpose.
 *
 * First variant:  int strides; each source/destination row is handed to the
 *                 asm as its own "m" / "=m" memory operand (%0..%7).
 * Second variant: x86_reg strides; src, src_stride, src + src_stride, dst,
 *                 dst_stride and dst + dst_stride are "r" inputs (%0..%5), the
 *                 asm builds the row addresses itself, there are no output
 *                 operands, and a "memory" clobber is declared instead.
 * NOTE(review): x86_reg is not defined in this excerpt; presumably a
 * pointer-width integer so the stride can serve as an index register -- confirm.
 */
726 |
static inline void transpose4x4(uint8_t *dst, uint8_t *src, int dst_stride, int src_stride){ |
726 |
static inline void transpose4x4(uint8_t *dst, uint8_t *src, x86_reg dst_stride, x86_reg src_stride){ |
727 |
__asm__ volatile( //FIXME could save 1 instruction if done as 8x4 ... |
727 |
__asm__ volatile( //FIXME could save 1 instruction if done as 8x4 ... |
/* Load source rows 0..3 into mm0..mm3. In the second variant %2 is
 * src + src_stride, so (%2, %1, 2) addresses src + 3 * src_stride. */
728 |
"movd %4, %%mm0 \n\t" |
728 |
"movd (%0), %%mm0 \n\t" |
729 |
"movd %5, %%mm1 \n\t" |
729 |
"movd (%2), %%mm1 \n\t" |
730 |
"movd %6, %%mm2 \n\t" |
730 |
"movd (%0, %1, 2), %%mm2 \n\t" |
731 |
"movd %7, %%mm3 \n\t" |
731 |
"movd (%2, %1, 2), %%mm3 \n\t" |
/* Interleave bytes of rows 0/1 and rows 2/3, then interleave the resulting
 * words: mm0 ends up holding output rows 0 and 1, mm1 output rows 2 and 3. */
732 |
"punpcklbw %%mm1, %%mm0 \n\t" |
732 |
"punpcklbw %%mm1, %%mm0 \n\t" |
733 |
"punpcklbw %%mm3, %%mm2 \n\t" |
733 |
"punpcklbw %%mm3, %%mm2 \n\t" |
734 |
"movq %%mm0, %%mm1 \n\t" |
734 |
"movq %%mm0, %%mm1 \n\t" |
735 |
"punpcklwd %%mm2, %%mm0 \n\t" |
735 |
"punpcklwd %%mm2, %%mm0 \n\t" |
736 |
"punpckhwd %%mm2, %%mm1 \n\t" |
736 |
"punpckhwd %%mm2, %%mm1 \n\t" |
/* Store the low dword of each register, then move its high dword down with
 * punpckhdq and store that as the next destination row. */
737 |
"movd %%mm0, %0 \n\t" |
737 |
"movd %%mm0, (%3) \n\t" |
738 |
"punpckhdq %%mm0, %%mm0 \n\t" |
738 |
"punpckhdq %%mm0, %%mm0 \n\t" |
739 |
"movd %%mm0, %1 \n\t" |
739 |
"movd %%mm0, (%5) \n\t" |
740 |
"movd %%mm1, %2 \n\t" |
740 |
"movd %%mm1, (%3, %4, 2) \n\t" |
741 |
"punpckhdq %%mm1, %%mm1 \n\t" |
741 |
"punpckhdq %%mm1, %%mm1 \n\t" |
742 |
"movd %%mm1, %3 \n\t" |
742 |
"movd %%mm1, (%5, %4, 2) \n\t" |
743 |

743 |

/* Operand lists. First variant: four "=m" outputs (dst rows) followed by four
 * "m" inputs (src rows). Second variant: empty output list, six "r" inputs,
 * and a "memory" clobber. The two lists differ in length, so the rows below
 * no longer line up one-to-one between the variants. */
744 |
: "=m" (*(uint32_t*)(dst + 0*dst_stride)), |
744 |
: |
745 |
"=m" (*(uint32_t*)(dst + 1*dst_stride)), |
745 |
: "r"(src), "r"(src_stride), "r"(src + src_stride), |
746 |
"=m" (*(uint32_t*)(dst + 2*dst_stride)), |
746 |
"r"(dst), "r"(dst_stride), "r"(dst + dst_stride) |
747 |
"=m" (*(uint32_t*)(dst + 3*dst_stride)) |
747 |
: "memory" |
748 |
: "m" (*(uint32_t*)(src + 0*src_stride)), |


749 |
"m" (*(uint32_t*)(src + 1*src_stride)), |
750 |
"m" (*(uint32_t*)(src + 2*src_stride)), |
751 |
"m" (*(uint32_t*)(src + 3*src_stride)) |
752 |
); |
748 |
); |
753 |
} |
749 |
} |
754 |
|
750 |
|