
(-)a/av1/common/ppc/cfl_ppc.c (-43 / +42 lines)
--- a/av1/common/ppc/cfl_ppc.c
+++ b/av1/common/ppc/cfl_ppc.c
@@ -24,19 +24,21 @@
 #define CFL_LINE_2 128
 #define CFL_LINE_3 192
 
-typedef vector int8_t int8x16_t;
-typedef vector uint8_t uint8x16_t;
-typedef vector int16_t int16x8_t;
-typedef vector uint16_t uint16x8_t;
-typedef vector int32_t int32x4_t;
-typedef vector uint32_t uint32x4_t;
-typedef vector uint64_t uint64x2_t;
+typedef vector signed char int8x16_t;          // NOLINT(runtime/int)
+typedef vector unsigned char uint8x16_t;       // NOLINT(runtime/int)
+typedef vector signed short int16x8_t;         // NOLINT(runtime/int)
+typedef vector unsigned short uint16x8_t;      // NOLINT(runtime/int)
+typedef vector signed int int32x4_t;           // NOLINT(runtime/int)
+typedef vector unsigned int uint32x4_t;        // NOLINT(runtime/int)
+typedef vector unsigned long long uint64x2_t;  // NOLINT(runtime/int)
 
-static INLINE void subtract_average_vsx(int16_t *pred_buf, int width,
-                                        int height, int round_offset,
+static INLINE void subtract_average_vsx(const uint16_t *src_ptr, int16_t *dst,
+                                        int width, int height, int round_offset,
                                         int num_pel_log2) {
-  const int16_t *end = pred_buf + height * CFL_BUF_LINE;
-  const int16_t *sum_buf = pred_buf;
+  //  int16_t *dst = dst_ptr;
+  const int16_t *dst_end = dst + height * CFL_BUF_LINE;
+  const int16_t *sum_buf = (const int16_t *)src_ptr;
+  const int16_t *end = sum_buf + height * CFL_BUF_LINE;
   const uint32x4_t div_shift = vec_splats((uint32_t)num_pel_log2);
   const uint8x16_t mask_64 = { 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
                                0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 };
@@ -71,43 +73,40 @@
   const int32x4_t avg = vec_sr(sum_32x4, div_shift);
   const int16x8_t vec_avg = vec_pack(avg, avg);
   do {
-    vec_vsx_st(vec_sub(vec_vsx_ld(OFF_0, pred_buf), vec_avg), OFF_0, pred_buf);
-    vec_vsx_st(vec_sub(vec_vsx_ld(OFF_0 + CFL_LINE_1, pred_buf), vec_avg),
-               OFF_0 + CFL_BUF_LINE_BYTES, pred_buf);
-    vec_vsx_st(vec_sub(vec_vsx_ld(OFF_0 + CFL_LINE_2, pred_buf), vec_avg),
-               OFF_0 + CFL_LINE_2, pred_buf);
-    vec_vsx_st(vec_sub(vec_vsx_ld(OFF_0 + CFL_LINE_3, pred_buf), vec_avg),
-               OFF_0 + CFL_LINE_3, pred_buf);
+    vec_vsx_st(vec_sub(vec_vsx_ld(OFF_0, dst), vec_avg), OFF_0, dst);
+    vec_vsx_st(vec_sub(vec_vsx_ld(OFF_0 + CFL_LINE_1, dst), vec_avg),
+               OFF_0 + CFL_BUF_LINE_BYTES, dst);
+    vec_vsx_st(vec_sub(vec_vsx_ld(OFF_0 + CFL_LINE_2, dst), vec_avg),
+               OFF_0 + CFL_LINE_2, dst);
+    vec_vsx_st(vec_sub(vec_vsx_ld(OFF_0 + CFL_LINE_3, dst), vec_avg),
+               OFF_0 + CFL_LINE_3, dst);
     if (width >= 16) {
-      vec_vsx_st(vec_sub(vec_vsx_ld(OFF_1, pred_buf), vec_avg), OFF_1,
-                 pred_buf);
-      vec_vsx_st(vec_sub(vec_vsx_ld(OFF_1 + CFL_LINE_1, pred_buf), vec_avg),
-                 OFF_1 + CFL_LINE_1, pred_buf);
-      vec_vsx_st(vec_sub(vec_vsx_ld(OFF_1 + CFL_LINE_2, pred_buf), vec_avg),
-                 OFF_1 + CFL_LINE_2, pred_buf);
-      vec_vsx_st(vec_sub(vec_vsx_ld(OFF_1 + CFL_LINE_3, pred_buf), vec_avg),
-                 OFF_1 + CFL_LINE_3, pred_buf);
+      vec_vsx_st(vec_sub(vec_vsx_ld(OFF_1, dst), vec_avg), OFF_1, dst);
+      vec_vsx_st(vec_sub(vec_vsx_ld(OFF_1 + CFL_LINE_1, dst), vec_avg),
+                 OFF_1 + CFL_LINE_1, dst);
+      vec_vsx_st(vec_sub(vec_vsx_ld(OFF_1 + CFL_LINE_2, dst), vec_avg),
+                 OFF_1 + CFL_LINE_2, dst);
+      vec_vsx_st(vec_sub(vec_vsx_ld(OFF_1 + CFL_LINE_3, dst), vec_avg),
+                 OFF_1 + CFL_LINE_3, dst);
     }
     if (width == 32) {
-      vec_vsx_st(vec_sub(vec_vsx_ld(OFF_2, pred_buf), vec_avg), OFF_2,
-                 pred_buf);
-      vec_vsx_st(vec_sub(vec_vsx_ld(OFF_2 + CFL_LINE_1, pred_buf), vec_avg),
-                 OFF_2 + CFL_LINE_1, pred_buf);
-      vec_vsx_st(vec_sub(vec_vsx_ld(OFF_2 + CFL_LINE_2, pred_buf), vec_avg),
-                 OFF_2 + CFL_LINE_2, pred_buf);
-      vec_vsx_st(vec_sub(vec_vsx_ld(OFF_2 + CFL_LINE_3, pred_buf), vec_avg),
-                 OFF_2 + CFL_LINE_3, pred_buf);
+      vec_vsx_st(vec_sub(vec_vsx_ld(OFF_2, dst), vec_avg), OFF_2, dst);
+      vec_vsx_st(vec_sub(vec_vsx_ld(OFF_2 + CFL_LINE_1, dst), vec_avg),
+                 OFF_2 + CFL_LINE_1, dst);
+      vec_vsx_st(vec_sub(vec_vsx_ld(OFF_2 + CFL_LINE_2, dst), vec_avg),
+                 OFF_2 + CFL_LINE_2, dst);
+      vec_vsx_st(vec_sub(vec_vsx_ld(OFF_2 + CFL_LINE_3, dst), vec_avg),
+                 OFF_2 + CFL_LINE_3, dst);
 
-      vec_vsx_st(vec_sub(vec_vsx_ld(OFF_3, pred_buf), vec_avg), OFF_3,
-                 pred_buf);
-      vec_vsx_st(vec_sub(vec_vsx_ld(OFF_3 + CFL_LINE_1, pred_buf), vec_avg),
-                 OFF_3 + CFL_LINE_1, pred_buf);
-      vec_vsx_st(vec_sub(vec_vsx_ld(OFF_3 + CFL_LINE_2, pred_buf), vec_avg),
-                 OFF_3 + CFL_LINE_2, pred_buf);
-      vec_vsx_st(vec_sub(vec_vsx_ld(OFF_3 + CFL_LINE_3, pred_buf), vec_avg),
-                 OFF_3 + CFL_LINE_3, pred_buf);
+      vec_vsx_st(vec_sub(vec_vsx_ld(OFF_3, dst), vec_avg), OFF_3, dst);
+      vec_vsx_st(vec_sub(vec_vsx_ld(OFF_3 + CFL_LINE_1, dst), vec_avg),
+                 OFF_3 + CFL_LINE_1, dst);
+      vec_vsx_st(vec_sub(vec_vsx_ld(OFF_3 + CFL_LINE_2, dst), vec_avg),
+                 OFF_3 + CFL_LINE_2, dst);
+      vec_vsx_st(vec_sub(vec_vsx_ld(OFF_3 + CFL_LINE_3, dst), vec_avg),
+                 OFF_3 + CFL_LINE_3, dst);
     }
-  } while ((pred_buf += CFL_BUF_LINE * 4) < end);
+  } while ((dst += CFL_BUF_LINE * 4) < dst_end);
 }
 
 // Declare wrappers for VSX sizes
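
For context, not part of the patch: the "wrappers" in the trailing comment are the size-specific entry points that bind subtract_average_vsx to fixed block dimensions, which is why the signature change ripples through this file. In libaom they are generated by the CFL_SUB_AVG_FN macro from av1/common/cfl.h; a minimal hand-written sketch of one such wrapper under the new (src_ptr, dst) signature might look like this, assuming the usual CFL convention that round_offset is half the pixel count (the function name and constants below are illustrative, not taken from the patch):

// Illustrative only: a hypothetical hand-expanded 8x8 wrapper matching the
// patched signature; the real wrappers come from CFL_SUB_AVG_FN(vsx).
// Assumption: for an 8x8 block, num_pel = 64, so round_offset = 32 and
// num_pel_log2 = 6.
static void subtract_average_8x8_vsx(const uint16_t *src, int16_t *dst) {
  subtract_average_vsx(src, dst, /*width=*/8, /*height=*/8,
                       /*round_offset=*/32, /*num_pel_log2=*/6);
}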
