Gentoo Websites Logo
Go to: Gentoo Home Documentation Forums Lists Bugs Planet Store Wiki Get Gentoo!
View | Details | Raw Unified | Return to bug 66350 | Differences between
and this patch

Collapse All | Expand All

(-)flac-1.1.1/src/libFLAC/ppc/lpc_asm.s (-157 / +157 lines)
Lines 1-90 Link Here
1
;  libFLAC - Free Lossless Audio Codec library
1
/* libFLAC - Free Lossless Audio Codec library
2
;  Copyright (C) 2004  Josh Coalson
2
 * Copyright (C) 2004  Josh Coalson
3
;
3
 *
4
;  Redistribution and use in source and binary forms, with or without
4
 * Redistribution and use in source and binary forms, with or without
5
;  modification, are permitted provided that the following conditions
5
 * modification, are permitted provided that the following conditions
6
;  are met:
6
 * are met:
7
;
7
 *
8
;  - Redistributions of source code must retain the above copyright
8
 * - Redistributions of source code must retain the above copyright
9
;  notice, this list of conditions and the following disclaimer.
9
 * notice, this list of conditions and the following disclaimer.
10
;
10
 *
11
;  - Redistributions in binary form must reproduce the above copyright
11
 * - Redistributions in binary form must reproduce the above copyright
12
;  notice, this list of conditions and the following disclaimer in the
12
 * notice, this list of conditions and the following disclaimer in the
13
;  documentation and/or other materials provided with the distribution.
13
 * documentation and/or other materials provided with the distribution.
14
;
14
 *
15
;  - Neither the name of the Xiph.org Foundation nor the names of its
15
 * - Neither the name of the Xiph.org Foundation nor the names of its
16
;  contributors may be used to endorse or promote products derived from
16
 * contributors may be used to endorse or promote products derived from
17
;  this software without specific prior written permission.
17
 * this software without specific prior written permission.
18
;
18
 *
19
;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20
;  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21
;  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22
;  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
22
 * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
23
;  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24
;  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25
;  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
25
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
26
;  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
27
;  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
28
;  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29
;  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
30
 */
31
.text
31
.text
32
	.align 2
32
	.align 2
33
.globl _FLAC__lpc_restore_signal_asm_ppc_altivec_16
33
.globl FLAC__lpc_restore_signal_asm_ppc_altivec_16
34
.globl _FLAC__lpc_restore_signal_asm_ppc_altivec_16_order8
34
.globl FLAC__lpc_restore_signal_asm_ppc_altivec_16_order8
35
36
_FLAC__lpc_restore_signal_asm_ppc_altivec_16:
37
;	r3: residual[]
38
;	r4: data_len
39
;	r5: qlp_coeff[]
40
;	r6: order
41
;	r7: lp_quantization
42
;	r8: data[]
43
44
; see src/libFLAC/lpc.c:FLAC__lpc_restore_signal()
45
; this is a PowerPC/Altivec assembly version which requires bps<=16 (or actual
46
; bps<=15 for mid-side coding, since that uses an extra bit)
47
48
; these should be fast; the inner loop is unrolled (it takes no more than
49
; 3*(order%4) instructions, all of which are arithmetic), and all of the
50
; coefficients and all relevant history stay in registers, so the outer loop
51
; has only one load from memory (the residual)
52
53
; I have not yet run this through simg4, so there may be some avoidable stalls,
54
; and there may be a somewhat more clever way to do the outer loop
55
56
; the branch mechanism may prevent dynamic loading; I still need to examine
57
; this issue, and there may be a more elegant method
58
35
36
FLAC__lpc_restore_signal_asm_ppc_altivec_16:
37
/*	r3: residual[]
38
 *	r4: data_len
39
 *	r5: qlp_coeff[]
40
 *	r6: order
41
 *	r7: lp_quantization
42
 *	r8: data[]
43
 *
44
 * see src/libFLAC/lpc.c:FLAC__lpc_restore_signal()
45
 * this is a PowerPC/Altivec assembly version which requires bps<=16 (or actual
46
 * bps<=15 for mid-side coding, since that uses an extra bit)
47
 *
48
 * these should be fast; the inner loop is unrolled (it takes no more than
49
 * 3*(order%4) instructions, all of which are arithmetic), and all of the
50
 * coefficients and all relevant history stay in registers, so the outer loop
51
 * has only one load from memory (the residual)
52
 *
53
 * I have not yet run this through simg4, so there may be some avoidable stalls,
54
 * and there may be a somewhat more clever way to do the outer loop
55
 *
56
 * the branch mechanism may prevent dynamic loading; I still need to examine 
57
 * this issue, and there may be a more elegant method
58
 */
59
	stmw r31,-4(r1)
59
	stmw r31,-4(r1)
60
60
61
	addi r9,r1,-28
61
	addi r9,r1,-28
62
	li r31,0xf
62
	li r31,0xf
63
	andc r9,r9,r31 ; for quadword-aligned stack data
63
	andc r9,r9,r31 /* for quadword-aligned stack data */
64
64
65
	slwi r6,r6,2 ; adjust for word size
65
	slwi r6,r6,2  /* adjust for word size */
66
	slwi r4,r4,2
66
	slwi r4,r4,2
67
	add r4,r4,r8 ; r4 = data+data_len
67
	add r4,r4,r8 /* r4 = data+data_len */
68
68
69
	mfspr r0,256 ; cache old vrsave
69
	mfspr r0,256 /* cache old vrsave */
70
	addis r31,0,hi16(0xfffffc00)
70
	addis r31,0,0xfffffc00@ha
71
	ori r31,r31,lo16(0xfffffc00)
71
	ori r31,r31,0xfffffc00@l
72
	mtspr 256,r31 ; declare VRs in vrsave
72
	mtspr 256,r31 /* declare VRs in vrsave */
73
73
74
	cmplw cr0,r8,r4 ; i<data_len
74
	cmplw cr0,r8,r4 /* i<data_len */
75
	bc 4,0,L1400
75
	bc 4,0,L1400
76
76
77
	; load coefficients into v0-v7 and initial history into v8-v15
77
	/* load coefficients into v0-v7 and initial history into v8-v15 */
78
	li r31,0xf
78
	li r31,0xf
79
	and r31,r8,r31 ; r31: data%4
79
	and r31,r8,r31 /* r31: data%4 */
80
	li r11,16
80
	li r11,16
81
	subf r31,r31,r11 ; r31: 4-(data%4)
81
	subf r31,r31,r11 /* r31: 4-(data%4) */
82
	slwi r31,r31,3 ; convert to bits for vsro
82
	slwi r31,r31,3 /* convert to bits for vsro */
83
	li r10,-4
83
	li r10,-4
84
	stw r31,-4(r9)
84
	stw r31,-4(r9)
85
	lvewx v0,r10,r9
85
	lvewx v0,r10,r9
86
	vspltisb v18,-1
86
	vspltisb v18,-1
87
	vsro v18,v18,v0 ; v18: mask vector
87
	vsro v18,v18,v0 /* v18: mask vector */
88
88
89
	li r31,0x8
89
	li r31,0x8
90
	lvsl v0,0,r31
90
	lvsl v0,0,r31
Lines 94-107 Link Here
94
	vspltisb v2,0
94
	vspltisb v2,0
95
	vspltisb v3,-1
95
	vspltisb v3,-1
96
	vmrglw v2,v2,v3
96
	vmrglw v2,v2,v3
97
	vsel v0,v1,v0,v2 ; v0: reversal permutation vector
97
	vsel v0,v1,v0,v2 /* v0: reversal permutation vector */
98
98
99
	add r10,r5,r6
99
	add r10,r5,r6
100
	lvsl v17,0,r5 ; v17: coefficient alignment permutation vector
100
	lvsl v17,0,r5 /* v17: coefficient alignment permutation vector */
101
	vperm v17,v17,v17,v0 ; v17: reversal coefficient alignment permutation vector
101
	vperm v17,v17,v17,v0 /* v17: reversal coefficient alignment permutation vector */
102
102
103
	mr r11,r8
103
	mr r11,r8
104
	lvsl v16,0,r11 ; v16: history alignment permutation vector
104
	lvsl v16,0,r11 /* v16: history alignment permutation vector */
105
105
106
	lvx v0,0,r5
106
	lvx v0,0,r5
107
	addi r5,r5,16
107
	addi r5,r5,16
Lines 114-121 Link Here
114
	cmplw cr0,r5,r10
114
	cmplw cr0,r5,r10
115
	bc 12,0,L1101
115
	bc 12,0,L1101
116
	vand v0,v0,v18
116
	vand v0,v0,v18
117
	addis r31,0,hi16(L1307)
117
	addis r31,0,L1307@ha
118
	ori r31,r31,lo16(L1307)
118
	ori r31,r31,L1307@l
119
	b L1199
119
	b L1199
120
120
121
L1101:
121
L1101:
Lines 128-135 Link Here
128
	cmplw cr0,r5,r10
128
	cmplw cr0,r5,r10
129
	bc 12,0,L1102
129
	bc 12,0,L1102
130
	vand v1,v1,v18
130
	vand v1,v1,v18
131
	addis r31,0,hi16(L1306)
131
	addis r31,0,L1306@ha
132
	ori r31,r31,lo16(L1306)
132
	ori r31,r31,L1306@l
133
	b L1199
133
	b L1199
134
134
135
L1102:
135
L1102:
Lines 142-149 Link Here
142
	cmplw cr0,r5,r10
142
	cmplw cr0,r5,r10
143
	bc 12,0,L1103
143
	bc 12,0,L1103
144
	vand v2,v2,v18
144
	vand v2,v2,v18
145
	addis r31,0,hi16(L1305)
145
	addis r31,0,L1305@ha
146
	ori r31,r31,lo16(L1305)
146
	ori r31,r31,L1305@l
147
	b L1199
147
	b L1199
148
148
149
L1103:
149
L1103:
Lines 156-163 Link Here
156
	cmplw cr0,r5,r10
156
	cmplw cr0,r5,r10
157
	bc 12,0,L1104
157
	bc 12,0,L1104
158
	vand v3,v3,v18
158
	vand v3,v3,v18
159
	addis r31,0,hi16(L1304)
159
	addis r31,0,L1304@ha
160
	ori r31,r31,lo16(L1304)
160
	ori r31,r31,L1304@l
161
	b L1199
161
	b L1199
162
162
163
L1104:
163
L1104:
Lines 170-177 Link Here
170
	cmplw cr0,r5,r10
170
	cmplw cr0,r5,r10
171
	bc 12,0,L1105
171
	bc 12,0,L1105
172
	vand v4,v4,v18
172
	vand v4,v4,v18
173
	addis r31,0,hi16(L1303)
173
	addis r31,0,L1303@ha
174
	ori r31,r31,lo16(L1303)
174
	ori r31,r31,L1303@l
175
	b L1199
175
	b L1199
176
176
177
L1105:
177
L1105:
Lines 184-191 Link Here
184
	cmplw cr0,r5,r10
184
	cmplw cr0,r5,r10
185
	bc 12,0,L1106
185
	bc 12,0,L1106
186
	vand v5,v5,v18
186
	vand v5,v5,v18
187
	addis r31,0,hi16(L1302)
187
	addis r31,0,L1302@ha
188
	ori r31,r31,lo16(L1302)
188
	ori r31,r31,L1302@l
189
	b L1199
189
	b L1199
190
190
191
L1106:
191
L1106:
Lines 198-205 Link Here
198
	cmplw cr0,r5,r10
198
	cmplw cr0,r5,r10
199
	bc 12,0,L1107
199
	bc 12,0,L1107
200
	vand v6,v6,v18
200
	vand v6,v6,v18
201
	addis r31,0,hi16(L1301)
201
	addis r31,0,L1301@ha
202
	ori r31,r31,lo16(L1301)
202
	ori r31,r31,L1301@l
203
	b L1199
203
	b L1199
204
204
205
L1107:
205
L1107:
Lines 210-239 Link Here
210
	lvx v19,0,r11
210
	lvx v19,0,r11
211
	vperm v15,v19,v15,v16
211
	vperm v15,v19,v15,v16
212
	vand v7,v7,v18
212
	vand v7,v7,v18
213
	addis r31,0,hi16(L1300)
213
	addis r31,0,L1300@ha
214
	ori r31,r31,lo16(L1300)
214
	ori r31,r31,L1300@l
215
215
216
L1199:
216
L1199:
217
	mtctr r31
217
	mtctr r31
218
218
219
	; set up invariant vectors
219
	/* set up invariant vectors */
220
	vspltish v16,0 ; v16: zero vector
220
	vspltish v16,0 /* v16: zero vector */
221
221
222
	li r10,-12
222
	li r10,-12
223
	lvsr v17,r10,r8 ; v17: result shift vector
223
	lvsr v17,r10,r8 /* v17: result shift vector */
224
	lvsl v18,r10,r3 ; v18: residual shift back vector
224
	lvsl v18,r10,r3 /* v18: residual shift back vector */
225
225
226
	li r10,-4
226
	li r10,-4
227
	stw r7,-4(r9)
227
	stw r7,-4(r9)
228
	lvewx v19,r10,r9 ; v19: lp_quantization vector
228
	lvewx v19,r10,r9 /* v19: lp_quantization vector */
229
229
230
L1200:
230
L1200:
231
	vmulosh v20,v0,v8 ; v20: sum vector
231
	vmulosh v20,v0,v8 /* v20: sum vector */
232
	bcctr 20,0
232
	bcctr 20,0
233
233
234
L1300:
234
L1300:
235
	vmulosh v21,v7,v15
235
	vmulosh v21,v7,v15
236
	vsldoi v15,v15,v14,4 ; increment history
236
	vsldoi v15,v15,v14,4 /* increment history */
237
	vaddsws v20,v20,v21
237
	vaddsws v20,v20,v21
238
238
239
L1301:
239
L1301:
Lines 267-339 Link Here
267
	vaddsws v20,v20,v21
267
	vaddsws v20,v20,v21
268
268
269
L1307:
269
L1307:
270
	vsumsws v20,v20,v16 ; v20[3]: sum
270
	vsumsws v20,v20,v16 /* v20[3]: sum */
271
	vsraw v20,v20,v19 ; v20[3]: sum >> lp_quantization
271
	vsraw v20,v20,v19 /* v20[3]: sum >> lp_quantization */
272
272
273
	lvewx v21,0,r3 ; v21[n]: *residual
273
	lvewx v21,0,r3 /* v21[n]: *residual */
274
	vperm v21,v21,v21,v18 ; v21[3]: *residual
274
	vperm v21,v21,v21,v18 /* v21[3]: *residual */
275
	vaddsws v20,v21,v20 ; v20[3]: *residual + (sum >> lp_quantization)
275
	vaddsws v20,v21,v20 /* v20[3]: *residual + (sum >> lp_quantization) */
276
	vsldoi v18,v18,v18,4 ; increment shift vector
276
	vsldoi v18,v18,v18,4 /* increment shift vector */
277
277
278
	vperm v21,v20,v20,v17 ; v21[n]: shift for storage
278
	vperm v21,v20,v20,v17 /* v21[n]: shift for storage */
279
	vsldoi v17,v17,v17,12 ; increment shift vector
279
	vsldoi v17,v17,v17,12 /* increment shift vector */
280
	stvewx v21,0,r8
280
	stvewx v21,0,r8
281
281
282
	vsldoi v20,v20,v20,12
282
	vsldoi v20,v20,v20,12
283
	vsldoi v8,v8,v20,4 ; insert value onto history
283
	vsldoi v8,v8,v20,4 /* insert value onto history */
284
284
285
	addi r3,r3,4
285
	addi r3,r3,4
286
	addi r8,r8,4
286
	addi r8,r8,4
287
	cmplw cr0,r8,r4 ; i<data_len
287
	cmplw cr0,r8,r4 /* i<data_len */
288
	bc 12,0,L1200
288
	bc 12,0,L1200
289
289
290
L1400:
290
L1400:
291
	mtspr 256,r0 ; restore old vrsave
291
	mtspr 256,r0 /* restore old vrsave */
292
	lmw r31,-4(r1)
292
	lmw r31,-4(r1)
293
	blr
293
	blr
294
294
295
_FLAC__lpc_restore_signal_asm_ppc_altivec_16_order8:
295
FLAC__lpc_restore_signal_asm_ppc_altivec_16_order8:
296
;	r3: residual[]
296
/*	r3: residual[]
297
;	r4: data_len
297
 *	r4: data_len
298
;	r5: qlp_coeff[]
298
 *	r5: qlp_coeff[]
299
;	r6: order
299
 *	r6: order
300
;	r7: lp_quantization
300
 *	r7: lp_quantization
301
;	r8: data[]
301
 *	r8: data[]
302
302
 *
303
; see _FLAC__lpc_restore_signal_asm_ppc_altivec_16() above
303
 * see FLAC__lpc_restore_signal_asm_ppc_altivec_16() above
304
; this version assumes order<=8; it uses fewer vector registers, which should
304
 * this version assumes order<=8; it uses fewer vector registers, which should
305
; save time in context switches, and has less code, which may improve
305
 * save time in context switches, and has less code, which may improve
306
; instruction caching
306
 * instruction caching
307
307
 */
308
	stmw r31,-4(r1)
308
	stmw r31,-4(r1)
309
309
310
	addi r9,r1,-28
310
	addi r9,r1,-28
311
	li r31,0xf
311
	li r31,0xf
312
	andc r9,r9,r31 ; for quadword-aligned stack data
312
	andc r9,r9,r31 /* for quadword-aligned stack data */
313
313
314
	slwi r6,r6,2 ; adjust for word size
314
	slwi r6,r6,2 /* adjust for word size */
315
	slwi r4,r4,2
315
	slwi r4,r4,2
316
	add r4,r4,r8 ; r4 = data+data_len
316
	add r4,r4,r8 /* r4 = data+data_len */
317
317
318
	mfspr r0,256 ; cache old vrsave
318
	mfspr r0,256 /* cache old vrsave */
319
	addis r31,0,hi16(0xffc00000)
319
	addis r31,0,0xffc00000@ha
320
	ori r31,r31,lo16(0xffc00000)
320
	ori r31,r31,0xffc00000@l
321
	mtspr 256,r31 ; declare VRs in vrsave
321
	mtspr 256,r31 /* declare VRs in vrsave */
322
322
323
	cmplw cr0,r8,r4 ; i<data_len
323
	cmplw cr0,r8,r4 /* i<data_len */
324
	bc 4,0,L2400
324
	bc 4,0,L2400
325
325
326
	; load coefficients into v0-v1 and initial history into v2-v3
326
	/* load coefficients into v0-v1 and initial history into v2-v3 */
327
	li r31,0xf
327
	li r31,0xf
328
	and r31,r8,r31 ; r31: data%4
328
	and r31,r8,r31 /* r31: data%4 */
329
	li r11,16
329
	li r11,16
330
	subf r31,r31,r11 ; r31: 4-(data%4)
330
	subf r31,r31,r11 /* r31: 4-(data%4) */
331
	slwi r31,r31,3 ; convert to bits for vsro
331
	slwi r31,r31,3 /* convert to bits for vsro */
332
	li r10,-4
332
	li r10,-4
333
	stw r31,-4(r9)
333
	stw r31,-4(r9)
334
	lvewx v0,r10,r9
334
	lvewx v0,r10,r9
335
	vspltisb v6,-1
335
	vspltisb v6,-1
336
	vsro v6,v6,v0 ; v6: mask vector
336
	vsro v6,v6,v0 /* v6: mask vector */
337
337
338
	li r31,0x8
338
	li r31,0x8
339
	lvsl v0,0,r31
339
	lvsl v0,0,r31
Lines 343-356 Link Here
343
	vspltisb v2,0
343
	vspltisb v2,0
344
	vspltisb v3,-1
344
	vspltisb v3,-1
345
	vmrglw v2,v2,v3
345
	vmrglw v2,v2,v3
346
	vsel v0,v1,v0,v2 ; v0: reversal permutation vector
346
	vsel v0,v1,v0,v2 /* v0: reversal permutation vector */
347
347
348
	add r10,r5,r6
348
	add r10,r5,r6
349
	lvsl v5,0,r5 ; v5: coefficient alignment permutation vector
349
	lvsl v5,0,r5 /* v5: coefficient alignment permutation vector */
350
	vperm v5,v5,v5,v0 ; v5: reversal coefficient alignment permutation vector
350
	vperm v5,v5,v5,v0 /* v5: reversal coefficient alignment permutation vector */
351
351
352
	mr r11,r8
352
	mr r11,r8
353
	lvsl v4,0,r11 ; v4: history alignment permutation vector
353
	lvsl v4,0,r11 /* v4: history alignment permutation vector */
354
354
355
	lvx v0,0,r5
355
	lvx v0,0,r5
356
	addi r5,r5,16
356
	addi r5,r5,16
Lines 363-370 Link Here
363
	cmplw cr0,r5,r10
363
	cmplw cr0,r5,r10
364
	bc 12,0,L2101
364
	bc 12,0,L2101
365
	vand v0,v0,v6
365
	vand v0,v0,v6
366
	addis r31,0,hi16(L2301)
366
	addis r31,0,L2301@ha
367
	ori r31,r31,lo16(L2301)
367
	ori r31,r31,L2301@l
368
	b L2199
368
	b L2199
369
369
370
L2101:
370
L2101:
Lines 375-399 Link Here
375
	lvx v7,0,r11
375
	lvx v7,0,r11
376
	vperm v3,v7,v3,v4
376
	vperm v3,v7,v3,v4
377
	vand v1,v1,v6
377
	vand v1,v1,v6
378
	addis r31,0,hi16(L2300)
378
	addis r31,0,L2300@ha
379
	ori r31,r31,lo16(L2300)
379
	ori r31,r31,L2300@l
380
380
381
L2199:
381
L2199:
382
	mtctr r31
382
	mtctr r31
383
383
384
	; set up invariant vectors
384
	/* set up invariant vectors */
385
	vspltish v4,0 ; v4: zero vector
385
	vspltish v4,0 /* v4: zero vector */
386
386
387
	li r10,-12
387
	li r10,-12
388
	lvsr v5,r10,r8 ; v5: result shift vector
388
	lvsr v5,r10,r8 /* v5: result shift vector */
389
	lvsl v6,r10,r3 ; v6: residual shift back vector
389
	lvsl v6,r10,r3 /* v6: residual shift back vector */
390
390
391
	li r10,-4
391
	li r10,-4
392
	stw r7,-4(r9)
392
	stw r7,-4(r9)
393
	lvewx v7,r10,r9 ; v7: lp_quantization vector
393
	lvewx v7,r10,r9 /* v7: lp_quantization vector */
394
394
395
L2200:
395
L2200:
396
	vmulosh v8,v0,v2 ; v8: sum vector
396
	vmulosh v8,v0,v2 /* v8: sum vector */
397
	bcctr 20,0
397
	bcctr 20,0
398
398
399
L2300:
399
L2300:
Lines 402-428 Link Here
402
	vaddsws v8,v8,v9
402
	vaddsws v8,v8,v9
403
403
404
L2301:
404
L2301:
405
	vsumsws v8,v8,v4 ; v8[3]: sum
405
	vsumsws v8,v8,v4 /* v8[3]: sum */
406
	vsraw v8,v8,v7 ; v8[3]: sum >> lp_quantization
406
	vsraw v8,v8,v7 /* v8[3]: sum >> lp_quantization */
407
407
408
	lvewx v9,0,r3 ; v9[n]: *residual
408
	lvewx v9,0,r3 /* v9[n]: *residual */
409
	vperm v9,v9,v9,v6 ; v9[3]: *residual
409
	vperm v9,v9,v9,v6 /* v9[3]: *residual */
410
	vaddsws v8,v9,v8 ; v8[3]: *residual + (sum >> lp_quantization)
410
	vaddsws v8,v9,v8 /* v8[3]: *residual + (sum >> lp_quantization) */
411
	vsldoi v6,v6,v6,4 ; increment shift vector
411
	vsldoi v6,v6,v6,4 /* increment shift vector */
412
412
413
	vperm v9,v8,v8,v5 ; v9[n]: shift for storage
413
	vperm v9,v8,v8,v5 /* v9[n]: shift for storage */
414
	vsldoi v5,v5,v5,12 ; increment shift vector
414
	vsldoi v5,v5,v5,12 /* increment shift vector */
415
	stvewx v9,0,r8
415
	stvewx v9,0,r8
416
416
417
	vsldoi v8,v8,v8,12
417
	vsldoi v8,v8,v8,12
418
	vsldoi v2,v2,v8,4 ; insert value onto history
418
	vsldoi v2,v2,v8,4 /* insert value onto history */
419
419
420
	addi r3,r3,4
420
	addi r3,r3,4
421
	addi r8,r8,4
421
	addi r8,r8,4
422
	cmplw cr0,r8,r4 ; i<data_len
422
	cmplw cr0,r8,r4 /* i<data_len */
423
	bc 12,0,L2200
423
	bc 12,0,L2200
424
424
425
L2400:
425
L2400:
426
	mtspr 256,r0 ; restore old vrsave
426
	mtspr 256,r0 /* restore old vrsave */
427
	lmw r31,-4(r1)
427
	lmw r31,-4(r1)
428
	blr
428
	blr
(-)flac-1.1.1/src/libFLAC/ppc/Makefile.am (-1 / +1 lines)
Lines 35-41 Link Here
35
# For some unknown reason libtool can't figure out the tag for 'as', so
35
# For some unknown reason libtool can't figure out the tag for 'as', so
36
# we fake it with --tag=CC and strip out unwanted options.
36
# we fake it with --tag=CC and strip out unwanted options.
37
.s.lo:
37
.s.lo:
38
	$(LIBTOOL) --tag=CC --mode=compile $(STRIP_NON_ASM) as -force_cpusubtype_ALL -o $@ $<
38
	$(LIBTOOL) --tag=CC --mode=compile $(STRIP_NON_ASM) as -maltivec -mregnames -o $@ $<
39
39
40
noinst_LTLIBRARIES = libFLAC-asm.la
40
noinst_LTLIBRARIES = libFLAC-asm.la
41
libFLAC_asm_la_SOURCES = \
41
libFLAC_asm_la_SOURCES = \
(-)flac-1.1.1/src/libFLAC/ppc/Makefile.in (-1 / +1 lines)
Lines 482-488 Link Here
482
# For some unknown reason libtool can't figure out the tag for 'as', so
488
# For some unknown reason libtool can't figure out the tag for 'as', so
483
# we fake it with --tag=CC and strip out unwanted options.
489
# we fake it with --tag=CC and strip out unwanted options.
484
.s.lo:
490
.s.lo:
485
	$(LIBTOOL) --tag=CC --mode=compile $(STRIP_NON_ASM) as -force_cpusubtype_ALL -o $@ $<
491
	$(LIBTOOL) --tag=CC --mode=compile $(STRIP_NON_ASM) as -maltivec -mregnames -o $@ $<
486
# Tell versions [3.59,3.63) of GNU make to not export all variables.
492
# Tell versions [3.59,3.63) of GNU make to not export all variables.
487
# Otherwise a system limit (for SysV at least) may be exceeded.
493
# Otherwise a system limit (for SysV at least) may be exceeded.
488
.NOEXPORT:
494
.NOEXPORT:
(-)flac-1.1.1/src/libFLAC/Makefile.am (-1 / +1 lines)
Lines 45-51 Link Here
45
if FLaC__CPU_PPC
45
if FLaC__CPU_PPC
46
ARCH_SUBDIRS = ppc
46
ARCH_SUBDIRS = ppc
47
libFLAC_la_LIBADD = ppc/libFLAC-asm.la
47
libFLAC_la_LIBADD = ppc/libFLAC-asm.la
48
LOCAL_EXTRA_LDFLAGS = "-Wl,-read_only_relocs,warning"
48
LOCAL_EXTRA_LDFLAGS =
49
endif
49
endif
50
endif
50
endif
51
51
(-)flac-1.1.1/src/libFLAC/Makefile.in (-1 / +1 lines)
Lines 277-283 Link Here
277
@FLaC__CPU_PPC_TRUE@@FLaC__NO_ASM_FALSE@ARCH_SUBDIRS = ppc
277
@FLaC__CPU_PPC_TRUE@@FLaC__NO_ASM_FALSE@ARCH_SUBDIRS = ppc
278
@FLaC__CPU_IA32_TRUE@@FLaC__HAS_NASM_TRUE@@FLaC__NO_ASM_FALSE@libFLAC_la_LIBADD = ia32/libFLAC-asm.la
278
@FLaC__CPU_IA32_TRUE@@FLaC__HAS_NASM_TRUE@@FLaC__NO_ASM_FALSE@libFLAC_la_LIBADD = ia32/libFLAC-asm.la
279
@FLaC__CPU_PPC_TRUE@@FLaC__NO_ASM_FALSE@libFLAC_la_LIBADD = ppc/libFLAC-asm.la
279
@FLaC__CPU_PPC_TRUE@@FLaC__NO_ASM_FALSE@libFLAC_la_LIBADD = ppc/libFLAC-asm.la
280
@FLaC__CPU_PPC_TRUE@@FLaC__NO_ASM_FALSE@LOCAL_EXTRA_LDFLAGS = "-Wl,-read_only_relocs,warning"
280
@FLaC__CPU_PPC_TRUE@@FLaC__NO_ASM_FALSE@LOCAL_EXTRA_LDFLAGS = 
281
SUBDIRS = $(ARCH_SUBDIRS) include .
281
SUBDIRS = $(ARCH_SUBDIRS) include .
282
m4datadir = $(datadir)/aclocal
282
m4datadir = $(datadir)/aclocal
283
m4data_DATA = libFLAC.m4
283
m4data_DATA = libFLAC.m4

Return to bug 66350