83 linhas
3.4 KiB
ArmAsm
83 linhas
3.4 KiB
ArmAsm
#include "video_utils_p5p.h"
|
|
|
|
#ifdef HAS_DO_QUANTIZE_INTRA_MB
|
|
|
|
.global do_quantize_intra_mb
.type do_quantize_intra_mb, %function

/*
 * do_quantize_intra_mb
 *
 * Quantizes, in place, the 6 blocks of an intra macroblock. Each block holds
 * 64 16-bit coefficients (1 DC followed by 63 AC) and is processed one 32-bit
 * word, i.e. two coefficients, at a time using the ARM926EJ-S DSP extension
 * (smul<x><y> = 1 cycle if no dependent instruction follows).
 *
 *   DC : coeff = (coeff + 4) >> 3, forced to 1 when the result is 0
 *   AC : coeff = sign(coeff) * ((|coeff| * invQuant) >> 16)
 *
 * The DC is taken from the low half-word of the first word of each block.
 *
 * In:
 *   r0 : pointer to the coefficients (accessed with ldr/str, so it is
 *        expected to be word aligned)
 *   r1 : quantization factor invQuant (only the low 16 bits are used)
 *   r2 : pointer to 6 words receiving, for each block, 1 + the number of
 *        AC coefficients that are still non zero after quantization
 * Out:
 *   r0 : address just past the last coefficient of the 6th block
 *        (presumably the function result - confirm against the C prototype)
 *   r2 : advanced by 6 words
 *
 * Register usage:
 *   r3     : word read from [r0], then the quantized low half-word coefficient
 *   r4     : quantized high half-word coefficient
 *   r5     : non-zero coefficient counter of the current block ("last")
 *   ip/r12 : block counter (a macroblock has 6 blocks)
 *   lr/r14 : number of coefficient pairs left in the current block
 *
 * r4, r5 and lr are saved on entry and restored on exit; r1 is unchanged;
 * r3, ip and the flags are clobbered.
 */
do_quantize_intra_mb:
        stmdb   sp!, {r4, r5, lr}
        mov     ip, #6                      /* initialize block counter i = 6 */
        ldr     r3, [r0]                    /* read dc coefficient & first ac coefficient */

do_quantize_intra_l0:
        mov     r5, #1                      /* last = 1 (the dc is never stored as zero) */
        smulbt  r4, r1, r3                  /* r4 = ac(1) * invQuant (high half of r3) */
        mov     r3, r3, lsl #16             /* move dc to the top half, dropping ac(1) */
        add     r3, r3, #0x40000            /* coeff = (*ptr + 4) >> 3 */
        movs    r3, r3, asr #19             /*   ... result is sign extended to 32 bits */
        moveq   r3, #1                      /* if( coeff == 0 ) coeff = 1 */
        mov     r3, r3, lsl #16             /* keep only the 16 lower significant bits: */
        mov     r3, r3, lsr #16             /*   a negative dc must not overwrite ac(1) */
        cmp     r4, #0
        beq     do_quantize_intra_l01       /* ac(1) is zero: high half of r3 is already 0 */
        rsblt   r4, r4, #0                  /* r4 = |coeff| */
        mov     r4, r4, asr #16             /* |coeff| >>= 16 (flags are left untouched) */
        rsblt   r4, r4, #0                  /* restore the sign, using the flags of the cmp */
        cmp     r4, #0
        addne   r5, r5, #1                  /* if( coeff != 0 ) last++ */
        orrne   r3, r3, r4, lsl #16         /* merge ac(1) into the high half-word */
do_quantize_intra_l01:
        str     r3, [r0]
        ldr     r3, [r0, #4]!               /* read ac(3) & ac(2) coefficients */
        mov     lr, #31                     /* 31 pairs left to read */

do_quantize_intra_l1:
        cmp     r3, #0                      /* do nothing if both coefficients are zero */
        beq     do_quantize_intra_l2
        smulbt  r4, r1, r3                  /* high coeff *= invQuant */
        smulbb  r3, r1, r3                  /* low coeff *= invQuant */
        cmp     r4, #0
        beq     do_quantize_intra_l11
        rsblt   r4, r4, #0                  /* r4 = |coeff| */
        mov     r4, r4, asr #16             /* |coeff| >>= 16 (flags are left untouched) */
        rsblt   r4, r4, #0                  /* restore the sign, using the flags of the cmp */
        movs    r4, r4, lsl #16             /* keep only 16 lower significant bits, in the high half */
        addne   r5, r5, #1                  /* if( coeff != 0 ) last++ */
do_quantize_intra_l11:
        cmp     r3, #0
        beq     do_quantize_intra_l12
        rsblt   r3, r3, #0                  /* r3 = |coeff| */
        mov     r3, r3, asr #16             /* |coeff| >>= 16 (flags are left untouched) */
        rsblt   r3, r3, #0                  /* restore the sign, using the flags of the cmp */
        movs    r3, r3, lsl #16             /* keep only 16 lower significant bits */
        addne   r5, r5, #1                  /* if( coeff != 0 ) last++ */
do_quantize_intra_l12:
        orr     r3, r4, r3, lsr #16         /* repack: r4 in the high half, r3 in the low half */
        str     r3, [r0]
do_quantize_intra_l2:
        subs    lr, lr, #1
        ldrne   r3, [r0, #4]!               /* read ac(i+1) & ac(i) coefficients */
        bne     do_quantize_intra_l1

        str     r5, [r2], #4                /* store number of non zero coefficients for current block */
        subs    ip, ip, #1                  /* i-- */
        ldrne   r3, [r0, #4]!               /* read dc coefficient & first ac coefficient */
        bne     do_quantize_intra_l0

do_quantize_intra_exit:
        add     r0, r0, #4                  /* keep consistency because the last reads are conditional */
        ldmia   sp!, {r4, r5, pc}
.size do_quantize_intra_mb, .-do_quantize_intra_mb
|
|
|
|
#endif // HAS_DO_QUANTIZE_INTRA_MB
|