Arquivos
ardrone_autonomy/ARDroneLib/VLIB/Platform/arm9/video_quantizer_p5p.S
T
2013-10-21 11:07:21 -07:00

83 linhas
3.4 KiB
ArmAsm

#include "video_utils_p5p.h"
#ifdef HAS_DO_QUANTIZE_INTRA_MB
.global do_quantize_intra_mb
.type do_quantize_intra_mb, %function
/*
 * do_quantize_intra_mb
 *
 * Quantizes, in place, the six 64-coefficient blocks of one intra macroblock
 * (6 * 64 signed 16-bit coefficients, 768 bytes) and writes one 32-bit
 * non-zero-level count per block.
 *
 * Per block:
 *   DC  : level = (dc + 4) >> 3, and a level of 0 is replaced by 1
 *   AC  : level = sign(ac) * (|ac * invQuant| >> 16)
 *   cnt : 1 (for the DC) + number of AC levels that are non-zero
 *
 * Coefficients are handled two at a time: each 32-bit load brings in a pair,
 * and the ARMv5TE DSP multiplies smulbb / smulbt pick the bottom / top
 * halfword.  (Original author note: smul<x><y> costs one cycle when the next
 * instruction does not depend on its result.)  The low halfword of a loaded
 * word is treated as the first coefficient of the pair, i.e. little-endian
 * layout.  All data accesses are word ldr/str, so r0 is expected to be
 * 4-byte aligned.
 *
 * Registers on entry
 *   r0 : [in/out ptr] coefficient data, overwritten with the levels
 *   r1 : [in]         quantization factor; only the bottom 16 bits are used,
 *                     as a signed multiplier
 *   r2 : [out ptr]    receives six 32-bit non-zero counts, one per block
 *
 * Working registers
 *   r3     : pair read from [r0], then the level of the low halfword
 *   r4     : level of the high halfword
 *   r5     : non-zero count of the current block
 *   ip/r12 : block counter (6 down to 0)
 *   lr/r14 : coefficient pairs left in the current block
 *
 * On exit
 *   r0 : entry r0 + 768 (just past the last coefficient)
 *        NOTE(review): presumably consumed as the C return value -- confirm
 *        against the prototype.
 *   r2 : entry r2 + 24
 *   r1, r4, r5 preserved; r3, ip and the flags are clobbered.
 *
 * Limitation: (dc + 4) is evaluated as (dc << 16) + 0x40000, which wraps
 * for dc >= 0x7FFC.
 */
do_quantize_intra_mb:
        stmdb   sp!, {r4, r5, lr}
        mov     ip, #6                      /* block counter i = 6 */
        ldr     r3, [r0]                    /* r3 = AC(1):DC of the first block */

do_quantize_intra_l0:
        /* ---- first pair of a block: DC + AC(1) ---- */
        mov     r5, #1                      /* count = 1, the DC level is never zero */
        smulbt  r4, r1, r3                  /* r4 = AC(1) * invQuant */
        mov     r3, r3, lsl #16             /* DC into the top halfword, AC(1) dropped */
        add     r3, r3, #0x40000            /* DC + 4, scaled by 2^16 */
        movs    r3, r3, asr #19             /* r3 = (DC + 4) >> 3, sign-extended */
        moveq   r3, #1                      /* if( level == 0 ) level = 1 */
        /*
         * Truncate the DC level to 16 bits.  A negative level is sign-extended
         * by the asr above; without this its 0xFFFF top halfword would be
         * stored into (or OR-ed over) the AC(1) slot.  Neither mov sets flags.
         */
        mov     r3, r3, lsl #16
        mov     r3, r3, lsr #16
        cmp     r4, #0
        beq     do_quantize_intra_l01       /* AC(1) product is zero: level stays 0 */
        rsblt   r4, r4, #0                  /* r4 = |product| (flags still from cmp) */
        mov     r4, r4, asr #16             /* |level| = |product| >> 16 */
        rsblt   r4, r4, #0                  /* restore the sign */
        cmp     r4, #0
        addne   r5, r5, #1                  /* if( level != 0 ) count++ */
        orrne   r3, r3, r4, lsl #16         /* pack AC(1) level above the DC level */
do_quantize_intra_l01:
        str     r3, [r0]
        ldr     r3, [r0, #4]!               /* read AC(3):AC(2) */
        mov     lr, #31                     /* 31 remaining pairs in the block */

do_quantize_intra_l1:
        /* ---- AC pairs ---- */
        cmp     r3, #0                      /* both coefficients zero: nothing to do */
        beq     do_quantize_intra_l2
        smulbt  r4, r1, r3                  /* r4 = high coefficient * invQuant */
        smulbb  r3, r1, r3                  /* r3 = low  coefficient * invQuant */
        cmp     r4, #0
        beq     do_quantize_intra_l11
        rsblt   r4, r4, #0                  /* r4 = |product| */
        mov     r4, r4, asr #16             /* |level| = |product| >> 16 */
        rsblt   r4, r4, #0                  /* restore the sign */
        movs    r4, r4, lsl #16             /* keep 16 bits, in the top halfword */
        addne   r5, r5, #1                  /* if( level != 0 ) count++ */
do_quantize_intra_l11:
        cmp     r3, #0
        beq     do_quantize_intra_l12
        rsblt   r3, r3, #0                  /* r3 = |product| */
        mov     r3, r3, asr #16             /* |level| = |product| >> 16 */
        rsblt   r3, r3, #0                  /* restore the sign */
        movs    r3, r3, lsl #16             /* keep 16 bits, in the top halfword */
        addne   r5, r5, #1                  /* if( level != 0 ) count++ */
do_quantize_intra_l12:
        orr     r3, r4, r3, lsr #16         /* repack: high level | low level */
        str     r3, [r0]
do_quantize_intra_l2:
        subs    lr, lr, #1
        ldrne   r3, [r0, #4]!               /* read the next AC pair */
        bne     do_quantize_intra_l1

        str     r5, [r2], #4                /* store the count of the current block */
        subs    ip, ip, #1                  /* i-- */
        ldrne   r3, [r0, #4]!               /* read AC(1):DC of the next block */
        bne     do_quantize_intra_l0

do_quantize_intra_exit:
        add     r0, r0, #4                  /* the last conditional loads were skipped, so step
                                               past the final pair to leave r0 at the end */
        ldmia   sp!, {r4, r5, pc}
.size do_quantize_intra_mb, .-do_quantize_intra_mb
#endif // HAS_DO_QUANTIZE_INTRA_MB