RGBA 编码为 YUV420SP【NEON】

简介: RGBA 编码为 YUV420SP【NEON】

RGBA >> YUV420SP

  • C/C++ 朴素实现版
/**
 * Scalar reference conversion of a packed 4-byte-per-pixel image to NV21
 * (YUV420SP: a full-resolution Y plane followed by interleaved V,U bytes).
 *
 * @param yuv420sp  Output buffer. Receives width*height luma bytes, then one
 *                  V,U pair for every pixel that sits on an even row and an
 *                  even column. The caller is responsible for sizing it.
 * @param argb      Input pixels, 4 bytes each. Byte 0 is read as B, byte 1 as
 *                  G, byte 2 as R; byte 3 is ignored.
 * @param width     Number of pixels per row.
 * @param height    Number of rows.
 */
void encodeYUV420SP_CPU(unsigned char *__restrict__ yuv420sp,
                        unsigned char *__restrict__ argb, int width, int height) {
    int lumaPos = 0;
    int chromaPos = width * height;  /* chroma starts right after the Y plane */
    const unsigned char *px = argb;

    for (int row = 0; row < height; ++row) {
        const int evenRow = (row % 2 == 0);

        for (int col = 0; col < width; ++col, px += 4) {
            const int blue = px[0];
            const int green = px[1];
            const int red = px[2];

            /* Fixed-point BT.601-style luma: 8-bit coefficients, +128 rounds. */
            yuv420sp[lumaPos++] =
                    (uint8_t) (((66 * red + 129 * green + 25 * blue + 128) >> 8) + 16);

            /* One chroma sample per 2x2 block, taken from its top-left pixel.
             * NV21 ordering stores V before U. */
            if (evenRow && col % 2 == 0) {
                yuv420sp[chromaPos++] =
                        (uint8_t) (((112 * red - 94 * green - 18 * blue + 128) >> 8) + 128);
                yuv420sp[chromaPos++] =
                        (uint8_t) (((-38 * red - 74 * green + 112 * blue + 128) >> 8) + 128);
            }
        }
    }
}
  • Neon 实现版(Github上找的)
/**
 * NEON-accelerated conversion of a packed 4-byte-per-pixel image to NV21
 * (YUV420SP: a full-resolution Y plane followed by interleaved V,U bytes).
 *
 * Each row is processed 16 pixels at a time with NEON intrinsics; the
 * remaining (width % 16) pixels of the row fall back to scalar code that uses
 * the same fixed-point formulas:
 *
 *   Y = (( 66*R + 129*G +  25*B + 128) >> 8) + 16
 *   U = ((-38*R -  74*G + 112*B + 128) >> 8) + 128
 *   V = ((112*R -  94*G -  18*B + 128) >> 8) + 128
 *
 * @param yuv420sp  Output buffer. Receives width*height luma bytes, then one
 *                  V,U pair for every pixel on an even row and even column.
 *                  The caller is responsible for sizing it.
 * @param argb      Input pixels, 4 bytes each. Byte 0 is read as B, byte 1 as
 *                  G, byte 2 as R; byte 3 is ignored.
 * @param width     Number of pixels per row.
 * @param height    Number of rows.
 */
void encodeYUV420SP_NEON_Intrinsics(unsigned char *__restrict__ yuv420sp,
                                    unsigned char *__restrict__ argb, int width, int height) {
    const uint16x8_t u16_rounding = vdupq_n_u16(128); // inner +128 of the Y formula
    const int16x8_t s16_rounding = vdupq_n_s16(128);  // inner +128 of the U/V formulas
    // Outer +128 of U/V. Adding -128 (0x80) with 8-bit wrap-around yields the
    // same bit pattern as adding +128 to the unsigned result.
    const int8x8_t s8_rounding = vdup_n_s8(-128);
    const uint8x16_t offset = vdupq_n_u8(16);         // outer +16 of the Y formula
    // Keeps the low byte of every 16-bit lane. The unsigned dup matches the
    // declared vector type, so no lax vector conversion is needed.
    const uint16x8_t mask = vdupq_n_u16(255);
    int frameSize = width * height;
    int yIndex = 0;
    int uvIndex = frameSize; // chroma starts right after the Y plane
    int i;
    int j;
    for (j = 0; j < height; j++) {
        for (i = 0; i < width >> 4; i++) {
            // De-interleaving load of 16 pixels: val[0]=B, val[1]=G, val[2]=R.
            uint8x16x4_t pixel_argb = vld4q_u8(argb);
            argb += 4 * 16;
            uint8x8x2_t uint8_r;
            uint8x8x2_t uint8_g;
            uint8x8x2_t uint8_b;
            uint8_r.val[0] = vget_low_u8(pixel_argb.val[2]);
            uint8_r.val[1] = vget_high_u8(pixel_argb.val[2]);
            uint8_g.val[0] = vget_low_u8(pixel_argb.val[1]);
            uint8_g.val[1] = vget_high_u8(pixel_argb.val[1]);
            uint8_b.val[0] = vget_low_u8(pixel_argb.val[0]);
            uint8_b.val[1] = vget_high_u8(pixel_argb.val[0]);
            // Y = ((66 * R + 129 * G + 25 * B + 128) >> 8) + 16;
            // Widening multiply-accumulate into 16-bit lanes, two halves of 8.
            uint16x8x2_t uint16_y;
            uint8x8_t scalar = vdup_n_u8(66);
            uint8x16_t y;
            uint16_y.val[0] = vmull_u8(uint8_r.val[0], scalar);
            uint16_y.val[1] = vmull_u8(uint8_r.val[1], scalar);
            scalar = vdup_n_u8(129);
            uint16_y.val[0] = vmlal_u8(uint16_y.val[0], uint8_g.val[0], scalar);
            uint16_y.val[1] = vmlal_u8(uint16_y.val[1], uint8_g.val[1], scalar);
            scalar = vdup_n_u8(25);
            uint16_y.val[0] = vmlal_u8(uint16_y.val[0], uint8_b.val[0], scalar);
            uint16_y.val[1] = vmlal_u8(uint16_y.val[1], uint8_b.val[1], scalar);
            uint16_y.val[0] = vaddq_u16(uint16_y.val[0], u16_rounding);
            uint16_y.val[1] = vaddq_u16(uint16_y.val[1], u16_rounding);
            // >> 8 with narrowing back to bytes, then the +16 offset.
            y = vcombine_u8(vqshrn_n_u16(uint16_y.val[0], 8), vqshrn_n_u16(uint16_y.val[1], 8));
            y = vaddq_u8(y, offset);
            vst1q_u8(yuv420sp + yIndex, y);
            yIndex += 16;
            // U and V are only produced on even rows.
            if (j % 2 == 0) {
                // U = ((-38 * R - 74 * G + 112 * B + 128) >> 8) + 128;
                // V = ((112 * R - 94 * G - 18 * B + 128) >> 8) + 128;
                int16x8_t u_scalar = vdupq_n_s16(-38);
                int16x8_t v_scalar = vdupq_n_s16(112);
                // Chroma is horizontally subsampled by 2: view the 16 bytes as
                // 8 16-bit lanes and keep only the low byte of each lane.
                // NOTE(review): this selects the even-indexed pixels only on a
                // little-endian target - confirm if big-endian ever matters.
                int16x8_t r = vreinterpretq_s16_u16(
                        vandq_u16(vreinterpretq_u16_u8(pixel_argb.val[2]), mask));
                int16x8_t g = vreinterpretq_s16_u16(
                        vandq_u16(vreinterpretq_u16_u8(pixel_argb.val[1]), mask));
                int16x8_t b = vreinterpretq_s16_u16(
                        vandq_u16(vreinterpretq_u16_u8(pixel_argb.val[0]), mask));
                int16x8_t u;
                int16x8_t v;
                uint8x8x2_t uv;
                u = vmulq_s16(r, u_scalar);
                v = vmulq_s16(r, v_scalar);
                u_scalar = vdupq_n_s16(-74);
                v_scalar = vdupq_n_s16(-94);
                u = vmlaq_s16(u, g, u_scalar);
                v = vmlaq_s16(v, g, v_scalar);
                u_scalar = vdupq_n_s16(112);
                v_scalar = vdupq_n_s16(-18);
                u = vmlaq_s16(u, b, u_scalar);
                v = vmlaq_s16(v, b, v_scalar);
                u = vaddq_s16(u, s16_rounding);
                v = vaddq_s16(v, s16_rounding);
                // Signed >> 8 narrows to int8; the wrapping add of -128 then
                // gives the unsigned (value + 128) byte. NV21 stores V first.
                uv.val[1] = vreinterpret_u8_s8(vadd_s8(vqshrn_n_s16(u, 8), s8_rounding));
                uv.val[0] = vreinterpret_u8_s8(vadd_s8(vqshrn_n_s16(v, 8), s8_rounding));
                vst2_u8(yuv420sp + uvIndex, uv); // interleaves V,U,V,U,...
                uvIndex += 2 * 8;
            }
        }
        // Scalar tail: the last (width % 16) pixels of this row.
        for (i = ((width >> 4) << 4); i < width; i++) {
            uint8_t R = argb[2];
            uint8_t G = argb[1];
            uint8_t B = argb[0];
            argb += 4;
            // Same fixed-point RGB -> YUV formulas as the vector path.
            uint8_t Y = ((66 * R + 129 * G + 25 * B + 128) >> 8) + 16;
            uint8_t U = ((-38 * R - 74 * G + 112 * B + 128) >> 8) + 128;
            uint8_t V = ((112 * R - 94 * G - 18 * B + 128) >> 8) + 128;
            // NV21: one V,U pair per 2x2 block of Y samples, taken from the
            // pixel on the even row and even column.
            yuv420sp[yIndex++] = Y;
            if (j % 2 == 0 && i % 2 == 0) {
                yuv420sp[uvIndex++] = V;
                yuv420sp[uvIndex++] = U;
            }
        }
    }
}

通过 objdump 生成 so 库的反汇编

命令行如下:

>J:\Programs\Android\sdk\ndk-bundle\toolchains\aarch64-linux-android-4.9\prebuilt\windows-x86_64\bin\aarch64-linux-android-objdump.exe -d libnative-lib.so > objdump_d.txt

一.   arm-linux-objdump


常用来显示二进制文件信息,常用来查看反汇编代码


二.   常用选项:


1.-b bfdname 指定目标码格式


2.--disassemble或者-d 反汇编可执行段


3.--disassemble-all或者-D 反汇编所有段


4.-EB,-EL指定字节序


5.--file-headers或者-f 显示文件的整体头部摘要信息


6.--section-headers,--headers或者-h 显示目标文件中各个段的头部摘要信息


7.--info 或者-i 显示支持的目标文件格式和CPU架构


8.--section=name或者-j name显示指定section 的信息


9.--architecture=machine或者-m machine 指定反汇编目标文件时使用的架构



三.   示例


arm-linux-objdump -D elf_file > dis_file 或者


arm-linux-objdump -D -b binary -m arm bin_file > dis_file

  • C/C++  实现版:
000410dc <_Z18encodeYUV420SP_CPUPhS_ii>:
   410dc: b5b0        push  {r4, r5, r7, lr}
   410de: af02        add r7, sp, #8
   410e0: b08f        sub sp, #60 ; 0x3c
   410e2: 469c        mov ip, r3
   410e4: 4696        mov lr, r2
   410e6: 460c        mov r4, r1
   410e8: 4605        mov r5, r0
   410ea: 900e        str r0, [sp, #56] ; 0x38
   410ec: 910d        str r1, [sp, #52] ; 0x34
   410ee: 920c        str r2, [sp, #48] ; 0x30
   410f0: 930b        str r3, [sp, #44] ; 0x2c
   410f2: 980c        ldr r0, [sp, #48] ; 0x30
   410f4: 990b        ldr r1, [sp, #44] ; 0x2c
   410f6: 4348        muls  r0, r1
   410f8: 900a        str r0, [sp, #40] ; 0x28
   410fa: 2000        movs  r0, #0
   410fc: 9009        str r0, [sp, #36] ; 0x24
   410fe: 990a        ldr r1, [sp, #40] ; 0x28
   41100: 9108        str r1, [sp, #32]
   41102: 9007        str r0, [sp, #28]
   41104: f8cd c00c   str.w ip, [sp, #12]
   41108: f8cd e008   str.w lr, [sp, #8]
   4110c: 9401        str r4, [sp, #4]
   4110e: 9500        str r5, [sp, #0]
   41110: e7ff        b.n 41112 <_Z18encodeYUV420SP_CPUPhS_ii+0x36>
   41112: 9807        ldr r0, [sp, #28]
   41114: 990b        ldr r1, [sp, #44] ; 0x2c
   41116: 4288        cmp r0, r1
   41118: f280 808c   bge.w 41234 <_Z18encodeYUV420SP_CPUPhS_ii+0x158>
   4111c: e7ff        b.n 4111e <_Z18encodeYUV420SP_CPUPhS_ii+0x42>
   4111e: 2000        movs  r0, #0
   41120: 9006        str r0, [sp, #24]
   41122: e7ff        b.n 41124 <_Z18encodeYUV420SP_CPUPhS_ii+0x48>
   41124: 9806        ldr r0, [sp, #24]
   41126: 990c        ldr r1, [sp, #48] ; 0x30
   41128: 4288        cmp r0, r1
   4112a: da7e        bge.n 4122a <_Z18encodeYUV420SP_CPUPhS_ii+0x14e>
   4112c: e7ff        b.n 4112e <_Z18encodeYUV420SP_CPUPhS_ii+0x52>
   4112e: 980d        ldr r0, [sp, #52] ; 0x34
   41130: 7880        ldrb  r0, [r0, #2]
   41132: f807 0c2d   strb.w  r0, [r7, #-45]
   41136: 980d        ldr r0, [sp, #52] ; 0x34
   41138: 7840        ldrb  r0, [r0, #1]
   4113a: f807 0c2e   strb.w  r0, [r7, #-46]
   4113e: 980d        ldr r0, [sp, #52] ; 0x34
   41140: 7800        ldrb  r0, [r0, #0]
   41142: f807 0c2f   strb.w  r0, [r7, #-47]
   41146: 980d        ldr r0, [sp, #52] ; 0x34
   41148: 3004        adds  r0, #4
   4114a: 900d        str r0, [sp, #52] ; 0x34
   4114c: f817 0c2d   ldrb.w  r0, [r7, #-45]
   41150: eb00 1040   add.w r0, r0, r0, lsl #5
   41154: f817 1c2e   ldrb.w  r1, [r7, #-46]
   41158: eb01 11c1   add.w r1, r1, r1, lsl #7
   4115c: eb01 0040   add.w r0, r1, r0, lsl #1
   41160: f817 1c2f   ldrb.w  r1, [r7, #-47]
   41164: 2219        movs  r2, #25
   41166: fb11 0002   smlabb  r0, r1, r2, r0
   4116a: 3080        adds  r0, #128  ; 0x80
   4116c: 2110        movs  r1, #16
   4116e: eb01 2010   add.w r0, r1, r0, lsr #8
   41172: f88d 0014   strb.w  r0, [sp, #20]
   41176: f817 0c2d   ldrb.w  r0, [r7, #-45]
   4117a: f06f 0125   mvn.w r1, #37 ; 0x25
   4117e: fb10 f001   smulbb  r0, r0, r1
   41182: f817 1c2e   ldrb.w  r1, [r7, #-46]
   41186: 224a        movs  r2, #74 ; 0x4a
   41188: fb01 0012   mls r0, r1, r2, r0
   4118c: f817 1c2f   ldrb.w  r1, [r7, #-47]
   41190: ebc1 01c1   rsb r1, r1, r1, lsl #3
   41194: eb00 1001   add.w r0, r0, r1, lsl #4
   41198: 3080        adds  r0, #128  ; 0x80
   4119a: 2180        movs  r1, #128  ; 0x80
   4119c: eb01 2010   add.w r0, r1, r0, lsr #8
   411a0: f807 0c31   strb.w  r0, [r7, #-49]
   411a4: f817 0c2d   ldrb.w  r0, [r7, #-45]
   411a8: ebc0 00c0   rsb r0, r0, r0, lsl #3
   411ac: f817 2c2e   ldrb.w  r2, [r7, #-46]
   411b0: 235e        movs  r3, #94 ; 0x5e
   411b2: fb12 f203   smulbb  r2, r2, r3
   411b6: ebc2 1000   rsb r0, r2, r0, lsl #4
   411ba: f817 2c2f   ldrb.w  r2, [r7, #-47]
   411be: eb02 02c2   add.w r2, r2, r2, lsl #3
   411c2: eba0 0042   sub.w r0, r0, r2, lsl #1
   411c6: 3080        adds  r0, #128  ; 0x80
   411c8: eb01 2010   add.w r0, r1, r0, lsr #8
   411cc: f807 0c32   strb.w  r0, [r7, #-50]
   411d0: f89d 0014   ldrb.w  r0, [sp, #20]
   411d4: 990e        ldr r1, [sp, #56] ; 0x38
   411d6: 9a09        ldr r2, [sp, #36] ; 0x24
   411d8: 1c53        adds  r3, r2, #1
   411da: 9309        str r3, [sp, #36] ; 0x24
   411dc: 5488        strb  r0, [r1, r2]
   411de: 9807        ldr r0, [sp, #28]
   411e0: eb00 71d0   add.w r1, r0, r0, lsr #31
   411e4: f021 0101   bic.w r1, r1, #1
   411e8: 1a40        subs  r0, r0, r1
   411ea: 2800        cmp r0, #0
   411ec: d118        bne.n 41220 <_Z18encodeYUV420SP_CPUPhS_ii+0x144>
   411ee: e7ff        b.n 411f0 <_Z18encodeYUV420SP_CPUPhS_ii+0x114>
   411f0: 9806        ldr r0, [sp, #24]
   411f2: eb00 71d0   add.w r1, r0, r0, lsr #31
   411f6: f021 0101   bic.w r1, r1, #1
   411fa: 1a40        subs  r0, r0, r1
   411fc: 2800        cmp r0, #0
   411fe: d10f        bne.n 41220 <_Z18encodeYUV420SP_CPUPhS_ii+0x144>
   41200: e7ff        b.n 41202 <_Z18encodeYUV420SP_CPUPhS_ii+0x126>
   41202: f817 0c32   ldrb.w  r0, [r7, #-50]
   41206: 990e        ldr r1, [sp, #56] ; 0x38
   41208: 9a08        ldr r2, [sp, #32]
   4120a: 1c53        adds  r3, r2, #1
   4120c: 9308        str r3, [sp, #32]
   4120e: 5488        strb  r0, [r1, r2]
   41210: f817 0c31   ldrb.w  r0, [r7, #-49]
   41214: 990e        ldr r1, [sp, #56] ; 0x38
   41216: 9a08        ldr r2, [sp, #32]
   41218: 1c53        adds  r3, r2, #1
   4121a: 9308        str r3, [sp, #32]
   4121c: 5488        strb  r0, [r1, r2]
   4121e: e7ff        b.n 41220 <_Z18encodeYUV420SP_CPUPhS_ii+0x144>
   41220: e7ff        b.n 41222 <_Z18encodeYUV420SP_CPUPhS_ii+0x146>
   41222: 9806        ldr r0, [sp, #24]
   41224: 3001        adds  r0, #1
   41226: 9006        str r0, [sp, #24]
   41228: e77c        b.n 41124 <_Z18encodeYUV420SP_CPUPhS_ii+0x48>
   4122a: e7ff        b.n 4122c <_Z18encodeYUV420SP_CPUPhS_ii+0x150>
   4122c: 9807        ldr r0, [sp, #28]
   4122e: 3001        adds  r0, #1
   41230: 9007        str r0, [sp, #28]
   41232: e76e        b.n 41112 <_Z18encodeYUV420SP_CPUPhS_ii+0x36>
   41234: b00f        add sp, #60 ; 0x3c
   41236: bdb0        pop {r4, r5, r7, pc}
  • Neon 实现版:
00041238 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii>:
   41238: b5f0        push  {r4, r5, r6, r7, lr}
   4123a: af03        add r7, sp, #12
   4123c: e92d 0b00   stmdb sp!, {r8, r9, fp}
   41240: f5ad 6d35   sub.w sp, sp, #2896 ; 0xb50
   41244: 466c        mov r4, sp
   41246: f36f 0403   bfc r4, #0, #4
   4124a: 46a5        mov sp, r4
   4124c: f50d 6cd6   add.w ip, sp, #1712 ; 0x6b0
   41250: f10d 0e30   add.w lr, sp, #48 ; 0x30
   41254: 461c        mov r4, r3
   41256: 4615        mov r5, r2
   41258: 460e        mov r6, r1
   4125a: 4680        mov r8, r0
   4125c: f8df 9c0c   ldr.w r9, [pc, #3084] ; 41e6c <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xc34>
   41260: 44f9        add r9, pc
   41262: f8d9 9000   ldr.w r9, [r9]
   41266: f8d9 9000   ldr.w r9, [r9]
   4126a: f8cd 9024   str.w r9, [sp, #36] ; 0x24
   4126e: 906f        str r0, [sp, #444]  ; 0x1bc
   41270: 916e        str r1, [sp, #440]  ; 0x1b8
   41272: 926d        str r2, [sp, #436]  ; 0x1b4
   41274: 936c        str r3, [sp, #432]  ; 0x1b0
   41276: 2080        movs  r0, #128  ; 0x80
   41278: f8ad 01ee   strh.w  r0, [sp, #494]  ; 0x1ee
   4127c: f50d 71f7   add.w r1, sp, #494  ; 0x1ee
   41280: f9e1 0c7f   vld1.16 {d16[]-d17[]}, [r1 :16]
   41284: a970        add r1, sp, #448  ; 0x1c0
   41286: f941 0aef   vst1.64 {d16-d17}, [r1 :128]
   4128a: f961 0aef   vld1.64 {d16-d17}, [r1 :128]
   4128e: a974        add r1, sp, #464  ; 0x1d0
   41290: f941 0aef   vst1.64 {d16-d17}, [r1 :128]
   41294: f961 0aef   vld1.64 {d16-d17}, [r1 :128]
   41298: a968        add r1, sp, #416  ; 0x1a0
   4129a: f941 0aef   vst1.64 {d16-d17}, [r1 :128]
   4129e: f8ad 0a4e   strh.w  r0, [sp, #2638] ; 0xa4e
   412a2: f60d 214e   addw  r1, sp, #2638 ; 0xa4e
   412a6: f9e1 0c7f   vld1.16 {d16[]-d17[]}, [r1 :16]
   412aa: f50d 6122   add.w r1, sp, #2592 ; 0xa20
   412ae: f941 0aef   vst1.64 {d16-d17}, [r1 :128]
   412b2: f961 0aef   vld1.64 {d16-d17}, [r1 :128]
   412b6: f50d 6123   add.w r1, sp, #2608 ; 0xa30
   412ba: f941 0aef   vst1.64 {d16-d17}, [r1 :128]
   412be: f961 0aef   vld1.64 {d16-d17}, [r1 :128]
   412c2: a964        add r1, sp, #400  ; 0x190
   412c4: f941 0aef   vst1.64 {d16-d17}, [r1 :128]
   412c8: f88d 0a1f   strb.w  r0, [sp, #2591] ; 0xa1f
   412cc: f60d 201f   addw  r0, sp, #2591 ; 0xa1f
   412d0: f9e0 2c0f   vld1.8  {d18[]}, [r0]
   412d4: edcc 2bd6   vstr  d18, [ip, #856] ; 0x358
   412d8: eddc 2bd6   vldr  d18, [ip, #856] ; 0x358
   412dc: edcc 2bd8   vstr  d18, [ip, #864] ; 0x360
   412e0: eddc 2bd8   vldr  d18, [ip, #864] ; 0x360
   412e4: edce 2b56   vstr  d18, [lr, #344] ; 0x158
   412e8: 2010        movs  r0, #16
   412ea: f88d 0a07   strb.w  r0, [sp, #2567] ; 0xa07
   412ee: f60d 2007   addw  r0, sp, #2567 ; 0xa07
   412f2: f9e0 0c2f   vld1.8  {d16[]-d17[]}, [r0]
   412f6: f50d 601e   add.w r0, sp, #2528 ; 0x9e0
   412fa: f940 0aef   vst1.64 {d16-d17}, [r0 :128]
   412fe: f960 0aef   vld1.64 {d16-d17}, [r0 :128]
   41302: f50d 601f   add.w r0, sp, #2544 ; 0x9f0
   41306: f940 0aef   vst1.64 {d16-d17}, [r0 :128]
   4130a: f960 0aef   vld1.64 {d16-d17}, [r0 :128]
   4130e: a85c        add r0, sp, #368  ; 0x170
   41310: f940 0aef   vst1.64 {d16-d17}, [r0 :128]
   41314: 20ff        movs  r0, #255  ; 0xff
   41316: f8ad 09de   strh.w  r0, [sp, #2526] ; 0x9de
   4131a: f60d 10de   addw  r0, sp, #2526 ; 0x9de
   4131e: f9e0 0c7f   vld1.16 {d16[]-d17[]}, [r0 :16]
   41322: f50d 601b   add.w r0, sp, #2480 ; 0x9b0
   41326: f940 0aef   vst1.64 {d16-d17}, [r0 :128]
   4132a: f960 0aef   vld1.64 {d16-d17}, [r0 :128]
   4132e: f50d 601c   add.w r0, sp, #2496 ; 0x9c0
   41332: f940 0aef   vst1.64 {d16-d17}, [r0 :128]
   41336: f960 0aef   vld1.64 {d16-d17}, [r0 :128]
   4133a: a858        add r0, sp, #352  ; 0x160
   4133c: f940 0aef   vst1.64 {d16-d17}, [r0 :128]
   41340: 986d        ldr r0, [sp, #436]  ; 0x1b4
   41342: 996c        ldr r1, [sp, #432]  ; 0x1b0
   41344: 4348        muls  r0, r1
   41346: 9057        str r0, [sp, #348]  ; 0x15c
   41348: 2000        movs  r0, #0
   4134a: 9056        str r0, [sp, #344]  ; 0x158
   4134c: 9957        ldr r1, [sp, #348]  ; 0x15c
   4134e: 9155        str r1, [sp, #340]  ; 0x154
   41350: 9053        str r0, [sp, #332]  ; 0x14c
   41352: f8cd c020   str.w ip, [sp, #32]
   41356: f8cd e01c   str.w lr, [sp, #28]
   4135a: 9406        str r4, [sp, #24]
   4135c: 9505        str r5, [sp, #20]
   4135e: 9604        str r6, [sp, #16]
   41360: f8cd 800c   str.w r8, [sp, #12]
   41364: e7ff        b.n 41366 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0x12e>
   41366: 9853        ldr r0, [sp, #332]  ; 0x14c
   41368: 996c        ldr r1, [sp, #432]  ; 0x1b0
   4136a: 4288        cmp r0, r1
   4136c: f280 856d   bge.w 41e4a <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xc12>
   41370: e7ff        b.n 41372 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0x13a>
   41372: 2000        movs  r0, #0
   41374: 9054        str r0, [sp, #336]  ; 0x150
   41376: e7ff        b.n 41378 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0x140>
   41378: 9854        ldr r0, [sp, #336]  ; 0x150
   4137a: 996d        ldr r1, [sp, #436]  ; 0x1b4
   4137c: ebb0 1f21   cmp.w r0, r1, asr #4
   41380: f280 84d5   bge.w 41d2e <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xaf6>
   41384: e7ff        b.n 41386 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0x14e>
   41386: 986e        ldr r0, [sp, #440]  ; 0x1b8
   41388: f960 010d   vld4.8  {d16,d18,d20,d22}, [r0]!
   4138c: f960 110f   vld4.8  {d17,d19,d21,d23}, [r0]
   41390: ef66 81f6   vorr  q12, q11, q11
   41394: ef62 a1f2   vorr  q13, q9, q9
   41398: ef64 c1f4   vorr  q14, q10, q10
   4139c: ef60 e1f0   vorr  q15, q8, q8
   413a0: f50d 602c   add.w r0, sp, #2752 ; 0xac0
   413a4: f940 eacf   vst1.64 {d30-d31}, [r0]
   413a8: f100 0120   add.w r1, r0, #32
   413ac: f941 cacf   vst1.64 {d28-d29}, [r1]
   413b0: 4602        mov r2, r0
   413b2: f962 caed   vld1.64 {d28-d29}, [r2 :128]!
   413b6: f942 aacf   vst1.64 {d26-d27}, [r2]
   413ba: 3030        adds  r0, #48 ; 0x30
   413bc: f940 8acf   vst1.64 {d24-d25}, [r0]
   413c0: f50d 6330   add.w r3, sp, #2816 ; 0xb00
   413c4: f103 0c20   add.w ip, r3, #32
   413c8: f961 8aef   vld1.64 {d24-d25}, [r1 :128]
   413cc: f94c 8aef   vst1.64 {d24-d25}, [ip :128]
   413d0: 4619        mov r1, r3
   413d2: f941 caed   vst1.64 {d28-d29}, [r1 :128]!
   413d6: f962 8aef   vld1.64 {d24-d25}, [r2 :128]
   413da: f941 8aef   vst1.64 {d24-d25}, [r1 :128]
   413de: f103 0230   add.w r2, r3, #48 ; 0x30
   413e2: f960 8aef   vld1.64 {d24-d25}, [r0 :128]
   413e6: f942 8aef   vst1.64 {d24-d25}, [r2 :128]
   413ea: 986e        ldr r0, [sp, #440]  ; 0x1b8
   413ec: 3040        adds  r0, #64 ; 0x40
   413ee: 906e        str r0, [sp, #440]  ; 0x1b8
   413f0: f96c 8aef   vld1.64 {d24-d25}, [ip :128]
   413f4: f50d 601a   add.w r0, sp, #2464 ; 0x9a0
   413f8: f940 8aef   vst1.64 {d24-d25}, [r0 :128]
   413fc: f960 8aef   vld1.64 {d24-d25}, [r0 :128]
   41400: eeb0 0b68   vmov.f64  d0, d24
   41404: 9808        ldr r0, [sp, #32]
   41406: ed80 0bba   vstr  d0, [r0, #744]  ; 0x2e8
   4140a: ed90 0bba   vldr  d0, [r0, #744]  ; 0x2e8
   4140e: f50d 622b   add.w r2, sp, #2736 ; 0xab0
   41412: f902 071d   vst1.8  {d0}, [r2 :64]!
   41416: f96c 8aef   vld1.64 {d24-d25}, [ip :128]
   4141a: f50d 6c18   add.w ip, sp, #2432 ; 0x980
   4141e: f94c 8aef   vst1.64 {d24-d25}, [ip :128]
   41422: f96c 8aef   vld1.64 {d24-d25}, [ip :128]
   41426: eeb0 0b69   vmov.f64  d0, d25
   4142a: ed80 0bb2   vstr  d0, [r0, #712]  ; 0x2c8
   4142e: ed90 0bb2   vldr  d0, [r0, #712]  ; 0x2c8
   41432: ed82 0b00   vstr  d0, [r2]
   41436: f961 8aef   vld1.64 {d24-d25}, [r1 :128]
   4143a: f50d 6c16   add.w ip, sp, #2400 ; 0x960
   4143e: f94c 8aef   vst1.64 {d24-d25}, [ip :128]
   41442: f96c 8aef   vld1.64 {d24-d25}, [ip :128]
   41446: eeb0 0b68   vmov.f64  d0, d24
   4144a: ed80 0baa   vstr  d0, [r0, #680]  ; 0x2a8
   4144e: ed90 0baa   vldr  d0, [r0, #680]  ; 0x2a8
   41452: f50d 6c2a   add.w ip, sp, #2720 ; 0xaa0
   41456: f90c 071d   vst1.8  {d0}, [ip :64]!
   4145a: f961 8aef   vld1.64 {d24-d25}, [r1 :128]
   4145e: f50d 6114   add.w r1, sp, #2368 ; 0x940
   41462: f941 8aef   vst1.64 {d24-d25}, [r1 :128]
   41466: f961 8aef   vld1.64 {d24-d25}, [r1 :128]
   4146a: eeb0 0b69   vmov.f64  d0, d25
   4146e: ed80 0ba2   vstr  d0, [r0, #648]  ; 0x288
   41472: ed90 0ba2   vldr  d0, [r0, #648]  ; 0x288
   41476: ed8c 0b00   vstr  d0, [ip]
   4147a: f963 8aef   vld1.64 {d24-d25}, [r3 :128]
   4147e: f50d 6112   add.w r1, sp, #2336 ; 0x920
   41482: f941 8aef   vst1.64 {d24-d25}, [r1 :128]
   41486: f961 8aef   vld1.64 {d24-d25}, [r1 :128]
   4148a: eeb0 0b68   vmov.f64  d0, d24
   4148e: ed80 0b9a   vstr  d0, [r0, #616]  ; 0x268
   41492: ed90 0b9a   vldr  d0, [r0, #616]  ; 0x268
   41496: f50d 6129   add.w r1, sp, #2704 ; 0xa90
   4149a: f901 071d   vst1.8  {d0}, [r1 :64]!
   4149e: f963 8aef   vld1.64 {d24-d25}, [r3 :128]
   414a2: f50d 6310   add.w r3, sp, #2304 ; 0x900
   414a6: f943 8aef   vst1.64 {d24-d25}, [r3 :128]
   414aa: f963 8aef   vld1.64 {d24-d25}, [r3 :128]
   414ae: eeb0 0b69   vmov.f64  d0, d25
   414b2: ed80 0b92   vstr  d0, [r0, #584]  ; 0x248
   414b6: ed90 0b92   vldr  d0, [r0, #584]  ; 0x248
   414ba: ed81 0b00   vstr  d0, [r1]
   414be: 2342        movs  r3, #66 ; 0x42
   414c0: f88d 38f7   strb.w  r3, [sp, #2295] ; 0x8f7
   414c4: f60d 03f7   addw  r3, sp, #2295 ; 0x8f7
   414c8: f9a3 0c0f   vld1.8  {d0[]}, [r3]
   414cc: ed80 0b8c   vstr  d0, [r0, #560]  ; 0x230
   414d0: ed90 0b8c   vldr  d0, [r0, #560]  ; 0x230
   414d4: ed80 0b8e   vstr  d0, [r0, #568]  ; 0x238
   414d8: ed90 0b8e   vldr  d0, [r0, #568]  ; 0x238
   414dc: 9b07        ldr r3, [sp, #28]
   414de: ed83 0b44   vstr  d0, [r3, #272]  ; 0x110
   414e2: f50d 6400   add.w r4, sp, #2048 ; 0x800
   414e6: ed94 0bac   vldr  d0, [r4, #688]  ; 0x2b0
   414ea: ed93 1b44   vldr  d1, [r3, #272]  ; 0x110
   414ee: ed80 0b8a   vstr  d0, [r0, #552]  ; 0x228
   414f2: ed80 1b88   vstr  d1, [r0, #544]  ; 0x220
   414f6: ed90 0b8a   vldr  d0, [r0, #552]  ; 0x228
   414fa: ed90 1b88   vldr  d1, [r0, #544]  ; 0x220
   414fe: ffc0 8c01   vmull.u8  q12, d0, d1
   41502: f50d 6e0c   add.w lr, sp, #2240 ; 0x8c0
   41506: f94e 8aef   vst1.64 {d24-d25}, [lr :128]
   4150a: f96e 8aef   vld1.64 {d24-d25}, [lr :128]
   4150e: f50d 6e27   add.w lr, sp, #2672 ; 0xa70
   41512: f94e 8aef   vst1.64 {d24-d25}, [lr :128]
   41516: ed92 0b00   vldr  d0, [r2]
   4151a: ed93 1b44   vldr  d1, [r3, #272]  ; 0x110
   4151e: ed80 0b82   vstr  d0, [r0, #520]  ; 0x208
   41522: ed80 1b80   vstr  d1, [r0, #512]  ; 0x200
   41526: ed90 0b82   vldr  d0, [r0, #520]  ; 0x208
   4152a: ed90 1b80   vldr  d1, [r0, #512]  ; 0x200
   4152e: ffc0 8c01   vmull.u8  q12, d0, d1
   41532: f50d 620a   add.w r2, sp, #2208 ; 0x8a0
   41536: f942 8aef   vst1.64 {d24-d25}, [r2 :128]
   4153a: f962 8aef   vld1.64 {d24-d25}, [r2 :128]
   4153e: 2281        movs  r2, #129  ; 0x81
   41540: f88d 289f   strb.w  r2, [sp, #2207] ; 0x89f
   41544: f60d 029f   addw  r2, sp, #2207 ; 0x89f
   41548: f9a2 0c0f   vld1.8  {d0[]}, [r2]
   4154c: ed80 0b76   vstr  d0, [r0, #472]  ; 0x1d8
   41550: ed90 0b76   vldr  d0, [r0, #472]  ; 0x1d8
   41554: ed80 0b78   vstr  d0, [r0, #480]  ; 0x1e0
   41558: ed90 0b78   vldr  d0, [r0, #480]  ; 0x1e0
   4155c: ed83 0b44   vstr  d0, [r3, #272]  ; 0x110
   41560: 4672        mov r2, lr
   41562: f962 aa6d   vld1.16 {d26-d27}, [r2 :128]!
   41566: f942 8aef   vst1.64 {d24-d25}, [r2 :128]
   4156a: ed90 0bfc   vldr  d0, [r0, #1008] ; 0x3f0
   4156e: ed93 1b44   vldr  d1, [r3, #272]  ; 0x110
   41572: f50d 6405   add.w r4, sp, #2128 ; 0x850
   41576: f944 aaef   vst1.64 {d26-d27}, [r4 :128]
   4157a: ed80 0b66   vstr  d0, [r0, #408]  ; 0x198
   4157e: ed80 1b64   vstr  d1, [r0, #400]  ; 0x190
   41582: f964 8aef   vld1.64 {d24-d25}, [r4 :128]
   41586: ed90 0b66   vldr  d0, [r0, #408]  ; 0x198
   4158a: ed90 1b64   vldr  d1, [r0, #400]  ; 0x190
   4158e: ed80 0b74   vstr  d0, [r0, #464]  ; 0x1d0
   41592: ed80 1b72   vstr  d1, [r0, #456]  ; 0x1c8
   41596: ed90 0b74   vldr  d0, [r0, #464]  ; 0x1d0
   4159a: ed90 1b72   vldr  d1, [r0, #456]  ; 0x1c8
   4159e: ffc0 ac01   vmull.u8  q13, d0, d1
   415a2: f50d 6406   add.w r4, sp, #2144 ; 0x860
   415a6: f944 aaef   vst1.64 {d26-d27}, [r4 :128]
   415aa: f964 aaef   vld1.64 {d26-d27}, [r4 :128]
   415ae: ef58 88ea   vadd.i16  q12, q12, q13
   415b2: f50d 6403   add.w r4, sp, #2096 ; 0x830
   415b6: f944 8aef   vst1.64 {d24-d25}, [r4 :128]
   415ba: f964 8aef   vld1.64 {d24-d25}, [r4 :128]
   415be: f94e 8aef   vst1.64 {d24-d25}, [lr :128]
   415c2: f962 8aef   vld1.64 {d24-d25}, [r2 :128]
   415c6: ed9c 0b00   vldr  d0, [ip]
   415ca: ed93 1b44   vldr  d1, [r3, #272]  ; 0x110
   415ce: f50d 6c00   add.w ip, sp, #2048 ; 0x800
   415d2: f94c 8aef   vst1.64 {d24-d25}, [ip :128]
   415d6: ed80 0b52   vstr  d0, [r0, #328]  ; 0x148
   415da: ed80 1b50   vstr  d1, [r0, #320]  ; 0x140
   415de: f96c 8aef   vld1.64 {d24-d25}, [ip :128]
   415e2: ed90 0b52   vldr  d0, [r0, #328]  ; 0x148
   415e6: ed90 1b50   vldr  d1, [r0, #320]  ; 0x140
   415ea: ed80 0b5e   vstr  d0, [r0, #376]  ; 0x178
   415ee: ed80 1b5c   vstr  d1, [r0, #368]  ; 0x170
   415f2: ed90 0b5e   vldr  d0, [r0, #376]  ; 0x178
   415f6: ed90 1b5c   vldr  d1, [r0, #368]  ; 0x170
   415fa: ffc0 ac01   vmull.u8  q13, d0, d1
   415fe: f50d 6c01   add.w ip, sp, #2064 ; 0x810
   41602: f94c aaef   vst1.64 {d26-d27}, [ip :128]
   41606: f96c aaef   vld1.64 {d26-d27}, [ip :128]
   4160a: ef58 88ea   vadd.i16  q12, q12, q13
   4160e: f50d 6cfc   add.w ip, sp, #2016 ; 0x7e0
   41612: f94c 8aef   vst1.64 {d24-d25}, [ip :128]
   41616: f96c 8aef   vld1.64 {d24-d25}, [ip :128]
   4161a: f942 8aef   vst1.64 {d24-d25}, [r2 :128]
   4161e: f04f 0c19   mov.w ip, #25
   41622: f88d c7df   strb.w  ip, [sp, #2015] ; 0x7df
   41626: f20d 7cdf   addw  ip, sp, #2015 ; 0x7df
   4162a: f9ac 0c0f   vld1.8  {d0[]}, [ip]
   4162e: ed80 0b46   vstr  d0, [r0, #280]  ; 0x118
   41632: ed90 0b46   vldr  d0, [r0, #280]  ; 0x118
   41636: ed80 0b48   vstr  d0, [r0, #288]  ; 0x120
   4163a: ed90 0b48   vldr  d0, [r0, #288]  ; 0x120
   4163e: ed83 0b44   vstr  d0, [r3, #272]  ; 0x110
   41642: f96e 8aef   vld1.64 {d24-d25}, [lr :128]
   41646: ed90 0bf8   vldr  d0, [r0, #992]  ; 0x3e0
   4164a: ed93 1b44   vldr  d1, [r3, #272]  ; 0x110
   4164e: f50d 6cf2   add.w ip, sp, #1936 ; 0x790
   41652: f94c 8aef   vst1.64 {d24-d25}, [ip :128]
   41656: ed80 0b36   vstr  d0, [r0, #216]  ; 0xd8
   4165a: ed80 1b34   vstr  d1, [r0, #208]  ; 0xd0
   4165e: f96c 8aef   vld1.64 {d24-d25}, [ip :128]
   41662: ed90 0b36   vldr  d0, [r0, #216]  ; 0xd8
   41666: ed90 1b34   vldr  d1, [r0, #208]  ; 0xd0
   4166a: ed80 0b44   vstr  d0, [r0, #272]  ; 0x110
   4166e: ed80 1b42   vstr  d1, [r0, #264]  ; 0x108
   41672: ed90 0b44   vldr  d0, [r0, #272]  ; 0x110
   41676: ed90 1b42   vldr  d1, [r0, #264]  ; 0x108
   4167a: ffc0 ac01   vmull.u8  q13, d0, d1
   4167e: f50d 6cf4   add.w ip, sp, #1952 ; 0x7a0
   41682: f94c aaef   vst1.64 {d26-d27}, [ip :128]
   41686: f96c aaef   vld1.64 {d26-d27}, [ip :128]
   4168a: ef58 88ea   vadd.i16  q12, q12, q13
   4168e: f50d 6cee   add.w ip, sp, #1904 ; 0x770
   41692: f94c 8aef   vst1.64 {d24-d25}, [ip :128]
   41696: f96c 8aef   vld1.64 {d24-d25}, [ip :128]
   4169a: f94e 8aef   vst1.64 {d24-d25}, [lr :128]
   4169e: f962 8aef   vld1.64 {d24-d25}, [r2 :128]
   416a2: ed91 0b00   vldr  d0, [r1]
   416a6: ed93 1b44   vldr  d1, [r3, #272]  ; 0x110
   416aa: f50d 61e8   add.w r1, sp, #1856 ; 0x740
   416ae: f941 8aef   vst1.64 {d24-d25}, [r1 :128]
   416b2: ed80 0b22   vstr  d0, [r0, #136]  ; 0x88
   416b6: ed80 1b20   vstr  d1, [r0, #128]  ; 0x80
   416ba: f961 8aef   vld1.64 {d24-d25}, [r1 :128]
   416be: ed90 0b22   vldr  d0, [r0, #136]  ; 0x88
   416c2: ed90 1b20   vldr  d1, [r0, #128]  ; 0x80
   416c6: ed80 0b2e   vstr  d0, [r0, #184]  ; 0xb8
   416ca: ed80 1b2c   vstr  d1, [r0, #176]  ; 0xb0
   416ce: ed90 0b2e   vldr  d0, [r0, #184]  ; 0xb8
   416d2: ed90 1b2c   vldr  d1, [r0, #176]  ; 0xb0
   416d6: ffc0 ac01   vmull.u8  q13, d0, d1
   416da: f50d 61ea   add.w r1, sp, #1872 ; 0x750
   416de: f941 aaef   vst1.64 {d26-d27}, [r1 :128]
   416e2: f961 aaef   vld1.64 {d26-d27}, [r1 :128]
   416e6: ef58 88ea   vadd.i16  q12, q12, q13
   416ea: f50d 61e4   add.w r1, sp, #1824 ; 0x720
   416ee: f941 8aef   vst1.64 {d24-d25}, [r1 :128]
   416f2: f961 8aef   vld1.64 {d24-d25}, [r1 :128]
   416f6: f942 8aef   vst1.64 {d24-d25}, [r2 :128]
   416fa: f96e 8aef   vld1.64 {d24-d25}, [lr :128]
   416fe: a968        add r1, sp, #416  ; 0x1a0
   41700: f961 aaef   vld1.64 {d26-d27}, [r1 :128]
   41704: f50d 6ce2   add.w ip, sp, #1808 ; 0x710
   41708: f94c 8aef   vst1.64 {d24-d25}, [ip :128]
   4170c: f50d 64e0   add.w r4, sp, #1792 ; 0x700
   41710: f944 aaef   vst1.64 {d26-d27}, [r4 :128]
   41714: f96c 8aef   vld1.64 {d24-d25}, [ip :128]
   41718: f964 aaef   vld1.64 {d26-d27}, [r4 :128]
   4171c: ef58 88ea   vadd.i16  q12, q12, q13
   41720: f50d 6cde   add.w ip, sp, #1776 ; 0x6f0
   41724: f94c 8aef   vst1.64 {d24-d25}, [ip :128]
   41728: f96c 8aef   vld1.64 {d24-d25}, [ip :128]
   4172c: f94e 8aef   vst1.64 {d24-d25}, [lr :128]
   41730: f962 8aef   vld1.64 {d24-d25}, [r2 :128]
   41734: f961 aaef   vld1.64 {d26-d27}, [r1 :128]
   41738: f50d 61dc   add.w r1, sp, #1760 ; 0x6e0
   4173c: f941 8aef   vst1.64 {d24-d25}, [r1 :128]
   41740: f50d 6cda   add.w ip, sp, #1744 ; 0x6d0
   41744: f94c aaef   vst1.64 {d26-d27}, [ip :128]
   41748: f961 8aef   vld1.64 {d24-d25}, [r1 :128]
   4174c: f96c aaef   vld1.64 {d26-d27}, [ip :128]
   41750: ef58 88ea   vadd.i16  q12, q12, q13
   41754: f50d 61d8   add.w r1, sp, #1728 ; 0x6c0
   41758: f941 8aef   vst1.64 {d24-d25}, [r1 :128]
   4175c: f961 8aef   vld1.64 {d24-d25}, [r1 :128]
   41760: f942 8aef   vst1.64 {d24-d25}, [r2 :128]
   41764: f96e 8aef   vld1.64 {d24-d25}, [lr :128]
   41768: a948        add r1, sp, #288  ; 0x120
   4176a: f941 8aef   vst1.64 {d24-d25}, [r1 :128]
   4176e: f961 8aef   vld1.64 {d24-d25}, [r1 :128]
   41772: ff88 0938   vqshrn.u16  d0, q12, #8
   41776: ed83 0b3a   vstr  d0, [r3, #232]  ; 0xe8
   4177a: ed93 0b3a   vldr  d0, [r3, #232]  ; 0xe8
   4177e: ed83 0b38   vstr  d0, [r3, #224]  ; 0xe0
   41782: ed93 0b38   vldr  d0, [r3, #224]  ; 0xe0
   41786: f962 8aef   vld1.64 {d24-d25}, [r2 :128]
   4178a: a940        add r1, sp, #256  ; 0x100
   4178c: f941 8aef   vst1.64 {d24-d25}, [r1 :128]
   41790: f961 8aef   vld1.64 {d24-d25}, [r1 :128]
   41794: ff88 1938   vqshrn.u16  d1, q12, #8
   41798: ed83 1b32   vstr  d1, [r3, #200]  ; 0xc8
   4179c: ed93 1b32   vldr  d1, [r3, #200]  ; 0xc8
   417a0: ed83 1b30   vstr  d1, [r3, #192]  ; 0xc0
   417a4: ed93 1b30   vldr  d1, [r3, #192]  ; 0xc0
   417a8: ed80 0b02   vstr  d0, [r0, #8]
   417ac: ed80 1b00   vstr  d1, [r0]
   417b0: ed90 0b02   vldr  d0, [r0, #8]
   417b4: ed90 1b00   vldr  d1, [r0]
   417b8: eef0 8b40   vmov.f64  d24, d0
   417bc: eef0 9b41   vmov.f64  d25, d1
   417c0: f50d 61d4   add.w r1, sp, #1696 ; 0x6a0
   417c4: f941 8aef   vst1.64 {d24-d25}, [r1 :128]
   417c8: f961 8aef   vld1.64 {d24-d25}, [r1 :128]
   417cc: a94c        add r1, sp, #304  ; 0x130
   417ce: f941 8aef   vst1.64 {d24-d25}, [r1 :128]
   417d2: f961 8aef   vld1.64 {d24-d25}, [r1 :128]
   417d6: aa5c        add r2, sp, #368  ; 0x170
   417d8: f962 aaef   vld1.64 {d26-d27}, [r2 :128]
   417dc: f50d 62d2   add.w r2, sp, #1680 ; 0x690
   417e0: f942 8aef   vst1.64 {d24-d25}, [r2 :128]
   417e4: f50d 6cd0   add.w ip, sp, #1664 ; 0x680
   417e8: f94c aaef   vst1.64 {d26-d27}, [ip :128]
   417ec: f962 8aef   vld1.64 {d24-d25}, [r2 :128]
   417f0: f96c aaef   vld1.64 {d26-d27}, [ip :128]
   417f4: ef48 88ea   vadd.i8 q12, q12, q13
   417f8: f50d 62ce   add.w r2, sp, #1648 ; 0x670
   417fc: f942 8aef   vst1.64 {d24-d25}, [r2 :128]
   41800: f962 8aef   vld1.64 {d24-d25}, [r2 :128]
   41804: f941 8aef   vst1.64 {d24-d25}, [r1 :128]
   41808: f961 8aef   vld1.64 {d24-d25}, [r1 :128]
   4180c: a938        add r1, sp, #224  ; 0xe0
   4180e: f941 8aef   vst1.64 {d24-d25}, [r1 :128]
   41812: 9a6f        ldr r2, [sp, #444]  ; 0x1bc
   41814: f8dd c158   ldr.w ip, [sp, #344]  ; 0x158
   41818: 4462        add r2, ip
   4181a: f961 8aef   vld1.64 {d24-d25}, [r1 :128]
   4181e: f942 8a0f   vst1.8  {d24-d25}, [r2]
   41822: 9956        ldr r1, [sp, #344]  ; 0x158
   41824: 3110        adds  r1, #16
   41826: 9156        str r1, [sp, #344]  ; 0x158
   41828: 9953        ldr r1, [sp, #332]  ; 0x14c
   4182a: eb01 72d1   add.w r2, r1, r1, lsr #31
   4182e: f022 0201   bic.w r2, r2, #1
   41832: 1a89        subs  r1, r1, r2
   41834: 2900        cmp r1, #0
   41836: f040 8274   bne.w 41d22 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xaea>
   4183a: e7ff        b.n 4183c <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0x604>
   4183c: f64f 70da   movw  r0, #65498  ; 0xffda
   41840: f8ad 066e   strh.w  r0, [sp, #1646] ; 0x66e
   41844: f20d 606e   addw  r0, sp, #1646 ; 0x66e
   41848: f9e0 0c7f   vld1.16 {d16[]-d17[]}, [r0 :16]
   4184c: f50d 60c8   add.w r0, sp, #1600 ; 0x640
   41850: f940 0aef   vst1.64 {d16-d17}, [r0 :128]
   41854: f960 0aef   vld1.64 {d16-d17}, [r0 :128]
   41858: f50d 60ca   add.w r0, sp, #1616 ; 0x650
   4185c: f940 0aef   vst1.64 {d16-d17}, [r0 :128]
   41860: f960 0aef   vld1.64 {d16-d17}, [r0 :128]
   41864: a834        add r0, sp, #208  ; 0xd0
   41866: f940 0aef   vst1.64 {d16-d17}, [r0 :128]
   4186a: 2170        movs  r1, #112  ; 0x70
   4186c: f8ad 163e   strh.w  r1, [sp, #1598] ; 0x63e
   41870: f20d 623e   addw  r2, sp, #1598 ; 0x63e
   41874: f9e2 0c7f   vld1.16 {d16[]-d17[]}, [r2 :16]
   41878: f50d 62c2   add.w r2, sp, #1552 ; 0x610
   4187c: f942 0aef   vst1.64 {d16-d17}, [r2 :128]
   41880: f962 0aef   vld1.64 {d16-d17}, [r2 :128]
   41884: f50d 62c4   add.w r2, sp, #1568 ; 0x620
   41888: f942 0aef   vst1.64 {d16-d17}, [r2 :128]
   4188c: f962 0aef   vld1.64 {d16-d17}, [r2 :128]
   41890: aa30        add r2, sp, #192  ; 0xc0
   41892: f942 0aef   vst1.64 {d16-d17}, [r2 :128]
   41896: f50d 6330   add.w r3, sp, #2816 ; 0xb00
   4189a: f103 0c20   add.w ip, r3, #32
   4189e: f96c 0aef   vld1.64 {d16-d17}, [ip :128]
   418a2: f50d 6cc0   add.w ip, sp, #1536 ; 0x600
   418a6: f94c 0aef   vst1.64 {d16-d17}, [ip :128]
   418aa: f96c 0aef   vld1.64 {d16-d17}, [ip :128]
   418ae: f50d 6cbe   add.w ip, sp, #1520 ; 0x5f0
   418b2: f94c 0aef   vst1.64 {d16-d17}, [ip :128]
   418b6: f96c 0aef   vld1.64 {d16-d17}, [ip :128]
   418ba: f50d 7cb0   add.w ip, sp, #352  ; 0x160
   418be: f96c 2aef   vld1.64 {d18-d19}, [ip :128]
   418c2: f50d 6ebc   add.w lr, sp, #1504 ; 0x5e0
   418c6: f94e 0aef   vst1.64 {d16-d17}, [lr :128]
   418ca: f50d 64ba   add.w r4, sp, #1488 ; 0x5d0
   418ce: f944 2aef   vst1.64 {d18-d19}, [r4 :128]
   418d2: f96e 0aef   vld1.64 {d16-d17}, [lr :128]
   418d6: f964 2aef   vld1.64 {d18-d19}, [r4 :128]
   418da: ef40 01f2   vand  q8, q8, q9
   418de: f50d 6eb8   add.w lr, sp, #1472 ; 0x5c0
   418e2: f94e 0aef   vst1.64 {d16-d17}, [lr :128]
   418e6: f96e 0aef   vld1.64 {d16-d17}, [lr :128]
   418ea: f50d 6eb6   add.w lr, sp, #1456 ; 0x5b0
   418ee: f94e 0aef   vst1.64 {d16-d17}, [lr :128]
   418f2: f96e 0aef   vld1.64 {d16-d17}, [lr :128]
   418f6: f50d 6eb4   add.w lr, sp, #1440 ; 0x5a0
   418fa: f94e 0aef   vst1.64 {d16-d17}, [lr :128]
   418fe: f96e 0aef   vld1.64 {d16-d17}, [lr :128]
   41902: f10d 0eb0   add.w lr, sp, #176  ; 0xb0
   41906: f94e 0aef   vst1.64 {d16-d17}, [lr :128]
   4190a: f103 0410   add.w r4, r3, #16
   4190e: f964 0aef   vld1.64 {d16-d17}, [r4 :128]
   41912: f50d 64b2   add.w r4, sp, #1424 ; 0x590
   41916: f944 0aef   vst1.64 {d16-d17}, [r4 :128]
   4191a: f964 0aef   vld1.64 {d16-d17}, [r4 :128]
   4191e: f50d 64b0   add.w r4, sp, #1408 ; 0x580
   41922: f944 0aef   vst1.64 {d16-d17}, [r4 :128]
   41926: f964 0aef   vld1.64 {d16-d17}, [r4 :128]
   4192a: f96c 2aef   vld1.64 {d18-d19}, [ip :128]
   4192e: f50d 64ae   add.w r4, sp, #1392 ; 0x570
   41932: f944 0aef   vst1.64 {d16-d17}, [r4 :128]
   41936: f50d 65ac   add.w r5, sp, #1376 ; 0x560
   4193a: f945 2aef   vst1.64 {d18-d19}, [r5 :128]
   4193e: f964 0aef   vld1.64 {d16-d17}, [r4 :128]
   41942: f965 2aef   vld1.64 {d18-d19}, [r5 :128]
   41946: ef40 01f2   vand  q8, q8, q9
   4194a: f50d 64aa   add.w r4, sp, #1360 ; 0x550
   4194e: f944 0aef   vst1.64 {d16-d17}, [r4 :128]
   41952: f964 0aef   vld1.64 {d16-d17}, [r4 :128]
   41956: f50d 64a8   add.w r4, sp, #1344 ; 0x540
   4195a: f944 0aef   vst1.64 {d16-d17}, [r4 :128]
   4195e: f964 0aef   vld1.64 {d16-d17}, [r4 :128]
   41962: f50d 64a6   add.w r4, sp, #1328 ; 0x530
   41966: f944 0aef   vst1.64 {d16-d17}, [r4 :128]
   4196a: f964 0aef   vld1.64 {d16-d17}, [r4 :128]
   4196e: ac28        add r4, sp, #160  ; 0xa0
   41970: f944 0aef   vst1.64 {d16-d17}, [r4 :128]
   41974: f963 0aef   vld1.64 {d16-d17}, [r3 :128]
   41978: f50d 63a4   add.w r3, sp, #1312 ; 0x520
   4197c: f943 0aef   vst1.64 {d16-d17}, [r3 :128]
   41980: f963 0aef   vld1.64 {d16-d17}, [r3 :128]
   41984: f50d 63a2   add.w r3, sp, #1296 ; 0x510
   41988: f943 0aef   vst1.64 {d16-d17}, [r3 :128]
   4198c: f963 0aef   vld1.64 {d16-d17}, [r3 :128]
   41990: f96c 2aef   vld1.64 {d18-d19}, [ip :128]
   41994: f50d 63a0   add.w r3, sp, #1280 ; 0x500
   41998: f943 0aef   vst1.64 {d16-d17}, [r3 :128]
   4199c: f50d 6c9e   add.w ip, sp, #1264 ; 0x4f0
   419a0: f94c 2aef   vst1.64 {d18-d19}, [ip :128]
   419a4: f963 0aef   vld1.64 {d16-d17}, [r3 :128]
   419a8: f96c 2aef   vld1.64 {d18-d19}, [ip :128]
   419ac: ef40 01f2   vand  q8, q8, q9
   419b0: f50d 639c   add.w r3, sp, #1248 ; 0x4e0
   419b4: f943 0aef   vst1.64 {d16-d17}, [r3 :128]
   419b8: f963 0aef   vld1.64 {d16-d17}, [r3 :128]
   419bc: f50d 639a   add.w r3, sp, #1232 ; 0x4d0
   419c0: f943 0aef   vst1.64 {d16-d17}, [r3 :128]
   419c4: f963 0aef   vld1.64 {d16-d17}, [r3 :128]
   419c8: f50d 6398   add.w r3, sp, #1216 ; 0x4c0
   419cc: f943 0aef   vst1.64 {d16-d17}, [r3 :128]
   419d0: f963 0aef   vld1.64 {d16-d17}, [r3 :128]
   419d4: ab24        add r3, sp, #144  ; 0x90
   419d6: f943 0aef   vst1.64 {d16-d17}, [r3 :128]
   419da: f96e 0aef   vld1.64 {d16-d17}, [lr :128]
   419de: f960 2aef   vld1.64 {d18-d19}, [r0 :128]
   419e2: f50d 6c96   add.w ip, sp, #1200 ; 0x4b0
   419e6: f94c 0aef   vst1.64 {d16-d17}, [ip :128]
   419ea: f50d 6594   add.w r5, sp, #1184 ; 0x4a0
   419ee: f945 2aef   vst1.64 {d18-d19}, [r5 :128]
   419f2: f96c 0aef   vld1.64 {d16-d17}, [ip :128]
   419f6: f965 2aef   vld1.64 {d18-d19}, [r5 :128]
   419fa: ef50 09f2   vmul.i16  q8, q8, q9
   419fe: f50d 6c92   add.w ip, sp, #1168 ; 0x490
   41a02: f94c 0aef   vst1.64 {d16-d17}, [ip :128]
   41a06: f96c 0aef   vld1.64 {d16-d17}, [ip :128]
   41a0a: f10d 0c80   add.w ip, sp, #128  ; 0x80
   41a0e: f94c 0aef   vst1.64 {d16-d17}, [ip :128]
   41a12: f96e 0aef   vld1.64 {d16-d17}, [lr :128]
   41a16: f962 2aef   vld1.64 {d18-d19}, [r2 :128]
   41a1a: f50d 6e90   add.w lr, sp, #1152 ; 0x480
   41a1e: f94e 0aef   vst1.64 {d16-d17}, [lr :128]
   41a22: f50d 658e   add.w r5, sp, #1136 ; 0x470
   41a26: f945 2aef   vst1.64 {d18-d19}, [r5 :128]
   41a2a: f96e 0aef   vld1.64 {d16-d17}, [lr :128]
   41a2e: f965 2aef   vld1.64 {d18-d19}, [r5 :128]
   41a32: ef50 09f2   vmul.i16  q8, q8, q9
   41a36: f50d 6e8c   add.w lr, sp, #1120 ; 0x460
   41a3a: f94e 0aef   vst1.64 {d16-d17}, [lr :128]
   41a3e: f96e 0aef   vld1.64 {d16-d17}, [lr :128]
   41a42: f10d 0e70   add.w lr, sp, #112  ; 0x70
   41a46: f94e 0aef   vst1.64 {d16-d17}, [lr :128]
   41a4a: f64f 75b6   movw  r5, #65462  ; 0xffb6
   41a4e: f8ad 545e   strh.w  r5, [sp, #1118] ; 0x45e
   41a52: f20d 455e   addw  r5, sp, #1118 ; 0x45e
   41a56: f9e5 0c7f   vld1.16 {d16[]-d17[]}, [r5 :16]
   41a5a: f50d 6586   add.w r5, sp, #1072 ; 0x430
   41a5e: f945 0aef   vst1.64 {d16-d17}, [r5 :128]
   41a62: f965 0aef   vld1.64 {d16-d17}, [r5 :128]
   41a66: f50d 6588   add.w r5, sp, #1088 ; 0x440
   41a6a: f945 0aef   vst1.64 {d16-d17}, [r5 :128]
   41a6e: f965 0aef   vld1.64 {d16-d17}, [r5 :128]
   41a72: f940 0aef   vst1.64 {d16-d17}, [r0 :128]
   41a76: f64f 75a2   movw  r5, #65442  ; 0xffa2
   41a7a: f8ad 542e   strh.w  r5, [sp, #1070] ; 0x42e
   41a7e: f20d 452e   addw  r5, sp, #1070 ; 0x42e
   41a82: f9e5 0c7f   vld1.16 {d16[]-d17[]}, [r5 :16]
   41a86: f50d 6580   add.w r5, sp, #1024 ; 0x400
   41a8a: f945 0aef   vst1.64 {d16-d17}, [r5 :128]
   41a8e: f965 0aef   vld1.64 {d16-d17}, [r5 :128]
   41a92: f50d 6582   add.w r5, sp, #1040 ; 0x410
   41a96: f945 0aef   vst1.64 {d16-d17}, [r5 :128]
   41a9a: f965 0aef   vld1.64 {d16-d17}, [r5 :128]
   41a9e: f942 0aef   vst1.64 {d16-d17}, [r2 :128]
   41aa2: f96c 0aef   vld1.64 {d16-d17}, [ip :128]
   41aa6: f964 2aef   vld1.64 {d18-d19}, [r4 :128]
   41aaa: f960 4aef   vld1.64 {d20-d21}, [r0 :128]
   41aae: adfc        add r5, sp, #1008 ; 0x3f0
   41ab0: f945 0aef   vst1.64 {d16-d17}, [r5 :128]
   41ab4: aef8        add r6, sp, #992  ; 0x3e0
   41ab6: f946 2aef   vst1.64 {d18-d19}, [r6 :128]
   41aba: f50d 7874   add.w r8, sp, #976  ; 0x3d0
   41abe: f948 4aef   vst1.64 {d20-d21}, [r8 :128]
   41ac2: f965 0aef   vld1.64 {d16-d17}, [r5 :128]
   41ac6: f966 2aef   vld1.64 {d18-d19}, [r6 :128]
   41aca: f968 4aef   vld1.64 {d20-d21}, [r8 :128]
   41ace: ef52 09e4   vmla.i16  q8, q9, q10
   41ad2: adf0        add r5, sp, #960  ; 0x3c0
   41ad4: f945 0aef   vst1.64 {d16-d17}, [r5 :128]
   41ad8: f965 0aef   vld1.64 {d16-d17}, [r5 :128]
   41adc: f94c 0aef   vst1.64 {d16-d17}, [ip :128]
   41ae0: f96e 0aef   vld1.64 {d16-d17}, [lr :128]
   41ae4: f964 2aef   vld1.64 {d18-d19}, [r4 :128]
   41ae8: f962 4aef   vld1.64 {d20-d21}, [r2 :128]
   41aec: acec        add r4, sp, #944  ; 0x3b0
   41aee: f944 0aef   vst1.64 {d16-d17}, [r4 :128]
   41af2: ade8        add r5, sp, #928  ; 0x3a0
   41af4: f945 2aef   vst1.64 {d18-d19}, [r5 :128]
   41af8: aee4        add r6, sp, #912  ; 0x390
   41afa: f946 4aef   vst1.64 {d20-d21}, [r6 :128]
   41afe: f964 0aef   vld1.64 {d16-d17}, [r4 :128]
   41b02: f965 2aef   vld1.64 {d18-d19}, [r5 :128]
   41b06: f966 4aef   vld1.64 {d20-d21}, [r6 :128]
   41b0a: ef52 09e4   vmla.i16  q8, q9, q10
   41b0e: ace0        add r4, sp, #896  ; 0x380
   41b10: f944 0aef   vst1.64 {d16-d17}, [r4 :128]
   41b14: f964 0aef   vld1.64 {d16-d17}, [r4 :128]
   41b18: f94e 0aef   vst1.64 {d16-d17}, [lr :128]
   41b1c: f8ad 137e   strh.w  r1, [sp, #894]  ; 0x37e
   41b20: f20d 317e   addw  r1, sp, #894  ; 0x37e
   41b24: f9e1 0c7f   vld1.16 {d16[]-d17[]}, [r1 :16]
   41b28: a9d4        add r1, sp, #848  ; 0x350
   41b2a: f941 0aef   vst1.64 {d16-d17}, [r1 :128]
   41b2e: f961 0aef   vld1.64 {d16-d17}, [r1 :128]
   41b32: a9d8        add r1, sp, #864  ; 0x360
   41b34: f941 0aef   vst1.64 {d16-d17}, [r1 :128]
   41b38: f961 0aef   vld1.64 {d16-d17}, [r1 :128]
   41b3c: f940 0aef   vst1.64 {d16-d17}, [r0 :128]
   41b40: f64f 71ee   movw  r1, #65518  ; 0xffee
   41b44: f8ad 134e   strh.w  r1, [sp, #846]  ; 0x34e
   41b48: f20d 314e   addw  r1, sp, #846  ; 0x34e
   41b4c: f9e1 0c7f   vld1.16 {d16[]-d17[]}, [r1 :16]
   41b50: a9c8        add r1, sp, #800  ; 0x320
   41b52: f941 0aef   vst1.64 {d16-d17}, [r1 :128]
   41b56: f961 0aef   vld1.64 {d16-d17}, [r1 :128]
   41b5a: a9cc        add r1, sp, #816  ; 0x330
   41b5c: f941 0aef   vst1.64 {d16-d17}, [r1 :128]
   41b60: f961 0aef   vld1.64 {d16-d17}, [r1 :128]
   41b64: f942 0aef   vst1.64 {d16-d17}, [r2 :128]
   41b68: f96c 0aef   vld1.64 {d16-d17}, [ip :128]
   41b6c: f963 2aef   vld1.64 {d18-d19}, [r3 :128]
   41b70: f960 4aef   vld1.64 {d20-d21}, [r0 :128]
   41b74: a8c4        add r0, sp, #784  ; 0x310
   41b76: f940 0aef   vst1.64 {d16-d17}, [r0 :128]
   41b7a: a9c0        add r1, sp, #768  ; 0x300
   41b7c: f941 2aef   vst1.64 {d18-d19}, [r1 :128]
   41b80: acbc        add r4, sp, #752  ; 0x2f0
   41b82: f944 4aef   vst1.64 {d20-d21}, [r4 :128]
   41b86: f960 0aef   vld1.64 {d16-d17}, [r0 :128]
   41b8a: f961 2aef   vld1.64 {d18-d19}, [r1 :128]
   41b8e: f964 4aef   vld1.64 {d20-d21}, [r4 :128]
   41b92: ef52 09e4   vmla.i16  q8, q9, q10
   41b96: a8b8        add r0, sp, #736  ; 0x2e0
   41b98: f940 0aef   vst1.64 {d16-d17}, [r0 :128]
   41b9c: f960 0aef   vld1.64 {d16-d17}, [r0 :128]
   41ba0: f94c 0aef   vst1.64 {d16-d17}, [ip :128]
   41ba4: f96e 0aef   vld1.64 {d16-d17}, [lr :128]
   41ba8: f963 2aef   vld1.64 {d18-d19}, [r3 :128]
   41bac: f962 4aef   vld1.64 {d20-d21}, [r2 :128]
   41bb0: a8b4        add r0, sp, #720  ; 0x2d0
   41bb2: f940 0aef   vst1.64 {d16-d17}, [r0 :128]
   41bb6: a9b0        add r1, sp, #704  ; 0x2c0
   41bb8: f941 2aef   vst1.64 {d18-d19}, [r1 :128]
   41bbc: aaac        add r2, sp, #688  ; 0x2b0
   41bbe: f942 4aef   vst1.64 {d20-d21}, [r2 :128]
   41bc2: f960 0aef   vld1.64 {d16-d17}, [r0 :128]
   41bc6: f961 2aef   vld1.64 {d18-d19}, [r1 :128]
   41bca: f962 4aef   vld1.64 {d20-d21}, [r2 :128]
   41bce: ef52 09e4   vmla.i16  q8, q9, q10
   41bd2: a8a8        add r0, sp, #672  ; 0x2a0
   41bd4: f940 0aef   vst1.64 {d16-d17}, [r0 :128]
   41bd8: f960 0aef   vld1.64 {d16-d17}, [r0 :128]
   41bdc: f94e 0aef   vst1.64 {d16-d17}, [lr :128]
   41be0: f96c 0aef   vld1.64 {d16-d17}, [ip :128]
   41be4: a864        add r0, sp, #400  ; 0x190
   41be6: f960 2aef   vld1.64 {d18-d19}, [r0 :128]
   41bea: a9a4        add r1, sp, #656  ; 0x290
   41bec: f941 0aef   vst1.64 {d16-d17}, [r1 :128]
   41bf0: aaa0        add r2, sp, #640  ; 0x280
   41bf2: f942 2aef   vst1.64 {d18-d19}, [r2 :128]
   41bf6: f961 0aef   vld1.64 {d16-d17}, [r1 :128]
   41bfa: f962 2aef   vld1.64 {d18-d19}, [r2 :128]
   41bfe: ef50 08e2   vadd.i16  q8, q8, q9
   41c02: a99c        add r1, sp, #624  ; 0x270
   41c04: f941 0aef   vst1.64 {d16-d17}, [r1 :128]
   41c08: f961 0aef   vld1.64 {d16-d17}, [r1 :128]
   41c0c: f94c 0aef   vst1.64 {d16-d17}, [ip :128]
   41c10: f96e 0aef   vld1.64 {d16-d17}, [lr :128]
   41c14: f960 2aef   vld1.64 {d18-d19}, [r0 :128]
   41c18: a898        add r0, sp, #608  ; 0x260
   41c1a: f940 0aef   vst1.64 {d16-d17}, [r0 :128]
   41c1e: a994        add r1, sp, #592  ; 0x250
   41c20: f941 2aef   vst1.64 {d18-d19}, [r1 :128]
   41c24: f960 0aef   vld1.64 {d16-d17}, [r0 :128]
   41c28: f961 2aef   vld1.64 {d18-d19}, [r1 :128]
   41c2c: ef50 08e2   vadd.i16  q8, q8, q9
   41c30: a890        add r0, sp, #576  ; 0x240
   41c32: f940 0aef   vst1.64 {d16-d17}, [r0 :128]
   41c36: f960 0aef   vld1.64 {d16-d17}, [r0 :128]
   41c3a: f94e 0aef   vst1.64 {d16-d17}, [lr :128]
   41c3e: f96c 0aef   vld1.64 {d16-d17}, [ip :128]
   41c42: a818        add r0, sp, #96 ; 0x60
   41c44: f940 0aef   vst1.64 {d16-d17}, [r0 :128]
   41c48: f960 0aef   vld1.64 {d16-d17}, [r0 :128]
   41c4c: efc8 6930   vqshrn.s16  d22, q8, #8
   41c50: 9807        ldr r0, [sp, #28]
   41c52: edc0 6b0a   vstr  d22, [r0, #40]  ; 0x28
   41c56: edd0 6b0a   vldr  d22, [r0, #40]  ; 0x28
   41c5a: edc0 6b08   vstr  d22, [r0, #32]
   41c5e: edd0 6b08   vldr  d22, [r0, #32]
   41c62: edd0 7b56   vldr  d23, [r0, #344] ; 0x158
   41c66: edc0 6b82   vstr  d22, [r0, #520] ; 0x208
   41c6a: edc0 7b80   vstr  d23, [r0, #512] ; 0x200
   41c6e: edd0 6b82   vldr  d22, [r0, #520] ; 0x208
   41c72: edd0 7b80   vldr  d23, [r0, #512] ; 0x200
   41c76: ef46 68a7   vadd.i8 d22, d22, d23
   41c7a: edc0 6b7e   vstr  d22, [r0, #504] ; 0x1f8
   41c7e: edd0 6b7e   vldr  d22, [r0, #504] ; 0x1f8
   41c82: edc0 6b7c   vstr  d22, [r0, #496] ; 0x1f0
   41c86: edd0 6b7c   vldr  d22, [r0, #496] ; 0x1f0
   41c8a: edc0 6b7a   vstr  d22, [r0, #488] ; 0x1e8
   41c8e: edd0 6b7a   vldr  d22, [r0, #488] ; 0x1e8
   41c92: 9908        ldr r1, [sp, #32]
   41c94: edc1 6bee   vstr  d22, [r1, #952] ; 0x3b8
   41c98: f96e 0aef   vld1.64 {d16-d17}, [lr :128]
   41c9c: aa10        add r2, sp, #64 ; 0x40
   41c9e: f942 0aef   vst1.64 {d16-d17}, [r2 :128]
   41ca2: f962 0aef   vld1.64 {d16-d17}, [r2 :128]
   41ca6: efc8 6930   vqshrn.s16  d22, q8, #8
   41caa: edc0 6b02   vstr  d22, [r0, #8]
   41cae: edd0 6b02   vldr  d22, [r0, #8]
   41cb2: edc0 6b00   vstr  d22, [r0]
   41cb6: edd0 6b00   vldr  d22, [r0]
   41cba: edd0 7b56   vldr  d23, [r0, #344] ; 0x158
   41cbe: edc0 6b78   vstr  d22, [r0, #480] ; 0x1e0
   41cc2: edc0 7b76   vstr  d23, [r0, #472] ; 0x1d8
   41cc6: edd0 6b78   vldr  d22, [r0, #480] ; 0x1e0
   41cca: edd0 7b76   vldr  d23, [r0, #472] ; 0x1d8
   41cce: ef46 68a7   vadd.i8 d22, d22, d23
   41cd2: edc0 6b74   vstr  d22, [r0, #464] ; 0x1d0
   41cd6: edd0 6b74   vldr  d22, [r0, #464] ; 0x1d0
   41cda: edc0 6b72   vstr  d22, [r0, #456] ; 0x1c8
   41cde: edd0 6b72   vldr  d22, [r0, #456] ; 0x1c8
   41ce2: edc0 6b70   vstr  d22, [r0, #448] ; 0x1c0
   41ce6: edd0 6b70   vldr  d22, [r0, #448] ; 0x1c0
   41cea: edc1 6bec   vstr  d22, [r1, #944] ; 0x3b0
   41cee: f50d 6226   add.w r2, sp, #2656 ; 0xa60
   41cf2: f962 0acf   vld1.64 {d16-d17}, [r2]
   41cf6: f50d 6225   add.w r2, sp, #2640 ; 0xa50
   41cfa: f942 0acf   vst1.64 {d16-d17}, [r2]
   41cfe: 9b6f        ldr r3, [sp, #444]  ; 0x1bc
   41d00: f8dd c154   ldr.w ip, [sp, #340]  ; 0x154
   41d04: 4463        add r3, ip
   41d06: f962 671d   vld1.8  {d22}, [r2 :64]!
   41d0a: edd2 7b00   vldr  d23, [r2]
   41d0e: eef0 0b66   vmov.f64  d16, d22
   41d12: eef0 1b67   vmov.f64  d17, d23
   41d16: f943 080f   vst2.8  {d16-d17}, [r3]
   41d1a: 9a55        ldr r2, [sp, #340]  ; 0x154
   41d1c: 3210        adds  r2, #16
   41d1e: 9255        str r2, [sp, #340]  ; 0x154
   41d20: e7ff        b.n 41d22 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xaea>
   41d22: e7ff        b.n 41d24 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xaec>
   41d24: 9854        ldr r0, [sp, #336]  ; 0x150
   41d26: 3001        adds  r0, #1
   41d28: 9054        str r0, [sp, #336]  ; 0x150
   41d2a: f7ff bb25   b.w 41378 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0x140>
   41d2e: 986d        ldr r0, [sp, #436]  ; 0x1b4
   41d30: f020 000f   bic.w r0, r0, #15
   41d34: 9054        str r0, [sp, #336]  ; 0x150
   41d36: e7ff        b.n 41d38 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xb00>
   41d38: 9854        ldr r0, [sp, #336]  ; 0x150
   41d3a: 996d        ldr r1, [sp, #436]  ; 0x1b4
   41d3c: 4288        cmp r0, r1
   41d3e: da7e        bge.n 41e3e <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xc06>
   41d40: e7ff        b.n 41d42 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xb0a>
   41d42: 986e        ldr r0, [sp, #440]  ; 0x1b8
   41d44: 7880        ldrb  r0, [r0, #2]
   41d46: f88d 002f   strb.w  r0, [sp, #47] ; 0x2f
   41d4a: 986e        ldr r0, [sp, #440]  ; 0x1b8
   41d4c: 7840        ldrb  r0, [r0, #1]
   41d4e: f88d 002e   strb.w  r0, [sp, #46] ; 0x2e
   41d52: 986e        ldr r0, [sp, #440]  ; 0x1b8
   41d54: 7800        ldrb  r0, [r0, #0]
   41d56: f88d 002d   strb.w  r0, [sp, #45] ; 0x2d
   41d5a: 986e        ldr r0, [sp, #440]  ; 0x1b8
   41d5c: 3004        adds  r0, #4
   41d5e: 906e        str r0, [sp, #440]  ; 0x1b8
   41d60: f89d 002f   ldrb.w  r0, [sp, #47] ; 0x2f
   41d64: eb00 1040   add.w r0, r0, r0, lsl #5
   41d68: f89d 102e   ldrb.w  r1, [sp, #46] ; 0x2e
   41d6c: eb01 11c1   add.w r1, r1, r1, lsl #7
   41d70: eb01 0040   add.w r0, r1, r0, lsl #1
   41d74: f89d 102d   ldrb.w  r1, [sp, #45] ; 0x2d
   41d78: 2219        movs  r2, #25
   41d7a: fb11 0002   smlabb  r0, r1, r2, r0
   41d7e: 3080        adds  r0, #128  ; 0x80
   41d80: 2110        movs  r1, #16
   41d82: eb01 2010   add.w r0, r1, r0, lsr #8
   41d86: f88d 002c   strb.w  r0, [sp, #44] ; 0x2c
   41d8a: f89d 002f   ldrb.w  r0, [sp, #47] ; 0x2f
   41d8e: f06f 0125   mvn.w r1, #37 ; 0x25
   41d92: fb10 f001   smulbb  r0, r0, r1
   41d96: f89d 102e   ldrb.w  r1, [sp, #46] ; 0x2e
   41d9a: 224a        movs  r2, #74 ; 0x4a
   41d9c: fb01 0012   mls r0, r1, r2, r0
   41da0: f89d 102d   ldrb.w  r1, [sp, #45] ; 0x2d
   41da4: ebc1 01c1   rsb r1, r1, r1, lsl #3
   41da8: eb00 1001   add.w r0, r0, r1, lsl #4
   41dac: 3080        adds  r0, #128  ; 0x80
   41dae: 2180        movs  r1, #128  ; 0x80
   41db0: eb01 2010   add.w r0, r1, r0, lsr #8
   41db4: f88d 002b   strb.w  r0, [sp, #43] ; 0x2b
   41db8: f89d 002f   ldrb.w  r0, [sp, #47] ; 0x2f
   41dbc: ebc0 00c0   rsb r0, r0, r0, lsl #3
   41dc0: f89d 202e   ldrb.w  r2, [sp, #46] ; 0x2e
   41dc4: 235e        movs  r3, #94 ; 0x5e
   41dc6: fb12 f203   smulbb  r2, r2, r3
   41dca: ebc2 1000   rsb r0, r2, r0, lsl #4
   41dce: f89d 202d   ldrb.w  r2, [sp, #45] ; 0x2d
   41dd2: eb02 02c2   add.w r2, r2, r2, lsl #3
   41dd6: eba0 0042   sub.w r0, r0, r2, lsl #1
   41dda: 3080        adds  r0, #128  ; 0x80
   41ddc: eb01 2010   add.w r0, r1, r0, lsr #8
   41de0: f88d 002a   strb.w  r0, [sp, #42] ; 0x2a
   41de4: f89d 002c   ldrb.w  r0, [sp, #44] ; 0x2c
   41de8: 996f        ldr r1, [sp, #444]  ; 0x1bc
   41dea: 9a56        ldr r2, [sp, #344]  ; 0x158
   41dec: 1c53        adds  r3, r2, #1
   41dee: 9356        str r3, [sp, #344]  ; 0x158
   41df0: 5488        strb  r0, [r1, r2]
   41df2: 9853        ldr r0, [sp, #332]  ; 0x14c
   41df4: eb00 71d0   add.w r1, r0, r0, lsr #31
   41df8: f021 0101   bic.w r1, r1, #1
   41dfc: 1a40        subs  r0, r0, r1
   41dfe: 2800        cmp r0, #0
   41e00: d118        bne.n 41e34 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xbfc>
   41e02: e7ff        b.n 41e04 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xbcc>
   41e04: 9854        ldr r0, [sp, #336]  ; 0x150
   41e06: eb00 71d0   add.w r1, r0, r0, lsr #31
   41e0a: f021 0101   bic.w r1, r1, #1
   41e0e: 1a40        subs  r0, r0, r1
   41e10: 2800        cmp r0, #0
   41e12: d10f        bne.n 41e34 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xbfc>
   41e14: e7ff        b.n 41e16 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xbde>
   41e16: f89d 002a   ldrb.w  r0, [sp, #42] ; 0x2a
   41e1a: 996f        ldr r1, [sp, #444]  ; 0x1bc
   41e1c: 9a55        ldr r2, [sp, #340]  ; 0x154
   41e1e: 1c53        adds  r3, r2, #1
   41e20: 9355        str r3, [sp, #340]  ; 0x154
   41e22: 5488        strb  r0, [r1, r2]
   41e24: f89d 002b   ldrb.w  r0, [sp, #43] ; 0x2b
   41e28: 996f        ldr r1, [sp, #444]  ; 0x1bc
   41e2a: 9a55        ldr r2, [sp, #340]  ; 0x154
   41e2c: 1c53        adds  r3, r2, #1
   41e2e: 9355        str r3, [sp, #340]  ; 0x154
   41e30: 5488        strb  r0, [r1, r2]
   41e32: e7ff        b.n 41e34 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xbfc>
   41e34: e7ff        b.n 41e36 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xbfe>
   41e36: 9854        ldr r0, [sp, #336]  ; 0x150
   41e38: 3001        adds  r0, #1
   41e3a: 9054        str r0, [sp, #336]  ; 0x150
   41e3c: e77c        b.n 41d38 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xb00>
   41e3e: e7ff        b.n 41e40 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xc08>
   41e40: 9853        ldr r0, [sp, #332]  ; 0x14c
   41e42: 3001        adds  r0, #1
   41e44: 9053        str r0, [sp, #332]  ; 0x14c
   41e46: f7ff ba8e   b.w 41366 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0x12e>
   41e4a: 4809        ldr r0, [pc, #36] ; (41e70 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xc38>)
   41e4c: 4478        add r0, pc
   41e4e: 6800        ldr r0, [r0, #0]
   41e50: 6800        ldr r0, [r0, #0]
   41e52: 9909        ldr r1, [sp, #36] ; 0x24
   41e54: 4288        cmp r0, r1
   41e56: d106        bne.n 41e66 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xc2e>
   41e58: e7ff        b.n 41e5a <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xc22>
   41e5a: f1a7 0418   sub.w r4, r7, #24
   41e5e: 46a5        mov sp, r4
   41e60: e8bd 0b00   ldmia.w sp!, {r8, r9, fp}
   41e64: bdf0        pop {r4, r5, r6, r7, pc}
   41e66: f7f8 ec6e   blx 3a744 <__stack_chk_fail@plt>
   41e6a: bf00        nop
   41e6c: 001ef0f8  .word 0x001ef0f8
   41e70: 001ee50c  .word 0x001ee50c

NEON 版在我的手机上反而是“负优化”……跑得比朴素的 CPU 版还慢。看来网上给的 NEON 代码也未必靠谱,还是得亲自实践对比!(注:从上面的反汇编可以看到,几乎每个中间结果都是先用 vst1 存回栈上、紧接着又用 vld1 读出来,这看起来像是未开启编译优化时生成的代码,可能也是变慢的原因之一,有待确认。)

image.png

经过我的优化后,NEON 版的耗时降到了 53 ms 左右,循环展开(一次)版约为 51 ms;测试所用原图大小为 1600×1873。

image.png

目录
相关文章
|
7月前
|
存储 编解码 Android开发
NV21、NV12、YV12、RGB、YUV、RGBA、RGBX8888等图像色彩编码格式区别
NV21、NV12、YV12、RGB、YUV、RGBA、RGBX8888都是常见的图像颜色编码格式,它们之间的主要区别在于色彩空间和数据排列方式。
133 0
|
7月前
|
编解码 API 开发工具
NV21、NV12、YV12、RGB565、YUV等颜色编码格式区别和接口设计探讨
NV21、NV12、YV12、RGB565、YUV分别是不同的颜色编码格式,这些颜色编码格式各有特点,适用于不同的应用场景。选择合适的颜色编码格式取决于具体的需求和环境:
143 1
|
12月前
|
存储 编解码 算法
Opengl ES之RGB转NV21
Opengl ES连载系列
115 0
NV21转YUV420SP的代码
NV21转YUV420SP的代码
57 0
RGB转换为NV12的代码
RGB转换为NV12的代码
184 0
YUV 与 RGB的转换
RGB 转换成 YUV Y = (0.257 * R) + (0.504 * G) + (0.
4717 0
使用FFMPEG的sws_scale函数实现各种原始颜色格式互转(YUV\RGB\)
使用FFMPEG的sws_scale函数实现各种原始颜色格式互转(YUV\RGB\)
728 0
|
编解码 芯片
|
存储 编解码 数据建模