在android arm64平台下,crc32,aes等常用算法有指令集实现。故在android下,可借助这些指令实现代码加速。
使用cpu-features库
注意:crc32x 系列指令与 crc32cx 系列指令的区别在于所使用的生成多项式不同。
如何判断自己的手机是否支持crc32呢? 有三个方法:
方法1,直接查看/proc/cpuinfo
方法2,使用ELF辅助向量 API
// Query the ELF auxiliary vector for the kernel-reported hardware capabilities.
// Requires <sys/auxv.h> (getauxval, AT_HWCAP) and <asm/hwcap.h> (HWCAP_CRC32).
unsigned long hwcap = getauxval(AT_HWCAP);
if (hwcap & HWCAP_CRC32) {
    // The CRC32 capability bit is set.
    return 1;
}
return 0;
方法3,使用cpu-features库(Android NDK 自带):
#include <cpu-features.h>
// Fetch the CPU feature bitmask from the cpu-features library and yield
// 1 when the ARM64 CRC32 feature bit is present, 0 otherwise.
const uint64_t features = android_getCpuFeatures();
return ((features & ANDROID_CPU_ARM64_FEATURE_CRC32) != 0) ? 1 : 0;
这里只讨论使用第三种方法的实现,完整代码如下:
// Everything below is only compiled for Android builds targeting AArch64.
#ifdef __ANDROID__
#ifdef __aarch64__
// Thin wrappers around the AArch64 CRC32 instructions, written as GCC/Clang
// extended inline assembly.
//
// In every macro, `crc` is a read-write register operand ("+r"): the
// instruction reads the running checksum from it and writes the updated
// checksum back, so `crc` must be a modifiable lvalue. `value` is a read-only
// register operand ("r") holding the data to fold in. The %w / %x operand
// modifiers select the 32-bit (Wn) or 64-bit (Xn) view of the register.
//
// CRC32{X,W,H,B}: crc32x/crc32w/crc32h/crc32b instructions
// (polynomial 0x04C11DB7). X uses the 64-bit view of `value`; W, H and B use
// the 32-bit view.
#define CRC32X(crc, value) __asm__("crc32x %w[c], %w[c], %x[v]":[c]"+r"(crc):[v]"r"(value))
#define CRC32W(crc, value) __asm__("crc32w %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
#define CRC32H(crc, value) __asm__("crc32h %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
#define CRC32B(crc, value) __asm__("crc32b %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
// CRC32C{X,W,H,B}: the same four operand shapes using the crc32c* instructions
// (polynomial 0x1EDC6F41). Results are NOT interchangeable with the
// CRC32* family above.
#define CRC32CX(crc, value) __asm__("crc32cx %w[c], %w[c], %x[v]":[c]"+r"(crc):[v]"r"(value))
#define CRC32CW(crc, value) __asm__("crc32cw %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
#define CRC32CH(crc, value) __asm__("crc32ch %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
#define CRC32CB(crc, value) __asm__("crc32cb %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
/**
 * Hardware-accelerated checksum over a byte buffer using the AArch64
 * crc32c* instructions (via the CRC32CB/CH/CW/CX macros).
 *
 * Note that despite the function name, the crc32c* instruction family is
 * used, so the result is a CRC-32C value, not a CRC-32 value as produced by
 * the crc32b/h/w/x family.
 *
 * @param crc   Running checksum from a previous call. It is XOR-ed with
 *              0xffffffff on entry and the result is bit-inverted on exit.
 * @param chunk Pointer to the data to checksum; read only, any alignment.
 * @param size  Number of bytes readable at `chunk`.
 * @return The updated checksum, or 0 when the CPU does not report the ARM64
 *         CRC32 feature. A return of 0 is therefore indistinguishable from a
 *         genuine checksum of 0; callers that need to tell the two apart
 *         must check the CPU feature themselves.
 */
uint32_t __arm64_accelerate_crc32(uint32_t crc, const void* chunk, size_t size) {
    uint64_t cap = android_getCpuFeatures();
    if (!(cap & ANDROID_CPU_ARM64_FEATURE_CRC32)) return 0;

    uint32_t l = crc ^ 0xffffffffu;   // working checksum, pre-inverted
    size_t len = size;                // bytes still to consume
    const uint8_t* p = static_cast<const uint8_t *>(chunk);

    // Each STEPn folds n bytes into `l` and advances `p` / `len`.
    // Multi-byte loads go through __builtin_memcpy rather than a pointer cast:
    // the buffer may be unaligned and is const, and dereferencing a casted
    // pointer would violate strict aliasing. The compiler lowers the copy to a
    // single load instruction.
#define STEP1 do { \
        CRC32CB(l, *p++); \
        len--; \
    } while (0)
#define STEP2 do { \
        uint16_t v16; \
        __builtin_memcpy(&v16, p, sizeof(v16)); \
        CRC32CH(l, v16); \
        p += 2; \
        len -= 2; \
    } while (0)
#define STEP4 do { \
        uint32_t v32; \
        __builtin_memcpy(&v32, p, sizeof(v32)); \
        CRC32CW(l, v32); \
        p += 4; \
        len -= 4; \
    } while (0)
#define STEP8 do { \
        uint64_t v64; \
        __builtin_memcpy(&v64, p, sizeof(v64)); \
        CRC32CX(l, v64); \
        p += 8; \
        len -= 8; \
    } while (0)
    // Eight 8-byte steps = 64 bytes; used to keep the unrolled ladder readable.
#define STEP64 do { \
        STEP8; STEP8; STEP8; STEP8; \
        STEP8; STEP8; STEP8; STEP8; \
    } while (0)

    // Main loop: fully unrolled, 512 bytes (64 x 8-byte steps) per iteration.
    while (len >= 512) {
        STEP64; STEP64; STEP64; STEP64;
        STEP64; STEP64; STEP64; STEP64;
    }
    // Fewer than 512 bytes remain, so each power-of-two tail size can occur at
    // most once; plain `if` tests are enough and avoid loop overhead.
    if (len >= 256) {
        STEP64; STEP64; STEP64; STEP64;
    }
    if (len >= 128) {
        STEP64; STEP64;
    }
    if (len >= 64) {
        STEP64;
    }
    if (len >= 32) {
        STEP8; STEP8; STEP8; STEP8;
    }
    if (len >= 16) {
        STEP8; STEP8;
    }
    if (len >= 8) {
        STEP8;
    }
    if (len >= 4) {
        STEP4;
    }
    if (len >= 2) {
        STEP2;
    }
    if (len >= 1) {
        STEP1;
    }
#undef STEP64
#undef STEP8
#undef STEP4
#undef STEP2
#undef STEP1
    return ~l;
}
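注意:crc32x 系列指令与 crc32cx 系列指令的区别在于所使用的生成多项式不同。
其中 crc32x 系列的多项式是 0x04C11DB7,而 crc32cx 系列的多项式是 0x1EDC6F41。上面的完整代码使用的是 crc32c* 系列指令,因此计算得到的是 CRC-32C 校验值;如果需要 0x04C11DB7 多项式的结果,应改用 CRC32B/CRC32H/CRC32W/CRC32X 这一组宏。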
使用clock计算CPU的时间测试如下:
测试机器为 nubia 手机(Android 5.0),测试数据为 200 字节的随机字符串