前面《Opus从入门到精通(二):编解码器使用》介绍了Opus编解码器的API,这篇文章介绍编码API的具体使用示例,分Android、iOS、Linux三个系统进行实现.
编码是我们对脉冲编码调制(Pulse Code Modulation,PCM)的数据进行压缩操作,我们通常通过操作系统麦克风API获取PCM数据,或者从已存储的现成文件中读取PCM数据:
- 麦克风回调二进制: - Android的AudioRecord - iOS的Audio Unit
- 麦克风存储到文件: - Android的MediaRecorder - IOS的AVFoundation AVAudioRecorder
PCM数据大小怎么计算呢?根据采样率、采样格式、声道数计算.根据前面文章《音视频之音频知识入门》的介绍: PCM文件大小 = 采样率 * 采样格式 * 声道数 * 录制时长 采样率即一秒多少个采样,采样格式指一个采样占多少字节,通常一个采样使用一个字节或者两个字节,所以采样率*采样格式计算出一秒钟一个声道PCM多少字节,乘以声道数,算出一秒钟PCM大小,再乘以时长就可以计算出PCM文件大小.
下面分别使用Android平台的麦克风二进制回调方式和iOS平台的麦克风存储到文件方式采集,并使用OPUS编码器进行编码.
Android平台编码程序实现
配置工程
AndroidStudio新建工程,再建一个Android Library模块library,在library下面新建类OpusUtil,并写好native方法:
/**
 * Java-side declarations of the native Opus encoder functions.
 * The implementations are loaded from the "opusutil-lib" native library.
 */
public class OpusUtil {

    static {
        System.loadLibrary("opusutil-lib");
    }

    /**
     * Creates an encoder.
     *
     * @return the native encoder handle
     */
    public static native long _createOpusEncoder(int sampleRateInHz,
                                                 int channel,
                                                 int bitrate,
                                                 int complexity);

    /**
     * Encodes one frame of PCM data.
     *
     * @param enc     handle returned by {@link #_createOpusEncoder}
     * @param buffer  PCM samples
     * @param offset  index of the first sample to encode
     * @param encoded output array for the encoded bytes
     */
    public static native int _encodeOpus(long enc,
                                         short[] buffer,
                                         int offset,
                                         byte[] encoded);

    /**
     * Releases the encoder.
     *
     * @param enc handle returned by {@link #_createOpusEncoder}
     */
    public static native void _destroyOpusEncoder(long enc);
}
在src/main下面新建cpp目录,把从官网下载的opus编码器拷贝到cpp下,新建media_jni.c用于实现JNI方法 在library根目录下创建我们的CMakeLists.txt,并在build.gradle下面配置cmake文件:
// Tell the build which CMake script describes the native library.
externalNativeBuild {
    cmake {
        path file('CMakeLists.txt')
    }
}
CMakeLists.txt将opus编码器文件配置好:
# For more information about using CMake with Android Studio, read the # documentation: https://d.android.com/studio/projects/add-native-code.html # Sets the minimum version of CMake required to build the native library. cmake_minimum_required(VERSION 3.4.1) set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -s") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -s") set(libs_include_opus_DIR src/main/cpp/libopus) include_directories( ${libs_include_opus_DIR}/include ${libs_include_opus_DIR}/celt ${libs_include_opus_DIR}/silk ${libs_include_opus_DIR}/silk/float ${libs_include_opus_DIR}/src) add_library( opusutil-lib SHARED src/main/cpp/util.c src/main/cpp/media_jni.c src/main/cpp/jni_utils.c src/main/cpp/libopus/src/opus_multistream_decoder.c src/main/cpp/libopus/src/opus_multistream_encoder.c src/main/cpp/libopus/src/opus_multistream.c src/main/cpp/libopus/src/opus_encoder.c src/main/cpp/libopus/celt/celt_encoder.c src/main/cpp/libopus/celt/bands.c src/main/cpp/libopus/celt/entcode.c src/main/cpp/libopus/celt/entdec.c src/main/cpp/libopus/celt/entenc.c src/main/cpp/libopus/celt/mathops.c src/main/cpp/libopus/celt/vq.c src/main/cpp/libopus/celt/cwrs.c src/main/cpp/libopus/celt/celt.c src/main/cpp/libopus/celt/mdct.c src/main/cpp/libopus/celt/kiss_fft.c src/main/cpp/libopus/celt/bands.c src/main/cpp/libopus/celt/pitch.c src/main/cpp/libopus/celt/celt_lpc.c src/main/cpp/libopus/celt/quant_bands.c src/main/cpp/libopus/celt/laplace.c src/main/cpp/libopus/celt/modes.c src/main/cpp/libopus/celt/rate.c src/main/cpp/libopus/silk/lin2log.c src/main/cpp/libopus/silk/enc_API.c src/main/cpp/libopus/silk/resampler.c src/main/cpp/libopus/silk/resampler_private_IIR_FIR.c src/main/cpp/libopus/silk/resampler_private_up2_HQ.c src/main/cpp/libopus/silk/resampler_private_down_FIR.c src/main/cpp/libopus/silk/resampler_private_AR2.c src/main/cpp/libopus/silk/resampler_rom.c src/main/cpp/libopus/silk/float/encode_frame_FLP.c src/main/cpp/libopus/silk/gain_quant.c 
src/main/cpp/libopus/silk/log2lin.c src/main/cpp/libopus/silk/encode_pulses.c src/main/cpp/libopus/silk/code_signs.c src/main/cpp/libopus/silk/tables_pulses_per_block.c src/main/cpp/libopus/silk/tables_other.c src/main/cpp/libopus/silk/shell_coder.c src/main/cpp/libopus/silk/encode_indices.c src/main/cpp/libopus/silk/tables_LTP.c src/main/cpp/libopus/silk/tables_pitch_lag.c src/main/cpp/libopus/silk/NLSF_unpack.c src/main/cpp/libopus/silk/tables_gain.c src/main/cpp/libopus/silk/float/wrappers_FLP.c src/main/cpp/libopus/silk/quant_LTP_gains.c src/main/cpp/libopus/silk/VQ_WMat_EC.c src/main/cpp/libopus/silk/NSQ.c src/main/cpp/libopus/silk/LPC_analysis_filter.c src/main/cpp/libopus/silk/NSQ_del_dec.c src/main/cpp/libopus/silk/process_NLSFs.c src/main/cpp/libopus/silk/NLSF2A.c src/main/cpp/libopus/silk/bwexpander_32.c src/main/cpp/libopus/silk/LPC_inv_pred_gain.c src/main/cpp/libopus/silk/table_LSF_cos.c src/main/cpp/libopus/silk/NLSF_encode.c src/main/cpp/libopus/silk/NLSF_decode.c src/main/cpp/libopus/silk/NLSF_stabilize.c src/main/cpp/libopus/silk/sort.c src/main/cpp/libopus/silk/NLSF_VQ_weights_laroia.c src/main/cpp/libopus/silk/NLSF_del_dec_quant.c src/main/cpp/libopus/silk/NLSF_VQ.c src/main/cpp/libopus/silk/interpolate.c src/main/cpp/libopus/silk/float/wrappers_FLP.c src/main/cpp/libopus/silk/A2NLSF.c src/main/cpp/libopus/silk/float/process_gains_FLP.c src/main/cpp/libopus/silk/float/find_pred_coefs_FLP.c src/main/cpp/libopus/silk/float/residual_energy_FLP.c src/main/cpp/libopus/silk/float/energy_FLP.c src/main/cpp/libopus/silk/float/LPC_analysis_filter_FLP.c src/main/cpp/libopus/silk/float/find_LPC_FLP.c src/main/cpp/libopus/silk/float/burg_modified_FLP.c src/main/cpp/libopus/silk/float/inner_product_FLP.c src/main/cpp/libopus/silk/float/scale_copy_vector_FLP.c src/main/cpp/libopus/silk/float/LTP_analysis_filter_FLP.c src/main/cpp/libopus/silk/float/LTP_scale_ctrl_FLP.c src/main/cpp/libopus/silk/float/find_LTP_FLP.c 
src/main/cpp/libopus/silk/float/scale_vector_FLP.c src/main/cpp/libopus/silk/float/regularize_correlations_FLP.c src/main/cpp/libopus/silk/float/corrMatrix_FLP.c src/main/cpp/libopus/silk/float/noise_shape_analysis_FLP.c src/main/cpp/libopus/silk/float/bwexpander_FLP.c src/main/cpp/libopus/silk/float/LPC_inv_pred_gain_FLP.c src/main/cpp/libopus/silk/float/autocorrelation_FLP.c src/main/cpp/libopus/silk/float/warped_autocorrelation_FLP.c src/main/cpp/libopus/silk/float/apply_sine_window_FLP.c src/main/cpp/libopus/silk/float/find_pitch_lags_FLP.c src/main/cpp/libopus/silk/float/pitch_analysis_core_FLP.c src/main/cpp/libopus/silk/pitch_est_tables.c src/main/cpp/libopus/silk/float/sort_FLP.c src/main/cpp/libopus/silk/resampler_down2.c src/main/cpp/libopus/silk/resampler_down2_3.c src/main/cpp/libopus/silk/float/k2a_FLP.c src/main/cpp/libopus/silk/float/schur_FLP.c src/main/cpp/libopus/silk/LP_variable_cutoff.c src/main/cpp/libopus/silk/biquad_alt.c src/main/cpp/libopus/silk/VAD.c src/main/cpp/libopus/silk/sigm_Q15.c src/main/cpp/libopus/silk/ana_filt_bank_1.c src/main/cpp/libopus/silk/control_SNR.c src/main/cpp/libopus/silk/stereo_encode_pred.c src/main/cpp/libopus/silk/stereo_LR_to_MS.c src/main/cpp/libopus/silk/stereo_quant_pred.c src/main/cpp/libopus/silk/stereo_find_predictor.c src/main/cpp/libopus/silk/inner_prod_aligned.c src/main/cpp/libopus/silk/sum_sqr_shift.c src/main/cpp/libopus/silk/HP_variable_cutoff.c src/main/cpp/libopus/silk/control_codec.c src/main/cpp/libopus/silk/tables_NLSF_CB_NB_MB.c src/main/cpp/libopus/silk/tables_NLSF_CB_WB.c src/main/cpp/libopus/silk/control_audio_bandwidth.c src/main/cpp/libopus/silk/init_encoder.c src/main/cpp/libopus/silk/check_control_input.c src/main/cpp/libopus/src/analysis.c src/main/cpp/libopus/src/repacketizer.c src/main/cpp/libopus/src/opus.c src/main/cpp/libopus/src/opus_decoder.c src/main/cpp/libopus/src/opus_projection_encoder.c src/main/cpp/libopus/src/opus_projection_decoder.c 
src/main/cpp/libopus/src/mapping_matrix.c src/main/cpp/libopus/src/mapping_matrix.h src/main/cpp/libopus/celt/celt_decoder.c src/main/cpp/libopus/silk/dec_API.c src/main/cpp/libopus/silk/stereo_MS_to_LR.c src/main/cpp/libopus/silk/decode_frame.c src/main/cpp/libopus/silk/PLC.c src/main/cpp/libopus/silk/bwexpander.c src/main/cpp/libopus/silk/CNG.c src/main/cpp/libopus/silk/decode_core.c src/main/cpp/libopus/silk/decode_parameters.c src/main/cpp/libopus/silk/decode_pitch.c src/main/cpp/libopus/silk/decode_pulses.c src/main/cpp/libopus/silk/decode_indices.c src/main/cpp/libopus/silk/stereo_decode_pred.c src/main/cpp/libopus/silk/decoder_set_fs.c src/main/cpp/libopus/silk/init_decoder.c src/main/cpp/libopus/src/mlp.c src/main/cpp/libopus/src/mlp_data.c src/main/cpp/libopus/silk/LPC_fit.c ) find_library( log-lib log) find_library(android-lib android) target_link_libraries( opusutil-lib ${log-lib} ${android-lib} ) add_definitions(-DOUTSIDE_SPEEX -DOPUS_BUILD -DSTDC_HEADERS -DVAR_ARRAYS)
最终目录结构如下图:
接下来在jni文件下编写编解码实现
实现编码函数
创建编码器:
/*
 * Creates an Opus encoder and applies a fixed set of encoder options.
 *
 * sampleRateInHz and channelConfig are handed directly to opus_encoder_create();
 * bitrate is multiplied by 1000 before being set (the encoder takes bits per
 * second); complexity is passed to OPUS_SET_COMPLEXITY unchanged.
 *
 * Returns the encoder pointer converted to jlong. When creation fails the
 * pointer is NULL and no option is applied.
 */
static jlong createOpusEncoder(JNIEnv *env, jobject thiz, jint sampleRateInHz,
                               jint channelConfig, jint bitrate, jint complexity)
{
    int error;
    /* Build the encoder from the sample rate and channel count. */
    OpusEncoder *encoder = opus_encoder_create(sampleRateInHz, channelConfig,
                                               OPUS_APPLICATION_RESTRICTED_LOWDELAY,
                                               &error);
    if (!encoder) {
        return (jlong) encoder;
    }

    /* Rate control: 0 = CBR, 1 = VBR. */
    opus_encoder_ctl(encoder, OPUS_SET_VBR(0));
    opus_encoder_ctl(encoder, OPUS_SET_VBR_CONSTRAINT(true));
    /* Target bitrate in bits per second. */
    opus_encoder_ctl(encoder, OPUS_SET_BITRATE(bitrate * 1000));
    /* Complexity, range 0..10. */
    opus_encoder_ctl(encoder, OPUS_SET_COMPLEXITY(complexity));
    /* Signal type hint. */
    opus_encoder_ctl(encoder, OPUS_SET_SIGNAL(OPUS_SIGNAL_VOICE));
    opus_encoder_ctl(encoder, OPUS_SET_LSB_DEPTH(16));
    opus_encoder_ctl(encoder, OPUS_SET_DTX(0));
    opus_encoder_ctl(encoder, OPUS_SET_INBAND_FEC(0));
    opus_encoder_ctl(encoder, OPUS_SET_PACKET_LOSS_PERC(0));

    return (jlong) encoder;
}
编码一帧数据:
//输入short数组的pcm数据samples,输出编码后的byte数组 bytes static jint encodeOpus (JNIEnv *env, jobject thiz, jlong pOpusEnc, jshortArray samples, jint offset, jbyteArray bytes) { OpusEncoder *pEnc = (OpusEncoder *) pOpusEnc; if (!pEnc || !samples || !bytes) return 0; jshort *pSamples = (*env)->GetShortArrayElements(env, samples, 0); jsize nSampleSize = (*env)->GetArrayLength(env, samples); jbyte *pBytes = (*env)->GetByteArrayElements(env, bytes, 0); jsize nByteSize = (*env)->GetArrayLength(env, bytes); if (nSampleSize - offset < 320 || nByteSize <= 0) return 0; //编码一帧数据,返回编码完成后的数据大小 int nRet = opus_encode(pEnc, pSamples + offset, nSampleSize, (unsigned char *) pBytes, nByteSize); (*env)->ReleaseShortArrayElements(env, samples, pSamples, 0); (*env)->ReleaseByteArrayElements(env, bytes, pBytes, 0); return nRet; }
销毁编码器
/*
 * Destroys the encoder referenced by the handle pOpusEnc.
 * A zero handle is ignored.
 */
static void destroyOpusEncoder(JNIEnv *env, jobject thiz, jlong pOpusEnc)
{
    OpusEncoder *encoder = (OpusEncoder *) pOpusEnc;
    if (encoder) {
        opus_encoder_destroy(encoder);
    }
}
封装编解码函数根据前面介绍API的文章比较容易实现,采集调用模块有些需要注意的事项,下面我们实现
采集模块
利用Android的AudioRecord模块读取PCM数据,AudioRecord对每次读取的内容有最小长度限制,通过AudioRecord.getMinBufferSize计算得到. 构造一个Runnable,在构造方法中计算出每次读取最小长度,并且计算出一帧大小,前面文章《Opus从入门到精通(二):编解码器使用》提到过OPUS一帧必须为2.5ms, 5ms, 10ms, 20ms, 40ms 或60ms,一帧越小在实时语音中延迟越低,我们取一帧20ms,计算出一帧大小:
this.opusAudioOpusPath = opusAudioOpusPath; bufferSize = AudioRecord.getMinBufferSize(Constants.DEFAULT_AUDIO_SAMPLE_RATE, channelConfig, AudioFormat.ENCODING_PCM_16BIT) + 2048; audioBuffer = new byte[bufferSize]; audioRecord = new AudioRecord(MediaRecorder.AudioSource.MIC, Constants.DEFAULT_AUDIO_SAMPLE_RATE, channelConfig, AudioFormat.ENCODING_PCM_16BIT, bufferSize); bytesPerTenMS = Constants.DEFAULT_AUDIO_SAMPLE_RATE * 2 * Constants.DEFAULT_OPUS_CHANNEL / 100 * 2;//每次处理20ms Log.i(TAG, "bytesPerTenMs:" + bytesPerTenMS); mRemainBuf = new byte[bytesPerTenMS]; mRemainSize = 0;
在run 方法中我们循环读取麦克风数据,进行编码:
// Capture loop: read PCM from the microphone, encode it and append the result
// to the file at opusAudioOpusPath until isRecorder is cleared.
isRecorder = true;
audioRecord.startRecording();

File file = new File(opusAudioOpusPath);
// getParentFile() is null for a path without a directory part; only create
// directories when there is one.
File fileDir = file.getParentFile();
if (fileDir != null && !fileDir.exists()) {
    fileDir.mkdirs();
}
if (file.exists()) {
    file.delete();
}

long createEncoder = 0;
FileOutputStream fileOutputStream = null;
BufferedOutputStream fileOpusBufferedOutputStream = null;
try {
    file.createNewFile();
    fileOutputStream = new FileOutputStream(file, true);
    fileOpusBufferedOutputStream = new BufferedOutputStream(fileOutputStream);
    // Arguments: sample rate, channel count, bitrate, complexity.
    createEncoder = OpusUtil._createOpusEncoder(Constants.DEFAULT_AUDIO_SAMPLE_RATE,
            Constants.DEFAULT_OPUS_CHANNEL, 16, 3);
    if (createEncoder == 0) {
        // Without an encoder every encode call would fail; do not enter the loop.
        Log.e(TAG, "create opus encoder failed");
    }
    Log.i(TAG, "bufferSize:" + bufferSize);
    while (isRecorder && createEncoder != 0) {
        int readBytes = audioRecord.read(audioBuffer, 0, bufferSize);
        if (readBytes < 0) {
            // Negative values are AudioRecord error codes; retrying in a tight
            // loop would only spin, so stop recording.
            Log.e(TAG, "audioRecord.read failed: " + readBytes);
            break;
        }
        if (readBytes > 0 && readBytes <= bufferSize) {
            encodeData(createEncoder, fileOpusBufferedOutputStream, readBytes);
        }
    }
} catch (IOException e) {
    e.printStackTrace();
    Log.e(TAG, "e = " + e.getMessage());
} finally {
    // Release the encoder and the recorder first, then close the streams.
    OpusUtil._destroyOpusEncoder(createEncoder);
    audioRecord.stop();
    audioRecord.release();
    try {
        if (fileOpusBufferedOutputStream != null) {
            fileOpusBufferedOutputStream.close();
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    try {
        if (fileOutputStream != null) {
            fileOutputStream.close();
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
真正的encodeData内容:
/**
 * Splits the bytes just read from the microphone into frames of
 * {@code bytesPerTenMS} bytes, encodes each frame and writes the encoded
 * packet to the output stream.
 *
 * Bytes left over from the previous call (kept in {@code mRemainBuf}) are
 * placed in front of the new data. A trailing part shorter than one frame is
 * stored in {@code mRemainBuf} for the next call.
 *
 * @param createEncoder                native encoder handle
 * @param fileOpusBufferedOutputStream destination for the encoded packets
 * @param readSize                     number of valid bytes in {@code audioBuffer}
 * @throws IOException if writing to the stream fails
 */
private void encodeData(long createEncoder, BufferedOutputStream fileOpusBufferedOutputStream, int readSize) throws IOException {
    byte[] data = audioBuffer;
    if (mRemainSize > 0) {
        // Prepend the bytes carried over from the previous read.
        byte[] totalBuf = new byte[readSize + mRemainSize];
        System.arraycopy(mRemainBuf, 0, totalBuf, 0, mRemainSize);
        System.arraycopy(data, 0, totalBuf, mRemainSize, readSize);
        data = totalBuf;
        readSize += mRemainSize;
        mRemainSize = 0;
    }

    int hasHandleSize = 0;
    while (hasHandleSize < readSize) {
        int pending = readSize - hasHandleSize;
        if (pending < bytesPerTenMS) {
            // Less than one frame left (this also covers a read that is
            // shorter than a frame from the start): keep it for the next call.
            mRemainSize = pending;
            Log.d(TAG, "remain size :" + mRemainSize);
            System.arraycopy(data, hasHandleSize, mRemainBuf, 0, mRemainSize);
            return;
        }

        byte[] frame = new byte[bytesPerTenMS];
        System.arraycopy(data, hasHandleSize, frame, 0, bytesPerTenMS);
        short[] pcm = ArrayUtil.bytes2shorts(frame, bytesPerTenMS);

        byte[] encodedData = new byte[bytesPerTenMS];
        int encodeSize = OpusUtil._encodeOpus(createEncoder, pcm, 0, encodedData);
        Log.d(TAG, "encodeSize = " + encodeSize);
        if (encodeSize <= 0) {
            // Encoding failed; the rest of this buffer is dropped.
            return;
        }
        // Write exactly the encoded bytes. The previous code copied from a
        // fresh zero-filled array INTO the encoder output and then wrote the
        // zero-filled array, so the file never contained the packet.
        fileOpusBufferedOutputStream.write(encodedData, 0, encodeSize);

        hasHandleSize += bytesPerTenMS;
    }
}
这里面需要注意的是encodeData时,因为每次从麦克风读取的数据并不是正好等于一帧,所以我们需要首先判断当前读到的数据是否大于一帧,如果大于一帧则需要循环一帧一帧编码,当循环到最后一次不足一帧时,我们把当前数据缓存起来,和下次从麦克风读取到的数据合并到一起后再进行处理.否则把随意长度的数据扔到编码器中,编码器会报错.
使用华为荣耀8采集,16k采样,AudioRecord.getMinBufferSize结果为1280,加了2048后变成了3328.
IOS平台编码程序实现
IOS平台的编码过程与Android大同小异,都是需要注意每帧大小.IOS使用AVAudioRecorder将语音录制成WAV,然后再通过循环读取WAV中PCM数据进行编码.
@property (nonatomic, strong) AVAudioSession *audioSession; @property (nonatomic, strong) AVAudioRecorder *audioRecorder; @implementation AudioManager { dispatch_queue_t queue; void (^_recordProcessHandler)(float volume); void (^_recordCompletedHandler)(NSData *data, NSError *error); } - (void)recordStartWithProcess:(void (^)(float peakPower))processHandler completed:(void (^)(NSData *data, NSError *error))completedHandler { dispatch_async(queue, ^{ if (!self.audioRecorder.isRecording) { self->_recordProcessHandler = processHandler; self->_recordCompletedHandler = completedHandler; [self.audioRecorder prepareToRecord]; [self.audioRecorder record]; self->_timer.fireDate = [NSDate distantPast]; } else { if (completedHandler) { NSError *error = [NSError errorWithDomain:@"AudioManager" code:-1 userInfo:@{@"info": @"audio recorder is running."}]; dispatch_async(dispatch_get_main_queue(), ^{ completedHandler(nil, error); }); } } }); } - (void)audioRecorderDidFinishRecording:(AVAudioRecorder *)recorder successfully:(BOOL)flag { if (_recordCompletedHandler) { if (flag) { NSData *data = [[NSData alloc] initWithContentsOfURL:recorder.url]; dispatch_async(dispatch_get_main_queue(), ^{ self->_recordCompletedHandler(data, nil); }); } else { NSError *error = [NSError errorWithDomain:@"AudioManager" code:-2 userInfo:@{@"info": @"audio recorder is failed."}]; dispatch_async(dispatch_get_main_queue(), ^{ self->_recordCompletedHandler(nil, error); }); } } }
剩下的主要是如何编译OPUS静态库,网上有人写了现成的脚本Opus-iOS
我自己也实现了一个通用的编译C/C++静态库的脚本:github.com/qingkouwei/…
linux平台编码程序实现
cmake工具与Android平台类似,将配置改成可执行程序即可:
add_executable( opustools ... )
主程序中我们接收两个参数:PCM文件路径与输出OPUS编码后文件路径
/*
 * Entry point of the command-line encoder.
 *
 * argv[1]: path of the raw PCM input file.
 * argv[2]: path of the file that receives the Opus-encoded output.
 *
 * Returns 1 when the arguments are wrong or a file cannot be opened.
 */
int main(int argc, char **argv)
{
    FILE *fin;
    FILE *fout;
    short *in = NULL;
    short *out = NULL;

    if (argc != 3) {
        fprintf(stderr, "usage: %s <raw pcm input> <opus output>\n", argv[0]);
        return 1;
    }

    fin = fopen(argv[1], "rb");
    if (!fin) {
        fprintf(stderr, "cannot open input file: %s\n", argv[1]);
        return 1;
    }

    fout = fopen(argv[2], "wb");
    if (!fout) {
        fprintf(stderr, "cannot open output file: %s\n", argv[2]);
        /* Do not leave the input file open on this error path. */
        fclose(fin);
        return 1;
    }
    ...
}
创建编码器:
int sampleRateInHz = DEFAULT_SAMPLERATEINHz; int channelConfig = DEFAULT_CHANNELCONFIG; int bitrate = DEFAULT_BITRATE; int error; OpusEncoder *pOpusEnc = opus_encoder_create(sampleRateInHz, channelConfig, OPUS_APPLICATION_RESTRICTED_LOWDELAY, &error); if (pOpusEnc) { opus_encoder_ctl(pOpusEnc, OPUS_SET_VBR(0));//0:CBR, 1:VBR opus_encoder_ctl(pOpusEnc, OPUS_SET_VBR_CONSTRAINT(true)); opus_encoder_ctl(pOpusEnc, OPUS_SET_BITRATE( bitrate* 1000)); opus_encoder_ctl(pOpusEnc, OPUS_SET_COMPLEXITY(complexity));//8 0~10 opus_encoder_ctl(pOpusEnc, OPUS_SET_SIGNAL(OPUS_SIGNAL_VOICE)); opus_encoder_ctl(pOpusEnc, OPUS_SET_LSB_DEPTH(16)); opus_encoder_ctl(pOpusEnc, OPUS_SET_DTX(0)); opus_encoder_ctl(pOpusEnc, OPUS_SET_INBAND_FEC(0)); opus_encoder_ctl(pOpusEnc, OPUS_SET_PACKET_LOSS_PERC(0)); }
循环编码数据:
unsigned char *out = (short *)malloc(READ_BUFFER_SIZE * sizeof(char)); while (1) { unsigned char data[READ_BUFFER_SIZE]; num_read = fread(data, 1, READ_BUFFER_SIZE, fin); short sData[num_read/2]; memcpy(data , sData, num_read ); if (num_read > 0) { int result = opus_encode(pOpusEnc, sData, num_read/2, out, READ_BUFFER_SIZE * sizeof(char)); if (fwrite(out, 1, result, fout) != (unsigned)(result)){ printf("write error\n",output_samples,result); goto failure; } } else{ break; } } fclose(fout); fclose(fin);
我们按一帧20ms编码,采样率设置为16k,那么 一帧大小READ_BUFFER_SIZE= 16000 * 2 / 1000 * 20 = 640Byte
如果对你有帮助的话点个赞吧!!!