最近完成了双声道(立体声,又称双轨)语音文件的录制,在这里分享一下心得。我录制的双声道文件格式是PCM、16K、16Bit,blockAlign设置为2。立体声的语音数据按 0声道(左)、1声道(右)、0声道(左)、1声道(右)…… 的顺序交叉存放。具体设置如下:
audioCodec为1,channels为2,samplesPerSec为8000(非常重要)。samplesPerSec设置为8000的原因是需要用16000/2,也就是要用采样率除以通道数。这样做的原因是:写入立体声语音内容时,左右声道的语音数据是被打散后交叉放置到内容中的。
void TWavMediaFile::Close() { /* write wav head */ if(m_fp && m_modified) { m_modified = false; rewind(m_fp); LOG(DETAIL, "m_fp postion:%d", ftell(m_fp)); char buffer[44], *p = buffer; int32_t a; int16_t b; sprintf(p, "RIFF"); p+=4; a = htolel(m_length + 44 - 8); memcpy(p, &a, 4); p+=4; sprintf(p, "WAVEfmt "); p+=8; a = htolel(16); memcpy(p, &a, 4); p+=4; LOG(DETAIL, "m_audioCodec: %d", m_audioCodec); b = htoles((int16_t)m_audioCodec); memcpy(p, &b, 2); p+=2; LOG(DETAIL, "m_channels: %d", m_channels); b = htoles((int16_t)m_channels); memcpy(p, &b, 2); p+=2; LOG(DETAIL, "m_samplesPerSec: %d", m_samplesPerSec); a = htolel((int32_t)m_samplesPerSec/2); memcpy(p, &a, 4); p+=4; LOG(DETAIL, "m_avgBytesPerSec: %d", m_avgBytesPerSec); a = ((int32_t)m_avgBytesPerSec*m_channels); memcpy(p, &a, 4); p+=4; LOG(DETAIL, "m_blockAlign: %d", m_blockAlign); b = htoles((int16_t)m_blockAlign * m_channels ); memcpy(p, &b, 2); p+=2; LOG(DETAIL, "m_bitsPerSample: %d", m_bitsPerSample); b = htoles((int16_t)m_bitsPerSample); memcpy(p, &b, 2); p+=2; sprintf(p, "data"); p+=4; LOG(DETAIL, "m_length: %d", m_length); a = htolel(m_length); memcpy(p, &a, 4); fwrite(buffer, 1, 44, m_fp); } if(m_fp) fclose(m_fp); m_fp = 0; m_length = 0; }
将两个声道的语音合成到一个BUFFER中的实现代码如下:
/**
 * Interleaves one frame from two single-channel buffers into a two-channel
 * buffer and passes the result to the media file.
 *
 * Output layout is m_buffer[0], m_buffer2[0], m_buffer[1], m_buffer2[1], ...
 * so m_buffer supplies channel 0 and m_buffer2 supplies channel 1. The
 * interleaving is byte-wise, i.e. it is only correct for 8-bit samples.
 *
 * NOTE(review): none of the parameters are read; the samples come from the
 * members m_buffer / m_buffer2. Presumably the caller has already staged the
 * data there - confirm, or switch to buffer1/buffer2 if that was the intent.
 *
 * @param buffer1 unused (see note above)
 * @param length1 unused
 * @param buffer2 unused
 * @param length2 unused
 */
void TRecord::feedG711Stero(const char* buffer1, int length1, const char *buffer2, int length2 )
{
    (void)buffer1;
    (void)length1;
    (void)buffer2;
    (void)length2;

    char twoChannelBuffer[MAXAUDIOBUFFERSIZE] = {0};
    const int capacity = static_cast<int>(sizeof(twoChannelBuffer));

    /* Byte-wise interleave: valid for 8 bits per sample only. Each iteration
     * writes two bytes, so stop before a pair would run past the end of the
     * output buffer. */
    int i = 0;
    int j = 0;
    for (i = 0, j = 0; i < PCMFRAMELEN && j + 1 < capacity; i++) {
        twoChannelBuffer[j++] = m_buffer[i];
        twoChannelBuffer[j++] = m_buffer2[i];
    }

    m_mediaFile->putAudioData(twoChannelBuffer, j);
}