Speex是一套开源的音频编解码库,最新版本还包含了回音消除和防抖动等功能。如果我们想开发语音聊天或视频会议这样的系统,Speex将是一个不错的选择。到 http://www.speex.org 可以下载Speex的源码(编译后的dll为libspeex.dll),最新版本为1.2。不过Speex的源码是用C语言开发的,直接在.NET中使用会有诸多不便。为此,我先用C++写了一个只导出几个简单函数的Speex.dll(见下面的C++代码),再用C#通过P/Invoke将其封装,使得编解码的调用相当简单。
#include < windows.h >
#include < stdio.h >
#include < stdlib.h >
#include " speex/speex_echo.h "
#include " speex/speex_preprocess.h "
#include " Speex.h "
// Number of samples handled per call by every encode/decode/echo routine in this file.
#define FRAME_SIZE 160
// Scratch buffer holding one frame of input samples widened to float for speex_encode.
float encoder_input[FRAME_SIZE];
// Opaque encoder handle; assigned from speex_encoder_init.
void * encoder_state;
// Bit-packing buffer reused by every encoder_encode call (reset before each frame).
SpeexBits encoder_bits;
// NOTE(review): these three lines look like the remains of a DLL entry point (presumably DllMain)
// whose header line and braces were lost in this listing — restore from the original project.
DWORD ul_reason_for_call,
LPVOID lpReserved
return TRUE;
// NOTE(review): the function header for this body is missing from the listing. The C# wrapper
// imports `encoder_init(int quality)` and `quality` is read below, so this is presumably the body
// of that export — confirm against the original project.
// Creates the encoder from speex_nb_mode, applies the requested quality and prepares the bit buffer.
encoder_state = speex_encoder_init( & speex_nb_mode);
speex_encoder_ctl(encoder_state, SPEEX_SET_QUALITY, & quality);
speex_bits_init( & encoder_bits);
// Releases everything the encoder initialisation created: the codec state and the bit buffer.
// Does nothing when no encoder exists, so calling it twice (or before init) is harmless.
extern "C" __declspec(dllexport) void encoder_dispose()
{
    if (encoder_state != NULL)
    {
        // The state returned by speex_encoder_init must be freed explicitly;
        // destroying only the bit buffer leaks it.
        speex_encoder_destroy(encoder_state);
        encoder_state = NULL;
        speex_bits_destroy(&encoder_bits);
    }
}
// Encodes exactly FRAME_SIZE 16-bit samples from `data` and writes the packed frame into `output`.
// Returns the number of bytes written; at most 200 bytes are written, so `output` must be at
// least that large.
extern "C" __declspec(dllexport) int encoder_encode(const short * data, char * output)
{
    // speex_encode consumes floats, so widen the incoming samples first.
    int sample = 0;
    while (sample < FRAME_SIZE)
    {
        encoder_input[sample] = data[sample];
        ++sample;
    }

    // Start from an empty bit buffer so only this frame ends up in the output.
    speex_bits_reset(&encoder_bits);
    speex_encode(encoder_state, encoder_input, &encoder_bits);

    const int bytesWritten = speex_bits_write(&encoder_bits, output, 200);
    return bytesWritten;
}
// Scratch buffer receiving one decoded frame as floats from speex_decode.
float decoder_output[FRAME_SIZE];
// Opaque decoder handle; assigned from speex_decoder_init in decoder_init.
void * decoder_state;
// Bit buffer that decoder_decode loads each incoming packet into.
SpeexBits decoder_bits;
// Creates the global decoder from speex_nb_mode, sets SPEEX_SET_ENH to 1 and prepares the
// bit buffer used by decoder_decode.
extern "C" __declspec(dllexport) void decoder_init()
{
    int enhancementFlag = 1;

    decoder_state = speex_decoder_init(&speex_nb_mode);
    speex_decoder_ctl(decoder_state, SPEEX_SET_ENH, &enhancementFlag);
    speex_bits_init(&decoder_bits);
}
// Releases everything decoder_init created: the codec state and the bit buffer.
// Does nothing when no decoder exists, so calling it twice (or before init) is harmless.
extern "C" __declspec(dllexport) void decoder_dispose()
{
    if (decoder_state != NULL)
    {
        // The state returned by speex_decoder_init must be freed explicitly;
        // destroying only the bit buffer leaks it.
        speex_decoder_destroy(decoder_state);
        decoder_state = NULL;
        speex_bits_destroy(&decoder_bits);
    }
}
// Decodes one packed frame of `nbBytes` bytes from `data` into FRAME_SIZE 16-bit samples in `output`.
// `output` must have room for FRAME_SIZE shorts.
extern "C" __declspec(dllexport) void decoder_decode(int nbBytes, char * data, short * output)
{
    speex_bits_read_from(&decoder_bits, data, nbBytes);
    speex_decode(decoder_state, &decoder_bits, decoder_output);

    for (int i = 0; i < FRAME_SIZE; i++)
    {
        // Saturate before narrowing: converting a float outside the range of short is
        // undefined behaviour in C++, and the float decoder output is not range-limited here.
        float sample = decoder_output[i];
        if (sample > 32767.0f)
        {
            sample = 32767.0f;
        }
        else if (sample < -32768.0f)
        {
            sample = -32768.0f;
        }
        output[i] = (short)sample;
    }
}
/* ************************************************** Echo cancellation ************************************* */
// True between SpeexEchoInit and SpeexEchoReset; checked by SpeexEchoDoAEC before touching the state.
bool m_bSpeexEchoHasInit;
// Echo canceller handle created by speex_echo_state_init in SpeexEchoInit.
SpeexEchoState * m_SpeexEchoState;
// Preprocessor handle created by speex_preprocess_state_init in SpeexEchoInit.
SpeexPreprocessState * m_pPreprocessorState;
// Filter length passed to speex_echo_state_init (defaults to 160 * 8 in SpeexEchoInit).
int m_nFilterLen;
// Sampling rate passed to speex_preprocess_state_init (defaults to 8000 in SpeexEchoInit).
int m_nSampleRate;
// Buffer of FRAME_SIZE + 1 floats allocated in SpeexEchoInit; not read by any code in this listing.
float * m_pfNoise;
// Forwards one captured frame to speex_echo_capture, which writes its result to `output_frame`.
// Does nothing until SpeexEchoInit has created the echo state, matching the guard in SpeexEchoDoAEC.
extern "C" __declspec(dllexport) void SpeexEchoCapture(short * input_frame, short * output_frame)
{
    if (!m_bSpeexEchoHasInit)
    {
        // Without this guard a NULL echo state would be handed to the library.
        return;
    }
    speex_echo_capture(m_SpeexEchoState, input_frame, output_frame);
}
// Forwards one played-back frame to speex_echo_playback.
// Does nothing until SpeexEchoInit has created the echo state, matching the guard in SpeexEchoDoAEC.
extern "C" __declspec(dllexport) void SpeexEchoPlayback(short * echo_frame)
{
    if (!m_bSpeexEchoHasInit)
    {
        // Without this guard a NULL echo state would be handed to the library.
        return;
    }
    speex_echo_playback(m_SpeexEchoState, echo_frame);
}
// Tears down the echo-cancellation objects created by SpeexEchoInit and marks the module
// as uninitialised. Safe to call repeatedly or before any initialisation.
extern "C" __declspec(dllexport) void SpeexEchoReset()
{
    // Clear the flag first so the other exports stop using the handles being destroyed.
    m_bSpeexEchoHasInit = false;

    // Destroy the preprocessor first: SpeexEchoInit may have attached the echo state to it.
    if (m_pPreprocessorState != NULL)
    {
        speex_preprocess_state_destroy(m_pPreprocessorState);
        m_pPreprocessorState = NULL;
    }

    // Nulling the pointer alone leaks the state allocated by speex_echo_state_init.
    if (m_SpeexEchoState != NULL)
    {
        speex_echo_state_destroy(m_SpeexEchoState);
        m_SpeexEchoState = NULL;
    }

    if (m_pfNoise != NULL)
    {
        delete[] m_pfNoise;
        m_pfNoise = NULL;
    }
}
// Creates the echo canceller and preprocessor for FRAME_SIZE-sample frames.
//   filter_length         - echo filter length; values <= 0 select the default of 160 * 8.
//   sampling_rate         - sampling rate; values <= 0 select the default of 8000.
//   associatePreprocesser - when true, the echo state is attached to the preprocessor.
// If either numeric argument is non-positive, BOTH fall back to their defaults.
extern "C" __declspec(dllexport) void SpeexEchoInit(int filter_length, int sampling_rate, bool associatePreprocesser)
{
    // Drop any previous state so that re-initialising does not leak it.
    SpeexEchoReset();

    if (filter_length <= 0 || sampling_rate <= 0)
    {
        m_nFilterLen = 160 * 8;
        m_nSampleRate = 8000;
    }
    else
    {
        // Only accept the caller's values when both are valid; previously they
        // unconditionally overwrote the defaults chosen above.
        m_nFilterLen = filter_length;
        m_nSampleRate = sampling_rate;
    }

    m_SpeexEchoState = speex_echo_state_init(FRAME_SIZE, m_nFilterLen);
    m_pPreprocessorState = speex_preprocess_state_init(FRAME_SIZE, m_nSampleRate);
    if (associatePreprocesser)
    {
        speex_preprocess_ctl(m_pPreprocessorState, SPEEX_PREPROCESS_SET_ECHO_STATE, m_SpeexEchoState);
    }

    m_pfNoise = new float[FRAME_SIZE + 1];
    m_bSpeexEchoHasInit = true;
}
// Runs speex_echo_cancellation on one frame, passing `mic`, `ref` and `out` in that order.
// Silently does nothing when SpeexEchoInit has not been called.
extern "C" __declspec(dllexport) void SpeexEchoDoAEC(short * mic, short * ref, short * out)
{
    if (m_bSpeexEchoHasInit)
    {
        const __int16 * micFrame = (const __int16 *) mic;
        const __int16 * refFrame = (const __int16 *) ref;
        __int16 * outFrame = (__int16 *) out;

        speex_echo_cancellation(m_SpeexEchoState, micFrame, refFrame, outFrame);
    }
}
/// <summary>
/// C# wrapper around the native Speex.dll codec exports.
/// zhuweisky 2010.05.13
/// </summary>
/// <remarks>
/// Audio is 16-bit little-endian PCM processed in frames of <c>FrameSize</c> samples.
/// Encoded output is a sequence of records, each a 4-byte little-endian payload length
/// followed by that many bytes of Speex data.
/// The native library keeps one global encoder and one global decoder, so all instances of
/// this class share them; disposing one instance releases the codec for every instance.
/// </remarks>
public class Speex : IAudioCodec
{
    private const int FrameSize = 160;

    // Size in bytes of one PCM frame (16-bit samples).
    private const int FrameBytes = FrameSize * 2;

    // Upper bound the native encoder uses when writing one encoded frame.
    private const int MaxEncodedFrameBytes = 200;

    // Size of the length prefix stored in front of every encoded frame.
    private const int HeaderSize = sizeof(int);

    #region IsDisposed
    private volatile bool isDisposed = false;

    /// <summary>
    /// Gets whether <see cref="Dispose"/> has been called on this instance.
    /// </summary>
    public bool IsDisposed
    {
        get { return isDisposed; }
    }
    #endregion

    #region Ctor
    /// <summary>
    /// Initializes the native encoder and decoder.
    /// </summary>
    /// <param name="quality">Encoding quality, 0 to 10.</param>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="quality"/> is outside 0..10.</exception>
    public Speex(int quality)
    {
        if (quality < 0 || quality > 10)
        {
            throw new ArgumentOutOfRangeException("quality", "quality value must be between 0 and 10.");
        }

        // Without these calls the native encoder/decoder state is never created and the
        // first Encode/Decode would operate on a null state.
        Speex.encoder_init(quality);
        Speex.decoder_init();
    }
    #endregion

    #region Dispose
    /// <summary>
    /// Marks the instance as disposed and releases the native encoder and decoder.
    /// Calling it more than once has no further effect.
    /// </summary>
    public void Dispose()
    {
        if (this.isDisposed)
        {
            return;
        }

        this.isDisposed = true;

        // NOTE(review): presumably a grace period so an Encode/Decode call already in flight
        // on another thread can finish before the native state is freed — confirm.
        System.Threading.Thread.Sleep(100);

        Speex.decoder_dispose();
        Speex.encoder_dispose();
    }
    #endregion

    #region Encode
    /// <summary>
    /// Encodes captured audio.
    /// </summary>
    /// <param name="data">16-bit little-endian PCM; the length must be a multiple of one frame (320 bytes).</param>
    /// <returns>
    /// The encoded frames, each prefixed with its 4-byte little-endian length,
    /// or <c>null</c> when the instance has been disposed.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
    /// <exception cref="ArgumentException">The length of <paramref name="data"/> is not a whole number of frames.</exception>
    public byte[] Encode(byte[] data)
    {
        if (this.isDisposed)
        {
            return null;
        }
        if (data == null)
        {
            throw new ArgumentNullException("data");
        }
        if (data.Length % FrameBytes != 0)
        {
            throw new ArgumentException("Invalid Data Length.");
        }

        short[] input = new short[FrameSize];
        byte[] buffer = new byte[MaxEncodedFrameBytes];
        int frameCount = data.Length / FrameBytes;

        // A stream grows geometrically; resizing an array once per frame copies the whole
        // output every time.
        using (System.IO.MemoryStream output = new System.IO.MemoryStream())
        {
            for (int i = 0; i < frameCount; i++)
            {
                int frameStart = i * FrameBytes;
                for (int j = 0; j < FrameSize; j++)
                {
                    int low = data[frameStart + j * 2];
                    int high = data[frameStart + j * 2 + 1];
                    // unchecked: values >= 0x8000 are negative samples and must wrap, not throw.
                    input[j] = unchecked((short)(low | (high << 8)));
                }

                int nbBytes = Speex.encoder_encode(input, buffer);

                // Write the length prefix from a shifted copy. The previous loop divided
                // nbBytes in place while also using it to compute the write position, which
                // overwrote the tail of the payload.
                for (int j = 0; j < HeaderSize; j++)
                {
                    output.WriteByte((byte)((nbBytes >> (8 * j)) & 0xFF));
                }
                output.Write(buffer, 0, nbBytes);
            }

            return output.ToArray();
        }
    }
    #endregion

    #region Decode
    /// <summary>
    /// Decodes data produced by <see cref="Encode"/> back into raw audio.
    /// </summary>
    /// <param name="data">A sequence of length-prefixed encoded frames.</param>
    /// <returns>
    /// 16-bit little-endian PCM, one frame (320 bytes) per encoded frame,
    /// or <c>null</c> when the instance has been disposed.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
    /// <exception cref="ArgumentException">A length prefix is truncated or points past the end of <paramref name="data"/>.</exception>
    public byte[] Decode(byte[] data)
    {
        if (this.isDisposed)
        {
            return null;
        }
        if (data == null)
        {
            throw new ArgumentNullException("data");
        }

        short[] buffer = new short[FrameSize];
        int index = 0;

        using (System.IO.MemoryStream output = new System.IO.MemoryStream())
        {
            while (index < data.Length)
            {
                if (data.Length - index < HeaderSize)
                {
                    throw new ArgumentException("Invalid Data Length.");
                }

                // Little-endian length prefix: most significant byte is read first.
                int nbBytes = 0;
                for (int i = HeaderSize - 1; i >= 0; i--)
                {
                    nbBytes = (nbBytes << 8) | data[index + i];
                }
                index += HeaderSize;

                if (nbBytes < 0 || nbBytes > data.Length - index)
                {
                    throw new ArgumentException("Invalid Data Length.");
                }

                byte[] input = new byte[nbBytes];
                Array.Copy(data, index, input, 0, nbBytes);
                index += nbBytes;

                Speex.decoder_decode(nbBytes, input, buffer);

                for (int i = 0; i < FrameSize; i++)
                {
                    // Mask and shift instead of % and /: integer division truncates toward
                    // zero, which yields the wrong high byte for negative samples
                    // (e.g. -1 became 0x00FF instead of 0xFFFF).
                    int sample = buffer[i];
                    output.WriteByte((byte)(sample & 0xFF));
                    output.WriteByte((byte)((sample >> 8) & 0xFF));
                }
            }

            return output.ToArray();
        }
    }
    #endregion

    #region Pinvoke
    // The native functions are declared `extern "C" __declspec(dllexport)` without an explicit
    // calling convention, i.e. the C++ compiler default (cdecl unless the DLL was built with a
    // different default — confirm the build settings). DllImport defaults to stdcall on Windows,
    // so the convention is stated explicitly.
    [DllImport("Speex.dll", EntryPoint = "encoder_init", CallingConvention = CallingConvention.Cdecl)]
    internal static extern void encoder_init(int quality);

    [DllImport("Speex.dll", EntryPoint = "encoder_dispose", CallingConvention = CallingConvention.Cdecl)]
    internal static extern void encoder_dispose();

    [DllImport("Speex.dll", EntryPoint = "encoder_encode", CallingConvention = CallingConvention.Cdecl)]
    internal static extern int encoder_encode(short[] data, byte[] output);

    [DllImport("Speex.dll", EntryPoint = "decoder_init", CallingConvention = CallingConvention.Cdecl)]
    internal static extern void decoder_init();

    [DllImport("Speex.dll", EntryPoint = "decoder_dispose", CallingConvention = CallingConvention.Cdecl)]
    internal static extern void decoder_dispose();

    [DllImport("Speex.dll", EntryPoint = "decoder_decode", CallingConvention = CallingConvention.Cdecl)]
    internal static extern void decoder_decode(int nbBytes, byte[] data, short[] output);
    #endregion
}
一般音频对话的整个流程是这样的:采集 -> 编码 -> 网络传输 -> 解码 -> 播放。参见:《浅谈网络语音技术》
2012.11.20 我们的研究成果已经全部集成到了OMCS中,其支持回音消除(AEC)、静音检测(VAD)、噪音抑制(DENOISE)、自动增益(AGC)等网络语音技术,有兴趣的可以了解一下。
2014.04.17 Speex dll 点击下载