音频数据编解码——在.NET中使用Speex（附下载）-阿里云开发者社区

　　Speex是一套开源的音频编解码库，最新版本还包含了回音消除和防抖动等功能，如果我们想开发语音聊天或视频会议这样的系统，Speex将是一个不错的选择。到 http://www.speex.org可以下载Speex的源码（编译后的dll为libspeex.dll），最新版本为1.2。不过源码是用C语言开发的，直接在.NET中使用会有诸多不便，为此，我用C#将其封装，使得编解码的调用相当简单。

　　由于Speex原始导出的API不是很方便C#调用，所以，在用C#封装之前，先要用C++对Speex的原始API进行简化，新建一个名为Speex的VC项目，然后引用libspeex.dll的相关库文件，添加cpp文件后，复制下列源码到文件中：

#include " speex\speex.h "
#include < windows.h >
#include < stdio.h >
#include < stdlib.h >

#include " speex/speex_echo.h "
#include " speex/speex_preprocess.h "
#include " Speex.h "

// Number of samples per frame processed by each encode/decode call in this file.
#define FRAME_SIZE 160

// Scratch buffer holding one frame of input samples converted to float for speex_encode.
float encoder_input[FRAME_SIZE];
// Encoder state returned by speex_encoder_init; assigned in encoder_init.
void * encoder_state;
// Bit buffer the encoder writes a frame into before it is copied out.
SpeexBits encoder_bits;

// DLL entry point. No per-process or per-thread work is done here;
// every notification is acknowledged with TRUE.
BOOL APIENTRY DllMain(HANDLE hModule, DWORD ul_reason_for_call, LPVOID lpReserved)
{
    // The arguments are intentionally unused.
    (void)hModule;
    (void)ul_reason_for_call;
    (void)lpReserved;

    return TRUE;
}

// Creates the encoder from speex_nb_mode, prepares its bit buffer and applies
// the requested quality setting.
// Note: encoder_state is overwritten without destroying any state left over
// from an earlier call.
extern "C" __declspec(dllexport) void encoder_init(int quality)
{
    int requestedQuality = quality;

    encoder_state = speex_encoder_init(&speex_nb_mode);
    speex_bits_init(&encoder_bits);

    // speex_encoder_ctl takes the setting by address.
    speex_encoder_ctl(encoder_state, SPEEX_SET_QUALITY, &requestedQuality);
}

// Releases the encoder state and its bit buffer.
// Safe to call before encoder_init or more than once: the state pointer is
// checked and cleared, so the underlying resources are destroyed at most once
// per initialization.
extern "C" __declspec(dllexport) void encoder_dispose()
{
    if (encoder_state == NULL)
    {
        // Nothing was initialized (or it was already released).
        return;
    }

    speex_encoder_destroy(encoder_state);
    encoder_state = NULL;
    speex_bits_destroy(&encoder_bits);
}

// Encodes one frame of FRAME_SIZE 16-bit samples.
//   data   - FRAME_SIZE input samples.
//   output - receives the encoded bytes; at most 200 bytes are written.
// Returns the value of speex_bits_write (the number of bytes written to output).
extern "C" __declspec(dllexport) int encoder_encode(const short* data, char* output)
{
    // speex_encode is fed float samples, so widen the frame into the scratch buffer.
    int sample = 0;
    while (sample < FRAME_SIZE)
    {
        encoder_input[sample] = static_cast<float>(data[sample]);
        ++sample;
    }

    // Start from an empty bit buffer so only this frame ends up in the output.
    speex_bits_reset(&encoder_bits);
    speex_encode(encoder_state, encoder_input, &encoder_bits);

    const int maxOutputBytes = 200;
    return speex_bits_write(&encoder_bits, output, maxOutputBytes);
}

// Scratch buffer receiving one decoded frame as floats from speex_decode.
float decoder_output[FRAME_SIZE];
// Decoder state returned by speex_decoder_init; assigned in decoder_init.
void * decoder_state;
// Bit buffer the decoder reads an encoded frame from.
SpeexBits decoder_bits;

// Creates the decoder from speex_nb_mode, prepares its bit buffer and sets
// SPEEX_SET_ENH to 1.
// Note: decoder_state is overwritten without destroying any state left over
// from an earlier call.
extern "C" __declspec(dllexport) void decoder_init()
{
    int enhancementFlag = 1;

    decoder_state = speex_decoder_init(&speex_nb_mode);
    speex_bits_init(&decoder_bits);

    // speex_decoder_ctl takes the setting by address.
    speex_decoder_ctl(decoder_state, SPEEX_SET_ENH, &enhancementFlag);
}
// Releases the decoder state and its bit buffer.
// Safe to call before decoder_init or more than once: the state pointer is
// checked and cleared, so the underlying resources are destroyed at most once
// per initialization.
extern "C" __declspec(dllexport) void decoder_dispose()
{
    if (decoder_state == NULL)
    {
        // Nothing was initialized (or it was already released).
        return;
    }

    speex_decoder_destroy(decoder_state);
    decoder_state = NULL;
    speex_bits_destroy(&decoder_bits);
}
// Decodes one encoded frame into FRAME_SIZE 16-bit samples.
//   nbBytes - number of encoded bytes available in data.
//   data    - encoded frame.
//   output  - receives FRAME_SIZE decoded samples.
extern "C" __declspec(dllexport) void decoder_decode(int nbBytes, char* data, short* output)
{
    speex_bits_read_from(&decoder_bits, data, nbBytes);
    speex_decode(decoder_state, &decoder_bits, decoder_output);

    for (int i = 0; i < FRAME_SIZE; i++)
    {
        // Saturate before narrowing: converting a float outside the range of
        // short is undefined behaviour, and decoded samples are not guaranteed
        // to stay within 16 bits.
        float sample = decoder_output[i];
        if (sample > 32767.0f)
        {
            sample = 32767.0f;
        }
        else if (sample < -32768.0f)
        {
            sample = -32768.0f;
        }
        output[i] = (short)sample;
    }
}

/* **************************************************  Echo cancellation ************************************* */

// Set to true at the end of SpeexEchoInit and cleared by SpeexEchoReset.
bool       m_bSpeexEchoHasInit;
// Echo canceller state created by speex_echo_state_init in SpeexEchoInit.
SpeexEchoState *    m_SpeexEchoState;
// Preprocessor state created by speex_preprocess_state_init in SpeexEchoInit.
SpeexPreprocessState * m_pPreprocessorState;
// Filter length passed to speex_echo_state_init.
int       m_nFilterLen;
// Sampling rate passed to speex_preprocess_state_init.
int       m_nSampleRate;
// Buffer of FRAME_SIZE + 1 floats allocated in SpeexEchoInit and freed in
// SpeexEchoReset; no other use is visible in this file.
float *    m_pfNoise;

// Forwards a captured frame to speex_echo_capture.
//   input_frame  - captured samples.
//   output_frame - receives the processed samples.
// Does nothing (output_frame is left untouched) until SpeexEchoInit has run,
// matching the guard used by SpeexEchoDoAEC; without it a NULL echo state
// would be passed to the library.
extern "C" __declspec(dllexport) void SpeexEchoCapture(short* input_frame, short* output_frame)
{
    if (!m_bSpeexEchoHasInit)
    {
        return;
    }

    speex_echo_capture(m_SpeexEchoState, input_frame, output_frame);
}

// Forwards a played-back frame to speex_echo_playback.
//   echo_frame - samples that were sent to the speaker.
// Does nothing until SpeexEchoInit has run, matching the guard used by
// SpeexEchoDoAEC; without it a NULL echo state would be passed to the library.
extern "C" __declspec(dllexport) void SpeexEchoPlayback(short* echo_frame)
{
    if (!m_bSpeexEchoHasInit)
    {
        return;
    }

    speex_echo_playback(m_SpeexEchoState, echo_frame);
}

// Tears down the echo canceller, the preprocessor and the noise buffer (each
// only if currently allocated) and marks echo cancellation as uninitialized.
extern "C" __declspec(dllexport) void SpeexEchoReset()
{
    m_bSpeexEchoHasInit = false;

    if (m_SpeexEchoState)
    {
        speex_echo_state_destroy(m_SpeexEchoState);
        m_SpeexEchoState = NULL;
    }

    if (m_pPreprocessorState)
    {
        speex_preprocess_state_destroy(m_pPreprocessorState);
        m_pPreprocessorState = NULL;
    }

    if (m_pfNoise)
    {
        delete[] m_pfNoise;
        m_pfNoise = NULL;
    }
}

// (Re)initializes echo cancellation.
//   filter_length         - echo filter length passed to speex_echo_state_init.
//   sampling_rate         - sampling rate for the preprocessor and the echo canceller.
//   associatePreprocesser - when true, the echo state is attached to the
//                           preprocessor via SPEEX_PREPROCESS_SET_ECHO_STATE.
// If either numeric argument is not positive, both fall back to the defaults
// (filter length 160 * 8, sampling rate 8000).
// Any previous echo-cancellation state is released first.
extern "C" __declspec(dllexport) void SpeexEchoInit(int filter_length, int sampling_rate, bool associatePreprocesser)
{
    SpeexEchoReset();

    if (filter_length <= 0 || sampling_rate <= 0)
    {
        m_nFilterLen  = 160 * 8;
        m_nSampleRate = 8000;
    }
    else
    {
        m_nFilterLen  = filter_length;
        m_nSampleRate = sampling_rate;
    }

    m_SpeexEchoState = speex_echo_state_init(FRAME_SIZE, m_nFilterLen);
    // Tell the echo canceller the actual sampling rate as well; previously only
    // the preprocessor received it, leaving the canceller on its built-in default.
    speex_echo_ctl(m_SpeexEchoState, SPEEX_ECHO_SET_SAMPLING_RATE, &m_nSampleRate);

    m_pPreprocessorState = speex_preprocess_state_init(FRAME_SIZE, m_nSampleRate);
    if (associatePreprocesser)
    {
        speex_preprocess_ctl(m_pPreprocessorState, SPEEX_PREPROCESS_SET_ECHO_STATE, m_SpeexEchoState);
    }

    m_pfNoise = new float[FRAME_SIZE + 1];
    m_bSpeexEchoHasInit = true;
}

// Runs speex_echo_cancellation on one frame.
//   mic - samples captured from the microphone.
//   ref - reference (played-back) samples.
//   out - receives the echo-cancelled samples.
// The call is skipped entirely while echo cancellation is not initialized.
extern "C" __declspec(dllexport) void SpeexEchoDoAEC(short* mic, short* ref, short* out)
{
    if (m_bSpeexEchoHasInit)
    {
        const __int16* micFrame = (const __int16*)mic;
        const __int16* refFrame = (const __int16*)ref;
        __int16* outFrame = (__int16*)out;

        speex_echo_cancellation(m_SpeexEchoState, micFrame, refFrame, outFrame);
    }
}

　　编译便生成Speex.dll。

　　如果对VC不熟悉也没关系，文末会直接给出libspeex.dll和Speex.dll的下载，直接使用就OK了。

　　现在，C#可以调用Speex.dll导出的简单函数了，最终封装的源码如下：

    /// <summary>
    /// C# wrapper around the encoder/decoder functions exported by the native Speex.dll.
    /// zhuweisky 2010.05.13
    /// </summary>
    /// <remarks>
    /// Raw audio is handled in frames of <c>FrameSize</c> 16-bit little-endian samples.
    /// Encoded data is a sequence of records, each a 4-byte little-endian length
    /// followed by that many bytes of encoded frame data.
    /// Every native call is made while holding one static lock, so Encode, Decode and
    /// Dispose never run inside Speex.dll concurrently.
    /// NOTE(review): the native wrapper keeps a single set of global encoder/decoder
    /// state, so several instances of this class presumably share one codec — confirm
    /// before creating more than one instance.
    /// </remarks>
    public class Speex : IAudioCodec
    {
        /// <summary>Samples per frame.</summary>
        private const int FrameSize = 160;

        /// <summary>Bytes of raw 16-bit audio per frame.</summary>
        private const int FrameBytes = FrameSize * 2;

        /// <summary>Size of the buffer handed to the native encoder for one encoded frame.</summary>
        private const int MaxEncodedFrameBytes = 200;

        /// <summary>Size of the length header stored in front of every encoded frame.</summary>
        private const int LengthPrefixBytes = sizeof(int);

        /// <summary>Serializes all calls into Speex.dll.</summary>
        private static readonly object NativeLock = new object();

        #region IsDisposed
        private volatile bool isDisposed = false;

        /// <summary>
        /// Gets whether <see cref="Dispose"/> has been called.
        /// </summary>
        public bool IsDisposed
        {
            get { return isDisposed; }
        }
        #endregion

        #region Ctor
        /// <summary>
        /// Initializes the native encoder and decoder.
        /// </summary>
        /// <param name="quality">Encoding quality, from 0 to 10 inclusive.</param>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="quality"/> is outside 0..10.</exception>
        public Speex(int quality)
        {
            if (quality < 0 || quality > 10)
            {
                throw new ArgumentOutOfRangeException("quality", " quality value must be between 0 and 10. ");
            }

            lock (NativeLock)
            {
                Speex.encoder_init(quality);
                Speex.decoder_init();
            }
        }
        #endregion

        #region Dispose
        /// <summary>
        /// Releases the native decoder and encoder. Calling it again has no effect.
        /// </summary>
        public void Dispose()
        {
            // Taking the lock waits for any Encode/Decode call that is currently inside
            // the native library, which replaces the former fixed 100 ms sleep.
            lock (NativeLock)
            {
                if (this.isDisposed)
                {
                    return;
                }

                this.isDisposed = true;
                Speex.decoder_dispose();
                Speex.encoder_dispose();
            }
        }
        #endregion

        #region Encode
        /// <summary>
        /// Encodes captured audio.
        /// </summary>
        /// <param name="data">
        /// 16-bit little-endian samples; the length must be a multiple of one frame
        /// (<c>FrameSize * 2</c> bytes).
        /// </param>
        /// <returns>
        /// One length-prefixed record per input frame, or <c>null</c> if this instance
        /// has been disposed.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
        /// <exception cref="ArgumentException">The length of <paramref name="data"/> is not a whole number of frames.</exception>
        public byte[] Encode(byte[] data)
        {
            lock (NativeLock)
            {
                if (this.isDisposed)
                {
                    return null;
                }

                if (data == null)
                {
                    throw new ArgumentNullException("data");
                }

                if (data.Length % FrameBytes != 0)
                {
                    throw new ArgumentException(" Invalid Data Length. ");
                }

                int frameCount = data.Length / FrameBytes;
                short[] input = new short[FrameSize];
                byte[] buffer = new byte[MaxEncodedFrameBytes];

                // A stream grows geometrically; resizing the result array once per frame
                // copied the whole output every time.
                using (System.IO.MemoryStream output = new System.IO.MemoryStream())
                {
                    for (int frame = 0; frame < frameCount; frame++)
                    {
                        int frameStart = frame * FrameBytes;
                        for (int j = 0; j < FrameSize; j++)
                        {
                            int low = data[frameStart + j * 2];
                            int high = data[frameStart + j * 2 + 1];
                            // Reassemble the little-endian sample; values above 0x7FFF wrap
                            // to negative shorts on purpose.
                            input[j] = unchecked((short)(low | (high << 8)));
                        }

                        int nbBytes = Speex.encoder_encode(input, buffer);
                        WriteLengthPrefix(output, nbBytes);
                        output.Write(buffer, 0, nbBytes);
                    }

                    return output.ToArray();
                }
            }
        }

        /// <summary>
        /// Writes <paramref name="length"/> as a 4-byte little-endian header.
        /// </summary>
        private static void WriteLengthPrefix(System.IO.Stream output, int length)
        {
            for (int shift = 0; shift < LengthPrefixBytes * 8; shift += 8)
            {
                output.WriteByte(unchecked((byte)(length >> shift)));
            }
        }
        #endregion

        #region Decode
        /// <summary>
        /// Decodes data produced by <see cref="Encode"/> back into raw audio.
        /// </summary>
        /// <param name="data">A sequence of length-prefixed encoded frames.</param>
        /// <returns>
        /// <c>FrameSize * 2</c> bytes of 16-bit little-endian samples per encoded frame,
        /// or <c>null</c> if this instance has been disposed.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
        /// <exception cref="ArgumentException">A frame header is truncated or announces more bytes than remain.</exception>
        public byte[] Decode(byte[] data)
        {
            lock (NativeLock)
            {
                if (this.isDisposed)
                {
                    return null;
                }

                if (data == null)
                {
                    throw new ArgumentNullException("data");
                }

                short[] buffer = new short[FrameSize];
                using (System.IO.MemoryStream output = new System.IO.MemoryStream())
                {
                    int index = 0;
                    while (index < data.Length)
                    {
                        if (data.Length - index < LengthPrefixBytes)
                        {
                            throw new ArgumentException("Truncated frame length prefix.", "data");
                        }

                        int nbBytes = data[index]
                            | (data[index + 1] << 8)
                            | (data[index + 2] << 16)
                            | (data[index + 3] << 24);
                        index += LengthPrefixBytes;

                        // Reject corrupt headers before allocating or copying anything.
                        if (nbBytes < 0 || nbBytes > data.Length - index)
                        {
                            throw new ArgumentException("Frame length exceeds the remaining data.", "data");
                        }

                        byte[] input = new byte[nbBytes];
                        Array.Copy(data, index, input, 0, nbBytes);
                        index += nbBytes;

                        Speex.decoder_decode(nbBytes, input, buffer);

                        for (int i = 0; i < FrameSize; i++)
                        {
                            short sample = buffer[i];
                            // Take the two's-complement bytes directly. Dividing by 0x100
                            // truncates toward zero and produced the wrong high byte for
                            // negative samples (for example -1 became 0x00FF).
                            output.WriteByte(unchecked((byte)sample));
                            output.WriteByte(unchecked((byte)(sample >> 8)));
                        }
                    }

                    return output.ToArray();
                }
            }
        }
        #endregion

        #region Pinvoke
        // The native functions are exported without an explicit calling convention, i.e.
        // as cdecl, so it has to be stated here; the P/Invoke default differs on 32-bit
        // Windows. Library and entry-point names must match the exports exactly.
        [DllImport("Speex.dll", EntryPoint = "encoder_init", CallingConvention = CallingConvention.Cdecl)]
        internal static extern void encoder_init(int quality);

        [DllImport("Speex.dll", EntryPoint = "encoder_dispose", CallingConvention = CallingConvention.Cdecl)]
        internal static extern void encoder_dispose();

        [DllImport("Speex.dll", EntryPoint = "encoder_encode", CallingConvention = CallingConvention.Cdecl)]
        internal static extern int encoder_encode(short[] data, byte[] output);

        [DllImport("Speex.dll", EntryPoint = "decoder_init", CallingConvention = CallingConvention.Cdecl)]
        internal static extern void decoder_init();

        [DllImport("Speex.dll", EntryPoint = "decoder_dispose", CallingConvention = CallingConvention.Cdecl)]
        internal static extern void decoder_dispose();

        [DllImport("Speex.dll", EntryPoint = "decoder_decode", CallingConvention = CallingConvention.Cdecl)]
        internal static extern void decoder_decode(int nbBytes, byte[] data, short[] output);
        #endregion
    }

该类只有四个公开成员：构造函数（完成初始化）、Encode、Decode、Dispose。方法参数的含义也非常明显。

　一般音频对话的整个流程是这样的：采集 -> 编码 -> 网络传输 -> 解码 -> 播放。参见：《浅谈网络语音技术》

　而该封装的Speex类解决了这个过程中的音频编码和解码的问题。你可以复制该源码到你的项目，并将libspeex.dll（由http://www.speex.org下载的源码编译得到）和上文编译生成的Speex.dll一同放到运行目录下，就可以正常地使用Speex的编解码功能了。

　关于Speex更高级的功能，我正在研究中，有兴趣的朋友可以email给我一起探讨。　　

2012.11.20 我们的研究成果已经全部集成到了OMCS中，其支持回音消除（AEC）、静音检测（VAD）、噪音抑制（DENOISE）、自动增益（AGC）等网络语音技术，有兴趣的可以了解一下。

2014.04.17 Speex dll 点击下载

音频数据编解码——在.NET中使用Speex（附下载）

热门文章

最新文章

相关电子书

热门

活动广场

任务中心

开发者评测

高校计划

乘风者计划

训练营

阿里云MVP

话题

直播

下载

镜像站

技术资料

插件

音频数据编解码——在.NET中使用Speex（附下载）

热门文章

最新文章

相关电子书