像素缓冲区对象（PBO）的 Streaming-Texture 上传源码解析-阿里云开发者社区

接这篇文章 OpenGL深入探索——像素缓冲区对象（PBO）（附完整工程代码地址）

原理示意图：

首先检查显卡是否支持 PBO：

#if defined(_WIN32)
    // check PBO is supported by your video card
  // 检查显卡是否支持 PBO
    if (glInfo.isExtensionSupported("GL_ARB_pixel_buffer_object"))
    {
        // get pointers to GL functions
        glGenBuffersARB = (PFNGLGENBUFFERSARBPROC)wglGetProcAddress("glGenBuffersARB");
        glBindBufferARB = (PFNGLBINDBUFFERARBPROC)wglGetProcAddress("glBindBufferARB");
        glBufferDataARB = (PFNGLBUFFERDATAARBPROC)wglGetProcAddress("glBufferDataARB");
        glBufferSubDataARB = (PFNGLBUFFERSUBDATAARBPROC)wglGetProcAddress("glBufferSubDataARB");
        glDeleteBuffersARB = (PFNGLDELETEBUFFERSARBPROC)wglGetProcAddress("glDeleteBuffersARB");
        glGetBufferParameterivARB = (PFNGLGETBUFFERPARAMETERIVARBPROC)wglGetProcAddress("glGetBufferParameterivARB");
        glMapBufferARB = (PFNGLMAPBUFFERARBPROC)wglGetProcAddress("glMapBufferARB");
        glUnmapBufferARB = (PFNGLUNMAPBUFFERARBPROC)wglGetProcAddress("glUnmapBufferARB");
        // check once again PBO extension
        if (glGenBuffersARB && glBindBufferARB && glBufferDataARB && glBufferSubDataARB &&
                glMapBufferARB && glUnmapBufferARB && glDeleteBuffersARB && glGetBufferParameterivARB)
        {
            pboSupported = true;
            pboMode = 1;    // using 1 PBO
            cout << "Video card supports GL_ARB_pixel_buffer_object." << endl;
        }
        else
        {
            pboSupported = false;
            pboMode = 0;    // without PBO
            cout << "Video card does NOT support GL_ARB_pixel_buffer_object." << endl;
        }
    }
    // Query the system memory page size and update the default value
    SYSTEM_INFO si;
    GetSystemInfo(&si);
    if (si.dwPageSize > 0)
    {
        systemPageSize = si.dwPageSize;
    }
#elif defined (__gnu_linux__)
    // for linux, do not need to get function pointers, it is up-to-date
    if (glInfo.isExtensionSupported("GL_ARB_pixel_buffer_object"))
    {
        pboSupported = true;
        cout << "Video card supports GL_ARB_pixel_buffer_object" << endl;
    }
    else
    {
        cout << "Video card does NOT support GL_ARB_pixel_buffer_object" << endl;
    }
    if (glInfo.isExtensionSupported("GL_AMD_pinned_memory"))
    {
        amdSupported = true;
        cout << "Video card supports GL_AMD_pinned_memory" << endl;
    }
    else
    {
        cout << "Video card does NOT support GL_AMD_pinned_memory" << endl;
    }
    // Query the system memory page size and update the default value
    if (sysconf(_SC_PAGE_SIZE) > 0)
    {
        systemPageSize = sysconf(_SC_PAGE_SIZE);
    }
#endif

创建PBO / 调整 PBO个数的代码:

// Resize the pool of Pixel Buffer Objects so that it holds `count` PBOs.
//
// Growing appends new PBOs to pboIds (plus one NULL fence slot each in
// pboFences and, in AMD mode, one page-aligned system-memory buffer in
// alignedBuffers). Shrinking releases PBOs, fences and aligned buffers from
// the back of those containers.
//
// Does nothing when PBOs are not supported. A negative `count` is treated as
// 0. If an allocation fails while growing in AMD mode, the PBOs created so far
// are kept and pboCount is set to the number that actually exist.
void setPboCount(int count)
{
    if (!pboSupported)
        return;

    // A negative request would make the shrink loop pop from empty containers;
    // clamp it so "less than nothing" simply means "release everything".
    if (count < 0)
        count = 0;

    // Requested count is larger than the current number of PBOs: create more.
    if (count > pboCount)
    {
        if (pboMethod != AMD)
        {
            // Generate each Pixel Buffer Object and allocate memory for it.
            // Hopefully, PBOs will get allocated in VRAM
            glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); // Unbind any buffer object previously bound
            for (int i = pboCount; i < count; ++i)
            {
                GLuint pboId;
                glGenBuffers(1, &pboId); // Generate new Buffer Object ID
                glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pboId); // Create a zero-sized memory Pixel Buffer Object and bind it
                glBufferData(GL_PIXEL_UNPACK_BUFFER, DATA_SIZE, NULL, GL_STREAM_DRAW); // Reserve the memory space for the PBO
                glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); // Release the PBO binding
                pboIds.push_back(pboId); // Update our list of PBO IDs
                pboFences.push_back(NULL);
                cout << "Created PBO buffer #" << i << " of size: " << DATA_SIZE << endl;
            }
            pboCount = pboIds.size();
            assert(GL_NO_ERROR == glGetError());
        }
        // Pinned-memory (DMA) mode: the application allocates page-aligned
        // memory itself and hands the pointer to the buffer object.
        else
        {
            // Generate each Pixel Buffer object and allocate memory for it
            // PBOs will get allocated in System RAM, and GPU will access it through DMA
            glBindBuffer(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, 0); // Unbind any buffer object previously bound
            for (int i = pboCount; i < count; ++i)
            {
                GLuint pboId;
                glGenBuffers(1, &pboId); // Generate new Buffer Object ID
                glBindBuffer(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, pboId); // Create a zero-sized memory Pixel Buffer Object and bind it
                assert(GL_NO_ERROR == glGetError());
                // Memory alignment functions are compiler-specific
                GLubyte *ptAlignedBuffer = (GLubyte *)alignedMalloc(systemPageSize, DATA_SIZE);
                if (NULL == ptAlignedBuffer)
                {
                    cout << "ERROR [setPboCount] (alignedMalloc) size: " << DATA_SIZE << " alignment: " << systemPageSize << endl;
                    // Do not leak the buffer name that was just generated, and
                    // do not leave it bound for later GL calls.
                    glBindBuffer(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, 0);
                    glDeleteBuffers(1, &pboId);
                    break;
                }
                cout << "Created memory buffer #" << i << " of size: " << DATA_SIZE << " alignment: " << systemPageSize << endl;
                glBufferData(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, DATA_SIZE, ptAlignedBuffer, GL_STREAM_DRAW); // Take control of the memory space for the PBO
                GLenum error = glGetError();
                if (GL_NO_ERROR != error)
                {
                    cout << "ERROR [setPboCount] (glBufferData): " << (char *)gluErrorString(error) << endl;
                    // Release the GL object first, then the memory it might
                    // reference, so nothing is leaked or left bound.
                    glBindBuffer(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, 0);
                    glDeleteBuffers(1, &pboId);
                    alignedFree(ptAlignedBuffer);
                    cout << "Freed memory buffer #" << i << endl;
                    break;
                }
                glBindBuffer(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, 0); // Release the PBO binding
                assert(GL_NO_ERROR == glGetError());
                pboIds.push_back(pboId); // Update our list of PBO IDs
                pboFences.push_back(NULL);
                alignedBuffers.push_back((GLubyte *)ptAlignedBuffer);
                cout << "Created PBO buffer #" << i << endl;
            }
            pboCount = pboIds.size();
            assert(GL_NO_ERROR == glGetError());
        }
    }
    // Requested count is smaller than the current number of PBOs: delete the
    // surplus ones, newest first.
    else if (count < pboCount)
    {
        const bool usesPinnedMemory = (pboMethod == AMD);
        // Unbind any buffer object previously bound
        glBindBuffer(usesPinnedMemory ? GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD : GL_PIXEL_UNPACK_BUFFER, 0);
        for (int i = pboCount - 1; i >= count; --i)
        {
            glDeleteSync(pboFences.back()); // a NULL (zero) sync is silently ignored by GL
            pboFences.pop_back();
            GLuint pboId = pboIds.back();
            glDeleteBuffers(1, &pboId);
            pboIds.pop_back(); // Update our list of PBO IDs
            cout << "Deleted PBO buffer #" << i << endl;
            // In pinned-memory mode the backing store was allocated by us and
            // must be released by us, after the buffer object is deleted.
            if (usesPinnedMemory && !alignedBuffers.empty())
            {
                alignedFree(alignedBuffers.back());
                alignedBuffers.pop_back();
                cout << "Freed memory buffer #" << i << endl;
            }
        }
        pboCount = pboIds.size();
        assert(GL_NO_ERROR == glGetError());
    }
    cout << "PBO Count: " << pboCount << endl;
}

最关键的显示回调方法：

void displayCB()
{
    if (pboMethod == NONE)
    {
        /*
        * Update data in System Memory.
        */
        t1.start();
        updatePixels(imageData, DATA_SIZE); // 更新 imageData 的像素
        t1.stop();
        updateTime = t1.getElapsedTimeInMilliSec();
        /*
        * Copy data from System Memory to texture object. （将 imageData 的内容从内存拷贝到纹理当中）
        */
        t1.start();
        glBindTexture(GL_TEXTURE_2D, textureId);
        glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, IMAGE_WIDTH, IMAGE_HEIGHT, PIXEL_FORMAT, GL_UNSIGNED_BYTE, (GLvoid *)imageData);
        t1.stop();
        copyTime = t1.getElapsedTimeInMilliSec();
    }
    else
    {
        /*
        * Update buffer indices used in data upload & copy.
        *
        * "uploadIdx": index used to upload pixels to a Pixel Buffer Object.（上传像素至 uploadIdx 指定的 PBO）
        * "copyIdx": index used to copy pixels from a Pixel Buffer Object to a GPU texture.（拷贝 cpyIdx 指定的 PBO 的像素到 GPU 纹理）
        *
        * When (pboCount > 1), this will allow to perform（当 pboCount 数大于1时，就允许使用备用buffer来同时进行上传和拷贝）
        * simultaneous upload & copy, by using alternative buffers.
        * That is a good thing, unless the double buffering is being already
        * done somewhere else in the code.
        */
        static int copyIdx = 0;
        copyIdx = (copyIdx + 1) % pboCount;
        int uploadIdx = (copyIdx + 1) % pboCount;
        /*
        * Upload new data to a Pixel Buffer Object.
        */
        t1.start();
        glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pboIds[uploadIdx]); // Access the Pixel Buffer Object and bind it
    // pboMethod 表示不同的纹理流机制（Texture Stream methods）
        if (pboMethod == ORPHAN)
        {
      // GL_STREAM_DRAW 表示每次渲染都会更新该 PBO的像素数据
      // GL_DYNAMITC_DRAW 表示每帧都会更新该 Buffer
      // GL_STATIC_DRAW 表示几乎或从不更新该 Buffer
            glBufferDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, DATA_SIZE, NULL, GL_STREAM_DRAW_ARB);
      // 获得 PBO 的映射 Buffer 指针，以待写入操作
            GLubyte *ptr = (GLubyte *)glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_WRITE_ONLY_ARB);
            if (NULL == ptr)
            {
                cout << "ERROR [displayCB] (glMapBufferARB): " << (char *)gluErrorString(glGetError()) << endl;
                return;
            }
            else
            {
                // update data directly on the mapped buffer（在映射的 Buffer 上直接更新 PBO 的像素数据）
                updatePixels(ptr, DATA_SIZE);
                // release pointer to mapping buffer（释放映射 Buffer 的指针）
                if (!glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB))
                {
                    cout << "ERROR [displayCB] (glUnmapBufferARB): " << (char *)gluErrorString(glGetError()) << endl;
                }
            }
        }
    // 异步模式
        else if (pboMethod == UNSYNCH_ORPHAN || pboMethod == UNSYNCH_FENCES)
        {
            if (pboMethod == UNSYNCH_FENCES)
            {
        // 检查索引的pboFences 是否是同步对象（Sync Object）
                if (glIsSync(pboFences[uploadIdx]))
                {
          // 阻塞并 wait 同步对象，等待被 signal
                    GLenum result = glClientWaitSync(pboFences[uploadIdx], 0, GL_TIMEOUT_IGNORED);
                    switch (result)
                    {
                    case GL_ALREADY_SIGNALED:
                        // Transfer was already done when trying to use buffer
            // （说明此时传输已经完成）
                        cout << "DEBUG (glClientWaitSync): ALREADY_SIGNALED (good timing!) uploadIdx: " << uploadIdx << endl;
                        break;
                    case GL_CONDITION_SATISFIED:
                        // This means that we had to wait for the fence to synchronize us after using all the buffers,
                        // which implies that the GPU command queue is full and that we are GPU-bound (DMA transfers aren't fast enough).
            // （说明我们不得不等待 fence 的同步，即 我们所绑定的 GPU 命令队列已满[DMA传输还不够快]）
                        cout << "WARNING (glClientWaitSync): CONDITION_SATISFIED (had to wait for the sync) uploadIdx: " << uploadIdx << endl;
                        break;
                    case GL_TIMEOUT_EXPIRED:
                        cout << "WARNING (glClientWaitSync): TIMEOUT_EXPIRED (DMA transfers are too slow!) uploadIdx: " << uploadIdx << endl;
                        break;
                    case GL_WAIT_FAILED:
                        cout << "ERROR (glClientWaitSync): WAIT_FAILED: " << (char *)gluErrorString(glGetError()) << endl;
                        break;
                    }
          // 删除同步对象
                    glDeleteSync(pboFences[uploadIdx]);
                    pboFences[uploadIdx] = NULL;
                }
            }
      // 注意和 ORPHAN 的区别
            else if (pboMethod == UNSYNCH_ORPHAN)
            {
        // Buffer 需要重新指定
                glBufferData(GL_PIXEL_UNPACK_BUFFER, DATA_SIZE, NULL, GL_STREAM_DRAW); // Buffer re-specification (orphaning)
            }
      // 获得 PBO 的映射 Buffer 指针，以待写入操作
            GLubyte *ptr = (GLubyte *)glMapBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, DATA_SIZE, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
            if (NULL == ptr)
            {
                cout << "ERROR [displayCB] (glMapBufferRange): " << (char *)gluErrorString(glGetError()) << endl;
                return;
            }
            else
            {
                updatePixels(ptr, DATA_SIZE); // Update data directly on the mapped buffer（直接更新映射 Buffer 的数据）
                if (!glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER))
                {
                    cout << "ERROR [displayCB] (glUnmapBuffer): " << (char *)gluErrorString(glGetError()) << endl;
                }
            }
        }
        else if (pboMethod == AMD)
        {
      // 同样要进行手动同步
            if (glIsSync(pboFences[uploadIdx]))
            {
                GLenum result = glClientWaitSync(pboFences[uploadIdx], 0, GL_TIMEOUT_IGNORED);
                switch (result)
                {
                case GL_ALREADY_SIGNALED:
                    // Transfer was already done when trying to use buffer
                    //cout << "DEBUG (glClientWaitSync): ALREADY_SIGNALED (good timing!) uploadIdx: " << uploadIdx << endl;
                    break;
                case GL_CONDITION_SATISFIED:
                    // This means that we had to wait for the fence to synchronize us after using all the buffers,
                    // which implies that the GPU command queue is full and that we are GPU-bound (DMA transfers aren't fast enough).
                    //cout << "WARNING (glClientWaitSync): CONDITION_SATISFIED (had to wait for the sync) uploadIdx: " << uploadIdx << endl;
                    break;
                case GL_TIMEOUT_EXPIRED:
                    cout << "WARNING (glClientWaitSync): TIMEOUT_EXPIRED (DMA transfers are too slow!) uploadIdx: " << uploadIdx << endl;
                    break;
                case GL_WAIT_FAILED:
                    cout << "ERROR (glClientWaitSync): WAIT_FAILED: " << (char *)gluErrorString(glGetError()) << endl;
                    break;
                }
                glDeleteSync(pboFences[uploadIdx]);
                pboFences[uploadIdx] = NULL;
            }
      // alignedBuffers 存储的是手动分配的一块对齐过的内存指针
            updatePixels(alignedBuffers[uploadIdx], DATA_SIZE); // Update data directly on the mapped buffer
        }
        t1.stop();
        updateTime = t1.getElapsedTimeInMilliSec();
        /*
        * Protect each Pixel Buffer Object against being overwritten.（防止 PBO 被重复写入）
        *
        * Tipically the data upload will be slower than our main loop, so this
        * function will be called again before the previous frame was uploaded
        * and processed. The main bottleneck is the PCI bus transfer speed,
        * which limits how fast the DMA (System Memory --> VRAM) can work.
    * 
        * 通常数据上传将会慢于主循环，意味着在先前的帧被上传处理之前，该方法将会被再次调用。
    * 主要的性能瓶颈在于 PCI 总线的传输速度， 它限制了 DMA 的传输速度（内存到显存）。
    *
        * OpenGL Sync Fences will block until the PBO is released.（GL 同步对象将会阻塞主线程，直到 PBO 被释放为止）
        */
        if (pboMethod == UNSYNCH_FENCES || pboMethod == AMD)
        {
      // 创建一个同步对象，并将其加入 GL 的命令流中（❤ 具体请参看第八版红宝书的 P589 第11章 Memory ❤）
            pboFences[uploadIdx] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
        }
        /*
        * Copy data from a Pixel Buffer Object to a GPU texture.
        * glTexSubImage2D() will copy pixels to the corresponding texture in the GPU.
    * 传输 PBO 中的数据到 GPU 纹理
        */
        t1.start();
        glBindTexture(GL_TEXTURE_2D, textureId); // Bind the texture
        glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, pboIds[copyIdx]); // Access the Pixel Buffer Object and bind it
        // Use offset instead of pointer（由于使用了 PBO，所以传递的是偏移量）
        glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, IMAGE_WIDTH, IMAGE_HEIGHT, PIXEL_FORMAT, GL_UNSIGNED_BYTE, 0);
        t1.stop();
        copyTime = t1.getElapsedTimeInMilliSec();
        // it is good idea to release PBOs with ID 0 after use.
        // Once bound with 0, all pixel operations behave normal ways.
        glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0);
    }
    // clear buffer
    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
    // save the initial ModelView matrix before modifying ModelView matrix
    glPushMatrix();
    // tramsform camera
    glTranslatef(0, 0, -cameraDistance);
    glRotatef(cameraAngleX, 1, 0, 0); // pitch
    glRotatef(cameraAngleY, 0, 1, 0); // heading
    // draw a point with texture
    glBindTexture(GL_TEXTURE_2D, textureId);
    glColor4f(1, 1, 1, 1);
    glBegin(GL_QUADS);
    glNormal3f(0, 0, 1);
    glTexCoord2f(0.0f, 0.0f);
    glVertex3f(-1.0f, -1.0f, 0.0f);
    glTexCoord2f(1.0f, 0.0f);
    glVertex3f(1.0f, -1.0f, 0.0f);
    glTexCoord2f(1.0f, 1.0f);
    glVertex3f(1.0f, 1.0f, 0.0f);
    glTexCoord2f(0.0f, 1.0f);
    glVertex3f(-1.0f, 1.0f, 0.0f);
    glEnd();
    // unbind texture
    glBindTexture(GL_TEXTURE_2D, 0);
    // draw info messages
    showInfo();
    //showTransferRate();
    printTransferRate();
    glPopMatrix();
    glutSwapBuffers();
}