On iOS, VideoToolbox is a powerful framework from Apple for hardware-accelerated video encoding and decoding. It supports a wide range of codecs, including (but not limited to) H.264 and HEVC. Using VideoToolbox improves codec performance while lowering CPU usage.
Decoding

Creating the decoder

Setting the video format description

Parse the VPS/SPS/PPS out of the stream and use them to create a CMVideoFormatDescriptionRef.
```cpp
CMVideoFormatDescriptionRef VideoToolBoxDecoderImpl::CreateVideoFormatDesc(
    const VideoInfo &video_info, const ExtraData &extra_data) {
  OSStatus status;
  CMFormatDescriptionRef format_desc;
  switch (video_info.codec_type) {
    case CodecType::CODEC_TYPE_H264: {
      auto *avc_ps_ = new avc::ParamSets();
      ...
      const uint8_t *const parameterSetPointers[] = {
          avc_ps_->sps_list[0].data, avc_ps_->pps_list[0].data};
      const size_t parameterSetSizes[] = {
          (size_t)avc_ps_->sps_list[0].data_size,
          (size_t)avc_ps_->pps_list[0].data_size};
      status = CMVideoFormatDescriptionCreateFromH264ParameterSets(
          kCFAllocatorDefault, 2, parameterSetPointers, parameterSetSizes, 4,
          &format_desc);
      delete avc_ps_;
    } break;
    case CodecType::CODEC_TYPE_H265: {
      auto *hevc_ps = new hevc::ParamSets();
      ...
      if (hevc_ps->vps_list.empty() || hevc_ps->sps_list.empty() ||
          hevc_ps->pps_list.empty()) {
        return nullptr;
      }
      const uint8_t *const parameterSetPointers[] = {
          hevc_ps->vps_list[0].data, hevc_ps->sps_list[0].data,
          hevc_ps->pps_list[0].data};
      const size_t parameterSetSizes[] = {
          (size_t)hevc_ps->vps_list[0].data_size,
          (size_t)hevc_ps->sps_list[0].data_size,
          (size_t)hevc_ps->pps_list[0].data_size};
      status = CMVideoFormatDescriptionCreateFromHEVCParameterSets(
          kCFAllocatorDefault, 3, parameterSetPointers, parameterSetSizes, 4,
          nil, &format_desc);
      delete hevc_ps;
    } break;
    default:
      return nullptr;
  }

  if (status) {
    return nullptr;
  }
  return format_desc;
}
```
Setting buffer attributes

The output pixel format can be selected via kCVPixelBufferPixelFormatTypeKey; VideoToolbox performs the format conversion internally.
```cpp
CFDictionaryRef VideoToolBoxDecoderImpl::CreateVideoBufferAttr(
    const VideoInfo &video_info) {
  CFNumberRef w = CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type,
                                 &video_info.width);
  CFNumberRef h = CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type,
                                 &video_info.height);

  OSType pix_fmt = PixFormatConvert<PixFormat, OSType>(video_info.pix_fmt);
  if (pix_fmt == 0) {
    return nullptr;
  }

  CFNumberRef cv_pix_fmt =
      CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, &pix_fmt);

  CFMutableDictionaryRef buffer_attributes = CFDictionaryCreateMutable(
      kCFAllocatorDefault, 5, &kCFTypeDictionaryKeyCallBacks,
      &kCFTypeDictionaryValueCallBacks);
  CFMutableDictionaryRef io_surface_properties = CFDictionaryCreateMutable(
      kCFAllocatorDefault, 0, &kCFTypeDictionaryKeyCallBacks,
      &kCFTypeDictionaryValueCallBacks);

  CFDictionarySetValue(buffer_attributes, kCVPixelBufferIOSurfacePropertiesKey,
                       io_surface_properties);
  CFDictionarySetValue(buffer_attributes, kCVPixelBufferPixelFormatTypeKey,
                       cv_pix_fmt);
  CFDictionarySetValue(buffer_attributes, kCVPixelBufferWidthKey, w);
  CFDictionarySetValue(buffer_attributes, kCVPixelBufferHeightKey, h);
#if TARGET_OS_IPHONE
  CFDictionarySetValue(buffer_attributes,
                       kCVPixelBufferOpenGLESCompatibilityKey, kCFBooleanTrue);
#else
  CFDictionarySetValue(buffer_attributes,
                       kCVPixelBufferIOSurfaceOpenGLTextureCompatibilityKey,
                       kCFBooleanTrue);
#endif

  CFRelease(io_surface_properties);
  CFRelease(cv_pix_fmt);
  CFRelease(w);
  CFRelease(h);

  return buffer_attributes;
}
```
Creating the decompression session (VTDecompressionSessionCreate)

Create a decoding session from the stream's format description.
```cpp
VTDecompressionSessionRef session;

CMVideoFormatDescriptionRef format_desc =
    CreateVideoFormatDesc(video_info_, extra_data);
CFDictionaryRef buf_attr = CreateVideoBufferAttr(video_info_);

VTDecompressionOutputCallbackRecord decoder_cb;
decoder_cb.decompressionOutputCallback =
    [](void *opaque, void *sourceFrameRefCon, OSStatus status,
       VTDecodeInfoFlags flags, CVImageBufferRef image_buffer, CMTime pts,
       CMTime duration) {
      if (status != noErr) {
        LOG("VTDecompressionSessionDecodeFrame failed status = %d", status);
      }
      ...
    };
decoder_cb.decompressionOutputRefCon = this;

OSStatus status = VTDecompressionSessionCreate(
    kCFAllocatorDefault, format_desc, nullptr, buf_attr, &decoder_cb,
    &session);

CFRelease(buf_attr);

if (status) {
  CFRelease(format_desc);
  return;
}
```
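The third argument to VTDecompressionSessionCreate (nullptr above) is a decoder-specification dictionary. On macOS it can be used to require or prefer the hardware decoder; on iOS hardware decoding is the default, so it is usually left empty. A minimal sketch:

```cpp
// Sketch: ask for hardware-accelerated decoding via the decoder
// specification (a macOS-only key; unnecessary on iOS).
CFMutableDictionaryRef decoder_spec = CFDictionaryCreateMutable(
    kCFAllocatorDefault, 1, &kCFTypeDictionaryKeyCallBacks,
    &kCFTypeDictionaryValueCallBacks);
CFDictionarySetValue(
    decoder_spec,
    kVTVideoDecoderSpecification_EnableHardwareAcceleratedVideoDecoder,
    kCFBooleanTrue);
// Then pass decoder_spec in place of the nullptr:
// VTDecompressionSessionCreate(kCFAllocatorDefault, format_desc, decoder_spec,
//                              buf_attr, &decoder_cb, &session);
CFRelease(decoder_spec);
```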
Processing data

VideoToolbox only accepts AVCC-format (length-prefixed) bitstreams; Annex-B streams are not supported, so the input must be converted beforehand.
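A minimal sketch of that conversion, assuming every NAL unit is preceded by a 4-byte 00 00 00 01 start code (real streams may also use 3-byte start codes, which a production parser must handle); the function name is illustrative:

```cpp
#include <cstdint>
#include <vector>

// Sketch: rewrite Annex-B start codes as 4-byte big-endian NAL length
// prefixes (AVCC framing). Assumes 4-byte start codes only.
std::vector<uint8_t> AnnexBToAvcc(const uint8_t *data, size_t size) {
  std::vector<uint8_t> out(data, data + size);
  std::vector<size_t> starts;  // offsets of each 00 00 00 01 start code
  for (size_t i = 0; i + 4 <= out.size(); ++i) {
    if (out[i] == 0 && out[i + 1] == 0 && out[i + 2] == 0 && out[i + 3] == 1) {
      starts.push_back(i);
      i += 3;  // jump past this start code
    }
  }
  starts.push_back(out.size());  // sentinel marking the end of the buffer
  for (size_t n = 0; n + 1 < starts.size(); ++n) {
    uint32_t nal_len = (uint32_t)(starts[n + 1] - starts[n] - 4);
    size_t p = starts[n];
    // Overwrite the start code in place with the NAL length.
    out[p] = (nal_len >> 24) & 0xFF;
    out[p + 1] = (nal_len >> 16) & 0xFF;
    out[p + 2] = (nal_len >> 8) & 0xFF;
    out[p + 3] = nal_len & 0xFF;
  }
  return out;
}
```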
Creating the SampleBuffer

A CMSampleBufferRef holds the compressed data together with its timestamps.
```cpp
CMSampleBufferRef VideoToolBoxDecoderImpl::CreateSampleBuffer(Packet &packet) {
  CMSampleBufferRef sample_buf = nullptr;
  CMBlockBufferRef block_buf = nullptr;

  OSStatus status = CMBlockBufferCreateWithMemoryBlock(
      kCFAllocatorDefault, packet.byte_data, packet.data_size, kCFAllocatorNull,
      nullptr, 0, packet.data_size, 0, &block_buf);
  if (status) {
    return nullptr;  // don't return an uninitialized sample_buf on failure
  }

  CMSampleTimingInfo timingInfo = kCMTimingInfoInvalid;
  timingInfo.presentationTimeStamp = CMTimeMake(packet.pts, 1000);
  timingInfo.duration = CMTimeMake(packet.duration, 1000);
  timingInfo.decodeTimeStamp = CMTimeMake(packet.dts, 1000);
  status = CMSampleBufferCreate(kCFAllocatorDefault, block_buf, TRUE, nullptr,
                                nullptr, format_desc_, 1, 1, &timingInfo, 0,
                                nullptr, &sample_buf);

  if (block_buf) {
    CFRelease(block_buf);
  }

  return status ? nullptr : sample_buf;
}
```
Decoding video frames (VTDecompressionSessionDecodeFrame)

Feed the encoded frame data into the decoder.
```cpp
OSStatus status;
CMSampleBufferRef sample_buf = nullptr;

if (packet.get()->byte_data) {
  sample_buf = CreateSampleBuffer(*packet);
  if (sample_buf == nullptr) {
    return HMError::FAILED;
  }
}

status = VTDecompressionSessionDecodeFrame(session_, sample_buf, 0, nullptr,
                                           nullptr);
if (sample_buf) {
  CFRelease(sample_buf);
}
```
Frame reordering

When the stream contains B-frames, VideoToolbox does not reorder its output: frames come out in dts (decode) order, and the application has to restore presentation order itself.

One approach is to keep a queue. Whenever a non-B frame (an I or P frame) is parsed, every frame up to and including the previous non-B frame's timestamp is guaranteed to be complete, so the queue can be sorted and those frames emitted in order. In other words, each time a consecutive run of B-frames has been decoded, that run can be flushed (a flush sketch follows the snippet below); the queue buffers at most 16 + 1 frames.
```cpp
if (packet->byte_data) {
  std::sort(frame_queue_.begin(), frame_queue_.end(), [](auto f1, auto f2) {
    return f1->timestamp < f2->timestamp;
  });

  if (packet->slice_type != SliceType::B_FRAME) {
    if (last_nonb_pts > 0) {
      current_pts_ = last_nonb_pts;
    }
    last_nonb_pts = packet->pts;
  }
}
```
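The snippet above only maintains the watermark (current_pts_); a minimal sketch of the matching flush step, assuming frame_queue_ is a std::deque already sorted by timestamp and EmitFrame is whatever hands frames downstream (both names beyond the snippet are hypothetical):

```cpp
// Sketch: emit every queued frame whose timestamp is at or below the
// watermark set by the last non-B frame. The queue is sorted by
// timestamp, so frames can be popped from the front in order.
while (!frame_queue_.empty() &&
       frame_queue_.front()->timestamp <= current_pts_) {
  EmitFrame(frame_queue_.front());  // hypothetical downstream hand-off
  frame_queue_.pop_front();
}
```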
Handling decoded output

Decoded frames are delivered through the output callback, e.g. for rendering to the screen.
```cpp
decoder_cb.decompressionOutputCallback =
    [](void *opaque, void *sourceFrameRefCon, OSStatus status,
       VTDecodeInfoFlags flags, CVImageBufferRef image_buffer, CMTime pts,
       CMTime duration) {
      if (status != noErr) {
        AVLOG("VTDecompressionSessionDecodeFrame failed status = %d", status);
      }
    };
```
Reading decoded data

Read out the pixel data, in whichever format it arrives, from the CVImageBufferRef.
```cpp
CVPixelBufferLockBaseAddress(src, 0);

dst->video_frame_info.width = CVPixelBufferGetWidth(src);
dst->video_frame_info.height = CVPixelBufferGetHeight(src);

OSType format = CVPixelBufferGetPixelFormatType(src);
uint8_t *src_data = nullptr;
unsigned long len = 0;
switch (format) {
  case kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange: {
    dst->video_frame_info.pix_fmt = PixFormat::PIX_FMT_NV12;
    if (!dst->byte_data.data) {
      FrameDataAlloc(&dst->byte_data, PixFormat::PIX_FMT_NV12,
                     dst->video_frame_info.width,
                     dst->video_frame_info.height);
    }
    dst->byte_data.linesize[0] = CVPixelBufferGetBytesPerRowOfPlane(src, 0);
    len = dst->byte_data.linesize[0] * dst->video_frame_info.height *
          sizeof(uint8_t);
    src_data = (uint8_t *)CVPixelBufferGetBaseAddressOfPlane(src, 0);
    memcpy(dst->byte_data.data[0], src_data, len);

    dst->byte_data.linesize[1] = CVPixelBufferGetBytesPerRowOfPlane(src, 1);
    len = dst->byte_data.linesize[1] * (dst->video_frame_info.height / 2) *
          sizeof(uint8_t);
    src_data = (uint8_t *)CVPixelBufferGetBaseAddressOfPlane(src, 1);
    memcpy(dst->byte_data.data[1], src_data, len);
  } break;
  case kCVPixelFormatType_420YpCbCr8Planar: {
    dst->video_frame_info.pix_fmt = PixFormat::PIX_FMT_I420;
    if (!dst->byte_data.data) {
      FrameDataAlloc(&dst->byte_data, PixFormat::PIX_FMT_I420,
                     dst->video_frame_info.width,
                     dst->video_frame_info.height);
    }
    dst->byte_data.linesize[0] = CVPixelBufferGetBytesPerRowOfPlane(src, 0);
    len = dst->byte_data.linesize[0] * dst->video_frame_info.height *
          sizeof(uint8_t);
    src_data = (uint8_t *)CVPixelBufferGetBaseAddressOfPlane(src, 0);
    memcpy(dst->byte_data.data[0], src_data, len);

    dst->byte_data.linesize[1] = CVPixelBufferGetBytesPerRowOfPlane(src, 1);
    len = dst->byte_data.linesize[1] * (dst->video_frame_info.height / 2) *
          sizeof(uint8_t);
    src_data = (uint8_t *)CVPixelBufferGetBaseAddressOfPlane(src, 1);
    memcpy(dst->byte_data.data[1], src_data, len);

    dst->byte_data.linesize[2] = CVPixelBufferGetBytesPerRowOfPlane(src, 2);
    len = dst->byte_data.linesize[2] * (dst->video_frame_info.height / 2) *
          sizeof(uint8_t);
    src_data = (uint8_t *)CVPixelBufferGetBaseAddressOfPlane(src, 2);
    memcpy(dst->byte_data.data[2], src_data, len);
  } break;
  case kCVPixelFormatType_32ARGB:
    dst->video_frame_info.pix_fmt = PixFormat::PIX_FMT_ARGB;
    if (!dst->byte_data.data) {
      FrameDataAlloc(&dst->byte_data, PixFormat::PIX_FMT_ARGB,
                     dst->video_frame_info.width,
                     dst->video_frame_info.height);
    }
    dst->byte_data.linesize[0] = CVPixelBufferGetBytesPerRowOfPlane(src, 0);
    len = dst->byte_data.linesize[0] * dst->video_frame_info.height *
          sizeof(uint8_t);
    src_data = (uint8_t *)CVPixelBufferGetBaseAddressOfPlane(src, 0);
    memcpy(dst->byte_data.data[0], src_data, len);
    break;
  default:
    return dst;
}

CVPixelBufferUnlockBaseAddress(src, 0);
```
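Note that these memcpy calls copy linesize (bytes-per-row) × height, which assumes the destination planes were allocated with the same row stride as the CVPixelBuffer; IOSurface-backed buffers often pad rows beyond the visible width. A stride-safe row-by-row copy, as a sketch (the helper name is illustrative):

```cpp
// Sketch: copy one plane row by row so that source padding (src_stride)
// and destination packing (dst_stride) may differ.
static void CopyPlane(uint8_t *dst, size_t dst_stride, const uint8_t *src,
                      size_t src_stride, size_t width_bytes, size_t rows) {
  for (size_t r = 0; r < rows; ++r) {
    memcpy(dst + r * dst_stride, src + r * src_stride, width_bytes);
  }
}
```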
end_of_stream

When the input is exhausted, flush the remaining frames out of the decoder and signal end_of_stream.
```cpp
if (send_eos_) {
  VTDecompressionSessionWaitForAsynchronousFrames(session_);
}
```
Tearing down the decoding session

When decoding is finished, invalidate the session and release the associated resources.
```cpp
if (session_) {
  VTDecompressionSessionInvalidate(session_);
  CFRelease(session_);
  session_ = nullptr;
}

if (format_desc_) {
  CFRelease(format_desc_);
  format_desc_ = nullptr;
}
```
Encoding

Creating the encoder

Setting buffer attributes
```cpp
CFDictionaryRef VideoToolBoxEncoderImpl::CreateVideoBufferAttr(
    const VideoInfo &video_info) {
  CFNumberRef w = CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type,
                                 &video_info.width);
  CFNumberRef h = CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type,
                                 &video_info.height);

  OSType pix_fmt = PixFormatConvert<PixFormat, OSType>(video_info.pix_fmt);
  if (pix_fmt == 0) {
    return nullptr;
  }

  CFNumberRef cv_pix_fmt =
      CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, &pix_fmt);

  CFMutableDictionaryRef buffer_attributes = CFDictionaryCreateMutable(
      kCFAllocatorDefault, 5, &kCFTypeDictionaryKeyCallBacks,
      &kCFTypeDictionaryValueCallBacks);
  CFMutableDictionaryRef io_surface_properties = CFDictionaryCreateMutable(
      kCFAllocatorDefault, 0, &kCFTypeDictionaryKeyCallBacks,
      &kCFTypeDictionaryValueCallBacks);

  CFDictionarySetValue(buffer_attributes, kCVPixelBufferIOSurfacePropertiesKey,
                       io_surface_properties);
  CFDictionarySetValue(buffer_attributes, kCVPixelBufferPixelFormatTypeKey,
                       cv_pix_fmt);
  CFDictionarySetValue(buffer_attributes, kCVPixelBufferWidthKey, w);
  CFDictionarySetValue(buffer_attributes, kCVPixelBufferHeightKey, h);
#if TARGET_OS_IPHONE
  CFDictionarySetValue(buffer_attributes,
                       kCVPixelBufferOpenGLESCompatibilityKey, kCFBooleanTrue);
#else
  CFDictionarySetValue(buffer_attributes,
                       kCVPixelBufferIOSurfaceOpenGLTextureCompatibilityKey,
                       kCFBooleanTrue);
#endif

  CFRelease(io_surface_properties);
  CFRelease(cv_pix_fmt);
  CFRelease(w);
  CFRelease(h);

  return buffer_attributes;
}
```
Creating the compression session (VTCompressionSessionCreate)

Configure the video encoding parameters and create a compression session, then set the encoder's properties, such as bitrate, frame rate, and resolution.
```cpp
VTCompressionSessionRef session;

CFDictionaryRef buf_attr = CreateVideoBufferAttr(video_info_);

CVPixelBufferPoolCreate(kCFAllocatorDefault, nullptr, buf_attr, &pool);

VTCompressionOutputCallback encoder_cb =
    [](void *outputCallbackRefCon, void *sourceFrameRefCon, OSStatus status,
       VTEncodeInfoFlags infoFlags, CMSampleBufferRef sampleBuffer) {
      if (status != noErr) {
        AVLOG("VTCompressionSessionEncodeFrame failed status = %d", status);
      }
      ...
    };

CMVideoCodecType codec_type =
    CodecTypeConvert<CodecType, CMVideoCodecType>(video_info_.codec_type);

OSStatus status = VTCompressionSessionCreate(
    kCFAllocatorDefault, video_info_.width, video_info_.height, codec_type,
    nullptr, buf_attr, nullptr, encoder_cb, this, &session);

CFMutableDictionaryRef props = CFDictionaryCreateMutable(
    kCFAllocatorDefault, 5, &kCFTypeDictionaryKeyCallBacks,
    &kCFTypeDictionaryValueCallBacks);

CFNumberRef fps = CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type,
                                 &video_info_.fps);
CFNumberRef bit_rate = CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type,
                                      &video_info_.bit_rate);

CFDictionarySetValue(props, kVTCompressionPropertyKey_ExpectedFrameRate, fps);
CFDictionarySetValue(props, kVTCompressionPropertyKey_AverageBitRate,
                     bit_rate);
CFDictionarySetValue(props, kVTCompressionPropertyKey_AllowFrameReordering,
                     kCFBooleanTrue);

VTSessionSetProperties(session, props);

VTCompressionSessionPrepareToEncodeFrames(session);

CFRelease(props);

if (status) {
  return;
}

session_ = session;
```
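Other session properties can be set the same way; a sketch of a few commonly used ones (the keys are standard VideoToolbox keys, but which ones matter depends on the use case, and the GOP length here is an assumed example value):

```cpp
// Sketch: additional session properties often set for streaming.
int32_t keyframe_interval = 60;  // assumed GOP length, e.g. 2 s at 30 fps
CFNumberRef gop = CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type,
                                 &keyframe_interval);
VTSessionSetProperty(session, kVTCompressionPropertyKey_MaxKeyFrameInterval,
                     gop);
// Low-latency use usually enables RealTime (and disables reordering).
VTSessionSetProperty(session, kVTCompressionPropertyKey_RealTime,
                     kCFBooleanTrue);
VTSessionSetProperty(session, kVTCompressionPropertyKey_ProfileLevel,
                     kVTProfileLevel_H264_High_AutoLevel);
CFRelease(gop);
```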
Processing data

VideoToolbox outputs an AVCC-format bitstream, so the SPS/PPS must be packaged into AVCC-style ExtraData (an avcC configuration record).
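For H.264 that record has a fixed layout (the AVCDecoderConfigurationRecord of ISO/IEC 14496-15); a minimal sketch assuming a single SPS and a single PPS, with an illustrative function name:

```cpp
#include <cstdint>
#include <vector>

// Sketch: build an avcC (AVCDecoderConfigurationRecord) from one SPS and
// one PPS, declaring 4-byte NAL length prefixes (lengthSizeMinusOne = 3).
std::vector<uint8_t> BuildAvcC(const uint8_t *sps, size_t sps_size,
                               const uint8_t *pps, size_t pps_size) {
  std::vector<uint8_t> out;
  out.push_back(1);        // configurationVersion
  out.push_back(sps[1]);   // AVCProfileIndication (byte 1 of the SPS NAL)
  out.push_back(sps[2]);   // profile_compatibility
  out.push_back(sps[3]);   // AVCLevelIndication
  out.push_back(0xFF);     // 6 reserved bits + lengthSizeMinusOne = 3
  out.push_back(0xE1);     // 3 reserved bits + numOfSPS = 1
  out.push_back(static_cast<uint8_t>((sps_size >> 8) & 0xFF));  // SPS length
  out.push_back(static_cast<uint8_t>(sps_size & 0xFF));         // (16-bit BE)
  out.insert(out.end(), sps, sps + sps_size);
  out.push_back(1);        // numOfPPS = 1
  out.push_back(static_cast<uint8_t>((pps_size >> 8) & 0xFF));  // PPS length
  out.push_back(static_cast<uint8_t>(pps_size & 0xFF));
  out.insert(out.end(), pps, pps + pps_size);
  return out;
}
```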
Writing input data

Write the pixel data, in whichever format it comes in, into the CVImageBufferRef.
```cpp
CVPixelBufferLockBaseAddress(dst, 0);

uint8_t *dst_data;
unsigned long len;
switch (src->video_frame_info.pix_fmt) {
  case PixFormat::PIX_FMT_I420:
    len = src->byte_data.linesize[0] * src->video_frame_info.height *
          sizeof(uint8_t);
    dst_data = (uint8_t *)CVPixelBufferGetBaseAddressOfPlane(dst, 0);
    memcpy(dst_data, src->byte_data.data[0], len);

    len = src->byte_data.linesize[1] * (src->video_frame_info.height / 2) *
          sizeof(uint8_t);
    dst_data = (uint8_t *)CVPixelBufferGetBaseAddressOfPlane(dst, 1);
    memcpy(dst_data, src->byte_data.data[1], len);

    len = src->byte_data.linesize[2] * (src->video_frame_info.height / 2) *
          sizeof(uint8_t);
    dst_data = (uint8_t *)CVPixelBufferGetBaseAddressOfPlane(dst, 2);
    memcpy(dst_data, src->byte_data.data[2], len);
    break;
  case PixFormat::PIX_FMT_NV12: {
    len = src->byte_data.linesize[0] * src->video_frame_info.height *
          sizeof(uint8_t);
    dst_data = (uint8_t *)CVPixelBufferGetBaseAddressOfPlane(dst, 0);
    memcpy(dst_data, src->byte_data.data[0], len);

    len = src->byte_data.linesize[1] * (src->video_frame_info.height / 2) *
          sizeof(uint8_t);
    dst_data = (uint8_t *)CVPixelBufferGetBaseAddressOfPlane(dst, 1);
    memcpy(dst_data, src->byte_data.data[1], len);
  } break;
  case PixFormat::PIX_FMT_ARGB:
    len = src->byte_data.linesize[0] * src->video_frame_info.height *
          sizeof(uint8_t);
    dst_data = (uint8_t *)CVPixelBufferGetBaseAddressOfPlane(dst, 0);
    memcpy(dst_data, src->byte_data.data[0], len);
    break;
  default:
    return dst;
}

CVPixelBufferUnlockBaseAddress(dst, 0);
```
Encoding frame data (VTCompressionSessionEncodeFrame)

Feed raw video frames into the encoder. A CVImageBufferRef is created from the buffer pool to receive the encoder's input data.
```cpp
CVImageBufferRef pixel_buffer = nullptr;
CVPixelBufferPoolCreatePixelBuffer(kCFAllocatorDefault, pool, &pixel_buffer);
if (!pixel_buffer) {
  return;
}

...

CMTime timestamp = CMTimeMake(frame->timestamp, COMMON_TIME_BASE);
CMTime timespan = CMTimeMake(frame->timespan, COMMON_TIME_BASE);
VTEncodeInfoFlags flags = kVTEncodeInfo_Asynchronous;

OSStatus status = VTCompressionSessionEncodeFrame(
    session_, frame->byte_data.data ? pixel_buffer : nullptr, timestamp,
    timespan, nullptr, nullptr, &flags);
if (pixel_buffer) {
  CFRelease(pixel_buffer);
}

return;
```
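The frameProperties parameter (passed as nullptr above) can carry per-frame options; a sketch of forcing a keyframe on demand:

```cpp
// Sketch: request a keyframe for this frame via per-frame properties.
CFMutableDictionaryRef frame_props = CFDictionaryCreateMutable(
    kCFAllocatorDefault, 1, &kCFTypeDictionaryKeyCallBacks,
    &kCFTypeDictionaryValueCallBacks);
CFDictionarySetValue(frame_props, kVTEncodeFrameOptionKey_ForceKeyFrame,
                     kCFBooleanTrue);
// Pass frame_props as the frameProperties argument:
// VTCompressionSessionEncodeFrame(session_, pixel_buffer, timestamp,
//                                 timespan, frame_props, nullptr, &flags);
CFRelease(frame_props);
```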
Handling compressed output

The output callback receives the compressed data, which is typically written to a file or sent over the network.
```cpp
VTCompressionOutputCallback encoder_cb =
    [](void *outputCallbackRefCon, void *sourceFrameRefCon, OSStatus status,
       VTEncodeInfoFlags infoFlags, CMSampleBufferRef sampleBuffer) {
      if (status != noErr) {
        AVLOG("VTCompressionSessionEncodeFrame failed status = %d", status);
      }
    };
```
Note that when B-frames are enabled, VideoToolbox does not apply any dts offset itself, which breaks downstream muxing, so the offset has to be applied by hand (see the dts offset section below).
```cpp
auto packet = std::make_unique<Packet>();

const CMTime &pts = CMSampleBufferGetPresentationTimeStamp(sampleBuffer);
const CMTime &dts = CMSampleBufferGetDecodeTimeStamp(sampleBuffer);
const CMTime &duration = CMSampleBufferGetDuration(sampleBuffer);
packet->pts = pts.value;
packet->dts = dts.value - (2 * duration.value);  // dts offset, see below
packet->duration = duration.value;

CMBlockBufferRef blockBuffer = CMSampleBufferGetDataBuffer(sampleBuffer);
size_t length, totalLength;
char *dataPointer;

OSStatus statusCodeRet = CMBlockBufferGetDataPointer(
    blockBuffer, 0, &length, &totalLength, &dataPointer);
if (statusCodeRet == noErr) {
  unsigned long len = totalLength * sizeof(uint8_t);
  packet->byte_data = static_cast<uint8_t *>(malloc(len));
  memset(packet->byte_data, 0, len);
  memcpy(packet->byte_data, dataPointer, len);
  packet->data_size = static_cast<int>(totalLength);
}

packet_queue_.push_back(packet.release());
```
dts offset

The encoder's output pts and dts both start counting from 0, which produces frames whose dts is greater than their pts. The dts must therefore be shifted back, or those frames will be dropped when muxing with ffmpeg.

```
type:         I   b   b   b   b   P
pts:          1   2   3   4   5   6
raw dts:      1   3   4   5   6   2
shifted dts:  0   2   3   4   5   1
```

When B-frames are present, backward reference frames have to be encoded ahead of the frames that reference them, so they move forward in the stream and pts and dts diverge. In the table above, frame 6 (P) is the backward reference for the four b-frames before it, so it is encoded ahead of them, giving the raw dts in the third row. This creates a problem: some frames end up with dts greater than pts, which is nonsensical from a playback standpoint (the frame would be due on screen before it is due at the decoder), so ffmpeg drops such packets when muxing; ffmpeg does not accept frames with dts > pts. Hence the dts must be shifted.

When no B-frame is itself used as a reference, only the P frame moves forward, so shifting the dts back by one frame is enough, as in the last row above.

```
type:         I   b   b   B   b   P
pts:          1   2   3   4   5   6
raw dts:      1   4   5   3   6   2
shifted dts: -1   2   3   1   4   0
```

When B reference frames exist, the B reference frame moves forward as well as the P frame, so the dts has to be shifted back by two frames, as in the last row of the table above.
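The hardcoded `2 * duration` shift in the callback above assumes B reference frames and a constant frame duration. A more general approach, as a sketch, is to buffer an initial window of packets (e.g. the first GOP) and derive the shift from the worst-case dts − pts difference; the names here are illustrative:

```cpp
#include <algorithm>
#include <cstdint>
#include <vector>

// Sketch: compute a constant dts shift such that dts <= pts holds for
// every packet in the buffered window; subtract it from every dts.
int64_t ComputeDtsShift(const std::vector<Packet *> &window) {
  int64_t shift = 0;
  for (const Packet *p : window) {
    shift = std::max(shift, p->dts - p->pts);  // worst-case violation
  }
  return shift;
}
```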
Extracting the SPS/PPS

Pull the parameter sets out of the first keyframe; the muxer needs them later. The HEVC case (VPS/SPS/PPS) is shown here.
```cpp
bool isKeyFrame = false;
auto dict = static_cast<CFDictionaryRef>(CFArrayGetValueAtIndex(
    CMSampleBufferGetSampleAttachmentsArray(sampleBuffer, true), 0));
if (dict) {
  isKeyFrame = !CFDictionaryContainsKey(dict, kCMSampleAttachmentKey_NotSync);
}

if (isKeyFrame && !extra_data_.extradata) {
  CMFormatDescriptionRef formatDescription =
      CMSampleBufferGetFormatDescription(sampleBuffer);
  int NALUnitHeaderLengthOut = 0;
  const uint8_t *vpsParameterSet;
  size_t vpsParameterSetSize, vpsParameterCount;
  CMVideoFormatDescriptionGetHEVCParameterSetAtIndex(
      formatDescription, 0, &vpsParameterSet, &vpsParameterSetSize,
      &vpsParameterCount, &NALUnitHeaderLengthOut);

  const uint8_t *spsParameterSet;
  size_t spsParameterSetSize, spsParameterCount;
  CMVideoFormatDescriptionGetHEVCParameterSetAtIndex(
      formatDescription, 1, &spsParameterSet, &spsParameterSetSize,
      &spsParameterCount, &NALUnitHeaderLengthOut);

  const uint8_t *ppsParameterSet;
  size_t ppsParameterSetSize, ppsParameterCount;
  CMVideoFormatDescriptionGetHEVCParameterSetAtIndex(
      formatDescription, 2, &ppsParameterSet, &ppsParameterSetSize,
      &ppsParameterCount, &NALUnitHeaderLengthOut);

  hevc::ParamSets ps = {};
  ps.nal_unit_header_length = NALUnitHeaderLengthOut;
  hevc::VPS vps = {};
  memcpy(vps.data, vpsParameterSet, vpsParameterSetSize);
  vps.data_size = static_cast<int>(vpsParameterSetSize);
  ps.vps_list.push_back(vps);
  hevc::SPS sps = {};
  memcpy(sps.data, spsParameterSet, spsParameterSetSize);
  sps.data_size = static_cast<int>(spsParameterSetSize);
  ps.sps_list.push_back(sps);
  hevc::PPS pps = {};
  memcpy(pps.data, ppsParameterSet, ppsParameterSetSize);
  pps.data_size = static_cast<int>(ppsParameterSetSize);
  ps.pps_list.push_back(pps);
  hevc::ParseParamSetToExtraData(ps, extra_data_);
}
```
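For H.264 the analogous call is CMVideoFormatDescriptionGetH264ParameterSetAtIndex, with only an SPS (index 0) and a PPS (index 1); a sketch:

```cpp
// Sketch: H.264 variant — index 0 is the SPS, index 1 the PPS.
int nalHeaderLen = 0;
const uint8_t *sps = nullptr, *pps = nullptr;
size_t spsSize = 0, ppsSize = 0, count = 0;
CMVideoFormatDescriptionGetH264ParameterSetAtIndex(
    formatDescription, 0, &sps, &spsSize, &count, &nalHeaderLen);
CMVideoFormatDescriptionGetH264ParameterSetAtIndex(
    formatDescription, 1, &pps, &ppsSize, &count, &nalHeaderLen);
// sps/spsSize and pps/ppsSize can then be packaged into avcC-style
// ExtraData (see the BuildAvcC sketch above).
```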
end_of_stream

When the input is exhausted, flush the remaining frames out of the encoder and signal end_of_stream.
```cpp
if (send_eos_) {
  VTCompressionSessionCompleteFrames(session_, kCMTimeInvalid);
}
```
Tearing down the encoding session

When encoding is finished, clean up and release the session's resources.
```cpp
if (session_) {
  VTCompressionSessionInvalidate(session_);
  CFRelease(session_);
  session_ = nullptr;
}
```