iOS Hardware Video Encoding and Decoding with VideoToolbox

On iOS, VideoToolbox is a powerful framework provided by Apple for hardware-accelerated video encoding and decoding. It supports a wide range of codecs, including but not limited to H.264 and HEVC. Using VideoToolbox improves codec performance while reducing CPU usage.

The VideoToolbox encode/decode workflow

Decoding

Creating the decoder

Setting the video format description

Read the VPS/SPS/PPS out of the stream and set them on a CMVideoFormatDescriptionRef.

CMVideoFormatDescriptionRef VideoToolBoxDecoderImpl::CreateVideoFormatDesc(
const VideoInfo &video_info, const ExtraData &extra_data) {
OSStatus status;
CMFormatDescriptionRef format_desc;
switch (video_info.codec_type) {
case CodecType::CODEC_TYPE_H264: {
auto *avc_ps_ = new avc::ParamSets();
...
// Prepare the SPS and PPS data
const uint8_t *const parameterSetPointers[] = {avc_ps_->sps_list[0].data,
avc_ps_->pps_list[0].data};
const size_t parameterSetSizes[] = {
(size_t)avc_ps_->sps_list[0].data_size,
(size_t)avc_ps_->pps_list[0].data_size};
status = CMVideoFormatDescriptionCreateFromH264ParameterSets(
kCFAllocatorDefault, 2, parameterSetPointers, parameterSetSizes, 4,
&format_desc);
delete avc_ps_;
} break;
case CodecType::CODEC_TYPE_H265: {
auto *hevc_ps = new hevc::ParamSets();
...
// Prepare the VPS, SPS, and PPS data
if (hevc_ps->vps_list.empty() || hevc_ps->sps_list.empty() ||
hevc_ps->pps_list.empty()) {
return nullptr;
}
const uint8_t *const parameterSetPointers[] = {hevc_ps->vps_list[0].data,
hevc_ps->sps_list[0].data,
hevc_ps->pps_list[0].data};
const size_t parameterSetSizes[] = {
(size_t)hevc_ps->vps_list[0].data_size,
(size_t)hevc_ps->sps_list[0].data_size,
(size_t)hevc_ps->pps_list[0].data_size};
status = CMVideoFormatDescriptionCreateFromHEVCParameterSets(
kCFAllocatorDefault, 3, parameterSetPointers, parameterSetSizes, 4,
nil, &format_desc);
delete hevc_ps;
} break;
default:
return nullptr;
}

if (status) {
return nullptr;
}
return format_desc;
}

Setting the buffer attributes

The output pixel format can be selected via kCVPixelBufferPixelFormatTypeKey; VideoToolbox converts to it internally (a sketch of the format mapping follows the function below).

CFDictionaryRef VideoToolBoxDecoderImpl::CreateVideoBufferAttr(
const VideoInfo &video_info) {

CFNumberRef w = CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type,
&video_info.width);
CFNumberRef h = CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type,
&video_info.height);

OSType pix_fmt = PixFormatConvert<PixFormat, OSType>(video_info.pix_fmt);
if (pix_fmt == 0) {
return nullptr;
}

CFNumberRef cv_pix_fmt =
CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, &pix_fmt);

CFMutableDictionaryRef buffer_attributes = CFDictionaryCreateMutable(
kCFAllocatorDefault, 5, &kCFTypeDictionaryKeyCallBacks,
&kCFTypeDictionaryValueCallBacks);

CFMutableDictionaryRef io_surface_properties = CFDictionaryCreateMutable(
kCFAllocatorDefault, 0, &kCFTypeDictionaryKeyCallBacks,
&kCFTypeDictionaryValueCallBacks);

// IOSurface-backed memory shared between the CPU and GPU, so the buffer can later be uploaded to a texture directly
CFDictionarySetValue(buffer_attributes, kCVPixelBufferIOSurfacePropertiesKey,
io_surface_properties);
// Output pixel format and resolution
CFDictionarySetValue(buffer_attributes, kCVPixelBufferPixelFormatTypeKey,
cv_pix_fmt);
CFDictionarySetValue(buffer_attributes, kCVPixelBufferWidthKey, w);
CFDictionarySetValue(buffer_attributes, kCVPixelBufferHeightKey, h);
#if TARGET_OS_IPHONE
CFDictionarySetValue(buffer_attributes,
kCVPixelBufferOpenGLESCompatibilityKey, kCFBooleanTrue);
#else
CFDictionarySetValue(buffer_attributes,
kCVPixelBufferIOSurfaceOpenGLTextureCompatibilityKey,
kCFBooleanTrue);
#endif

CFRelease(io_surface_properties);
CFRelease(cv_pix_fmt);
CFRelease(w);
CFRelease(h);

return buffer_attributes;
}
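
The PixFormatConvert helper used above is not shown in this article. A plausible mapping between this codebase's PixFormat enum and the Core Video pixel format constants might look like the following; the enum members are the project's own, so treat the exact mapping as an assumption:

#include <CoreVideo/CoreVideo.h>

// Hypothetical PixFormat -> Core Video OSType mapping; unknown formats map
// to 0 so callers can bail out (as CreateVideoBufferAttr does above).
static OSType PixFormatToCVPixelFormat(PixFormat fmt) {
  switch (fmt) {
    case PixFormat::PIX_FMT_NV12:
      return kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange;
    case PixFormat::PIX_FMT_I420:
      return kCVPixelFormatType_420YpCbCr8Planar;
    case PixFormat::PIX_FMT_ARGB:
      return kCVPixelFormatType_32ARGB;
    default:
      return 0;
  }
}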

Creating the decompression session

(VTDecompressionSessionCreate): create the decode session from the stream's format description.

VTDecompressionSessionRef session;
// Set up the video format description
CMVideoFormatDescriptionRef format_desc = CreateVideoFormatDesc(video_info_, extra_data);

// Set up the buffer attributes
CFDictionaryRef buf_attr = CreateVideoBufferAttr(video_info_);

// Decode output callback
VTDecompressionOutputCallbackRecord decoder_cb;
decoder_cb.decompressionOutputCallback =
[](void *opaque, void *sourceFrameRefCon, OSStatus status,
VTDecodeInfoFlags flags, CVImageBufferRef image_buffer, CMTime pts,
CMTime duration) {
if (status != noErr) {
LOG("VTDecompressionSessionDecodeFrame failed status = %d", status);
}
...
};
decoder_cb.decompressionOutputRefCon = this;
// Create the decode session
OSStatus status = VTDecompressionSessionCreate(kCFAllocatorDefault, format_desc, nullptr,
buf_attr, &decoder_cb, &session);

CFRelease(buf_attr);

if (status) {
CFRelease(format_desc);
return;
}

Processing the data

VideoToolbox only accepts AVCC-format (length-prefixed) bitstreams; Annex-B bitstreams are not supported, so the stream has to be converted up front.
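
As a minimal sketch of that conversion (assuming each NAL length is written as 4 bytes, matching the NALUnitHeaderLength of 4 passed to the format-description calls above; the helper name is illustrative, not part of the original code):

#include <cstddef>
#include <cstdint>
#include <vector>

// Minimal Annex-B -> AVCC repacker: every start code (00 00 01 or
// 00 00 00 01) is replaced by a 4-byte big-endian NAL length prefix.
static std::vector<uint8_t> AnnexBToAvcc(const uint8_t *data, size_t size) {
  auto start_code_len = [&](size_t i) -> size_t {
    if (i + 3 <= size && data[i] == 0 && data[i + 1] == 0 && data[i + 2] == 1)
      return 3;
    if (i + 4 <= size && data[i] == 0 && data[i + 1] == 0 &&
        data[i + 2] == 0 && data[i + 3] == 1)
      return 4;
    return 0;
  };

  std::vector<uint8_t> out;
  size_t i = 0;
  while (i < size) {
    size_t sc = start_code_len(i);
    if (sc == 0) { ++i; continue; }  // scan forward to the next start code
    size_t nal_start = i + sc;
    size_t nal_end = nal_start;
    while (nal_end < size && start_code_len(nal_end) == 0) ++nal_end;
    uint32_t len = static_cast<uint32_t>(nal_end - nal_start);
    out.push_back((len >> 24) & 0xFF);  // 4-byte big-endian length prefix
    out.push_back((len >> 16) & 0xFF);
    out.push_back((len >> 8) & 0xFF);
    out.push_back(len & 0xFF);
    out.insert(out.end(), data + nal_start, data + nal_end);
    i = nal_end;
  }
  return out;
}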

Creating the SampleBuffer

The CMSampleBuffer holds the compressed data together with its timestamps.

CMSampleBufferRef VideoToolBoxDecoderImpl::CreateSampleBuffer(Packet &packet) {
CMSampleBufferRef sample_buf = nullptr;
CMBlockBufferRef block_buf;

// Wrap the NAL units into a CMBlockBuffer via CMBlockBufferCreateWithMemoryBlock;
// with kCFAllocatorNull as the block allocator the packet memory is referenced
// directly rather than copied, so it must stay valid while the sample is in use
OSStatus status = CMBlockBufferCreateWithMemoryBlock(
kCFAllocatorDefault, packet.byte_data, packet.data_size, kCFAllocatorNull,
nullptr, 0, packet.data_size, 0, &block_buf);
if (status) {
return sample_buf;
}

CMSampleTimingInfo timingInfo = kCMTimingInfoInvalid;
// Fill in the timestamps
timingInfo.presentationTimeStamp = CMTimeMake(packet.pts, 1000);
timingInfo.duration = CMTimeMake(packet.duration, 1000);
timingInfo.decodeTimeStamp = CMTimeMake(packet.dts, 1000);
// Combine the CMBlockBuffer, the CMVideoFormatDescription and the timing info
// into a CMSampleBuffer via CMSampleBufferCreate
status = CMSampleBufferCreate(kCFAllocatorDefault, block_buf, TRUE, nullptr,
nullptr, format_desc_, 1, 1, &timingInfo, 0,
nullptr, &sample_buf);

if (block_buf) {
CFRelease(block_buf);
}

return status ? nullptr : sample_buf;
}

Decoding a video frame

(VTDecompressionSessionDecodeFrame): feed the encoded frame data into the decoder.

OSStatus status;
CMSampleBufferRef sample_buf = nullptr;

if (packet.get()->byte_data) {
sample_buf = CreateSampleBuffer(*packet);
if (sample_buf == nullptr) {
return HMError::FAILED;
}
}
// Send the sample buffer to the decoder
status = VTDecompressionSessionDecodeFrame(session_, sample_buf, 0, nullptr,
nullptr);
if (sample_buf) {
CFRelease(sample_buf);
}

Frame reordering

When the stream contains B frames, VideoToolbox does not reorder the output for you: frames come out in decode (dts) order, and the application has to restore presentation order itself.
One approach is to keep a queue of decoded frames. When a non-B packet (an I or P frame) is parsed, every frame up to and including the previous non-B frame's pts is complete, so the queue can be sorted and those frames emitted in order (a fuller drain sketch follows the snippet below).
In other words, each time a contiguous run of B frames has been decoded, that run can be flushed; the queue never needs to hold more than 16 + 1 frames.

if (packet->byte_data) {
std::sort(frame_queue_.begin(), frame_queue_.end(),
[](auto f1, auto f2) { return f1->timestamp < f2->timestamp; });

if (packet->slice_type != SliceType::B_FRAME) {
if (last_nonb_pts > 0) {
current_pts_ = last_nonb_pts;
}
last_nonb_pts = packet->pts;
}
}
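
The snippet above only shows the sorting and the bookkeeping of the last non-B pts. A sketch of the drain step it enables might look like this (frame_queue_ is assumed to be a std::deque of decoded frames, and OutputFrame is an illustrative stand-in for handing a frame to the consumer):

// Emit every queued frame whose timestamp is at or before the previous
// non-B frame's pts (current_pts_); no later-decoded frame can precede
// them in display order, so their ordering is final.
while (!frame_queue_.empty() &&
       frame_queue_.front()->timestamp <= current_pts_) {
  auto frame = frame_queue_.front();
  frame_queue_.pop_front();
  OutputFrame(frame);  // hand the frame to the renderer / next stage
}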

Handling the decoded output

The decoded frames are delivered through the callback, where they can be rendered to the screen, for example.

decoder_cb.decompressionOutputCallback =
[](void *opaque, void *sourceFrameRefCon, OSStatus status,
VTDecodeInfoFlags flags, CVImageBufferRef image_buffer, CMTime pts,
CMTime duration) {
if (status != noErr) {
AVLOG("VTDecompressionSessionDecodeFrame failed status = %d", status);
}
// Read the decoded image buffer here
};

Reading the decoded data

Read the data out of the CVImageBufferRef according to its pixel format.

CVPixelBufferLockBaseAddress(src, 0);

dst->video_frame_info.width = CVPixelBufferGetWidth(src);
dst->video_frame_info.height = CVPixelBufferGetHeight(src);

OSType format = CVPixelBufferGetPixelFormatType(src);
uint8_t* src_data = nullptr;
unsigned long len = 0;
switch (format) {
case kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange: {
dst->video_frame_info.pix_fmt = PixFormat::PIX_FMT_NV12;
if (!dst->byte_data.data) {
FrameDataAlloc(&dst->byte_data, PixFormat::PIX_FMT_NV12, dst->video_frame_info.width,
dst->video_frame_info.height);
}
dst->byte_data.linesize[0] = CVPixelBufferGetBytesPerRowOfPlane(src, 0);
len = dst->byte_data.linesize[0] * dst->video_frame_info.height * sizeof(uint8_t);
src_data = (uint8_t*)CVPixelBufferGetBaseAddressOfPlane(src, 0);
memcpy(dst->byte_data.data[0], src_data, len);

dst->byte_data.linesize[1] = CVPixelBufferGetBytesPerRowOfPlane(src, 1);
len = dst->byte_data.linesize[1] * (dst->video_frame_info.height / 2) * sizeof(uint8_t);
src_data = (uint8_t*)CVPixelBufferGetBaseAddressOfPlane(src, 1);
memcpy(dst->byte_data.data[1], src_data, len);
} break;
case kCVPixelFormatType_420YpCbCr8Planar: {
dst->video_frame_info.pix_fmt = PixFormat::PIX_FMT_I420;
if (!dst->byte_data.data) {
FrameDataAlloc(&dst->byte_data, PixFormat::PIX_FMT_I420, dst->video_frame_info.width,
dst->video_frame_info.height);
}
dst->byte_data.linesize[0] = CVPixelBufferGetBytesPerRowOfPlane(src, 0);
len = dst->byte_data.linesize[0] * dst->video_frame_info.height * sizeof(uint8_t);
src_data = (uint8_t*)CVPixelBufferGetBaseAddressOfPlane(src, 0);
memcpy(dst->byte_data.data[0], src_data, len);

dst->byte_data.linesize[1] = CVPixelBufferGetBytesPerRowOfPlane(src, 1);
len = dst->byte_data.linesize[1] * (dst->video_frame_info.height / 2) * sizeof(uint8_t);
src_data = (uint8_t*)CVPixelBufferGetBaseAddressOfPlane(src, 1);
memcpy(dst->byte_data.data[1], src_data, len);

dst->byte_data.linesize[2] = CVPixelBufferGetBytesPerRowOfPlane(src, 2);
len = dst->byte_data.linesize[2] * (dst->video_frame_info.height / 2) * sizeof(uint8_t);
src_data = (uint8_t*)CVPixelBufferGetBaseAddressOfPlane(src, 2);
memcpy(dst->byte_data.data[2], src_data, len);
} break;
case kCVPixelFormatType_32ARGB:
dst->video_frame_info.pix_fmt = PixFormat::PIX_FMT_ARGB;
if (!dst->byte_data.data) {
FrameDataAlloc(&dst->byte_data, PixFormat::PIX_FMT_ARGB, dst->video_frame_info.width,
dst->video_frame_info.height);
}
// 32ARGB buffers are non-planar, so use the non-plane accessors
dst->byte_data.linesize[0] = CVPixelBufferGetBytesPerRow(src);
len = dst->byte_data.linesize[0] * dst->video_frame_info.height * sizeof(uint8_t);
src_data = (uint8_t*)CVPixelBufferGetBaseAddress(src);
memcpy(dst->byte_data.data[0], src_data, len);
break;
default:
// Unlock before bailing out on an unsupported format
CVPixelBufferUnlockBaseAddress(src, 0);
return dst;
}

CVPixelBufferUnlockBaseAddress(src, 0);
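
Both this read path and the encoder's write path further down copy whole planes with a single memcpy and a single stride. When the CVPixelBuffer's bytes-per-row is padded differently from the frame's linesize, a row-by-row copy is safer; a small illustrative helper (not part of the original code):

#include <cstddef>
#include <cstdint>
#include <cstring>

// Copy one plane row by row, tolerating different source and destination
// strides (CVPixelBufferGetBytesPerRowOfPlane is often padded for alignment).
static void CopyPlane(uint8_t *dst, size_t dst_stride,
                      const uint8_t *src, size_t src_stride,
                      size_t width_bytes, size_t rows) {
  for (size_t r = 0; r < rows; ++r) {
    memcpy(dst + r * dst_stride, src + r * src_stride, width_bytes);
  }
}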

end_of_stream

When the input is exhausted, the remaining frames have to be flushed out of the decoder by signalling end of stream.

if (send_eos_) {
VTDecompressionSessionWaitForAsynchronousFrames(session_);
}

Tearing down the decode session

Once decoding is finished, invalidate the session and release the related resources.

if (session_) {
VTDecompressionSessionInvalidate(session_);
CFRelease(session_);
session_ = nullptr;
}

if (format_desc_) {
CFRelease(format_desc_);
format_desc_ = nullptr;
}

Encoding

Creating the encoder

Setting the buffer attributes

CFDictionaryRef VideoToolBoxEncoderImpl::CreateVideoBufferAttr(
const VideoInfo &video_info) {
CFNumberRef w = CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type,
&video_info.width);
CFNumberRef h = CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type,
&video_info.height);

OSType pix_fmt = PixFormatConvert<PixFormat, OSType>(video_info.pix_fmt);
if (pix_fmt == 0) {
return nullptr;
}

CFNumberRef cv_pix_fmt =
CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, &pix_fmt);

CFMutableDictionaryRef buffer_attributes = CFDictionaryCreateMutable(
kCFAllocatorDefault, 5, &kCFTypeDictionaryKeyCallBacks,
&kCFTypeDictionaryValueCallBacks);

CFMutableDictionaryRef io_surface_properties = CFDictionaryCreateMutable(
kCFAllocatorDefault, 0, &kCFTypeDictionaryKeyCallBacks,
&kCFTypeDictionaryValueCallBacks);

// IOSurface-backed memory shared between the CPU and GPU
CFDictionarySetValue(buffer_attributes, kCVPixelBufferIOSurfacePropertiesKey,
io_surface_properties);
// Source pixel format and resolution
CFDictionarySetValue(buffer_attributes, kCVPixelBufferPixelFormatTypeKey,
cv_pix_fmt);
CFDictionarySetValue(buffer_attributes, kCVPixelBufferWidthKey, w);
CFDictionarySetValue(buffer_attributes, kCVPixelBufferHeightKey, h);
#if TARGET_OS_IPHONE
CFDictionarySetValue(buffer_attributes,
kCVPixelBufferOpenGLESCompatibilityKey, kCFBooleanTrue);
#else
CFDictionarySetValue(buffer_attributes,
kCVPixelBufferIOSurfaceOpenGLTextureCompatibilityKey,
kCFBooleanTrue);
#endif

CFRelease(io_surface_properties);
CFRelease(cv_pix_fmt);
CFRelease(w);
CFRelease(h);

return buffer_attributes;
}

Creating the compression session

(VTCompressionSessionCreate): set the video encoding parameters and create a compression session, then configure its properties such as bitrate and frame rate (further property examples follow the snippet below).

VTCompressionSessionRef session;

CFDictionaryRef buf_attr = CreateVideoBufferAttr(video_info_);

// Create a CVPixelBufferPool to manage the pixel buffers that feed the encoder
CVPixelBufferPoolCreate(kCFAllocatorDefault, nullptr, buf_attr, &pool);

// Encode output callback
VTCompressionOutputCallback encoder_cb =
[](void *outputCallbackRefCon, void *sourceFrameRefCon, OSStatus status,
VTEncodeInfoFlags infoFlags, CMSampleBufferRef sampleBuffer) {
if (status != noErr) {
AVLOG("VTCompressionSessionEncodeFrame failed status = %d", status);
}
...
};

CMVideoCodecType codec_type = CodecTypeConvert<CodecType, CMVideoCodecType>(video_info_.codec_type);

// Create the compression session with the width, height and codec type
OSStatus status = VTCompressionSessionCreate(
kCFAllocatorDefault, video_info_.width, video_info_.height, codec_type,
nullptr, buf_attr, nullptr, encoder_cb, this, &session);

CFMutableDictionaryRef props = CFDictionaryCreateMutable(
kCFAllocatorDefault, 5, &kCFTypeDictionaryKeyCallBacks,
&kCFTypeDictionaryValueCallBacks);

CFNumberRef fps = CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type,
&video_info_.fps);
CFNumberRef bit_rate = CFNumberCreate(
kCFAllocatorDefault, kCFNumberSInt32Type, &video_info_.bit_rate);
// Frame rate and bitrate
CFDictionarySetValue(props, kVTCompressionPropertyKey_ExpectedFrameRate, fps);
CFDictionarySetValue(props, kVTCompressionPropertyKey_AverageBitRate,
bit_rate);
// Allow frame reordering, i.e. enable B frames
CFDictionarySetValue(props, kVTCompressionPropertyKey_AllowFrameReordering,
kCFBooleanTrue);

VTSessionSetProperties(session, props);

VTCompressionSessionPrepareToEncodeFrames(session);

CFRelease(props);

if (status) {
return;
}

session_ = session;
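
Beyond frame rate and bitrate, other session properties (GOP length, profile/level, real-time mode, ...) can be set the same way before VTCompressionSessionPrepareToEncodeFrames. The specific values below are illustrative choices, not taken from the original code:

// Keyframe interval: roughly one IDR every two seconds at the configured fps.
int32_t gop = video_info_.fps * 2;
CFNumberRef max_keyframe_interval =
    CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, &gop);
VTSessionSetProperty(session, kVTCompressionPropertyKey_MaxKeyFrameInterval,
                     max_keyframe_interval);
CFRelease(max_keyframe_interval);

// Real-time mode: on for live capture, usually off for offline transcoding.
VTSessionSetProperty(session, kVTCompressionPropertyKey_RealTime,
                     kCFBooleanFalse);

// Profile and level, e.g. H.264 High with an automatically chosen level.
VTSessionSetProperty(session, kVTCompressionPropertyKey_ProfileLevel,
                     kVTProfileLevel_H264_High_AutoLevel);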

Processing the data

VideoToolbox outputs an AVCC-format bitstream, so the SPS/PPS have to be packaged into an AVCC-style ExtraData blob (an avcC/hvcC configuration record) for the muxer.
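
For H.264, a minimal sketch of packing a single SPS/PPS pair into an avcC record (AVCDecoderConfigurationRecord) looks like this; it mirrors what a helper such as the project's ParseParamSetToExtraData presumably does, and the HEVC hvcC record is analogous but more involved:

#include <cstddef>
#include <cstdint>
#include <vector>

// Build an avcC blob from one raw SPS NAL and one raw PPS NAL (each including
// its NAL header byte). 4-byte NAL length prefixes are assumed.
static std::vector<uint8_t> BuildAvcCExtraData(const uint8_t *sps, size_t sps_size,
                                               const uint8_t *pps, size_t pps_size) {
  std::vector<uint8_t> out;
  out.push_back(1);         // configurationVersion
  out.push_back(sps[1]);    // AVCProfileIndication
  out.push_back(sps[2]);    // profile_compatibility
  out.push_back(sps[3]);    // AVCLevelIndication
  out.push_back(0xFF);      // reserved bits + lengthSizeMinusOne = 3
  out.push_back(0xE1);      // reserved bits + numOfSequenceParameterSets = 1
  out.push_back((sps_size >> 8) & 0xFF);
  out.push_back(sps_size & 0xFF);
  out.insert(out.end(), sps, sps + sps_size);
  out.push_back(1);         // numOfPictureParameterSets
  out.push_back((pps_size >> 8) & 0xFF);
  out.push_back(pps_size & 0xFF);
  out.insert(out.end(), pps, pps + pps_size);
  return out;
}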

Writing the input data

Write the source frame's data, according to its pixel format, into the CVImageBufferRef.

CVPixelBufferLockBaseAddress(dst, 0);

uint8_t* dst_data;
unsigned long len;
switch (src->video_frame_info.pix_fmt) {
case PixFormat::PIX_FMT_I420:
len = src->byte_data.linesize[0] * src->video_frame_info.height * sizeof(uint8_t);
dst_data = (uint8_t*)CVPixelBufferGetBaseAddressOfPlane(dst, 0);
memcpy(dst_data, src->byte_data.data[0], len);

len = src->byte_data.linesize[1] * (src->video_frame_info.height / 2) * sizeof(uint8_t);
dst_data = (uint8_t*)CVPixelBufferGetBaseAddressOfPlane(dst, 1);
memcpy(dst_data, src->byte_data.data[1], len);

len = src->byte_data.linesize[2] * (src->video_frame_info.height / 2) * sizeof(uint8_t);
dst_data = (uint8_t*)CVPixelBufferGetBaseAddressOfPlane(dst, 2);
memcpy(dst_data, src->byte_data.data[2], len);
break;
case PixFormat::PIX_FMT_NV12: {
len = src->byte_data.linesize[0] * src->video_frame_info.height * sizeof(uint8_t);
dst_data = (uint8_t*)CVPixelBufferGetBaseAddressOfPlane(dst, 0);
memcpy(dst_data, src->byte_data.data[0], len);

len = src->byte_data.linesize[1] * (src->video_frame_info.height / 2) * sizeof(uint8_t);
dst_data = (uint8_t*)CVPixelBufferGetBaseAddressOfPlane(dst, 1);
memcpy(dst_data, src->byte_data.data[1], len);
} break;
case PixFormat::PIX_FMT_ARGB:
len = src->byte_data.linesize[0] * src->video_frame_info.height * sizeof(uint8_t);
// 32ARGB pixel buffers are non-planar, so use the non-plane accessor
dst_data = (uint8_t*)CVPixelBufferGetBaseAddress(dst);
memcpy(dst_data, src->byte_data.data[0], len);
break;
default:
// Unlock before bailing out on an unsupported format
CVPixelBufferUnlockBaseAddress(dst, 0);
return dst;
}

CVPixelBufferUnlockBaseAddress(dst, 0);

Encoding a frame

(VTCompressionSessionEncodeFrame): feed raw video frames into the encoder.
A CVImageBufferRef is taken from the buffer pool to hold the encoder's input data.

CVImageBufferRef pixel_buffer = nullptr;
CVPixelBufferPoolCreatePixelBuffer(kCFAllocatorDefault, pool, &pixel_buffer);
if (!pixel_buffer) {
return;
}

// Write the frame data into the image buffer
...

CMTime timestamp = CMTimeMake(frame->timestamp, COMMON_TIME_BASE);
CMTime timespan = CMTimeMake(frame->timespan, COMMON_TIME_BASE);
VTEncodeInfoFlags flags = kVTEncodeInfo_Asynchronous;
// Submit one frame and its timestamps to the encoder
OSStatus status = VTCompressionSessionEncodeFrame(
session_, frame->byte_data.data ? pixel_buffer : nullptr, timestamp,
timespan, nullptr, nullptr, &flags);
if (pixel_buffer) {
CFRelease(pixel_buffer);
}

return;

Handling the compressed output

The compressed data is handled in the callback, typically by writing it to a file or sending it over the network.

VTCompressionOutputCallback encoder_cb =
[](void *outputCallbackRefCon, void *sourceFrameRefCon, OSStatus status,
VTEncodeInfoFlags infoFlags, CMSampleBufferRef sampleBuffer) {
if (status != noErr) {
AVLOG("VTCompressionSessionEncodeFrame failed status = %d", status);
}
// Read the encoded sample buffer here
};

Note that when B frames are enabled, VideoToolbox does not offset the dts itself. That breaks downstream muxing, so the application has to apply the offset (see the DTS offset section below).

auto packet = std::make_unique<Packet>();

const CMTime &pts = CMSampleBufferGetPresentationTimeStamp(sampleBuffer);
const CMTime &dts = CMSampleBufferGetDecodeTimeStamp(sampleBuffer);
const CMTime &duration = CMSampleBufferGetDuration(sampleBuffer);
packet->pts = pts.value;
packet->dts = dts.value - (2 * duration.value); // dts offset, see below
packet->duration = duration.value;

CMBlockBufferRef blockBuffer = CMSampleBufferGetDataBuffer(sampleBuffer);
size_t length, totalLength;
char *dataPointer;

OSStatus statusCodeRet = CMBlockBufferGetDataPointer(
blockBuffer, 0, &length, &totalLength, &dataPointer);
if (statusCodeRet == noErr) {
// Copy the sample buffer's payload out of the block buffer
unsigned long len = totalLength * sizeof(uint8_t);
packet->byte_data = static_cast<uint8_t *>(malloc(len));
memset(packet->byte_data, 0, len);
memcpy(packet->byte_data, dataPointer, len);
packet->data_size = static_cast<int>(totalLength);
}

packet_queue_.push_back(packet.release());

DTS offset

The output pts and dts both start counting from 0, so the dts of a reordered frame can end up greater than its pts. The dts therefore has to be shifted back, otherwise the packets are rejected when muxing with FFmpeg.

I    b    b    b    b    P
1    2    3    4    5    6    pts
1    3    4    5    6    2    dts (as output)
0    2    3    4    5    1    dts (shifted back by 1 frame)

When B frames are present, the frame they reference backward has to be encoded ahead of them, so it moves forward in decode order and pts and dts diverge.
In the table above, frame 6 is the backward reference for the four frames before it, so it is encoded ahead of them, giving the dts values in the third row. That creates a problem: for some frames dts is greater than pts, which makes no sense for playback (the frame's presentation time has arrived before its decode time), and FFmpeg refuses to mux packets with dts > pts.
The dts therefore has to be shifted.


When no B frame is used as a reference, only the P frame is moved forward, so shifting the dts back by one frame is enough, as the last row shows.


I    b    b    B    b    P
1    2    3    4    5    6    pts
1    4    5    3    6    2    dts (as output)
-1   2    3    1    4    0    dts (shifted back by 2 frames)

When a B frame is itself used as a reference (the capital B above), it is moved forward in decode order along with the P frame, so the dts has to be shifted back by two frames, as the last row shows.
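
Expressed in code, the shift is just a fixed number of frame durations determined by the reordering depth; the two-level distinction and the helper below are an interpretation of the tables above, not code taken from the project:

// Shift dts back so that dts <= pts for every output packet.
// reorder_depth = 1: plain B frames (only P is moved ahead).
// reorder_depth = 2: B frames can also be used as references.
static int64_t ApplyDtsOffset(int64_t raw_dts, int64_t frame_duration,
                              int reorder_depth) {
  return raw_dts - reorder_depth * frame_duration;
}

// In the output callback above (B reference frames assumed enabled):
// packet->dts = ApplyDtsOffset(dts.value, duration.value, 2);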

Getting the SPS/PPS

Extract the parameter sets from the first keyframe; the muxer needs them later.

// A sample without the NotSync attachment is a sync (key) frame
bool isKeyFrame = false;
auto dict = static_cast<CFDictionaryRef>(CFArrayGetValueAtIndex(
CMSampleBufferGetSampleAttachmentsArray(sampleBuffer, true), 0));
if (dict) {
isKeyFrame = !CFDictionaryContainsKey(dict, kCMSampleAttachmentKey_NotSync);
}

if (isKeyFrame && !extra_data_.extradata) {
// Pull the VPS (index 0), SPS (index 1) and PPS (index 2) from the format
// description; the HEVC accessor is shown here
CMFormatDescriptionRef formatDescription =
CMSampleBufferGetFormatDescription(sampleBuffer);
int NALUnitHeaderLengthOut = 0;
const uint8_t *vpsParmeterSet;
size_t vpsParameterSetSize, vpsParameterCount;
CMVideoFormatDescriptionGetHEVCParameterSetAtIndex(
formatDescription, 0, &vpsParmeterSet, &vpsParameterSetSize,
&vpsParameterCount, &NALUnitHeaderLengthOut);

const uint8_t *spsParameterSet;
size_t spsParameterSetSize, spsParameterCount;
CMVideoFormatDescriptionGetHEVCParameterSetAtIndex(
formatDescription, 1, &spsParameterSet, &spsParameterSetSize,
&spsParameterCount, &NALUnitHeaderLengthOut);

const uint8_t *ppsParmeterSet;
size_t ppsParameterSetSize, ppsParameterCount;
CMVideoFormatDescriptionGetHEVCParameterSetAtIndex(
formatDescription, 2, &ppsParmeterSet, &ppsParameterSetSize,
&ppsParameterCount, &NALUnitHeaderLengthOut);

hevc::ParamSets ps = {};
ps.nal_unit_header_length = NALUnitHeaderLengthOut;
hevc::VPS vps = {};
memcpy(vps.data, vpsParmeterSet, vpsParameterSetSize);
vps.data_size = static_cast<int>(vpsParameterSetSize);
ps.vps_list.push_back(vps);
hevc::SPS sps = {};
memcpy(sps.data, spsParameterSet, spsParameterSetSize);
sps.data_size = static_cast<int>(spsParameterSetSize);
ps.sps_list.push_back(sps);
hevc::PPS pps = {};
memcpy(pps.data, ppsParmeterSet, ppsParameterSetSize);
pps.data_size = static_cast<int>(ppsParameterSetSize);
ps.pps_list.push_back(pps);
hevc::ParseParamSetToExtraData(ps, extra_data_);
}
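
The block above handles HEVC; for an H.264 session the per-index accessor is CMVideoFormatDescriptionGetH264ParameterSetAtIndex, used the same way (a sketch, reusing formatDescription from above):

// For H.264, index 0 is the SPS and index 1 is the PPS.
const uint8_t *sps = nullptr, *pps = nullptr;
size_t sps_size = 0, pps_size = 0, count = 0;
int nal_header_len = 0;
CMVideoFormatDescriptionGetH264ParameterSetAtIndex(
    formatDescription, 0, &sps, &sps_size, &count, &nal_header_len);
CMVideoFormatDescriptionGetH264ParameterSetAtIndex(
    formatDescription, 1, &pps, &pps_size, &count, &nal_header_len);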

end_of_stream

When the input is exhausted, the remaining frames have to be flushed out of the encoder by signalling end of stream.

if (send_eos_) {
VTCompressionSessionCompleteFrames(session_, kCMTimeInvalid);
}

Tearing down the encode session

Once encoding is finished, invalidate and release the session resources.

if (session_) {
VTCompressionSessionInvalidate(session_);
CFRelease(session_);
session_ = nullptr;
}