iOS Video Hardware Encoding

Overview

Since iOS 8 the system has exposed the VideoToolbox framework for video encoding and decoding (the work is done by a dedicated hardware codec rather than the CPU). Before iOS 8 the common approach was FFmpeg, i.e. software codecs running on the CPU.

Processing Flow

1. Capture

Use AVFoundation's AVCapture family of classes to build a custom camera that captures the video stream, and run the video encoding inside the callback that delivers the captured data.

Camera setup:

self.session = [[AVCaptureSession alloc] init];
if ([self.session canSetSessionPreset:AVCaptureSessionPreset640x480]) {
    self.session.sessionPreset = AVCaptureSessionPreset640x480;
}
self.videoDevice = [AVCaptureDevice defaultDeviceWithMediaType:AVMediaTypeVideo];
if ([self.videoDevice isFocusModeSupported:AVCaptureFocusModeContinuousAutoFocus]) {
    if ([self.videoDevice lockForConfiguration:nil]) {
        self.videoDevice.focusMode = AVCaptureFocusModeContinuousAutoFocus;
        [self.videoDevice unlockForConfiguration];
    }
}
self.videoInput = [[AVCaptureDeviceInput alloc] initWithDevice:self.videoDevice error:nil];
if ([self.session canAddInput:self.videoInput]) {
    [self.session addInput:self.videoInput];
}
self.videoDataOutput = [[AVCaptureVideoDataOutput alloc] init];
// kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange: raw frames are delivered as YUV 4:2:0.
// YUV 4:4:4 sampling: every Y sample has its own UV pair.
// YUV 4:2:2 sampling: every two Y samples share one UV pair.
// YUV 4:2:0 sampling: every four Y samples share one UV pair.
NSDictionary *settings = [[NSDictionary alloc] initWithObjectsAndKeys:
                          [NSNumber numberWithUnsignedInteger:kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange],
                          kCVPixelBufferPixelFormatTypeKey, nil];
self.videoDataOutput.videoSettings = settings;
self.videoDataOutput.alwaysDiscardsLateVideoFrames = YES;
_videoOutputQueue = dispatch_queue_create("videoOutputQueue", DISPATCH_QUEUE_SERIAL);
[self.videoDataOutput setSampleBufferDelegate:self queue:_videoOutputQueue];
if ([self.session canAddOutput:self.videoDataOutput]) {
    [self.session addOutput:self.videoDataOutput];
}
self.videoConnection = [self.videoDataOutput connectionWithMediaType:AVMediaTypeVideo];
[self.videoConnection setVideoScaleAndCropFactor:1];
self.videoConnection.videoOrientation = AVCaptureVideoOrientationPortrait;
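
After configuration, capture still has to be started. A minimal sketch (not part of the snippet above): -startRunning blocks until the session has spun up, so it is usually dispatched off the main queue.

// Start the capture session; -startRunning blocks, so keep it off the main thread.
dispatch_async(dispatch_get_global_queue(QOS_CLASS_USER_INITIATED, 0), ^{
    [self.session startRunning];
});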

Delegate callback:

- (void)captureOutput:(AVCaptureOutput *)captureOutput didOutputSampleBuffer:(CMSampleBufferRef)sampleBuffer fromConnection:(AVCaptureConnection *)connection{
    __weak typeof(self) weakSelf = self;
    // CVPixelBufferRef pixelBufferRef = CMSampleBufferGetImageBuffer(sampleBuffer);
    if ([self.delegate respondsToSelector:@selector(videOutputHandler:didOutputSampleBuffer:)]) {
        [self.delegate videOutputHandler:weakSelf didOutputSampleBuffer:sampleBuffer];
    }
}

The sampleBuffer here is the raw data of the video stream.
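
On the receiving end of that delegate call, the pixel buffer and a millisecond timestamp can be pulled out of the sample buffer before handing them to the encoder. A rough sketch (the videoEncode property name is an assumption; videoEncodeData:time: is the encoder entry point shown in the next step):

- (void)videOutputHandler:(id)handler didOutputSampleBuffer:(CMSampleBufferRef)sampleBuffer{
    // The raw frame lives in the sample buffer's image buffer.
    CVPixelBufferRef pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer);
    // Convert the presentation timestamp to milliseconds for the encoder/packager.
    CMTime pts = CMSampleBufferGetPresentationTimeStamp(sampleBuffer);
    uint64_t ms = (uint64_t)(CMTimeGetSeconds(pts) * 1000);
    [self.videoEncode videoEncodeData:pixelBuffer time:ms];
}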

2. Encoding

With the VideoToolbox APIs it is straightforward to encode the sampleBuffer data captured above.

First, initialize a VTCompressionSessionRef:

OSStatus status = VTCompressionSessionCreate(NULL, _videoConfig.videoSize.width, _videoConfig.videoSize.height, kCMVideoCodecType_H264, NULL, NULL, NULL, didCompressBuffer, (__bridge void *)self, &_compressionSession);
if (status != noErr) {
    return;
}
// Keyframe (GOP) interval, usually 2x the frame rate; a larger interval gives a higher compression ratio.
VTSessionSetProperty(_compressionSession, kVTCompressionPropertyKey_MaxKeyFrameInterval, (__bridge CFTypeRef)@(_videoConfig.keyframeInterval));
VTSessionSetProperty(_compressionSession, kVTCompressionPropertyKey_MaxKeyFrameIntervalDuration, (__bridge CFTypeRef)@(_videoConfig.keyframeInterval));
// Note: kVTCompressionPropertyKey_AverageBitRate takes bits, kVTCompressionPropertyKey_DataRateLimits takes bytes and seconds.
// e.g. status = VTSessionSetProperty(session, kVTCompressionPropertyKey_AverageBitRate, (__bridge CFTypeRef)@(600 * 1024));
// e.g. status = VTSessionSetProperty(session, kVTCompressionPropertyKey_DataRateLimits, (__bridge CFArrayRef)@[@(800 * 1024 / 8), @1]);
// Average bitrate, in bits per second.
VTSessionSetProperty(_compressionSession, kVTCompressionPropertyKey_AverageBitRate, (__bridge CFTypeRef)@(_videoConfig.bitrate * 8));
// Hard data-rate limit: bytes over a window of seconds (here bitrate bytes per 1 second).
NSArray *limit = @[@(_videoConfig.bitrate), @(1)];
VTSessionSetProperty(_compressionSession, kVTCompressionPropertyKey_DataRateLimits, (__bridge CFArrayRef)limit);
VTSessionSetProperty(_compressionSession, kVTCompressionPropertyKey_ExpectedFrameRate, (__bridge CFTypeRef)@(_videoConfig.fps));
// Real-time encoding output (keeps latency down for live streaming).
VTSessionSetProperty(_compressionSession, kVTCompressionPropertyKey_RealTime, kCFBooleanTrue);
VTSessionSetProperty(_compressionSession, kVTCompressionPropertyKey_ProfileLevel, kVTProfileLevel_H264_Baseline_AutoLevel);
// Disallow frame reordering so that no B-frames are produced.
VTSessionSetProperty(_compressionSession, kVTCompressionPropertyKey_AllowFrameReordering, kCFBooleanFalse);
// 16:9 aspect ratio.
VTSessionSetProperty(_compressionSession, kVTCompressionPropertyKey_AspectRatio16x9, kCFBooleanTrue);
VTCompressionSessionPrepareToEncodeFrames(_compressionSession);

Encoding a frame:

// Encode one frame.
- (void)videoEncodeData:(CVPixelBufferRef)pixelBuffer time:(uint64_t)time{
    frameCount++;
    // CMTimeMake(value, timescale): the timestamp in seconds is value/timescale (here frameCount at a 1000 Hz timescale).
    CMTime presentationTimeStamp = CMTimeMake(frameCount, 1000);
    VTEncodeInfoFlags flags;
    // Duration of one frame at the configured frame rate.
    CMTime duration = CMTimeMake(1, _videoConfig.fps);
    NSDictionary *properties = nil;
    if (frameCount % (int32_t)(self.videoConfig.keyframeInterval) == 0) {
        // Force a keyframe at every GOP boundary.
        properties = @{(__bridge NSString *)kVTEncodeFrameOptionKey_ForceKeyFrame: @YES};
    }
    // The timestamp is passed through to the callback; __bridge_retained keeps it alive
    // until the callback takes ownership with __bridge_transfer.
    NSNumber *timeNumber = @(time);
    OSStatus statusCode = VTCompressionSessionEncodeFrame(_compressionSession, pixelBuffer, presentationTimeStamp, duration, (__bridge CFDictionaryRef)properties, (__bridge_retained void *)timeNumber, &flags);
    if (statusCode != noErr) {
        NSLog(@"H264: VTCompressionSessionEncodeFrame failed with %d", (int)statusCode);
        VTCompressionSessionInvalidate(_compressionSession);
        CFRelease(_compressionSession);
        _compressionSession = NULL;
        return;
    }
    NSLog(@"H264: VTCompressionSessionEncodeFrame Success");
}
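
When encoding stops (end of stream, app backgrounded, etc.), the session should be flushed and released. A minimal teardown sketch (the method name here is illustrative):

- (void)stopEncode{
    if (_compressionSession) {
        // Push out any frames still buffered inside the encoder.
        VTCompressionSessionCompleteFrames(_compressionSession, kCMTimeInvalid);
        VTCompressionSessionInvalidate(_compressionSession);
        CFRelease(_compressionSession);
        _compressionSession = NULL;
    }
}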

Encode-complete callback:

// Callback invoked when a frame has been encoded.
static void didCompressBuffer(void *VTref, void *VTFrameRef, OSStatus status, VTEncodeInfoFlags infoFlags, CMSampleBufferRef sampleBuffer){
    LLYVideoEncode *videoEncode = (__bridge LLYVideoEncode *)VTref;
    uint64_t timeStamp = [((__bridge_transfer NSNumber *)VTFrameRef) longLongValue];
    // The encoded data (AVCC-format H.264).
    CMBlockBufferRef blockBuffer = CMSampleBufferGetDataBuffer(sampleBuffer);
    CFArrayRef attachments = CMSampleBufferGetSampleAttachmentsArray(sampleBuffer, false);
    // Determine whether this sample is a keyframe.
    BOOL isKeyFrame = NO;
    if (attachments != NULL) {
        CFDictionaryRef attachment;
        CFBooleanRef dependsOnOthers;
        attachment = (CFDictionaryRef)CFArrayGetValueAtIndex(attachments, 0);
        dependsOnOthers = (CFBooleanRef)CFDictionaryGetValue(attachment, kCMSampleAttachmentKey_DependsOnOthers);
        isKeyFrame = (dependsOnOthers == kCFBooleanFalse);
    }
    // For keyframes, the SPS and PPS must be pulled out of the format description.
    if (isKeyFrame) {
        CMFormatDescriptionRef format = CMSampleBufferGetFormatDescription(sampleBuffer);
        size_t sparameterSetSize, sparameterSetCount;
        const uint8_t *sparameterSet;
        // SPS
        OSStatus statusCode = CMVideoFormatDescriptionGetH264ParameterSetAtIndex(format, 0, &sparameterSet, &sparameterSetSize, &sparameterSetCount, NULL);
        if (statusCode == noErr) {
            // PPS
            size_t pparameterSetSize, pparameterSetCount;
            const uint8_t *pparameterSet;
            OSStatus statusCode = CMVideoFormatDescriptionGetH264ParameterSetAtIndex(format, 1, &pparameterSet, &pparameterSetSize, &pparameterSetCount, NULL);
            if (statusCode == noErr) {
                NSData *sps = [NSData dataWithBytes:sparameterSet length:sparameterSetSize];
                NSData *pps = [NSData dataWithBytes:pparameterSet length:pparameterSetSize];
                if ([videoEncode.delegate respondsToSelector:@selector(videoEncode:sps:pps:time:)]) {
                    [videoEncode.delegate videoEncode:videoEncode sps:sps pps:pps time:timeStamp];
                }
            }
        }
    }
    // The frame payload is extracted whether or not it is a keyframe.
    // Each NAL unit is prefixed with a 4-byte big-endian length (not the 0x00000001 start code);
    // non-keyframes usually contain a single NAL unit, keyframes may contain several.
    size_t length, totalLength;
    char *dataPointer;
    size_t offset = 0;
    int const headLen = 4;
    OSStatus statusCodeRet = CMBlockBufferGetDataPointer(blockBuffer, 0, &length, &totalLength, &dataPointer);
    if (statusCodeRet == noErr) {
        // Walk through every NAL unit in the block buffer.
        while (offset < totalLength - headLen) {
            uint32_t NALUnitLength = 0;
            memcpy(&NALUnitLength, dataPointer + offset, headLen);
            NALUnitLength = CFSwapInt32BigToHost(NALUnitLength);
            NSData *naluData = [NSData dataWithBytes:dataPointer + headLen + offset length:NALUnitLength];
            offset += headLen + NALUnitLength;
            if ([videoEncode.delegate respondsToSelector:@selector(videoEncode:frame:time:isKeyFrame:)]) {
                [videoEncode.delegate videoEncode:videoEncode frame:naluData time:timeStamp isKeyFrame:isKeyFrame];
            }
        }
    }
}

This is one of the key steps of the encoding pipeline: the encoded H.264 data has to be extracted according to its format so it can be repackaged (into RTMP-format messages) and sent over RTMP. All of these formats are fixed, so once you know their layout, splitting and repackaging the data is manageable.
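
Because the callback hands back AVCC-style NAL units (a 4-byte big-endian length prefix rather than the 0x00000001 Annex B start code), a quick way to sanity-check the encoder output before touching RTMP is to dump it to a raw .h264 file with the start code prepended. A small sketch (debugFileHandle is a hypothetical NSFileHandle opened for writing; naluData comes from the callback above):

static const uint8_t startCode[4] = {0x00, 0x00, 0x00, 0x01};
// Prepend the Annex B start code to each NAL unit (and to the SPS/PPS before the first keyframe).
NSMutableData *annexB = [NSMutableData dataWithBytes:startCode length:4];
[annexB appendData:naluData];
[self.debugFileHandle writeData:annexB];
// The resulting elementary stream can be opened directly with ffplay or VLC to verify the output.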

3. Repackaging the extracted video data into RTMP-format packets

Packaging the SPS and PPS (the AVC sequence header, sent with the keyframe data):

- (void)packageKeyFrameSps:(NSData *)spsData pps:(NSData *)ppsData timestamp:(uint64_t)timestamp{
    if (spsData.length <= 0 || ppsData.length <= 0) {
        return;
    }
    if (_hasSendKeyFrame) {
        return;
    }
    _hasSendKeyFrame = YES;
    const char *sps = spsData.bytes;
    const char *pps = ppsData.bytes;
    NSInteger sps_len = spsData.length;
    NSInteger pps_len = ppsData.length;
    NSInteger total = sps_len + pps_len + 16;
    uint8_t *body = (uint8_t *)malloc(total);
    int index = 0;
    memset(body, 0, total);
    body[index++] = 0x17;   // FrameType 1 (keyframe) + CodecID 7 (AVC)
    body[index++] = 0x00;   // AVCPacketType 0: AVC sequence header (sps/pps)
    body[index++] = 0x00;   // composition time, 3 bytes, always 0 here
    body[index++] = 0x00;
    body[index++] = 0x00;
    body[index++] = 0x01;   // configurationVersion
    body[index++] = sps[1]; // AVCProfileIndication
    body[index++] = sps[2]; // profile_compatibility
    body[index++] = sps[3]; // AVCLevelIndication
    body[index++] = 0xff;   // reserved bits + lengthSizeMinusOne (4-byte NALU lengths)
    /* SPS */
    body[index++] = 0xe1;   // reserved bits + number of SPS (1)
    body[index++] = (sps_len >> 8) & 0xff;
    body[index++] = sps_len & 0xff;
    memcpy(&body[index], sps, sps_len);
    index += sps_len;
    /* PPS */
    body[index++] = 0x01;   // number of PPS
    body[index++] = (pps_len >> 8) & 0xff;
    body[index++] = pps_len & 0xff;
    memcpy(&body[index], pps, pps_len);
    index += pps_len;
    if ([self.delegate respondsToSelector:@selector(videoPackage:didPacketFrame:)]) {
        NSData *data = [NSData dataWithBytes:body length:index];
        LLYFrame *frame = [[LLYFrame alloc] init];
        frame.data = data;
        frame.timestamp = 0;    // the sequence header always uses timestamp 0
        frame.msgLength = (int)data.length;
        frame.msgTypeId = LLYMSGTypeID_VIDEO;
        frame.msgStreamId = LLYStreamIDVideo;  // video
        frame.isKeyframe = YES;
        [self.delegate videoPackage:self didPacketFrame:frame];
    }
    free(body);
}

Packaging the NALU payload (keyframe and non-keyframe data):

- (void)packageFrame:(NSData *)data timestamp:(uint64_t)timestamp isKeyFrame:(BOOL)isKeyFrame{
    if (!_hasSendKeyFrame) {
        return;
    }
    NSInteger i = 0;
    NSInteger total = data.length + 9;
    unsigned char *body = (unsigned char *)malloc(total);
    memset(body, 0, total);
    if (isKeyFrame) {
        body[i++] = 0x17;   // FrameType 1 (keyframe) + CodecID 7 (AVC)
    }
    else {
        body[i++] = 0x27;   // FrameType 2 (inter frame) + CodecID 7 (AVC)
    }
    body[i++] = 0x01;       // AVCPacketType 1: AVC NALU (not the sps/pps sequence header)
    body[i++] = 0x00;       // composition time (pts - dts), 3 bytes, 0 here
    body[i++] = 0x00;
    body[i++] = 0x00;
    // 4-byte big-endian NALU length.
    body[i++] = (data.length >> 24) & 0xff;
    body[i++] = (data.length >> 16) & 0xff;
    body[i++] = (data.length >> 8) & 0xff;
    body[i++] = (data.length) & 0xff;
    memcpy(&body[i], data.bytes, data.length);
    if ([self.delegate respondsToSelector:@selector(videoPackage:didPacketFrame:)]) {
        NSData *packetData = [NSData dataWithBytes:body length:total];
        LLYFrame *frame = [[LLYFrame alloc] init];
        frame.data = packetData;
        frame.timestamp = (int)timestamp;
        frame.msgLength = (int)packetData.length;
        frame.msgTypeId = LLYMSGTypeID_VIDEO;
        frame.msgStreamId = LLYStreamIDVideo;
        frame.isKeyframe = isKeyFrame;
        [self.delegate videoPackage:self didPacketFrame:frame];
    }
    free(body);
}

4. Establishing the RTMP channel

First, open a socket connection:

- (void)connectToServer:(NSString *)host port:(UInt32)port{
    if (self.streamStatus > 0) {
        [self close];
    }
    // Input stream, used for reading data from the server.
    CFReadStreamRef readStream;
    // Output stream, used for sending data.
    CFWriteStreamRef writeStream;
    if (port == 0) {
        // The default RTMP port is 1935.
        port = 1935;
    }
    // Open the socket connection.
    CFStreamCreatePairWithSocketToHost(NULL, (__bridge CFStringRef)host, port, &readStream, &writeStream);
    // Note __bridge_transfer: ownership of the streams is handed over to ARC.
    _inputStream = (__bridge_transfer NSInputStream *)readStream;
    _outputStream = (__bridge_transfer NSOutputStream *)writeStream;
    _inputStream.delegate = self;
    _outputStream.delegate = self;
    [_outputStream scheduleInRunLoop:[NSRunLoop mainRunLoop] forMode:NSRunLoopCommonModes];
    [_inputStream scheduleInRunLoop:[NSRunLoop mainRunLoop] forMode:NSRunLoopCommonModes];
    [_inputStream open];
    [_outputStream open];
}

Next, set up RTMP communication with the server: based on the stream events reported back, send the corresponding handshake packets. Once the three-step handshake succeeds, the RTMP channel is established and the repackaged data can be sent.

- (void)streamSession:(LLYStreamSession *)session didChangeStatus:(LLYStreamStatus)streamStatus{
    if (streamStatus & NSStreamEventHasBytesAvailable) {
        // Data arrived from the server.
        [self didReceivedata];
        return;
    }
    if (streamStatus & NSStreamEventHasSpaceAvailable) {
        // The socket is writable; kick off the handshake once connected.
        if (_rtmpStatus == LLYRtmpSessionStatusConnected) {
            [self handshake0];
        }
        return;
    }
    if ((streamStatus & NSStreamEventOpenCompleted) &&
        _rtmpStatus < LLYRtmpSessionStatusConnected) {
        self.rtmpStatus = LLYRtmpSessionStatusConnected;
    }
    if (streamStatus & NSStreamEventErrorOccurred) {
        self.rtmpStatus = LLYRtmpSessionStatusError;
    }
    if (streamStatus & NSStreamEventEndEncountered) {
        self.rtmpStatus = LLYRtmpSessionStatusNotConnected;
    }
}
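
The handshake0 call above is the first step of the RTMP handshake. As a rough sketch of what it might send (using the same writeData: helper as the chunk sender below; this is not necessarily the demo's exact implementation): C0 is a single version byte (0x03), and C1 is a 1536-byte block made of a 4-byte timestamp, 4 zero bytes and 1528 random bytes. The server answers with S0/S1/S2, after which C2 is sent and the channel is ready.

- (void)handshake0{
    // C0: RTMP protocol version 3.
    uint8_t c0 = 0x03;
    [self writeData:[NSData dataWithBytes:&c0 length:1]];
    // C1: 4-byte timestamp + 4 zero bytes + 1528 random bytes.
    NSMutableData *c1 = [NSMutableData dataWithLength:1536];
    uint8_t *p = c1.mutableBytes;
    uint32_t ts = CFSwapInt32HostToBig(0);   // a zero timestamp is acceptable
    memcpy(p, &ts, 4);                       // bytes 4-7 stay zero
    arc4random_buf(p + 8, 1528);             // random payload
    [self writeData:c1];
}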

5. RTMP streaming

/**
 * Chunk Basic Header: HeaderType + ChannelID, 1 byte in total.
 * > HeaderType (top 2 bits): 00 -> 12-byte header, 01 -> 8-byte header
 * > ChannelID (low 6 bits): 02 -> Ping/BytesRead channel, 03 -> Invoke channel
 *   (connect(), publish() and user-defined NetConnection.Call()), 04 -> Audio/Video channel
 *
 * For the 12-byte case, the Chunk Message Header follows:
 * timestamp + message_length + message_type_id + msg_stream_id
 * message_type_id: 1, 2, 3, 5, 6 are protocol control messages
 *
 * type 4: User Control Messages [Event_type + Event_Data], e.g. Stream Begin, Stream End...
 *
 * type 8: audio data
 *
 * type 9: video data
 *
 * type 18: metadata message [AMF0]
 *
 * type 20: Command Message (RPC), e.g. connect, createStream, publish, play, pause
 */
- (void)sendBuffer:(LLYFrame *)frame{
    dispatch_sync(_packageQueue, ^{
        uint64_t ts = frame.timestamp;
        int streamId = frame.msgStreamId;
        NSLog(@"streamId------%d", streamId);
        NSNumber *preTimestamp = self.preChunk[@(streamId)];
        uint8_t *chunk;
        int offset = 0;
        if (preTimestamp == nil) {
            // First audio/video frame on this chunk stream: full type-0 (12-byte) header.
            chunk = malloc(12);
            chunk[0] = RTMP_CHUNK_TYPE_0/*0x00*/ | (streamId & 0x1F); // top 2 bits 00 -> 12-byte header
            offset += 1;
            memcpy(chunk + offset, [NSMutableData be24:(uint32_t)ts], 3);
            offset += 3;    // 3-byte timestamp
            memcpy(chunk + offset, [NSMutableData be24:frame.msgLength], 3);
            offset += 3;    // 3-byte message length
            int msgTypeId = frame.msgTypeId;    // 1-byte message type id
            memcpy(chunk + offset, &msgTypeId, 1);
            offset += 1;
            memcpy(chunk + offset, (uint8_t *)&(_streamID), sizeof(_streamID));
            offset += sizeof(_streamID);
        } else {
            // Subsequent frames: type-1 (8-byte) header with a timestamp delta.
            chunk = malloc(8);
            chunk[0] = RTMP_CHUNK_TYPE_1/*0x40*/ | (streamId & 0x1F); // top 2 bits 01 -> 8-byte header
            offset += 1;
            char *temp = [NSMutableData be24:(uint32_t)(ts - preTimestamp.integerValue)];
            memcpy(chunk + offset, temp, 3);
            offset += 3;
            memcpy(chunk + offset, [NSMutableData be24:frame.msgLength], 3);
            offset += 3;
            int msgTypeId = frame.msgTypeId;
            memcpy(chunk + offset, &msgTypeId, 1);
            offset += 1;
        }
        self.preChunk[@(streamId)] = @(ts);
        uint8_t *bufferData = (uint8_t *)frame.data.bytes;
        // Reserve room for the payload plus one extra type-3 header byte per continuation chunk.
        uint8_t *outp = (uint8_t *)malloc(frame.data.length + frame.data.length / _outChunkSize + 64);
        memcpy(outp, chunk, offset);
        free(chunk);
        NSUInteger total = frame.data.length;
        NSInteger step = MIN(total, _outChunkSize);
        memcpy(outp + offset, bufferData, step);
        offset += step;
        total -= step;
        bufferData += step;
        while (total > 0) {
            step = MIN(total, _outChunkSize);
            // Type 3 (11): 1-byte header only. Overwrite the previous payload byte (already
            // copied out) with the header, then copy header + next chunk in one memcpy.
            bufferData[-1] = RTMP_CHUNK_TYPE_3/*0xC0*/ | (streamId & 0x1F);
            memcpy(outp + offset, bufferData - 1, step + 1);
            offset += step + 1;
            total -= step;
            bufferData += step;
        }
        NSData *tosend = [NSData dataWithBytes:outp length:offset];
        free(outp);
        [self writeData:tosend];
    });
}

If everything works, the push URL should now be playable in VLC.

For the complete code, see my demo.
