iOS Video Hardware Encoding

Overview

Since iOS 8 the system has exposed the VideoToolbox framework for video encoding and decoding (the work is done by a dedicated hardware codec rather than the CPU). Before iOS 8 the common approach was FFmpeg, i.e. software codecs running on the CPU.

Processing Flow

1. Capture

Use AVFoundation's AVCapture family of classes to build a custom camera that captures the video stream, and run the video encoding inside the callback that delivers the captured data.

Camera setup:

self.session = [[AVCaptureSession alloc] init];
if ([self.session canSetSessionPreset:AVCaptureSessionPreset640x480]) {
    self.session.sessionPreset = AVCaptureSessionPreset640x480;
}
self.videoDevice = [AVCaptureDevice defaultDeviceWithMediaType:AVMediaTypeVideo];
if ([self.videoDevice isFocusModeSupported:AVCaptureFocusModeContinuousAutoFocus]) {
    if ([self.videoDevice lockForConfiguration:nil]) {
        self.videoDevice.focusMode = AVCaptureFocusModeContinuousAutoFocus;
        [self.videoDevice unlockForConfiguration];
    }
}
self.videoInput = [[AVCaptureDeviceInput alloc] initWithDevice:self.videoDevice error:nil];
if ([self.session canAddInput:self.videoInput]) {
    [self.session addInput:self.videoInput];
}
self.videoDataOutput = [[AVCaptureVideoDataOutput alloc] init];
// kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange: raw frames are delivered as YUV 4:2:0.
// YUV 4:4:4 sampling: every Y sample has its own UV pair.
// YUV 4:2:2 sampling: every two Y samples share one UV pair.
// YUV 4:2:0 sampling: every four Y samples share one UV pair.
NSDictionary *settings = [[NSDictionary alloc] initWithObjectsAndKeys:
                          [NSNumber numberWithUnsignedInteger:kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange],
                          kCVPixelBufferPixelFormatTypeKey, nil];
self.videoDataOutput.videoSettings = settings;
self.videoDataOutput.alwaysDiscardsLateVideoFrames = YES;
_videoOutputQueue = dispatch_queue_create("videoOutputQueue", DISPATCH_QUEUE_SERIAL);
[self.videoDataOutput setSampleBufferDelegate:self queue:_videoOutputQueue];
if ([self.session canAddOutput:self.videoDataOutput]) {
    [self.session addOutput:self.videoDataOutput];
}
self.videoConnection = [self.videoDataOutput connectionWithMediaType:AVMediaTypeVideo];
[self.videoConnection setVideoScaleAndCropFactor:1];
self.videoConnection.videoOrientation = AVCaptureVideoOrientationPortrait;
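
After configuration, capture still has to be started. A minimal sketch (not part of the snippet above): -startRunning blocks until the session has spun up, so it is usually dispatched off the main queue.

// Start the capture session; -startRunning blocks, so keep it off the main thread.
dispatch_async(dispatch_get_global_queue(QOS_CLASS_USER_INITIATED, 0), ^{
    [self.session startRunning];
});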

Delegate callback:

- (void)captureOutput:(AVCaptureOutput *)captureOutput didOutputSampleBuffer:(CMSampleBufferRef)sampleBuffer fromConnection:(AVCaptureConnection *)connection{
    __weak typeof(self) weakSelf = self;
    // CVPixelBufferRef pixelBufferRef = CMSampleBufferGetImageBuffer(sampleBuffer);
    if ([self.delegate respondsToSelector:@selector(videOutputHandler:didOutputSampleBuffer:)]) {
        [self.delegate videOutputHandler:weakSelf didOutputSampleBuffer:sampleBuffer];
    }
}

The sampleBuffer here is the raw data of the video stream.
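
On the receiving end of that delegate call, the pixel buffer and a millisecond timestamp can be pulled out of the sample buffer before handing them to the encoder. A rough sketch (the videoEncode property name is an assumption; videoEncodeData:time: is the encoder entry point shown in the next step):

- (void)videOutputHandler:(id)handler didOutputSampleBuffer:(CMSampleBufferRef)sampleBuffer{
    // The raw frame lives in the sample buffer's image buffer.
    CVPixelBufferRef pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer);
    // Convert the presentation timestamp to milliseconds for the encoder/packager.
    CMTime pts = CMSampleBufferGetPresentationTimeStamp(sampleBuffer);
    uint64_t ms = (uint64_t)(CMTimeGetSeconds(pts) * 1000);
    [self.videoEncode videoEncodeData:pixelBuffer time:ms];
}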

2. Encoding

With the VideoToolbox APIs it is straightforward to encode the sampleBuffer data captured above.

First, initialize a VTCompressionSessionRef:

OSStatus status = VTCompressionSessionCreate(NULL, _videoConfig.videoSize.width, _videoConfig.videoSize.height, kCMVideoCodecType_H264, NULL, NULL, NULL, didCompressBuffer, (__bridge void *)self, &_compressionSession);
if (status != noErr) {
    return;
}
// Keyframe (GOP) interval, usually 2x the frame rate; a larger interval gives a higher compression ratio.
VTSessionSetProperty(_compressionSession, kVTCompressionPropertyKey_MaxKeyFrameInterval, (__bridge CFTypeRef)@(_videoConfig.keyframeInterval));
VTSessionSetProperty(_compressionSession, kVTCompressionPropertyKey_MaxKeyFrameIntervalDuration, (__bridge CFTypeRef)@(_videoConfig.keyframeInterval));
// Note: kVTCompressionPropertyKey_AverageBitRate takes bits, kVTCompressionPropertyKey_DataRateLimits takes bytes and seconds.
// e.g. status = VTSessionSetProperty(session, kVTCompressionPropertyKey_AverageBitRate, (__bridge CFTypeRef)@(600 * 1024));
// e.g. status = VTSessionSetProperty(session, kVTCompressionPropertyKey_DataRateLimits, (__bridge CFArrayRef)@[@(800 * 1024 / 8), @1]);
// Average bitrate, in bits per second.
VTSessionSetProperty(_compressionSession, kVTCompressionPropertyKey_AverageBitRate, (__bridge CFTypeRef)@(_videoConfig.bitrate * 8));
// Hard data-rate limit: bytes over a window of seconds (here bitrate bytes per 1 second).
NSArray *limit = @[@(_videoConfig.bitrate), @(1)];
VTSessionSetProperty(_compressionSession, kVTCompressionPropertyKey_DataRateLimits, (__bridge CFArrayRef)limit);
VTSessionSetProperty(_compressionSession, kVTCompressionPropertyKey_ExpectedFrameRate, (__bridge CFTypeRef)@(_videoConfig.fps));
// Real-time encoding output (keeps latency down for live streaming).
VTSessionSetProperty(_compressionSession, kVTCompressionPropertyKey_RealTime, kCFBooleanTrue);
VTSessionSetProperty(_compressionSession, kVTCompressionPropertyKey_ProfileLevel, kVTProfileLevel_H264_Baseline_AutoLevel);
// Disallow frame reordering so that no B-frames are produced.
VTSessionSetProperty(_compressionSession, kVTCompressionPropertyKey_AllowFrameReordering, kCFBooleanFalse);
// 16:9 aspect ratio.
VTSessionSetProperty(_compressionSession, kVTCompressionPropertyKey_AspectRatio16x9, kCFBooleanTrue);
VTCompressionSessionPrepareToEncodeFrames(_compressionSession);

Encoding a frame:

// Encode one frame.
- (void)videoEncodeData:(CVPixelBufferRef)pixelBuffer time:(uint64_t)time{
    frameCount++;
    // CMTimeMake(value, timescale): the timestamp in seconds is value/timescale (here frameCount at a 1000 Hz timescale).
    CMTime presentationTimeStamp = CMTimeMake(frameCount, 1000);
    VTEncodeInfoFlags flags;
    // Duration of one frame at the configured frame rate.
    CMTime duration = CMTimeMake(1, _videoConfig.fps);
    NSDictionary *properties = nil;
    if (frameCount % (int32_t)(self.videoConfig.keyframeInterval) == 0) {
        // Force a keyframe at every GOP boundary.
        properties = @{(__bridge NSString *)kVTEncodeFrameOptionKey_ForceKeyFrame: @YES};
    }
    // The timestamp is passed through to the callback; __bridge_retained keeps it alive
    // until the callback takes ownership with __bridge_transfer.
    NSNumber *timeNumber = @(time);
    OSStatus statusCode = VTCompressionSessionEncodeFrame(_compressionSession, pixelBuffer, presentationTimeStamp, duration, (__bridge CFDictionaryRef)properties, (__bridge_retained void *)timeNumber, &flags);
    if (statusCode != noErr) {
        NSLog(@"H264: VTCompressionSessionEncodeFrame failed with %d", (int)statusCode);
        VTCompressionSessionInvalidate(_compressionSession);
        CFRelease(_compressionSession);
        _compressionSession = NULL;
        return;
    }
    NSLog(@"H264: VTCompressionSessionEncodeFrame Success");
}
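
When encoding stops (end of stream, app backgrounded, etc.), the session should be flushed and released. A minimal teardown sketch (the method name here is illustrative):

- (void)stopEncode{
    if (_compressionSession) {
        // Push out any frames still buffered inside the encoder.
        VTCompressionSessionCompleteFrames(_compressionSession, kCMTimeInvalid);
        VTCompressionSessionInvalidate(_compressionSession);
        CFRelease(_compressionSession);
        _compressionSession = NULL;
    }
}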

Encode-complete callback:

// Callback invoked when a frame has been encoded.
static void didCompressBuffer(void *VTref, void *VTFrameRef, OSStatus status, VTEncodeInfoFlags infoFlags, CMSampleBufferRef sampleBuffer){
    LLYVideoEncode *videoEncode = (__bridge LLYVideoEncode *)VTref;
    uint64_t timeStamp = [((__bridge_transfer NSNumber *)VTFrameRef) longLongValue];
    // The encoded data (AVCC-format H.264).
    CMBlockBufferRef blockBuffer = CMSampleBufferGetDataBuffer(sampleBuffer);
    CFArrayRef attachments = CMSampleBufferGetSampleAttachmentsArray(sampleBuffer, false);
    // Determine whether this sample is a keyframe.
    BOOL isKeyFrame = NO;
    if (attachments != NULL) {
        CFDictionaryRef attachment;
        CFBooleanRef dependsOnOthers;
        attachment = (CFDictionaryRef)CFArrayGetValueAtIndex(attachments, 0);
        dependsOnOthers = (CFBooleanRef)CFDictionaryGetValue(attachment, kCMSampleAttachmentKey_DependsOnOthers);
        isKeyFrame = (dependsOnOthers == kCFBooleanFalse);
    }
    // For keyframes, the SPS and PPS must be pulled out of the format description.
    if (isKeyFrame) {
        CMFormatDescriptionRef format = CMSampleBufferGetFormatDescription(sampleBuffer);
        size_t sparameterSetSize, sparameterSetCount;
        const uint8_t *sparameterSet;
        // SPS
        OSStatus statusCode = CMVideoFormatDescriptionGetH264ParameterSetAtIndex(format, 0, &sparameterSet, &sparameterSetSize, &sparameterSetCount, NULL);
        if (statusCode == noErr) {
            // PPS
            size_t pparameterSetSize, pparameterSetCount;
            const uint8_t *pparameterSet;
            OSStatus statusCode = CMVideoFormatDescriptionGetH264ParameterSetAtIndex(format, 1, &pparameterSet, &pparameterSetSize, &pparameterSetCount, NULL);
            if (statusCode == noErr) {
                NSData *sps = [NSData dataWithBytes:sparameterSet length:sparameterSetSize];
                NSData *pps = [NSData dataWithBytes:pparameterSet length:pparameterSetSize];
                if ([videoEncode.delegate respondsToSelector:@selector(videoEncode:sps:pps:time:)]) {
                    [videoEncode.delegate videoEncode:videoEncode sps:sps pps:pps time:timeStamp];
                }
            }
        }
    }
    // The frame payload is extracted whether or not it is a keyframe.
    // Each NAL unit is prefixed with a 4-byte big-endian length (not the 0x00000001 start code);
    // non-keyframes usually contain a single NAL unit, keyframes may contain several.
    size_t length, totalLength;
    char *dataPointer;
    size_t offset = 0;
    int const headLen = 4;
    OSStatus statusCodeRet = CMBlockBufferGetDataPointer(blockBuffer, 0, &length, &totalLength, &dataPointer);
    if (statusCodeRet == noErr) {
        // Walk through every NAL unit in the block buffer.
        while (offset < totalLength - headLen) {
            uint32_t NALUnitLength = 0;
            memcpy(&NALUnitLength, dataPointer + offset, headLen);
            NALUnitLength = CFSwapInt32BigToHost(NALUnitLength);
            NSData *naluData = [NSData dataWithBytes:dataPointer + headLen + offset length:NALUnitLength];
            offset += headLen + NALUnitLength;
            if ([videoEncode.delegate respondsToSelector:@selector(videoEncode:frame:time:isKeyFrame:)]) {
                [videoEncode.delegate videoEncode:videoEncode frame:naluData time:timeStamp isKeyFrame:isKeyFrame];
            }
        }
    }
}

This is one of the key steps of the encoding pipeline: the encoded H.264 data has to be extracted according to its format so it can be repackaged (into RTMP-format messages) and sent over RTMP. All of these formats are fixed, so once you know their layout, splitting and repackaging the data is manageable.
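
Because the callback hands back AVCC-style NAL units (a 4-byte big-endian length prefix rather than the 0x00000001 Annex B start code), a quick way to sanity-check the encoder output before touching RTMP is to dump it to a raw .h264 file with the start code prepended. A small sketch (debugFileHandle is a hypothetical NSFileHandle opened for writing; naluData comes from the callback above):

static const uint8_t startCode[4] = {0x00, 0x00, 0x00, 0x01};
// Prepend the Annex B start code to each NAL unit (and to the SPS/PPS before the first keyframe).
NSMutableData *annexB = [NSMutableData dataWithBytes:startCode length:4];
[annexB appendData:naluData];
[self.debugFileHandle writeData:annexB];
// The resulting elementary stream can be opened directly with ffplay or VLC to verify the output.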

3. Repackaging the extracted video data into RTMP-format packets

Packaging the SPS and PPS (the AVC sequence header, sent with the keyframe data):

- (void)packageKeyFrameSps:(NSData *)spsData pps:(NSData *)ppsData timestamp:(uint64_t)timestamp{
    if (spsData.length <= 0 || ppsData.length <= 0) {
        return;
    }
    if (_hasSendKeyFrame) {
        return;
    }
    _hasSendKeyFrame = YES;
    const char *sps = spsData.bytes;
    const char *pps = ppsData.bytes;
    NSInteger sps_len = spsData.length;
    NSInteger pps_len = ppsData.length;
    NSInteger total = sps_len + pps_len + 16;
    uint8_t *body = (uint8_t *)malloc(total);
    int index = 0;
    memset(body, 0, total);
    body[index++] = 0x17;   // FrameType 1 (keyframe) + CodecID 7 (AVC)
    body[index++] = 0x00;   // AVCPacketType 0: AVC sequence header (sps/pps)
    body[index++] = 0x00;   // composition time, 3 bytes, always 0 here
    body[index++] = 0x00;
    body[index++] = 0x00;
    body[index++] = 0x01;   // configurationVersion
    body[index++] = sps[1]; // AVCProfileIndication
    body[index++] = sps[2]; // profile_compatibility
    body[index++] = sps[3]; // AVCLevelIndication
    body[index++] = 0xff;   // reserved bits + lengthSizeMinusOne (4-byte NALU lengths)
    /* SPS */
    body[index++] = 0xe1;   // reserved bits + number of SPS (1)
    body[index++] = (sps_len >> 8) & 0xff;
    body[index++] = sps_len & 0xff;
    memcpy(&body[index], sps, sps_len);
    index += sps_len;
    /* PPS */
    body[index++] = 0x01;   // number of PPS
    body[index++] = (pps_len >> 8) & 0xff;
    body[index++] = pps_len & 0xff;
    memcpy(&body[index], pps, pps_len);
    index += pps_len;
    if ([self.delegate respondsToSelector:@selector(videoPackage:didPacketFrame:)]) {
        NSData *data = [NSData dataWithBytes:body length:index];
        LLYFrame *frame = [[LLYFrame alloc] init];
        frame.data = data;
        frame.timestamp = 0;    // the sequence header always uses timestamp 0
        frame.msgLength = (int)data.length;
        frame.msgTypeId = LLYMSGTypeID_VIDEO;
        frame.msgStreamId = LLYStreamIDVideo;  // video
        frame.isKeyframe = YES;
        [self.delegate videoPackage:self didPacketFrame:frame];
    }
    free(body);
}

Packaging the NALU payload (keyframe and non-keyframe data):

- (void)packageFrame:(NSData *)data timestamp:(uint64_t)timestamp isKeyFrame:(BOOL)isKeyFrame{
    if (!_hasSendKeyFrame) {
        return;
    }
    NSInteger i = 0;
    NSInteger total = data.length + 9;
    unsigned char *body = (unsigned char *)malloc(total);
    memset(body, 0, total);
    if (isKeyFrame) {
        body[i++] = 0x17;   // FrameType 1 (keyframe) + CodecID 7 (AVC)
    }
    else {
        body[i++] = 0x27;   // FrameType 2 (inter frame) + CodecID 7 (AVC)
    }
    body[i++] = 0x01;       // AVCPacketType 1: AVC NALU (not the sps/pps sequence header)
    body[i++] = 0x00;       // composition time (pts - dts), 3 bytes, 0 here
    body[i++] = 0x00;
    body[i++] = 0x00;
    // 4-byte big-endian NALU length.
    body[i++] = (data.length >> 24) & 0xff;
    body[i++] = (data.length >> 16) & 0xff;
    body[i++] = (data.length >> 8) & 0xff;
    body[i++] = (data.length) & 0xff;
    memcpy(&body[i], data.bytes, data.length);
    if ([self.delegate respondsToSelector:@selector(videoPackage:didPacketFrame:)]) {
        NSData *packetData = [NSData dataWithBytes:body length:total];
        LLYFrame *frame = [[LLYFrame alloc] init];
        frame.data = packetData;
        frame.timestamp = (int)timestamp;
        frame.msgLength = (int)packetData.length;
        frame.msgTypeId = LLYMSGTypeID_VIDEO;
        frame.msgStreamId = LLYStreamIDVideo;
        frame.isKeyframe = isKeyFrame;
        [self.delegate videoPackage:self didPacketFrame:frame];
    }
    free(body);
}

4. Establishing the RTMP channel

First, open a socket connection:

- (void)connectToServer:(NSString *)host port:(UInt32)port{
    if (self.streamStatus > 0) {
        [self close];
    }
    // Input stream, used for reading data from the server.
    CFReadStreamRef readStream;
    // Output stream, used for sending data.
    CFWriteStreamRef writeStream;
    if (port == 0) {
        // The default RTMP port is 1935.
        port = 1935;
    }
    // Open the socket connection.
    CFStreamCreatePairWithSocketToHost(NULL, (__bridge CFStringRef)host, port, &readStream, &writeStream);
    // Note __bridge_transfer: ownership of the streams is handed over to ARC.
    _inputStream = (__bridge_transfer NSInputStream *)readStream;
    _outputStream = (__bridge_transfer NSOutputStream *)writeStream;
    _inputStream.delegate = self;
    _outputStream.delegate = self;
    [_outputStream scheduleInRunLoop:[NSRunLoop mainRunLoop] forMode:NSRunLoopCommonModes];
    [_inputStream scheduleInRunLoop:[NSRunLoop mainRunLoop] forMode:NSRunLoopCommonModes];
    [_inputStream open];
    [_outputStream open];
}

Next, set up RTMP communication with the server: based on the stream events reported back, send the corresponding handshake packets. Once the three-step handshake succeeds, the RTMP channel is established and the repackaged data can be sent.

- (void)streamSession:(LLYStreamSession *)session didChangeStatus:(LLYStreamStatus)streamStatus{
    if (streamStatus & NSStreamEventHasBytesAvailable) {
        // Data arrived from the server.
        [self didReceivedata];
        return;
    }
    if (streamStatus & NSStreamEventHasSpaceAvailable) {
        // The socket is writable; kick off the handshake once connected.
        if (_rtmpStatus == LLYRtmpSessionStatusConnected) {
            [self handshake0];
        }
        return;
    }
    if ((streamStatus & NSStreamEventOpenCompleted) &&
        _rtmpStatus < LLYRtmpSessionStatusConnected) {
        self.rtmpStatus = LLYRtmpSessionStatusConnected;
    }
    if (streamStatus & NSStreamEventErrorOccurred) {
        self.rtmpStatus = LLYRtmpSessionStatusError;
    }
    if (streamStatus & NSStreamEventEndEncountered) {
        self.rtmpStatus = LLYRtmpSessionStatusNotConnected;
    }
}
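
The handshake0 call above is the first step of the RTMP handshake. As a rough sketch of what it might send (using the same writeData: helper as the chunk sender below; this is not necessarily the demo's exact implementation): C0 is a single version byte (0x03), and C1 is a 1536-byte block made of a 4-byte timestamp, 4 zero bytes and 1528 random bytes. The server answers with S0/S1/S2, after which C2 is sent and the channel is ready.

- (void)handshake0{
    // C0: RTMP protocol version 3.
    uint8_t c0 = 0x03;
    [self writeData:[NSData dataWithBytes:&c0 length:1]];
    // C1: 4-byte timestamp + 4 zero bytes + 1528 random bytes.
    NSMutableData *c1 = [NSMutableData dataWithLength:1536];
    uint8_t *p = c1.mutableBytes;
    uint32_t ts = CFSwapInt32HostToBig(0);   // a zero timestamp is acceptable
    memcpy(p, &ts, 4);                       // bytes 4-7 stay zero
    arc4random_buf(p + 8, 1528);             // random payload
    [self writeData:c1];
}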

5. RTMP streaming

/**
 * Chunk Basic Header: HeaderType + ChannelID, 1 byte in total.
 * > HeaderType (top 2 bits): 00 -> 12-byte header, 01 -> 8-byte header
 * > ChannelID (low 6 bits): 02 -> Ping/BytesRead channel, 03 -> Invoke channel
 *   (connect(), publish() and user-defined NetConnection.Call()), 04 -> Audio/Video channel
 *
 * For the 12-byte case, the Chunk Message Header follows:
 * timestamp + message_length + message_type_id + msg_stream_id
 * message_type_id: 1, 2, 3, 5, 6 are protocol control messages
 *
 * type 4: User Control Messages [Event_type + Event_Data], e.g. Stream Begin, Stream End...
 *
 * type 8: audio data
 *
 * type 9: video data
 *
 * type 18: metadata message [AMF0]
 *
 * type 20: Command Message (RPC), e.g. connect, createStream, publish, play, pause
 */
- (void)sendBuffer:(LLYFrame *)frame{
    dispatch_sync(_packageQueue, ^{
        uint64_t ts = frame.timestamp;
        int streamId = frame.msgStreamId;
        NSLog(@"streamId------%d", streamId);
        NSNumber *preTimestamp = self.preChunk[@(streamId)];
        uint8_t *chunk;
        int offset = 0;
        if (preTimestamp == nil) {
            // First audio/video frame on this chunk stream: full type-0 (12-byte) header.
            chunk = malloc(12);
            chunk[0] = RTMP_CHUNK_TYPE_0/*0x00*/ | (streamId & 0x1F); // top 2 bits 00 -> 12-byte header
            offset += 1;
            memcpy(chunk + offset, [NSMutableData be24:(uint32_t)ts], 3);
            offset += 3;    // 3-byte timestamp
            memcpy(chunk + offset, [NSMutableData be24:frame.msgLength], 3);
            offset += 3;    // 3-byte message length
            int msgTypeId = frame.msgTypeId;    // 1-byte message type id
            memcpy(chunk + offset, &msgTypeId, 1);
            offset += 1;
            memcpy(chunk + offset, (uint8_t *)&(_streamID), sizeof(_streamID));
            offset += sizeof(_streamID);
        } else {
            // Subsequent frames: type-1 (8-byte) header with a timestamp delta.
            chunk = malloc(8);
            chunk[0] = RTMP_CHUNK_TYPE_1/*0x40*/ | (streamId & 0x1F); // top 2 bits 01 -> 8-byte header
            offset += 1;
            char *temp = [NSMutableData be24:(uint32_t)(ts - preTimestamp.integerValue)];
            memcpy(chunk + offset, temp, 3);
            offset += 3;
            memcpy(chunk + offset, [NSMutableData be24:frame.msgLength], 3);
            offset += 3;
            int msgTypeId = frame.msgTypeId;
            memcpy(chunk + offset, &msgTypeId, 1);
            offset += 1;
        }
        self.preChunk[@(streamId)] = @(ts);
        uint8_t *bufferData = (uint8_t *)frame.data.bytes;
        // Reserve room for the payload plus one extra type-3 header byte per continuation chunk.
        uint8_t *outp = (uint8_t *)malloc(frame.data.length + frame.data.length / _outChunkSize + 64);
        memcpy(outp, chunk, offset);
        free(chunk);
        NSUInteger total = frame.data.length;
        NSInteger step = MIN(total, _outChunkSize);
        memcpy(outp + offset, bufferData, step);
        offset += step;
        total -= step;
        bufferData += step;
        while (total > 0) {
            step = MIN(total, _outChunkSize);
            // Type 3 (11): 1-byte header only. Overwrite the previous payload byte (already
            // copied out) with the header, then copy header + next chunk in one memcpy.
            bufferData[-1] = RTMP_CHUNK_TYPE_3/*0xC0*/ | (streamId & 0x1F);
            memcpy(outp + offset, bufferData - 1, step + 1);
            offset += step + 1;
            total -= step;
            bufferData += step;
        }
        NSData *tosend = [NSData dataWithBytes:outp length:offset];
        free(outp);
        [self writeData:tosend];
    });
}

If everything works, the push URL should now be playable in VLC.

For the complete code, see my demo.
