VideoToolbox是一个底层框架,提供对硬件编码器和解码器的直接访问。它提供视频压缩与解压缩服务,以及存储在CoreVideo像素缓冲区中的栅格图像格式之间的转换服务。这些服务以会话对象(压缩、解压缩和像素传输)的形式提供,这些会话对象都是Core Foundation (CF)类型。不需要直接访问硬件编码器和解码器的应用程序,通常无需直接使用VideoToolbox。
本文使用VideoToolbox对视频进行硬编码和硬解码,下面是对视频进行H264编码和解码的实现过程,demo源码地址:https://github.com/duzhaoquan/VideoEncodeH264
1.视频捕获
// MARK: - Video capture state

/// Capture session that owns the camera input and the video data output.
var session: AVCaptureSession = AVCaptureSession()
/// Queue on which captured sample buffers are delivered to the delegate.
var queue = DispatchQueue(label: "quque")
/// Camera input currently stored by `startCapture()`, if any.
var input: AVCaptureDeviceInput?
lazy var previewLayer = AVCaptureVideoPreviewLayer(session: self.session)
lazy var recordOutput = AVCaptureMovieFileOutput()
var captureView: UIView!
let output = AVCaptureVideoDataOutput()
var focusBox: UIView!
var exposureBox: UIView!

// MARK: - Capture control

/// Starts the capture session on the back camera and wires the encoder
/// output straight into the decoder (encode -> decode -> display loop).
///
/// Returns silently when no back camera is available or the device input
/// cannot be created.
func startCapture() {
    guard let device = getCamera(postion: .back),
          let cameraInput = try? AVCaptureDeviceInput(device: device) else {
        return
    }
    input = cameraInput
    if session.canAddInput(cameraInput) {
        session.addInput(cameraInput)
    }

    previewLayer.isHidden = false
    // Keep the aspect ratio of the video inside the preview layer.
    previewLayer.videoGravity = .resizeAspect
    session.startRunning()

    // Encoder. Alternative sinks for the encoded data are
    // `writeTofile(data:)` and `ccDecode?.decodeNaluData(_:)`.
    encoder = DQVideoEncoder(width: 480, height: 640)
    encoder.videoEncodeCallback { [weak self] data in
        self?.decoder.decode(data: data)
    }
    encoder.videoEncodeCallbackSPSAndPPS { [weak self] sps, pps in
        // Feed the parameter sets directly to the decoder.
        self?.decoder.decode(data: sps)
        self?.decoder.decode(data: pps)
    }

    // Decoder. `self` is captured weakly: the decoder is stored on `self`,
    // so a strong capture here would form a retain cycle.
    decoder = DQVideoDecode(width: 480, height: 640)
    decoder.SetVideoDecodeCallback { [weak self] image in
        self?.player?.pixelBuffer = image
    }

    // Objective-C implementation of the same encoder/decoder pair.
    let con = CCVideoConfig()
    con.width = 480
    con.height = 640
    con.bitrate = 480 * 640 * 5
    ccencode = CCVideoEncoder(config: con)
    ccencode?.delegate = self
    ccDecode = CCVideoDecoder(config: con)
    ccDecode?.delegate = self
}

/// Appends `data` to the end of the file behind `fileHandle`, if one is open.
func writeTofile(data: Data) {
    _ = try? fileHandle?.seekToEnd()
    fileHandle?.write(data)
}

/// Toggles recording: attaches or detaches the video data output and, on the
/// first start, creates the `video.h264` file in the documents directory.
@objc func recordAction(btn: UIButton) {
    btn.isSelected = !btn.isSelected
    if !session.isRunning {
        session.startRunning()
    }

    guard btn.isSelected else {
        session.removeOutput(output)
        btn.setTitle("start record", for: .normal)
        return
    }

    btn.setTitle("stop record", for: .normal)
    output.setSampleBufferDelegate(self, queue: queue)
    if session.canAddOutput(output) {
        session.addOutput(output)
    }
    output.alwaysDiscardsLateVideoFrames = false
    // Capture in BGRA rather than a YUV colour space so no shader conversion
    // is needed. This must match the pixel format used later with
    // CVMetalTextureCacheCreateTextureFromImage, otherwise the video renders
    // incorrectly.
    output.videoSettings = [
        String(kCVPixelBufferPixelFormatTypeKey): NSNumber(value: kCVPixelFormatType_32BGRA)
    ]
    // The connection is nil when the output could not be added to the
    // session, so it is not force-unwrapped.
    output.connection(with: .video)?.videoOrientation = .portrait

    if fileHandle == nil {
        // Location of the generated file.
        guard let path = NSSearchPathForDirectoriesInDomains(.documentDirectory, .userDomainMask, true).first else {
            return
        }
        let filePath = "\(path)/video.h264"
        try? FileManager.default.removeItem(atPath: filePath)
        if FileManager.default.createFile(atPath: filePath, contents: nil, attributes: nil) {
            print("创建264文件成功")
        } else {
            print("创建264文件失败")
        }
        fileHandle = FileHandle(forWritingAtPath: filePath)
    }
}

/// Returns the first video capture device at the requested position.
///
/// - Parameter postion: Camera position to look for.
/// - Returns: The matching device, or `nil` when none is found.
func getCamera(postion: AVCaptureDevice.Position) -> AVCaptureDevice? {
    let devices: [AVCaptureDevice]
    if #available(iOS 10.0, *) {
        devices = AVCaptureDevice.DiscoverySession(deviceTypes: [.builtInWideAngleCamera],
                                                   mediaType: .video,
                                                   position: .unspecified).devices
    } else {
        devices = AVCaptureDevice.devices(for: .video)
    }
    return devices.first { $0.position == postion }
}
2.视频编码器封装
/// H.264 encoder built on a VideoToolbox `VTCompressionSession`.
///
/// Encoded output is delivered on `callBackQueue` as individual NAL units,
/// each prefixed with the 4-byte start code `00 00 00 01`, through
/// `videoEncodeCallback`. SPS and PPS are delivered (also start-code
/// prefixed) through `videoEncodeCallbackSPSAndPPS`.
class DQVideoEncoder: NSObject {

    /// Start code written in front of every emitted NAL unit.
    private static let startCode: [UInt8] = [0x00, 0x00, 0x00, 0x01]

    /// Number of frames submitted so far; also the fallback timestamp source.
    var frameID: Int64 = 0
    /// Never set by this class, so SPS/PPS are emitted with every key frame.
    var hasSpsPps = false
    var width: Int32 = 480
    var height: Int32 = 640
    var bitRate: Int32 = 480 * 640 * 3 * 4
    var fps: Int32 = 10
    var encodeQueue = DispatchQueue(label: "encode")
    var callBackQueue = DispatchQueue(label: "callBack")

    var encodeSession: VTCompressionSession!
    var encodeCallBack: VTCompressionOutputCallback?

    var videoEncodeCallback: ((Data) -> Void)?
    /// Registers the block that receives each encoded NAL unit.
    func videoEncodeCallback(block: @escaping (Data) -> Void) {
        self.videoEncodeCallback = block
    }

    var videoEncodeCallbackSPSAndPPS: ((Data, Data) -> Void)?
    /// Registers the block that receives the SPS and PPS parameter sets.
    func videoEncodeCallbackSPSAndPPS(block: @escaping (Data, Data) -> Void) {
        videoEncodeCallbackSPSAndPPS = block
    }

    /// Creates the encoder and its compression session.
    ///
    /// - Parameters:
    ///   - width: Frame width in pixels.
    ///   - height: Frame height in pixels.
    ///   - bitRate: Average bit rate in bps; defaults to `480 * 640 * 3 * 4`.
    ///   - fps: Expected frame rate; defaults to 10.
    init(width: Int32 = 480, height: Int32 = 640, bitRate: Int32? = nil, fps: Int32? = nil) {
        self.width = width
        self.height = height
        self.bitRate = bitRate ?? 480 * 640 * 3 * 4
        self.fps = fps ?? 10
        super.init()
        setCallBack()
        initVideoToolBox()
    }

    /// Creates and configures the `VTCompressionSession`.
    ///
    /// On failure a message is printed and `encodeSession` stays nil.
    func initVideoToolBox() {
        // `self` is passed unretained; `deinit` drains and invalidates the
        // session before the object goes away.
        let state = VTCompressionSessionCreate(allocator: kCFAllocatorDefault,
                                               width: width,
                                               height: height,
                                               codecType: kCMVideoCodecType_H264,
                                               encoderSpecification: nil,
                                               imageBufferAttributes: nil,
                                               compressedDataAllocator: nil,
                                               outputCallback: encodeCallBack,
                                               refcon: Unmanaged.passUnretained(self).toOpaque(),
                                               compressionSessionOut: &encodeSession)
        guard state == noErr, let session = encodeSession else {
            print("creat VTCompressionSession failed")
            return
        }

        // Real-time encoding.
        VTSessionSetProperty(session, key: kVTCompressionPropertyKey_RealTime, value: kCFBooleanTrue)
        // H.264 profile / level.
        VTSessionSetProperty(session, key: kVTCompressionPropertyKey_ProfileLevel, value: kVTProfileLevel_H264_Baseline_AutoLevel)
        // Disable frame reordering so that no B-frames are produced.
        VTSessionSetProperty(session, key: kVTCompressionPropertyKey_AllowFrameReordering, value: kCFBooleanFalse)

        // Numeric properties are passed as NSNumber so that the stored width
        // always matches the Swift integer type being wrapped.
        // Key frame interval.
        let frameInterval: Int32 = 10
        VTSessionSetProperty(session, key: kVTCompressionPropertyKey_MaxKeyFrameInterval, value: NSNumber(value: frameInterval))
        // Expected (not actual) frame rate.
        VTSessionSetProperty(session, key: kVTCompressionPropertyKey_ExpectedFrameRate, value: NSNumber(value: fps))
        // Average bit rate in bps. A higher rate gives a sharper image and a
        // larger output; a lower rate may blur the image.
        VTSessionSetProperty(session, key: kVTCompressionPropertyKey_AverageBitRate, value: NSNumber(value: bitRate))
        // Hard data-rate limit: (amount, seconds) pair. Computed in 64 bits
        // so that doubling a large bit rate cannot overflow.
        let bitRatesLimit = [NSNumber(value: Int64(bitRate) * 2), NSNumber(value: 1)] as CFArray
        VTSessionSetProperty(session, key: kVTCompressionPropertyKey_DataRateLimits, value: bitRatesLimit)
    }

    /// Submits one captured frame for encoding on `encodeQueue`.
    ///
    /// - Parameter sampleBuffer: Captured sample buffer holding an image buffer.
    func encodeVideo(sampleBuffer: CMSampleBuffer) {
        if encodeSession == nil {
            initVideoToolBox()
        }
        encodeQueue.async {
            guard let session = self.encodeSession,
                  let imageBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) else {
                print("encode filure")
                return
            }
            // Presentation timestamps handed to a session must increase from
            // frame to frame. Prefer the capture timestamp; otherwise fall
            // back to the running frame counter.
            var time = CMSampleBufferGetPresentationTimeStamp(sampleBuffer)
            if !time.isValid {
                time = CMTime(value: self.frameID, timescale: 1000)
            }
            self.frameID += 1

            let state = VTCompressionSessionEncodeFrame(session,
                                                        imageBuffer: imageBuffer,
                                                        presentationTimeStamp: time,
                                                        duration: .invalid,
                                                        frameProperties: nil,
                                                        sourceFrameRefcon: nil,
                                                        infoFlagsOut: nil)
            if state != noErr {
                print("encode filure")
            }
        }
    }

    /// Installs the C callback that VideoToolbox invokes for every encoded frame.
    private func setCallBack() {
        encodeCallBack = { outputCallbackRefCon, _, status, _, sampleBuffer in
            // Recover the encoder instance passed as `refcon` at session creation.
            guard let refCon = outputCallbackRefCon else { return }
            let encoder = Unmanaged<DQVideoEncoder>.fromOpaque(refCon).takeUnretainedValue()
            guard status == noErr, let sampleBuffer = sampleBuffer else { return }
            encoder.handleEncodedSample(sampleBuffer)
        }
    }

    /// Emits SPS/PPS (for key frames) followed by the frame's NAL units.
    private func handleEncodedSample(_ sampleBuffer: CMSampleBuffer) {
        if DQVideoEncoder.isKeyFrame(sampleBuffer) && !hasSpsPps {
            emitParameterSets(from: sampleBuffer)
        }
        emitNALUnits(from: sampleBuffer)
    }

    /// A sample is a key (sync) frame unless it is flagged `NotSync`.
    private static func isKeyFrame(_ sampleBuffer: CMSampleBuffer) -> Bool {
        guard let attachments = CMSampleBufferGetSampleAttachmentsArray(sampleBuffer, createIfNecessary: false) as? [[CFString: Any]],
              let first = attachments.first else {
            // No attachments: nothing marks the sample as non-sync.
            return true
        }
        return (first[kCMSampleAttachmentKey_NotSync] as? Bool) != true
    }

    /// Reads one H.264 parameter set and returns it prefixed with the start code.
    private static func parameterSet(at index: Int, in description: CMFormatDescription) -> Data? {
        var pointer: UnsafePointer<UInt8>?
        var size = 0
        let status = CMVideoFormatDescriptionGetH264ParameterSetAtIndex(description,
                                                                        parameterSetIndex: index,
                                                                        parameterSetPointerOut: &pointer,
                                                                        parameterSetSizeOut: &size,
                                                                        parameterSetCountOut: nil,
                                                                        nalUnitHeaderLengthOut: nil)
        guard status == noErr, let bytes = pointer, size > 0 else { return nil }
        var data = Data(capacity: startCode.count + size)
        data.append(contentsOf: startCode)
        data.append(bytes, count: size)
        return data
    }

    /// Extracts SPS (index 0) and PPS (index 1) and delivers them on `callBackQueue`.
    private func emitParameterSets(from sampleBuffer: CMSampleBuffer) {
        guard let description = CMSampleBufferGetFormatDescription(sampleBuffer) else { return }
        guard let sps = DQVideoEncoder.parameterSet(at: 0, in: description) else {
            print("sps失败")
            return
        }
        guard let pps = DQVideoEncoder.parameterSet(at: 1, in: description) else {
            print("pps失败")
            return
        }
        // Capture the block, not `self`, so nothing retains the encoder from
        // inside the VideoToolbox callback.
        guard let callback = videoEncodeCallbackSPSAndPPS else { return }
        callBackQueue.async {
            callback(sps, pps)
        }
    }

    /// Splits the encoded block buffer into NAL units and delivers each one,
    /// start-code prefixed, on `callBackQueue`.
    private func emitNALUnits(from sampleBuffer: CMSampleBuffer) {
        guard let dataBuffer = CMSampleBufferGetDataBuffer(sampleBuffer) else { return }

        var dataPointer: UnsafeMutablePointer<Int8>?
        var contiguousLength = 0
        var totalLength = 0
        let blockState = CMBlockBufferGetDataPointer(dataBuffer,
                                                     atOffset: 0,
                                                     lengthAtOffsetOut: &contiguousLength,
                                                     totalLengthOut: &totalLength,
                                                     dataPointerOut: &dataPointer)
        guard blockState == noErr, let base = dataPointer else {
            print("获取data失败\(blockState)")
            return
        }
        guard let callback = videoEncodeCallback else { return }

        // Only the contiguous range is addressable through `base`.
        let available = min(contiguousLength, totalLength)
        // Each NAL unit is preceded by a 4-byte big-endian length, not by a
        // start code.
        let lengthInfoSize = 4
        var offset = 0
        while offset + lengthInfoSize <= available {
            var rawLength: UInt32 = 0
            memcpy(&rawLength, base + offset, lengthInfoSize)
            // Big-endian to host byte order.
            let naluLength = Int(UInt32(bigEndian: rawLength))
            let payloadStart = offset + lengthInfoSize
            // Stop on a malformed length instead of reading past the buffer.
            guard naluLength > 0, payloadStart + naluLength <= available else { break }

            var data = Data(capacity: DQVideoEncoder.startCode.count + naluLength)
            data.append(contentsOf: DQVideoEncoder.startCode)
            data.append(Data(bytes: base + payloadStart, count: naluLength))
            callBackQueue.async {
                callback(data)
            }
            offset = payloadStart + naluLength
        }
    }

    deinit {
        if let session = encodeSession {
            // Finish pending frames before tearing the session down.
            VTCompressionSessionCompleteFrames(session, untilPresentationTimeStamp: .invalid)
            VTCompressionSessionInvalidate(session)
            encodeSession = nil
        }
    }
}
3.视频解码器封装
/// H.264 decoder built on a VideoToolbox `VTDecompressionSession`.
///
/// Input is fed one NAL unit at a time through `decode(data:)`, each prefixed
/// with the 4-byte start code `00 00 00 01`. SPS and PPS units are stored and
/// used to create the session lazily; decoded image buffers are delivered on
/// `callBackQueue` through `videoDecodeCallback`.
class DQVideoDecode: NSObject {

    /// Length of the start code / length prefix in front of every NAL unit.
    private static let headerLength = 4

    var width: Int32 = 480
    var height: Int32 = 640
    var decodeQueue = DispatchQueue(label: "decode")
    var callBackQueue = DispatchQueue(label: "decodeCallBack")
    var decodeDesc: CMVideoFormatDescription?

    /// Most recent SPS / PPS NAL units, including their start codes.
    var spsData: Data?
    var ppsData: Data?

    var decompressionSession: VTDecompressionSession?
    var callback: VTDecompressionOutputCallback?

    var videoDecodeCallback: ((CVImageBuffer?) -> Void)?
    /// Registers the block that receives each decoded image buffer.
    func SetVideoDecodeCallback(block: ((CVImageBuffer?) -> Void)?) {
        videoDecodeCallback = block
    }

    /// - Parameters:
    ///   - width: Width requested for the decoded pixel buffers.
    ///   - height: Height requested for the decoded pixel buffers.
    init(width: Int32, height: Int32) {
        self.width = width
        self.height = height
        super.init()
    }

    /// Creates the decompression session once both SPS and PPS are known.
    ///
    /// - Returns: `true` when a usable session exists; `false` when the
    ///   parameter sets are missing or too short, or creation failed.
    func initDecoder() -> Bool {
        if decompressionSession != nil {
            return true
        }
        let header = DQVideoDecode.headerLength
        guard let spsData = spsData, let ppsData = ppsData,
              spsData.count > header, ppsData.count > header else {
            return false
        }

        // Strip the 4-byte start codes from the stored parameter sets.
        let sps = [UInt8](spsData.dropFirst(header))
        let pps = [UInt8](ppsData.dropFirst(header))

        // Build the format description from SPS/PPS. The raw pointers are
        // only valid inside the `withUnsafeBufferPointer` closures, so the
        // call is made there instead of letting the pointers escape.
        var formatDescription: CMVideoFormatDescription?
        let descriptionState = sps.withUnsafeBufferPointer { spsBuffer -> OSStatus in
            pps.withUnsafeBufferPointer { ppsBuffer -> OSStatus in
                guard let spsBase = spsBuffer.baseAddress, let ppsBase = ppsBuffer.baseAddress else {
                    return kCMFormatDescriptionError_InvalidParameter
                }
                let parameterSets = [spsBase, ppsBase]
                let sizes = [spsBuffer.count, ppsBuffer.count]
                return CMVideoFormatDescriptionCreateFromH264ParameterSets(allocator: kCFAllocatorDefault,
                                                                           parameterSetCount: 2,
                                                                           parameterSetPointers: parameterSets,
                                                                           parameterSetSizes: sizes,
                                                                           nalUnitHeaderLength: 4,
                                                                           formatDescriptionOut: &formatDescription)
            }
        }
        guard descriptionState == noErr, let description = formatDescription else {
            print("description创建失败" )
            return false
        }
        decodeDesc = description

        // Output callback record: the callback plus an unretained reference
        // to this decoder, handed back as the callback's first argument.
        setCallBack()
        var callbackRecord = VTDecompressionOutputCallbackRecord(
            decompressionOutputCallback: callback,
            decompressionOutputRefCon: Unmanaged.passUnretained(self).toOpaque())

        // Requested output: bi-planar full-range 4:2:0 YUV ("420f") at the
        // configured resolution.
        let imageBufferAttributes = [
            kCVPixelBufferPixelFormatTypeKey: kCVPixelFormatType_420YpCbCr8BiPlanarFullRange,
            kCVPixelBufferWidthKey: width,
            kCVPixelBufferHeightKey: height
        ] as [CFString: Any]

        var session: VTDecompressionSession?
        let state = VTDecompressionSessionCreate(allocator: kCFAllocatorDefault,
                                                 formatDescription: description,
                                                 decoderSpecification: nil,
                                                 imageBufferAttributes: imageBufferAttributes as CFDictionary,
                                                 outputCallback: &callbackRecord,
                                                 decompressionSessionOut: &session)
        guard state == noErr, let newSession = session else {
            print("创建decodeSession失败")
            return false
        }
        decompressionSession = newSession
        VTSessionSetProperty(newSession, key: kVTDecompressionPropertyKey_RealTime, value: kCFBooleanTrue)
        return true
    }

    /// Installs the C callback that VideoToolbox invokes for every decoded frame.
    private func setCallBack() {
        callback = { decompressionOutputRefCon, _, status, _, imageBuffer, _, _ in
            guard let refCon = decompressionOutputRefCon else { return }
            let decoder = Unmanaged<DQVideoDecode>.fromOpaque(refCon).takeUnretainedValue()
            guard status == noErr, let imageBuffer = imageBuffer else { return }
            guard let block = decoder.videoDecodeCallback else { return }
            decoder.callBackQueue.async {
                block(imageBuffer)
            }
        }
    }

    /// Queues one start-code-prefixed NAL unit for decoding on `decodeQueue`.
    func decode(data: Data) {
        decodeQueue.async {
            self.decodeByte(data: data, size: UInt32(data.count))
        }
    }

    /// Dispatches one NAL unit according to its type.
    ///
    /// SPS (7) and PPS (8) are stored, SEI (6) is ignored, and every other
    /// type is decoded once the session can be created.
    private func decodeByte(data: Data, size: UInt32) {
        let header = DQVideoDecode.headerLength
        // Need the 4-byte start code plus at least the NAL header byte.
        guard Int(size) > header, Int(size) <= data.count else { return }

        // Replace the start code with the 4-byte big-endian NAL unit length.
        let naluSize = size - UInt32(header)
        var frame = [UInt8]()
        frame.reserveCapacity(Int(size))
        withUnsafeBytes(of: naluSize.bigEndian) { frame.append(contentsOf: $0) }
        frame.append(contentsOf: data.prefix(Int(size)).dropFirst(header))

        // The low 5 bits of the fifth byte hold the NAL unit type.
        switch frame[header] & 0x1f {
        case 0x06:
            // Supplemental enhancement information: nothing to decode.
            break
        case 0x07:
            spsData = data
        case 0x08:
            ppsData = data
        default:
            // IDR (5) and all remaining slice types.
            if initDecoder() {
                decode(frame: frame, size: size)
            }
        }
    }

    /// Wraps one length-prefixed NAL unit in a sample buffer and submits it
    /// for asynchronous decoding.
    private func decode(frame: [UInt8], size: UInt32) {
        guard let session = decompressionSession, let description = decodeDesc else { return }
        let length = min(Int(size), frame.count)
        guard length > 0 else { return }

        // Decoding is asynchronous, so the block buffer must own its bytes:
        // let Core Media allocate the block and copy the frame into it,
        // rather than referencing the temporary Swift array.
        var blockBuffer: CMBlockBuffer?
        var blockState = CMBlockBufferCreateWithMemoryBlock(allocator: kCFAllocatorDefault,
                                                            memoryBlock: nil,
                                                            blockLength: length,
                                                            blockAllocator: kCFAllocatorDefault,
                                                            customBlockSource: nil,
                                                            offsetToData: 0,
                                                            dataLength: length,
                                                            flags: kCMBlockBufferAssureMemoryNowFlag,
                                                            blockBufferOut: &blockBuffer)
        if blockState == kCMBlockBufferNoErr, let buffer = blockBuffer {
            blockState = CMBlockBufferReplaceDataBytes(with: frame,
                                                       blockBuffer: buffer,
                                                       offsetIntoDestination: 0,
                                                       dataLength: length)
        }
        guard blockState == kCMBlockBufferNoErr, let dataBuffer = blockBuffer else {
            print("创建blockBuffer失败")
            return
        }

        // One sample, no timing entries, one size entry.
        let sampleSizes = [length]
        var sampleBuffer: CMSampleBuffer?
        let readyState = CMSampleBufferCreateReady(allocator: kCFAllocatorDefault,
                                                   dataBuffer: dataBuffer,
                                                   formatDescription: description,
                                                   sampleCount: 1,
                                                   sampleTimingEntryCount: 0,
                                                   sampleTimingArray: nil,
                                                   sampleSizeEntryCount: 1,
                                                   sampleSizeArray: sampleSizes,
                                                   sampleBufferOut: &sampleBuffer)
        guard readyState == noErr, let sample = sampleBuffer else {
            print("Sample Buffer Create Ready faile")
            return
        }

        var infoFlags = VTDecodeInfoFlags()
        let decodeState = VTDecompressionSessionDecodeFrame(session,
                                                            sampleBuffer: sample,
                                                            flags: ._EnableAsynchronousDecompression,
                                                            frameRefcon: nil,
                                                            infoFlagsOut: &infoFlags)
        if decodeState != noErr {
            print("解码失败")
        }
    }

    deinit {
        if let session = decompressionSession {
            // Drain in-flight asynchronous frames before invalidating.
            VTDecompressionSessionWaitForAsynchronousFrames(session)
            VTDecompressionSessionInvalidate(session)
            decompressionSession = nil
        }
    }
}