第一篇翻译的Direct3D device manager,链接:http://www.cnblogs.com/betterwgo/p/6124588.html
第二篇翻译的在DirectShow中支持DXVA 2.0,链接:http://www.cnblogs.com/betterwgo/p/6125351.html
当前我所使用的ffmpeg的版本是3.2,支持dxva2硬件加速的有以下几种编码格式: AV_CODEC_ID_MPEG2VIDEO、AV_CODEC_ID_H264、AV_CODEC_ID_VC1、AV_CODEC_ID_WMV3、AV_CODEC_ID_HEVC、AV_CODEC_ID_VP9。被ffmpeg识别为这几种编码格式的文件都可以尝试使用dxva2做硬件加速。但这并不代表这几种编码格式的文件就一定支持dxva2硬件加速,因为我就遇到过一个AV_CODEC_ID_HEVC文件在初始化配置dxva2的过程中失败,PotPlayer在播放这个文件时也不能用dxva2硬件加速。
/* * This file is part of FFmpeg. * * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef FFMPEG_DXVA2_H #define FFMPEG_DXVA2_H //#include "windows.h" extern "C"{ #include "libavcodec/avcodec.h" #include "libavutil/pixfmt.h" #include "libavutil/rational.h" } enum HWAccelID { HWACCEL_NONE = 0, HWACCEL_AUTO, HWACCEL_VDPAU, HWACCEL_DXVA2, HWACCEL_VDA, HWACCEL_VIDEOTOOLBOX, HWACCEL_QSV, }; typedef struct AVStream AVStream; typedef struct AVCodecContext AVCodecContext; typedef struct AVCodec AVCodec; typedef struct AVFrame AVFrame; typedef struct AVDictionary AVDictionary; typedef struct InputStream { int file_index; AVStream *st; int discard; /* true if stream data should be discarded */ int user_set_discard; int decoding_needed; /* non zero if the packets must be decoded in 'raw_fifo', see DECODING_FOR_* */ #define DECODING_FOR_OST 1 #define DECODING_FOR_FILTER 2 AVCodecContext *dec_ctx; AVCodec *dec; AVFrame *decoded_frame; AVFrame *filter_frame; /* a ref of decoded_frame, to be sent to filters */ int64_t start; /* time when read started */ /* predicted dts of the next packet read for this stream or (when there are * several frames in a packet) of the next frame in current packet (in AV_TIME_BASE units) */ int64_t next_dts; int64_t dts; ///< dts of the last packet read for this stream (in AV_TIME_BASE units) int64_t 
next_pts; ///< synthetic pts for the next decode frame (in AV_TIME_BASE units) int64_t pts; ///< current pts of the decoded frame (in AV_TIME_BASE units) int wrap_correction_done; int64_t filter_in_rescale_delta_last; int64_t min_pts; /* pts with the smallest value in a current stream */ int64_t max_pts; /* pts with the higher value in a current stream */ int64_t nb_samples; /* number of samples in the last decoded audio frame before looping */ double ts_scale; int saw_first_ts; int showed_multi_packet_warning; AVDictionary *decoder_opts; AVRational framerate; /* framerate forced with -r */ int top_field_first; int guess_layout_max; int autorotate; int resample_height; int resample_width; int resample_pix_fmt; int resample_sample_fmt; int resample_sample_rate; int resample_channels; uint64_t resample_channel_layout; int fix_sub_duration; struct { /* previous decoded subtitle and related variables */ int got_output; int ret; AVSubtitle subtitle; } prev_sub; struct sub2video { int64_t last_pts; int64_t end_pts; AVFrame *frame; int w, h; } sub2video; int dr1; /* decoded data from this stream goes into all those filters * currently video and audio only */ //InputFilter **filters; //int nb_filters; //int reinit_filters; /* hwaccel options */ enum HWAccelID hwaccel_id; char *hwaccel_device; /* hwaccel context */ enum HWAccelID active_hwaccel_id; void *hwaccel_ctx; void(*hwaccel_uninit)(AVCodecContext *s); int(*hwaccel_get_buffer)(AVCodecContext *s, AVFrame *frame, int flags); int(*hwaccel_retrieve_data)(AVCodecContext *s, AVFrame *frame); enum AVPixelFormat hwaccel_pix_fmt; enum AVPixelFormat hwaccel_retrieved_pix_fmt; /* stats */ // combined size of all the packets read uint64_t data_size; /* number of packets successfully read for this stream */ uint64_t nb_packets; // number of frames/samples retrieved from the decoder uint64_t frames_decoded; uint64_t samples_decoded; } InputStream; int dxva2_init(AVCodecContext *s, HWND hwnd); int 
dxva2_retrieve_data_call(AVCodecContext *s, AVFrame *frame); #endif /* FFMPEG_DXVA2_H */
/*
 * Set up DXVA2 decoding for codec context `s`, using window `hwnd`.
 * NOTE(review): the implementation is not shown here; the caller in this
 * article treats a return value of 0 as success and anything else as
 * "hardware acceleration unavailable" -- confirm against the definition.
 */
int dxva2_init(AVCodecContext *s, HWND hwnd);

/*
 * Hand a decoded frame to the DXVA2 path.
 * NOTE(review): presumably forwards to the stream's hwaccel_retrieve_data
 * callback, which in this article also renders the frame -- confirm
 * against the definition.
 */
int dxva2_retrieve_data_call(AVCodecContext *s, AVFrame *frame);
/**
 * Render a DXVA2-decoded frame straight to the device window.
 *
 * The Direct3D surface carried in frame->data[3] is stretched onto back
 * buffer 0 of the device and presented, so the decoded picture is never
 * copied back to system memory.
 *
 * Uses state defined outside this function: the critical section `cs`,
 * the cached back buffer `m_pBackBuffer`, the destination rectangle
 * `m_rtViewport` and the present parameters `d3dpp`.
 *
 * @param s      codec context; s->opaque must point to the InputStream whose
 *               hwaccel_ctx holds the DXVA2Context
 * @param frame  decoded frame; data[3] must hold the IDirect3DSurface9
 * @return 0 when the frame was presented, -1 when a required pointer is
 *         missing or any Direct3D / window call failed
 */
static int dxva2_retrieve_data(AVCodecContext *s, AVFrame *frame)
{
    LPDIRECT3DSURFACE9 surface = (LPDIRECT3DSURFACE9)frame->data[3];
    InputStream       *ist     = (InputStream *)s->opaque;
    DXVA2Context      *ctx     = ist ? (DXVA2Context *)ist->hwaccel_ctx : NULL;
    HRESULT            hr;

    /* Nothing can be drawn without a source surface and a device. */
    if (!surface || !ctx || !ctx->d3d9device)
        return -1;

    EnterCriticalSection(&cs);

    /* Render directly. */
    hr = ctx->d3d9device->Clear(0, NULL, D3DCLEAR_TARGET, D3DCOLOR_XRGB(0, 0, 0), 1.0f, 0);
    if (SUCCEEDED(hr))
        hr = ctx->d3d9device->BeginScene();

    if (SUCCEEDED(hr)) {
        /* Drop the reference kept from the previous call before fetching
         * the current back buffer. */
        if (m_pBackBuffer) {
            m_pBackBuffer->Release();
            m_pBackBuffer = NULL;
        }

        hr = ctx->d3d9device->GetBackBuffer(0, 0, D3DBACKBUFFER_TYPE_MONO, &m_pBackBuffer);

        /* A failed GetClientRect would leave a stale destination rectangle. */
        if (SUCCEEDED(hr) && !GetClientRect(d3dpp.hDeviceWindow, &m_rtViewport))
            hr = E_FAIL;

        if (SUCCEEDED(hr))
            hr = ctx->d3d9device->StretchRect(surface, NULL, m_pBackBuffer, &m_rtViewport, D3DTEXF_LINEAR);

        /* BeginScene succeeded, so EndScene must run even if drawing failed;
         * keep the first error that occurred. */
        HRESULT hr_end = ctx->d3d9device->EndScene();
        if (SUCCEEDED(hr))
            hr = hr_end;
    }

    /* Only show the back buffer when it was actually filled. */
    if (SUCCEEDED(hr))
        hr = ctx->d3d9device->Present(NULL, NULL, NULL, NULL);

    LeaveCriticalSection(&cs);

    return SUCCEEDED(hr) ? 0 : -1;
}
/*
 * Try to enable DXVA2 hardware decoding for the codecs that have a DXVA2
 * path; every other codec, and any failure to initialise DXVA2, falls back
 * to software decoding (bAccel = false).
 */
switch (codec->id) {
case AV_CODEC_ID_MPEG2VIDEO:
case AV_CODEC_ID_H264:
case AV_CODEC_ID_VC1:
case AV_CODEC_ID_WMV3:
case AV_CODEC_ID_HEVC:
case AV_CODEC_ID_VP9: {
    /* InputStream::hwaccel_device is a plain char *, and a string literal
     * is const in C++, so give the name writable static storage. */
    static char dxva2_device_name[] = "dxva2";

    /* Remember what is about to be overwritten so a failed DXVA2 setup
     * leaves the codec context as it was found. */
    const int   saved_thread_count = codecctx->thread_count;
    void *const saved_opaque       = codecctx->opaque;

    codecctx->thread_count = 1; // Multithreading is apparently not compatible with hardware decoding

    InputStream *ist = new InputStream();
    ist->hwaccel_id        = HWACCEL_AUTO;
    ist->active_hwaccel_id = HWACCEL_AUTO;
    ist->hwaccel_device    = dxva2_device_name;
    ist->dec               = codec;
    ist->dec_ctx           = codecctx;

    /* dxva2_init() is handed only the codec context, so the stream state
     * has to be reachable through opaque before the call. */
    codecctx->opaque = ist;

    if (dxva2_init(codecctx, hWnd) == 0) {
        codecctx->get_buffer2           = ist->hwaccel_get_buffer;
        codecctx->get_format            = GetHwFormat;
        codecctx->thread_safe_callbacks = 1;
        break;
    }

    /* DXVA2 is not usable for this stream: undo the changes above so the
     * software decoder is not left single-threaded, and release the stream
     * state instead of leaking it.
     * NOTE(review): assumes dxva2_init() keeps no reference to `ist` after
     * it has failed -- confirm against its implementation. */
    codecctx->opaque       = saved_opaque;
    codecctx->thread_count = saved_thread_count;
    delete ist;

    bAccel = false;
    break;
}
default:
    bAccel = false;
    break;
}
/*
 * Decode one packet of the video stream and display the resulting picture,
 * printing how long decode + display took.  Packets of other streams are
 * not touched by this statement.
 */
if (pkt.stream_index == videoindex) {
    const DWORD tick_begin  = GetTickCount();
    int         frame_ready = 0;

    /* The return value is deliberately not inspected; only frame_ready
     * decides whether anything is displayed. */
    (void)avcodec_decode_video2(codecctx, picture, &frame_ready, &pkt);

    if (frame_ready) {
        if (!bAccel) {
            /* Software path: convert the picture, then render it.  Skipped
             * entirely when the conversion resources are missing. */
            if (img_convert_ctx && pFrameBGR && out_buffer) {
                sws_scale(img_convert_ctx,
                          (const uint8_t* const*)picture->data, picture->linesize,
                          0, codecctx->height,
                          pFrameBGR->data, pFrameBGR->linesize);
                m_D3DVidRender.Render_YUV(out_buffer, picture->width, picture->height);

                const DWORD elapsed = GetTickCount() - tick_begin;
                printf("normal time using: %lu ", elapsed);
            }
        } else {
            /* DXVA2 path: fetch the data and render it in one step. */
            dxva2_retrieve_data_call(codecctx, picture);

            const DWORD elapsed = GetTickCount() - tick_begin;
            printf("dxva2 time using: %lu ", elapsed);
        }

        /* Counted for every decoded picture, rendered or not. */
        count++;
    }

    av_packet_unref(&pkt);
}
--------------------------------------------------------------2017.3.5 更新---------------------------------------------
我的建议:最好不要这样做。如果你要对硬解出来的数据做进一步处理,我建议直接在显卡上进行,GPU并行计算对于做某些图像处理速度比CPU快好多。如果你渲染用的OpenGL,你可以用GLSL写shader;如果你渲染用的D3D,你可以用HLSL写shader;如果你渲染用的D3D11,compute shader将有可能完成你要求的更为复杂的图像处理(compute shader没用过,只看了点介绍,可能不准确);如果你能用CUDA,不要犹豫;如果你不能用CUDA,我推荐你OpenCL。如果以上都不行,我知识面也有限,也没有什么好办法。因为硬解出来的数据很大,从显存copy到内存必然很耗时间和CPU,而且从时间消耗上来看,从显存copy回内存的时间比硬解本身所花费的时间大好多,所以依我狭窄的知识面来看,如果你一定要copy回内存,我建议你干脆放弃硬解。