27 #define ENABLE_VAAPI 0
30 #define MAX_SUPPORTED_WIDTH 1950
31 #define MAX_SUPPORTED_HEIGHT 1100
34 #include "libavutil/hwcontext_vaapi.h"
36 typedef struct VAAPIDecodeContext {
38 VAEntrypoint va_entrypoint;
40 VAContextID va_context;
42 #if FF_API_STRUCT_VAAPI_CONTEXT
45 struct vaapi_context *old_context;
46 AVBufferRef *device_ref;
50 AVHWDeviceContext *device;
51 AVVAAPIDeviceContext *hwctx;
53 AVHWFramesContext *frames;
54 AVVAAPIFramesContext *hwfc;
56 enum AVPixelFormat surface_format;
72 : last_frame(0), is_seeking(0), seeking_pts(0), seeking_frame(0), seek_count(0), NO_PTS_OFFSET(-99999),
73 path(
path), is_video_seek(true), check_interlace(false), check_fps(false), enable_seek(true), is_open(false),
74 seek_audio_frame_found(0), seek_video_frame_found(0),is_duration_known(false), largest_frame_processed(0),
76 video_pts(0), pFormatCtx(NULL), videoStream(-1), audioStream(-1), pCodecCtx(NULL), aCodecCtx(NULL),
77 pStream(NULL), aStream(NULL), pFrame(NULL), previous_packet_location{-1,0},
85 pts_offset_seconds = NO_PTS_OFFSET;
86 video_pts_seconds = NO_PTS_OFFSET;
87 audio_pts_seconds = NO_PTS_OFFSET;
116 if (abs(diff) <= amount)
127 static enum AVPixelFormat get_hw_dec_format(AVCodecContext *ctx,
const enum AVPixelFormat *pix_fmts)
129 const enum AVPixelFormat *p;
131 for (p = pix_fmts; *p != AV_PIX_FMT_NONE; p++) {
133 #if defined(__linux__)
135 case AV_PIX_FMT_VAAPI:
140 case AV_PIX_FMT_VDPAU:
148 case AV_PIX_FMT_DXVA2_VLD:
153 case AV_PIX_FMT_D3D11:
159 #if defined(__APPLE__)
161 case AV_PIX_FMT_VIDEOTOOLBOX:
168 case AV_PIX_FMT_CUDA:
184 return AV_PIX_FMT_NONE;
// Decide whether hardware-accelerated decoding should be attempted for the
// given codec ID. NOTE(review): this listing is fragmentary — the enclosing
// switch statement and its return paths are not visible here; the case labels
// below appear to whitelist codecs eligible for hw decode. TODO confirm
// against the full source.
187 int FFmpegReader::IsHardwareDecodeSupported(
int codecid)
// Codecs for which hw decode is considered (H.264, MPEG-2, and the VC-1/WMV family).
191 case AV_CODEC_ID_H264:
192 case AV_CODEC_ID_MPEG2VIDEO:
193 case AV_CODEC_ID_VC1:
194 case AV_CODEC_ID_WMV1:
195 case AV_CODEC_ID_WMV2:
196 case AV_CODEC_ID_WMV3:
211 const std::lock_guard<std::recursive_mutex> lock(
getFrameMutex);
221 if (avformat_open_input(&pFormatCtx,
path.c_str(), NULL, NULL) != 0)
225 if (avformat_find_stream_info(pFormatCtx, NULL) < 0)
232 packet_status.
reset(
true);
235 for (
unsigned int i = 0; i < pFormatCtx->nb_streams; i++) {
237 if (
AV_GET_CODEC_TYPE(pFormatCtx->streams[i]) == AVMEDIA_TYPE_VIDEO && videoStream < 0) {
244 if (
AV_GET_CODEC_TYPE(pFormatCtx->streams[i]) == AVMEDIA_TYPE_AUDIO && audioStream < 0) {
251 if (videoStream == -1 && audioStream == -1)
255 if (videoStream != -1) {
260 pStream = pFormatCtx->streams[videoStream];
266 const AVCodec *pCodec = avcodec_find_decoder(codecId);
267 AVDictionary *
opts = NULL;
268 int retry_decode_open = 2;
273 if (
hw_de_on && (retry_decode_open==2)) {
275 hw_de_supported = IsHardwareDecodeSupported(pCodecCtx->codec_id);
278 retry_decode_open = 0;
283 if (pCodec == NULL) {
284 throw InvalidCodec(
"A valid video codec could not be found for this file.",
path);
288 av_dict_set(&
opts,
"strict",
"experimental", 0);
292 int i_decoder_hw = 0;
294 char *adapter_ptr = NULL;
297 fprintf(stderr,
"Hardware decoding device number: %d\n", adapter_num);
300 pCodecCtx->get_format = get_hw_dec_format;
302 if (adapter_num < 3 && adapter_num >=0) {
303 #if defined(__linux__)
304 snprintf(adapter,
sizeof(adapter),
"/dev/dri/renderD%d", adapter_num+128);
305 adapter_ptr = adapter;
307 switch (i_decoder_hw) {
309 hw_de_av_device_type = AV_HWDEVICE_TYPE_VAAPI;
312 hw_de_av_device_type = AV_HWDEVICE_TYPE_CUDA;
315 hw_de_av_device_type = AV_HWDEVICE_TYPE_VDPAU;
318 hw_de_av_device_type = AV_HWDEVICE_TYPE_QSV;
321 hw_de_av_device_type = AV_HWDEVICE_TYPE_VAAPI;
325 #elif defined(_WIN32)
328 switch (i_decoder_hw) {
330 hw_de_av_device_type = AV_HWDEVICE_TYPE_CUDA;
333 hw_de_av_device_type = AV_HWDEVICE_TYPE_DXVA2;
336 hw_de_av_device_type = AV_HWDEVICE_TYPE_D3D11VA;
339 hw_de_av_device_type = AV_HWDEVICE_TYPE_QSV;
342 hw_de_av_device_type = AV_HWDEVICE_TYPE_DXVA2;
345 #elif defined(__APPLE__)
348 switch (i_decoder_hw) {
350 hw_de_av_device_type = AV_HWDEVICE_TYPE_VIDEOTOOLBOX;
353 hw_de_av_device_type = AV_HWDEVICE_TYPE_QSV;
356 hw_de_av_device_type = AV_HWDEVICE_TYPE_VIDEOTOOLBOX;
366 #if defined(__linux__)
367 if( adapter_ptr != NULL && access( adapter_ptr, W_OK ) == 0 ) {
368 #elif defined(_WIN32)
369 if( adapter_ptr != NULL ) {
370 #elif defined(__APPLE__)
371 if( adapter_ptr != NULL ) {
380 hw_device_ctx = NULL;
382 if (av_hwdevice_ctx_create(&hw_device_ctx, hw_de_av_device_type, adapter_ptr, NULL, 0) >= 0) {
383 if (!(pCodecCtx->hw_device_ctx = av_buffer_ref(hw_device_ctx))) {
425 pCodecCtx->thread_type &= ~FF_THREAD_FRAME;
429 int avcodec_return = avcodec_open2(pCodecCtx, pCodec, &
opts);
430 if (avcodec_return < 0) {
431 std::stringstream avcodec_error_msg;
432 avcodec_error_msg <<
"A video codec was found, but could not be opened. Error: " << av_err2string(avcodec_return);
438 AVHWFramesConstraints *constraints = NULL;
439 void *hwconfig = NULL;
440 hwconfig = av_hwdevice_hwconfig_alloc(hw_device_ctx);
444 ((AVVAAPIHWConfig *)hwconfig)->config_id = ((VAAPIDecodeContext *)(pCodecCtx->priv_data))->va_config;
445 constraints = av_hwdevice_get_hwframe_constraints(hw_device_ctx,hwconfig);
448 if (pCodecCtx->coded_width < constraints->min_width ||
449 pCodecCtx->coded_height < constraints->min_height ||
450 pCodecCtx->coded_width > constraints->max_width ||
451 pCodecCtx->coded_height > constraints->max_height) {
454 retry_decode_open = 1;
457 av_buffer_unref(&hw_device_ctx);
458 hw_device_ctx = NULL;
463 ZmqLogger::Instance()->
AppendDebugMethod(
"\nDecode hardware acceleration is used\n",
"Min width :", constraints->min_width,
"Min Height :", constraints->min_height,
"MaxWidth :", constraints->max_width,
"MaxHeight :", constraints->max_height,
"Frame width :", pCodecCtx->coded_width,
"Frame height :", pCodecCtx->coded_height);
464 retry_decode_open = 0;
466 av_hwframe_constraints_free(&constraints);
479 if (pCodecCtx->coded_width < 0 ||
480 pCodecCtx->coded_height < 0 ||
481 pCodecCtx->coded_width > max_w ||
482 pCodecCtx->coded_height > max_h ) {
483 ZmqLogger::Instance()->
AppendDebugMethod(
"DIMENSIONS ARE TOO LARGE for hardware acceleration\n",
"Max Width :", max_w,
"Max Height :", max_h,
"Frame width :", pCodecCtx->coded_width,
"Frame height :", pCodecCtx->coded_height);
485 retry_decode_open = 1;
488 av_buffer_unref(&hw_device_ctx);
489 hw_device_ctx = NULL;
493 ZmqLogger::Instance()->
AppendDebugMethod(
"\nDecode hardware acceleration is used\n",
"Max Width :", max_w,
"Max Height :", max_h,
"Frame width :", pCodecCtx->coded_width,
"Frame height :", pCodecCtx->coded_height);
494 retry_decode_open = 0;
502 retry_decode_open = 0;
504 }
while (retry_decode_open);
513 if (audioStream != -1) {
518 aStream = pFormatCtx->streams[audioStream];
524 const AVCodec *aCodec = avcodec_find_decoder(codecId);
530 if (aCodec == NULL) {
531 throw InvalidCodec(
"A valid audio codec could not be found for this file.",
path);
535 AVDictionary *
opts = NULL;
536 av_dict_set(&
opts,
"strict",
"experimental", 0);
539 if (avcodec_open2(aCodecCtx, aCodec, &
opts) < 0)
540 throw InvalidCodec(
"An audio codec was found, but could not be opened.",
path);
550 AVDictionaryEntry *tag = NULL;
551 while ((tag = av_dict_get(pFormatCtx->metadata,
"", tag, AV_DICT_IGNORE_SUFFIX))) {
552 QString str_key = tag->key;
553 QString str_value = tag->value;
554 info.
metadata[str_key.toStdString()] = str_value.trimmed().toStdString();
558 previous_packet_location.
frame = -1;
590 const std::lock_guard<std::recursive_mutex> lock(
getFrameMutex);
596 AVPacket *recent_packet = packet;
601 int max_attempts = 128;
606 "attempts", attempts);
618 RemoveAVPacket(recent_packet);
623 if(avcodec_is_open(pCodecCtx)) {
624 avcodec_flush_buffers(pCodecCtx);
630 av_buffer_unref(&hw_device_ctx);
631 hw_device_ctx = NULL;
639 if(avcodec_is_open(aCodecCtx)) {
640 avcodec_flush_buffers(aCodecCtx);
647 working_cache.
Clear();
650 avformat_close_input(&pFormatCtx);
651 av_freep(&pFormatCtx);
656 largest_frame_processed = 0;
657 seek_audio_frame_found = 0;
658 seek_video_frame_found = 0;
659 current_video_frame = 0;
660 last_video_frame.reset();
// Returns true when the "video" stream is actually attached album art
// (e.g. cover art embedded in an audio file) rather than a real video track.
// Safe on a closed reader: short-circuits on a null pFormatCtx / missing stream.
664 bool FFmpegReader::HasAlbumArt() {
668 return pFormatCtx && videoStream >= 0 && pFormatCtx->streams[videoStream]
669 && (pFormatCtx->streams[videoStream]->disposition & AV_DISPOSITION_ATTACHED_PIC);
672 void FFmpegReader::UpdateAudioInfo() {
675 info.
file_size = pFormatCtx->pb ? avio_size(pFormatCtx->pb) : -1;
693 if (aStream->duration > 0 && aStream->duration >
info.
duration) {
696 }
else if (pFormatCtx->duration > 0 &&
info.
duration <= 0.0f) {
698 info.
duration = float(pFormatCtx->duration) / AV_TIME_BASE;
731 AVDictionaryEntry *tag = NULL;
732 while ((tag = av_dict_get(aStream->metadata,
"", tag, AV_DICT_IGNORE_SUFFIX))) {
733 QString str_key = tag->key;
734 QString str_value = tag->value;
735 info.
metadata[str_key.toStdString()] = str_value.trimmed().toStdString();
739 void FFmpegReader::UpdateVideoInfo() {
742 info.
file_size = pFormatCtx->pb ? avio_size(pFormatCtx->pb) : -1;
749 AVRational framerate = av_guess_frame_rate(pFormatCtx, pStream, NULL);
761 if (pStream->sample_aspect_ratio.num != 0) {
784 if (!check_interlace) {
785 check_interlace =
true;
787 switch(field_order) {
788 case AV_FIELD_PROGRESSIVE:
801 case AV_FIELD_UNKNOWN:
803 check_interlace =
false;
818 if (
info.
duration <= 0.0f && pFormatCtx->duration >= 0) {
820 info.
duration = float(pFormatCtx->duration) / AV_TIME_BASE;
830 if (
info.
duration <= 0.0f && pStream->duration == AV_NOPTS_VALUE && pFormatCtx->duration == AV_NOPTS_VALUE) {
848 is_duration_known =
false;
851 is_duration_known =
true;
861 AVDictionaryEntry *tag = NULL;
862 while ((tag = av_dict_get(pStream->metadata,
"", tag, AV_DICT_IGNORE_SUFFIX))) {
863 QString str_key = tag->key;
864 QString str_value = tag->value;
865 info.
metadata[str_key.toStdString()] = str_value.trimmed().toStdString();
870 return this->is_duration_known;
876 throw ReaderClosed(
"The FFmpegReader is closed. Call Open() before calling this method.",
path);
879 if (requested_frame < 1)
885 throw InvalidFile(
"Could not detect the duration of the video or audio stream.",
path);
901 const std::lock_guard<std::recursive_mutex> lock(
getFrameMutex);
915 int64_t diff = requested_frame - last_frame;
916 if (diff >= 1 && diff <= 20) {
918 frame = ReadStream(requested_frame);
923 Seek(requested_frame);
932 frame = ReadStream(requested_frame);
940 std::shared_ptr<Frame> FFmpegReader::ReadStream(int64_t requested_frame) {
942 bool check_seek =
false;
943 int packet_error = -1;
953 CheckWorkingFrames(requested_frame);
958 if (is_cache_found) {
962 if (!hold_packet || !packet) {
964 packet_error = GetNextPacket();
965 if (packet_error < 0 && !packet) {
976 check_seek = CheckSeek(
false);
988 if ((
info.
has_video && packet && packet->stream_index == videoStream) ||
992 ProcessVideoPacket(requested_frame);
995 if ((
info.
has_audio && packet && packet->stream_index == audioStream) ||
999 ProcessAudioPacket(requested_frame);
1004 if ((!
info.
has_video && packet && packet->stream_index == videoStream) ||
1005 (!
info.
has_audio && packet && packet->stream_index == audioStream)) {
1007 if (packet->stream_index == videoStream) {
1009 }
else if (packet->stream_index == audioStream) {
1015 RemoveAVPacket(packet);
1025 ZmqLogger::Instance()->
AppendDebugMethod(
"FFmpegReader::ReadStream (force EOF)",
"packets_read", packet_status.
packets_read(),
"packets_decoded", packet_status.
packets_decoded(),
"packets_eof", packet_status.
packets_eof,
"video_eof", packet_status.
video_eof,
"audio_eof", packet_status.
audio_eof,
"end_of_file", packet_status.
end_of_file);
1042 "largest_frame_processed", largest_frame_processed,
1043 "Working Cache Count", working_cache.
Count());
1052 CheckWorkingFrames(requested_frame);
1068 std::shared_ptr<Frame> f = CreateFrame(largest_frame_processed);
1071 if (!frame->has_image_data) {
1076 frame->AddAudioSilence(samples_in_frame);
1081 std::shared_ptr<Frame> f = CreateFrame(largest_frame_processed);
1083 f->AddAudioSilence(samples_in_frame);
// Read the next packet from the container into the member `packet`.
// Returns the av_read_frame() result (>= 0 on success, negative on EOF/error).
// NOTE(review): fragmentary listing — lines between the original numbers are
// missing (e.g. the condition guarding RemoveAVPacket, and the per-stream
// bookkeeping bodies). Comments below are hedged accordingly.
1091 int FFmpegReader::GetNextPacket() {
1092 int found_packet = 0;
// Raw `new AVPacket()` — ownership is presumably released via RemoveAVPacket
// (which `delete`s it); TODO confirm the error path frees next_packet too.
1093 AVPacket *next_packet;
1094 next_packet =
new AVPacket();
1095 found_packet = av_read_frame(pFormatCtx, next_packet);
// Presumably frees the previously-held packet before replacing it — confirm
// the (not visible) guard around this call.
1099 RemoveAVPacket(packet);
1102 if (found_packet >= 0) {
// Adopt the newly read packet as the current one.
1104 packet = next_packet;
// Per-stream accounting (bodies not visible in this fragment).
1107 if (packet->stream_index == videoStream) {
1109 }
else if (packet->stream_index == audioStream) {
1118 return found_packet;
1122 bool FFmpegReader::GetAVFrame() {
1123 int frameFinished = 0;
1129 int send_packet_err = 0;
1130 int64_t send_packet_pts = 0;
1131 if ((packet && packet->stream_index == videoStream && !hold_packet) || !packet) {
1132 send_packet_err = avcodec_send_packet(pCodecCtx, packet);
1134 if (packet && send_packet_err >= 0) {
1135 send_packet_pts = GetPacketPTS();
1136 hold_packet =
false;
1146 if (send_packet_err < 0 && send_packet_err != AVERROR_EOF) {
1147 ZmqLogger::Instance()->
AppendDebugMethod(
"FFmpegReader::GetAVFrame (send packet: Not sent [" + av_err2string(send_packet_err) +
"])",
"send_packet_err", send_packet_err,
"send_packet_pts", send_packet_pts);
1148 if (send_packet_err == AVERROR(EAGAIN)) {
1150 ZmqLogger::Instance()->
AppendDebugMethod(
"FFmpegReader::GetAVFrame (send packet: AVERROR(EAGAIN): user must read output with avcodec_receive_frame()",
"send_packet_pts", send_packet_pts);
1152 if (send_packet_err == AVERROR(EINVAL)) {
1153 ZmqLogger::Instance()->
AppendDebugMethod(
"FFmpegReader::GetAVFrame (send packet: AVERROR(EINVAL): codec not opened, it is an encoder, or requires flush",
"send_packet_pts", send_packet_pts);
1155 if (send_packet_err == AVERROR(ENOMEM)) {
1156 ZmqLogger::Instance()->
AppendDebugMethod(
"FFmpegReader::GetAVFrame (send packet: AVERROR(ENOMEM): failed to add packet to internal queue, or legitimate decoding errors",
"send_packet_pts", send_packet_pts);
1163 int receive_frame_err = 0;
1164 AVFrame *next_frame2;
1172 next_frame2 = next_frame;
1175 while (receive_frame_err >= 0) {
1176 receive_frame_err = avcodec_receive_frame(pCodecCtx, next_frame2);
1178 if (receive_frame_err != 0) {
1179 ZmqLogger::Instance()->
AppendDebugMethod(
"FFmpegReader::GetAVFrame (receive frame: frame not ready yet from decoder [\" + av_err2string(receive_frame_err) + \"])",
"receive_frame_err", receive_frame_err,
"send_packet_pts", send_packet_pts);
1181 if (receive_frame_err == AVERROR_EOF) {
1183 "FFmpegReader::GetAVFrame (receive frame: AVERROR_EOF: EOF detected from decoder, flushing buffers)",
"send_packet_pts", send_packet_pts);
1184 avcodec_flush_buffers(pCodecCtx);
1187 if (receive_frame_err == AVERROR(EINVAL)) {
1189 "FFmpegReader::GetAVFrame (receive frame: AVERROR(EINVAL): invalid frame received, flushing buffers)",
"send_packet_pts", send_packet_pts);
1190 avcodec_flush_buffers(pCodecCtx);
1192 if (receive_frame_err == AVERROR(EAGAIN)) {
1194 "FFmpegReader::GetAVFrame (receive frame: AVERROR(EAGAIN): output is not available in this state - user must try to send new input)",
"send_packet_pts", send_packet_pts);
1196 if (receive_frame_err == AVERROR_INPUT_CHANGED) {
1198 "FFmpegReader::GetAVFrame (receive frame: AVERROR_INPUT_CHANGED: current decoded frame has changed parameters with respect to first decoded frame)",
"send_packet_pts", send_packet_pts);
1209 if (next_frame2->format == hw_de_av_pix_fmt) {
1210 next_frame->format = AV_PIX_FMT_YUV420P;
1211 if ((err = av_hwframe_transfer_data(next_frame,next_frame2,0)) < 0) {
1214 if ((err = av_frame_copy_props(next_frame,next_frame2)) < 0) {
1222 next_frame = next_frame2;
1230 av_image_alloc(pFrame->data, pFrame->linesize,
info.
width,
info.
height, (AVPixelFormat)(pStream->codecpar->format), 1);
1231 av_image_copy(pFrame->data, pFrame->linesize, (
const uint8_t**)next_frame->data, next_frame->linesize,
1238 if (next_frame->pts != AV_NOPTS_VALUE) {
1241 video_pts = next_frame->pts;
1242 }
else if (next_frame->pkt_dts != AV_NOPTS_VALUE) {
1244 video_pts = next_frame->pkt_dts;
1248 "FFmpegReader::GetAVFrame (Successful frame received)",
"video_pts", video_pts,
"send_packet_pts", send_packet_pts);
1259 avcodec_decode_video2(pCodecCtx, next_frame, &frameFinished, packet);
1265 if (frameFinished) {
1269 av_picture_copy((AVPicture *) pFrame, (AVPicture *) next_frame, pCodecCtx->pix_fmt,
info.
width,
1278 return frameFinished;
1282 bool FFmpegReader::CheckSeek(
bool is_video) {
1287 if ((is_video_seek && !seek_video_frame_found) || (!is_video_seek && !seek_audio_frame_found))
1295 int64_t max_seeked_frame = std::max(seek_audio_frame_found, seek_video_frame_found);
1298 if (max_seeked_frame >= seeking_frame) {
1301 "is_video_seek", is_video_seek,
1302 "max_seeked_frame", max_seeked_frame,
1303 "seeking_frame", seeking_frame,
1304 "seeking_pts", seeking_pts,
1305 "seek_video_frame_found", seek_video_frame_found,
1306 "seek_audio_frame_found", seek_audio_frame_found);
1309 Seek(seeking_frame - (10 * seek_count * seek_count));
1313 "is_video_seek", is_video_seek,
1314 "packet->pts", GetPacketPTS(),
1315 "seeking_pts", seeking_pts,
1316 "seeking_frame", seeking_frame,
1317 "seek_video_frame_found", seek_video_frame_found,
1318 "seek_audio_frame_found", seek_audio_frame_found);
1332 void FFmpegReader::ProcessVideoPacket(int64_t requested_frame) {
1335 int frame_finished = GetAVFrame();
1338 if (!frame_finished) {
1344 int64_t current_frame = ConvertVideoPTStoFrame(video_pts);
1347 if (!seek_video_frame_found && is_seeking)
1348 seek_video_frame_found = current_frame;
1354 working_cache.
Add(CreateFrame(requested_frame));
1364 AVFrame *my_frame = pFrame;
1368 AVFrame *pFrameRGB =
nullptr;
1369 uint8_t *buffer =
nullptr;
1373 if (pFrameRGB ==
nullptr)
1395 max_width = std::max(
float(max_width), max_width * max_scale_x);
1396 max_height = std::max(
float(max_height), max_height * max_scale_y);
1402 QSize width_size(max_width * max_scale_x,
1405 max_height * max_scale_y);
1407 if (width_size.width() >= max_width && width_size.height() >= max_height) {
1408 max_width = std::max(max_width, width_size.width());
1409 max_height = std::max(max_height, width_size.height());
1411 max_width = std::max(max_width, height_size.width());
1412 max_height = std::max(max_height, height_size.height());
1419 float preview_ratio = 1.0;
1426 max_width =
info.
width * max_scale_x * preview_ratio;
1427 max_height =
info.
height * max_scale_y * preview_ratio;
1432 int original_height = height;
1433 if (max_width != 0 && max_height != 0 && max_width < width && max_height < height) {
1435 float ratio = float(width) / float(height);
1436 int possible_width = round(max_height * ratio);
1437 int possible_height = round(max_width / ratio);
1439 if (possible_width <= max_width) {
1441 width = possible_width;
1442 height = max_height;
1446 height = possible_height;
1451 const int bytes_per_pixel = 4;
1452 int buffer_size = (width * height * bytes_per_pixel) + 128;
1453 buffer =
new unsigned char[buffer_size]();
1458 int scale_mode = SWS_FAST_BILINEAR;
1460 scale_mode = SWS_BICUBIC;
1466 sws_scale(img_convert_ctx, my_frame->data, my_frame->linesize, 0,
1467 original_height, pFrameRGB->data, pFrameRGB->linesize);
1470 std::shared_ptr<Frame> f = CreateFrame(current_frame);
1475 f->AddImage(width, height, bytes_per_pixel, QImage::Format_RGBA8888_Premultiplied, buffer);
1478 f->AddImage(width, height, bytes_per_pixel, QImage::Format_RGBA8888, buffer);
1482 working_cache.
Add(f);
1485 last_video_frame = f;
1491 RemoveAVFrame(my_frame);
1492 sws_freeContext(img_convert_ctx);
1498 ZmqLogger::Instance()->
AppendDebugMethod(
"FFmpegReader::ProcessVideoPacket (After)",
"requested_frame", requested_frame,
"current_frame", current_frame,
"f->number", f->number,
"video_pts_seconds", video_pts_seconds);
1502 void FFmpegReader::ProcessAudioPacket(int64_t requested_frame) {
1505 if (packet && packet->pts != AV_NOPTS_VALUE) {
1507 location = GetAudioPTSLocation(packet->pts);
1510 if (!seek_audio_frame_found && is_seeking)
1511 seek_audio_frame_found = location.
frame;
1518 working_cache.
Add(CreateFrame(requested_frame));
1522 "requested_frame", requested_frame,
1523 "target_frame", location.
frame,
1527 int frame_finished = 0;
1531 int packet_samples = 0;
1535 int send_packet_err = avcodec_send_packet(aCodecCtx, packet);
1536 if (send_packet_err < 0 && send_packet_err != AVERROR_EOF) {
1540 int receive_frame_err = avcodec_receive_frame(aCodecCtx, audio_frame);
1541 if (receive_frame_err >= 0) {
1544 if (receive_frame_err == AVERROR_EOF) {
1548 if (receive_frame_err == AVERROR(EINVAL) || receive_frame_err == AVERROR_EOF) {
1550 avcodec_flush_buffers(aCodecCtx);
1552 if (receive_frame_err != 0) {
1557 int used = avcodec_decode_audio4(aCodecCtx, audio_frame, &frame_finished, packet);
1560 if (frame_finished) {
1566 audio_pts = audio_frame->pts;
1569 location = GetAudioPTSLocation(audio_pts);
1572 int plane_size = -1;
1573 data_size = av_samples_get_buffer_size(&plane_size,
1575 audio_frame->nb_samples,
1583 int pts_remaining_samples = packet_samples /
info.
channels;
1586 if (pts_remaining_samples == 0) {
1588 "packet_samples", packet_samples,
1590 "pts_remaining_samples", pts_remaining_samples);
1594 while (pts_remaining_samples) {
1599 int samples = samples_per_frame - previous_packet_location.
sample_start;
1600 if (samples > pts_remaining_samples)
1601 samples = pts_remaining_samples;
1604 pts_remaining_samples -= samples;
1606 if (pts_remaining_samples > 0) {
1608 previous_packet_location.
frame++;
1620 "packet_samples", packet_samples,
1624 "AV_SAMPLE_FMT_S16", AV_SAMPLE_FMT_S16);
1629 audio_converted->nb_samples = audio_frame->nb_samples;
1630 av_samples_alloc(audio_converted->data, audio_converted->linesize,
info.
channels, audio_frame->nb_samples, AV_SAMPLE_FMT_S16, 0);
1640 av_opt_set_int(avr,
"out_sample_fmt", AV_SAMPLE_FMT_S16, 0);
1649 audio_converted->data,
1650 audio_converted->linesize[0],
1651 audio_converted->nb_samples,
1653 audio_frame->linesize[0],
1654 audio_frame->nb_samples);
1658 audio_converted->data[0],
1659 static_cast<size_t>(audio_converted->nb_samples)
1660 * av_get_bytes_per_sample(AV_SAMPLE_FMT_S16)
1669 av_free(audio_converted->data[0]);
1672 int64_t starting_frame_number = -1;
1673 bool partial_frame =
true;
1674 for (
int channel_filter = 0; channel_filter <
info.
channels; channel_filter++) {
1676 starting_frame_number = location.
frame;
1677 int channel_buffer_size = packet_samples /
info.
channels;
1678 float *channel_buffer =
new float[channel_buffer_size];
1681 for (
int z = 0; z < channel_buffer_size; z++)
1682 channel_buffer[z] = 0.0f;
1688 for (
int sample = 0; sample < packet_samples; sample++) {
1690 if (channel_filter == channel) {
1692 channel_buffer[position] = audio_buf[sample] * (1.0f / (1 << 15));
1709 int remaining_samples = channel_buffer_size;
1710 float *iterate_channel_buffer = channel_buffer;
1711 while (remaining_samples > 0) {
1717 int samples = samples_per_frame - start;
1718 if (samples > remaining_samples)
1719 samples = remaining_samples;
1722 std::shared_ptr<Frame> f = CreateFrame(starting_frame_number);
1725 if (samples_per_frame == start + samples)
1726 partial_frame =
false;
1728 partial_frame =
true;
1731 f->AddAudio(
true, channel_filter, start, iterate_channel_buffer,
1736 "frame", starting_frame_number,
1739 "channel", channel_filter,
1740 "partial_frame", partial_frame,
1741 "samples_per_frame", samples_per_frame);
1744 working_cache.
Add(f);
1747 remaining_samples -= samples;
1750 if (remaining_samples > 0)
1751 iterate_channel_buffer += samples;
1754 starting_frame_number++;
1761 delete[] channel_buffer;
1762 channel_buffer = NULL;
1763 iterate_channel_buffer = NULL;
1778 "requested_frame", requested_frame,
1779 "starting_frame", location.
frame,
1780 "end_frame", starting_frame_number - 1,
1781 "audio_pts_seconds", audio_pts_seconds);
1787 void FFmpegReader::Seek(int64_t requested_frame) {
1789 if (requested_frame < 1)
1790 requested_frame = 1;
1793 if (requested_frame > largest_frame_processed && packet_status.
end_of_file) {
1800 "requested_frame", requested_frame,
1801 "seek_count", seek_count,
1802 "last_frame", last_frame);
1805 working_cache.
Clear();
1809 video_pts_seconds = NO_PTS_OFFSET;
1811 audio_pts_seconds = NO_PTS_OFFSET;
1812 hold_packet =
false;
1814 current_video_frame = 0;
1815 largest_frame_processed = 0;
1820 packet_status.
reset(
false);
1826 int buffer_amount = std::max(max_concurrent_frames, 8);
1827 if (requested_frame - buffer_amount < 20) {
1841 if (seek_count == 1) {
1844 seeking_pts = ConvertFrameToVideoPTS(1);
1846 seek_audio_frame_found = 0;
1847 seek_video_frame_found = 0;
1851 bool seek_worked =
false;
1852 int64_t seek_target = 0;
1856 seek_target = ConvertFrameToVideoPTS(requested_frame - buffer_amount);
1858 fprintf(stderr,
"%s: error while seeking video stream\n", pFormatCtx->AV_FILENAME);
1861 is_video_seek =
true;
1868 seek_target = ConvertFrameToAudioPTS(requested_frame - buffer_amount);
1870 fprintf(stderr,
"%s: error while seeking audio stream\n", pFormatCtx->AV_FILENAME);
1873 is_video_seek =
false;
1882 avcodec_flush_buffers(aCodecCtx);
1886 avcodec_flush_buffers(pCodecCtx);
1889 previous_packet_location.
frame = -1;
1894 if (seek_count == 1) {
1896 seeking_pts = seek_target;
1897 seeking_frame = requested_frame;
1899 seek_audio_frame_found = 0;
1900 seek_video_frame_found = 0;
// Best-effort PTS for the current packet: prefer packet->pts, fall back to
// packet->dts when pts is unset. NOTE(review): fragmentary — the return of
// current_pts (original ~line 1934) and the guard selecting the final
// AV_NOPTS_VALUE return (presumably the no-packet case) are not visible here.
1928 int64_t FFmpegReader::GetPacketPTS() {
1930 int64_t current_pts = packet->pts;
// Containers sometimes omit pts; dts is the usual stand-in.
1931 if (current_pts == AV_NOPTS_VALUE && packet->dts != AV_NOPTS_VALUE)
1932 current_pts = packet->dts;
1938 return AV_NOPTS_VALUE;
1943 void FFmpegReader::UpdatePTSOffset() {
1944 if (pts_offset_seconds != NO_PTS_OFFSET) {
1948 pts_offset_seconds = 0.0;
1949 double video_pts_offset_seconds = 0.0;
1950 double audio_pts_offset_seconds = 0.0;
1952 bool has_video_pts =
false;
1955 has_video_pts =
true;
1957 bool has_audio_pts =
false;
1960 has_audio_pts =
true;
1964 while (!has_video_pts || !has_audio_pts) {
1966 if (GetNextPacket() < 0)
1971 int64_t pts = GetPacketPTS();
1974 if (!has_video_pts && packet->stream_index == videoStream) {
1980 if (std::abs(video_pts_offset_seconds) <= 10.0) {
1981 has_video_pts =
true;
1984 else if (!has_audio_pts && packet->stream_index == audioStream) {
1990 if (std::abs(audio_pts_offset_seconds) <= 10.0) {
1991 has_audio_pts =
true;
1997 if (has_video_pts && has_audio_pts) {
2009 pts_offset_seconds = std::max(video_pts_offset_seconds, audio_pts_offset_seconds);
2014 int64_t FFmpegReader::ConvertVideoPTStoFrame(int64_t pts) {
2016 int64_t previous_video_frame = current_video_frame;
2025 if (current_video_frame == 0)
2026 current_video_frame = frame;
2030 if (frame == previous_video_frame) {
2035 current_video_frame++;
2044 int64_t FFmpegReader::ConvertFrameToVideoPTS(int64_t frame_number) {
2046 double seconds = (double(frame_number - 1) /
info.
fps.
ToDouble()) + pts_offset_seconds;
2056 int64_t FFmpegReader::ConvertFrameToAudioPTS(int64_t frame_number) {
2058 double seconds = (double(frame_number - 1) /
info.
fps.
ToDouble()) + pts_offset_seconds;
2068 AudioLocation FFmpegReader::GetAudioPTSLocation(int64_t pts) {
2076 int64_t whole_frame = int64_t(frame);
2079 double sample_start_percentage = frame - double(whole_frame);
2085 int sample_start = round(
double(samples_per_frame) * sample_start_percentage);
2088 if (whole_frame < 1)
2090 if (sample_start < 0)
2097 if (previous_packet_location.
frame != -1) {
2098 if (location.
is_near(previous_packet_location, samples_per_frame, samples_per_frame)) {
2099 int64_t orig_frame = location.
frame;
2104 location.
frame = previous_packet_location.
frame;
2107 ZmqLogger::Instance()->
AppendDebugMethod(
"FFmpegReader::GetAudioPTSLocation (Audio Gap Detected)",
"Source Frame", orig_frame,
"Source Audio Sample", orig_start,
"Target Frame", location.
frame,
"Target Audio Sample", location.
sample_start,
"pts", pts);
2116 previous_packet_location = location;
// Fetch the requested frame from the working cache, or create it and add it
// (cache-or-create). Also tracks the high-water mark of processed frames.
// NOTE(review): fragmentary — the double GetFrame() lookups suggest a
// check / lock / re-check pattern, but the intervening lock and the frame
// construction (original lines ~2131-2137) are not visible; confirm in full source.
2123 std::shared_ptr<Frame> FFmpegReader::CreateFrame(int64_t requested_frame) {
2125 std::shared_ptr<Frame> output = working_cache.
GetFrame(requested_frame);
// Second lookup — presumably after acquiring a lock, to avoid duplicate creation.
2129 output = working_cache.
GetFrame(requested_frame);
2130 if(output)
return output;
// Newly constructed frame is registered in the working cache.
2138 working_cache.
Add(output);
// Remember the largest frame number seen so far (used for EOF/final-frame logic).
2141 if (requested_frame > largest_frame_processed)
2142 largest_frame_processed = requested_frame;
// Determine whether `requested_frame` is "seek trash": a frame decoded while
// seeking toward a target, which lies at or before the furthest frame found
// by either the audio or video seek and should therefore be discarded.
// NOTE(review): fragmentary — the body of the final `if` and the return are
// not visible here.
2149 bool FFmpegReader::IsPartialFrame(int64_t requested_frame) {
2152 bool seek_trash =
false;
// max_seeked_frame = max(seek_audio_frame_found, seek_video_frame_found)
2153 int64_t max_seeked_frame = seek_audio_frame_found;
2154 if (seek_video_frame_found > max_seeked_frame) {
2155 max_seeked_frame = seek_video_frame_found;
// Only streams that exist (has_audio / has_video) and have actually reported
// a seek-found frame participate in the comparison.
2157 if ((
info.
has_audio && seek_audio_frame_found && max_seeked_frame >= requested_frame) ||
2158 (
info.
has_video && seek_video_frame_found && max_seeked_frame >= requested_frame)) {
2166 void FFmpegReader::CheckWorkingFrames(int64_t requested_frame) {
2169 const std::lock_guard<std::recursive_mutex> lock(
getFrameMutex);
2172 std::vector<std::shared_ptr<openshot::Frame>> working_frames = working_cache.
GetFrames();
2173 std::vector<std::shared_ptr<openshot::Frame>>::iterator working_itr;
2176 for(working_itr = working_frames.begin(); working_itr != working_frames.end(); ++working_itr)
2179 std::shared_ptr<Frame> f = *working_itr;
2182 if (!f || f->number > requested_frame) {
2188 double frame_pts_seconds = (double(f->number - 1) /
info.
fps.
ToDouble()) + pts_offset_seconds;
2189 double recent_pts_seconds = std::max(video_pts_seconds, audio_pts_seconds);
2192 bool is_video_ready =
false;
2193 bool is_audio_ready =
false;
2194 double recent_pts_diff = recent_pts_seconds - frame_pts_seconds;
2195 if ((frame_pts_seconds <= video_pts_seconds)
2196 || (recent_pts_diff > 1.5)
2200 is_video_ready =
true;
2202 "frame_number", f->number,
2203 "frame_pts_seconds", frame_pts_seconds,
2204 "video_pts_seconds", video_pts_seconds,
2205 "recent_pts_diff", recent_pts_diff);
2209 for (int64_t previous_frame = requested_frame - 1; previous_frame > 0; previous_frame--) {
2211 if (previous_frame_instance && previous_frame_instance->has_image_data) {
2213 f->AddImage(std::make_shared<QImage>(previous_frame_instance->GetImage()->copy()));
2218 if (last_video_frame && !f->has_image_data) {
2220 f->AddImage(std::make_shared<QImage>(last_video_frame->GetImage()->copy()));
2221 }
else if (!f->has_image_data) {
2222 f->AddColor(
"#000000");
2227 double audio_pts_diff = audio_pts_seconds - frame_pts_seconds;
2228 if ((frame_pts_seconds < audio_pts_seconds && audio_pts_diff > 1.0)
2229 || (recent_pts_diff > 1.5)
2234 is_audio_ready =
true;
2236 "frame_number", f->number,
2237 "frame_pts_seconds", frame_pts_seconds,
2238 "audio_pts_seconds", audio_pts_seconds,
2239 "audio_pts_diff", audio_pts_diff,
2240 "recent_pts_diff", recent_pts_diff);
2242 bool is_seek_trash = IsPartialFrame(f->number);
2250 "frame_number", f->number,
2251 "is_video_ready", is_video_ready,
2252 "is_audio_ready", is_audio_ready,
2258 if ((!packet_status.
end_of_file && is_video_ready && is_audio_ready) || packet_status.
end_of_file || is_seek_trash) {
2261 "requested_frame", requested_frame,
2262 "f->number", f->number,
2263 "is_seek_trash", is_seek_trash,
2264 "Working Cache Count", working_cache.
Count(),
2268 if (!is_seek_trash) {
2273 working_cache.
Remove(f->number);
2276 last_frame = f->number;
2279 working_cache.
Remove(f->number);
2286 working_frames.clear();
2287 working_frames.shrink_to_fit();
2291 void FFmpegReader::CheckFPS() {
2299 int frames_per_second[3] = {0,0,0};
2300 int max_fps_index =
sizeof(frames_per_second) /
sizeof(frames_per_second[0]);
2303 int all_frames_detected = 0;
2304 int starting_frames_detected = 0;
2309 if (GetNextPacket() < 0)
2314 if (packet->stream_index == videoStream) {
2317 fps_index = int(video_seconds);
2320 if (fps_index >= 0 && fps_index < max_fps_index) {
2322 starting_frames_detected++;
2323 frames_per_second[fps_index]++;
2327 all_frames_detected++;
2332 float avg_fps = 30.0;
2333 if (starting_frames_detected > 0 && fps_index > 0) {
2334 avg_fps = float(starting_frames_detected) / std::min(fps_index, max_fps_index);
2338 if (avg_fps < 8.0) {
2347 if (all_frames_detected > 0) {
// Release the pixel buffer owned by a decoded AVFrame. av_freep() frees the
// av_image_alloc()'d plane data and nulls the pointer. NOTE(review):
// fragmentary — the guard around this call and the AVFrame struct's own
// deallocation (original lines ~2362-2370) are not visible here.
2361 void FFmpegReader::RemoveAVFrame(AVFrame *remove_frame) {
2365 av_freep(&remove_frame->data[0]);
// Dispose of a packet previously obtained via GetNextPacket(). The `delete`
// pairs with the raw `new AVPacket()` there. NOTE(review): fragmentary —
// the av_packet_unref()/AV_FREE_PACKET step expected before the delete
// (original lines ~2374-2377) is not visible here; confirm in full source.
2373 void FFmpegReader::RemoveAVPacket(AVPacket *remove_packet) {
2378 delete remove_packet;
2393 root[
"type"] =
"FFmpegReader";
2394 root[
"path"] =
path;
2409 catch (
const std::exception& e) {
2411 throw InvalidJSON(
"JSON is invalid (missing keys or invalid data types)");
2422 if (!root[
"path"].isNull())
2423 path = root[
"path"].asString();
Header file for all Exception classes.
AVPixelFormat hw_de_av_pix_fmt_global
AVHWDeviceType hw_de_av_device_type_global
Header file for FFmpegReader class.
Header file for FFmpegUtilities.
#define AV_FREE_CONTEXT(av_context)
#define AV_FREE_FRAME(av_frame)
#define SWR_CONVERT(ctx, out, linesize, out_count, in, linesize2, in_count)
#define AV_GET_CODEC_TYPE(av_stream)
#define AV_GET_CODEC_PIXEL_FORMAT(av_stream, av_context)
#define AV_GET_CODEC_CONTEXT(av_stream, av_codec)
#define AV_FIND_DECODER_CODEC_ID(av_stream)
#define AV_ALLOCATE_FRAME()
#define AV_COPY_PICTURE_DATA(av_frame, buffer, pix_fmt, width, height)
#define AV_FREE_PACKET(av_packet)
#define AVCODEC_REGISTER_ALL
#define AVCODEC_MAX_AUDIO_FRAME_SIZE
#define AV_GET_CODEC_ATTRIBUTES(av_stream, av_context)
#define MY_INPUT_BUFFER_PADDING_SIZE
#define AV_GET_SAMPLE_FORMAT(av_stream, av_context)
#define AV_RESET_FRAME(av_frame)
#define FF_NUM_PROCESSORS
#define OPEN_MP_NUM_PROCESSORS
Header file for Timeline class.
Header file for ZeroMQ-based Logger class.
void SetMaxBytesFromInfo(int64_t number_of_frames, int width, int height, int sample_rate, int channels)
Set maximum bytes to a different amount based on a ReaderInfo struct.
int64_t Count()
Count the frames in the queue.
void Add(std::shared_ptr< openshot::Frame > frame)
Add a Frame to the cache.
std::shared_ptr< openshot::Frame > GetFrame(int64_t frame_number)
Get a frame from the cache.
std::vector< std::shared_ptr< openshot::Frame > > GetFrames()
Get an array of all Frames.
void Remove(int64_t frame_number)
Remove a specific frame.
void Clear()
Clear the cache of all frames.
openshot::TimelineBase * ParentTimeline()
Get the associated Timeline pointer (if any)
This class represents a clip (used to arrange readers on the timeline)
openshot::Keyframe scale_x
Curve representing the horizontal scaling in percent (0 to 1)
openshot::Keyframe scale_y
Curve representing the vertical scaling in percent (0 to 1)
openshot::ScaleType scale
The scale determines how a clip should be resized to fit its parent.
double Y
The Y value of the coordinate (usually representing the value of the property being animated)
void Open() override
Open File - which is called by the constructor automatically.
FFmpegReader(const std::string &path, bool inspect_reader=true)
Constructor for FFmpegReader.
Json::Value JsonValue() const override
Generate Json::Value for this object.
bool GetIsDurationKnown()
Return true if frame can be read with GetFrame()
void SetJsonValue(const Json::Value root) override
Load Json::Value into this object.
CacheMemory final_cache
Final cache object used to hold final frames.
virtual ~FFmpegReader()
Destructor.
std::string Json() const override
Generate JSON string of this object.
std::shared_ptr< openshot::Frame > GetFrame(int64_t requested_frame) override
void Close() override
Close File.
void SetJson(const std::string value) override
Load JSON string into this object.
This class represents a fraction.
int num
Numerator for the fraction.
float ToFloat()
Return this fraction as a float (i.e. 1/2 = 0.5)
double ToDouble() const
Return this fraction as a double (i.e. 1/2 = 0.5)
int den
Denominator for the fraction.
int GetSamplesPerFrame(openshot::Fraction fps, int sample_rate, int channels)
Calculate the # of samples per video frame (for the current frame number)
Exception when no valid codec is found for a file.
Exception for files that can not be found or opened.
Exception for invalid JSON.
Point GetMaxPoint() const
Get max point (by Y coordinate)
Exception when no streams are found in the file.
Exception when memory could not be allocated.
Coordinate co
This is the primary coordinate.
openshot::ReaderInfo info
Information about the current media file.
virtual void SetJsonValue(const Json::Value root)=0
Load Json::Value into this object.
virtual Json::Value JsonValue() const =0
Generate Json::Value for this object.
std::recursive_mutex getFrameMutex
Mutex for multiple threads.
openshot::ClipBase * ParentClip()
Parent clip object of this reader (which can be unparented and NULL)
Exception when a reader is closed, and a frame is requested.
int DE_LIMIT_WIDTH_MAX
Maximum columns that hardware decode can handle.
int HW_DE_DEVICE_SET
Which GPU to use to decode (0 is the first)
int DE_LIMIT_HEIGHT_MAX
Maximum rows that hardware decode can handle.
static Settings * Instance()
Create or get an instance of this logger singleton (invoke the class with this method)
int HARDWARE_DECODER
Use video codec for faster video decoding (if supported)
int preview_height
Optional preview height of timeline image. If your preview window is smaller than the timeline,...
int preview_width
Optional preview width of timeline image. If your preview window is smaller than the timeline,...
This class represents a timeline.
void AppendDebugMethod(std::string method_name, std::string arg1_name="", float arg1_value=-1.0, std::string arg2_name="", float arg2_value=-1.0, std::string arg3_name="", float arg3_value=-1.0, std::string arg4_name="", float arg4_value=-1.0, std::string arg5_name="", float arg5_value=-1.0, std::string arg6_name="", float arg6_value=-1.0)
Append debug information.
static ZmqLogger * Instance()
Create or get an instance of this logger singleton (invoke the class with this method)
This namespace is the default namespace for all code in the openshot library.
@ SCALE_FIT
Scale the clip until either height or width fills the canvas (with no cropping)
@ SCALE_STRETCH
Scale the clip until both height and width fill the canvas (distort to fit)
@ SCALE_CROP
Scale the clip until both height and width fill the canvas (cropping the overlap)
ChannelLayout
This enumeration determines the audio channel layout (such as stereo, mono, 5 point surround,...
const Json::Value stringToJson(const std::string value)
This struct holds the associated video frame and starting sample # for an audio packet.
bool is_near(AudioLocation location, int samples_per_frame, int64_t amount)
int64_t packets_decoded()
int audio_bit_rate
The bit rate of the audio stream (in bits per second)
int video_bit_rate
The bit rate of the video stream (in bits per second)
bool has_single_image
Determines if this file only contains a single image.
float duration
Length of time (in seconds)
openshot::Fraction audio_timebase
The audio timebase determines how long each audio packet should be played.
int width
The width of the video (in pixels)
int channels
The number of audio channels used in the audio stream.
openshot::Fraction fps
Frames per second, as a fraction (i.e. 24/1 = 24 fps)
openshot::Fraction display_ratio
The ratio of width to height of the video stream (i.e. 640x480 has a ratio of 4/3)
int height
The height of the video (in pixels)
int pixel_format
The pixel format (i.e. YUV420P, RGB24, etc...)
int64_t video_length
The number of frames in the video stream.
std::string acodec
The name of the audio codec used to encode / decode the video stream.
std::map< std::string, std::string > metadata
An optional map/dictionary of metadata for this reader.
std::string vcodec
The name of the video codec used to encode / decode the video stream.
openshot::Fraction pixel_ratio
The pixel ratio of the video stream as a fraction (i.e. some pixels are not square)
openshot::ChannelLayout channel_layout
The channel layout (mono, stereo, 5 point surround, etc...)
bool has_video
Determines if this file has a video stream.
bool has_audio
Determines if this file has an audio stream.
openshot::Fraction video_timebase
The video timebase determines how long each frame stays on the screen.
int video_stream_index
The index of the video stream.
int sample_rate
The number of audio samples per second (44100 is a common sample rate)
int audio_stream_index
The index of the audio stream.
int64_t file_size
Size of file (in bytes)