From 0c661e192f89d3c76a6d8db06d682453175b7949 Mon Sep 17 00:00:00 2001 From: xtne6f Date: Wed, 13 Mar 2024 20:49:09 +0900 Subject: [PATCH] Treat non-IDR I-pictures as key frames (issue #1) Refer to hls.js/avc-video-parser.ts "isKey" variables of mp4fragmenter.cpp and tsmemseg.cpp have different roles. --- mp4fragmenter.cpp | 16 ++++++++++++++++ tsmemseg.cpp | 4 ++-- util.cpp | 2 +- util.hpp | 2 +- 4 files changed, 20 insertions(+), 4 deletions(-) diff --git a/mp4fragmenter.cpp b/mp4fragmenter.cpp index bf1cb29..fa83f34 100644 --- a/mp4fragmenter.cpp +++ b/mp4fragmenter.cpp @@ -391,6 +391,22 @@ void CMp4Fragmenter::AddVideoPes(const std::vector<uint8_t> &pes, bool h265) // IRAP (BLA or CRA or IDR) isKey = true; } + else if (!h265 && nalUnitType == 1) { + // Non-IDR + // Emulation prevention should not appear unless first_mb_in_slice value is huge + if (len >= 5 && (nal[1] != 0 || nal[2] != 0 || nal[3] != 3)) { + uint8_t sliceIntro[16] = {}; + std::copy(nal + 1, nal + 5, sliceIntro); + size_t pos = 0; + // first_mb_in_slice + ReadUegBits(sliceIntro, pos); + int sliceType = ReadUegBits(sliceIntro, pos); + if (sliceType == 2 || sliceType == 4 || sliceType == 7 || sliceType == 9) { + // I or SI picture + isKey = true; + } + } + } sampleSize += 4 + len; PushUint(m_videoMdat, static_cast<uint32_t>(len)); m_videoMdat.insert(m_videoMdat.end(), nal, nal + len); diff --git a/tsmemseg.cpp b/tsmemseg.cpp index f830e1a..79e6a58 100644 --- a/tsmemseg.cpp +++ b/tsmemseg.cpp @@ -540,7 +540,7 @@ void ProcessSegmentation(FILE *fp, bool enableFragmentation, uint32_t targetDura if (pid == pat.first_pmt.first_video_pid) { nalState = 0; if (9 + pesHeaderLength < payloadSize) { - if (contains_nal_irap(&nalState, payload + 9 + pesHeaderLength, payloadSize - (9 + pesHeaderLength), h265)) { + if (contains_nal_idr_or_cra(&nalState, payload + 9 + pesHeaderLength, payloadSize - (9 + pesHeaderLength), h265)) { isKey = !isFirstKey; isFirstKey = false; } @@ -554,7 +554,7 @@ void ProcessSegmentation(FILE *fp, bool
enableFragmentation, uint32_t targetDura } } else if (pid == pat.first_pmt.first_video_pid) { - if (contains_nal_irap(&nalState, payload, payloadSize, h265)) { + if (contains_nal_idr_or_cra(&nalState, payload, payloadSize, h265)) { isKey = !isFirstKey; isFirstKey = false; } diff --git a/util.cpp b/util.cpp index 3cf1902..161ff1e 100644 --- a/util.cpp +++ b/util.cpp @@ -149,7 +149,7 @@ void extract_pmt(PMT *pmt, const uint8_t *payload, int payload_size, int unit_st while (!done); } -int contains_nal_irap(int *nal_state, const uint8_t *payload, int payload_size, bool h_265) +int contains_nal_idr_or_cra(int *nal_state, const uint8_t *payload, int payload_size, bool h_265) { for (int i = 0; i < payload_size; ++i) { // 0,1,2: Searching for NAL start code diff --git a/util.hpp b/util.hpp index 1b5aaa9..f872d01 100644 --- a/util.hpp +++ b/util.hpp @@ -45,7 +45,7 @@ uint32_t calc_crc32(const uint8_t *data, int data_size, uint32_t crc = 0xfffffff int extract_psi(PSI *psi, const uint8_t *payload, int payload_size, int unit_start, int counter); void extract_pat(PAT *pat, const uint8_t *payload, int payload_size, int unit_start, int counter); void extract_pmt(PMT *pmt, const uint8_t *payload, int payload_size, int unit_start, int counter); -int contains_nal_irap(int *nal_state, const uint8_t *payload, int payload_size, bool h_265); +int contains_nal_idr_or_cra(int *nal_state, const uint8_t *payload, int payload_size, bool h_265); int get_ts_payload_size(const uint8_t *packet); int64_t get_pes_timestamp(const uint8_t *data_5bytes);