Skip to content

Commit f393cfb

Browse files
committed
主にtsの字幕データの処理を改善。
字幕の長さが不明な時は、次の字幕を待ち、長さを決定するようにする。
1 parent aa752d4 commit f393cfb

File tree

2 files changed

+166
-48
lines changed

2 files changed

+166
-48
lines changed

VCECore/rgy_output_avcodec.cpp

+151-47
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,16 @@ AVMuxAudio::AVMuxAudio() :
189189

190190
}
191191

192+
// Builds an empty subtitle record: decodecSub is value-initialized and both
// origPts and origDuration start at -1 (presumably meaning "not set yet";
// SubtitleTranscode overwrites them from the packet before use).
AVSubtitleData::AVSubtitleData() :
193+
decodecSub(),
194+
origPts(-1),
195+
origDuration(-1) {
196+
197+
}
198+
199+
// Empty body: nothing held by decodecSub is released here.
// NOTE(review): instances are copied by value into AVMuxOther::decodedSub
// (push_back in SubtitleTranscode), so every copy shares the same rects
// pointer. Calling avsubtitle_free() from this destructor would therefore free
// the same data once per copy; the type would need move-only semantics first.
AVSubtitleData::~AVSubtitleData() {
200+
}
201+
192202
AVMuxOther::AVMuxOther() :
193203
inTrackId(0),
194204
streamIn(nullptr),
@@ -200,7 +210,8 @@ AVMuxOther::AVMuxOther() :
200210
outCodecEncode(nullptr),
201211
outCodecEncodeCtx(nullptr),
202212
bufConvert(nullptr),
203-
bsfc(nullptr) {
213+
bsfc(nullptr),
214+
decodedSub() {
204215

205216
}
206217

@@ -1902,15 +1913,43 @@ RGY_ERR RGYOutputAvcodec::InitOther(AVMuxOther *muxSub, AVOutputStreamPrm *input
19021913
//設定されていない必須情報があれば設定する
19031914
muxSub->outCodecDecodeCtx->pkt_timebase = inputStream->src.timebase;
19041915
SetExtraData(muxSub->outCodecDecodeCtx, srcCodecParam->extradata, srcCodecParam->extradata_size);
1905-
int ret;
1906-
if (0 > (ret = avcodec_open2(muxSub->outCodecDecodeCtx, muxSub->outCodecDecode, nullptr))) {
1907-
AddMessage(RGY_LOG_ERROR, _T("failed to open decoder for %s: %s\n"),
1908-
char_to_tstring(avcodec_get_name(srcCodecParam->codec_id)).c_str(), qsv_av_err2str(ret).c_str());
1909-
return RGY_ERR_NULL_PTR;
1916+
1917+
//デコーダのオプションの作成
1918+
{
1919+
AVDictionary *codecPrmDict = nullptr;
1920+
unique_ptr<AVDictionary *, decltype(&av_dict_free)> codecPrmDictDeleter(&codecPrmDict, av_dict_free);
1921+
unique_ptr<char, RGYAVDeleter<void>> prm_buf;
1922+
if (inputStream->decodeCodecPrm.length() > 0) {
1923+
int ret = av_dict_parse_string(&codecPrmDict, tchar_to_string(inputStream->decodeCodecPrm).c_str(), "=", ",", 0);
1924+
if (ret < 0) {
1925+
AddMessage(RGY_LOG_ERROR, _T("failed to parse param(s) for decoder %s for subtitle track %d: %s\n"),
1926+
char_to_tstring(muxSub->outCodecDecode->name).c_str(), trackID(inputStream->src.trackId), qsv_av_err2str(ret).c_str());
1927+
AddMessage(RGY_LOG_ERROR, _T(" prm: %s\n"), inputStream->decodeCodecPrm.c_str());
1928+
return RGY_ERR_INCOMPATIBLE_AUDIO_PARAM;
1929+
}
1930+
char *buf = nullptr;
1931+
av_dict_get_string(codecPrmDict, &buf, '=', ',');
1932+
prm_buf = unique_ptr<char, RGYAVDeleter<void>>(buf, RGYAVDeleter<void>(av_freep));
1933+
}
1934+
int ret = 0;
1935+
if (0 > (ret = avcodec_open2(muxSub->outCodecDecodeCtx, muxSub->outCodecDecode, &codecPrmDict))) {
1936+
AddMessage(RGY_LOG_ERROR, _T("failed to open decoder for %s: %s\n"),
1937+
char_to_tstring(avcodec_get_name(srcCodecParam->codec_id)).c_str(), qsv_av_err2str(ret).c_str());
1938+
return RGY_ERR_NULL_PTR;
1939+
}
1940+
if (codecPrmDict) {
1941+
for (const AVDictionaryEntry *t = nullptr; (t = av_dict_get(codecPrmDict, "", t, AV_DICT_IGNORE_SUFFIX)) != nullptr;) {
1942+
AddMessage(RGY_LOG_WARN, _T("Unknown option to subtitle decoder[%s]: %s=%s, this will be ignored.\n"),
1943+
char_to_tstring(muxSub->outCodecDecode->name).c_str(),
1944+
char_to_tstring(t->key).c_str(),
1945+
char_to_tstring(t->value).c_str());
1946+
}
1947+
}
1948+
AddMessage(RGY_LOG_DEBUG, _T("Subtitle Decoder opened\n"));
1949+
AddMessage(RGY_LOG_DEBUG, _T("Subtitle Decode Info: %s, %dx%d, %s\n"), char_to_tstring(avcodec_get_name(srcCodecParam->codec_id)).c_str(),
1950+
muxSub->outCodecDecodeCtx->width, muxSub->outCodecDecodeCtx->height,
1951+
char_to_tstring(prm_buf.get() ? prm_buf.get() : "default").c_str());
19101952
}
1911-
AddMessage(RGY_LOG_DEBUG, _T("Subtitle Decoder opened\n"));
1912-
AddMessage(RGY_LOG_DEBUG, _T("Subtitle Decode Info: %s, %dx%d\n"), char_to_tstring(avcodec_get_name(srcCodecParam->codec_id)).c_str(),
1913-
muxSub->outCodecDecodeCtx->width, muxSub->outCodecDecodeCtx->height);
19141953

19151954
//エンコーダを探す
19161955
if (nullptr == (muxSub->outCodecEncode = avcodec_find_encoder(codecId))) {
@@ -1945,15 +1984,47 @@ RGY_ERR RGYOutputAvcodec::InitOther(AVMuxOther *muxSub, AVOutputStreamPrm *input
19451984
if (m_Mux.format.outputFmt->flags & AVFMT_GLOBALHEADER) {
19461985
muxSub->outCodecEncodeCtx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
19471986
}
1948-
if (0 > (ret = avcodec_open2(muxSub->outCodecEncodeCtx, muxSub->outCodecEncode, nullptr))) {
1949-
AddMessage(RGY_LOG_ERROR, errorMesForCodec(_T("failed to open encoder"), codecId));
1950-
AddMessage(RGY_LOG_ERROR, _T(" %s\n"), qsv_av_err2str(ret).c_str());
1951-
return RGY_ERR_NULL_PTR;
1952-
}
1953-
AddMessage(RGY_LOG_DEBUG, _T("Opened Subtitle Encoder Param: %s\n"), char_to_tstring(muxSub->outCodecEncode->name).c_str());
1954-
if (nullptr == (muxSub->bufConvert = (uint8_t *)av_malloc(SUB_ENC_BUF_MAX_SIZE))) {
1955-
AddMessage(RGY_LOG_ERROR, _T("failed to allocate buffer memory for subtitle encoding.\n"));
1956-
return RGY_ERR_MEMORY_ALLOC;
1987+
1988+
//エンコーダのオプションの設定
1989+
{
1990+
AVDictionary *codecPrmDict = nullptr;
1991+
unique_ptr<AVDictionary*, decltype(&av_dict_free)> codecPrmDictDeleter(&codecPrmDict, av_dict_free);
1992+
unique_ptr<char, RGYAVDeleter<void>> prm_buf;
1993+
if (inputStream->encodeCodecPrm.length() > 0) {
1994+
int ret = av_dict_parse_string(&codecPrmDict, tchar_to_string(inputStream->encodeCodecPrm).c_str(), "=", ",", 0);
1995+
if (ret < 0) {
1996+
AddMessage(RGY_LOG_ERROR, _T("failed to parse param(s) for codec %s for subtitle track %d: %s\n"),
1997+
char_to_tstring(muxSub->outCodecEncode->name).c_str(), trackID(inputStream->src.trackId), qsv_av_err2str(ret).c_str());
1998+
AddMessage(RGY_LOG_ERROR, _T(" prm: %s\n"), inputStream->encodeCodecPrm.c_str());
1999+
return RGY_ERR_INCOMPATIBLE_AUDIO_PARAM;
2000+
}
2001+
char *buf = nullptr;
2002+
av_dict_get_string(codecPrmDict, &buf, '=', ',');
2003+
prm_buf = unique_ptr<char, RGYAVDeleter<void>>(buf, RGYAVDeleter<void>(av_freep));
2004+
}
2005+
if (muxSub->outCodecEncode->capabilities & AV_CODEC_CAP_EXPERIMENTAL) {
2006+
av_opt_set(muxSub->outCodecEncodeCtx, "strict", "experimental", 0);
2007+
}
2008+
int ret = 0;
2009+
if (0 > (ret = avcodec_open2(muxSub->outCodecEncodeCtx, muxSub->outCodecEncode, &codecPrmDict))) {
2010+
AddMessage(RGY_LOG_ERROR, errorMesForCodec(_T("failed to open encoder"), codecId));
2011+
AddMessage(RGY_LOG_ERROR, _T(" %s\n"), qsv_av_err2str(ret).c_str());
2012+
return RGY_ERR_NULL_PTR;
2013+
}
2014+
AddMessage(RGY_LOG_DEBUG, _T("Opened Subtitle Encoder Param: %s, %s\n"), char_to_tstring(muxSub->outCodecEncode->name).c_str(),
2015+
char_to_tstring(prm_buf.get() ? prm_buf.get() : "default").c_str());
2016+
if (codecPrmDict) {
2017+
for (const AVDictionaryEntry *t = nullptr; (t = av_dict_get(codecPrmDict, "", t, AV_DICT_IGNORE_SUFFIX)) != nullptr;) {
2018+
AddMessage(RGY_LOG_WARN, _T("Unknown option to subtitle encoder[%s]: %s=%s, this will be ignored.\n"),
2019+
char_to_tstring(muxSub->outCodecEncode->name).c_str(),
2020+
char_to_tstring(t->key).c_str(),
2021+
char_to_tstring(t->value).c_str());
2022+
}
2023+
}
2024+
if (nullptr == (muxSub->bufConvert = (uint8_t *)av_malloc(SUB_ENC_BUF_MAX_SIZE))) {
2025+
AddMessage(RGY_LOG_ERROR, _T("failed to allocate buffer memory for subtitle encoding.\n"));
2026+
return RGY_ERR_MEMORY_ALLOC;
2027+
}
19572028
}
19582029
}
19592030

@@ -3655,39 +3726,22 @@ void RGYOutputAvcodec::AudioFlushStream(AVMuxAudio *muxAudio, int64_t *writtenDt
36553726
muxAudio->flushed = true; // AudioFlushStream を完了したフラグ
36563727
}
36573728

3658-
RGY_ERR RGYOutputAvcodec::SubtitleTranscode(const AVMuxOther *muxSub, AVPacket *pkt) {
3659-
//timescaleの変換が入ると、pts + duration > 次のpts となることがある
3660-
//オリジナルのptsを使って再計算する
3661-
const auto org_start_time = pkt->pts;
3662-
const auto org_end_time = pkt->pts + pkt->duration;
3663-
3664-
int got_sub = 0;
3665-
AVSubtitle sub = { 0 };
3666-
if (0 > avcodec_decode_subtitle2(muxSub->outCodecDecodeCtx, &sub, &got_sub, pkt)) {
3667-
AddMessage(RGY_LOG_ERROR, _T("Failed to decode subtitle.\n"));
3668-
m_Mux.format.streamError = true;
3669-
}
3670-
if (!muxSub->bufConvert) {
3671-
AddMessage(RGY_LOG_ERROR, _T("No buffer for encoding subtitle.\n"));
3672-
m_Mux.format.streamError = true;
3673-
}
3674-
m_Mux.poolPkt->returnFree(&pkt);
3675-
if (m_Mux.format.streamError)
3676-
return RGY_ERR_UNKNOWN;
3677-
if (!got_sub || sub.num_rects == 0)
3678-
return RGY_ERR_NONE;
3729+
RGY_ERR RGYOutputAvcodec::SubtitleEncode(const AVMuxOther *muxSub, AVSubtitleData *subData) {
3730+
const int64_t ptsOffset = (m_Mux.video.streamOut && m_Mux.format.timestampPassThrough)
3731+
? 0ll : -1 * av_rescale_q(m_Mux.video.inputFirstKeyPts, m_Mux.video.inputStreamTimebase, muxSub->streamOut->time_base);
36793732

36803733
//AV_CODEC_ID_DVB_SUBTITLEは始まりと終わりで2パケット
36813734
const int nOutPackets = 1 + (muxSub->outCodecEncodeCtx->codec_id == AV_CODEC_ID_DVB_SUBTITLE);
36823735
for (int i = 0; i < nOutPackets; i++) {
3683-
sub.pts += av_rescale_q(sub.start_display_time, av_make_q(1, 1000), av_make_q(1, AV_TIME_BASE));
3684-
sub.end_display_time -= sub.start_display_time;
3736+
auto& sub = subData->decodecSub;
3737+
sub.pts += av_rescale_q(sub.start_display_time, av_make_q(1, 1000), av_make_q(1, AV_TIME_BASE));
3738+
sub.end_display_time -= sub.start_display_time;
36853739
sub.start_display_time = 0;
36863740
if (i > 0) {
36873741
sub.num_rects = 0;
36883742
}
36893743

3690-
int sub_out_size = avcodec_encode_subtitle(muxSub->outCodecEncodeCtx, muxSub->bufConvert, SUB_ENC_BUF_MAX_SIZE, &sub);
3744+
int sub_out_size = avcodec_encode_subtitle(muxSub->outCodecEncodeCtx, muxSub->bufConvert, SUB_ENC_BUF_MAX_SIZE, &subData->decodecSub);
36913745
if (sub_out_size < 0) {
36923746
AddMessage(RGY_LOG_ERROR, _T("failed to encode subtitle.\n"));
36933747
m_Mux.format.streamError = true;
@@ -3698,13 +3752,18 @@ RGY_ERR RGYOutputAvcodec::SubtitleTranscode(const AVMuxOther *muxSub, AVPacket *
36983752
pktOut->data = muxSub->bufConvert;
36993753
pktOut->stream_index = muxSub->streamOut->index;
37003754
pktOut->size = sub_out_size;
3701-
// pts + duration <= 次のptsとなるよう、オリジナルのptsを使って再計算する
3702-
auto end_ts = av_rescale_q(org_end_time, muxSub->outCodecDecodeCtx->pkt_timebase, muxSub->streamOut->time_base);
3703-
pktOut->pts = av_rescale_q(org_start_time, muxSub->outCodecDecodeCtx->pkt_timebase, muxSub->streamOut->time_base);
3704-
pktOut->duration = (int)av_rescale_q(end_ts - pktOut->pts, muxSub->outCodecDecodeCtx->pkt_timebase, muxSub->streamOut->time_base);
3755+
pktOut->pts = av_rescale_q(sub.pts, av_make_q(1, AV_TIME_BASE), muxSub->streamOut->time_base);
3756+
pktOut->duration = (int)av_rescale_q_rnd(sub.end_display_time - sub.start_display_time, av_make_q(1, 1000), muxSub->streamOut->time_base, AV_ROUND_ZERO);
3757+
if (subData->origDuration > 0) {
3758+
// pts + duration <= 次のptsとなるよう、オリジナルのptsでもチェックする
3759+
auto ts_start = av_rescale_q(subData->origPts, muxSub->outCodecDecodeCtx->pkt_timebase, muxSub->streamOut->time_base);
3760+
auto ts_end = av_rescale_q(subData->origPts + subData->origDuration, muxSub->outCodecDecodeCtx->pkt_timebase, muxSub->streamOut->time_base);
3761+
pktOut->duration = std::min(pktOut->duration, av_rescale_q_rnd(ts_end - ts_start, muxSub->streamOut->time_base, muxSub->streamOut->time_base, AV_ROUND_ZERO));
3762+
}
37053763
if (muxSub->outCodecEncodeCtx->codec_id == AV_CODEC_ID_DVB_SUBTITLE) {
37063764
pktOut->pts += 90 * ((i == 0) ? sub.start_display_time : sub.end_display_time);
37073765
}
3766+
pktOut->pts += ptsOffset;
37083767
pktOut->dts = pktOut->pts;
37093768
const auto ret_write = av_interleaved_write_frame(m_Mux.format.formatCtx, pktOut.get());
37103769
if (ret_write != 0) {
@@ -3717,8 +3776,53 @@ RGY_ERR RGYOutputAvcodec::SubtitleTranscode(const AVMuxOther *muxSub, AVPacket *
37173776
return (m_Mux.format.streamError) ? RGY_ERR_UNKNOWN : RGY_ERR_NONE;
37183777
}
37193778

3779+
// Decodes one subtitle packet and hands the result to SubtitleEncode().
//
// The packet's original pts/duration are stored next to the decoded subtitle
// because timescale conversion can make pts + duration exceed the next pts;
// SubtitleEncode() uses the original values to re-check the duration.
//
// A subtitle whose length is unknown (packet duration 0 and end_display_time
// equal to UINT32_MAX) is not written immediately. It is held in
// muxSub->decodedSub, and when the next subtitle with content arrives every
// held entry is ended at that subtitle's start time and written first.
//
// Parameters:
//   muxSub - per-stream state; its decodedSub cache is read and modified.
//   pkt    - input packet; always returned to m_Mux.poolPkt after decoding.
// Returns RGY_ERR_UNKNOWN when the stream is in an error state, otherwise
// RGY_ERR_NONE or whatever SubtitleEncode() reports.
//
// NOTE(review): a decoded subtitle with num_rects == 0 returns before the held
// entries are flushed, and entries still held when the stream ends are not
// written by this function - confirm both are intended.
RGY_ERR RGYOutputAvcodec::SubtitleTranscode(AVMuxOther *muxSub, AVPacket *pkt) {
    AVSubtitleData subData;
    subData.origPts = pkt->pts;
    subData.origDuration = pkt->duration;

    int got_sub = 0;
    if (0 > avcodec_decode_subtitle2(muxSub->outCodecDecodeCtx, &subData.decodecSub, &got_sub, pkt)) {
        AddMessage(RGY_LOG_ERROR, _T("Failed to decode subtitle.\n"));
        m_Mux.format.streamError = true;
    }
    if (!muxSub->bufConvert) {
        AddMessage(RGY_LOG_ERROR, _T("No buffer for encoding subtitle.\n"));
        m_Mux.format.streamError = true;
    }
    m_Mux.poolPkt->returnFree(&pkt);

    // AVSubtitleData's destructor does not release the decoded data, so every
    // exit path that does not hand the subtitle to the cache frees it here.
    // avsubtitle_free() is safe on an empty (zeroed) AVSubtitle.
    if (m_Mux.format.streamError) {
        avsubtitle_free(&subData.decodecSub);
        return RGY_ERR_UNKNOWN;
    }
    if (!got_sub || subData.decodecSub.num_rects == 0) {
        avsubtitle_free(&subData.decodecSub);
        return RGY_ERR_NONE;
    }

    if (!muxSub->decodedSub.empty()) {
        // Write the subtitles that were held because their length was unknown;
        // each one now lasts until the start of the current subtitle.
        RGY_ERR flushErr = RGY_ERR_NONE;
        for (auto& cachedSub : muxSub->decodedSub) {
            if (flushErr == RGY_ERR_NONE) {
                // Guard against missing or non-monotonic timestamps: a negative
                // duration would wrap to a huge value in the uint32_t cast below.
                int64_t heldDuration = 0;
                if (subData.origPts != AV_NOPTS_VALUE && cachedSub.origPts != AV_NOPTS_VALUE) {
                    heldDuration = std::max<int64_t>(0, subData.origPts - cachedSub.origPts);
                }
                cachedSub.origDuration = heldDuration;
                cachedSub.decodecSub.end_display_time = cachedSub.decodecSub.start_display_time
                    + (uint32_t)av_rescale_q_rnd(cachedSub.origDuration, muxSub->outCodecDecodeCtx->pkt_timebase, av_make_q(1, 1000), AV_ROUND_ZERO);
                flushErr = SubtitleEncode(muxSub, &cachedSub);
            }
            // The cached copy is the only owner of its decoded data; release it
            // whether or not it could be written.
            avsubtitle_free(&cachedSub.decodecSub);
        }
        // Always empty the cache so entries that were already encoded are never
        // encoded a second time after a failure.
        muxSub->decodedSub.clear();
        if (flushErr != RGY_ERR_NONE) {
            avsubtitle_free(&subData.decodecSub);
            return flushErr;
        }
    }

    if (subData.origDuration == 0 && subData.decodecSub.end_display_time == std::numeric_limits<uint32_t>::max()) {
        // Length unknown: hold the subtitle until the next one arrives.
        // The shallow copy stored in the cache takes over the decoded data.
        muxSub->decodedSub.push_back(subData);
        return RGY_ERR_NONE;
    }

    const auto encodeErr = SubtitleEncode(muxSub, &subData);
    avsubtitle_free(&subData.decodecSub);
    return encodeErr;
}
3823+
37203824
RGY_ERR RGYOutputAvcodec::WriteOtherPacket(AVPacket *pkt) {
3721-
const AVMuxOther* pMuxOther = getOtherPacketStreamData(pkt);
3825+
AVMuxOther* pMuxOther = getOtherPacketStreamData(pkt);
37223826
if (pMuxOther->bsfc) {
37233827
auto sts = applyBitstreamFilterOther(pkt, pMuxOther);
37243828
//bitstream filterを正常に起動できなかった

VCECore/rgy_output_avcodec.h

+15-1
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,15 @@ struct AVMuxAudio {
215215
AVMuxAudio();
216216
};
217217

218+
// One decoded subtitle together with the timing of the packet it came from.
// Instances are stored by value in AVMuxOther::decodedSub while a subtitle of
// unknown length waits for the next one.
// NOTE(review): the member is spelled "decodecSub" (not "decodedSub"); the
// .cpp code refers to it under this name.
struct AVSubtitleData {
219+
AVSubtitle decodecSub; // decoded subtitle data
220+
// pts of the source packet; SubtitleEncode rescales it from the decoder
// context's pkt_timebase.
int64_t origPts;
221+
// duration of the source packet in the same timebase; for held subtitles it
// is recomputed as (next subtitle pts - origPts).
int64_t origDuration;
222+
223+
AVSubtitleData();
224+
~AVSubtitleData();
225+
};
226+
218227
struct AVMuxOther {
219228
int inTrackId; //ソースファイルの入力トラック番号
220229
const AVStream *streamIn; //入力字幕のストリーム
@@ -232,6 +241,8 @@ struct AVMuxOther {
232241

233242
AVBSFContext *bsfc; //必要なら使用するbitstreamfilter
234243

244+
std::vector<AVSubtitleData> decodedSub; //字幕データ
245+
235246
AVMuxOther();
236247
};
237248

@@ -585,7 +596,10 @@ class RGYOutputAvcodec : public RGYOutput
585596
vector<AVPktMuxData> AudioEncodeFrame(AVMuxAudio *muxAudio, AVFrame *frame);
586597

587598
//字幕パケットを書き出す
588-
RGY_ERR SubtitleTranscode(const AVMuxOther *pMuxSub, AVPacket *pkt);
599+
RGY_ERR SubtitleTranscode(AVMuxOther *pMuxSub, AVPacket *pkt);
600+
601+
//字幕パケットのエンコードと出力
602+
RGY_ERR SubtitleEncode(const AVMuxOther *muxSub, AVSubtitleData *sub);
589603

590604
//その他のパケットを書き出す
591605
RGY_ERR WriteOtherPacket(AVPacket *pkt);

0 commit comments

Comments
 (0)