Skip to content

Commit 2e9361f

Browse files
committed
Output human-readable text positions contained in caption/superimpose traces
1 parent 3a13188 commit 2e9361f

File tree

3 files changed

+45
-14
lines changed

3 files changed

+45
-14
lines changed

Readme.txt

+2-2
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ tsreadex [-z ignored][-s seek][-l limit][-t timeout][-m mode][-x pids][-n prog_n
7676
- 初めて現れたPCRのタイムスタンプ
7777
pcrpid=0x{4桁PID};pcr={10桁タイムスタンプ}
7878
- エスケープしたARIB字幕のデータとそのPTS
79-
pts={10桁タイムスタンプ};pcrrel=[+-]{7桁PCRとの差};b24caption[0-8]={改行までデータ}
79+
pts={10桁タイムスタンプ};pcrrel=[+-]{7桁PCRとの差}[;text={可読な文字列の位置},..];b24caption[0-8]={改行までデータ}
8080
# [0-8]はそれぞれ字幕管理と字幕文第1~8言語
8181
# 字幕データはARIB STD-B24のデータグループ(data_group)構造を原則 %{2桁HEX} でエスケープして表現したもの
8282
# ただし、data_group_sizeおよびCRC_16フィールドは取り除かれる
@@ -86,7 +86,7 @@ tsreadex [-z ignored][-s seek][-l limit][-t timeout][-m mode][-x pids][-n prog_n
8686
# %+{ および %+} の括弧の区間はBase64エンコードされている(DRCSデータに使用)
8787
# 字幕本文はARIB STD-B24のUCSの規定に沿ってできるだけUTF-8に変換される
8888
- エスケープしたARIB文字スーパーのデータ
89-
pts={10桁タイムスタンプ};pcrrel=+0000000;b24superimpose[0-8]={改行までデータ}
89+
pts={10桁タイムスタンプ};pcrrel=+0000000[;text={可読な文字列の位置},..];b24superimpose[0-8]={改行までデータ}
9090
# PTSにはPCRタイムスタンプが使われる
9191
# ほかARIB字幕と同様
9292
- エスケープ処理に失敗したとき

traceb24.cpp

+41-11
Original file line numberDiff line numberDiff line change
@@ -198,16 +198,21 @@ void CTraceB24Caption::OutputPrivateDataPes(const std::vector<uint8_t> &pes,
198198
return;
199199
}
200200

201-
PARSE_PRIVATE_DATA_RESULT ret = ParsePrivateData(m_buf, pes.data() + payloadPos, pes.size() - payloadPos, drcsList, langTags);
201+
PARSE_PRIVATE_DATA_RESULT ret = ParsePrivateData(m_buf, m_intBuf, pes.data() + payloadPos, pes.size() - payloadPos, drcsList, langTags);
202202
if (ret != PARSE_PRIVATE_DATA_FAILED_NEED_MANAGEMENT) {
203203
int64_t ptsPcrDiff = (0x200000000 + pts - m_pcr) & 0x1ffffffff;
204204
if (ptsPcrDiff >= 0x100000000) {
205205
ptsPcrDiff -= 0x200000000;
206206
}
207-
fprintf(m_fp, "pts=%010lld;pcrrel=%+08d;b24%s",
207+
fprintf(m_fp, "pts=%010lld;pcrrel=%+08d",
208208
static_cast<long long>(pts),
209-
static_cast<int>(m_pcr < 0 ? -9999999 : std::min<int64_t>(std::max<int64_t>(ptsPcrDiff, -9999999), 9999999)),
210-
dataIdentifier == 0x81 ? "superimpose" : "caption");
209+
static_cast<int>(m_pcr < 0 ? -9999999 : std::min<int64_t>(std::max<int64_t>(ptsPcrDiff, -9999999), 9999999)));
210+
if (ret == PARSE_PRIVATE_DATA_SUCCEEDED) {
211+
for (size_t i = 0; i + 1 < m_intBuf.size(); ++i) {
212+
fprintf(m_fp, "%s%d", i == 0 ? ";text=" : ",", m_intBuf[i + 1] - m_intBuf[i]);
213+
}
214+
}
215+
fprintf(m_fp, ";b24%s", dataIdentifier == 0x81 ? "superimpose" : "caption");
211216
if (ret == PARSE_PRIVATE_DATA_SUCCEEDED) {
212217
m_buf.push_back('\n');
213218
fwrite(m_buf.data(), 1, m_buf.size(), m_fp);
@@ -334,8 +339,21 @@ void InitializeArib8(std::pair<GS_CLASS, uint8_t> (&gbuf)[4], int &gl, int &gr,
334339
gr = 2;
335340
}
336341

337-
void AnalizeArib8(std::vector<uint8_t> &buf, const uint8_t *&data, const uint8_t *dataEnd, const std::vector<uint16_t> &drcsList,
338-
std::pair<GS_CLASS, uint8_t> (&gbuf)[4], int &gl, int &gr, bool isLatin)
342+
// Appends a boundary position to textPosList when the readable/non-readable
// state of the output is about to change.
//
// The parity of textPosList.size() acts as the current state: an entry is
// appended only when (size is odd) == isNextReadable, so repeated calls with
// the same isNextReadable value after a boundary was recorded are no-ops.
//
// The recorded position is the number of bytes currently in buf whose top two
// bits are not "10", i.e. every byte except UTF-8 continuation bytes. For
// UTF-8 content this equals the number of code points written so far.
//
// textPosList     list of boundary positions, appended to in place
// buf             output buffer accumulated so far (read only)
// isNextReadable  whether the data about to be appended to buf is readable text
void CheckReadableTextPosList(std::vector<int> &textPosList, const std::vector<uint8_t> &buf, bool isNextReadable)
{
    const bool hasOddEntries = (textPosList.size() & 1) != 0;
    if (hasOddEntries != isNextReadable) {
        // State is unchanged; no boundary to record.
        return;
    }

    int leadByteCount = 0;
    for (uint8_t octet : buf) {
        // Skip 10xxxxxx bytes so that a multi-byte sequence counts once.
        leadByteCount += ((octet & 0xc0) != 0x80) ? 1 : 0;
    }
    textPosList.push_back(leadByteCount);
}
354+
355+
void AnalizeArib8(std::vector<uint8_t> &buf, std::vector<int> &textPosList, const uint8_t *&data, const uint8_t *dataEnd,
356+
const std::vector<uint16_t> &drcsList, std::pair<GS_CLASS, uint8_t> (&gbuf)[4], int &gl, int &gr, bool isLatin)
339357
{
340358
std::pair<GS_CLASS, uint8_t> *gss = nullptr;
341359
while (data != dataEnd) {
@@ -403,6 +421,7 @@ void AnalizeArib8(std::vector<uint8_t> &buf, const uint8_t *&data, const uint8_t
403421
gss = &gbuf[3];
404422
}
405423
else if (b != 0) {
424+
CheckReadableTextPosList(textPosList, buf, b == 0x20);
406425
AddChar(buf, b);
407426
if (b == 0x0c) {
408427
// CS
@@ -435,13 +454,16 @@ void AnalizeArib8(std::vector<uint8_t> &buf, const uint8_t *&data, const uint8_t
435454
}
436455
else {
437456
if (b == 0x7f) {
457+
CheckReadableTextPosList(textPosList, buf, false);
438458
AddChar(buf, b);
439459
}
440460
else if (b == 0xa0) {
461+
CheckReadableTextPosList(textPosList, buf, true);
441462
AddChar32(buf, U'\u00A0');
442463
}
443464
else {
444465
// caret notation
466+
CheckReadableTextPosList(textPosList, buf, false);
445467
buf.push_back('%');
446468
AddChar(buf, '^');
447469
AddChar(buf, b - 0x40);
@@ -493,6 +515,7 @@ void AnalizeArib8(std::vector<uint8_t> &buf, const uint8_t *&data, const uint8_t
493515
gss = nullptr;
494516
b &= 0x7f;
495517
if (g.first == GS_1BYTE_G) {
518+
CheckReadableTextPosList(textPosList, buf, true);
496519
if (g.second == GS_ASCII || g.second == GS_PROP_ASCII) {
497520
if (isLatin) {
498521
AddChar(buf, b);
@@ -511,6 +534,7 @@ void AnalizeArib8(std::vector<uint8_t> &buf, const uint8_t *&data, const uint8_t
511534
}
512535
}
513536
else if (g.first == GS_2BYTE_G) {
537+
CheckReadableTextPosList(textPosList, buf, true);
514538
uint8_t c = ReadByte(data, dataEnd) & 0x7f;
515539
if (g.second == GS_JIS_KANJI1 ||
516540
g.second == GS_JIS_KANJI2 ||
@@ -545,12 +569,13 @@ void AnalizeArib8(std::vector<uint8_t> &buf, const uint8_t *&data, const uint8_t
545569
x = static_cast<char32_t>(it - 1 - drcsList.begin() + 0xec00);
546570
}
547571
}
572+
CheckReadableTextPosList(textPosList, buf, true);
548573
AddChar32(buf, x);
549574
}
550575
else if (g.second == GS_MACRO) {
551576
if (0x60 <= b && b <= 0x6f) {
552577
const uint8_t *macro = DefaultMacro[b & 0x0f];
553-
AnalizeArib8(buf, macro, macro + sizeof(DefaultMacro[0]), drcsList, gbuf, gl, gr, isLatin);
578+
AnalizeArib8(buf, textPosList, macro, macro + sizeof(DefaultMacro[0]), drcsList, gbuf, gl, gr, isLatin);
554579
}
555580
else {
556581
AddChar32(buf, U'\uFFFD');
@@ -572,6 +597,7 @@ void AnalizeArib8(std::vector<uint8_t> &buf, const uint8_t *&data, const uint8_t
572597
x = static_cast<char32_t>(it - 1 - drcsList.begin() + 0xec00);
573598
}
574599
}
600+
CheckReadableTextPosList(textPosList, buf, true);
575601
AddChar32(buf, x);
576602
}
577603
else {
@@ -585,12 +611,14 @@ void AnalizeArib8(std::vector<uint8_t> &buf, const uint8_t *&data, const uint8_t
585611
}
586612
}
587613

588-
void AddArib8AsUtf8(std::vector<uint8_t> &buf, const uint8_t *data, size_t dataSize, const std::vector<uint16_t> &drcsList, bool isLatin)
614+
void AddArib8AsUtf8(std::vector<uint8_t> &buf, std::vector<int> &textPosList, const uint8_t *data, size_t dataSize,
615+
const std::vector<uint16_t> &drcsList, bool isLatin)
589616
{
590617
std::pair<GS_CLASS, uint8_t> gbuf[4];
591618
int gl, gr;
592619
InitializeArib8(gbuf, gl, gr, isLatin);
593-
AnalizeArib8(buf, data, data + dataSize, drcsList, gbuf, gl, gr, isLatin);
620+
AnalizeArib8(buf, textPosList, data, data + dataSize, drcsList, gbuf, gl, gr, isLatin);
621+
CheckReadableTextPosList(textPosList, buf, false);
594622
}
595623

596624
size_t AddEscapedData(std::vector<uint8_t> &buf, const uint8_t *data, size_t dataSize)
@@ -636,7 +664,7 @@ void AddUcs(std::vector<uint8_t> &buf, const uint8_t *data, size_t dataSize)
636664
}
637665

638666
CTraceB24Caption::PARSE_PRIVATE_DATA_RESULT
639-
CTraceB24Caption::ParsePrivateData(std::vector<uint8_t> &buf, const uint8_t *data, size_t dataSize,
667+
CTraceB24Caption::ParsePrivateData(std::vector<uint8_t> &buf, std::vector<int> &textPosList, const uint8_t *data, size_t dataSize,
640668
std::vector<uint16_t> &drcsList, LANG_TAG_TYPE (&langTags)[8])
641669
{
642670
const uint8_t BEGIN_UNIT_BRACE[] = {'%', '=', '{'};
@@ -670,6 +698,8 @@ CTraceB24Caption::ParsePrivateData(std::vector<uint8_t> &buf, const uint8_t *dat
670698
buf.clear();
671699
buf.push_back('0' + dgiType);
672700
buf.push_back('=');
701+
textPosList.clear();
702+
CheckReadableTextPosList(textPosList, buf, false);
673703
pos += AddEscapedData(buf, data + pos, 3);
674704
// omit data_group_size
675705
pos += 2;
@@ -758,7 +788,7 @@ CTraceB24Caption::ParsePrivateData(std::vector<uint8_t> &buf, const uint8_t *dat
758788
if (unitParameter == 0x20) {
759789
// Statement body
760790
if (lang == LANG_TAG_ARIB8 || lang == LANG_TAG_ARIB8_LATIN) {
761-
AddArib8AsUtf8(buf, data + pos, dataUnitSize, drcsList, lang == LANG_TAG_ARIB8_LATIN);
791+
AddArib8AsUtf8(buf, textPosList, data + pos, dataUnitSize, drcsList, lang == LANG_TAG_ARIB8_LATIN);
762792
pos += dataUnitSize;
763793
}
764794
else if (lang == LANG_TAG_UCS) {

traceb24.hpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ class CTraceB24Caption
3636
void CheckPmt(const PSI &psi);
3737
void OutputPrivateDataPes(const std::vector<uint8_t> &pes,
3838
std::vector<uint16_t> &drcsList, LANG_TAG_TYPE (&langTags)[8]);
39-
static PARSE_PRIVATE_DATA_RESULT ParsePrivateData(std::vector<uint8_t> &buf, const uint8_t *data, size_t dataSize,
39+
static PARSE_PRIVATE_DATA_RESULT ParsePrivateData(std::vector<uint8_t> &buf, std::vector<int> &textPosList, const uint8_t *data, size_t dataSize,
4040
std::vector<uint16_t> &drcsList, LANG_TAG_TYPE (&langTags)[8]);
4141

4242
FILE *m_fp;
@@ -54,6 +54,7 @@ class CTraceB24Caption
5454
int m_pcrPid;
5555
int64_t m_pcr;
5656
std::vector<uint8_t> m_buf;
57+
std::vector<int> m_intBuf;
5758
};
5859

5960
#endif

0 commit comments

Comments
 (0)