From d7f9fcddb3bc34b590195a9830b391b2a960fd01 Mon Sep 17 00:00:00 2001
From: rigaya <rigaya34589@live.jp>
Date: Wed, 7 Jun 2023 18:32:07 +0900
Subject: [PATCH] =?UTF-8?q?faw=E5=91=A8=E3=82=8A=E3=81=AE=E6=9B=B4?=
 =?UTF-8?q?=E6=96=B0=E3=80=82?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 VCECore/rgy_faw.cpp        | 152 +++++++++++++++++++++++++++++++++++--
 VCECore/rgy_faw.h          |  38 ++++++++--
 VCECore/rgy_memmem.h       |   7 +-
 VCECore/rgy_wav_parser.cpp |  29 +++++++
 VCECore/rgy_wav_parser.h   |   4 +
 VCEEnc/VCEEnc_readme.txt   |   2 +-
 6 files changed, 214 insertions(+), 18 deletions(-)

diff --git a/VCECore/rgy_faw.cpp b/VCECore/rgy_faw.cpp
index 5b489bbf..54a76727 100644
--- a/VCECore/rgy_faw.cpp
+++ b/VCECore/rgy_faw.cpp
@@ -45,6 +45,22 @@ decltype(rgy_memmem_fawstart1_c)* get_memmem_fawstart1_func() {
     return rgy_memmem_fawstart1_c;
 }
 
+static const std::array<uint8_t, 2> AACSYNC_BYTES = { 0xff, 0xf0 }; // AAC sync pattern: first byte, and the bits that must be set in the second byte
+
+static size_t rgy_find_aacsync_c(const void *data_, const size_t data_size) {
+    // Returns the offset of the first AAC sync pattern (a 0xff byte followed by a byte whose upper 4 bits
+    // are all set), or RGY_MEMMEM_NOT_FOUND. Compared bytewise: no misaligned/type-punned 16bit loads.
+    const uint8_t *data = (const uint8_t *)data_;
+    if (data_size < AACSYNC_BYTES.size()) {
+        return RGY_MEMMEM_NOT_FOUND;
+    }
+    for (size_t i = 0; i <= data_size - AACSYNC_BYTES.size(); i++) {
+        if (data[i] == AACSYNC_BYTES[0] && (data[i + 1] & AACSYNC_BYTES[1]) == AACSYNC_BYTES[1]) {
+            return i;
+        }
+    }
+    return RGY_MEMMEM_NOT_FOUND;
+}
 
 //16bit音声 -> 8bit音声
 void rgy_convert_audio_16to8(uint8_t *dst, const short *src, const size_t n) {
@@ -163,10 +179,10 @@ void RGYAACHeader::parse(const uint8_t *buf) {
 
 RGYFAWBitstream::RGYFAWBitstream() :
     buffer(),
-    bufferOffset(0),
     bufferLength(0),
+    bufferOffset(0),
     bytePerWholeSample(0),
-    inputSamples(0),
+    inputLengthByte(0),
     outSamples(0),
     aacHeader() {
 
@@ -182,10 +198,10 @@ void RGYFAWBitstream::parseAACHeader(const uint8_t *buf) {
     aacHeader.parse(buf);
 }
 
-int RGYFAWBitstream::aacChannels() const {
+uint32_t RGYFAWBitstream::aacChannels() const { // channel field of the AAC header last parsed by parseAACHeader()
     return aacHeader.channel;
 }
-int RGYFAWBitstream::aacFrameSize() const {
+uint32_t RGYFAWBitstream::aacFrameSize() const { // frame length field of the AAC header last parsed by parseAACHeader()
     return aacHeader.aac_frame_length;
 }
 
@@ -226,13 +242,13 @@ void RGYFAWBitstream::append(const uint8_t *input, const size_t inputLength) {
         memcpy(buffer.data() + bufferOffset + bufferLength, input, inputLength);
     }
     bufferLength += inputLength;
-    inputSamples += inputLength / bytePerWholeSample;
+    inputLengthByte += inputLength;
 }
 
 void RGYFAWBitstream::clear() {
     bufferLength = 0;
     bufferOffset = 0;
-    inputSamples = 0;
+    inputLengthByte = 0; // the byte counter accumulated by append() restarts from zero
     outSamples = 0;
 }
 
@@ -483,3 +499,127 @@ void RGYFAWDecoder::fin(std::vector<uint8_t>& output, RGYFAWBitstream& input) {
         addSilent(output, input);
     }
 }
+
+RGYFAWEncoder::RGYFAWEncoder() :
+    wavheader(),          // value-initialized; replaced by the caller's header in init()
+    fawmode(),            // value-initialized; set in init()
+    delaySamples(0),      // computed from the millisecond delay in init()
+    inputAACPosByte(0),   // output byte position the next AAC frame belongs at
+    outputFAWPosByte(0),  // number of FAW bytes produced so far
+    bufferIn(),           // AAC bytes received but not yet consumed
+    bufferTmp() {         // FAW bytes assembled during one encode() call
+    // Nothing else to do here; the stream parameters are supplied later through init().
+}
+
+RGYFAWEncoder::~RGYFAWEncoder() {
+    // Empty body: no explicit cleanup is performed here.
+}
+
+int RGYFAWEncoder::init(const RGYWAVHeader *data, const RGYFAWMode mode, const int delayMillisec) {
+    wavheader = *data; // private copy of the WAV format to emit; data must not be null. Always returns 0.
+    fawmode = mode;
+    bufferTmp.setBytePerSample(wavheader.number_of_channels * wavheader.bits_per_sample / 8);
+    delaySamples = (int)((int64_t)delayMillisec * (int64_t)wavheader.sample_rate / 1000); // 64bit intermediate: ms * rate can overflow int
+    inputAACPosByte += (int64_t)delaySamples * bufferTmp.bytePerSample(); // shift the first frame by the delay (negative delays move it backwards)
+    return 0;
+}
+
+int RGYFAWEncoder::encode(std::vector<uint8_t>& output, const uint8_t *input, const size_t inputLength) {
+    // Queues inputLength bytes of AAC data and writes every FAW block that can already be completed to output.
+    // Returns -1 while the FAW mode is RGYFAWMode::Unknown, otherwise 0 (output may be left empty).
+    output.clear();
+    bufferTmp.clear();
+    if (RGYFAWMode::Unknown == fawmode) {
+        return -1;
+    }
+    bufferIn.append(input, inputLength);
+    // Move the read position to the first sync pattern; if there is none yet, wait for more input.
+    const size_t syncPos = rgy_find_aacsync_c(bufferIn.data(), bufferIn.size());
+    if (syncPos != RGY_MEMMEM_NOT_FOUND) {
+        bufferIn.addOffset(syncPos);
+        return encode(output);
+    }
+    return 0;
+}
+
+int RGYFAWEncoder::encode(std::vector<uint8_t>& output) {
+    // Converts every complete AAC frame at the head of bufferIn into a FAW block and moves the
+    // result into output. A frame counts as complete once the sync pattern of the following
+    // frame has been found behind it. Always returns 0.
+    while (bufferIn.size() >= (size_t)AAC_HEADER_MIN_SIZE) {
+        bufferIn.parseAACHeader(bufferIn.data());
+        const size_t aacBlockSize = bufferIn.aacFrameSize();
+        if (aacBlockSize < (size_t)AAC_HEADER_MIN_SIZE) {
+            // The frame length includes the header, so a shorter value means a false sync match.
+            // Resynchronize one byte further on; a reported length of 0 would otherwise leave
+            // the buffer position unchanged and this loop would never terminate.
+            const size_t resync = rgy_find_aacsync_c(bufferIn.data() + 1, bufferIn.size() - 1);
+            if (resync == RGY_MEMMEM_NOT_FOUND) {
+                break;
+            }
+            bufferIn.addOffset(resync + 1);
+            continue;
+        }
+        if (aacBlockSize > bufferIn.size()) {
+            break; // frame not fully received yet
+        }
+        const size_t nextSync = rgy_find_aacsync_c(bufferIn.data() + aacBlockSize, bufferIn.size() - aacBlockSize);
+        if (nextSync == RGY_MEMMEM_NOT_FOUND) {
+            break; // the start of the following frame has not arrived yet
+        }
+        if (inputAACPosByte >= outputFAWPosByte) {
+            if (outputFAWPosByte < inputAACPosByte) {
+                // zero-fill the gap up to the position this frame belongs at
+                const auto offsetBytes = inputAACPosByte - outputFAWPosByte;
+                const auto origSize = bufferTmp.size();
+                bufferTmp.append(nullptr, (size_t)offsetBytes);
+                memset(bufferTmp.data() + origSize, 0, (size_t)offsetBytes);
+                outputFAWPosByte = inputAACPosByte;
+            }
+            encodeBlock(bufferIn.data(), aacBlockSize);
+        } // else: the output is already past this frame's position, so the frame is discarded
+        inputAACPosByte += AAC_BLOCK_SAMPLES * bufferTmp.bytePerSample();
+        bufferIn.addOffset(aacBlockSize + nextSync);
+    }
+    // Hand the assembled bytes to the caller and empty the scratch buffer (safe when nothing was produced).
+    output.assign(bufferTmp.data(), bufferTmp.data() + bufferTmp.size());
+    bufferTmp.clear();
+    return 0;
+}
+
+void RGYFAWEncoder::encodeBlock(const uint8_t *data, const size_t dataLength) {
+    // Appends one FAW block to bufferTmp: fawstart1, the dataLength bytes at data,
+    // a 32bit checksum of those bytes (stored in host byte order), then fawfin1.
+    // outputFAWPosByte advances by exactly the number of bytes appended.
+    const auto emit = [this](const uint8_t *bytes, const size_t length) {
+        bufferTmp.append(bytes, length);
+        outputFAWPosByte += length;
+    };
+    const uint32_t frameChecksum = faw_checksum_calc(data, dataLength);
+
+    emit(fawstart1.data(), fawstart1.size());
+    emit(data, dataLength);
+    emit((const uint8_t *)&frameChecksum, sizeof(frameChecksum));
+    emit(fawfin1.data(), fawfin1.size());
+}
+
+int RGYFAWEncoder::fin(std::vector<uint8_t>& output) {
+    // Flushes the encoder: a sync pattern is appended so the last buffered frame is seen as complete.
+    output.clear();
+    bufferIn.append(AACSYNC_BYTES.data(), AACSYNC_BYTES.size());
+    const auto ret = encode(output);
+    if (outputFAWPosByte < inputAACPosByte) {
+        // zero-fill the remaining bytes up to the position the input has reached
+        output.resize(output.size() + (size_t)(inputAACPosByte - outputFAWPosByte), 0);
+    }
+    if (delaySamples < 0) {
+        // negative delay: pad with zeros so that the wav length still matches
+        // (64bit arithmetic: the int product delaySamples * bytePerSample() could overflow)
+        output.resize(output.size() + (size_t)(-(int64_t)delaySamples * bufferTmp.bytePerSample()), 0);
+    }
+    // the final output is 4 bytes shorter (presumably because 4 bytes were inserted at the head)
+    if (output.size() > 4) {
+        output.resize(output.size() - 4);
+    }
+    return ret;
+}
diff --git a/VCECore/rgy_faw.h b/VCECore/rgy_faw.h
index 84c76527..7836e1be 100644
--- a/VCECore/rgy_faw.h
+++ b/VCECore/rgy_faw.h
@@ -70,6 +70,7 @@ enum class RGYFAWMode {
     Mix
 };
 
+static const int AAC_HEADER_MIN_SIZE = 7; // RGYFAWEncoder parses an AAC header only once at least this many bytes are buffered
 static const uint32_t AAC_BLOCK_SAMPLES = 1024;
 
 struct RGYAACHeader {
@@ -78,12 +79,12 @@ struct RGYAACHeader {
     int profile;     // 00 ... main, 01 ... lc, 10 ... ssr
     int samplerate;
     bool private_bit;
-    int channel;
+    uint32_t channel;
     bool original;
     bool home;
     bool copyright;
     bool copyright_start;
-    int aac_frame_length; // AACヘッダを含む
+    uint32_t aac_frame_length; // AACヘッダを含む
     int adts_buffer_fullness;
     int no_raw_data_blocks_in_frame;
 
@@ -98,7 +99,7 @@ class RGYFAWBitstream {
     size_t bufferLength;
 
     int bytePerWholeSample; // channels * bits per sample
-    uint64_t inputSamples;
+    uint64_t inputLengthByte;
     uint64_t outSamples;
 
     RGYAACHeader aacHeader;
@@ -111,8 +112,9 @@ class RGYFAWBitstream {
     uint8_t *data() { return buffer.data() + bufferOffset; }
     const uint8_t *data() const { return buffer.data() + bufferOffset; }
     size_t size() const { return bufferLength; }
-    uint64_t inputSampleStart() const { return inputSamples - bufferLength / bytePerWholeSample; }
-    uint64_t inputSampleFin() const { return inputSamples; }
+    uint64_t inputLength() const { return inputLengthByte; }
+    uint64_t inputSampleStart() const { return (inputLengthByte - bufferLength) / bytePerWholeSample; }
+    uint64_t inputSampleFin() const { return inputLengthByte / bytePerWholeSample; }
     uint64_t outputSamples() const { return outSamples; }
     int bytePerSample() const { return bytePerWholeSample; }
 
@@ -124,8 +126,8 @@ class RGYFAWBitstream {
     void clear();
 
     void parseAACHeader(const uint8_t *buffer);
-    int aacChannels() const;
-    int aacFrameSize() const;
+    uint32_t aacChannels() const;
+    uint32_t aacFrameSize() const;
 };
 
 class RGYFAWDecoder {
@@ -162,4 +164,26 @@ class RGYFAWDecoder {
     void fin(std::vector<uint8_t>& output, RGYFAWBitstream& input);
 };
 
+class RGYFAWEncoder { // wraps AAC frames into FAW blocks (start marker, frame, checksum, end marker)
+private:
+    RGYWAVHeader wavheader;   // copy of the WAV format passed to init()
+    RGYFAWMode fawmode;       // encode() returns -1 while this is RGYFAWMode::Unknown
+    int delaySamples;         // init() delay converted from milliseconds to samples; may be negative
+
+    int64_t inputAACPosByte;  // output byte position the next AAC frame belongs at
+    int64_t outputFAWPosByte; // number of FAW bytes produced so far
+    RGYFAWBitstream bufferIn; // AAC bytes received but not yet consumed
+    RGYFAWBitstream bufferTmp; // FAW bytes assembled during one encode() call
+public:
+    RGYFAWEncoder();
+    ~RGYFAWEncoder();
+
+    int init(const RGYWAVHeader *data, const RGYFAWMode mode, const int delayMillisec); // stores format, mode and delay; returns 0
+    int encode(std::vector<uint8_t>& output, const uint8_t *data, const size_t dataLength); // feeds AAC bytes; output receives the finished FAW bytes
+    int fin(std::vector<uint8_t>& output); // flushes the last buffered frame and pads the tail with zeros
+private:
+    int encode(std::vector<uint8_t>& output); // converts the complete frames held in bufferIn
+    void encodeBlock(const uint8_t *data, const size_t dataLength); // appends one FAW block to bufferTmp
+};
+
 #endif //__RGY_FAW_H__
\ No newline at end of file
diff --git a/VCECore/rgy_memmem.h b/VCECore/rgy_memmem.h
index 260b3655..ab844926 100644
--- a/VCECore/rgy_memmem.h
+++ b/VCECore/rgy_memmem.h
@@ -38,7 +38,7 @@ size_t rgy_memmem_c(const void *data_, const size_t data_size, const void *targe
 size_t rgy_memmem_avx2(const void *data_, const size_t data_size, const void *target_, const size_t target_size);
 size_t rgy_memmem_avx512bw(const void *data_, const size_t data_size, const void *target_, const size_t target_size);
 
-static const auto RGY_MEMMEM_NOT_FOUND = std::numeric_limits<decltype(rgy_memmem_c(nullptr,0,nullptr,0))>::max();
+static const auto RGY_MEMMEM_NOT_FOUND = std::numeric_limits<decltype(rgy_memmem_c(nullptr, 0, nullptr, 0))>::max();
 
 decltype(rgy_memmem_c)* get_memmem_func();
 
@@ -134,7 +134,6 @@ static RGY_FORCEINLINE size_t rgy_memmem_avx2_imp(const void *data_, const size_
     }
     return RGY_MEMMEM_NOT_FOUND;
 }
-
 #endif //#if defined(_M_IX86) || defined(_M_X64) || defined(__x86_64)
 
 #elif defined(RGY_MEMMEM_AVX512) 
@@ -188,7 +187,7 @@ static RGY_FORCEINLINE size_t rgy_memmem_avx512_imp(const void *data_, const siz
             const __m512i r1 = _mm512_loadu_si512((const __m512i*)(data + i + target_size - 1));
             uint64_t mask = _mm512_mask_cmpeq_epi8_mask(_mm512_cmpeq_epi8_mask(r0, target_first), r1, target_last);
             while (mask != 0) {
-                const int64_t j = (int64_t)CTZ64(mask);
+                const auto j = CTZ64(mask);
                 if (memcmp(data + i + j + 1, target + 1, target_size - 2) == 0) {
                     const auto ret = i + j;
                     return ret;
@@ -204,7 +203,7 @@ static RGY_FORCEINLINE size_t rgy_memmem_avx512_imp(const void *data_, const siz
         const __m512i r1 = _mm512_loadu_si512_exact(data + i + target_size - 1, data_fin);
         uint64_t mask = _mm512_mask_cmpeq_epi8_mask(_mm512_cmpeq_epi8_mask(r0, target_first), r1, target_last);
         while (mask != 0) {
-            const int64_t j = (int64_t)CTZ64(mask);
+            const auto j = CTZ64(mask);
             if (memcmp(data + i + j + 1, target + 1, target_size - 2) == 0) {
                 const auto ret = i + j;
                 return ret;
diff --git a/VCECore/rgy_wav_parser.cpp b/VCECore/rgy_wav_parser.cpp
index a63727ab..8c4a51bc 100644
--- a/VCECore/rgy_wav_parser.cpp
+++ b/VCECore/rgy_wav_parser.cpp
@@ -81,3 +81,32 @@ uint32_t RGYWAVHeader::parseHeader(const uint8_t *data) {
 
     return (uint32_t)(data_ptr - data);
 }
+
+std::vector<uint8_t> RGYWAVHeader::createHeader() {
+    // Serializes this header as a 44 byte (WAVE_HEADER_SIZE) WAV header:
+    // "RIFF" <size> "WAVE" | "fmt " <16 byte format chunk> | "data" <data_size>.
+    // Numeric fields are stored with memcpy in host byte order, so nothing is written
+    // through casted integer pointers (no alignment or aliasing assumptions).
+    std::vector<uint8_t> buffer(WAVE_HEADER_SIZE);
+    uint8_t *const head = buffer.data();
+    const auto put_tag = [head](const size_t pos, const char *tag) { memcpy(head + pos, tag, strlen(tag)); };
+    const auto put_i16 = [head](const size_t pos, const int16_t value) { memcpy(head + pos, &value, sizeof(value)); };
+    const auto put_i32 = [head](const size_t pos, const int32_t value) { memcpy(head + pos, &value, sizeof(value)); };
+    const int32_t FMT_SIZE = 16; // byte size of the format chunk body
+    const int16_t FMT_ID = 1;    // format id 1 (PCM in the WAVE format)
+    const int size = bits_per_sample / 8; // bytes per sample of a single channel
+    put_tag(0, "RIFF");
+    put_i32(4, (int32_t)(data_size + WAVE_HEADER_SIZE - 8)); // number of bytes following this field
+    put_tag(8, "WAVE");
+    put_tag(12, "fmt ");
+    put_i32(16, FMT_SIZE);
+    put_i16(20, FMT_ID);
+    put_i16(22, (int16_t)number_of_channels);
+    put_i32(24, (int32_t)sample_rate);
+    put_i32(28, (int32_t)(sample_rate * number_of_channels * size)); // bytes per second
+    put_i16(32, (int16_t)(size * number_of_channels));               // bytes per sample over all channels
+    put_i16(34, (int16_t)(size * 8));                                // bits per sample
+    put_tag(36, "data");
+    put_i32(40, (int32_t)data_size);
+    return buffer;
+}
diff --git a/VCECore/rgy_wav_parser.h b/VCECore/rgy_wav_parser.h
index c7e1801b..f0e76600 100644
--- a/VCECore/rgy_wav_parser.h
+++ b/VCECore/rgy_wav_parser.h
@@ -29,6 +29,9 @@
 #define __RGY_WAV_PARSER_H__
 
 #include <cstdint>
+#include <vector>
+
+static const uint32_t WAVE_HEADER_SIZE = 44; // byte size of the header built by RGYWAVHeader::createHeader()
 
 struct RGYWAVHeader {
     char file_id[5]; // "RIFF"
@@ -46,6 +49,7 @@ struct RGYWAVHeader {
     uint32_t data_size; // samples * number of channels * bits per sample / 8 (Actual number of bytes)
 
     uint32_t parseHeader(const uint8_t *data);
+    std::vector<uint8_t> createHeader();
 };
 
 #endif //__RGY_WAV_PARSER_H__
diff --git a/VCEEnc/VCEEnc_readme.txt b/VCEEnc/VCEEnc_readme.txt
index 6852e568..4e7c7949 100644
--- a/VCEEnc/VCEEnc_readme.txt
+++ b/VCEEnc/VCEEnc_readme.txt
@@ -181,7 +181,7 @@ Radeon RX550
 今後の更新で設定ファイルの互換性がなくなるかもしれません。
 
 【どうでもいいメモ】
-2023.06.xx (8.14)
+2023.06.07 (8.14)
 [VCEEncC]
 - SAR比使用時の出力を改善。
 - 複数のデバイスからGPUを選択する際、OpenCLの初期化に失敗したデバイスの優先度を落とすように。