google-ai-edge
diff --git a/Diff for: ‎.bazelversion
+1-1 b/Diff for: ‎.bazelversion
+1-1
diff --git a/Diff for: ‎Dockerfile
+5-3 b/Diff for: ‎Dockerfile
+5-3
diff --git a/Diff for: ‎WORKSPACE
+3-2 b/Diff for: ‎WORKSPACE
+3-2
diff --git a/Diff for: ‎mediapipe/calculators/audio/BUILD
+1 b/Diff for: ‎mediapipe/calculators/audio/BUILD
+1
diff --git a/Diff for: ‎mediapipe/calculators/audio/audio_decoder_calculator_test.cc
+13-12 b/Diff for: ‎mediapipe/calculators/audio/audio_decoder_calculator_test.cc
+13-12
diff --git a/Diff for: ‎mediapipe/calculators/audio/spectrogram_calculator.cc
+58-23 b/Diff for: ‎mediapipe/calculators/audio/spectrogram_calculator.cc
+58-23
diff --git a/Diff for: ‎mediapipe/calculators/audio/spectrogram_calculator.proto
+10-1 b/Diff for: ‎mediapipe/calculators/audio/spectrogram_calculator.proto
+10-1
diff --git a/Diff for: ‎mediapipe/calculators/core/BUILD
+16 b/Diff for: ‎mediapipe/calculators/core/BUILD
+16
diff --git a/Diff for: ‎mediapipe/calculators/core/concatenate_vector_calculator.cc
+10-1 b/Diff for: ‎mediapipe/calculators/core/concatenate_vector_calculator.cc
+10-1
diff --git a/Diff for: ‎mediapipe/calculators/core/flow_limiter_calculator.cc
+2-2 b/Diff for: ‎mediapipe/calculators/core/flow_limiter_calculator.cc
+2-2
@@ -1 +1 @@
-5.0.0
+5.2.0
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-FROM ubuntu:18.04
+FROM ubuntu:20.04
 
 MAINTAINER <[email protected]>
 
@@ -42,6 +42,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
         software-properties-common && \
     add-apt-repository -y ppa:openjdk-r/ppa && \
     apt-get update && apt-get install -y openjdk-8-jdk && \
+    apt-get install -y mesa-common-dev libegl1-mesa-dev libgles2-mesa-dev && \
+    apt-get install -y mesa-utils && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*
 
@@ -50,13 +52,13 @@ RUN pip3 install --upgrade setuptools
 RUN pip3 install wheel
 RUN pip3 install future
 RUN pip3 install six==1.14.0
-RUN pip3 install tensorflow==1.14.0
+RUN pip3 install tensorflow==2.2.0
 RUN pip3 install tf_slim
 
 RUN ln -s /usr/bin/python3 /usr/bin/python
 
 # Install bazel
-ARG BAZEL_VERSION=5.0.0
+ARG BAZEL_VERSION=5.2.0
 RUN mkdir /bazel && \
     wget --no-check-certificate -O /bazel/installer.sh "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/b\
 azel-${BAZEL_VERSION}-installer-linux-x86_64.sh" && \
 
@@ -35,8 +35,9 @@ http_archive(
 
 http_archive(
     name = "rules_cc",
-    strip_prefix = "rules_cc-main",
-    urls = ["https://github.com/bazelbuild/rules_cc/archive/main.zip"],
+    strip_prefix = "rules_cc-2f8c04c04462ab83c545ab14c0da68c3b4c96191",
+    # The commit can be updated if the build passes. Last updated 6/23/22.
+    urls = ["https://github.com/bazelbuild/rules_cc/archive/2f8c04c04462ab83c545ab14c0da68c3b4c96191.zip"],
 )
 
 http_archive(
 
@@ -244,6 +244,7 @@ cc_test(
         "//mediapipe/framework/formats:time_series_header_cc_proto",
         "//mediapipe/framework/port:gtest_main",
         "//mediapipe/framework/port:parse_text_proto",
+        "//mediapipe/framework/tool:test_util",
         "@com_google_absl//absl/flags:flag",
     ],
 )
 
@@ -20,8 +20,12 @@
 #include "mediapipe/framework/port/gtest.h"
 #include "mediapipe/framework/port/parse_text_proto.h"
 #include "mediapipe/framework/port/status_matchers.h"
+#include "mediapipe/framework/tool/test_util.h"
 
 namespace mediapipe {
+namespace {
+
+constexpr char kTestPackageRoot[] = "mediapipe/calculators/audio";
 
 TEST(AudioDecoderCalculatorTest, TestWAV) {
   CalculatorGraphConfig::Node node_config =
@@ -37,9 +41,8 @@ TEST(AudioDecoderCalculatorTest, TestWAV) {
         })pb");
   CalculatorRunner runner(node_config);
   runner.MutableSidePackets()->Tag("INPUT_FILE_PATH") = MakePacket<std::string>(
-      file::JoinPath("./",
-                     "/mediapipe/calculators/audio/"
-                     "testdata/sine_wave_1k_44100_mono_2_sec_wav.audio"));
+      file::JoinPath(GetTestDataDir(kTestPackageRoot),
+                     "sine_wave_1k_44100_mono_2_sec_wav.audio"));
   MP_ASSERT_OK(runner.Run());
   MP_EXPECT_OK(runner.Outputs()
                    .Tag("AUDIO_HEADER")
@@ -68,9 +71,8 @@ TEST(AudioDecoderCalculatorTest, Test48KWAV) {
         })pb");
   CalculatorRunner runner(node_config);
   runner.MutableSidePackets()->Tag("INPUT_FILE_PATH") = MakePacket<std::string>(
-      file::JoinPath("./",
-                     "/mediapipe/calculators/audio/"
-                     "testdata/sine_wave_1k_48000_stereo_2_sec_wav.audio"));
+      file::JoinPath(GetTestDataDir(kTestPackageRoot),
+                     "sine_wave_1k_48000_stereo_2_sec_wav.audio"));
   MP_ASSERT_OK(runner.Run());
   MP_EXPECT_OK(runner.Outputs()
                    .Tag("AUDIO_HEADER")
@@ -99,9 +101,8 @@ TEST(AudioDecoderCalculatorTest, TestMP3) {
         })pb");
   CalculatorRunner runner(node_config);
   runner.MutableSidePackets()->Tag("INPUT_FILE_PATH") = MakePacket<std::string>(
-      file::JoinPath("./",
-                     "/mediapipe/calculators/audio/"
-                     "testdata/sine_wave_1k_44100_stereo_2_sec_mp3.audio"));
+      file::JoinPath(GetTestDataDir(kTestPackageRoot),
+                     "sine_wave_1k_44100_stereo_2_sec_mp3.audio"));
   MP_ASSERT_OK(runner.Run());
   MP_EXPECT_OK(runner.Outputs()
                    .Tag("AUDIO_HEADER")
@@ -130,9 +131,8 @@ TEST(AudioDecoderCalculatorTest, TestAAC) {
         })pb");
   CalculatorRunner runner(node_config);
   runner.MutableSidePackets()->Tag("INPUT_FILE_PATH") = MakePacket<std::string>(
-      file::JoinPath("./",
-                     "/mediapipe/calculators/audio/"
-                     "testdata/sine_wave_1k_44100_stereo_2_sec_aac.audio"));
+      file::JoinPath(GetTestDataDir(kTestPackageRoot),
+                     "sine_wave_1k_44100_stereo_2_sec_aac.audio"));
   MP_ASSERT_OK(runner.Run());
   MP_EXPECT_OK(runner.Outputs()
                    .Tag("AUDIO_HEADER")
@@ -147,4 +147,5 @@ TEST(AudioDecoderCalculatorTest, TestAAC) {
               std::ceil(44100.0 * 2 / 1024));
 }
 
+}  // namespace
 }  // namespace mediapipe
@@ -20,24 +20,22 @@
 #include <memory>
 #include <string>
 
-#include "Eigen/Core"
 #include "absl/strings/string_view.h"
 #include "audio/dsp/spectrogram/spectrogram.h"
 #include "audio/dsp/window_functions.h"
 #include "mediapipe/calculators/audio/spectrogram_calculator.pb.h"
 #include "mediapipe/framework/calculator_framework.h"
 #include "mediapipe/framework/formats/matrix.h"
-#include "mediapipe/framework/formats/time_series_header.pb.h"
-#include "mediapipe/framework/port/core_proto_inc.h"
-#include "mediapipe/framework/port/integral_types.h"
 #include "mediapipe/framework/port/logging.h"
-#include "mediapipe/framework/port/ret_check.h"
-#include "mediapipe/framework/port/source_location.h"
 #include "mediapipe/framework/port/status_builder.h"
 #include "mediapipe/util/time_series_util.h"
 
 namespace mediapipe {
 
+namespace {
+constexpr char kFrameDurationTag[] = "FRAME_DURATION";
+constexpr char kFrameOverlapTag[] = "FRAME_OVERLAP";
+}  // namespace
 // MediaPipe Calculator for computing the "spectrogram" (short-time Fourier
 // transform squared-magnitude, by default) of a multichannel input
 // time series, including optionally overlapping frames.  Options are
@@ -46,11 +44,14 @@ namespace mediapipe {
 //
 // Result is a MatrixData record (for single channel input and when the
 // allow_multichannel_input flag is false), or a vector of MatrixData records,
-// one for each channel (when the allow_multichannel_input flag is set). The
-// rows of each spectrogram matrix correspond to the n_fft/2+1 unique complex
-// values, or squared/linear/dB magnitudes, depending on the output_type option.
-// Each input packet will result in zero or one output packets, each containing
-// one Matrix for each channel of the input, where each Matrix has one or more
+// one for each channel (when the allow_multichannel_input flag is set). Each
+// waveform frame is converted to frequency by a fast Fourier transform whose
+// size, n_fft, is the smallest power of two large enough to enclose the frame
+// length of round(frame_duration_seconds * sample_rate). The rows of each
+// spectrogram matrix (result) correspond to the n_fft/2+1 unique complex
+// values, or squared/linear/dB magnitudes, depending on the output_type option.
+// Each input packet will result in zero or one output packets, each containing
+// one Matrix for each channel of the input, where each Matrix has one or more
 // columns of spectral values, one for each complete frame of input samples. If
 // the input packet contains too few samples to trigger a new output frame, no
 // output packet is generated (since zero-length packets are not legal since
@@ -71,6 +72,22 @@ class SpectrogramCalculator : public CalculatorBase {
         // Input stream with TimeSeriesHeader.
     );
 
+    if (cc->InputSidePackets().HasTag(kFrameDurationTag)) {
+      cc->InputSidePackets()
+          .Tag(kFrameDurationTag)
+          .Set<double>(
+              // Optional side packet for frame_duration_seconds if provided.
+          );
+    }
+
+    if (cc->InputSidePackets().HasTag(kFrameOverlapTag)) {
+      cc->InputSidePackets()
+          .Tag(kFrameOverlapTag)
+          .Set<double>(
+              // Optional side packet for frame_overlap_seconds if provided.
+          );
+    }
+
     SpectrogramCalculatorOptions spectrogram_options =
         cc->Options<SpectrogramCalculatorOptions>();
     if (!spectrogram_options.allow_multichannel_input()) {
@@ -184,27 +201,47 @@ class SpectrogramCalculator : public CalculatorBase {
   // Fixed scale factor applied to output values (regardless of type).
   double output_scale_;
 
-  static const float kLnPowerToDb;
+  static const float kLnSquaredMagnitudeToDb;
 };
 REGISTER_CALCULATOR(SpectrogramCalculator);
 
-// Factor to convert ln(magnitude_squared) to deciBels = 10.0/ln(10.0).
-const float SpectrogramCalculator::kLnPowerToDb = 4.342944819032518;
+// DECIBELS = 20*log10(LINEAR_MAGNITUDE) = 10*log10(SQUARED_MAGNITUDE)
+// = 10/ln(10)*ln(SQUARED_MAGNITUDE).
+// Factor to convert ln(SQUARED_MAGNITUDE) to deciBels = 10.0/ln(10.0).
+const float SpectrogramCalculator::kLnSquaredMagnitudeToDb = 4.342944819032518;
 
 absl::Status SpectrogramCalculator::Open(CalculatorContext* cc) {
   SpectrogramCalculatorOptions spectrogram_options =
       cc->Options<SpectrogramCalculatorOptions>();
+  // frame_duration_seconds and frame_overlap_seconds come either from the
+  // static options or, dynamically, from a side packet. When a side packet is
+  // provided, its value overrides the one from the options.
+
+  double frame_duration_seconds = 0;
+  double frame_overlap_seconds = 0;
+  if (cc->InputSidePackets().HasTag(kFrameDurationTag)) {
+    frame_duration_seconds =
+        cc->InputSidePackets().Tag(kFrameDurationTag).Get<double>();
+  } else {
+    frame_duration_seconds = spectrogram_options.frame_duration_seconds();
+  }
+
+  if (cc->InputSidePackets().HasTag(kFrameOverlapTag)) {
+    frame_overlap_seconds =
+        cc->InputSidePackets().Tag(kFrameOverlapTag).Get<double>();
+  } else {
+    frame_overlap_seconds = spectrogram_options.frame_overlap_seconds();
+  }
 
   use_local_timestamp_ = spectrogram_options.use_local_timestamp();
 
-  if (spectrogram_options.frame_duration_seconds() <= 0.0) {
+  if (frame_duration_seconds <= 0.0) {
     // TODO: return an error.
   }
-  if (spectrogram_options.frame_overlap_seconds() >=
-      spectrogram_options.frame_duration_seconds()) {
+  if (frame_overlap_seconds >= frame_duration_seconds) {
     // TODO: return an error.
   }
-  if (spectrogram_options.frame_overlap_seconds() < 0.0) {
+  if (frame_overlap_seconds < 0.0) {
     // TODO: return an error.
   }
 
@@ -220,10 +257,8 @@ absl::Status SpectrogramCalculator::Open(CalculatorContext* cc) {
     // TODO: return an error.
   }
 
-  frame_duration_samples_ =
-      round(spectrogram_options.frame_duration_seconds() * input_sample_rate_);
-  frame_overlap_samples_ =
-      round(spectrogram_options.frame_overlap_seconds() * input_sample_rate_);
+  frame_duration_samples_ = round(frame_duration_seconds * input_sample_rate_);
+  frame_overlap_samples_ = round(frame_overlap_seconds * input_sample_rate_);
 
   pad_final_packet_ = spectrogram_options.pad_final_packet();
   output_type_ = spectrogram_options.output_type();
@@ -419,7 +454,7 @@ absl::Status SpectrogramCalculator::ProcessVector(const Matrix& input_stream,
       return ProcessVectorToOutput(
           input_stream,
           +[](const Matrix& col) -> const Matrix {
-            return kLnPowerToDb * col.array().log().matrix();
+            return kLnSquaredMagnitudeToDb * col.array().log().matrix();
           }, cc);
     }
     // clang-format on
 
@@ -32,7 +32,11 @@ message SpectrogramCalculatorOptions {
 
   // Duration of overlap between adjacent windows.
   // Hence, frame_rate = 1/(frame_duration_seconds - frame_overlap_seconds).
-  // Required that 0 <= frame_overlap_seconds <  frame_duration_seconds.
+  // Note that frame_rate here is not the MediaPipe packet rate: a "frame" is
+  // one Fourier transform analysis waveform frame. The output MediaPipe packet
+  // rate will be the same as the input's; if the frame rate is lower than the
+  // input packet rate, this results in intermittent empty output packets.
+  // Required that 0 <= frame_overlap_seconds < frame_duration_seconds.
   optional double frame_overlap_seconds = 2 [default = 0.0];
 
   // Whether to pad the final packet with zeros.  If true, guarantees that
@@ -42,6 +46,11 @@ message SpectrogramCalculatorOptions {
 
   // Output value type can be squared-magnitude, linear-magnitude,
   // deciBels (dB, = 20*log10(linear_magnitude)), or std::complex.
+  // Their relationship:
+  // COMPLEX c = Re + Im*i;
+  // SQUARED_MAGNITUDE = Re^2 + Im^2;
+  // LINEAR_MAGNITUDE = sqrt(SQUARED_MAGNITUDE);
+  // DECIBELS = 20*log10(LINEAR_MAGNITUDE) = 10*log10(SQUARED_MAGNITUDE);
   enum OutputType {
     SQUARED_MAGNITUDE = 0;
     LINEAR_MAGNITUDE = 1;
 
@@ -557,6 +557,22 @@ cc_library(
     alwayslink = 1,
 )
 
+cc_test(
+    name = "packet_cloner_calculator_test",
+    srcs = ["packet_cloner_calculator_test.cc"],
+    deps = [
+        ":packet_cloner_calculator",
+        "//mediapipe/framework:calculator_framework",
+        "//mediapipe/framework:timestamp",
+        "//mediapipe/framework/port:gtest_main",
+        "//mediapipe/framework/port:parse_text_proto",
+        "//mediapipe/framework/port:status",
+        "//mediapipe/framework/stream_handler:immediate_input_stream_handler",
+        "//mediapipe/framework/tool:sink",
+        "@com_google_absl//absl/strings",
+    ],
+)
+
 cc_library(
     name = "packet_inner_join_calculator",
     srcs = ["packet_inner_join_calculator.cc"],
 
@@ -73,8 +73,17 @@ typedef ConcatenateVectorCalculator<::mediapipe::NormalizedLandmark>
     ConcatenateLandmarkVectorCalculator;
 MEDIAPIPE_REGISTER_NODE(ConcatenateLandmarkVectorCalculator);
 
+typedef ConcatenateVectorCalculator<::mediapipe::LandmarkList>
+    ConcatenateLandmarkListVectorCalculator;
+MEDIAPIPE_REGISTER_NODE(ConcatenateLandmarkListVectorCalculator);
+
 typedef ConcatenateVectorCalculator<::mediapipe::NormalizedLandmarkList>
-    ConcatenateLandmarListVectorCalculator;
+    ConcatenateNormalizedLandmarkListVectorCalculator;
+MEDIAPIPE_REGISTER_NODE(ConcatenateNormalizedLandmarkListVectorCalculator);
+
+// For backwards compatibility, keep the version with the typo.
+using ConcatenateLandmarListVectorCalculator =
+    ConcatenateNormalizedLandmarkListVectorCalculator;
 MEDIAPIPE_REGISTER_NODE(ConcatenateLandmarListVectorCalculator);
 
 typedef ConcatenateVectorCalculator<mediapipe::ClassificationList>
 
@@ -32,8 +32,8 @@ constexpr char kOptionsTag[] = "OPTIONS";
 // FlowLimiterCalculator is used to limit the number of frames in flight
 // by dropping input frames when necessary.
 //
-// The input stream "FINISH" is used to signal the FlowLimiterCalculator
-// when a frame is finished processing.  Either a non-empty "FINISH" packet
+// The input stream "FINISHED" is used to signal the FlowLimiterCalculator
+// when a frame is finished processing.  Either a non-empty "FINISHED" packet
 // or a timestamp bound should be received for each processed frame.
 //
 // The combination of `max_in_flight: 1` and `max_in_queue: 1` generally gives
Original file line number	Diff line number	Diff line change
`@@ -244,6 +244,7 @@ cc_test(`
`244`	`244`	`"//mediapipe/framework/formats:time_series_header_cc_proto",`
`245`	`245`	`"//mediapipe/framework/port:gtest_main",`
`246`	`246`	`"//mediapipe/framework/port:parse_text_proto",`
	`247`	`+ "//mediapipe/framework/tool:test_util",`
`247`	`248`	`"@com_google_absl//absl/flags:flag",`
`248`	`249`	`],`
`249`	`250`	`)`