Skip to content

Commit c688862

Browse files
MediaPipe TeamSebastian Schmidt
MediaPipe Team
authored and
Sebastian Schmidt
committed
Project import generated by Copybara.
GitOrigin-RevId: 6e5aa035cd1f6a9333962df5d3ab97a05bd5744e
1 parent 4a20e99 commit c688862

File tree

144 files changed

+5757
-2103
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

144 files changed

+5757
-2103
lines changed

Diff for: .bazelversion

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
5.0.0
1+
5.2.0

Diff for: Dockerfile

+5-3
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
FROM ubuntu:18.04
15+
FROM ubuntu:20.04
1616

1717
MAINTAINER <[email protected]>
1818

@@ -42,6 +42,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
4242
software-properties-common && \
4343
add-apt-repository -y ppa:openjdk-r/ppa && \
4444
apt-get update && apt-get install -y openjdk-8-jdk && \
45+
apt-get install -y mesa-common-dev libegl1-mesa-dev libgles2-mesa-dev && \
46+
apt-get install -y mesa-utils && \
4547
apt-get clean && \
4648
rm -rf /var/lib/apt/lists/*
4749

@@ -50,13 +52,13 @@ RUN pip3 install --upgrade setuptools
5052
RUN pip3 install wheel
5153
RUN pip3 install future
5254
RUN pip3 install six==1.14.0
53-
RUN pip3 install tensorflow==1.14.0
55+
RUN pip3 install tensorflow==2.2.0
5456
RUN pip3 install tf_slim
5557

5658
RUN ln -s /usr/bin/python3 /usr/bin/python
5759

5860
# Install bazel
59-
ARG BAZEL_VERSION=5.0.0
61+
ARG BAZEL_VERSION=5.2.0
6062
RUN mkdir /bazel && \
6163
wget --no-check-certificate -O /bazel/installer.sh "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/b\
6264
azel-${BAZEL_VERSION}-installer-linux-x86_64.sh" && \

Diff for: WORKSPACE

+3-2
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,9 @@ http_archive(
3535

3636
http_archive(
3737
name = "rules_cc",
38-
strip_prefix = "rules_cc-main",
39-
urls = ["https://github.com/bazelbuild/rules_cc/archive/main.zip"],
38+
strip_prefix = "rules_cc-2f8c04c04462ab83c545ab14c0da68c3b4c96191",
39+
# The commit can be updated if the build passes. Last updated 6/23/22.
40+
urls = ["https://github.com/bazelbuild/rules_cc/archive/2f8c04c04462ab83c545ab14c0da68c3b4c96191.zip"],
4041
)
4142

4243
http_archive(

Diff for: mediapipe/calculators/audio/BUILD

+1
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,7 @@ cc_test(
244244
"//mediapipe/framework/formats:time_series_header_cc_proto",
245245
"//mediapipe/framework/port:gtest_main",
246246
"//mediapipe/framework/port:parse_text_proto",
247+
"//mediapipe/framework/tool:test_util",
247248
"@com_google_absl//absl/flags:flag",
248249
],
249250
)

Diff for: mediapipe/calculators/audio/audio_decoder_calculator_test.cc

+13-12
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,12 @@
2020
#include "mediapipe/framework/port/gtest.h"
2121
#include "mediapipe/framework/port/parse_text_proto.h"
2222
#include "mediapipe/framework/port/status_matchers.h"
23+
#include "mediapipe/framework/tool/test_util.h"
2324

2425
namespace mediapipe {
26+
namespace {
27+
28+
constexpr char kTestPackageRoot[] = "mediapipe/calculators/audio";
2529

2630
TEST(AudioDecoderCalculatorTest, TestWAV) {
2731
CalculatorGraphConfig::Node node_config =
@@ -37,9 +41,8 @@ TEST(AudioDecoderCalculatorTest, TestWAV) {
3741
})pb");
3842
CalculatorRunner runner(node_config);
3943
runner.MutableSidePackets()->Tag("INPUT_FILE_PATH") = MakePacket<std::string>(
40-
file::JoinPath("./",
41-
"/mediapipe/calculators/audio/"
42-
"testdata/sine_wave_1k_44100_mono_2_sec_wav.audio"));
44+
file::JoinPath(GetTestDataDir(kTestPackageRoot),
45+
"sine_wave_1k_44100_mono_2_sec_wav.audio"));
4346
MP_ASSERT_OK(runner.Run());
4447
MP_EXPECT_OK(runner.Outputs()
4548
.Tag("AUDIO_HEADER")
@@ -68,9 +71,8 @@ TEST(AudioDecoderCalculatorTest, Test48KWAV) {
6871
})pb");
6972
CalculatorRunner runner(node_config);
7073
runner.MutableSidePackets()->Tag("INPUT_FILE_PATH") = MakePacket<std::string>(
71-
file::JoinPath("./",
72-
"/mediapipe/calculators/audio/"
73-
"testdata/sine_wave_1k_48000_stereo_2_sec_wav.audio"));
74+
file::JoinPath(GetTestDataDir(kTestPackageRoot),
75+
"sine_wave_1k_48000_stereo_2_sec_wav.audio"));
7476
MP_ASSERT_OK(runner.Run());
7577
MP_EXPECT_OK(runner.Outputs()
7678
.Tag("AUDIO_HEADER")
@@ -99,9 +101,8 @@ TEST(AudioDecoderCalculatorTest, TestMP3) {
99101
})pb");
100102
CalculatorRunner runner(node_config);
101103
runner.MutableSidePackets()->Tag("INPUT_FILE_PATH") = MakePacket<std::string>(
102-
file::JoinPath("./",
103-
"/mediapipe/calculators/audio/"
104-
"testdata/sine_wave_1k_44100_stereo_2_sec_mp3.audio"));
104+
file::JoinPath(GetTestDataDir(kTestPackageRoot),
105+
"sine_wave_1k_44100_stereo_2_sec_mp3.audio"));
105106
MP_ASSERT_OK(runner.Run());
106107
MP_EXPECT_OK(runner.Outputs()
107108
.Tag("AUDIO_HEADER")
@@ -130,9 +131,8 @@ TEST(AudioDecoderCalculatorTest, TestAAC) {
130131
})pb");
131132
CalculatorRunner runner(node_config);
132133
runner.MutableSidePackets()->Tag("INPUT_FILE_PATH") = MakePacket<std::string>(
133-
file::JoinPath("./",
134-
"/mediapipe/calculators/audio/"
135-
"testdata/sine_wave_1k_44100_stereo_2_sec_aac.audio"));
134+
file::JoinPath(GetTestDataDir(kTestPackageRoot),
135+
"sine_wave_1k_44100_stereo_2_sec_aac.audio"));
136136
MP_ASSERT_OK(runner.Run());
137137
MP_EXPECT_OK(runner.Outputs()
138138
.Tag("AUDIO_HEADER")
@@ -147,4 +147,5 @@ TEST(AudioDecoderCalculatorTest, TestAAC) {
147147
std::ceil(44100.0 * 2 / 1024));
148148
}
149149

150+
} // namespace
150151
} // namespace mediapipe

Diff for: mediapipe/calculators/audio/spectrogram_calculator.cc

+58-23
Original file line numberDiff line numberDiff line change
@@ -20,24 +20,22 @@
2020
#include <memory>
2121
#include <string>
2222

23-
#include "Eigen/Core"
2423
#include "absl/strings/string_view.h"
2524
#include "audio/dsp/spectrogram/spectrogram.h"
2625
#include "audio/dsp/window_functions.h"
2726
#include "mediapipe/calculators/audio/spectrogram_calculator.pb.h"
2827
#include "mediapipe/framework/calculator_framework.h"
2928
#include "mediapipe/framework/formats/matrix.h"
30-
#include "mediapipe/framework/formats/time_series_header.pb.h"
31-
#include "mediapipe/framework/port/core_proto_inc.h"
32-
#include "mediapipe/framework/port/integral_types.h"
3329
#include "mediapipe/framework/port/logging.h"
34-
#include "mediapipe/framework/port/ret_check.h"
35-
#include "mediapipe/framework/port/source_location.h"
3630
#include "mediapipe/framework/port/status_builder.h"
3731
#include "mediapipe/util/time_series_util.h"
3832

3933
namespace mediapipe {
4034

35+
namespace {
36+
constexpr char kFrameDurationTag[] = "FRAME_DURATION";
37+
constexpr char kFrameOverlapTag[] = "FRAME_OVERLAP";
38+
} // namespace
4139
// MediaPipe Calculator for computing the "spectrogram" (short-time Fourier
4240
// transform squared-magnitude, by default) of a multichannel input
4341
// time series, including optionally overlapping frames. Options are
@@ -46,11 +44,14 @@ namespace mediapipe {
4644
//
4745
// Result is a MatrixData record (for single channel input and when the
4846
// allow_multichannel_input flag is false), or a vector of MatrixData records,
49-
// one for each channel (when the allow_multichannel_input flag is set). The
50-
// rows of each spectrogram matrix correspond to the n_fft/2+1 unique complex
51-
// values, or squared/linear/dB magnitudes, depending on the output_type option.
52-
// Each input packet will result in zero or one output packets, each containing
53-
// one Matrix for each channel of the input, where each Matrix has one or more
47+
// one for each channel (when the allow_multichannel_input flag is set). Each
48+
// waveform frame is converted to frequency by a fast Fourier transform whose
49+
// size, n_fft, is the smallest power of two large enough to enclose the frame
50+
// length of round(frame_duration_seconds * sample_rate). The rows of each
51+
// spectrogram matrix (result) correspond to the n_fft/2+1 unique complex values,
52+
// or squared/linear/dB magnitudes, depending on the output_type option. Each
53+
// input packet will result in zero or one output packets, each containing one
54+
// Matrix for each channel of the input, where each Matrix has one or more
5455
// columns of spectral values, one for each complete frame of input samples. If
5556
// the input packet contains too few samples to trigger a new output frame, no
5657
// output packet is generated (since zero-length packets are not legal since
@@ -71,6 +72,22 @@ class SpectrogramCalculator : public CalculatorBase {
7172
// Input stream with TimeSeriesHeader.
7273
);
7374

75+
if (cc->InputSidePackets().HasTag(kFrameDurationTag)) {
76+
cc->InputSidePackets()
77+
.Tag(kFrameDurationTag)
78+
.Set<double>(
79+
// Optional side packet for frame_duration_seconds if provided.
80+
);
81+
}
82+
83+
if (cc->InputSidePackets().HasTag(kFrameOverlapTag)) {
84+
cc->InputSidePackets()
85+
.Tag(kFrameOverlapTag)
86+
.Set<double>(
87+
// Optional side packet for frame_overlap_seconds if provided.
88+
);
89+
}
90+
7491
SpectrogramCalculatorOptions spectrogram_options =
7592
cc->Options<SpectrogramCalculatorOptions>();
7693
if (!spectrogram_options.allow_multichannel_input()) {
@@ -184,27 +201,47 @@ class SpectrogramCalculator : public CalculatorBase {
184201
// Fixed scale factor applied to output values (regardless of type).
185202
double output_scale_;
186203

187-
static const float kLnPowerToDb;
204+
static const float kLnSquaredMagnitudeToDb;
188205
};
189206
REGISTER_CALCULATOR(SpectrogramCalculator);
190207

191-
// Factor to convert ln(magnitude_squared) to deciBels = 10.0/ln(10.0).
192-
const float SpectrogramCalculator::kLnPowerToDb = 4.342944819032518;
208+
// DECIBELS = 20*log10(LINEAR_MAGNITUDE) = 10*log10(SQUARED_MAGNITUDE)
209+
// = 10/ln(10)*ln(SQUARED_MAGNITUDE).
210+
// Factor to convert ln(SQUARED_MAGNITUDE) to deciBels = 10.0/ln(10.0).
211+
const float SpectrogramCalculator::kLnSquaredMagnitudeToDb = 4.342944819032518;
193212

194213
absl::Status SpectrogramCalculator::Open(CalculatorContext* cc) {
195214
SpectrogramCalculatorOptions spectrogram_options =
196215
cc->Options<SpectrogramCalculatorOptions>();
216+
// Provide frame_duration_seconds and frame_overlap_seconds either from static
217+
// options or dynamically from a side packet. If provided, the side packet
218+
// value overrides the value from the options.
219+
220+
double frame_duration_seconds = 0;
221+
double frame_overlap_seconds = 0;
222+
if (cc->InputSidePackets().HasTag(kFrameDurationTag)) {
223+
frame_duration_seconds =
224+
cc->InputSidePackets().Tag(kFrameDurationTag).Get<double>();
225+
} else {
226+
frame_duration_seconds = spectrogram_options.frame_duration_seconds();
227+
}
228+
229+
if (cc->InputSidePackets().HasTag(kFrameOverlapTag)) {
230+
frame_overlap_seconds =
231+
cc->InputSidePackets().Tag(kFrameOverlapTag).Get<double>();
232+
} else {
233+
frame_overlap_seconds = spectrogram_options.frame_overlap_seconds();
234+
}
197235

198236
use_local_timestamp_ = spectrogram_options.use_local_timestamp();
199237

200-
if (spectrogram_options.frame_duration_seconds() <= 0.0) {
238+
if (frame_duration_seconds <= 0.0) {
201239
// TODO: return an error.
202240
}
203-
if (spectrogram_options.frame_overlap_seconds() >=
204-
spectrogram_options.frame_duration_seconds()) {
241+
if (frame_overlap_seconds >= frame_duration_seconds) {
205242
// TODO: return an error.
206243
}
207-
if (spectrogram_options.frame_overlap_seconds() < 0.0) {
244+
if (frame_overlap_seconds < 0.0) {
208245
// TODO: return an error.
209246
}
210247

@@ -220,10 +257,8 @@ absl::Status SpectrogramCalculator::Open(CalculatorContext* cc) {
220257
// TODO: return an error.
221258
}
222259

223-
frame_duration_samples_ =
224-
round(spectrogram_options.frame_duration_seconds() * input_sample_rate_);
225-
frame_overlap_samples_ =
226-
round(spectrogram_options.frame_overlap_seconds() * input_sample_rate_);
260+
frame_duration_samples_ = round(frame_duration_seconds * input_sample_rate_);
261+
frame_overlap_samples_ = round(frame_overlap_seconds * input_sample_rate_);
227262

228263
pad_final_packet_ = spectrogram_options.pad_final_packet();
229264
output_type_ = spectrogram_options.output_type();
@@ -419,7 +454,7 @@ absl::Status SpectrogramCalculator::ProcessVector(const Matrix& input_stream,
419454
return ProcessVectorToOutput(
420455
input_stream,
421456
+[](const Matrix& col) -> const Matrix {
422-
return kLnPowerToDb * col.array().log().matrix();
457+
return kLnSquaredMagnitudeToDb * col.array().log().matrix();
423458
}, cc);
424459
}
425460
// clang-format on

Diff for: mediapipe/calculators/audio/spectrogram_calculator.proto

+10-1
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,11 @@ message SpectrogramCalculatorOptions {
3232

3333
// Duration of overlap between adjacent windows.
3434
// Hence, frame_rate = 1/(frame_duration_seconds - frame_overlap_seconds).
35-
// Required that 0 <= frame_overlap_seconds < frame_duration_seconds.
35+
// Note the frame_rate here is not the MediaPipe packet rate, the frame here
36+
// means each Fourier transform analysis waveform frame, the output MediaPipe
37+
// packet rate will be the same as input, if frame rate is lower than input
38+
// packet rate, will result in intermittent empty output packets. Required
39+
// that 0 <= frame_overlap_seconds < frame_duration_seconds.
3640
optional double frame_overlap_seconds = 2 [default = 0.0];
3741

3842
// Whether to pad the final packet with zeros. If true, guarantees that
@@ -42,6 +46,11 @@ message SpectrogramCalculatorOptions {
4246

4347
// Output value type can be squared-magnitude, linear-magnitude,
4448
// deciBels (dB, = 20*log10(linear_magnitude)), or std::complex.
49+
// Their relationship:
50+
// COMPLEX c = Re + Im*i;
51+
// SQUARED_MAGNITUDE = Re^2 + Im^2;
52+
// LINEAR_MAGNITUDE = sqrt(SQUARED_MAGNITUDE);
53+
// DECIBELS = 20*log10(LINEAR_MAGNITUDE) = 10*log10(SQUARED_MAGNITUDE);
4554
enum OutputType {
4655
SQUARED_MAGNITUDE = 0;
4756
LINEAR_MAGNITUDE = 1;

Diff for: mediapipe/calculators/core/BUILD

+16
Original file line numberDiff line numberDiff line change
@@ -557,6 +557,22 @@ cc_library(
557557
alwayslink = 1,
558558
)
559559

560+
cc_test(
561+
name = "packet_cloner_calculator_test",
562+
srcs = ["packet_cloner_calculator_test.cc"],
563+
deps = [
564+
":packet_cloner_calculator",
565+
"//mediapipe/framework:calculator_framework",
566+
"//mediapipe/framework:timestamp",
567+
"//mediapipe/framework/port:gtest_main",
568+
"//mediapipe/framework/port:parse_text_proto",
569+
"//mediapipe/framework/port:status",
570+
"//mediapipe/framework/stream_handler:immediate_input_stream_handler",
571+
"//mediapipe/framework/tool:sink",
572+
"@com_google_absl//absl/strings",
573+
],
574+
)
575+
560576
cc_library(
561577
name = "packet_inner_join_calculator",
562578
srcs = ["packet_inner_join_calculator.cc"],

Diff for: mediapipe/calculators/core/concatenate_vector_calculator.cc

+10-1
Original file line numberDiff line numberDiff line change
@@ -73,8 +73,17 @@ typedef ConcatenateVectorCalculator<::mediapipe::NormalizedLandmark>
7373
ConcatenateLandmarkVectorCalculator;
7474
MEDIAPIPE_REGISTER_NODE(ConcatenateLandmarkVectorCalculator);
7575

76+
typedef ConcatenateVectorCalculator<::mediapipe::LandmarkList>
77+
ConcatenateLandmarkListVectorCalculator;
78+
MEDIAPIPE_REGISTER_NODE(ConcatenateLandmarkListVectorCalculator);
79+
7680
typedef ConcatenateVectorCalculator<::mediapipe::NormalizedLandmarkList>
77-
ConcatenateLandmarListVectorCalculator;
81+
ConcatenateNormalizedLandmarkListVectorCalculator;
82+
MEDIAPIPE_REGISTER_NODE(ConcatenateNormalizedLandmarkListVectorCalculator);
83+
84+
// For backwards compatibility, keep the version with the typo.
85+
using ConcatenateLandmarListVectorCalculator =
86+
ConcatenateNormalizedLandmarkListVectorCalculator;
7887
MEDIAPIPE_REGISTER_NODE(ConcatenateLandmarListVectorCalculator);
7988

8089
typedef ConcatenateVectorCalculator<mediapipe::ClassificationList>

Diff for: mediapipe/calculators/core/flow_limiter_calculator.cc

+2-2
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,8 @@ constexpr char kOptionsTag[] = "OPTIONS";
3232
// FlowLimiterCalculator is used to limit the number of frames in flight
3333
// by dropping input frames when necessary.
3434
//
35-
// The input stream "FINISH" is used to signal the FlowLimiterCalculator
36-
// when a frame is finished processing. Either a non-empty "FINISH" packet
35+
// The input stream "FINISHED" is used to signal the FlowLimiterCalculator
36+
// when a frame is finished processing. Either a non-empty "FINISHED" packet
3737
// or a timestamp bound should be received for each processed frame.
3838
//
3939
// The combination of `max_in_flight: 1` and `max_in_queue: 1` generally gives

0 commit comments

Comments
 (0)