@@ -17,7 +17,7 @@ class vits_model {
17
17
// Data members of vits_model (the class header itself is outside this excerpt).
int speaking_rate;
18
18
// Held through std::unique_ptr, so this object is the sole owner of the model data.
std::unique_ptr<vits_model_data> model;
19
19
// NOTE(review): raw ggml pointers below; who allocates and frees them is not visible here.
struct ggml_context * ctx;
20
- struct ggml_tensor * last_hidden_state ;
20
// This change swaps the last_hidden_state member (removed line above) for debug_tensor.
// NOTE(review): the name suggests a general-purpose slot for inspecting an intermediate tensor; confirm against the implementation.
+ struct ggml_tensor * debug_tensor ;
21
21
struct ggml_tensor * waveform;
22
22
struct ggml_tensor * cum_duration_output;
23
23
struct ggml_tensor * predicted_lengths_output;
@@ -55,6 +55,28 @@ class vits_model {
55
55
// Declaration only; the definition is not part of this excerpt.
// Takes a ggml context, an input tensor, a global-conditioning tensor and a `reverse` flag,
// and returns a ggml_tensor pointer.
// NOTE(review): the _graph suffix suggests this adds nodes to a ggml compute graph rather than evaluating eagerly; confirm.
struct ggml_tensor * conv_flow_graph (struct ggml_context * ctx, struct ggml_tensor * inputs, struct ggml_tensor * global_conditioning, bool reverse);
56
56
// Declaration only; the definition is not part of this excerpt.
// Takes a ggml context, an input tensor, a speaker-embeddings tensor, a `reverse` flag and a
// float noise scale, and returns a ggml_tensor pointer.
// NOTE(review): whether speaker_embeddings may be null is not visible here; check the definition.
struct ggml_tensor * stochastic_duration_predictor_graph (struct ggml_context * ctx, struct ggml_tensor * inputs, struct ggml_tensor * speaker_embeddings, bool reverse, float noise_scale_duration);
57
57
// Declaration only; the definition is not part of this excerpt.
// Takes a ggml context, a hidden-states tensor, an integer kernel size, a list of integer
// dilations and a leaky-ReLU slope, and returns a ggml_tensor pointer.
// Note that `dilation` is passed by value (the vector is copied per call) and the slope is a
// double, unlike the float scalars used by the neighbouring declarations.
struct ggml_tensor * hifigan_residual_block_graph (struct ggml_context *ctx, struct ggml_tensor *hidden_states, int kernel_size, std::vector<int > dilation, double leaky_relu_slope);
58
// Declaration only (added by this change); the definition is not part of this excerpt.
// Parameters: a ggml context, the input tensor, and three tensors of unnormalized spline
// parameters (widths, heights, derivatives), followed by optional settings.
// Defaults visible here: reverse = false, tail_bound = 5.0, and 1e-3 for each of
// min_bin_width, min_bin_height and min_derivative. The default literals are doubles that
// get converted to the float parameter type.
// Returns a ggml_tensor pointer.
// NOTE(review): presumably the "unconstrained" variant also handles inputs outside
// [-tail_bound, tail_bound] before delegating to rational_quadratic_spline; confirm against the definition.
+ struct ggml_tensor * unconstrained_rational_quadratic_spline (
59
+ struct ggml_context * ctx,
60
+ struct ggml_tensor * inputs,
61
+ struct ggml_tensor * unnormalized_widths,
62
+ struct ggml_tensor * unnormalized_heights,
63
+ struct ggml_tensor * unnormalized_derivatives,
64
+ bool reverse = false ,
65
+ float tail_bound = 5.0 ,
66
+ float min_bin_width = 1e-3 ,
67
+ float min_bin_height = 1e-3 ,
68
+ float min_derivative = 1e-3 );
69
// Declaration only (added by this change); the definition is not part of this excerpt.
// The parameter list and default values are identical to those of
// unconstrained_rational_quadratic_spline declared just before it: a ggml context, the input
// tensor, three unnormalized spline-parameter tensors, then reverse = false,
// tail_bound = 5.0 and 1e-3 for the three minimum-size settings.
// Returns a ggml_tensor pointer.
// NOTE(review): the expected tensor shapes and the valid input range are not visible here; check the definition.
+ struct ggml_tensor * rational_quadratic_spline (
70
+ struct ggml_context * ctx,
71
+ struct ggml_tensor * inputs,
72
+ struct ggml_tensor * unnormalized_widths,
73
+ struct ggml_tensor * unnormalized_heights,
74
+ struct ggml_tensor * unnormalized_derivatives,
75
+ bool reverse = false ,
76
+ float tail_bound = 5.0 ,
77
+ float min_bin_width = 1e-3 ,
78
+ float min_bin_height = 1e-3 ,
79
+ float min_derivative = 1e-3 );
58
80
// Declaration only; the definition is not part of this excerpt.
// Takes the phoneme string by value and returns a vector of floats.
// NOTE(review): presumably the returned floats are the synthesized audio samples; confirm against the definition.
std::vector<float > process (std::string phonemes);
59
81
};
60
82
0 commit comments