@@ -635,7 +635,9 @@ sd_tiling_calc_tiles(int &num_tiles_dim, float& tile_overlap_factor_dim, int sma
635635}
636636
637637// Tiling
638- __STATIC_INLINE__ void sd_tiling (ggml_tensor* input, ggml_tensor* output, const int scale, const int tile_size, const float tile_overlap_factor, on_tile_process on_processing) {
638+ __STATIC_INLINE__ void sd_tiling_non_square (ggml_tensor* input, ggml_tensor* output, const int scale,
639+ const int p_tile_size_x, const int p_tile_size_y,
640+ const float tile_overlap_factor, on_tile_process on_processing) {
639641 int input_width = (int )input->ne [0 ];
640642 int input_height = (int )input->ne [1 ];
641643 int output_width = (int )output->ne [0 ];
@@ -656,25 +658,25 @@ __STATIC_INLINE__ void sd_tiling(ggml_tensor* input, ggml_tensor* output, const
656658
657659 int num_tiles_x;
658660 float tile_overlap_factor_x;
659- sd_tiling_calc_tiles (num_tiles_x, tile_overlap_factor_x, small_width, tile_size , tile_overlap_factor);
661+ sd_tiling_calc_tiles (num_tiles_x, tile_overlap_factor_x, small_width, p_tile_size_x , tile_overlap_factor);
660662
661663 int num_tiles_y;
662664 float tile_overlap_factor_y;
663- sd_tiling_calc_tiles (num_tiles_y, tile_overlap_factor_y, small_height, tile_size , tile_overlap_factor);
665+ sd_tiling_calc_tiles (num_tiles_y, tile_overlap_factor_y, small_height, p_tile_size_y , tile_overlap_factor);
664666
665667 LOG_DEBUG (" num tiles : %d, %d " , num_tiles_x, num_tiles_y);
666668 LOG_DEBUG (" optimal overlap : %f, %f (targeting %f)" , tile_overlap_factor_x, tile_overlap_factor_y, tile_overlap_factor);
667669
668670 GGML_ASSERT (input_width % 2 == 0 && input_height % 2 == 0 && output_width % 2 == 0 && output_height % 2 == 0 ); // should be multiple of 2
669671
670- int tile_overlap_x = (int32_t )(tile_size * tile_overlap_factor_x);
671- int non_tile_overlap_x = tile_size - tile_overlap_x;
672+ int tile_overlap_x = (int32_t )(p_tile_size_x * tile_overlap_factor_x);
673+ int non_tile_overlap_x = p_tile_size_x - tile_overlap_x;
672674
673- int tile_overlap_y = (int32_t )(tile_size * tile_overlap_factor_y);
674- int non_tile_overlap_y = tile_size - tile_overlap_y;
675+ int tile_overlap_y = (int32_t )(p_tile_size_y * tile_overlap_factor_y);
676+ int non_tile_overlap_y = p_tile_size_y - tile_overlap_y;
675677
676- int tile_size_x = tile_size < small_width ? tile_size : small_width;
677- int tile_size_y = tile_size < small_height ? tile_size : small_height;
678+ int tile_size_x = p_tile_size_x < small_width ? p_tile_size_x : small_width;
679+ int tile_size_y = p_tile_size_y < small_height ? p_tile_size_y : small_height;
678680
679681 int input_tile_size_x = tile_size_x;
680682 int input_tile_size_y = tile_size_y;
@@ -763,6 +765,11 @@ __STATIC_INLINE__ void sd_tiling(ggml_tensor* input, ggml_tensor* output, const
763765 ggml_free (tiles_ctx);
764766}
765767
768+ __STATIC_INLINE__ void sd_tiling (ggml_tensor* input, ggml_tensor* output, const int scale, // Square-tile entry point: keeps the original sd_tiling signature for existing callers.
769+ const int tile_size, const float tile_overlap_factor, on_tile_process on_processing) { // tile_size: single edge length, applied to both the x and y tile dimensions below.
770+ sd_tiling_non_square (input, output, scale, tile_size, tile_size, tile_overlap_factor, on_processing); // Forward to the non-square implementation with p_tile_size_x == p_tile_size_y == tile_size; all other arguments pass through unchanged.
771+ }
772+
766773__STATIC_INLINE__ struct ggml_tensor * ggml_group_norm_32 (struct ggml_context * ctx,
767774 struct ggml_tensor * a) {
768775 const float eps = 1e-6f ; // default eps parameter
0 commit comments