diff --git a/src/coreclr/gc/gc.cpp b/src/coreclr/gc/gc.cpp index c8efaa9e4b4c75..b4702224b44e35 100644 --- a/src/coreclr/gc/gc.cpp +++ b/src/coreclr/gc/gc.cpp @@ -1454,8 +1454,6 @@ enter_msl_status gc_heap::enter_spin_lock_msl_helper (GCSpinLock* msl) { #ifdef DYNAMIC_HEAP_COUNT uint64_t start = GetHighPrecisionTimeStamp(); - - msl->msl_wait_count++; #endif //DYNAMIC_HEAP_COUNT unsigned int i = 0; @@ -1511,7 +1509,7 @@ enter_msl_status gc_heap::enter_spin_lock_msl_helper (GCSpinLock* msl) #ifdef DYNAMIC_HEAP_COUNT uint64_t end = GetHighPrecisionTimeStamp(); Interlocked::ExchangeAdd64 (&msl->msl_wait_time, end - start); - dprintf (6666, ("wait for msl lock total time: %zd, total count: %zd, this time: %zd, this count: %u", msl->msl_wait_time, msl->msl_wait_count, end - start, i)); + dprintf (3, ("h%d msl lock wait time %zd, total wait time: %zd", heap_number, (end - start), msl->msl_wait_time)); #endif //DYNAMIC_HEAP_COUNT } while (Interlocked::CompareExchange (&msl->lock, lock_taken, lock_free) != lock_free); @@ -2351,9 +2349,6 @@ size_t gc_heap::min_balance_threshold = 0; VOLATILE(BOOL) gc_heap::gc_started; #ifdef MULTIPLE_HEAPS -#ifdef STRESS_DYNAMIC_HEAP_COUNT -int gc_heap::heaps_in_this_gc = 0; -#endif //STRESS_DYNAMIC_HEAP_COUNT GCEvent gc_heap::gc_start_event; bool gc_heap::gc_thread_no_affinitize_p = false; uintptr_t process_mask = 0; @@ -2944,6 +2939,12 @@ BOOL gc_heap::should_expand_in_full_gc = FALSE; #ifdef DYNAMIC_HEAP_COUNT int gc_heap::dynamic_adaptation_mode = dynamic_adaptation_default; gc_heap::dynamic_heap_count_data_t SVR::gc_heap::dynamic_heap_count_data; +uint64_t gc_heap::last_suspended_end_time = 0; +size_t gc_heap::gc_index_full_gc_end = 0; + +#ifdef STRESS_DYNAMIC_HEAP_COUNT +int gc_heap::heaps_in_this_gc = 0; +#endif //STRESS_DYNAMIC_HEAP_COUNT #endif // DYNAMIC_HEAP_COUNT // Provisional mode related stuff. @@ -7045,10 +7046,6 @@ bool gc_heap::create_gc_thread () return GCToEEInterface::CreateThread(gc_thread_stub, this, false, ".NET Server GC"); } -#ifdef DYNAMIC_HEAP_COUNT -static size_t prev_change_heap_count_gc_index; -#endif //DYNAMIC_HEAP_COUNT - #ifdef _MSC_VER #pragma warning(disable:4715) //IA64 xcompiler recognizes that without the 'break;' the while(1) will never end and therefore not return a value for that code path #endif //_MSC_VER @@ -7067,16 +7064,63 @@ void gc_heap::gc_thread_function () if (heap_number == 0) { - uint32_t wait_result = gc_heap::ee_suspend_event.Wait(gradual_decommit_in_progress_p ? DECOMMIT_TIME_STEP_MILLISECONDS : INFINITE, FALSE); + bool wait_on_time_out_p = gradual_decommit_in_progress_p; + uint32_t wait_time = DECOMMIT_TIME_STEP_MILLISECONDS; +#ifdef DYNAMIC_HEAP_COUNT + // background_running_p can only change from false to true during suspension. + if (!gc_heap::background_running_p () && dynamic_heap_count_data.should_change_heap_count) + { + assert (dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes); + + dynamic_heap_count_data_t::sample& sample = dynamic_heap_count_data.samples[dynamic_heap_count_data.sample_index]; + wait_time = min (wait_time, (uint32_t)(sample.elapsed_between_gcs / 1000 / 3)); + wait_time = max (wait_time, 1); + + dprintf (6666, ("gc#0 thread waiting for %d ms (between GCs %I64d)", wait_time, sample.elapsed_between_gcs)); + } +#endif //DYNAMIC_HEAP_COUNT + uint32_t wait_result = gc_heap::ee_suspend_event.Wait(wait_on_time_out_p ? 
wait_time : INFINITE, FALSE); + dprintf (9999, ("waiting for ee done res %d (timeout %d, %I64d ms since last suspend end) (should_change_heap_count is %d) (gradual_decommit_in_progress_p %d)", + wait_result, wait_time, ((GetHighPrecisionTimeStamp() - last_suspended_end_time) / 1000), + dynamic_heap_count_data.should_change_heap_count, gradual_decommit_in_progress_p)); if (wait_result == WAIT_TIMEOUT) { - decommit_lock.Enter(); - gradual_decommit_in_progress_p = decommit_step (DECOMMIT_TIME_STEP_MILLISECONDS); - decommit_lock.Leave(); +#ifdef DYNAMIC_HEAP_COUNT + if (dynamic_heap_count_data.should_change_heap_count) + { +#ifdef BACKGROUND_GC + if (!gc_heap::background_running_p ()) +#endif //BACKGROUND_GC + { + dprintf (6666, ("changing heap count due to timeout")); + check_heap_count (); + } + } +#endif //DYNAMIC_HEAP_COUNT + + if (gradual_decommit_in_progress_p) + { + decommit_lock.Enter (); + gradual_decommit_in_progress_p = decommit_step (DECOMMIT_TIME_STEP_MILLISECONDS); + decommit_lock.Leave (); + } continue; } #ifdef DYNAMIC_HEAP_COUNT + // We might want to consider also doing this when a BGC finishes. + if (dynamic_heap_count_data.should_change_heap_count) + { +#ifdef BACKGROUND_GC + if (!gc_heap::background_running_p ()) +#endif //BACKGROUND_GC + { + // this was a request to do a GC so make sure we follow through with one. + dprintf (6666, ("changing heap count at a GC start")); + check_heap_count (); + } + } + // wait till the threads that should have gone idle at least reached the place where they are about to wait on the idle event. if ((gc_heap::dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes) && (n_heaps != dynamic_heap_count_data.last_n_heaps)) @@ -7095,6 +7139,7 @@ void gc_heap::gc_thread_function () dynamic_heap_count_data.last_n_heaps = n_heaps; } #endif //DYNAMIC_HEAP_COUNT + suspended_start_time = GetHighPrecisionTimeStamp(); BEGIN_TIMING(suspend_ee_during_log); dprintf (9999, ("h0 suspending EE in GC!")); @@ -7265,10 +7310,6 @@ void gc_heap::gc_thread_function () { gradual_decommit_in_progress_p = decommit_step (DECOMMIT_TIME_STEP_MILLISECONDS); } -#ifdef DYNAMIC_HEAP_COUNT - // check if we should adjust the number of heaps - check_heap_count(); -#endif //DYNAMIC_HEAP_COUNT } else { @@ -22101,11 +22142,70 @@ BOOL gc_heap::should_proceed_with_gc() void gc_heap::update_end_gc_time_per_heap() { +#ifdef DYNAMIC_HEAP_COUNT + size_t prev_gen2_end_time = 0; + if ((heap_number == 0) && (dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes) && (settings.condemned_generation == max_generation)) + { + dynamic_data* dd = dynamic_data_of (max_generation); + prev_gen2_end_time = dd_previous_time_clock (dd) + dd_gc_elapsed_time (dd); + } +#endif //DYNAMIC_HEAP_COUNT + for (int gen_number = 0; gen_number <= settings.condemned_generation; gen_number++) { dynamic_data* dd = dynamic_data_of (gen_number); + + if (heap_number == 0) + { + dprintf (6666, ("prev gen%d GC end time: prev start %I64d + prev gc elapsed %Id = %I64d", + gen_number, dd_previous_time_clock (dd), dd_gc_elapsed_time (dd), (dd_previous_time_clock (dd) + dd_gc_elapsed_time (dd)))); + } + dd_gc_elapsed_time (dd) = (size_t)(end_gc_time - dd_time_clock (dd)); + + if (heap_number == 0) + { + dprintf (6666, ("updated NGC%d %Id elapsed time to %I64d - %I64d = %I64d", gen_number, dd_gc_clock (dd), end_gc_time, dd_time_clock (dd), dd_gc_elapsed_time (dd))); + } } + +#ifdef DYNAMIC_HEAP_COUNT + if ((heap_number == 0) && (dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes)) + { + 
dynamic_heap_count_data_t::sample& sample = dynamic_heap_count_data.samples[dynamic_heap_count_data.sample_index]; + sample.elapsed_between_gcs = end_gc_time - last_suspended_end_time; + sample.gc_pause_time = dd_gc_elapsed_time (dynamic_data_of (0)); + sample.msl_wait_time = get_msl_wait_time(); + + dprintf (6666, ("sample#%d: this GC end %I64d - last sus end %I64d = %I64d, this GC pause %I64d, msl wait %I64d", + dynamic_heap_count_data.sample_index, end_gc_time, last_suspended_end_time, sample.elapsed_between_gcs, sample.gc_pause_time, sample.msl_wait_time)); + + last_suspended_end_time = end_gc_time; + + GCEventFireHeapCountSample_V1 ( + (uint64_t)VolatileLoadWithoutBarrier (&settings.gc_index), + sample.elapsed_between_gcs, + sample.gc_pause_time, + sample.msl_wait_time); + + dynamic_heap_count_data.sample_index = (dynamic_heap_count_data.sample_index + 1) % dynamic_heap_count_data_t::sample_size; + + if (settings.condemned_generation == max_generation) + { + gc_index_full_gc_end = dd_gc_clock (dynamic_data_of (0)); + size_t elapsed_between_gen2_gcs = end_gc_time - prev_gen2_end_time; + size_t gen2_elapsed_time = sample.gc_pause_time; + dynamic_heap_count_data.gen2_gc_percents[dynamic_heap_count_data.gen2_sample_index] = (float)gen2_elapsed_time * 100.0f / elapsed_between_gen2_gcs; + + dprintf (6666, ("gen2 sample#%d: this GC end %I64d - last gen2 end %I64d = %I64d, GC elapsed %I64d, percent %.3f", + dynamic_heap_count_data.gen2_sample_index, end_gc_time, prev_gen2_end_time, elapsed_between_gen2_gcs, + gen2_elapsed_time, dynamic_heap_count_data.gen2_gc_percents[dynamic_heap_count_data.gen2_sample_index])); + dynamic_heap_count_data.gen2_sample_index = (dynamic_heap_count_data.gen2_sample_index + 1) % dynamic_heap_count_data_t::sample_size; + } + + calculate_new_heap_count (); + } +#endif //DYNAMIC_HEAP_COUNT } void gc_heap::update_end_ngc_time() @@ -22252,7 +22352,31 @@ void gc_heap::gc1() { dynamic_data* dd = dynamic_data_of (n); end_gc_time = GetHighPrecisionTimeStamp(); + size_t time_since_last_gen2 = 0; + +#ifdef DYNAMIC_HEAP_COUNT + if ((heap_number == 0) && (dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes)) + { + time_since_last_gen2 = (size_t)(end_gc_time - (dd_previous_time_clock (dd) + dd_gc_elapsed_time (dd))); + dprintf (6666, ("BGC %Id end %I64d - (prev gen2 start %I64d + elapsed %Id = %I64d) = time in between gen2 %Id", + dd_gc_clock (dd), end_gc_time, dd_previous_time_clock (dd), dd_gc_elapsed_time (dd), (dd_previous_time_clock (dd) + dd_gc_elapsed_time (dd)), time_since_last_gen2)); + } +#endif //DYNAMIC_HEAP_COUNT + dd_gc_elapsed_time (dd) = (size_t)(end_gc_time - dd_time_clock (dd)); +#ifdef DYNAMIC_HEAP_COUNT + if ((heap_number == 0) && (dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes)) + { + dprintf (6666, ("updating BGC %Id elapsed time to %I64d - %I64d = %I64d", dd_gc_clock (dd), end_gc_time, dd_time_clock (dd), dd_gc_elapsed_time (dd))); + + float bgc_percent = (float)dd_gc_elapsed_time (dd) * 100.0f / (float)time_since_last_gen2; + dynamic_heap_count_data.gen2_gc_percents[dynamic_heap_count_data.gen2_sample_index] = bgc_percent; + dprintf (6666, ("gen2 sample %d elapsed %Id * 100 / time in between gen2 %Id = %.3f", + dynamic_heap_count_data.gen2_sample_index, dd_gc_elapsed_time (dd), time_since_last_gen2, bgc_percent)); + dynamic_heap_count_data.gen2_sample_index = (dynamic_heap_count_data.gen2_sample_index + 1) % dynamic_heap_count_data_t::sample_size; + gc_index_full_gc_end = dd_gc_clock (dynamic_data_of (0)); + } +#endif 
//DYNAMIC_HEAP_COUNT #ifdef HEAP_BALANCE_INSTRUMENTATION if (heap_number == 0) @@ -25152,266 +25276,262 @@ void gc_heap::recommission_heap() #endif //RECORD_LOH_STATE } -void gc_heap::check_heap_count () +float median_of_3 (float a, float b, float c) { - dynamic_heap_count_data.new_n_heaps = n_heaps; +#define compare_and_swap(i, j) \ + { \ + if (i < j) \ + { \ + float t = i; \ + i = j; \ + j = t; \ + } \ + } + compare_and_swap (b, a); + compare_and_swap (c, a); + compare_and_swap (c, b); +#undef compare_and_swap + return b; +} - if (dynamic_adaptation_mode != dynamic_adaptation_to_application_sizes) +size_t gc_heap::get_num_completed_gcs () +{ + size_t num_completed_gcs = settings.gc_index; +#ifdef BACKGROUND_GC + if (g_heaps[0]->is_bgc_in_progress ()) { - return; + num_completed_gcs--; + dprintf (6666, ("BGC in prog, completed GCs -> %Id", num_completed_gcs)); } +#endif //BACKGROUND_GC + + return num_completed_gcs; +} - // we should be calling this only on the main GC thread - assert (heap_number == 0); +int gc_heap::calculate_new_heap_count () +{ + assert (dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes); - // acquire data for the current sample - uint64_t soh_msl_wait_time = 0; - uint64_t uoh_msl_wait_time = 0; - size_t allocating_thread_count = 0; - size_t heap_size = 0; - for (int i = 0; i < n_heaps; i++) + size_t num_completed_gcs = get_num_completed_gcs (); + + dprintf (6666, ("current GC %Id(completed: %Id), prev completed GCs %Id, last full GC happened at index %Id", + VolatileLoadWithoutBarrier (&settings.gc_index), num_completed_gcs, dynamic_heap_count_data.prev_num_completed_gcs, gc_index_full_gc_end)); + + if (num_completed_gcs < (dynamic_heap_count_data.prev_num_completed_gcs + dynamic_heap_count_data_t::sample_size)) { - gc_heap* hp = g_heaps[i]; + dprintf (6666, ("not enough GCs, skipping")); + return n_heaps; + } + + float median_gen2_tcp_percent = 0.0f; + if (gc_index_full_gc_end >= (settings.gc_index - dynamic_heap_count_data_t::sample_size)) + { + median_gen2_tcp_percent = dynamic_heap_count_data.get_median_gen2_gc_percent (); + } - allocating_thread_count += hp->alloc_contexts_used; + // If there was a blocking gen2 GC, the overhead would be very large and most likely we would not pick it. So we + // rely on the gen2 sample's overhead calculated above. + float throughput_cost_percents[dynamic_heap_count_data_t::sample_size]; + for (int i = 0; i < dynamic_heap_count_data_t::sample_size; i++) + { + dynamic_heap_count_data_t::sample& sample = dynamic_heap_count_data.samples[i]; + throughput_cost_percents[i] = (sample.elapsed_between_gcs ? 
(((float)sample.msl_wait_time / n_heaps + sample.gc_pause_time) * 100.0f / (float)sample.elapsed_between_gcs) : 0.0f); + assert (throughput_cost_percents[i] >= 0.0); + if (throughput_cost_percents[i] > 100.0) + throughput_cost_percents[i] = 100.0; + dprintf (6666, ("sample %d: msl %I64d / %d + pause %I64d / elapsed %I64d = throughput_cost_percent: %.3f", i, + sample.msl_wait_time, n_heaps, sample.gc_pause_time, sample.elapsed_between_gcs, throughput_cost_percents[i])); + } - soh_msl_wait_time += hp->more_space_lock_soh.msl_wait_time; - hp->more_space_lock_soh.msl_wait_time = 0; - hp->more_space_lock_soh.msl_wait_count = 0; + float median_throughput_cost_percent = median_of_3 (throughput_cost_percents[0], throughput_cost_percents[1], throughput_cost_percents[2]); - uoh_msl_wait_time += hp->more_space_lock_uoh.msl_wait_time; - hp->more_space_lock_uoh.msl_wait_time = 0; - hp->more_space_lock_uoh.msl_wait_count = 0; + // apply exponential smoothing and use 1/3 for the smoothing factor + const float smoothing = 3; + float smoothed_median_throughput_cost_percent = dynamic_heap_count_data.smoothed_median_throughput_cost_percent; + if (smoothed_median_throughput_cost_percent != 0.0f) + { + // average it with the previous value + smoothed_median_throughput_cost_percent = median_throughput_cost_percent / smoothing + (smoothed_median_throughput_cost_percent / smoothing) * (smoothing - 1); + } + else + { + smoothed_median_throughput_cost_percent = median_throughput_cost_percent; + } + + dprintf (6666, ("median tcp: %.3f, smoothed tcp: %.3f, gen2 tcp %.3f(%.3f, %.3f, %.3f)", + median_throughput_cost_percent, smoothed_median_throughput_cost_percent, median_gen2_tcp_percent, + dynamic_heap_count_data.gen2_gc_percents[0], dynamic_heap_count_data.gen2_gc_percents[1], dynamic_heap_count_data.gen2_gc_percents[2])); + + size_t heap_size = 0; + for (int i = 0; i < n_heaps; i++) + { + gc_heap* hp = g_heaps[i]; for (int gen_idx = 0; gen_idx < total_generation_count; gen_idx++) { dynamic_data* dd = hp->dynamic_data_of (gen_idx); // estimate the size of each generation as the live data size plus the budget - heap_size += dd_promoted_size (dd) + dd_desired_allocation (dd); - dprintf (6666, ("h%d g%d promoted: %zd desired allocation: %zd", i, gen_idx, dd_promoted_size (dd), dd_desired_allocation (dd))); + heap_size += dd_current_size (dd) + dd_desired_allocation (dd); + dprintf (3, ("h%d g%d current: %zd desired allocation: %zd", i, gen_idx, dd_current_size (dd), dd_desired_allocation (dd))); } } - dynamic_data* hp0_dd0 = g_heaps[0]->dynamic_data_of (0); + // estimate the space cost of adding a heap as the min gen0 budget + size_t heap_space_cost_per_heap = dd_min_size (g_heaps[0]->dynamic_data_of (0)); - // persist data for the current sample - dynamic_heap_count_data_t::sample& sample = dynamic_heap_count_data.samples[dynamic_heap_count_data.sample_index]; + // compute the % space cost of adding a heap + float percent_heap_space_cost_per_heap = heap_space_cost_per_heap * 100.0f / heap_size; - sample.soh_msl_wait_time = soh_msl_wait_time / n_heaps; - sample.uoh_msl_wait_time = uoh_msl_wait_time / n_heaps; - sample.elapsed_between_gcs = dd_time_clock (hp0_dd0) - dd_previous_time_clock (hp0_dd0); - sample.gc_elapsed_time = dd_gc_elapsed_time (hp0_dd0); - sample.allocating_thread_count = allocating_thread_count; - sample.heap_size = heap_size; + // compute reasonable step sizes for the heap count + // + // on the way up, we essentially multiply the heap count by 1.5, so we go 1, 2, 3, 5, 8 ... 
+ // we don't go all the way to the number of CPUs, but stay 1 or 2 short + int step_up = (n_heaps + 1) / 2; + int extra_heaps = 1 + (n_max_heaps >= 32); + step_up = min (step_up, n_max_heaps - extra_heaps - n_heaps); - dprintf (9999, ("sample %d: soh_msl_wait_time: %zd, uoh_msl_wait_time: %zd, elapsed_between_gcs: %zd, gc_elapsed_time: %d, heap_size: %zd MB", - dynamic_heap_count_data.sample_index, - sample.soh_msl_wait_time, - sample.uoh_msl_wait_time, - sample.elapsed_between_gcs, - sample.gc_elapsed_time, - sample.heap_size/(1024*1024))); + // on the way down, we essentially divide the heap count by 1.5 + int step_down = (n_heaps + 1) / 3; - dynamic_heap_count_data.sample_index = (dynamic_heap_count_data.sample_index + 1) % dynamic_heap_count_data_t::sample_size; + // estimate the potential time benefit of going up a step + float tcp_reduction_per_step_up = smoothed_median_throughput_cost_percent * step_up / (n_heaps + step_up); - GCEventFireHeapCountSample_V1( - sample.gc_elapsed_time, - sample.soh_msl_wait_time, - sample.uoh_msl_wait_time, - sample.elapsed_between_gcs - ); + // estimate the potential time cost of going down a step + float tcp_increase_per_step_down = smoothed_median_throughput_cost_percent * step_down / (n_heaps - step_down); - dprintf (9999, ("current GC %Id, prev %Id", VolatileLoadWithoutBarrier (&settings.gc_index), prev_change_heap_count_gc_index)); + // estimate the potential space cost of going up a step + float scp_increase_per_step_up = percent_heap_space_cost_per_heap * step_up; - if (settings.gc_index < (prev_change_heap_count_gc_index + 3)) + // estimate the potential space saving of going down a step + float scp_decrease_per_step_down = percent_heap_space_cost_per_heap * step_down; + + dprintf (6666, ("[CHP] u %d, d %d | space cost %Id / heap %Id(%.2fmb) = scp %.3f (u: %.3f, d: %.3f) | stcp %.3f, u * %.1f = %.3f, d * %.1f = %.3f", + step_up, step_down, + heap_space_cost_per_heap, heap_size, ((float)heap_size / (float)1000 / (float)1000), percent_heap_space_cost_per_heap, + scp_increase_per_step_up, scp_decrease_per_step_down, + smoothed_median_throughput_cost_percent, + ((float)step_up / (float)(n_heaps + step_up)), tcp_reduction_per_step_up, + ((float)step_down / (float)(n_heaps - step_down)), tcp_increase_per_step_down)); + +#ifdef STRESS_DYNAMIC_HEAP_COUNT + // quick hack for initial testing + int new_n_heaps = (int)gc_rand::get_rand (n_max_heaps - 1) + 1; + + // if we are adjusting down, make sure we adjust lower than the lowest uoh msl heap + if ((new_n_heaps < n_heaps) && (dynamic_heap_count_data.lowest_heap_with_msl_uoh != -1)) { - // reconsider the decision every few gcs - return; + new_n_heaps = min (dynamic_heap_count_data.lowest_heap_with_msl_uoh, new_n_heaps); + new_n_heaps = max (new_n_heaps, 1); } - -#ifdef BACKGROUND_GC - if (gc_heap::background_running_p()) + dprintf (6666, ("stress %d -> %d", n_heaps, new_n_heaps)); +#else //STRESS_DYNAMIC_HEAP_COUNT + int new_n_heaps = n_heaps; + if (median_throughput_cost_percent > 10.0f) { - // can't have background gc running while we change the number of heaps - // so it's useless to compute a new number of heaps here - dprintf (9999, ("BGC in progress, don't change")); + // ramp up more aggressively - use as many heaps as it would take to bring + // the tcp down to 5% + new_n_heaps = (int)(n_heaps * (median_throughput_cost_percent / 5.0)); + dprintf (6666, ("[CHP0] tcp %.3f -> %d * %.3f = %d", median_throughput_cost_percent, n_heaps, (median_throughput_cost_percent / 5.0), new_n_heaps)); + 
new_n_heaps = min (new_n_heaps, n_max_heaps - extra_heaps); } - else -#endif //BACKGROUND_GC + // if the median tcp is 10% or less, react slower + else if ((smoothed_median_throughput_cost_percent > 5.0f) || (median_gen2_tcp_percent > 10.0f)) { - // compute the % overhead from msl waiting time and gc time for each of the samples - float percent_overhead[dynamic_heap_count_data_t::sample_size]; - for (int i = 0; i < dynamic_heap_count_data_t::sample_size; i++) - { - dynamic_heap_count_data_t::sample& sample = dynamic_heap_count_data.samples[i]; - uint64_t overhead_time = sample.soh_msl_wait_time + sample.uoh_msl_wait_time + sample.gc_elapsed_time; - percent_overhead[i] = overhead_time * 100.0f / sample.elapsed_between_gcs; - if (percent_overhead[i] < 0) - percent_overhead[i] = 0; - else if (percent_overhead[i] > 100) - percent_overhead[i] = 100; - dprintf (9999, ("sample %d: percent_overhead: %.3f%%", i, percent_overhead[i])); - } - // compute the median of the percent overhead samples - #define compare_and_swap(i, j) \ - { \ - if (percent_overhead[i] < percent_overhead[j]) \ - { \ - float t = percent_overhead[i]; \ - percent_overhead[i] = percent_overhead[j]; \ - percent_overhead[j] = t; \ - } \ - } - compare_and_swap (1, 0); - compare_and_swap (2, 0); - compare_and_swap (2, 1); - #undef compare_and_swap - - // the middle element is the median overhead percentage - float median_percent_overhead = percent_overhead[1]; - - // apply exponential smoothing and use 1/3 for the smoothing factor - const float smoothing = 3; - float smoothed_median_percent_overhead = dynamic_heap_count_data.smoothed_median_percent_overhead; - if (smoothed_median_percent_overhead != 0.0f) - { - // average it with the previous value - smoothed_median_percent_overhead = median_percent_overhead / smoothing + (smoothed_median_percent_overhead / smoothing) * (smoothing - 1); + if (smoothed_median_throughput_cost_percent > 5.0f) + { + dprintf (6666, ("[CHP1] stcp %.3f > 5, %d + %d = %d", smoothed_median_throughput_cost_percent, n_heaps, step_up, (n_heaps + step_up))); } else { - // first time? 
initialize to the median - smoothed_median_percent_overhead = median_percent_overhead; + dprintf (6666, ("[CHP2] tcp %.3f > 10, %d + %d = %d", median_gen2_tcp_percent, n_heaps, step_up, (n_heaps + step_up))); } + new_n_heaps += step_up; + } + // if we can save at least 1% more in time than we spend in space, increase number of heaps + else if ((tcp_reduction_per_step_up - scp_increase_per_step_up) >= 1.0f) + { + dprintf (6666, ("[CHP3] %.3f - %.3f = %.3f, %d + %d = %d", + tcp_reduction_per_step_up, scp_increase_per_step_up, (tcp_reduction_per_step_up - scp_increase_per_step_up), + n_heaps, step_up, (n_heaps + step_up))); + new_n_heaps += step_up; + } + // if we can save at least 1% more in space than we spend in time, decrease number of heaps + else if ((smoothed_median_throughput_cost_percent < 1.0f) && + (median_gen2_tcp_percent < 5.0f) && + ((scp_decrease_per_step_down - tcp_increase_per_step_down) >= 1.0f)) + { + dprintf (6666, ("[CHP4] stcp %.3f tcp %.3f, %.3f - %.3f = %.3f, %d - %d = %d", + smoothed_median_throughput_cost_percent, median_gen2_tcp_percent, + scp_decrease_per_step_down, tcp_increase_per_step_down, (scp_decrease_per_step_down - tcp_increase_per_step_down), + n_heaps, step_down, (n_heaps - step_down))); + new_n_heaps -= step_down; + } - dprintf (9999, ("median overhead: %.3f%% smoothed median overhead: %.3f%%", median_percent_overhead, smoothed_median_percent_overhead)); - - // estimate the space cost of adding a heap as the min gen0 size - size_t heap_space_cost_per_heap = dd_min_size (hp0_dd0); - - // compute the % space cost of adding a heap - float percent_heap_space_cost_per_heap = heap_space_cost_per_heap * 100.0f / heap_size; - - // compute reasonable step sizes for the heap count - - // on the way up, we essentially multiply the heap count by 1.5, so we go 1, 2, 3, 5, 8 ... 
- // we don't go all the way to the number of CPUs, but stay 1 or 2 short - int step_up = (n_heaps + 1) / 2; - int extra_heaps = 1 + (n_max_heaps >= 32); - step_up = min (step_up, n_max_heaps - extra_heaps - n_heaps); + assert (new_n_heaps >= 1); + assert (new_n_heaps <= n_max_heaps); +#endif //STRESS_DYNAMIC_HEAP_COUNT - // on the way down, we essentially divide the heap count by 1.5 - int step_down = (n_heaps + 1) / 3; + // store data used for decision to emit in ETW event + dynamic_heap_count_data.median_throughput_cost_percent = median_throughput_cost_percent; + dynamic_heap_count_data.smoothed_median_throughput_cost_percent = smoothed_median_throughput_cost_percent; + dynamic_heap_count_data.percent_heap_space_cost_per_heap = percent_heap_space_cost_per_heap; + dynamic_heap_count_data.tcp_reduction_per_step_up = tcp_reduction_per_step_up; + dynamic_heap_count_data.tcp_increase_per_step_down = tcp_increase_per_step_down; + dynamic_heap_count_data.scp_increase_per_step_up = scp_increase_per_step_up; + dynamic_heap_count_data.scp_decrease_per_step_down = scp_decrease_per_step_down; + + GCEventFireHeapCountTuning_V1 ( + (uint16_t)dynamic_heap_count_data.new_n_heaps, + (uint64_t)VolatileLoadWithoutBarrier (&settings.gc_index), + dynamic_heap_count_data.median_throughput_cost_percent, + dynamic_heap_count_data.smoothed_median_throughput_cost_percent, + dynamic_heap_count_data.tcp_reduction_per_step_up, + dynamic_heap_count_data.tcp_increase_per_step_down, + dynamic_heap_count_data.scp_increase_per_step_up, + dynamic_heap_count_data.scp_decrease_per_step_down + ); - // estimate the potential time benefit of going up a step - float overhead_reduction_per_step_up = smoothed_median_percent_overhead * step_up / (n_heaps + step_up); + dynamic_heap_count_data.prev_num_completed_gcs = num_completed_gcs; - // estimate the potential time cost of going down a step - float overhead_increase_per_step_down = smoothed_median_percent_overhead * step_down / (n_heaps - step_down); + if (new_n_heaps != n_heaps) + { + dprintf (6666, ("should change! 
%d->%d", n_heaps, new_n_heaps)); + dynamic_heap_count_data.heap_count_to_change_to = new_n_heaps; + dynamic_heap_count_data.should_change_heap_count = true; + } - // estimate the potential space cost of going up a step - float space_cost_increase_per_step_up = percent_heap_space_cost_per_heap * step_up; + return new_n_heaps; +} - // estimate the potential space saving of going down a step - float space_cost_decrease_per_step_down = percent_heap_space_cost_per_heap * step_down; +void gc_heap::check_heap_count () +{ + dynamic_heap_count_data.new_n_heaps = dynamic_heap_count_data.heap_count_to_change_to; - dprintf (9999, ("up: %d down: %d, ou %.3f, od %.3f, su %.3f, sd %.3f", step_up, step_down, - overhead_reduction_per_step_up, overhead_increase_per_step_down, space_cost_increase_per_step_up, space_cost_decrease_per_step_down)); + assert (dynamic_heap_count_data.new_n_heaps != n_heaps); -#ifdef STRESS_DYNAMIC_HEAP_COUNT - // quick hack for initial testing - int new_n_heaps = (int)gc_rand::get_rand (n_max_heaps - 1) + 1; + if (dynamic_heap_count_data.new_n_heaps != n_heaps) + { + dprintf (9999, ("h0 suspending EE in check")); + // can't have threads allocating while we change the number of heaps + GCToEEInterface::SuspendEE(SUSPEND_FOR_GC_PREP); + dprintf (9999, ("h0 suspended EE in check")); - // if we are adjusting down, make sure we adjust lower than the lowest uoh msl heap - if ((new_n_heaps < n_heaps) && (dynamic_heap_count_data.lowest_heap_with_msl_uoh != -1)) +#ifdef BACKGROUND_GC + if (gc_heap::background_running_p()) { - new_n_heaps = min (dynamic_heap_count_data.lowest_heap_with_msl_uoh, new_n_heaps); + // background GC is running - reset the new heap count + dynamic_heap_count_data.new_n_heaps = n_heaps; + dprintf (6666, ("can't change heap count! BGC in progress")); - // but not down to zero, obviously... 
- new_n_heaps = max (new_n_heaps, 1); - } - dprintf (9999, ("stress %d -> %d", n_heaps, new_n_heaps)); -#else //STRESS_DYNAMIC_HEAP_COUNT - int new_n_heaps = n_heaps; - if (median_percent_overhead > 10.0f) - { - // ramp up more agressively - use as many heaps as it would take to bring - // the overhead down to 5% - new_n_heaps = (int)(n_heaps * (median_percent_overhead / 5.0)); - new_n_heaps = min (new_n_heaps, n_max_heaps - extra_heaps); - } - // if the median overhead is 10% or less, react slower - else if (smoothed_median_percent_overhead > 5.0f) - { - new_n_heaps += step_up; - } - // if we can save at least 1% more in time than we spend in space, increase number of heaps - else if ((overhead_reduction_per_step_up - space_cost_increase_per_step_up) >= 1.0f) - { - new_n_heaps += step_up; - } - // if we can save at least 1% more in space than we spend in time, decrease number of heaps - else if ((smoothed_median_percent_overhead < 1.0f) && ((space_cost_decrease_per_step_down - overhead_increase_per_step_down) >= 1.0f)) - { - new_n_heaps -= step_down; + GCToEEInterface::RestartEE(TRUE); } - - dprintf (9999, ("or: %d, si: %d, sd: %d, oi: %d => %d -> %d", - (int)overhead_reduction_per_step_up, - (int)space_cost_increase_per_step_up, - (int)space_cost_decrease_per_step_down, - (int)overhead_increase_per_step_down, - n_heaps, - new_n_heaps)); - - assert (1 <= new_n_heaps); - assert (new_n_heaps <= n_max_heaps); -#endif //STRESS_DYNAMIC_HEAP_COUNT - - dynamic_heap_count_data.new_n_heaps = new_n_heaps; - - // store data used for decision to emit in ETW event - dynamic_heap_count_data.median_percent_overhead = median_percent_overhead; - dynamic_heap_count_data.smoothed_median_percent_overhead = smoothed_median_percent_overhead; - dynamic_heap_count_data.percent_heap_space_cost_per_heap = percent_heap_space_cost_per_heap; - dynamic_heap_count_data.overhead_reduction_per_step_up = overhead_reduction_per_step_up; - dynamic_heap_count_data.overhead_increase_per_step_down = overhead_increase_per_step_down; - dynamic_heap_count_data.space_cost_increase_per_step_up = space_cost_increase_per_step_up; - dynamic_heap_count_data.space_cost_decrease_per_step_down = space_cost_decrease_per_step_down; - - GCEventFireHeapCountTuning_V1( - (uint16_t)dynamic_heap_count_data.new_n_heaps, - (uint64_t)VolatileLoad(&settings.gc_index), - dynamic_heap_count_data.median_percent_overhead, - dynamic_heap_count_data.smoothed_median_percent_overhead, - dynamic_heap_count_data.overhead_reduction_per_step_up, - dynamic_heap_count_data.overhead_increase_per_step_down, - dynamic_heap_count_data.space_cost_increase_per_step_up, - dynamic_heap_count_data.space_cost_decrease_per_step_down - ); - - if (new_n_heaps != n_heaps) - { - dprintf (9999, ("h0 suspending EE in check")); - // can't have threads allocating while we change the number of heaps - GCToEEInterface::SuspendEE(SUSPEND_FOR_GC_PREP); - dprintf (9999, ("h0 suspended EE in check")); - -#ifdef BACKGROUND_GC - if (gc_heap::background_running_p()) - { - // background GC is running - reset the new heap count - dynamic_heap_count_data.new_n_heaps = n_heaps; - - GCToEEInterface::RestartEE(TRUE); - } #endif //BACKGROUND_GC - } } if (dynamic_heap_count_data.new_n_heaps != n_heaps) { + dprintf (6666, ("prep to change from %d to %d", n_heaps, dynamic_heap_count_data.new_n_heaps)); if (!prepare_to_change_heap_count (dynamic_heap_count_data.new_n_heaps)) { // we don't have sufficient resources - reset the new heap count @@ -25422,17 +25542,18 @@ void gc_heap::check_heap_count () 
if (dynamic_heap_count_data.new_n_heaps == n_heaps) { // heap count stays the same, no work to do - dprintf (9999, ("heap count stays the same, no work to do %d == %d", dynamic_heap_count_data.new_n_heaps, n_heaps)); + dynamic_heap_count_data.prev_num_completed_gcs = get_num_completed_gcs (); + dynamic_heap_count_data.should_change_heap_count = false; - // come back after 3 GCs to reconsider - prev_change_heap_count_gc_index = settings.gc_index; + dprintf (6666, ("heap count stays the same %d, no work to do, set prev completed to %Id", dynamic_heap_count_data.new_n_heaps, dynamic_heap_count_data.prev_num_completed_gcs)); return; } int new_n_heaps = dynamic_heap_count_data.new_n_heaps; - if (GCScan::GetGcRuntimeStructuresValid()) + assert (!(dynamic_heap_count_data.init_only_p)); + { // At this point we are guaranteed to be able to change the heap count to the new one. // Change the heap count for joins here because we will need to join new_n_heaps threads together. @@ -25464,15 +25585,18 @@ void gc_heap::check_heap_count () int old_n_heaps = n_heaps; + (dynamic_heap_count_data.heap_count_change_count)++; change_heap_count (dynamic_heap_count_data.new_n_heaps); GCToEEInterface::RestartEE(TRUE); dprintf (9999, ("h0 restarted EE")); - prev_change_heap_count_gc_index = settings.gc_index; // we made changes to the heap count that will change the overhead, // so change the smoothed overhead to reflect that - dynamic_heap_count_data.smoothed_median_percent_overhead = dynamic_heap_count_data.smoothed_median_percent_overhead/n_heaps*old_n_heaps; + dynamic_heap_count_data.smoothed_median_throughput_cost_percent = dynamic_heap_count_data.smoothed_median_throughput_cost_percent / n_heaps * old_n_heaps; + + dprintf (6666, ("h0 finished changing, set should change to false!")); + dynamic_heap_count_data.should_change_heap_count = false; } bool gc_heap::prepare_to_change_heap_count (int new_n_heaps) @@ -25936,6 +26060,26 @@ bool gc_heap::change_heap_count (int new_n_heaps) return true; } + +size_t gc_heap::get_msl_wait_time() +{ + assert (dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes); + + size_t msl_wait_since_pause = 0; + + for (int i = 0; i < n_heaps; i++) + { + gc_heap* hp = g_heaps[i]; + + msl_wait_since_pause += hp->more_space_lock_soh.msl_wait_time; + hp->more_space_lock_soh.msl_wait_time = 0; + + msl_wait_since_pause += hp->more_space_lock_uoh.msl_wait_time; + hp->more_space_lock_uoh.msl_wait_time = 0; + } + + return msl_wait_since_pause; +} #endif //DYNAMIC_HEAP_COUNT #endif //USE_REGIONS @@ -32971,17 +33115,17 @@ void gc_heap::plan_phase (int condemned_gen_number) } else { - dprintf (2, ("gen2 didn't grow (end seg alloc: %zd, , condemned alloc: %zd, gen1 c alloc: %zd", + dprintf (1, ("gen2 didn't grow (end seg alloc: %zd, condemned alloc: %zd, gen1 c alloc: %zd", end_seg_allocated, condemned_allocated, generation_condemned_allocated (generation_of (max_generation - 1)))); } - dprintf (1, ("older gen's free alloc: %zd->%zd, seg alloc: %zd->%zd, condemned alloc: %zd->%zd", + dprintf (2, ("older gen's free alloc: %zd->%zd, seg alloc: %zd->%zd, condemned alloc: %zd->%zd", r_older_gen_free_list_allocated, generation_free_list_allocated (older_gen), r_older_gen_end_seg_allocated, generation_end_seg_allocated (older_gen), r_older_gen_condemned_allocated, generation_condemned_allocated (older_gen))); - dprintf (1, ("this GC did %zd free list alloc(%zd bytes free space rejected)", + dprintf (2, ("this GC did %zd free list alloc(%zd bytes free space rejected)", free_list_allocated, 
rejected_free_space)); maxgen_size_increase* maxgen_size_info = &(get_gc_data_per_heap()->maxgen_size_info); @@ -39118,7 +39262,7 @@ void gc_heap::bgc_thread_function() dprintf (SPINLOCK_LOG, ("bgc Lgc")); leave_spin_lock (&gc_lock); #ifdef MULTIPLE_HEAPS - dprintf(1, ("End of BGC - starting all BGC threads")); + dprintf(1, ("End of BGC")); bgc_t_join.restart(); #endif //MULTIPLE_HEAPS } @@ -42995,6 +43139,9 @@ bool gc_heap::init_dynamic_data() { process_start_time = now; smoothed_desired_total[0] = dynamic_data_of (0)->min_size * n_heaps; +#ifdef DYNAMIC_HEAP_COUNT + last_suspended_end_time = now; +#endif //DYNAMIC_HEAP_COUNT #ifdef HEAP_BALANCE_INSTRUMENTATION last_gc_end_time_us = now; dprintf (HEAP_BALANCE_LOG, ("qpf=%zd, start: %zd(%d)", qpf, start_raw_ts, now)); @@ -49951,7 +50098,7 @@ void gc_heap::do_post_gc() dprintf (1, (ThreadStressLog::gcDetailedEndMsg(), VolatileLoad (&settings.gc_index), dd_collection_count (hp->dynamic_data_of (0)), - (size_t)(GetHighPrecisionTimeStamp() / 1000), + (size_t)(GetHighPrecisionTimeStamp () / 1000), settings.condemned_generation, str_gc_type, (settings.compaction ? "C" : "S"), diff --git a/src/coreclr/gc/gcpriv.h b/src/coreclr/gc/gcpriv.h index cef7b2a0fe6da3..a90bde62e23b0f 100644 --- a/src/coreclr/gc/gcpriv.h +++ b/src/coreclr/gc/gcpriv.h @@ -403,8 +403,6 @@ struct GCDebugSpinLock { #if defined(DYNAMIC_HEAP_COUNT) // time in microseconds we wait for the more space lock uint64_t msl_wait_time; - // number of times we wait for the more space lock - uint64_t msl_wait_count; #endif //DYNAMIC_HEAP_COUNT GCDebugSpinLock() @@ -416,7 +414,7 @@ struct GCDebugSpinLock { , num_switch_thread(0), num_wait_longer(0), num_switch_thread_w(0), num_disable_preemptive_w(0) #endif #if defined(DYNAMIC_HEAP_COUNT) - , msl_wait_time(0), msl_wait_count(0) + , msl_wait_time(0) #endif //DYNAMIC_HEAP_COUNT { } @@ -1153,15 +1151,12 @@ class dynamic_data // // The following 3 fields are updated at the beginning of each GC, if that GC condemns this generation. // - // The number of GC that condemned this generation. The only difference between this - // and collection_count is just that collection_count is maintained for all physical generations - // (currently there are 5) whereas this is only updated for logical generations (there are 3). - size_t gc_clock; - uint64_t time_clock; //time when this gc started + size_t gc_clock; // the gc index + uint64_t time_clock; // time when this gc started uint64_t previous_time_clock; // time when previous gc started // Updated at the end of a GC, if that GC condemns this generation. - size_t gc_elapsed_time; // Time it took for the gc to complete + size_t gc_elapsed_time; // time it took for the gc to complete // // The following fields (and fields in sdata) are initialized during GC init time and do not change. 
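
To make the tuning math in `calculate_new_heap_count` concrete: each sample's throughput cost percent (tcp) is the msl wait time averaged over the heaps plus the GC pause time, expressed as a percentage of the wall-clock time between GCs; the median of the last three samples is then exponentially smoothed with a 1/3 factor. Below is a minimal standalone sketch of that math, not part of the patch; the sample values, `n_heaps = 4`, and the previous smoothed value are invented for illustration, and plain arrays stand in for `dynamic_heap_count_data_t`.

```cpp
#include <cstdint>
#include <cstdio>

// same idea as the median_of_3 helper added in gc.cpp: order the three
// values with three compare-and-swaps; the middle one is the median
static float median_of_3 (float a, float b, float c)
{
    if (a < b) { float t = a; a = b; b = t; }
    if (a < c) { float t = a; a = c; c = t; }
    if (b < c) { float t = b; b = c; c = t; }
    return b;
}

int main ()
{
    const int n_heaps = 4; // hypothetical heap count
    // hypothetical samples: {elapsed_between_gcs, gc_pause_time, msl_wait_time} in microseconds
    uint64_t samples[3][3] = { { 50000, 2000, 4000 }, { 60000, 2500, 8000 }, { 40000, 1500, 2000 } };

    float tcp[3];
    for (int i = 0; i < 3; i++)
    {
        // tcp = (msl wait averaged over heaps + GC pause) as a % of time between GCs
        tcp[i] = ((float)samples[i][2] / n_heaps + samples[i][1]) * 100.0f / (float)samples[i][0];
    }

    float median_tcp = median_of_3 (tcp[0], tcp[1], tcp[2]);

    // exponential smoothing with factor 1/3: new = median / 3 + previous * 2/3
    const float smoothing = 3;
    float smoothed_tcp = 5.0f; // made-up previous smoothed value
    smoothed_tcp = median_tcp / smoothing + (smoothed_tcp / smoothing) * (smoothing - 1);

    printf ("median tcp %.3f%%, smoothed tcp %.3f%%\n", median_tcp, smoothed_tcp);
    return 0;
}
```

Dividing the msl wait by `n_heaps` mirrors `get_msl_wait_time`, which sums the wait across all heaps, so the per-sample cost reflects what an average heap experienced.
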
@@ -1500,6 +1495,8 @@ class mark_queue_t void verify_empty(); }; +float median_of_3 (float a, float b, float c); + //class definition of the internal class class gc_heap { @@ -2608,11 +2605,17 @@ class gc_heap // re-initialize a heap in preparation to putting it back into service PER_HEAP_METHOD void recommission_heap(); + PER_HEAP_ISOLATED_METHOD size_t get_num_completed_gcs(); + + PER_HEAP_ISOLATED_METHOD int calculate_new_heap_count(); + // check if we should change the heap count PER_HEAP_METHOD void check_heap_count(); PER_HEAP_ISOLATED_METHOD bool prepare_to_change_heap_count (int new_n_heaps); PER_HEAP_METHOD bool change_heap_count (int new_n_heaps); + + PER_HEAP_ISOLATED_METHOD size_t get_msl_wait_time(); #endif //DYNAMIC_HEAP_COUNT #endif //USE_REGIONS @@ -4273,42 +4276,65 @@ class gc_heap #endif //USE_REGIONS #ifdef DYNAMIC_HEAP_COUNT + // Sample collection: + // + // For every GC, we collect the msl wait time + GC pause duration info and use both to calculate the + // throughput cost percentage. We will also be using the wait time and the GC pause duration separately + // for other purposes in the future. + // + // For all gen2 GCs we also keep a separate array currently just for the GC cost. This serves as a backstop + // to smooth out the situation when we rarely pick the gen2 GCs in the first array. struct dynamic_heap_count_data_t { static const int sample_size = 3; struct sample { - uint64_t elapsed_between_gcs; // time between gcs in microseconds - uint64_t gc_elapsed_time; // time the gc took - uint64_t soh_msl_wait_time; // time the allocator spent waiting for the soh msl lock - uint64_t uoh_msl_wait_time; // time the allocator spent waiting for the uoh msl lock - size_t allocating_thread_count;// number of allocating threads - size_t heap_size; + uint64_t elapsed_between_gcs; // time between gcs in microseconds (this should really be between_pauses) + uint64_t gc_pause_time; // pause time for this GC + uint64_t msl_wait_time; }; - unsigned sample_index; + uint32_t sample_index; sample samples[sample_size]; + size_t prev_num_completed_gcs; + + uint32_t gen2_sample_index; + // This is (gc_elapsed_time / time in between this and the last gen2 GC) + float gen2_gc_percents[sample_size]; - float median_percent_overhead; // estimated overhead of allocator + gc - float smoothed_median_percent_overhead; // exponentially smoothed version - float percent_heap_space_cost_per_heap; // percent space cost of adding a heap - float overhead_reduction_per_step_up; // percentage effect on overhead of increasing heap count - float overhead_increase_per_step_down; // percentage effect on overhead of decreasing heap count - float space_cost_increase_per_step_up; // percentage effect on space of increasing heap count - float space_cost_decrease_per_step_down;// percentage effect on space of decreasing heap count + float median_throughput_cost_percent; // estimated overhead of allocator + gc + float smoothed_median_throughput_cost_percent; // exponentially smoothed version + float percent_heap_space_cost_per_heap; // percent space cost of adding a heap + float tcp_reduction_per_step_up; // throughput cost percent effect of increasing heap count + float tcp_increase_per_step_down; // throughput cost percent effect of decreasing heap count + float scp_increase_per_step_up; // space cost percent effect of increasing heap count + float scp_decrease_per_step_down; // space cost percent effect of decreasing heap count int new_n_heaps; // the heap count we changed from int last_n_heaps; // don't 
start a GC till we see (n_max_heaps - new_n_heaps) number of threads idling VOLATILE(int32_t) idle_thread_count; - bool init_only_p; + bool init_only_p; + + bool should_change_heap_count; + int heap_count_to_change_to; + int heap_count_change_count; #ifdef STRESS_DYNAMIC_HEAP_COUNT int lowest_heap_with_msl_uoh; #endif //STRESS_DYNAMIC_HEAP_COUNT + + float get_median_gen2_gc_percent() + { + return median_of_3 (gen2_gc_percents[0], gen2_gc_percents[1], gen2_gc_percents[2]); + } }; PER_HEAP_ISOLATED_FIELD_MAINTAINED dynamic_heap_count_data_t dynamic_heap_count_data; + PER_HEAP_ISOLATED_FIELD_MAINTAINED uint64_t last_suspended_end_time; + // If the last full GC is blocking, this is that GC's index; for BGC, this is the settings.gc_index + // when the BGC ended. + PER_HEAP_ISOLATED_FIELD_MAINTAINED size_t gc_index_full_gc_end; #endif //DYNAMIC_HEAP_COUNT /****************************************************/ @@ -4885,7 +4911,6 @@ uint64_t& dd_previous_time_clock (dynamic_data* inst) return inst->previous_time_clock; } - inline size_t& dd_gc_clock_interval (dynamic_data* inst) {
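
The step sizing in `calculate_new_heap_count` moves the heap count by roughly 1.5x in either direction: `step_up = (n_heaps + 1) / 2` grows the count along the 1, 2, 3, 5, 8, ... sequence while staying `extra_heaps` (1, or 2 once `n_max_heaps >= 32`) short of the limit, and `step_down = (n_heaps + 1) / 3` undoes such a step. A minimal sketch that prints the resulting ramp, assuming a hypothetical machine where `n_max_heaps` is 16; the loop and the printed table are illustrative only, not part of the patch.

```cpp
#include <algorithm>
#include <cstdio>

int main ()
{
    const int n_max_heaps = 16;                      // assumed CPU/heap limit
    const int extra_heaps = 1 + (n_max_heaps >= 32); // stay 1 or 2 short of the limit

    for (int n_heaps = 1; n_heaps < n_max_heaps - extra_heaps; )
    {
        // going up multiplies the heap count by ~1.5: 1, 2, 3, 5, 8, 12, 15
        int step_up = (n_heaps + 1) / 2;
        step_up = std::min (step_up, n_max_heaps - extra_heaps - n_heaps);
        // going down divides by ~1.5, reversing one up-step
        int step_down = (n_heaps + 1) / 3;

        printf ("n_heaps %2d: up by %d -> %2d, down by %d -> %2d\n",
                n_heaps, step_up, n_heaps + step_up, step_down, n_heaps - step_down);
        n_heaps += step_up;
    }
    return 0;
}
```

Capping the ramp at `n_max_heaps - extra_heaps` matches the comment in the patch about staying 1 or 2 heaps below the CPU count, so the process keeps some headroom even at the top of the ramp; the decision branches then weigh `tcp_reduction_per_step_up` against `scp_increase_per_step_up` (and the step-down counterparts) before committing to a move.
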