Skip to content

Commit 31e69d3

Browse files
committed
gvfs-helper: add gvfs.fallback and unit tests (#665)
By default, GVFS Protocol-enabled Scalar clones will fall back to the origin server if there is a network issue with the cache servers. However (and especially for the prefetch endpoint) this may be a very expensive operation for the origin server, leading to the user being throttled. This shows up later in cases such as 'git push' or other web operations. To avoid this, create a new config option, 'gvfs.fallback', which defaults to true. When set to 'false', pass '--no-fallback' from the gvfs-helper client to the child gvfs-helper server process. This will allow users who have hit this problem to avoid it in the future. In case this becomes a more widespread problem, engineering systems can enable the config option more broadly. Enabling the config will of course lead to immediate failures for users, but at least that will help diagnose the problem when it occurs instead of later when the throttling shows up and the server load has already passed, damage done. This change only applies to interactions with Azure DevOps and the GVFS Protocol. --- * [x] This change only applies to interactions with Azure DevOps and the GVFS Protocol.
2 parents 450f1b6 + bf0b20f commit 31e69d3

File tree

4 files changed

+185
-1
lines changed

4 files changed

+185
-1
lines changed

Documentation/config/gvfs.txt

+5
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,8 @@ gvfs.cache-server::
33

44
gvfs.sharedcache::
55
TODO
6+
7+
gvfs.fallback::
8+
If set to `false`, then never fallback to the origin server when the cache
9+
server fails to connect. This will alert users to failures with the cache
10+
server, but avoid causing throttling on the origin server.

gvfs-helper-client.c

+10-1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "quote.h"
1515
#include "packfile.h"
1616
#include "hex.h"
17+
#include "config.h"
1718

1819
static struct oidset gh_client__oidset_queued = OIDSET_INIT;
1920
static unsigned long gh_client__oidset_count;
@@ -340,17 +341,25 @@ static struct gh_server__process *gh_client__find_long_running_process(
340341
struct gh_server__process *entry;
341342
struct strvec argv = STRVEC_INIT;
342343
struct strbuf quoted = STRBUF_INIT;
344+
int fallback;
343345

344346
gh_client__choose_odb();
345347

346348
/*
347349
* TODO decide what defaults we want.
348350
*/
349351
strvec_push(&argv, "gvfs-helper");
350-
strvec_push(&argv, "--fallback");
351352
strvec_push(&argv, "--cache-server=trust");
352353
strvec_pushf(&argv, "--shared-cache=%s",
353354
gh_client__chosen_odb->path);
355+
356+
/* If gvfs.fallback=false, then don't add --fallback. */
357+
if (!git_config_get_bool("gvfs.fallback", &fallback) &&
358+
!fallback)
359+
strvec_push(&argv, "--no-fallback");
360+
else
361+
strvec_push(&argv, "--fallback");
362+
354363
strvec_push(&argv, "server");
355364

356365
sq_quote_argv_pretty(&quoted, argv.v);

t/helper/test-gvfs-protocol.c

+8
Original file line numberDiff line numberDiff line change
@@ -1581,6 +1581,14 @@ static enum worker_result dispatch(struct req *req)
15811581
const char *method;
15821582
enum worker_result wr;
15831583

1584+
if (strstr(req->uri_base.buf, MY_SERVER_TYPE__CACHE)) {
1585+
if (string_list_has_string(&mayhem_list, "cache_http_503")) {
1586+
logmayhem("cache_http_503");
1587+
return send_http_error(1, 503, "Service Unavailable", 2,
1588+
WR_MAYHEM | WR_HANGUP);
1589+
}
1590+
}
1591+
15841592
if (string_list_has_string(&mayhem_list, "close_no_write")) {
15851593
logmayhem("close_no_write");
15861594
return WR_MAYHEM | WR_HANGUP;

t/t5799-gvfs-helper.sh

+162
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,12 @@ test_set_port GIT_TEST_GVFS_PROTOCOL_PORT
2424
# actually use it). We are only testing explicit object
2525
# fetching using gvfs-helper.exe in isolation.
2626
#
27+
# repo_t2:
28+
# Another empty repo to use after we contaminate t1.
29+
#
2730
REPO_SRC="$(pwd)"/repo_src
2831
REPO_T1="$(pwd)"/repo_t1
32+
REPO_T2="$(pwd)"/repo_t2
2933

3034
# Setup some loopback URLs where test-gvfs-protocol.exe will be
3135
# listening. We will spawn it directly inside the repo_src directory,
@@ -45,6 +49,7 @@ ORIGIN_URL=http://$HOST_PORT/servertype/origin
4549
CACHE_URL=http://$HOST_PORT/servertype/cache
4650

4751
SHARED_CACHE_T1="$(pwd)"/shared_cache_t1
52+
SHARED_CACHE_T2="$(pwd)"/shared_cache_t2
4853

4954
# The pid-file is created by test-gvfs-protocol.exe when it starts.
5055
# The server will shut down if/when we delete it. (This is a little
@@ -182,6 +187,10 @@ test_expect_success 'setup repos' '
182187
mkdir "$SHARED_CACHE_T1/pack" &&
183188
mkdir "$SHARED_CACHE_T1/info" &&
184189
#
190+
mkdir "$SHARED_CACHE_T2" &&
191+
mkdir "$SHARED_CACHE_T2/pack" &&
192+
mkdir "$SHARED_CACHE_T2/info" &&
193+
#
185194
# setup repo_t1 and point all of the gvfs.* values to repo_src.
186195
#
187196
test_create_repo "$REPO_T1" &&
@@ -191,6 +200,13 @@ test_expect_success 'setup repos' '
191200
git -C "$REPO_T1" config --local gvfs.sharedCache "$SHARED_CACHE_T1" &&
192201
echo "$SHARED_CACHE_T1" >> "$REPO_T1"/.git/objects/info/alternates &&
193202
#
203+
test_create_repo "$REPO_T2" &&
204+
git -C "$REPO_T2" branch -M main &&
205+
git -C "$REPO_T2" remote add origin $ORIGIN_URL &&
206+
git -C "$REPO_T2" config --local gvfs.cache-server $CACHE_URL &&
207+
git -C "$REPO_T2" config --local gvfs.sharedCache "$SHARED_CACHE_T2" &&
208+
echo "$SHARED_CACHE_T2" >> "$REPO_T2"/.git/objects/info/alternates &&
209+
#
194210
#
195211
#
196212
cat <<-EOF >creds.txt &&
@@ -203,6 +219,7 @@ test_expect_success 'setup repos' '
203219
EOF
204220
chmod 755 creds.sh &&
205221
git -C "$REPO_T1" config --local credential.helper "!f() { cat \"$(pwd)\"/creds.txt; }; f" &&
222+
git -C "$REPO_T2" config --local credential.helper "!f() { cat \"$(pwd)\"/creds.txt; }; f" &&
206223
#
207224
# Create some test data sets.
208225
#
@@ -1036,6 +1053,70 @@ test_expect_success 'successful retry after http-error: origin get' '
10361053
verify_connection_count 2
10371054
'
10381055

1056+
#################################################################
1057+
# So far we have confirmed that gvfs-helper can recover from a network
1058+
# error (with retries, since the cache-server was disabled in all of
1059+
# the above tests). Try again with fallback turned on.
1060+
#
1061+
# With mayhem "http_503" turned on both the cache and origin server
1062+
# will always throw a 503 error.
1063+
#
1064+
# Confirm that we tried to make six connections: we should hit the
1065+
# cache-server 3 times (one initial attempt and two retries) and then
1066+
# try the origin server 3 times.
1067+
#
1068+
#################################################################
1069+
1070+
test_expect_success 'http-error: 503 Service Unavailable (with retry and fallback)' '
1071+
test_when_finished "per_test_cleanup" &&
1072+
start_gvfs_protocol_server_with_mayhem http_503 &&
1073+
1074+
test_expect_code $GH__ERROR_CODE__HTTP_503 \
1075+
git -C "$REPO_T1" gvfs-helper \
1076+
--cache-server=trust \
1077+
--remote=origin \
1078+
--fallback \
1079+
get \
1080+
--max-retries=2 \
1081+
<"$OIDS_FILE" >OUT.output 2>OUT.stderr &&
1082+
1083+
stop_gvfs_protocol_server &&
1084+
1085+
grep -q "error: get: (http:503)" OUT.stderr &&
1086+
verify_connection_count 6
1087+
'
1088+
1089+
#################################################################
1090+
# Now repeat the above, but explicitly turn off fallback.
1091+
#
1092+
# Again, we use mayhem "http_503". However, with fallback turned
1093+
# off, we will only attempt the 3 connections to the cache server.
1094+
# We will not try to hit the origin server.
1095+
#
1096+
# So we should only see a total of 3 connections rather than the
1097+
# six in the previous test.
1098+
#
1099+
#################################################################
1100+
1101+
test_expect_success 'http-error: 503 Service Unavailable (with retry and no-fallback)' '
1102+
test_when_finished "per_test_cleanup" &&
1103+
start_gvfs_protocol_server_with_mayhem http_503 &&
1104+
1105+
test_expect_code $GH__ERROR_CODE__HTTP_503 \
1106+
git -C "$REPO_T1" gvfs-helper \
1107+
--cache-server=trust \
1108+
--remote=origin \
1109+
--no-fallback \
1110+
get \
1111+
--max-retries=2 \
1112+
<"$OIDS_FILE" >OUT.output 2>OUT.stderr &&
1113+
1114+
stop_gvfs_protocol_server &&
1115+
1116+
grep -q "error: get: (http:503)" OUT.stderr &&
1117+
verify_connection_count 3
1118+
'
1119+
10391120
#################################################################
10401121
# Test HTTP Auth
10411122
#
@@ -1193,6 +1274,87 @@ test_expect_success 'integration: fully implicit: diff 2 commits' '
11931274
>OUT.output 2>OUT.stderr
11941275
'
11951276

1277+
# T1 should be considered contaminated at this point.
1278+
1279+
#################################################################
1280+
# gvfs-helper.exe defaults to no fallback.
1281+
# gvfs-helper-client.c defaults to adding `--fallback` to child process.
1282+
#
1283+
# `gvfs.fallback` was added to change the default behavior in the
1284+
# gvfs-helper-client.c code to add either `--fallback` or `--no-fallback`
1285+
# (for origin server load reasons).
1286+
#
1287+
# When `gvfs.fallback` is unset, we default to TRUE and pass `--fallback`.
1288+
# Otherwise, we use the boolean value to decide.
1289+
#
1290+
# NOTE: We DO NOT attempt to count connection requests in the
1291+
# following tests. Since we are using a normal `git` command to drive
1292+
# the `gvfs-helper-client.c` code (and spawn `git-gvfs-helper.exe`) we
1293+
# cannot make assumptions on the number of child processes or
1294+
# reqeusts. The "promisor" logic may drive one or more single-item
1295+
# GETs or a series of bulk POST attempts. Therefore, we must rely
1296+
# only on the result of the command and (implicitly) whether all
1297+
# missing objects were resolved. We use mayhem features to selectively
1298+
# break the cache and origin servers.
1299+
#################################################################
1300+
1301+
test_expect_success 'integration: implicit-get: http_503: diff 2 commits' '
1302+
test_when_finished "per_test_cleanup" &&
1303+
1304+
# Tell both servers to always send 503.
1305+
start_gvfs_protocol_server_with_mayhem http_503 &&
1306+
1307+
# Implicitly demand-load everything without any pre-seeding.
1308+
# (We cannot tell from whether fallback was used or not in this
1309+
# limited test.)
1310+
#
1311+
test_must_fail \
1312+
git -C "$REPO_T2" -c core.useGVFSHelper=true \
1313+
diff $(cat m1.branch)..$(cat m3.branch) \
1314+
>OUT.output 2>OUT.stderr &&
1315+
1316+
stop_gvfs_protocol_server
1317+
'
1318+
1319+
test_expect_success 'integration: implicit-get: cache_http_503,no-fallback: diff 2 commits' '
1320+
test_when_finished "per_test_cleanup" &&
1321+
1322+
# Tell cache server to send 503 and origin server to send 200.
1323+
start_gvfs_protocol_server_with_mayhem cache_http_503 &&
1324+
1325+
# Implicitly demand-load everything without any pre-seeding.
1326+
# This should fail because we do not allow fallback.
1327+
#
1328+
test_must_fail \
1329+
git -C "$REPO_T2" \
1330+
-c core.useGVFSHelper=true \
1331+
-c gvfs.fallback=false \
1332+
diff $(cat m1.branch)..$(cat m3.branch) \
1333+
>OUT.output 2>OUT.stderr &&
1334+
1335+
stop_gvfs_protocol_server
1336+
'
1337+
1338+
test_expect_success 'integration: implicit-get: cache_http_503,with-fallback: diff 2 commits' '
1339+
test_when_finished "per_test_cleanup" &&
1340+
1341+
# Tell cache server to send 503 and origin server to send 200.
1342+
start_gvfs_protocol_server_with_mayhem cache_http_503 &&
1343+
1344+
# Implicitly demand-load everything without any pre-seeding.
1345+
#
1346+
git -C "$REPO_T2" \
1347+
-c core.useGVFSHelper=true \
1348+
-c gvfs.fallback=true \
1349+
diff $(cat m1.branch)..$(cat m3.branch) \
1350+
>OUT.output 2>OUT.stderr &&
1351+
1352+
stop_gvfs_protocol_server
1353+
'
1354+
1355+
# T2 should be considered contaminated at this point.
1356+
1357+
11961358
#################################################################
11971359
# Duplicate packfile tests.
11981360
#

0 commit comments

Comments
 (0)