Skip to content

Commit 8dd30c3

Browse files
committed
Merge upstream HEAD(bea04d9, 2024-05-21) [release] 0.2.130 (hail-is#14454)
2 parents 82b13e5 + bea04d9 commit 8dd30c3

File tree

345 files changed

+2081
-5815
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

345 files changed

+2081
-5815
lines changed

.git-blame-ignore-revs

+2
Original file line numberDiff line numberDiff line change
@@ -14,3 +14,5 @@ da2790242a40ec425a53a02707d261c893b264f7
1414
422edf6386616711ca70f87c455f76781ac925d4
1515
# replaces black formatting with ruff
1616
fa2ef0f2c76654d0c037ff6db60ccb8842fb8539
17+
# ruff lint python imports
18+
01a6a6a107faf204d4f5c20f8ae510d2c35518e9

Makefile

-2
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,6 @@ check-pip-requirements:
8686
hail/python/dev \
8787
gear \
8888
web_common \
89-
auth \
9089
batch \
9190
ci
9291

@@ -98,7 +97,6 @@ check-linux-pip-requirements:
9897
hail/python/dev \
9998
gear \
10099
web_common \
101-
auth \
102100
batch \
103101
ci
104102

batch/batch/batch.py

+2
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,8 @@ def job_record_to_dict(record: Dict[str, Any], name: Optional[str]) -> JobListEn
157157
'cost': coalesce(record.get('cost'), 0),
158158
'msec_mcpu': record['msec_mcpu'],
159159
'cost_breakdown': cost_breakdown,
160+
'always_run': bool(record['always_run']),
161+
'display_state': None,
160162
},
161163
)
162164

batch/batch/cloud/gcp/worker/worker_api.py

+11-4
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import base64
22
import os
33
import tempfile
4+
from contextlib import AsyncExitStack
45
from typing import Dict, List
56

67
import orjson
@@ -20,13 +21,13 @@
2021
class GCPWorkerAPI(CloudWorkerAPI):
2122
nameserver_ip = '169.254.169.254'
2223

23-
# async because GoogleSession must be created inside a running event loop
24+
# async because ClientSession must be created inside a running event loop
2425
@staticmethod
2526
async def from_env() -> 'GCPWorkerAPI':
2627
project = os.environ['PROJECT']
2728
zone = os.environ['ZONE'].rsplit('/', 1)[1]
2829
worker_credentials = aiogoogle.GoogleInstanceMetadataCredentials()
29-
http_session = httpx.ClientSession()
30+
http_session = httpx.client_session()
3031
return GCPWorkerAPI(project, zone, worker_credentials, http_session)
3132

3233
def __init__(
@@ -38,9 +39,15 @@ def __init__(
3839
):
3940
self.project = project
4041
self.zone = zone
42+
43+
self._exit_stack = AsyncExitStack()
4144
self._http_session = http_session
42-
self._metadata_server_client = aiogoogle.GoogleMetadataServerClient(http_session)
45+
self._exit_stack.push_async_callback(self._http_session.close)
46+
4347
self._compute_client = aiogoogle.GoogleComputeClient(project)
48+
self._exit_stack.push_async_callback(self._compute_client.close)
49+
50+
self._metadata_server_client = aiogoogle.GoogleMetadataServerClient(http_session)
4451
self._gcsfuse_credential_files: Dict[str, str] = {}
4552
self._worker_credentials = worker_credentials
4653

@@ -132,7 +139,7 @@ async def unmount_cloudfuse(self, mount_base_path_data: str):
132139
del self._gcsfuse_credential_files[mount_base_path_data]
133140

134141
async def close(self):
135-
await self._compute_client.close()
142+
await self._exit_stack.aclose()
136143

137144
def __str__(self):
138145
return f'project={self.project} zone={self.zone}'

batch/batch/front_end/front_end.py

+5
Original file line numberDiff line numberDiff line change
@@ -2449,6 +2449,11 @@ async def ui_batch(request, userdata, batch_id):
24492449
for j in jobs:
24502450
j['duration'] = humanize_timedelta_msecs(j['duration'])
24512451
j['cost'] = cost_str(j['cost'])
2452+
j['display_state'] = (
2453+
f"{j['state']} (always run)"
2454+
if j['always_run'] and j['state'] not in {'Success', 'Failed', 'Error'}
2455+
else j['state']
2456+
)
24522457
batch['jobs'] = jobs
24532458

24542459
batch['cost'] = cost_str(batch['cost'])

batch/batch/front_end/templates/batch.html

+1-1
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ <h2>Jobs</h2>
9191
{{ job['name'] }}
9292
{% endif %}
9393
</td>
94-
<td>{{ job['state'] }}</td>
94+
<td>{{ job['display_state'] }}</td>
9595
<td>
9696
{% if 'exit_code' in job and job['exit_code'] is not none %}
9797
{{ job['exit_code'] }}

batch/batch/front_end/templates/billing.html

+7-7
Original file line numberDiff line numberDiff line change
@@ -34,19 +34,19 @@ <h1>Billing</h1>
3434
</div>
3535
</div>
3636

37-
<h2>Total Cost</h2>
37+
<h2>Total Spend</h2>
3838
<ul>
3939
<li>{{ total_cost }}</li>
4040
</ul>
4141

4242
{% if is_developer %}
43-
<h2>Cost by Billing Project</h2>
43+
<h2>Spend by Billing Project</h2>
4444
<div class='flex-col' style="overflow: auto;">
4545
<table class="data-table" id="billing_by_project">
4646
<thead>
4747
<tr>
4848
<th>Billing Project</th>
49-
<th>Cost</th>
49+
<th>Spend</th>
5050
</tr>
5151
</thead>
5252
<tbody>
@@ -60,13 +60,13 @@ <h2>Cost by Billing Project</h2>
6060
</table>
6161
</div>
6262

63-
<h2>Cost by User</h2>
63+
<h2>Spend by User</h2>
6464
<div class='flex-col' style="overflow: auto;">
6565
<table class="data-table" id="billing_by_user">
6666
<thead>
6767
<tr>
6868
<th>User</th>
69-
<th>Cost</th>
69+
<th>Spend</th>
7070
</tr>
7171
</thead>
7272
<tbody>
@@ -81,14 +81,14 @@ <h2>Cost by User</h2>
8181
</div>
8282
{% endif %}
8383

84-
<h2>Cost by Billing Project and User</h2>
84+
<h2>Spend by Billing Project and User</h2>
8585
<div class='flex-col' style="overflow: auto;">
8686
<table class="data-table" id="billing_by_project_user">
8787
<thead>
8888
<tr>
8989
<th>Billing Project</th>
9090
<th>User</th>
91-
<th>Cost</th>
91+
<th>Spend</th>
9292
</tr>
9393
</thead>
9494
<tbody>

batch/batch/front_end/templates/job.html

+1
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ <h2>Properties</h2>
1313
<li>Exit Code: {% if 'exit_code' in job and job['exit_code'] is not none %}{{ job['exit_code'] }}{% endif %}</li>
1414
<li>Duration: {% if 'duration' in job and job['duration'] is not none %}{{ job['duration'] }}{% endif %}</li>
1515
<li>Cost: {% if 'cost' in job and job['cost'] is not none %}{{ job['cost'] }}{% endif %}</li>
16+
<li>Always Run: {% if 'always_run' in job and job['always_run'] is not none %}{{ job['always_run'] }}{% endif %}</li>
1617
</ul>
1718

1819
<h2>Attributes</h2>

batch/batch/utils.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -157,8 +157,7 @@ async def query_billing_projects_with_cost(db, user=None, billing_project=None)
157157
) AS usage_t
158158
LEFT JOIN resources ON resources.resource_id = usage_t.resource_id
159159
) AS cost_t ON TRUE
160-
{where_condition}
161-
LOCK IN SHARE MODE;
160+
{where_condition};
162161
"""
163162

164163
billing_projects = []

batch/batch/worker/worker.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -2936,7 +2936,7 @@ def __repr__(self):
29362936

29372937

29382938
class Worker:
2939-
def __init__(self, client_session: httpx.ClientSession):
2939+
def __init__(self):
29402940
self.active = False
29412941
self.cores_mcpu = CORES * 1000
29422942
self.last_updated = time_msecs()
@@ -2948,7 +2948,7 @@ def __init__(self, client_session: httpx.ClientSession):
29482948
self.task_manager = aiotools.BackgroundTaskManager()
29492949
os.makedirs('/hail-jars/', exist_ok=True)
29502950
self.jar_download_locks: Dict[str, asyncio.Lock] = defaultdict(asyncio.Lock)
2951-
self.client_session = client_session
2951+
self.client_session = httpx.client_session()
29522952

29532953
self.image_data: Dict[str, ImageData] = defaultdict(ImageData)
29542954
self.image_data[BATCH_WORKER_IMAGE_ID] += 1
@@ -3452,7 +3452,7 @@ async def async_main():
34523452
network_allocator = NetworkAllocator(network_allocator_task_manager)
34533453
await network_allocator.reserve()
34543454

3455-
worker = Worker(httpx.client_session())
3455+
worker = Worker()
34563456
try:
34573457
async with AsyncExitStack() as cleanup:
34583458
cleanup.push_async_callback(docker.close)

batch/test/test_batch.py

+12-6
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import asyncio
12
import collections
23
import os
34
import secrets
@@ -1497,18 +1498,23 @@ def test_pool_standard_instance_cheapest(client: BatchClient):
14971498

14981499
# Transitively is not valid for terra
14991500
@skip_in_azure
1500-
@pytest.mark.timeout(10 * 60)
1501-
def test_gpu_accesibility_g2(client: BatchClient):
1502-
b = create_batch(client)
1501+
async def test_gpu_accesibility_g2(client: BatchClient):
1502+
b = create_batch(client)._async_batch
15031503
resources = {'machine_type': "g2-standard-4", 'storage': '100Gi'}
15041504
j = b.create_job(
15051505
os.environ['HAIL_GPU_IMAGE'],
15061506
['python3', '-c', 'import torch; assert torch.cuda.is_available()'],
15071507
resources=resources,
15081508
)
1509-
b.submit()
1510-
status = j.wait()
1511-
assert status['state'] == 'Success', str((status, b.debug_info()))
1509+
await b.submit()
1510+
try:
1511+
status = await asyncio.wait_for(j.wait(), timeout=5 * 60)
1512+
assert status['state'] == 'Success', str((status, b.debug_info()))
1513+
except asyncio.TimeoutError:
1514+
# G2 instances are not always available within a time window
1515+
# acceptable for CI. This test is permitted to time out
1516+
# but not otherwise fail
1517+
pass
15121518

15131519

15141520
def test_job_private_instance_preemptible(client: BatchClient):

benchmark/Makefile

+4-6
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
include ../config.mk
22
include ../hail/version.mk
33

4-
SHORT_REVISION := $(shell git rev-parse --short=12 HEAD)
5-
64
HAIL_PYTHON3 ?= python3
75
PIP := $(HAIL_PYTHON3) -m pip
86

@@ -53,21 +51,21 @@ pushed_image: image
5351

5452
BENCHMARK_ITERS ?= 3
5553
BENCHMARK_REPLICATES ?= 5
56-
HAIL_WHEEL_DESCRIPTOR ?= $(HAIL_PIP_VERSION)-$(SHORT_REVISION)
5754
BENCHMARK_BUCKET ?= gs://hail-benchmarks-2
5855
.PHONY: submit
5956
submit: pushed_image install
6057
@echo Using pushed image `cat pushed_image`
6158
$(HAIL_PYTHON3) scripts/benchmark_in_batch.py \
6259
`cat pushed_image` \
6360
$(BENCHMARK_BUCKET)/$(shell whoami) \
64-
$(HAIL_WHEEL_DESCRIPTOR) \
61+
$(HAIL_VERSION) \
6562
$(BENCHMARK_REPLICATES) \
66-
$(BENCHMARK_ITERS)
63+
$(BENCHMARK_ITERS) \
64+
run
6765

6866
clean: cleanup_image
6967
rm -rf python/dist/*
7068
rm -rf python/build/*
71-
rm -r $(HAIL_BENCHMARK_VERSION_FILE)
69+
rm -f $(HAIL_BENCHMARK_VERSION_FILE)
7270

7371
FORCE:

0 commit comments

Comments
 (0)