Skip to content

Commit dcbecd1

Browse files
committed
adds job type guard, flush_redis prompts, improved status output
1 parent fbea005 commit dcbecd1

File tree

6 files changed

+97
-36
lines changed

6 files changed

+97
-36
lines changed

pychunkedgraph/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "3.0.5"
1+
__version__ = "3.0.6"

pychunkedgraph/ingest/cli.py

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,22 +16,26 @@
1616
bootstrap,
1717
chunk_id_str,
1818
print_completion_rate,
19-
print_ingest_status,
19+
print_status,
2020
queue_layer_helper,
21+
job_type_guard,
2122
)
2223
from .simple_tests import run_all
2324
from .create.parent_layer import add_parent_chunk
2425
from ..graph.chunkedgraph import ChunkedGraph
2526
from ..utils.redis import get_redis_connection, keys as r_keys
2627

27-
ingest_cli = AppGroup("ingest")
28+
group_name = "ingest"
29+
ingest_cli = AppGroup(group_name)
2830

2931

3032
def init_ingest_cmds(app):
3133
app.cli.add_command(ingest_cli)
3234

3335

3436
@ingest_cli.command("flush_redis")
37+
@click.confirmation_option(prompt="Are you sure you want to flush redis?")
38+
@job_type_guard(group_name)
3539
def flush_redis():
3640
"""Flush redis db."""
3741
redis = get_redis_connection()
@@ -44,13 +48,16 @@ def flush_redis():
4448
@click.option("--raw", is_flag=True, help="Read edges from agglomeration output.")
4549
@click.option("--test", is_flag=True, help="Test 8 chunks at the center of dataset.")
4650
@click.option("--retry", is_flag=True, help="Rerun without creating a new table.")
51+
@job_type_guard(group_name)
4752
def ingest_graph(
4853
graph_id: str, dataset: click.Path, raw: bool, test: bool, retry: bool
4954
):
5055
"""
5156
Main ingest command.
5257
Takes ingest config from a yaml file and queues atomic tasks.
5358
"""
59+
redis = get_redis_connection()
60+
redis.set(r_keys.JOB_TYPE, group_name)
5461
with open(dataset, "r") as stream:
5562
config = yaml.safe_load(stream)
5663

@@ -70,6 +77,7 @@ def ingest_graph(
7077
@click.argument("graph_id", type=str)
7178
@click.argument("dataset", type=click.Path(exists=True))
7279
@click.option("--raw", is_flag=True)
80+
@job_type_guard(group_name)
7381
def pickle_imanager(graph_id: str, dataset: click.Path, raw: bool):
7482
"""
7583
Load ingest config into redis server.
@@ -83,11 +91,12 @@ def pickle_imanager(graph_id: str, dataset: click.Path, raw: bool):
8391

8492
meta, ingest_config, _ = bootstrap(graph_id, config=config, raw=raw)
8593
imanager = IngestionManager(ingest_config, meta)
86-
imanager.redis # pylint: disable=pointless-statement
94+
imanager.redis.set(r_keys.JOB_TYPE, group_name)
8795

8896

8997
@ingest_cli.command("layer")
9098
@click.argument("parent_layer", type=int)
99+
@job_type_guard(group_name)
91100
def queue_layer(parent_layer):
92101
"""
93102
Queue all chunk tasks at a given layer.
@@ -100,16 +109,21 @@ def queue_layer(parent_layer):
100109

101110

102111
@ingest_cli.command("status")
@job_type_guard(group_name)
def ingest_status():
    """Print ingest status to console by layer."""
    redis = get_redis_connection()
    # Only the load is guarded: a TypeError here is treated as "nothing stored
    # under the ingestion-manager key" (presumably `redis.get` returned None;
    # verify against `IngestionManager.from_pickle`). Keeping `print_status`
    # outside the `try` stops a TypeError raised while formatting the status
    # from being misreported as a missing job.
    try:
        imanager = IngestionManager.from_pickle(redis.get(r_keys.INGESTION_MANAGER))
    except TypeError as err:
        print(f"\nNo current `{group_name}` job found in redis: {err}")
        return
    print_status(imanager, redis)
108121

109122

110123
@ingest_cli.command("chunk")
111124
@click.argument("queue", type=str)
112125
@click.argument("chunk_info", nargs=4, type=int)
126+
@job_type_guard(group_name)
113127
def ingest_chunk(queue: str, chunk_info):
114128
"""Manually queue chunk when a job is stuck for whatever reason."""
115129
redis = get_redis_connection()
@@ -135,6 +149,7 @@ def ingest_chunk(queue: str, chunk_info):
135149
@click.argument("graph_id", type=str)
136150
@click.argument("chunk_info", nargs=4, type=int)
137151
@click.option("--n_threads", type=int, default=1)
152+
@job_type_guard(group_name)
138153
def ingest_chunk_local(graph_id: str, chunk_info, n_threads: int):
139154
"""Manually ingest a chunk on a local machine."""
140155
layer, coords = chunk_info[0], chunk_info[1:]
@@ -150,6 +165,7 @@ def ingest_chunk_local(graph_id: str, chunk_info, n_threads: int):
150165
@ingest_cli.command("rate")
151166
@click.argument("layer", type=int)
152167
@click.option("--span", default=10, help="Time span to calculate rate.")
168+
@job_type_guard(group_name)
153169
def rate(layer: int, span: int):
154170
redis = get_redis_connection()
155171
imanager = IngestionManager.from_pickle(redis.get(r_keys.INGESTION_MANAGER))
@@ -158,5 +174,6 @@ def rate(layer: int, span: int):
158174

159175
@ingest_cli.command("run_tests")
160176
@click.argument("graph_id", type=str)
177+
@job_type_guard(group_name)
161178
def run_tests(graph_id):
162179
run_all(ChunkedGraph(graph_id=graph_id))

pychunkedgraph/ingest/cli_upgrade.py

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -24,22 +24,26 @@
2424
from .utils import (
2525
chunk_id_str,
2626
print_completion_rate,
27-
print_ingest_status,
27+
print_status,
2828
queue_layer_helper,
2929
start_ocdbt_server,
30+
job_type_guard,
3031
)
3132
from ..graph.chunkedgraph import ChunkedGraph, ChunkedGraphMeta
3233
from ..utils.redis import get_redis_connection
3334
from ..utils.redis import keys as r_keys
3435

35-
upgrade_cli = AppGroup("upgrade")
36+
group_name = "upgrade"
37+
upgrade_cli = AppGroup(group_name)
3638

3739

3840
def init_upgrade_cmds(app):
3941
app.cli.add_command(upgrade_cli)
4042

4143

4244
@upgrade_cli.command("flush_redis")
45+
@click.confirmation_option(prompt="Are you sure you want to flush redis?")
46+
@job_type_guard(group_name)
4347
def flush_redis():
4448
"""Flush redis db."""
4549
redis = get_redis_connection()
@@ -50,11 +54,13 @@ def flush_redis():
5054
@click.argument("graph_id", type=str)
5155
@click.option("--test", is_flag=True, help="Test 8 chunks at the center of dataset.")
5256
@click.option("--ocdbt", is_flag=True, help="Store edges using ts ocdbt kv store.")
57+
@job_type_guard(group_name)
5358
def upgrade_graph(graph_id: str, test: bool, ocdbt: bool):
5459
"""
55-
Main upgrade command.
56-
Takes upgrade config from a yaml file and queues atomic tasks.
60+
Main upgrade command. Queues atomic tasks.
5761
"""
62+
redis = get_redis_connection()
63+
redis.set(r_keys.JOB_TYPE, group_name)
5864
ingest_config = IngestConfig(TEST_RUN=test)
5965
cg = ChunkedGraph(graph_id=graph_id)
6066
cg.client.add_graph_version(__version__, overwrite=True)
@@ -91,6 +97,7 @@ def upgrade_graph(graph_id: str, test: bool, ocdbt: bool):
9197

9298
@upgrade_cli.command("layer")
9399
@click.argument("parent_layer", type=int)
100+
@job_type_guard(group_name)
94101
def queue_layer(parent_layer):
95102
"""
96103
Queue all chunk tasks at a given layer.
@@ -103,17 +110,22 @@ def queue_layer(parent_layer):
103110

104111

105112
@upgrade_cli.command("status")
@job_type_guard(group_name)
def upgrade_status():
    """Print upgrade status to console."""
    redis = get_redis_connection()
    # Only the load is guarded: a TypeError here is treated as "nothing stored
    # under the ingestion-manager key" (presumably `redis.get` returned None;
    # verify against `IngestionManager.from_pickle`). Keeping `print_status`
    # outside the `try` stops a TypeError raised while formatting the status
    # from being misreported as a missing job.
    try:
        imanager = IngestionManager.from_pickle(redis.get(r_keys.INGESTION_MANAGER))
    except TypeError as err:
        print(f"\nNo current `{group_name}` job found in redis: {err}")
        return
    print_status(imanager, redis, upgrade=True)
111122

112123

113124
@upgrade_cli.command("chunk")
114125
@click.argument("queue", type=str)
115126
@click.argument("chunk_info", nargs=4, type=int)
116-
def ingest_chunk(queue: str, chunk_info):
127+
@job_type_guard(group_name)
128+
def upgrade_chunk(queue: str, chunk_info):
117129
"""Manually queue chunk when a job is stuck for whatever reason."""
118130
redis = get_redis_connection()
119131
imanager = IngestionManager.from_pickle(redis.get(r_keys.INGESTION_MANAGER))
@@ -137,6 +149,7 @@ def ingest_chunk(queue: str, chunk_info):
137149
@upgrade_cli.command("rate")
138150
@click.argument("layer", type=int)
139151
@click.option("--span", default=10, help="Time span to calculate rate.")
152+
@job_type_guard(group_name)
140153
def rate(layer: int, span: int):
141154
redis = get_redis_connection()
142155
imanager = IngestionManager.from_pickle(redis.get(r_keys.INGESTION_MANAGER))

pychunkedgraph/ingest/upgrade/parent_layer.py

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
import fastremap
88
import numpy as np
9-
from multiwrapper import multiprocessing_utils as mu
9+
from tqdm import tqdm
1010

1111
from pychunkedgraph.graph import ChunkedGraph
1212
from pychunkedgraph.graph.attributes import Connectivity, Hierarchy
@@ -51,7 +51,7 @@ def _get_cx_edges_at_timestamp(node, response, ts):
5151

5252

5353
def _populate_cx_edges_with_timestamps(
54-
cg: ChunkedGraph, layer: int, nodes: list, nodes_ts:list, earliest_ts
54+
cg: ChunkedGraph, layer: int, nodes: list, nodes_ts: list, earliest_ts
5555
):
5656
"""
5757
Collect timestamps of edits from children, since we use the same timestamp
@@ -83,7 +83,6 @@ def update_cross_edges(cg: ChunkedGraph, layer, node, node_ts, earliest_ts) -> l
8383
try:
8484
cx_edges_d = CX_EDGES[node][node_ts]
8585
except KeyError:
86-
print(CX_EDGES)
8786
raise KeyError(f"{node}:{node_ts}")
8887
edges = np.concatenate([empty_2d] + list(cx_edges_d.values()))
8988
if edges.size:
@@ -158,15 +157,14 @@ def update_chunk(
158157
chunked_nodes_ts = chunked(nodes_ts, task_size)
159158
cg_info = cg.get_serialized_info()
160159

161-
multi_args = []
160+
tasks = []
162161
for chunk, ts_chunk in zip(chunked_nodes, chunked_nodes_ts):
163162
args = (cg_info, layer, chunk, ts_chunk, earliest_ts)
164-
multi_args.append(args)
165-
166-
print(f"nodes: {len(nodes)}, tasks: {len(multi_args)}, size: {task_size}")
167-
mu.multiprocess_func(
168-
_update_cross_edges_helper,
169-
multi_args,
170-
n_threads=min(len(multi_args), mp.cpu_count()),
171-
)
163+
tasks.append(args)
164+
165+
with mp.Pool(min(mp.cpu_count(), len(tasks))) as pool:
166+
tqdm(
167+
pool.imap_unordered(_update_cross_edges_helper, tasks),
168+
total=len(tasks),
169+
)
172170
print(f"total elaspsed time: {time.time() - start}")

pychunkedgraph/ingest/utils.py

Lines changed: 41 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# pylint: disable=invalid-name, missing-docstring
22

33
import logging
4+
import functools
45
from os import environ
56
from time import sleep
67
from typing import Any, Generator, Tuple
@@ -16,6 +17,8 @@
1617
from ..graph.client import BackendClientInfo
1718
from ..graph.client.bigtable import BigTableConfig
1819
from ..utils.general import chunked
20+
from ..utils.redis import get_redis_connection
21+
from ..utils.redis import keys as r_keys
1922

2023
chunk_id_str = lambda layer, coords: f"{layer}_{'_'.join(map(str, coords))}"
2124

@@ -116,7 +119,7 @@ def print_completion_rate(imanager: IngestionManager, layer: int, span: int = 10
116119
print(f"{rate} chunks per second.")
117120

118121

119-
def print_ingest_status(imanager: IngestionManager, redis, upgrade: bool = False):
122+
def print_status(imanager: IngestionManager, redis, upgrade: bool = False):
120123
"""
121124
Helper to print status to console.
122125
If `upgrade=True`, status does not include the root layer,
@@ -128,6 +131,7 @@ def print_ingest_status(imanager: IngestionManager, redis, upgrade: bool = False
128131
layer_counts = imanager.cg_meta.layer_chunk_counts
129132

130133
pipeline = redis.pipeline()
134+
pipeline.get(r_keys.JOB_TYPE)
131135
worker_busy = []
132136
for layer in layers:
133137
pipeline.scard(f"{layer}c")
@@ -138,25 +142,32 @@ def print_ingest_status(imanager: IngestionManager, redis, upgrade: bool = False
138142
worker_busy.append(sum([w.get_state() == WorkerStatus.BUSY for w in workers]))
139143

140144
results = pipeline.execute()
145+
job_type = "not_available"
146+
if results[0] is not None:
147+
job_type = results[0].decode()
141148
completed = []
142149
queued = []
143150
failed = []
144-
for i in range(0, len(results), 3):
151+
for i in range(1, len(results), 3):
145152
result = results[i : i + 3]
146153
completed.append(result[0])
147154
queued.append(result[1])
148155
failed.append(result[2])
149156

150-
print(f"version: \t{imanager.cg.version}")
151-
print(f"graph_id: \t{imanager.cg.graph_id}")
152-
print(f"chunk_size: \t{imanager.cg.meta.graph_config.CHUNK_SIZE}")
153-
print("\nlayer status:")
157+
header = (
158+
f"\njob_type: \t{job_type}"
159+
f"\nversion: \t{imanager.cg.version}"
160+
f"\ngraph_id: \t{imanager.cg.graph_id}"
161+
f"\nchunk_size: \t{imanager.cg.meta.graph_config.CHUNK_SIZE}"
162+
"\n\nlayer status:"
163+
)
164+
print(header)
154165
for layer, done, count in zip(layers, completed, layer_counts):
155-
print(f"{layer}\t: {done:<9} / {count}")
166+
print(f"{layer}\t| {done:9} / {count} \t| {done/count:6.1%}")
156167

157168
print("\n\nqueue status:")
158169
for layer, q, f, wb in zip(layers, queued, failed, worker_busy):
159-
print(f"l{layer}\t: queued: {q:<10} failed: {f:<10} busy: {wb}")
170+
print(f"l{layer}\t| queued: {q:<10} failed: {f:<10} busy: {wb}")
160171

161172

162173
def queue_layer_helper(parent_layer: int, imanager: IngestionManager, fn):
@@ -190,3 +201,25 @@ def queue_layer_helper(parent_layer: int, imanager: IngestionManager, fn):
190201
)
191202
)
192203
q.enqueue_many(job_datas)
204+
205+
206+
def job_type_guard(job_type: str):
    """
    Decorator factory that blocks a command while a different job type is active.

    The wrapped callable runs when the `JOB_TYPE` key in redis is unset or
    equal to `job_type`. Otherwise a warning is printed and the process
    exits with status 1 (raises `SystemExit`).
    """

    def decorator_job_type_guard(func):
        @functools.wraps(func)
        def wrapper_job_type_guard(*args, **kwargs):
            redis = get_redis_connection()
            current_type = redis.get(r_keys.JOB_TYPE)
            if current_type is not None:
                current_type = current_type.decode()
                if current_type != job_type:
                    msg = (
                        f"Currently running `{current_type}`. You're attempting to run `{job_type}`."
                        f"\nRun `[flask] {current_type} flush_redis` to clear the current job and restart."
                    )
                    print(f"\n*WARNING*\n{msg}")
                    # The bare `exit` builtin is injected by the `site` module
                    # and is not guaranteed to exist (e.g. `python -S`);
                    # raising SystemExit yields the same exit status without
                    # that dependency.
                    raise SystemExit(1)
            return func(*args, **kwargs)

        return wrapper_job_type_guard

    return decorator_job_type_guard

pychunkedgraph/utils/redis.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@
1919
REDIS_PASSWORD = os.environ.get("REDIS_PASSWORD", "")
2020
REDIS_URL = f"redis://:{REDIS_PASSWORD}@{REDIS_HOST}:{REDIS_PORT}/0"
2121

22-
keys_fields = ("INGESTION_MANAGER",)
23-
keys_defaults = ("pcg:imanager",)
22+
keys_fields = ("INGESTION_MANAGER", "JOB_TYPE")
23+
keys_defaults = ("pcg:imanager", "pcg:job_type")
2424
Keys = namedtuple("keys", keys_fields, defaults=keys_defaults)
2525

2626
keys = Keys()

0 commit comments

Comments
 (0)