Skip to content

Commit 934a58c

Browse files
committed
add label studio compliant taxonomy
1 parent 9f20a60 commit 934a58c

File tree

8 files changed

+3466
-38
lines changed

8 files changed

+3466
-38
lines changed

conf/classification_model/finetune.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ crop_image_dir: /blue/ewhite/b.weinstein/BOEM/classification/crops/
66
under_sample_ratio: 0
77
trainer:
88
fast_dev_run: False
9-
max_epochs: 1
9+
max_epochs: 10
1010
lr: 0.00001
1111
batch_size: 16
1212
workers: 10

conf/config.yaml

+4-4
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ predict:
3434
patch_size: 1000
3535
patch_overlap: 0
3636
min_score: 0.4
37-
batch_size: 32
37+
batch_size: 48
3838

3939
pipeline:
4040
confidence_threshold: 0.9
@@ -54,10 +54,10 @@ detection_model:
5454
labels:
5555
- "Object"
5656
trainer:
57-
batch_size: 12
57+
batch_size: 16
5858
train:
5959
fast_dev_run: False
60-
epochs: 20
60+
epochs: 10
6161
lr: 0.00001
6262
workers: 10
6363
validation:
@@ -86,7 +86,7 @@ active_learning:
8686
evaluation:
8787
dask_client:
8888
pool_limit: 500
89-
gpus: 1
89+
gpus: 2
9090

9191
active_testing:
9292
image_dir: /blue/ewhite/b.weinstein/BOEM/sample_flight/JPG_2024_Jan27

src/active_learning.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ def human_review(predictions, min_score=0.1, confident_threshold=0.5):
122122

123123
return confident_predictions, uncertain_predictions
124124

125-
def generate_pool_predictions(image_dir, patch_size=512, patch_overlap=0.1, min_score=0.1, model=None, model_path=None, dask_client=None, batch_size=16, comet_logger=None, pool_limit=1000):
125+
def generate_pool_predictions(image_dir, patch_size=512, patch_overlap=0.1, min_score=0.1, model=None, model_path=None, dask_client=None, batch_size=16, comet_logger=None, pool_limit=1000, crop_model=None):
126126
"""
127127
Generate predictions for the training pool.
128128
@@ -136,6 +136,7 @@ def generate_pool_predictions(image_dir, patch_size=512, patch_overlap=0.1, min_
136136
dask_client (dask.distributed.Client, optional): A Dask client for parallel processing. Defaults to None.
137137
batch_size (int, optional): The batch size for prediction. Defaults to 16.
138138
comet_logger (CometLogger, optional): A CometLogger object. Defaults to None.
139+
crop_model (deepforest.model.CropModel, optional): A deepforest.model.CropModel object that is passed through to detection.predict. Defaults to None.
139140
pool_limit (int, optional): The maximum number of images to consider. Defaults to 1000.
140141
141142
Returns:
@@ -168,7 +169,7 @@ def update_sys_path():
168169
blocks = dask_pool.to_delayed().ravel()
169170
block_futures = []
170171
for block in blocks:
171-
block_future = dask_client.submit(detection.predict, image_paths=block.compute(), patch_size=patch_size, patch_overlap=patch_overlap, model_path=model_path)
172+
block_future = dask_client.submit(detection.predict, image_paths=block.compute(), patch_size=patch_size, patch_overlap=patch_overlap, model_path=model_path, crop_model=crop_model)
172173
block_futures.append(block_future)
173174
# Get results
174175
dask_results = []
@@ -177,7 +178,7 @@ def update_sys_path():
177178
dask_results.append(pd.concat(block_result))
178179
preannotations = pd.concat(dask_results)
179180
else:
180-
preannotations = detection.predict(m=model, image_paths=pool, patch_size=patch_size, patch_overlap=patch_overlap, batch_size=batch_size)
181+
preannotations = detection.predict(m=model, image_paths=pool, patch_size=patch_size, patch_overlap=patch_overlap, batch_size=batch_size, crop_model=crop_model)
181182
preannotations = pd.concat(preannotations)
182183

183184
if comet_logger:

src/classification.py

+8
Original file line numberDiff line numberDiff line change
@@ -128,8 +128,16 @@ def preprocess_images(model, annotations, root_dir, save_dir):
128128
# Remove any negative values
129129
annotations = annotations[(annotations['xmin'] >= 0) & (annotations['ymin'] >= 0) & (annotations['xmax'] >= 0) & (annotations['ymax'] >= 0)]
130130
boxes = annotations[['xmin', 'ymin', 'xmax', 'ymax']].values.tolist()
131+
132+
# Expand by 20 pixels on all sides
133+
boxes = [[box[0]-20, box[1]-20, box[2]+20, box[3]+20] for box in boxes]
134+
135+
# Clamp coordinates at 0 so the 20-pixel expansion cannot produce negative values
136+
boxes = [[max(0, box[0]), max(0, box[1]), max(0, box[2]), max(0, box[3])] for box in boxes]
137+
131138
images = annotations["image_path"].values
132139
labels = annotations["label"].values
140+
133141
model.write_crops(boxes=boxes, root_dir=root_dir, images=images, labels=labels, savedir=save_dir)
134142

135143
def preprocess_and_train_classification(config, train_df=None, validation_df=None, comet_logger=None):

src/detection.py

+24-27
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,8 @@ def evaluate(model, test_csv, image_root_dir):
3131
"""
3232
# create trainer
3333
devices = torch.cuda.device_count()
34-
model.create_trainer(num_nodes=1, devices=devices)
34+
strategy = "ddp" if devices > 1 else None
35+
model.create_trainer(num_nodes=1, devices=devices, strategy=strategy)
3536
model.config["validation"]["csv_file"] = test_csv
3637
model.config["validation"]["root_dir"] = image_root_dir
3738
results = model.trainer.validate(model)
@@ -163,32 +164,28 @@ def train(model, train_annotations, test_annotations, train_image_dir, comet_log
163164
model.config[key] = value
164165

165166
devices = torch.cuda.device_count()
166-
if comet_logger:
167-
comet_logger.experiment.log_parameters(model.config)
168-
comet_logger.experiment.log_table("train.csv", train_annotations)
169-
comet_logger.experiment.log_table("test.csv", test_annotations)
170-
model.create_trainer(logger=comet_logger, num_nodes=1, devices=devices)
171-
else:
172-
model.create_trainer(num_nodes=1, devices=devices)
173-
174-
with comet_logger.experiment.context_manager("train_images"):
175-
non_empty_train_annotations = read_file(model.config["train"]["csv_file"], root_dir=train_image_dir)
176-
# Sanity check for debug
177-
n = 5 if non_empty_train_annotations.shape[0] > 5 else non_empty_train_annotations.shape[0]
178-
for filename in non_empty_train_annotations.image_path.sample(n=n).unique():
179-
sample_train_annotations_for_image = non_empty_train_annotations[non_empty_train_annotations.image_path == filename]
180-
sample_train_annotations_for_image.root_dir = train_image_dir
181-
visualize.plot_annotations(sample_train_annotations_for_image, savedir=tmpdir)
182-
comet_logger.experiment.log_image(os.path.join(tmpdir, filename))
183-
184-
with comet_logger.experiment.context_manager("test_images"):
185-
non_empty_validation_annotations = read_file(model.config["validation"]["csv_file"], root_dir=train_image_dir)
186-
n = 5 if non_empty_validation_annotations.shape[0] > 5 else non_empty_validation_annotations.shape[0]
187-
for filename in non_empty_validation_annotations.image_path.sample(n=n).unique():
188-
sample_validation_annotations_for_image = non_empty_validation_annotations[non_empty_validation_annotations.image_path == filename]
189-
sample_validation_annotations_for_image.root_dir = train_image_dir
190-
visualize.plot_annotations(sample_validation_annotations_for_image, savedir=tmpdir)
191-
comet_logger.experiment.log_image(os.path.join(tmpdir, filename))
167+
strategy = "ddp" if devices > 1 else None
168+
comet_logger.experiment.log_parameters(model.config)
169+
comet_logger.experiment.log_table("train.csv", train_annotations)
170+
comet_logger.experiment.log_table("test.csv", test_annotations)
171+
model.create_trainer(logger=comet_logger, num_nodes=1, accelerator="gpu", strategy="ddp", devices=2)
172+
173+
non_empty_train_annotations = read_file(model.config["train"]["csv_file"], root_dir=train_image_dir)
174+
# Sanity check for debug
175+
n = 5 if non_empty_train_annotations.shape[0] > 5 else non_empty_train_annotations.shape[0]
176+
for filename in non_empty_train_annotations.image_path.sample(n=n).unique():
177+
sample_train_annotations_for_image = non_empty_train_annotations[non_empty_train_annotations.image_path == filename]
178+
sample_train_annotations_for_image.root_dir = train_image_dir
179+
visualize.plot_annotations(sample_train_annotations_for_image, savedir=tmpdir)
180+
comet_logger.experiment.log_image(os.path.join(tmpdir, filename),metadata={"name":filename,"context":'train_images'})
181+
182+
non_empty_validation_annotations = read_file(model.config["validation"]["csv_file"], root_dir=train_image_dir)
183+
n = 5 if non_empty_validation_annotations.shape[0] > 5 else non_empty_validation_annotations.shape[0]
184+
for filename in non_empty_validation_annotations.image_path.sample(n=n).unique():
185+
sample_validation_annotations_for_image = non_empty_validation_annotations[non_empty_validation_annotations.image_path == filename]
186+
sample_validation_annotations_for_image.root_dir = train_image_dir
187+
visualize.plot_annotations(sample_validation_annotations_for_image, savedir=tmpdir)
188+
comet_logger.experiment.log_image(os.path.join(tmpdir, filename),metadata={"name":filename,"context":'validation_images'})
192189

193190
with comet_logger.experiment.context_manager("detection"):
194191
model.trainer.fit(model)

src/pipeline.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,8 @@ def run(self):
162162
model_path=detection_checkpoint_path,
163163
dask_client=dask_client,
164164
batch_size=self.config.predict.batch_size,
165-
comet_logger=self.comet_logger
165+
comet_logger=self.comet_logger,
166+
crop_model=trained_classification_model
166167
)
167168
self.comet_logger.experiment.log_table(tabular_data=training_pool_predictions, filename="training_pool_predictions.csv")
168169

submit.sh

+3-2
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,10 @@
1010
#SBATCH --output=/home/b.weinstein/logs/BOEM%j.out # Standard output and error log
1111
#SBATCH --error=/home/b.weinstein/logs/BOEM%j.err
1212
#SBATCH --partition=gpu
13-
#SBATCH --gpus=1
13+
#SBATCH --ntasks-per-node=2
14+
#SBATCH --gpus=2
1415

1516
source activate BOEM
1617

1718
cd ~/BOEM/
18-
srun python main.py check_annotations=True active_learning.pool_limit=10 active_testing.n_images=1 active_learning.n_images=1 ++classification_model.trainer.fast_dev_run=True ++detection_model.trainer.train.fast_dev_run=True debug=True
19+
srun python main.py check_annotations=True active_learning.pool_limit=10 active_testing.n_images=1 active_learning.n_images=1 debug=True

0 commit comments

Comments
 (0)