From 56c4568587eedc1180cedfea1d991b6b74a562e9 Mon Sep 17 00:00:00 2001
From: Christian Rauch <Christian.Rauch@ed.ac.uk>
Date: Mon, 8 Mar 2021 11:16:06 +0000
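Subject: [PATCH 1/5] remove trailing whitespace

Strip trailing whitespace from core/corr.py, core/raft.py, demo.py,
evaluate.py and train.py.

NOTE: the last hunk in core/corr.py is not a whitespace change. It
replaces the call to alt_cuda_corr.forward with self.alt_corr_fwd, an
attribute that is only assigned in AlternateCorrBlock.__init__ by
PATCH 4/5, so that call depends on the later patch being applied.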
---
 core/corr.py |  8 ++++----
 core/raft.py | 14 +++++++-------
 demo.py      |  4 ++--
 evaluate.py  |  6 +++---
 train.py     | 18 +++++++++---------
 5 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/core/corr.py b/core/corr.py
index cffcbc82..89e0d914 100644
--- a/core/corr.py
+++ b/core/corr.py
@@ -20,7 +20,7 @@ def __init__(self, fmap1, fmap2, num_levels=4, radius=4):
 
         batch, h1, w1, dim, h2, w2 = corr.shape
         corr = corr.reshape(batch*h1*w1, dim, h2, w2)
-        
+
         self.corr_pyramid.append(corr)
         for i in range(self.num_levels-1):
             corr = F.avg_pool2d(corr, 2, stride=2)
@@ -53,8 +53,8 @@ def __call__(self, coords):
     def corr(fmap1, fmap2):
         batch, dim, ht, wd = fmap1.shape
         fmap1 = fmap1.view(batch, dim, ht*wd)
-        fmap2 = fmap2.view(batch, dim, ht*wd) 
-        
+        fmap2 = fmap2.view(batch, dim, ht*wd)
+
         corr = torch.matmul(fmap1.transpose(1,2), fmap2)
         corr = corr.view(batch, ht, wd, 1, ht, wd)
         return corr  / torch.sqrt(torch.tensor(dim).float())
@@ -83,7 +83,7 @@ def __call__(self, coords):
             fmap2_i = self.pyramid[i][1].permute(0, 2, 3, 1).contiguous()
 
             coords_i = (coords / 2**i).reshape(B, 1, H, W, 2).contiguous()
-            corr, = alt_cuda_corr.forward(fmap1_i, fmap2_i, coords_i, r)
+            corr, = self.alt_corr_fwd(fmap1_i, fmap2_i, coords_i, r)
             corr_list.append(corr.squeeze(1))
 
         corr = torch.stack(corr_list, dim=1)
diff --git a/core/raft.py b/core/raft.py
index 652b81a3..f98433d4 100644
--- a/core/raft.py
+++ b/core/raft.py
@@ -31,7 +31,7 @@ def __init__(self, args):
             self.context_dim = cdim = 64
             args.corr_levels = 4
             args.corr_radius = 3
-        
+
         else:
             self.hidden_dim = hdim = 128
             self.context_dim = cdim = 128
@@ -46,12 +46,12 @@ def __init__(self, args):
 
         # feature network, context network, and update block
         if args.small:
-            self.fnet = SmallEncoder(output_dim=128, norm_fn='instance', dropout=args.dropout)        
+            self.fnet = SmallEncoder(output_dim=128, norm_fn='instance', dropout=args.dropout)
             self.cnet = SmallEncoder(output_dim=hdim+cdim, norm_fn='none', dropout=args.dropout)
             self.update_block = SmallUpdateBlock(self.args, hidden_dim=hdim)
 
         else:
-            self.fnet = BasicEncoder(output_dim=256, norm_fn='instance', dropout=args.dropout)        
+            self.fnet = BasicEncoder(output_dim=256, norm_fn='instance', dropout=args.dropout)
             self.cnet = BasicEncoder(output_dim=hdim+cdim, norm_fn='batch', dropout=args.dropout)
             self.update_block = BasicUpdateBlock(self.args, hidden_dim=hdim)
 
@@ -97,8 +97,8 @@ def forward(self, image1, image2, iters=12, flow_init=None, upsample=True, test_
 
         # run the feature network
         with autocast(enabled=self.args.mixed_precision):
-            fmap1, fmap2 = self.fnet([image1, image2])        
-        
+            fmap1, fmap2 = self.fnet([image1, image2])
+
         fmap1 = fmap1.float()
         fmap2 = fmap2.float()
         if self.args.alternate_corr:
@@ -135,10 +135,10 @@ def forward(self, image1, image2, iters=12, flow_init=None, upsample=True, test_
                 flow_up = upflow8(coords1 - coords0)
             else:
                 flow_up = self.upsample_flow(coords1 - coords0, up_mask)
-            
+
             flow_predictions.append(flow_up)
 
         if test_mode:
             return coords1 - coords0, flow_up
-            
+
         return flow_predictions
diff --git a/demo.py b/demo.py
index 5abc1da8..2b3b3850 100644
--- a/demo.py
+++ b/demo.py
@@ -26,7 +26,7 @@ def load_image(imfile):
 def viz(img, flo):
     img = img[0].permute(1,2,0).cpu().numpy()
     flo = flo[0].permute(1,2,0).cpu().numpy()
-    
+
     # map flow to rgb image
     flo = flow_viz.flow_to_image(flo)
     img_flo = np.concatenate([img, flo], axis=0)
@@ -50,7 +50,7 @@ def demo(args):
     with torch.no_grad():
         images = glob.glob(os.path.join(args.path, '*.png')) + \
                  glob.glob(os.path.join(args.path, '*.jpg'))
-        
+
         images = sorted(images)
         for imfile1, imfile2 in zip(images[:-1], images[1:]):
             image1 = load_image(imfile1)
diff --git a/evaluate.py b/evaluate.py
index 431a0f58..14da5159 100644
--- a/evaluate.py
+++ b/evaluate.py
@@ -24,13 +24,13 @@ def create_sintel_submission(model, iters=32, warm_start=False, output_path='sin
     model.eval()
     for dstype in ['clean', 'final']:
         test_dataset = datasets.MpiSintel(split='test', aug_params=None, dstype=dstype)
-        
+
         flow_prev, sequence_prev = None, None
         for test_id in range(len(test_dataset)):
             image1, image2, (sequence, frame) = test_dataset[test_id]
             if sequence != sequence_prev:
                 flow_prev = None
-            
+
             padder = InputPadder(image1.shape)
             image1, image2 = padder.pad(image1[None].cuda(), image2[None].cuda())
 
@@ -39,7 +39,7 @@ def create_sintel_submission(model, iters=32, warm_start=False, output_path='sin
 
             if warm_start:
                 flow_prev = forward_interpolate(flow_low[0])[None].cuda()
-            
+
             output_dir = os.path.join(output_path, dstype, sequence)
             output_file = os.path.join(output_dir, 'frame%04d.flo' % (frame+1))
 
diff --git a/train.py b/train.py
index 30757309..438f554e 100644
--- a/train.py
+++ b/train.py
@@ -47,7 +47,7 @@ def update(self):
 def sequence_loss(flow_preds, flow_gt, valid, gamma=0.8, max_flow=MAX_FLOW):
     """ Loss function defined over sequence of flow predictions """
 
-    n_predictions = len(flow_preds)    
+    n_predictions = len(flow_preds)
     flow_loss = 0.0
 
     # exlude invalid pixels and extremely large diplacements
@@ -84,7 +84,7 @@ def fetch_optimizer(args, model):
         pct_start=0.05, cycle_momentum=False, anneal_strategy='linear')
 
     return optimizer, scheduler
-    
+
 
 class Logger:
     def __init__(self, model, scheduler):
@@ -98,7 +98,7 @@ def _print_training_status(self):
         metrics_data = [self.running_loss[k]/SUM_FREQ for k in sorted(self.running_loss.keys())]
         training_str = "[{:6d}, {:10.7f}] ".format(self.total_steps+1, self.scheduler.get_last_lr()[0])
         metrics_str = ("{:10.4f}, "*len(metrics_data)).format(*metrics_data)
-        
+
         # print the training status
         print(training_str + metrics_str)
 
@@ -169,13 +169,13 @@ def train(args):
                 image1 = (image1 + stdv * torch.randn(*image1.shape).cuda()).clamp(0.0, 255.0)
                 image2 = (image2 + stdv * torch.randn(*image2.shape).cuda()).clamp(0.0, 255.0)
 
-            flow_predictions = model(image1, image2, iters=args.iters)            
+            flow_predictions = model(image1, image2, iters=args.iters)
 
             loss, metrics = sequence_loss(flow_predictions, flow, valid, args.gamma)
             scaler.scale(loss).backward()
-            scaler.unscale_(optimizer)                
+            scaler.unscale_(optimizer)
             torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
-            
+
             scaler.step(optimizer)
             scheduler.step()
             scaler.update()
@@ -196,11 +196,11 @@ def train(args):
                         results.update(evaluate.validate_kitti(model.module))
 
                 logger.write_dict(results)
-                
+
                 model.train()
                 if args.stage != 'chairs':
                     model.module.freeze_bn()
-            
+
             total_steps += 1
 
             if total_steps > args.num_steps:
@@ -217,7 +217,7 @@ def train(args):
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
     parser.add_argument('--name', default='raft', help="name your experiment")
-    parser.add_argument('--stage', help="determines which dataset to use for training") 
+    parser.add_argument('--stage', help="determines which dataset to use for training")
     parser.add_argument('--restore_ckpt', help="restore checkpoint")
     parser.add_argument('--small', action='store_true', help='use small model')
     parser.add_argument('--validation', type=str, nargs='+')

From 8c51bdc0b43bb03d6dc7f126fe0e56a41f2bf49c Mon Sep 17 00:00:00 2001
From: Christian Rauch <Christian.Rauch@ed.ac.uk>
Date: Mon, 11 Oct 2021 11:50:16 +0100
Subject: [PATCH 2/5] python3 shebang

---
 demo.py     | 2 ++
 evaluate.py | 2 ++
 train.py    | 2 ++
 3 files changed, 6 insertions(+)
 mode change 100644 => 100755 demo.py
 mode change 100644 => 100755 evaluate.py
 mode change 100644 => 100755 train.py

diff --git a/demo.py b/demo.py
old mode 100644
new mode 100755
index 2b3b3850..66929535
--- a/demo.py
+++ b/demo.py
@@ -1,3 +1,5 @@
+#! /usr/bin/env python3
+
 import sys
 sys.path.append('core')
 
diff --git a/evaluate.py b/evaluate.py
old mode 100644
new mode 100755
index 14da5159..075a3106
--- a/evaluate.py
+++ b/evaluate.py
@@ -1,3 +1,5 @@
+#! /usr/bin/env python3
+
 import sys
 sys.path.append('core')
 
diff --git a/train.py b/train.py
old mode 100644
new mode 100755
index 438f554e..c1736afa
--- a/train.py
+++ b/train.py
@@ -1,3 +1,5 @@
+#! /usr/bin/env python3
+
 from __future__ import print_function, division
 import sys
 sys.path.append('core')

From f5b0dcb733626b1d25e51cb373000557388a6f80 Mon Sep 17 00:00:00 2001
From: Christian Rauch <Christian.Rauch@ed.ac.uk>
Date: Mon, 8 Mar 2021 14:25:22 +0000
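Subject: [PATCH 3/5] turn python values into scalar tensors

Change the keyword defaults of forward() in core/raft.py from Python
values to tensors: iters=torch.tensor(12), flow_init=torch.tensor([]),
upsample=torch.tensor(True) and test_mode=torch.tensor(False).

An empty tensor now stands for "no initial flow", so flow_init is only
added to coords1 when it is not None and flow_init.numel() > 0; passing
None keeps working. demo.py is updated to pass iters and test_mode as
tensors.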
---
 core/raft.py | 4 ++--
 demo.py      | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/core/raft.py b/core/raft.py
index f98433d4..0bcfa826 100644
--- a/core/raft.py
+++ b/core/raft.py
@@ -83,7 +83,7 @@ def upsample_flow(self, flow, mask):
         return up_flow.reshape(N, 2, 8*H, 8*W)
 
 
-    def forward(self, image1, image2, iters=12, flow_init=None, upsample=True, test_mode=False):
+    def forward(self, image1, image2, iters=torch.tensor(12), flow_init=torch.tensor([]), upsample=torch.tensor(True), test_mode=torch.tensor(False)):
         """ Estimate optical flow between pair of frames """
 
         image1 = 2 * (image1 / 255.0) - 1.0
@@ -115,7 +115,7 @@ def forward(self, image1, image2, iters=12, flow_init=None, upsample=True, test_
 
         coords0, coords1 = self.initialize_flow(image1)
 
-        if flow_init is not None:
+        if flow_init is not None and flow_init.numel()>0:
             coords1 = coords1 + flow_init
 
         flow_predictions = []
diff --git a/demo.py b/demo.py
index 66929535..d0fb62be 100755
--- a/demo.py
+++ b/demo.py
@@ -61,7 +61,7 @@ def demo(args):
             padder = InputPadder(image1.shape)
             image1, image2 = padder.pad(image1, image2)
 
-            flow_low, flow_up = model(image1, image2, iters=20, test_mode=True)
+            flow_low, flow_up = model(image1, image2, iters=torch.tensor(20), test_mode=torch.tensor(True))
             viz(image1, flow_up)
 
 

From d70c7016de9a76bbc455da2d9af5469863460155 Mon Sep 17 00:00:00 2001
From: Christian Rauch <Christian.Rauch@ed.ac.uk>
Date: Mon, 11 Oct 2021 11:47:57 +0100
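Subject: [PATCH 4/5] only import 'alt_cuda_corr' when 'AlternateCorrBlock' is
 used

Remove the module-level try/except around "import alt_cuda_corr", which
silently ignored a failed import. The module is now imported in
AlternateCorrBlock.__init__ and its forward function is stored as
self.alt_corr_fwd, so a missing extension raises when an
AlternateCorrBlock is constructed.

The call site that uses self.alt_corr_fwd was already switched over in
PATCH 1/5.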
---
 core/corr.py | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/core/corr.py b/core/corr.py
index 89e0d914..8a497698 100644
--- a/core/corr.py
+++ b/core/corr.py
@@ -2,12 +2,6 @@
 import torch.nn.functional as F
 from utils.utils import bilinear_sampler, coords_grid
 
-try:
-    import alt_cuda_corr
-except:
-    # alt_cuda_corr is not compiled
-    pass
-
 
 class CorrBlock:
     def __init__(self, fmap1, fmap2, num_levels=4, radius=4):
@@ -62,6 +56,9 @@ def corr(fmap1, fmap2):
 
 class AlternateCorrBlock:
     def __init__(self, fmap1, fmap2, num_levels=4, radius=4):
+        import alt_cuda_corr
+        self.alt_corr_fwd = alt_cuda_corr.forward
+
         self.num_levels = num_levels
         self.radius = radius
 

From b84b8448f03cb6f8c49cd96911ee0c0cf32a36aa Mon Sep 17 00:00:00 2001
From: Christian Rauch <Christian.Rauch@ed.ac.uk>
Date: Mon, 11 Oct 2021 11:56:23 +0100
Subject: [PATCH 5/5] update documentation for compiling module 'alt_cuda_corr'

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 650275ed..1f705af2 100644
--- a/README.md
+++ b/README.md
@@ -75,6 +75,6 @@ If you have a RTX GPU, training can be accelerated using mixed precision. You ca
 ## (Optional) Efficent Implementation
 You can optionally use our alternate (efficent) implementation by compiling the provided cuda extension
 ```Shell
-cd alt_cuda_corr && python setup.py install && cd ..
+cd alt_cuda_corr && python3 setup.py install --user && cd ..
 ```
 and running `demo.py` and `evaluate.py` with the `--alternate_corr` flag Note, this implementation is somewhat slower than all-pairs, but uses significantly less GPU memory during the forward pass.