Merge pull request BVLC#1228 from longjon/solver-step
Refactor Solver to allow interactive stepping

Conflicts:
	src/caffe/solver.cpp
longjon committed Jan 2, 2015
2 parents d389945 + fda4809 commit f9fa540
Showing 6 changed files with 86 additions and 37 deletions.
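
For context, the refactor splits the training loop out of `Solve()` into a public `Step(int iters)` and exposes it to Python, so training can be driven incrementally. A minimal sketch of the resulting workflow, assuming pycaffe built from this branch is on `PYTHONPATH`; the prototxt path is a placeholder:

```python
import caffe  # pycaffe built from this branch

# 'solver.prototxt' is a placeholder; any SGD solver definition works.
solver = caffe.SGDSolver('solver.prototxt')

solver.step(100)    # run 100 training iterations, then return control
print(solver.iter)  # solver state persists between calls -> 100
solver.step(100)    # resume for another 100 iterations
```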
7 changes: 4 additions & 3 deletions README.md
@@ -1,4 +1,5 @@
# Caffe
This is Caffe with several unmerged PRs and no guarantees.

Caffe is a deep learning framework developed with cleanliness, readability, and speed in mind.<br />
Consult the [project website](http://caffe.berkeleyvision.org) for all documentation.
Everything here is subject to change, including the history of this branch.

See `future.sh` for details.
29 changes: 29 additions & 0 deletions future.sh
@@ -0,0 +1,29 @@
#!/bin/bash
git checkout dev
git branch -D future
git checkout -b future
# deconv layer, coord maps, net pointer, crop layer
hub merge https://github.com/BVLC/caffe/pull/1639
# reshaping data layer
hub merge https://github.com/BVLC/caffe/pull/1313
# softmax missing values
hub merge https://github.com/BVLC/caffe/pull/1654
# python testing
hub merge https://github.com/BVLC/caffe/pull/1473
# gradient accumulation
hub merge https://github.com/BVLC/caffe/pull/1663
# solver stepping
hub merge https://github.com/BVLC/caffe/pull/1228
git add future.sh
git commit -m 'add creation script'

cat << 'EOF' > README.md
This is Caffe with several unmerged PRs and no guarantees.
Everything here is subject to change, including the history of this branch.
See `future.sh` for details.
EOF

git add README.md
git commit -m 'update readme'
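
Note: `future.sh` merges pull requests by URL with `hub merge`, so recreating this branch assumes the GitHub `hub` CLI is installed and the checkout has the BVLC remote configured.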
4 changes: 3 additions & 1 deletion include/caffe/solver.hpp
@@ -26,6 +26,7 @@ class Solver {
// in a non-zero iter number to resume training for a pre-trained net.
virtual void Solve(const char* resume_file = NULL);
inline void Solve(const string resume_file) { Solve(resume_file.c_str()); }
void Step(int iters);
virtual ~Solver() {}
inline shared_ptr<Net<Dtype> > net() { return net_; }
inline const vector<shared_ptr<Net<Dtype> > >& test_nets() {
@@ -36,7 +37,7 @@
protected:
// PreSolve is run before any solving iteration starts, allowing one to
// put up some scaffold.
virtual void PreSolve() {}
virtual void PreSolve();
// Get the update value for the current iteration.
virtual void ComputeUpdateValue() = 0;
// The Solver::Snapshot function implements the basic snapshotting utility
@@ -60,6 +61,7 @@
int current_step_;
shared_ptr<Net<Dtype> > net_;
vector<shared_ptr<Net<Dtype> > > test_nets_;
bool initialized_;

DISABLE_COPY_AND_ASSIGN(Solver);
};
3 changes: 2 additions & 1 deletion python/caffe/_caffe.cpp
@@ -197,7 +197,8 @@ BOOST_PYTHON_MODULE(_caffe) {
.add_property("test_nets", &PySGDSolver::test_nets)
.add_property("iter", &PySGDSolver::iter)
.def("solve", &PySGDSolver::Solve)
.def("solve", &PySGDSolver::SolveResume);
.def("solve", &PySGDSolver::SolveResume)
.def("step", &PySGDSolver::Step);

bp::class_<vector<shared_ptr<PyNet> > >("NetVec")
.def(bp::vector_indexing_suite<vector<shared_ptr<PyNet> >, true>());
1 change: 1 addition & 0 deletions python/caffe/_caffe.hpp
@@ -181,6 +181,7 @@ class PySGDSolver {
vector<shared_ptr<PyNet> > test_nets() { return test_nets_; }
int iter() { return solver_->iter(); }
void Solve() { return solver_->Solve(); }
void Step(int iters) { solver_->Step(iters); }
void SolveResume(const string& resume_file);

protected:
79 changes: 47 additions & 32 deletions src/caffe/solver.cpp
@@ -29,9 +29,12 @@ Solver<Dtype>::Solver(const string& param_file)

template <typename Dtype>
void Solver<Dtype>::Init(const SolverParameter& param) {
initialized_ = false;
iter_ = 0;
LOG(INFO) << "Initializing solver from parameters: " << std::endl
<< param.DebugString();
param_ = param;
CHECK_GE(param_.average_loss(), 1) << "average_loss should be non-negative.";
if (param_.random_seed() >= 0) {
Caffe::set_random_seed(param_.random_seed());
}
@@ -155,39 +158,19 @@ void Solver<Dtype>::InitTestNets() {
}

template <typename Dtype>
void Solver<Dtype>::Solve(const char* resume_file) {
Caffe::set_phase(Caffe::TRAIN);
LOG(INFO) << "Solving " << net_->name();
LOG(INFO) << "Learning Rate Policy: " << param_.lr_policy();
PreSolve();

iter_ = 0;
current_step_ = 0;
if (resume_file) {
LOG(INFO) << "Restoring previous solver status from " << resume_file;
Restore(resume_file);
void Solver<Dtype>::Step(int iters) {
if (!initialized_) {
PreSolve();
}
// Remember the initial iter_ value; will be non-zero if we loaded from a
// resume_file above.
const int start_iter = iter_;

vector<Blob<Dtype>*> bottom_vec;
const int start_iter = iter_;
const int stop_iter = iter_ + iters;
int average_loss = this->param_.average_loss();

CHECK_GE(average_loss, 1) << "average_loss should be non-negative.";

vector<Dtype> losses;
Dtype smoothed_loss = 0;

// For a network that is trained by the solver, no bottom or top vecs
// should be given, and we will just provide dummy vecs.
vector<Blob<Dtype>*> bottom_vec;
for (; iter_ < param_.max_iter(); ++iter_) {
// Save a snapshot if needed.
if (param_.snapshot() && iter_ > start_iter &&
iter_ % param_.snapshot() == 0) {
Snapshot();
}

for (; iter_ < stop_iter; ++iter_) {
// zero-init the params
for (int i = 0; i < net_->params().size(); ++i) {
shared_ptr<Blob<Dtype> > blob = net_->params()[i];
@@ -252,13 +235,44 @@ void Solver<Dtype>::Solve(const char* resume_file) {
}
}
}

ComputeUpdateValue();
net_->Update();

// Save a snapshot if needed.
if (param_.snapshot() && (iter_ + 1) % param_.snapshot() == 0) {
Snapshot();
}
}
}

template <typename Dtype>
void Solver<Dtype>::PreSolve() {
initialized_ = true;
iter_ = 0;
current_step_ = 0;
}

template <typename Dtype>
void Solver<Dtype>::Solve(const char* resume_file) {
Caffe::set_phase(Caffe::TRAIN);
LOG(INFO) << "Solving " << net_->name();
LOG(INFO) << "Learning Rate Policy: " << param_.lr_policy();

PreSolve();
if (resume_file) {
LOG(INFO) << "Restoring previous solver status from " << resume_file;
Restore(resume_file);
}

// For a network that is trained by the solver, no bottom or top vecs
// should be given, and we will just provide dummy vecs.
Step(param_.max_iter() - iter_);
// If we haven't already, save a snapshot after optimization, unless
// overridden by setting snapshot_after_train := false
if (param_.snapshot_after_train()
&& (!param_.snapshot() || iter_ % param_.snapshot() != 0)) {
Snapshot();
}
// Always save a snapshot after optimization, unless overridden by setting
// snapshot_after_train := false.
if (param_.snapshot_after_train()) { Snapshot(); }
// After the optimization is done, run an additional train and test pass to
// display the train and test loss/outputs if appropriate (based on the
// display and test_interval settings, respectively). Unlike in the rest of
@@ -267,7 +281,7 @@ void Solver<Dtype>::Solve(const char* resume_file) {
// display the loss, which is computed in the forward pass.
if (param_.display() && iter_ % param_.display() == 0) {
Dtype loss;
net_->Forward(bottom_vec, &loss);
net_->ForwardPrefilled(&loss);
LOG(INFO) << "Iteration " << iter_ << ", loss = " << loss;
}
if (param_.test_interval() && iter_ % param_.test_interval() == 0) {
@@ -439,6 +453,7 @@ Dtype SGDSolver<Dtype>::GetLearningRate() {

template <typename Dtype>
void SGDSolver<Dtype>::PreSolve() {
Solver<Dtype>::PreSolve();
// Initialize the history
vector<shared_ptr<Blob<Dtype> > >& net_params = this->net_->params();
history_.clear();
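
Taken together, these changes let an interactive session interleave training with inspection, which is the point of the refactor: `Step()` lazily runs `PreSolve()` via the new `initialized_` flag, so stepping works without a prior `Solve()` call. A hedged sketch, assuming the net defines a blob named `loss` (the prototxt path is again a placeholder):

```python
import caffe

solver = caffe.SGDSolver('solver.prototxt')  # placeholder path
for _ in range(10):
    # 100 SGD iterations per call; PreSolve() runs lazily on the first step.
    solver.step(100)
    # State (iter, weights, momentum history) persists across calls,
    # so it can be inspected between bursts of training:
    print('iter', solver.iter, 'loss', solver.net.blobs['loss'].data)
```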
