 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

 def load_image(image_path, transform=None):
-    image = Image.open(image_path)
+    image = Image.open(image_path).convert('RGB')
     image = image.resize([224, 224], Image.LANCZOS)

     if transform is not None:
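The added .convert('RGB') call guards against grayscale, palette, and RGBA inputs, which would otherwise reach the 3-channel CNN encoder with the wrong number of channels. Below is a minimal sketch of a transform that could be passed into load_image; the ImageNet mean/std values are an assumption and should match whatever normalization train.py applied when the encoder was trained, and the image path in the usage line is only a placeholder.

    from torchvision import transforms

    # Convert the resized PIL image to a tensor and normalize it with
    # ImageNet statistics (assumed to match the training-time transform).
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406),
                             (0.229, 0.224, 0.225))])

    # Placeholder path for illustration only.
    image = load_image('png/example.png', transform)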
@@ -69,13 +69,13 @@ def main(args):
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
     parser.add_argument('--image', type=str, required=True, help='input image for generating caption')
-    parser.add_argument('--encoder_path', type=str, default='models/encoder-2-1000.ckpt', help='path for trained encoder')
-    parser.add_argument('--decoder_path', type=str, default='models/decoder-2-1000.ckpt', help='path for trained decoder')
+    parser.add_argument('--encoder_path', type=str, default='models/encoder-5-3000.pkl', help='path for trained encoder')
+    parser.add_argument('--decoder_path', type=str, default='models/decoder-5-3000.pkl', help='path for trained decoder')
     parser.add_argument('--vocab_path', type=str, default='data/vocab.pkl', help='path for vocabulary wrapper')

     # Model parameters (should be the same as the parameters in train.py)
     parser.add_argument('--embed_size', type=int, default=256, help='dimension of word embedding vectors')
     parser.add_argument('--hidden_size', type=int, default=512, help='dimension of lstm hidden states')
     parser.add_argument('--num_layers', type=int, default=1, help='number of layers in lstm')
     args = parser.parse_args()
-    main(args)
+    main(args)
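With the new defaults, captioning a single image only requires the --image flag; --encoder_path and --decoder_path need to be overridden only if the checkpoints live somewhere other than models/. The invocation below is a sketch: the script name sample.py and the example image path are assumptions, not taken from the diff.

    python sample.py --image png/example.png \
        --encoder_path models/encoder-5-3000.pkl \
        --decoder_path models/decoder-5-3000.pkl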