-
Notifications
You must be signed in to change notification settings - Fork 224
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
20f14d2
commit 8415053
Showing
12 changed files
with
1,646 additions
and
0 deletions.
There are no files selected for viewing
61 changes: 61 additions & 0 deletions
61
download_and_clean_data_scripts/Copy_50_files_to_another_folder.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"from shutil import copyfile\n", | ||
"import os" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"img_dir = r\"./clipart_new/\"\n", | ||
"\n", | ||
"k = 0\n", | ||
"s = 0\n", | ||
"for filename in os.listdir(img_dir):\n", | ||
" \n", | ||
" if s < 100:\n", | ||
" filepath = os.path.join(img_dir, filename)\n", | ||
" copyfile(filepath, '/home/ubuntu/storage/clipart/' + filename)\n", | ||
" s += 1\n", | ||
" k += 1" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Environment (conda_tensorflow_p36)", | ||
"language": "python", | ||
"name": "conda_tensorflow_p36" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.6.6" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
Large diffs are not rendered by default.
Oops, something went wrong.
424 changes: 424 additions & 0 deletions
424
download_and_clean_data_scripts/Create_random_cropped_images_from_large_images.ipynb
Large diffs are not rendered by default.
Oops, something went wrong.
105 changes: 105 additions & 0 deletions
105
download_and_clean_data_scripts/Remove_Black_and_White_Photos.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"from PIL import Image\n", | ||
"from random import randint\n", | ||
"import uuid\n", | ||
"from multiprocessing import Pool\n", | ||
"import os" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stderr", | ||
"output_type": "stream", | ||
"text": [ | ||
"/home/ubuntu/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/PIL/TiffImagePlugin.py:764: UserWarning: Corrupt EXIF data. Expecting to read 4 bytes but only got 0. \n", | ||
" warnings.warn(str(msg))\n" | ||
] | ||
}, | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Done!\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"def remove_bw(filepath_filename):\n", | ||
" try:\n", | ||
" with Image.open(filepath_filename[0]) as im:\n", | ||
" x, y = im.size\n", | ||
" pixels = im.load()\n", | ||
" if im.mode == 'L' or type(pixels) == int:\n", | ||
" os.rename(filepath_filename[0], '/home/ubuntu/storage/BW_images/' + filepath_filename[1])\n", | ||
" else:\n", | ||
" bw_pixel = 0\n", | ||
" for i in range(1000):\n", | ||
" x_pixel = randint(0, (x-1))\n", | ||
" y_pixel = randint(0, (y-1))\n", | ||
" if len(pixels[x_pixel, y_pixel]) != 3:\n", | ||
" bw_pixel += 1000\n", | ||
" else:\n", | ||
" match_pixel = pixels[x_pixel, y_pixel][0]\n", | ||
" if match_pixel == pixels[x_pixel, y_pixel][1] or match_pixel == pixels[x_pixel, y_pixel][2]:\n", | ||
" bw_pixel += 1\n", | ||
" if bw_pixel > 990:\n", | ||
" os.rename(filepath_filename[0], '/home/ubuntu/storage/BW_images/' + filepath_filename[1])\n", | ||
" except:\n", | ||
" print('Error!')\n", | ||
" os.rename(filepath_filename[0], '/home/ubuntu/storage/error_files/' + filepath_filename[1])\n", | ||
"\n", | ||
"if __name__ == \"__main__\":\n", | ||
" img_dir = r\"./pixa_ready/\"\n", | ||
" images = []\n", | ||
"\n", | ||
" for filename in os.listdir(img_dir):\n", | ||
" filepath = os.path.join(img_dir, filename)\n", | ||
" images.append([filepath, filename])\n", | ||
"\n", | ||
" pool = Pool(processes=16) \n", | ||
" pool.map(remove_bw, images)\n", | ||
"\n", | ||
" print(\"Done!\")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Environment (conda_tensorflow_p36)", | ||
"language": "python", | ||
"name": "conda_tensorflow_p36" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.6.6" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
128 changes: 128 additions & 0 deletions
128
download_and_clean_data_scripts/Remove_clipart_images_from_dataset.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,128 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"from PIL import Image\n", | ||
"from random import randint\n", | ||
"import uuid\n", | ||
"from multiprocessing import Pool\n", | ||
"import os" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stderr", | ||
"output_type": "stream", | ||
"text": [ | ||
"/home/ubuntu/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/PIL/TiffImagePlugin.py:764: UserWarning: Corrupt EXIF data. Expecting to read 4 bytes but only got 0. \n", | ||
" warnings.warn(str(msg))\n" | ||
] | ||
}, | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Done!\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"def remove_clipart(filepath_filename):\n", | ||
" try:\n", | ||
" with Image.open(filepath_filename[0]) as im:\n", | ||
" x, y = im.size\n", | ||
" pixels = im.load()\n", | ||
" if im.mode == 'L' or type(pixels) == int or x < 200 or y < 200:\n", | ||
" os.rename(filepath_filename[0], '/home/ubuntu/storage/error_files/' + filepath_filename[1])\n", | ||
" else: \n", | ||
" status = False\n", | ||
"\n", | ||
" for i in range(4):\n", | ||
" light_pixel = 0\n", | ||
"\n", | ||
" if i == 0:\n", | ||
" x_pixel = 0\n", | ||
" y_pixel = 0\n", | ||
"\n", | ||
" if i == 1:\n", | ||
" x_pixel = x - 35\n", | ||
" y_pixel = 0\n", | ||
"\n", | ||
" if i == 2:\n", | ||
" x_pixel = 0\n", | ||
" y_pixel = y - 1\n", | ||
"\n", | ||
" if i == 3:\n", | ||
" x_pixel = x - 35\n", | ||
" y_pixel = y - 1\n", | ||
"\n", | ||
" for k in range(30):\n", | ||
" if len(pixels[x_pixel, y_pixel]) != 3:\n", | ||
" status = True\n", | ||
" else:\n", | ||
" first = pixels[x_pixel, y_pixel][0]\n", | ||
" second = pixels[x_pixel, y_pixel][1]\n", | ||
" third = pixels[x_pixel, y_pixel][2]\n", | ||
" if first > 246 and second > 246 and third > 246:\n", | ||
" light_pixel += 1\n", | ||
" x_pixel += 1\n", | ||
" if light_pixel == 30:\n", | ||
" status = True\n", | ||
"\n", | ||
" if status:\n", | ||
" os.rename(filepath_filename[0], '/home/ubuntu/storage/BW_images/' + filepath_filename[1])\n", | ||
" except IOError:\n", | ||
" os.rename(filepath_filename[0], '/home/ubuntu/storage/error_files/' + filepath_filename[1])\n", | ||
"\n", | ||
"if __name__ == \"__main__\":\n", | ||
" img_dir = r\"./pixa_ready/\"\n", | ||
" images = []\n", | ||
"\n", | ||
" for filename in os.listdir(img_dir):\n", | ||
" filepath = os.path.join(img_dir, filename)\n", | ||
" images.append([filepath, filename])\n", | ||
"\n", | ||
" pool = Pool(processes=16) \n", | ||
" pool.map(remove_clipart, images)\n", | ||
"\n", | ||
" print(\"Done!\")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Environment (conda_tensorflow_p36)", | ||
"language": "python", | ||
"name": "conda_tensorflow_p36" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.6.6" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 5, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import os\n", | ||
"from PIL import Image\n", | ||
"from shutil import copyfile" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 8, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"img_dir = r\"./all_images/\"\n", | ||
"\n", | ||
"images = []\n", | ||
"for filename in os.listdir(img_dir):\n", | ||
" images.append(filename)\n", | ||
"\n", | ||
"images.sort()\n", | ||
"\n", | ||
"def get_orgname(org_name):\n", | ||
" name = org_name.partition(\" \")[0]\n", | ||
" if name == org_name:\n", | ||
" name = org_name.partition(\".\")[0]\n", | ||
" return name\n", | ||
"\n", | ||
"def get_size(filepath):\n", | ||
" with Image.open(first_filepath) as im:\n", | ||
" x, y = im.size\n", | ||
" return x * y\n", | ||
" \n", | ||
"\n", | ||
"curr_name = ''\n", | ||
"for i in range(len(images) - 1):\n", | ||
" if curr_name == '':\n", | ||
" curr_name = images[i]\n", | ||
" \n", | ||
" second_name = images[i + 1]\n", | ||
" \n", | ||
" if get_orgname(curr_name) == get_orgname(second_name):\n", | ||
" first_filepath = os.path.join(img_dir, curr_name)\n", | ||
" second_filepath = os.path.join(img_dir, second_name)\n", | ||
" first_size = get_size(first_filepath)\n", | ||
" second_size = get_size(second_filepath)\n", | ||
" \n", | ||
" if first_size < second_size:\n", | ||
" curr_name = second_name\n", | ||
" i += 1\n", | ||
" else:\n", | ||
" copyfile(os.path.join(img_dir, curr_name), '/home/ubuntu/storage/org_large/' + curr_name)\n", | ||
" curr_name = ''\n", | ||
" i += 1\n", | ||
" " | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Environment (conda_tensorflow_p36)", | ||
"language": "python", | ||
"name": "conda_tensorflow_p36" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.6.5" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
Oops, something went wrong.