#!/usr/bin/env python3
# Copyright 2019 Christian Henning
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# @title :data/cub_200_2011_data.py
# @author :ch
# @contact :henningc@ethz.ch
# @created :05/17/2019
# @version :1.0
# @python_version :3.6.8
"""
CUB-200-2011 Dataset
--------------------
The module :mod:`data.cub_200_2011_data` contains a dataloader for the
Caltech-UCSD Birds-200-2011 Dataset (CUB-200-2011).
The dataset is available at:
http://www.vision.caltech.edu/visipedia/CUB-200-2011.html
For more information on the dataset, please refer to the corresponding
publication:
Wah et al., "The Caltech-UCSD Birds-200-2011 Dataset",
California Institute of Technology, 2011.
http://www.vision.caltech.edu/visipedia/papers/CUB_200_2011.pdf
The dataset consists of 11,788 images divided into 200 categories. The dataset
has a specified train/test split and a lot of additional information (bounding
boxes, segmentation, parts annotation, ...) that we don't make use of yet.
.. note::
This dataset should not be confused with the older version CUB-200,
containing only 6,033 images.
.. note ::
We use the same data augmentation as for class
:class:`data.ilsvrc2012_data.ILSVRC2012Data`.
.. note::
The original category labels range from 1-200. We modify them to range
from 0 - 199.
"""
# FIXME We currently rely too much on the internals of class ImageFolder.
import torchvision
import warnings
from packaging import version
# The dataset handler below manipulates internals of torchvision's ImageFolder
# (e.g., its "targets" field), so the installed torchvision version is checked
# against the one this code was written for. Both versions are parsed once.
_tv_installed = version.parse(torchvision.__version__)
_tv_tested = version.parse('0.2.2')

if _tv_installed < _tv_tested:
    # FIXME Probably not necessary to enforce, just ignore non-existing
    # "targets" field.
    raise Exception('Code requires torchvision to have at least version ' +
                    '"0.2.2" (current version: %s).' % torchvision.__version__)

# Reaching this point means the installed version is not older than the tested
# one; anything other than an exact match only triggers a warning.
if _tv_installed != _tv_tested:
    warnings.warn('Code not been tested with torchvision version %s!'
                  % torchvision.__version__)
import torchvision.datasets as datasets
import os
import time
import urllib.request
import tarfile
import pandas
from copy import deepcopy
import numpy as np
import matplotlib.pyplot as plt
from hypnettorch.data.large_img_dataset import LargeImgDataset
from hypnettorch.data.ilsvrc2012_data import ILSVRC2012Data
class CUB2002011(LargeImgDataset):
    """An instance of the class shall represent the CUB-200-2011 dataset.

    The input data of the dataset will be strings to image files. The output
    data corresponds to object labels (bird categories).

    Note:
        The dataset will be downloaded if not available.

    Note:
        The original category labels range from 1-200. We modify them to
        range from 0 - 199.

    Args:
        data_path (str): Where should the dataset be read from? If not existing,
            the dataset will be downloaded into this folder.
        use_one_hot (bool): Whether the class labels should be represented in a
            one-hot encoding.

            .. note::
                This option does not influence the internal PyTorch
                Dataset classes (e.g., cmp.
                :attr:`data.large_img_dataset.LargeImgDataset.torch_train`),
                that can be used in conjunction with PyTorch data loaders.
        num_val_per_class (int): The number of validation samples per class.
            For instance: If value 5 is given, a validation set of size
            5 * 200 = 1,000 is constructed (these samples will be removed
            from the training set).

            .. note::
                Validation samples use the same data augmentation pipeline
                as test samples.
    """
    _DOWNLOAD_PATH = 'http://www.vision.caltech.edu/visipedia-data/' + \
        'CUB-200-2011/'
    _IMG_ANNO_FILE = 'CUB_200_2011.tgz'
    _SEGMENTATION_FILE = 'segmentations.tgz'  # UNUSED
    # In which subfolder of the datapath should the data be stored.
    _SUBFOLDER = 'cub_200_2011'
    # After extracting the downloaded archive, the data will be in
    # this subfolder.
    _REL_BASE = 'CUB_200_2011'
    _IMG_DIR = 'images'  # Relative to _REL_BASE
    _CLASSES_FILE = 'classes.txt'  # Relative to _REL_BASE
    _IMG_CLASS_LBLS_FILE = 'image_class_labels.txt'  # Relative to _REL_BASE
    _IMG_FILE = 'images.txt'  # Relative to _REL_BASE
    _TRAIN_TEST_SPLIT_FILE = 'train_test_split.txt'  # Relative to _REL_BASE

    def __init__(self, data_path, use_one_hot=False, num_val_per_class=0):
        # We keep the full path to each image in memory, so we don't need to
        # tell the super class the root path to each image (i.e., samples
        # contain absolute not relative paths).
        super().__init__('')

        start = time.time()

        print('Reading CUB-200-2011 dataset ...')

        # Actual data path
        data_path = os.path.join(data_path, CUB2002011._SUBFOLDER)

        if not os.path.exists(data_path):
            print('Creating directory "%s" ...' % (data_path))
            os.makedirs(data_path)

        full_data_path = os.path.join(data_path, CUB2002011._REL_BASE)
        image_dir = os.path.join(full_data_path, CUB2002011._IMG_DIR)
        classes_fn = os.path.join(full_data_path, CUB2002011._CLASSES_FILE)
        img_class_fn = os.path.join(full_data_path,
                                    CUB2002011._IMG_CLASS_LBLS_FILE)
        image_fn = os.path.join(full_data_path, CUB2002011._IMG_FILE)
        train_test_split_fn = os.path.join(
            full_data_path, CUB2002011._TRAIN_TEST_SPLIT_FILE)

        ########################
        ### Download dataset ###
        ########################
        # The archive is (re-)downloaded as soon as any required file or
        # folder is missing.
        required_paths = [image_dir, classes_fn, img_class_fn, image_fn,
                          train_test_split_fn]
        if not all(os.path.exists(p) for p in required_paths):
            print('Downloading dataset ...')
            archive_fn = os.path.join(data_path, CUB2002011._IMG_ANNO_FILE)
            urllib.request.urlretrieve(
                CUB2002011._DOWNLOAD_PATH + CUB2002011._IMG_ANNO_FILE,
                archive_fn)

            # Extract downloaded dataset. The context manager guarantees that
            # the archive handle is closed even if the extraction fails.
            # NOTE(review): `extractall` trusts the member paths stored in the
            # archive; consider restricting extraction if the download source
            # is not trusted.
            with tarfile.open(archive_fn, "r:gz") as tar:
                tar.extractall(path=data_path)

            os.remove(archive_fn)

        ####################
        ### Read dataset ###
        ####################
        # We use the same transforms as for the ILSVRC2012 dataset.
        train_transform, test_transform = \
            ILSVRC2012Data.torch_input_transforms()

        # Consider all images as training images. We split the dataset later.
        ds_train = datasets.ImageFolder(image_dir, train_transform)

        # Ability to translate image IDs into image paths and back.
        # Since the ImageFolder class uses absolute paths, we have to change
        # the just read relative paths.
        id2img = {
            iid: os.path.join(image_dir, rel_path)
            for iid, rel_path in CUB2002011._read_mapping(
                image_fn, 'img_id', 'img_path').items()
        }
        img2id = {v: k for k, v in id2img.items()}

        # Image ID to label.
        # Note, categories go from 1-200. We change them to go from 0 - 199.
        id2lbl = {
            iid: lbl - 1
            for iid, lbl in CUB2002011._read_mapping(
                img_class_fn, 'img_id', 'label').items()
        }

        # Label to label name.
        # Here, we also have to modify the labels to be within 0-199.
        lbl2lbl_name = {
            lbl - 1: name
            for lbl, name in CUB2002011._read_mapping(
                classes_fn, 'label', 'label_name').items()
        }

        # Train-test-split.
        id2train = CUB2002011._read_mapping(train_test_split_fn, 'img_id',
                                            'is_train')

        self._label_to_name = lbl2lbl_name

        ####################
        ### Sanity check ###
        ####################
        # The labels inferred by ImageFolder have to agree with the labels
        # specified in the annotation files.
        for img_path, lbl in ds_train.samples:
            iid = img2id[img_path]
            assert id2img[iid] == img_path
            assert lbl == id2lbl[iid]

        ################################
        ### Train / val / test split ###
        ################################
        orig_samples = ds_train.samples
        ds_train.samples = []
        # "imgs" is kept as an alias of "samples" (same list object).
        ds_train.imgs = ds_train.samples
        ds_train.targets = []

        # The empty training dataset serves as template for the other splits.
        ds_test = deepcopy(ds_train)
        ds_test.transform = test_transform
        assert ds_test.target_transform is None

        if num_val_per_class > 0:
            ds_val = deepcopy(ds_train)
            # NOTE we use test input transforms for the validation set.
            ds_val.transform = test_transform
        else:
            ds_val = None

        num_classes = len(lbl2lbl_name)
        assert num_classes == 200

        # The builtin `int` is used, as the alias `np.int` does not exist
        # anymore in recent NumPy versions.
        val_counts = np.zeros(num_classes, dtype=int)

        for img_path, img_lbl in orig_samples:
            iid = img2id[img_path]
            if id2train[iid] == 1:  # In train split.
                if val_counts[img_lbl] >= num_val_per_class:  # train sample
                    ds_train.samples.append((img_path, img_lbl))
                else:  # validation sample
                    val_counts[img_lbl] += 1
                    ds_val.samples.append((img_path, img_lbl))
            else:  # In test split.
                ds_test.samples.append((img_path, img_lbl))

        for ds_obj in [ds_train, ds_test] + \
                ([ds_val] if num_val_per_class > 0 else []):
            ds_obj.targets = [s[1] for s in ds_obj.samples]
            assert len(ds_obj.samples) == len(ds_obj.imgs) and \
                len(ds_obj.samples) == len(ds_obj.targets)

        self._torch_ds_train = ds_train
        self._torch_ds_test = ds_test
        self._torch_ds_val = ds_val

        #####################################
        ### Build internal data structure ###
        #####################################
        num_train = len(self._torch_ds_train)
        num_test = len(self._torch_ds_test)
        num_val = 0 if self._torch_ds_val is None else \
            len(self._torch_ds_val)
        num_samples = num_train + num_test + num_val

        max_path_len = max(len(path) for path, _ in orig_samples)

        self._data['classification'] = True
        self._data['sequence'] = False
        self._data['num_classes'] = 200
        self._data['is_one_hot'] = use_one_hot
        self._data['in_shape'] = [224, 224, 3]
        self._data['out_shape'] = [200 if use_one_hot else 1]

        # Inputs are stored as image paths. Samples are ordered as
        # train - validation - test, matching the index arrays built below.
        val_samples = [] if num_val == 0 else ds_val.samples
        val_targets = [] if num_val == 0 else ds_val.targets

        # `np.char.chararray` is used instead of the top-level alias
        # `np.chararray`, since the `np.char` location is available across
        # NumPy versions.
        self._data['in_data'] = np.char.chararray(
            [num_samples, 1], itemsize=max_path_len, unicode=True)
        for i, (img_path, _) in enumerate(ds_train.samples + val_samples +
                                          ds_test.samples):
            self._data['in_data'][i, :] = img_path

        labels = np.array(ds_train.targets + val_targets +
                          ds_test.targets).reshape(-1, 1)
        if use_one_hot:
            labels = self._to_one_hot(labels)
        self._data['out_data'] = labels

        self._data['train_inds'] = np.arange(num_train)
        self._data['test_inds'] = np.arange(num_train + num_val, num_samples)
        if num_val == 0:
            self._data['val_inds'] = None
        else:
            self._data['val_inds'] = np.arange(num_train, num_train + num_val)

        print('Dataset consists of %d training, %d validation and %d test '
              % (num_train, num_val, num_test) + 'samples.')

        end = time.time()
        print('Elapsed time to read dataset: %f sec' % (end-start))

    @staticmethod
    def _read_mapping(file_name, key_col, val_col):
        """Read a space-separated two-column file into a dictionary.

        Args:
            file_name (str): Path to the file to be read.
            key_col (str): Name assigned to the first column, whose entries
                become the dictionary keys.
            val_col (str): Name assigned to the second column, whose entries
                become the dictionary values.

        Returns:
            (dict): Mapping from first-column entries to second-column
            entries.
        """
        table = pandas.read_csv(file_name, sep=' ', names=[key_col, val_col])
        return dict(zip(list(table[key_col]), list(table[val_col])))

    def get_identifier(self):
        """Returns the name of the dataset."""
        return 'CUB-200-2011'

    def _plot_sample(self, fig, inner_grid, num_inner_plots, ind, inputs,
                     outputs=None, predictions=None):
        """Implementation of abstract method
        :meth:`data.dataset.Dataset._plot_sample`.
        """
        ax = plt.Subplot(fig, inner_grid[0])

        if outputs is None:
            ax.set_title("CUB-200-2011 Sample")
        else:
            assert np.size(outputs) == 1
            # `np.asscalar` does not exist anymore in recent NumPy versions;
            # `item()` is the equivalent way to extract the single element.
            label = np.asarray(outputs).item()
            label_name = self._label_to_name[label]

            if predictions is None:
                ax.set_title('Label of shown sample:\n%s (%d)' %
                             (label_name, label))
            else:
                # Predictions are either given as one value per class (the
                # largest one determines the label) or directly as a label.
                if np.size(predictions) == self.num_classes:
                    pred_label = np.argmax(predictions)
                else:
                    pred_label = np.asarray(predictions).item()
                pred_label_name = self._label_to_name[pred_label]

                ax.set_title('Label of shown sample:\n%s (%d)' %
                             (label_name, label) + '\nPrediction: %s (%d)' %
                             (pred_label_name, pred_label))

        # An input of size 1 is a path to an image file, that has to be read
        # first.
        if inputs.size == 1:
            img = self.read_images(inputs)
        else:
            img = inputs

        ax.set_axis_off()
        ax.imshow(np.squeeze(np.reshape(img, self.in_shape)))
        fig.add_subplot(ax)
# Script entry point: intentionally a no-op, i.e., nothing happens when this
# module is executed directly.
if __name__ == '__main__':
    pass