# Source code for hypnettorch.data.special.regression1d_bimodal_data

#!/usr/bin/env python3
# Copyright 2020 Rafael Daetwyler
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# @title          :data/special/regression1d_bimodal_data.py
# @author         :rd
# @contact        :rafael.daetwyler@uzh.ch
# @created        :11/06/2020
# @version        :1.0
# @python_version :3.7.4
"""
1D Regression Dataset with bimodal error
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

The module :mod:`hypnettorch.data.special.regression1d_bimodal_data` contains a
data handler for a continual learning (CL) toy regression problem whose targets
are perturbed by bimodal (two-component Gaussian mixture) noise. The user can
construct individual datasets with this data handler and use each of these
datasets to train a model in a continual learning setting.
"""
import numpy as np
from hypnettorch.data.special.regression1d_data import ToyRegression

class BimodalToyRegression(ToyRegression):
    """An instance of this class shall represent a simple regression task, but
    with a bimodal Gaussian mixture error distribution.

    The error added to the targets is drawn from a mixture of two Gaussians:
    with probability ``alpha1`` from a Gaussian with mean ``-dist1`` and
    standard deviation ``std1`` (first component), and with probability
    ``1 - alpha1`` from a Gaussian with mean ``+dist2`` and standard deviation
    ``std2`` (second component).
    """
    def __init__(self, train_inter=[-10, 10], num_train=20,
                 test_inter=[-10, 10], num_test=80, val_inter=None,
                 num_val=None, map_function=lambda x: x, alpha1=0.5, dist1=5,
                 dist2=None, std1=1, std2=None, rseed=None,
                 perturb_test_val=False):
        """Generate a new dataset.

        The input data x will be uniformly drawn for train samples and
        equidistant for test (and validation) samples. The user has to specify
        a function that will map this input data onto output samples y.

        Training targets are always perturbed by the bimodal error; test and
        validation targets are only perturbed if ``perturb_test_val`` is set.

        Args:
            (....): See docstring of class
                :class:`hypnettorch.data.special.regression1d_data.ToyRegression`.
            alpha1: Mixture coefficient of the first Gaussian mode of the
                error, i.e., the probability with which a sample's error is
                drawn from the first component. Has to be in ``[0, 1]``.
            dist1: The distance from zero of the mean of the first Gaussian
                component of the error (the mean itself is ``-dist1``).
            dist2 (optional): The distance from zero of the mean of the second
                Gaussian component of the error (the mean itself is
                ``+dist2``). If ``None``, the value of `dist1` will be taken.
            std1: The standard deviation of the first Gaussian component of the
                error.
            std2 (optional): The standard deviation of the second Gaussian
                component of the error. If ``None``, the value of `std1` will
                be taken.

        Raises:
            AssertionError: If only one of ``val_inter`` and ``num_val`` is
                given, or if ``alpha1`` is outside of ``[0, 1]``.
        """
        # The parent constructor is called with its default arguments; all
        # dataset-specific fields are (re)written below. `self._data` is
        # expected to exist once this call returns.
        super().__init__()

        assert (val_inter is None) == (num_val is None), \
            '"val_inter" and "num_val" must be specified together.'
        assert 0 <= alpha1 <= 1, '"alpha1" must be in [0, 1].'

        # Without a seed, the global numpy random state is used.
        if rseed is None:
            rand = np.random
        else:
            rand = np.random.RandomState(rseed)

        if dist2 is None:
            dist2 = dist1
        if std2 is None:
            std2 = std1

        # Index 0 -> first component, index 1 -> second component.
        means = np.array([-dist1, dist2])
        stds = np.array([std1, std2])

        def sample_noise(num_samples):
            """Draw ``num_samples`` errors of shape ``(num_samples, 1)``."""
            # The Bernoulli draw yields the component *index*. Since the first
            # component (index 0) has to be chosen with probability `alpha1`,
            # the success probability (index 1) must be `1 - alpha1`.
            mode = rand.binomial(1, 1 - alpha1, (num_samples, 1))
            return rand.normal(loc=means[mode], scale=stds[mode])

        train_x = rand.uniform(low=train_inter[0], high=train_inter[1],
                               size=(num_train, 1))
        test_x = np.linspace(start=test_inter[0], stop=test_inter[1],
                             num=num_test).reshape((num_test, 1))

        train_y = map_function(train_x)
        test_y = map_function(test_x)

        # Perturb training outputs.
        # The noise is added out-of-place on purpose: `map_function` may return
        # its argument itself (the default identity map does), in which case an
        # in-place `+=` would also perturb the inputs.
        train_y = train_y + sample_noise(num_train)

        if perturb_test_val:
            test_y = test_y + sample_noise(num_test)

        in_parts = [train_x, test_x]
        out_parts = [train_y, test_y]

        # Create validation data if requested.
        if num_val is not None:
            val_x = np.linspace(start=val_inter[0], stop=val_inter[1],
                                num=num_val).reshape((num_val, 1))
            val_y = map_function(val_x)

            if perturb_test_val:
                val_y = val_y + sample_noise(num_val)

            in_parts.append(val_x)
            out_parts.append(val_y)

        # Samples are stacked in the order train, test, (validation); the index
        # arrays below rely on this layout.
        in_data = np.vstack(in_parts)
        out_data = np.vstack(out_parts)

        # Specify internal data structure.
        self._data['classification'] = False
        self._data['sequence'] = False
        self._data['in_data'] = in_data
        self._data['in_shape'] = [1]
        self._data['out_data'] = out_data
        self._data['out_shape'] = [1]
        self._data['train_inds'] = np.arange(num_train)
        self._data['test_inds'] = np.arange(num_train, num_train + num_test)

        if num_val is not None:
            n_start = num_train + num_test
            self._data['val_inds'] = np.arange(n_start, n_start + num_val)

        # Remember the configuration this dataset was generated with.
        self._alpha1 = alpha1
        self._dist1 = dist1
        self._dist2 = dist2
        self._std1 = std1
        self._std2 = std2
        self._map = map_function
        self._train_inter = train_inter
        self._test_inter = test_inter
        self._val_inter = val_inter

    def get_identifier(self):
        """Returns the name of the dataset."""
        return '1D Bimodal Regression'


if __name__ == '__main__':
    # Nothing is executed when this module is run as a script.
    pass