#!/usr/bin/env python3
# Copyright 2019 Christian Henning
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# @title          :mnets/bio_conv_net.py
# @author         :ch
# @contact        :henningc@ethz.ch
# @created        :10/30/2019
# @version        :1.0
# @python_version :3.6.8
"""
A bio-plausible convolutional network for CIFAR
-----------------------------------------------

The module :mod:`mnets.bio_conv_net` implements a simple biologically-plausible
network with convolutional and fully-connected layers. The bio-plausibility
arises through the usage of conv-layers without weight sharing, i.e., layers
from class :class:`utils.local_conv2d_layer.LocalConv2dLayer`. The network
specification has been taken from the following paper

    `Bartunov et al., "Assessing the Scalability of Biologically-Motivated Deep
    Learning Algorithms and Architectures", NeurIPS 2018.
    <http://papers.nips.cc/paper/8148-assessing-the-scalability-of-biologically\
-motivated-deep-learning-algorithms-and-architectures>`_

in which this kind of network has been termed "locally-connected network".

In particular, we consider the network architecture specified in table 3 on page
13 for the CIFAR dataset.

.. autosummary::

    hypnettorch.mnets.bio_conv_net.BioConvNet
"""
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

from hypnettorch.mnets.classifier_interface import Classifier
from hypnettorch.mnets.mnet_interface import MainNetInterface
from hypnettorch.utils.context_mod_layer import ContextModLayer
from hypnettorch.utils.local_conv2d_layer import LocalConv2dLayer
from hypnettorch.utils.torch_utils import init_params
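
# Layer-size arithmetic for the default CIFAR input shape (32, 32, 3),
# assuming the usual convolution output-size formula
# out = floor((in + 2*P - K) / S) + 1 for each spatial dimension:
#
#   conv1 (5 x 5, 64, stride 2, pad 0):  32 -> floor((32 - 5) / 2) + 1 = 14
#   conv2 (5 x 5, 128, stride 2, pad 0): 14 -> floor((14 - 5) / 2) + 1 = 5
#   conv3 (3 x 3, 256, stride 1, pad 1):  5 -> floor((5 + 2 - 3) / 1) + 1 = 5
#
# The flattened feature vector entering the first fully-connected layer thus
# has 5 * 5 * 256 = 6400 entries, matching the assertion in the constructor.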

class BioConvNet(Classifier):
    r"""Implementation of a locally-connected network for CIFAR.

    The network consists of 3 bio-plausible convolutional layers (using class
    :class:`utils.local_conv2d_layer.LocalConv2dLayer`) followed by two
    fully-connected layers.

    Assume conv layers are specified by the tuple ``(K x K, C, S, P)``, where
    ``K`` denotes the kernel size, ``C`` the number of channels, ``S`` the
    stride and ``P`` the padding. The network is defined as follows

        - Bio-conv layer (5 x 5, 64, 2, 0)
        - Bio-conv layer (5 x 5, 128, 2, 0)
        - Bio-conv layer (3 x 3, 256, 1, 1)
        - FC layer with 1024 outputs
        - FC layer with 10 outputs

    Note, the padding for the first two convolutional layers was not specified
    in the paper, so we just assumed it to be zero.

    The **network output will be linear**, so we do not apply the softmax
    inside the :meth:`forward` method.

    Note, the paper states that ``tanh`` was used in all networks as
    non-linearity. Therefore, we use this non-linearity too.

    Args:
        in_shape: The shape of an input sample.

            .. note::
                We assume the Tensorflow format, where the last entry denotes
                the number of channels.
        num_classes: The number of output neurons.
        no_weights (bool): If set to ``True``, no trainable parameters will be
            constructed, i.e., weights are assumed to be produced ad-hoc by a
            hypernetwork and passed to the :meth:`forward` method.
        init_weights (optional): This option is for convenience reasons. The
            option expects a list of parameter values that are used to
            initialize the network weights. As such, it provides a convenient
            way of initializing a network with a weight draw produced by the
            hypernetwork.

            Note, only internal weights (see
            :attr:`mnets.mnet_interface.MainNetInterface.weights`) will be
            affected by this argument.
        use_context_mod (bool): Add context-dependent modulation layers
            :class:`utils.context_mod_layer.ContextModLayer` after the linear
            computation of each layer.
        context_mod_inputs (bool): Whether context-dependent modulation should
            also be applied to network inputs directly. I.e., assume
            :math:`\mathbf{x}` is the input to the network. Then the first
            network operation would be to modify the input via
            :math:`\mathbf{x} \cdot \mathbf{g} + \mathbf{s}` using
            context-dependent gain and shift parameters.

            Note:
                Argument applies only if ``use_context_mod`` is ``True``.
        no_last_layer_context_mod (bool): If ``True``, context-dependent
            modulation will not be applied to the output layer.

            Note:
                Argument applies only if ``use_context_mod`` is ``True``.
        context_mod_no_weights (bool): The weights of the context-mod layers
            (:class:`utils.context_mod_layer.ContextModLayer`) are treated
            independently of the option ``no_weights``. This argument can be
            used to decide whether the context-mod parameters (gains and
            shifts) are maintained internally or externally.

            Note:
                Check out argument ``weights`` of the :meth:`forward` method
                on how to correctly pass weights to the network that are
                externally maintained.
        context_mod_post_activation (bool): Apply context-mod layers after the
            activation function (``activation_fn``) in hidden layers rather
            than before, which is the default behavior.

            Note:
                This option only applies if ``use_context_mod`` is ``True``.

            Note:
                This option does not affect argument ``context_mod_inputs``.

            Note:
                There is no non-linearity applied to the output layer, such
                that this argument has no effect there.
        context_mod_gain_offset (bool): Activates option ``apply_gain_offset``
            of class :class:`utils.context_mod_layer.ContextModLayer` for all
            context-mod layers that will be instantiated.
        context_mod_gain_softplus (bool): Activates option
            ``apply_gain_softplus`` of class
            :class:`utils.context_mod_layer.ContextModLayer` for all
            context-mod layers that will be instantiated.
        context_mod_apply_pixel_wise (bool): If ``False``, the
            context-dependent modulation applies a scalar gain and shift to
            all feature maps in the output of a convolutional layer. When
            activating this option, the gain and shift will be a per-pixel
            parameter in all feature maps.

            To be more precise, consider the output of a convolutional layer
            of shape ``[C,H,W]``. If ``False``, there will be ``C`` gain and
            shift parameters for such a layer. Upon activating this option,
            the number of gain and shift parameters for such a layer will
            increase to ``C x H x W``.
    """
    def __init__(self, in_shape=(32, 32, 3), num_classes=10, no_weights=False,
                 init_weights=None, use_context_mod=False,
                 context_mod_inputs=False, no_last_layer_context_mod=False,
                 context_mod_no_weights=False,
                 context_mod_post_activation=False,
                 context_mod_gain_offset=False,
                 context_mod_gain_softplus=False,
                 context_mod_apply_pixel_wise=False):
        super(BioConvNet, self).__init__(num_classes, True)

        assert(len(in_shape) == 3)
        # FIXME This assertion is not mandatory but a sanity check that the
        # user uses the Tensorflow layout.
        assert(in_shape[2] in [1, 3])
        assert(init_weights is None or
               (not no_weights or not context_mod_no_weights))
        self._in_shape = in_shape

        self._no_weights = no_weights
        self._use_context_mod = use_context_mod
        self._context_mod_inputs = context_mod_inputs
        self._no_last_layer_context_mod = no_last_layer_context_mod
        self._context_mod_no_weights = context_mod_no_weights
        self._context_mod_post_activation = context_mod_post_activation
        self._context_mod_gain_offset = context_mod_gain_offset
        self._context_mod_gain_softplus = context_mod_gain_softplus
        self._context_mod_apply_pixel_wise = context_mod_apply_pixel_wise

        self._has_bias = True
        self._has_fc_out = True
        # We need to make sure that the last 2 entries of `weights` correspond
        # to the weight matrix and bias vector of the last layer.
        self._mask_fc_out = True
        self._has_linear_out = True

        self._param_shapes = []
        self._weights = None if no_weights and context_mod_no_weights \
            else nn.ParameterList()
        self._hyper_shapes_learned = None \
            if not no_weights and not context_mod_no_weights else []
        self._layer_weight_tensors = nn.ParameterList()
        self._layer_bias_vectors = nn.ParameterList()

        # Shapes of output activities for context-modulation, if used.
        cm_shapes = []  # Output shape of all layers.
        if context_mod_inputs:
            cm_shapes.append([in_shape[2], *in_shape[:2]])

        ### Define and initialize all conv and linear layers
        ### Bio-conv layers.
        H = in_shape[0]
        W = in_shape[1]
        C_in = in_shape[2]
        C = [64, 128, 256]
        K = [5, 5, 3]
        S = [2, 2, 1]
        P = [0, 0, 1]
        self._conv_layer = []
        for i, C_out in enumerate(C):
            self._conv_layer.append(LocalConv2dLayer(C_in, C_out, H, W, K[i],
                stride=S[i], padding=P[i], no_weights=no_weights))
            H = self._conv_layer[-1].out_height
            W = self._conv_layer[-1].out_width

            cm_shapes.append([C_out, H, W])

            C_in = C_out

            self._param_shapes.extend(self._conv_layer[-1].param_shapes)
            if no_weights:
                self._hyper_shapes_learned.extend(
                    self._conv_layer[-1].param_shapes)
            else:
                self._weights.extend(self._conv_layer[-1].weights)

                assert(len(self._conv_layer[-1].weights) == 2)
                self._layer_weight_tensors.append(self._conv_layer[-1].filters)
                self._layer_bias_vectors.append(self._conv_layer[-1].bias)

        ### Linear layers
        n_in = H * W * C_out
        assert(n_in == 6400)
        n = [1024, num_classes]
        for i, n_out in enumerate(n):
            W_shape = [n_out, n_in]
            b_shape = [n_out]

            # Note, that the last layer shape might not be used for
            # context-modulation.
            if i < (len(n)-1) or not no_last_layer_context_mod:
                cm_shapes.append([n_out])

            n_in = n_out

            self._param_shapes.extend([W_shape, b_shape])
            if no_weights:
                self._hyper_shapes_learned.extend([W_shape, b_shape])
            else:
                W = nn.Parameter(torch.Tensor(*W_shape), requires_grad=True)
                b = nn.Parameter(torch.Tensor(*b_shape), requires_grad=True)
                init_params(W, b)

                self._weights.extend([W, b])

                self._layer_weight_tensors.append(W)
                self._layer_bias_vectors.append(b)

        ### Define and initialize context mod weights.
        self._context_mod_layers = nn.ModuleList() if use_context_mod else None
        self._context_mod_shapes = [] if use_context_mod else None
        self._context_mod_weights = nn.ParameterList() if use_context_mod \
            else None

        if use_context_mod:
            if not context_mod_apply_pixel_wise:
                # Only scalar gain and shift per feature map!
                for i, s in enumerate(cm_shapes):
                    if len(s) == 3:
                        cm_shapes[i] = [s[0], 1, 1]

            for i, s in enumerate(cm_shapes):
                cmod_layer = ContextModLayer(s,
                    no_weights=context_mod_no_weights,
                    apply_gain_offset=context_mod_gain_offset,
                    apply_gain_softplus=context_mod_gain_softplus)
                self._context_mod_layers.append(cmod_layer)

                self._context_mod_shapes.extend(cmod_layer.param_shapes)
                if not context_mod_no_weights:
                    self._context_mod_weights.extend(cmod_layer.weights)

            # We always add the context-mod weights/shapes at the beginning of
            # our list attributes.
            self._param_shapes = self._context_mod_shapes + self._param_shapes
            if context_mod_no_weights:
                self._hyper_shapes_learned = self._context_mod_shapes + \
                    self._hyper_shapes_learned
            else:
                tmp = self._weights
                self._weights = nn.ParameterList(self._context_mod_weights)
                for w in tmp:
                    self._weights.append(w)

        ### Apply custom init if given.
        if init_weights is not None:
            assert(len(self.weights) == len(init_weights))
            for i in range(len(init_weights)):
                assert(np.all(np.equal(list(init_weights[i].shape),
                                       list(self._weights[i].shape))))
                self._weights[i].data = init_weights[i]

        ### Print user info.
        num_weights = MainNetInterface.shapes_to_num_weights(
            self._param_shapes)
        if use_context_mod:
            cm_num_weights = MainNetInterface.shapes_to_num_weights(
                self._context_mod_shapes)

        print('Creating bio-plausible convnet with %d weights' % num_weights
              + (' (including %d weights associated with context modulation)'
                 % cm_num_weights if use_context_mod else '') + '.')

        self._is_properly_setup()

    def forward(self, x, weights=None, distilled_params=None, condition=None,
                collect_activations=False):
        """Compute the output :math:`y` of this network given the input
        :math:`x`.

        Args:
            (....): See docstring of method
                :meth:`mnets.mnet_interface.MainNetInterface.forward`. We
                provide some more specific information below.
            x: Input image.

                .. note::
                    We assume the Tensorflow format, where the last entry
                    denotes the number of channels.
            weights (list or dict): If a list of parameter tensors is given
                and context modulation is used (see argument
                ``use_context_mod`` in constructor), then these parameters are
                interpreted as context-modulation parameters if the length of
                ``weights`` equals :code:`2*len(net.context_mod_layers)`.
                Otherwise, the length is expected to be equal to the length of
                the attribute
                :attr:`mnets.mnet_interface.MainNetInterface.param_shapes`.

                Alternatively, a dictionary can be passed with the possible
                keywords ``internal_weights`` and ``mod_weights``. Each
                keyword is expected to map onto a list of tensors. The keyword
                ``internal_weights`` refers to all weights of this network
                except for the weights of the context-modulation layers. The
                keyword ``mod_weights``, on the other hand, refers
                specifically to the weights of the context-modulation layers.
                It is not necessary to specify both keywords.
            condition (int, optional): Will be passed as argument ``ckpt_id``
                to the method
                :meth:`utils.context_mod_layer.ContextModLayer.forward` for
                all context-mod layers in this network.
            collect_activations (bool, optional): Whether the activations of
                the network should be returned as well. This information can
                be used for credit assignment later on, in case an alternative
                to PyTorch's :mod:`torch.autograd` should be used.

        Returns:
            (:class:`torch.Tensor` or tuple): Tuple containing:

            - **y**: The output of the network.
            - **layer_activation** (optional): The activations of the network.
              Only returned if ``collect_activations`` was set to ``True``.
              The list will contain the activations of all convolutional and
              linear layers.
            - **last_hidden** (optional): The post-activation output of the
              last hidden layer. Only returned if ``collect_activations`` was
              set to ``True``.
        """
        if ((not self._use_context_mod and self._no_weights) or \
                (self._no_weights or self._context_mod_no_weights)) and \
                weights is None:
            raise Exception('Network was generated without weights. ' +
                            'Hence, "weights" option may not be None.')

        ############################################
        ### Extract which weights should be used ###
        ############################################
        # I.e., are we using internally maintained weights or externally given
        # ones or are we even mixing between these groups.
        # FIXME code mostly copied from MLP forward method.
        n_cm = 0 if self.context_mod_layers is None else \
            2 * len(self.context_mod_layers)

        if weights is None:
            weights = self.weights

            if self._use_context_mod:
                cm_weights = weights[:n_cm]
                int_weights = weights[n_cm:]
            else:
                int_weights = weights
        else:
            int_weights = None
            cm_weights = None

            if isinstance(weights, dict):
                assert('internal_weights' in weights.keys() or \
                       'mod_weights' in weights.keys())
                if 'internal_weights' in weights.keys():
                    int_weights = weights['internal_weights']
                if 'mod_weights' in weights.keys():
                    cm_weights = weights['mod_weights']
            else:
                if self._use_context_mod and len(weights) == n_cm:
                    cm_weights = weights
                else:
                    assert(len(weights) == len(self.param_shapes))
                    if self._use_context_mod:
                        cm_weights = weights[:n_cm]
                        int_weights = weights[n_cm:]
                    else:
                        int_weights = weights

            if self._use_context_mod and cm_weights is None:
                if self._context_mod_no_weights:
                    raise Exception('Network was generated without weights ' +
                        'for context-mod layers. Hence, they must be passed ' +
                        'via the "weights" option.')
                cm_weights = self.weights[:n_cm]
            if int_weights is None:
                if self._no_weights:
                    raise Exception('Network was generated without internal ' +
                        'weights. Hence, they must be passed via the ' +
                        '"weights" option.')
                if self._context_mod_no_weights:
                    int_weights = self.weights
                else:
                    int_weights = self.weights[n_cm:]

        # Note, context-mod weights might have different shapes, as they
        # may be parametrized on a per-sample basis.
        if self._use_context_mod:
            assert(len(cm_weights) == len(self._context_mod_shapes))

        int_shapes = self.param_shapes[n_cm:]
        assert(len(int_weights) == len(int_shapes))
        for i, s in enumerate(int_shapes):
            assert(np.all(np.equal(s, list(int_weights[i].shape))))

        ###############################################
        ### Extract weight tensors and bias vectors ###
        ###############################################
        w_weights = []
        b_weights = []
        for i, p in enumerate(int_weights):
            if self.has_bias and i % 2 == 1:
                b_weights.append(p)
            else:
                w_weights.append(p)

        #######################
        ### Parse condition ###
        #######################
        cmod_cond = condition

        ###########################
        ### Forward Computation ###
        ###########################
        cm_ind = 0
        layer_ind = 0

        x = x.view(-1, *self._in_shape)
        x = x.permute(0, 3, 1, 2)
        h = x

        activations = []

        # Context-dependent modulation of inputs directly.
        if self._use_context_mod and self._context_mod_inputs:
            h = self._context_mod_layers[cm_ind].forward(h,
                weights=cm_weights[2*cm_ind:2*cm_ind+2], ckpt_id=cmod_cond)
            cm_ind += 1

        # Convolutional layers.
        for i, conv_layer in enumerate(self._conv_layer):
            h = conv_layer.forward(h, weights=[w_weights[layer_ind],
                                               b_weights[layer_ind]])
            layer_ind += 1

            if collect_activations:
                activations.append(h.clone())

            # Non-linearity (if context-dependent mod is applied post
            # non-lin.)
            if self._context_mod_post_activation:
                h = torch.tanh(h)

            # Context-dependent modulation.
            if self._use_context_mod:
                h = self._context_mod_layers[cm_ind].forward(h,
                    weights=cm_weights[2*cm_ind:2*cm_ind+2],
                    ckpt_id=cmod_cond)
                cm_ind += 1

            # Non-linearity (if context-dependent mod is applied pre non-lin.)
            if not self._context_mod_post_activation:
                h = torch.tanh(h)

        # Flatten feature maps.
        h = h.view(h.size(0), -1)

        # Linear layers.
        assert(len(w_weights) == layer_ind + 2)
        for _ in range(2):
            W = w_weights[layer_ind]
            if self.has_bias:
                b = b_weights[layer_ind]
            else:
                b = None

            # Linear layer.
            h = F.linear(h, W, bias=b)

            if collect_activations:
                activations.append(h.clone())

            # Do not consider the output layer.
            if layer_ind < len(w_weights) - 1:
                # Non-linearity (if context-dependent mod is applied post
                # non-lin.)
                if self._context_mod_post_activation:
                    h = torch.tanh(h)

                # Context-dependent modulation.
                if self._use_context_mod:
                    h = self._context_mod_layers[cm_ind].forward(h,
                        weights=cm_weights[2*cm_ind:2*cm_ind+2],
                        ckpt_id=cmod_cond)
                    cm_ind += 1

                # Non-linearity (if context-dependent mod is applied pre
                # non-lin.)
                if not self._context_mod_post_activation:
                    h = torch.tanh(h)

                if collect_activations and layer_ind == len(w_weights) - 2:
                    last_hidden = h

            layer_ind += 1

        # Context-dependent modulation in output layer.
        if self._use_context_mod and not self._no_last_layer_context_mod:
            h = self._context_mod_layers[cm_ind].forward(h,
                weights=cm_weights[2*cm_ind:2*cm_ind+2], ckpt_id=cmod_cond)

        if collect_activations:
            return h, activations, last_hidden
        else:
            return h
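
    # Sketch of the different ways the ``weights`` argument of
    # :meth:`forward` can be populated (variable names below are
    # hypothetical):
    #
    #     # Flat list matching ``net.param_shapes``, e.g., a hypernetwork
    #     # output.
    #     y = net.forward(x, weights=hnet_out)
    #
    #     # Dictionary with one or both of the supported keywords.
    #     y = net.forward(x, weights={'internal_weights': int_ws,
    #                                 'mod_weights': cm_ws})
    #
    # If context modulation is enabled and a plain list of length
    # ``2 * len(net.context_mod_layers)`` is given, it is interpreted as
    # context-mod weights only; the remaining weights are then taken from the
    # internally maintained ones (which requires ``no_weights=False``).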

    def distillation_targets(self):
        """Targets to be distilled after training.

        See docstring of abstract super method
        :meth:`mnets.mnet_interface.MainNetInterface.distillation_targets`.

        This network does not have any distillation targets.

        Returns:
            ``None``
        """
        return None
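

# A minimal usage sketch (illustrative only). With internally maintained
# weights, the network can be instantiated and queried as follows:
#
#     net = BioConvNet(in_shape=(32, 32, 3), num_classes=10)
#     x = torch.rand(4, 32, 32, 3)  # Tensorflow layout, channels last.
#     y = net.forward(x)            # Linear output (logits) of shape [4, 10].
#
# With ``no_weights=True``, all parameters listed in ``net.param_shapes`` must
# instead be generated externally (e.g., by a hypernetwork) and passed to
# :meth:`BioConvNet.forward` via its ``weights`` argument.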

if __name__ == '__main__':
    pass