#!/usr/bin/env python3
# Copyright 2019 Christian Henning
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# title :utils/local_conv2d_layer.py
# author :ch
# contact :henningc@ethz.ch
# created :10/30/2019
# version :1.0
# python_version :3.6.8
"""
2D-convolutional layer without weight sharing
---------------------------------------------
This module implements a biologically-plausible version of a convolutional layer
that does not use weight-sharing. Such a convnet is termed "locally-connected
network" in:
`Bartunov et al., "Assessing the Scalability of Biologically-Motivated Deep
Learning Algorithms and Architectures", NeurIPS 2018.
<http://papers.nips.cc/paper/8148-assessing-the-scalability-of-biologically\
-motivated-deep-learning-algorithms-and-architectures>`_
.. autosummary::
hypnettorch.utils.local_conv2d_layer.LocalConv2dLayer
"""
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from hypnettorch.utils.torch_utils import init_params


class LocalConv2dLayer(nn.Module):
r"""Implementation of a locally-connected 2D convolutional layer.
Since this implementation of a convolutional layer doesn't use weight-
sharing, it will have more parameters than a conventional convolutional
layer such as :class:`torch.nn.Conv2d`.
For example, consider a convolutional layer with kernel size ``[K, K]``,
``C_in`` input channels and ``C_out`` output channels, that has an output
feature map size of ``[H, W]``. Each receptive field [#f1]_ will have its
own weights, a parameter tensor of size ``K x K``. Thus, in total the layer
will have ``C_out * C_in * H * W * K * K`` weights compared to
``C_out * C_in * K * K`` weights that a conventional
:class:`torch.nn.Conv2d` would have.
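
    For instance, with ``C_in = 3``, ``C_out = 8``, ``K = 5`` and an output
    feature map size of ``H = W = 32`` (numbers chosen purely for
    illustration), the locally-connected layer has
    :math:`8 \cdot 3 \cdot 32 \cdot 32 \cdot 5 \cdot 5 = 614400` filter
    weights, whereas the corresponding :class:`torch.nn.Conv2d` has only
    :math:`8 \cdot 3 \cdot 5 \cdot 5 = 600`.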

    Consider the :math:`i`-th input feature map :math:`F^{(i)}`
    (:math:`1 \leq i \leq C_{\text{in}}`), the :math:`j`-th output feature
    map :math:`G^{(j)}` (:math:`1 \leq j \leq C_{\text{out}}`) and the pixel
    with coordinates :math:`(x,y)` in the :math:`j`-th output feature map
    :math:`G^{(j)}_{xy}` (:math:`1 \leq x \leq W` and
    :math:`1 \leq y \leq H`).

    We denote the filter weights of this pixel connecting to the :math:`i`-th
    input feature map by :math:`W_{xy}^{(i,j)} \in \mathbb{R}^{K \times K}`.
    The corresponding receptive field inside :math:`F^{(i)}` that is used to
    compute pixel :math:`G^{(j)}_{xy}` is denoted by
    :math:`\hat{F}^{(i)}(x,y) \in \mathbb{R}^{K \times K}`.

    The bias weights for feature map :math:`G^{(j)}` are denoted by
    :math:`B^{(j)}`, with a scalar weight :math:`B^{(j)}_{xy}` for pixel
    :math:`(x,y)`.

    Using this notation, the computation of this layer can be described by
    the following formula

    .. math::

        G^{(j)}_{xy} &= B^{(j)}_{xy} + \sum_{i=1}^{C_{\text{in}}}
        \text{sum} (W_{xy}^{(i,j)} \odot \hat{F}^{(i)}(x,y)) \\
        &= B^{(j)}_{xy} + \sum_{i=1}^{C_{\text{in}}} \langle W_{xy}^{(i,j)},
        \hat{F}^{(i)}(x,y) \rangle_F

    where :math:`\text{sum}(\cdot)` is the unary operator that computes the
    sum of all elements in a matrix, :math:`\odot` denotes the Hadamard
    product and :math:`\langle \cdot, \cdot \rangle_F` denotes the Frobenius
    inner product, which computes the sum of the entries of the Hadamard
    product between real-valued matrices.
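
    For a single output pixel this is just an elementwise multiply-and-sum;
    a minimal numeric check of the identity above (example values only, not
    part of the layer):

    .. code-block:: python

        import torch

        K, C_in = 3, 2
        W_xy = torch.randn(C_in, K, K)  # stacked filters W_xy^{(i,j)}
        F_xy = torch.randn(C_in, K, K)  # stacked receptive fields
        # sum_i sum(W * F) equals sum_i <W, F>_F (full tensor contraction).
        g = (W_xy * F_xy).sum()
        assert torch.isclose(g, torch.tensordot(W_xy, F_xy, dims=3))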

    **Implementation details**

    Let :math:`N` denote the batch size. We can use the function
    :func:`torch.nn.functional.unfold` to split our input, which is of shape
    ``[N, C_in, H_in, W_in]``, into receptive fields ``F_hat`` of dimension
    ``[N, C_in * K * K, H * W]``. Since :func:`torch.nn.functional.unfold`
    enumerates the sliding blocks in row-major order, the receptive field
    :math:`\hat{F}^{(i)}(x,y)` corresponds to
    :code:`F_hat[:, i * K*K:(i+1) * K*K, y*W + x]`, assuming that indices now
    start at ``0`` and not at ``1``.

    In addition, we have a weight tensor ``W`` of shape
    ``[C_out, C_in * K * K, H * W]``.

    Now, we can compute the element-wise product of receptive fields and
    their filters by introducing a slack dimension into the shape of
    ``F_hat`` (i.e., ``[N, 1, C_in * K * K, H * W]``) and by using
    broadcasting. ``F_hat * W`` will result in a tensor of shape
    ``[N, C_out, C_in * K * K, H * W]``. By summing over the third dimension
    ``dim=2`` and reshaping the output we retrieve the result of our local
    convolutional layer.
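
    A self-contained sketch of this pipeline (the name ``W_loc`` and all
    shapes are arbitrary choices for illustration); the final check verifies
    that sharing one filter across all positions reproduces an ordinary
    convolution:

    .. code-block:: python

        import torch
        import torch.nn.functional as F

        N, C_in, C_out, K, H_in, W_in = 2, 3, 4, 3, 8, 8
        H, W = H_in - K + 1, W_in - K + 1  # no padding, stride 1

        x = torch.randn(N, C_in, H_in, W_in)
        W_loc = torch.randn(C_out, C_in * K * K, H * W)

        F_hat = F.unfold(x, K)                         # [N, C_in*K*K, H*W]
        F_hat = F_hat.view(N, 1, C_in * K * K, H * W)  # slack dim for C_out
        G = (F_hat * W_loc).sum(dim=2)                 # [N, C_out, H*W]
        G = G.view(N, C_out, H, W)

        # Sanity check: with one shared filter per output channel, the same
        # pipeline must match torch.nn.functional.conv2d.
        W_conv = torch.randn(C_out, C_in, K, K)
        W_shared = W_conv.view(C_out, -1, 1).expand(-1, -1, H * W)
        G_shared = (F.unfold(x, K).unsqueeze(1) * W_shared).sum(dim=2)
        assert torch.allclose(G_shared.view(N, C_out, H, W),
                              F.conv2d(x, W_conv), atol=1e-5)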

    Args:
        in_channels (int): Number of channels in the input image.
        out_channels (int): Number of channels produced by the convolution.
        in_height (int): Height of the input feature maps, assuming that
            input feature maps have shape ``[C_in, H_in, W_in]`` (omitting
            the batch dimension). This argument is necessary to compute the
            size of output feature maps, as we need a filter for each pixel
            in each output feature map.
        in_width (int): Width of input feature maps.
        kernel_size (int or tuple): Size of the convolving kernel.
        stride (int or tuple, optional): Stride of the convolution.
        padding (int or tuple, optional): Zero-padding added to both sides of
            the input.
        bias (bool, optional): If ``True``, adds a learnable bias to the
            output. There will be one scalar bias per filter.
        no_weights (bool): If ``True``, the layer will have no trainable
            weights. Hence, weights are expected to be passed to the
            :meth:`forward` method.
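
    Example:
        A minimal usage sketch (shapes chosen arbitrarily):

        .. code-block:: python

            layer = LocalConv2dLayer(in_channels=3, out_channels=8,
                                     in_height=32, in_width=32,
                                     kernel_size=5, padding=2)
            x = torch.randn(16, 3, 32, 32)
            y = layer.forward(x)
            # y.shape is [16, 8, 32, 32] here, since out_height and
            # out_width equal 32 for kernel_size=5, padding=2, stride=1.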

    .. rubric:: Footnotes

    .. [#f1] For each of the ``C_in`` input feature maps, there is one
        receptive field for each pixel in all ``C_out`` feature maps.
    """
    def __init__(self, in_channels, out_channels, in_height, in_width,
                 kernel_size, stride=1, padding=0, bias=True,
                 no_weights=False):
        super(LocalConv2dLayer, self).__init__()

        if isinstance(kernel_size, int):
            kernel_size = (kernel_size, kernel_size)
        if isinstance(stride, int):
            stride = (stride, stride)
        if isinstance(padding, int):
            padding = (padding, padding)

        self._in_channels = in_channels
        self._out_channels = out_channels
        self._in_height = in_height
        self._in_width = in_width
        self._kernel_size = kernel_size
        self._stride = stride
        self._padding = padding
        self._has_bias = bias
        self._no_weights = no_weights

        self._out_height = (in_height - kernel_size[0] + 2 * padding[0]) // \
            stride[0] + 1
        self._out_width = (in_width - kernel_size[1] + 2 * padding[1]) // \
            stride[1] + 1
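        # E.g., for in_height=32, kernel_size[0]=5, padding[0]=2 and
        # stride[0]=2 (made-up values): out_height = (32 - 5 + 4) // 2 + 1
        # = 16.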

        # Size of a single receptive field.
        rf_size = in_channels * kernel_size[0] * kernel_size[1]
        self._rf_size = rf_size
        # Number of pixels per output feature map.
        num_pix = self._out_height * self._out_width
        self._num_pix = num_pix

        self._weights = None
        self._param_shapes = [[out_channels, rf_size, num_pix]]
        if bias:
            self._param_shapes.append([out_channels, num_pix])

        if not no_weights:
            self._weights = nn.ParameterList()

            self.register_parameter('filters', nn.Parameter(
                torch.Tensor(*self._param_shapes[0]), requires_grad=True))
            self._weights.append(self.filters)

            if bias:
                self.register_parameter('bias', nn.Parameter(
                    torch.Tensor(*self._param_shapes[1]),
                    requires_grad=True))
                self._weights.append(self.bias)

                init_params(self.filters, self.bias)
            else:
                self.register_parameter('bias', None)

                init_params(self.filters)

    @property
    def weights(self):
        """A list of all internal weights of this layer. If all weights are
        assumed to be generated externally, then this attribute will be
        ``None``.

        :type: torch.nn.ParameterList or None
        """
        return self._weights

    @property
    def param_shapes(self):
        """A list of lists of integers. Each list represents the shape of a
        parameter tensor. Note, this attribute is independent of the
        attribute :attr:`weights`; it always comprises the shapes of all
        weight tensors as if the network were stand-alone (i.e., no weights
        being passed to the :meth:`forward` method).
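
        For instance, with ``in_channels=3``, ``out_channels=8``,
        ``kernel_size=5`` and ``16 x 16`` output feature maps (hypothetical
        values), this list reads ``[[8, 75, 256], [8, 256]]`` if ``bias`` is
        ``True``.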

        :type: list
        """
        return self._param_shapes

    @property
    def out_height(self):
        """Height of the output feature maps.

        :type: int
        """
        return self._out_height

    @property
    def out_width(self):
        """Width of the output feature maps.

        :type: int
        """
        return self._out_width

    def forward(self, x, weights=None):
        """Compute the output of this local convolutional layer.

        Args:
            x: The input images of shape ``[N, C_in, H_in, W_in]``, where
                ``N`` denotes the batch size.
            weights: Weights that should be used instead of the internally
                maintained ones (determined by attribute :attr:`weights`).
                Note, if ``no_weights`` was ``True`` in the constructor, then
                this parameter is mandatory.

        Returns:
            The output feature maps of shape ``[N, C_out, H, W]``.
        """
        if self._no_weights and weights is None:
            raise ValueError('Layer was generated without weights. ' +
                             'Hence, "weights" option may not be None.')

        if weights is None:
            filters = self.filters
            bias = self.bias
        else:
            assert len(weights) == len(self.param_shapes)
            for i, p in enumerate(weights):
                assert np.all(np.equal(p.shape, self.param_shapes[i]))

            filters = weights[0]
            if self._has_bias:
                bias = weights[1]
            else:
                bias = None

        # Extract receptive fields.
        F_hat = F.unfold(x, self._kernel_size, padding=self._padding,
                         stride=self._stride)
        assert np.all(np.equal(F_hat.shape[1:],
                               [self._rf_size, self._num_pix]))

        # Insert extra dim for broadcasting over output channels.
        F_hat = F_hat.view(-1, 1, self._rf_size, self._num_pix)

        # Compute Frobenius inner products via broadcasting; summing over
        # `dim=2` contracts each receptive field with its filter.
        G = (F_hat * filters).sum(dim=2)
        assert np.all(np.equal(G.shape[1:],
                               [self._out_channels, self._num_pix]))

        if bias is not None:
            G = G + bias

        G = G.view(-1, self._out_channels, self._out_height, self._out_width)

        return G


if __name__ == '__main__':
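    # A minimal smoke test (hypothetical shapes), exercising both the
    # internal-weight and the externally-passed-weight code paths:
    layer = LocalConv2dLayer(in_channels=3, out_channels=8, in_height=16,
                             in_width=16, kernel_size=5, padding=2)
    x = torch.randn(4, 3, 16, 16)
    y = layer.forward(x)
    assert list(y.shape) == [4, 8, layer.out_height, layer.out_width]

    no_w = LocalConv2dLayer(in_channels=3, out_channels=8, in_height=16,
                            in_width=16, kernel_size=5, padding=2,
                            no_weights=True)
    ext = [torch.randn(*s) for s in no_w.param_shapes]
    y = no_w.forward(x, weights=ext)
    assert list(y.shape) == [4, 8, no_w.out_height, no_w.out_width]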