Source code for akida.layers.attention

from akida.core import (Layer, LayerParams, LayerType)



[docs]
class Attention(Layer):
    """Multi-head attention layer.

    From A. Vaswani et al., "Attention is All You Need" (arXiv:1706.03762):
    "Self-attention, sometimes called intra-attention is an attention mechanism
    relating different positions of a single sequence in order to compute a
    representation of the sequence."

    This layer will take three inputs, Query, Key and Value, and perform these
    actions on each head:

    * Multiply Query and Key to obtain a vector of attention scores expressing
      how tokens/patches relate to one another.
    * Divide by a scale factor.
    * Convert the score to a probability mask using a Softmax function
      (replaced by a Shiftmax in our implementation).
    * Multiply the mask by the Values.

    Note that outputs and masks will be saturated on the range that can be
    represented with output_bits.

    Args:
        num_heads (int): number of heads.
        output_bits (int, optional): output bitwidth. Defaults to 8
        buffer_bits (int, optional): internal bitwidth. Defaults to 32
        post_op_buffer_bits (int, optional): internal bitwidth for post operations. Defaults to 32.
        shiftmax_output_bits (int, optional): output bitwidth for shiftmax,
            must be no more than 1/2 of buffer_bits. Defaults to 10
        name (str, optional): name of the layer. Defaults to empty string

    """

    def __init__(self,
                 num_heads,
                 output_bits=8,
                 buffer_bits=32,
                 post_op_buffer_bits=32,
                 shiftmax_output_bits=10,
                 name=""):
        try:
            params = LayerParams(
                LayerType.Attention, {
                    "num_heads": num_heads,
                    "output_bits": output_bits,
                    "buffer_bits": buffer_bits,
                    "post_op_buffer_bits": post_op_buffer_bits,
                    "shiftmax_output_bits": shiftmax_output_bits
                })
            # Call parent constructor to initialize C++ bindings
            # Note that we invoke directly __init__ instead of using super, as
            # specified in pybind documentation
            Layer.__init__(self, params, name)
        except BaseException:
            self = None
            raise