Absolute position

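The matrix below is a binary-counter style absolute position code: the last column cycles fastest through T values, each column to its left cycles at twice that period, so every one of the 2^(n-1)*T rows is a distinct n-dimensional vector.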
import numpy as np

def position_matrix(start=0, stop=2, T=2, n=4):
    """
    Create an absolute positional encoding matrix.
    Args:
        start: lowest value in the positional encoding vector
        stop: upper bound of the values (exclusive, as in np.arange)
        T: number of values in one cycle of the lowest bit of the PE vector
        n: length of the positional encoding vector
    Returns: a matrix of shape (2**(n-1)*T, n)
    """
    step = (stop - start) / T
    one_cycle = np.arange(start, stop, step)  # pattern of the fastest-cycling column
    pos_matrix = np.zeros((np.power(2, n - 1) * T, n))
    for j in range(n):
        # column n-j-1 repeats each value 2**j times, so it cycles at
        # twice the period of the column to its right
        block = np.repeat(one_cycle, np.power(2, j))
        pos_matrix[:, n - j - 1] = np.tile(block, np.power(2, n - 1 - j))
    return pos_matrix
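
A quick check with small parameters shows the counter structure (n = 3, T = 2 counts positions in binary):

print(position_matrix(start=0, stop=2, T=2, n=3))
# [[0. 0. 0.]
#  [0. 0. 1.]
#  [0. 1. 0.]
#  [0. 1. 1.]
#  [1. 0. 0.]
#  [1. 0. 1.]
#  [1. 1. 0.]
#  [1. 1. 1.]]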

In forward:

ppmm = self.position_mat[:x.size(1)].repeat(x.size(0), 1, 1)  # (batch, seq_len, n)
ppmm = ppmm.to(x.device, dtype=torch.float32)  # follow x's device instead of hard-coding .cuda()
x = x + ppmm  # add the absolute position code to the input
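
For context, a minimal sketch of where position_mat could live; the constructor signature (input_dim, hidden_dim) and the use of register_buffer are assumptions, not taken from the original:

import torch
import torch.nn as nn

class Self_attention(nn.Module):
    def __init__(self, input_dim, hidden_dim, position_mat):  # assumed signature
        super().__init__()
        # A buffer rather than a Parameter: the table is fixed, yet
        # model.to(device) moves it together with the rest of the module.
        self.register_buffer("position_mat", position_mat.to(torch.float32))
        # attention layers omitted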

In main:

num = 90; n = 12  # num: seq_len; n: input_dim
T = max(2, int(num / 2 ** (n - 1)) + 1)  # pick T so the table has at least num rows
pm = torch.from_numpy(position_matrix(0, 2, T, n))  # (2**(n-1)*T, n)
pm = pm.to(device)  # .to() is not in-place, so reassign
model = Self_attention(*, *, pm)  # first two constructor args elided in the original
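
The max(2, ...) formula simply guarantees the table is long enough: position_matrix returns 2^(n-1)*T rows, so with num = 90 and n = 12 it yields 2^11 * 2 = 4096 rows, comfortably covering the 90-step sequence:

num, n = 90, 12
T = max(2, int(num / 2 ** (n - 1)) + 1)  # = 2 here
assert 2 ** (n - 1) * T >= num           # 4096 >= 90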