-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathmodels.py
108 lines (84 loc) · 4.38 KB
/
models.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import torch
import torch.nn.functional as F
import torch.nn as nn
from torch.nn import Conv1d, ConvTranspose1d, AvgPool1d, Conv2d
from torch.nn.utils import weight_norm, spectral_norm
from utils import init_weights, get_padding
import numpy as np
LRELU_SLOPE = 0.1
class ResBlock(torch.nn.Module):
def __init__(self, channels, kernel_size=3, dilation=(1, 3, 5)):
super(ResBlock, self).__init__()
self.convs1 = nn.ModuleList([
weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[0],
padding=get_padding(kernel_size, dilation[0]))),
weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[1],
padding=get_padding(kernel_size, dilation[1]))),
weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[2],
padding=get_padding(kernel_size, dilation[2])))
])
self.convs1.apply(init_weights)
self.convs2 = nn.ModuleList([
weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=1,
padding=get_padding(kernel_size, 1))),
weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=1,
padding=get_padding(kernel_size, 1))),
weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=1,
padding=get_padding(kernel_size, 1)))
])
self.convs2.apply(init_weights)
def forward(self, x):
for c1, c2 in zip(self.convs1, self.convs2):
xt = F.leaky_relu(x, LRELU_SLOPE)
xt = c1(xt)
xt = F.leaky_relu(xt, LRELU_SLOPE)
xt = c2(xt)
x = xt + x
return x
class NSPP_Model(torch.nn.Module):
def __init__(self, h):
super(NSPP_Model, self).__init__()
self.h = h
self.num_kernels = len(h.ResNet_ResBlock_kernel_sizes)
self.input_conv = weight_norm(Conv1d(h.n_fft//2+1, h.ResNet_channel, h.ResNet_input_conv_kernel_size, 1,
padding=get_padding(h.ResNet_input_conv_kernel_size, 1)))
self.ResNet = nn.ModuleList()
for j, (k, d) in enumerate(zip(h.ResNet_ResBlock_kernel_sizes, h.ResNet_ResBlock_dilation_sizes)):
self.ResNet.append(ResBlock(h.ResNet_channel, k, d))
self.PEA_conv_R = weight_norm(Conv1d(h.ResNet_channel, h.n_fft//2+1, h.PEA_R_conv_kernel_size, 1,
padding=get_padding(h.PEA_R_conv_kernel_size, 1)))
self.PEA_conv_I = weight_norm(Conv1d(h.ResNet_channel, h.n_fft//2+1, h.PEA_I_conv_kernel_size, 1,
padding=get_padding(h.PEA_I_conv_kernel_size, 1)))
self.PEA_conv_R.apply(init_weights)
self.PEA_conv_I.apply(init_weights)
def forward(self, log_amp):
output = self.input_conv(log_amp)
outputs = None
for j in range(self.num_kernels):
if outputs is None:
outputs = self.ResNet[j](output)
else:
outputs += self.ResNet[j](output)
output = outputs / self.num_kernels
output = F.leaky_relu(output)
pseudo_real_part = self.PEA_conv_R(output)
pseudo_imaginary_part = self.PEA_conv_I(output)
phase = torch.atan2(pseudo_imaginary_part, pseudo_real_part)
return phase
def losses(phase_r, phase_g, n_fft, frames):
IP_r = phase_r
IP_g = phase_g
GD_matrix = torch.triu(torch.ones(n_fft//2+1, n_fft//2+1), diagonal=1) - torch.triu(torch.ones(n_fft//2+1, n_fft//2+1), diagonal=2) - torch.eye(n_fft//2 + 1)
GD_matrix = GD_matrix.to(phase_g.device)
GD_r = torch.matmul(phase_r.permute(0,2,1), GD_matrix)
GD_g = torch.matmul(phase_g.permute(0,2,1), GD_matrix)
IAF_matrix = torch.triu(torch.ones(frames, frames), diagonal=1) - torch.triu(torch.ones(frames, frames), diagonal=2) - torch.eye(frames)
IAF_matrix = IAF_matrix.to(phase_g.device)
IAF_r = torch.matmul(phase_r, IAF_matrix)
IAF_g = torch.matmul(phase_g, IAF_matrix)
L_IP = torch.mean(anti_wrapping_function(IP_r-IP_g))
L_GD = torch.mean(anti_wrapping_function(GD_r-GD_g))
L_IAF = torch.mean(anti_wrapping_function(IAF_r-IAF_g))
return L_IP, L_GD, L_IAF
def anti_wrapping_function(x):
return torch.abs(x - torch.round(x / (2 * np.pi)) * 2 * np.pi)