Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

新增模型vits-svc模型 #568

Open
wants to merge 13 commits into
base: develop
Choose a base branch
from
Empty file.
72 changes: 72 additions & 0 deletions paddlemix/models/vits-svc/configs/base.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
train:
model: "sovits"
seed: 1234
epochs: 10000
learning_rate: 5e-5
betas: [0.8, 0.99]
lr_decay: 0.999875
eps: 1e-9
batch_size: 8
accum_step: 2
c_stft: 9
c_mel: 1.
c_kl: 0.2
port: 8001
pretrain: "./vits_pretrain/sovits5.0.pretrain.pth"
#############################
data:
training_files: "files/train.txt"
validation_files: "files/valid.txt"
segment_size: 8000 # WARNING: tied to hop_length (8000 = 25 * hop_length of 320); keep it a multiple of hop_length
max_wav_value: 32768.0
sampling_rate: 32000
filter_length: 1024
hop_length: 320
win_length: 1024
mel_channels: 100
mel_fmin: 50.0
mel_fmax: 16000.0
#############################
vits:
ppg_dim: 1280
vec_dim: 256
spk_dim: 256
gin_channels: 256
inter_channels: 192
hidden_channels: 192
filter_channels: 640
#############################
gen:
upsample_input: 192
upsample_rates: [5,4,4,2,2]
upsample_kernel_sizes: [15,8,8,4,4]
upsample_initial_channel: 320
resblock_kernel_sizes: [3,7,11]
resblock_dilation_sizes: [[1,3,5], [1,3,5], [1,3,5]]
#############################
mpd:
periods: [2,3,5,7,11]
kernel_size: 5
stride: 3
use_spectral_norm: False
lReLU_slope: 0.2
#############################
mrd:
resolutions: "[(1024, 120, 600), (2048, 240, 1200), (4096, 480, 2400), (512, 50, 240)]" # (filter_length, hop_length, win_length)
use_spectral_norm: False
lReLU_slope: 0.2
#############################
log:
info_interval: 100
eval_interval: 1
save_interval: 5
num_audio: 6
pth_dir: 'chkpt'
log_dir: 'logs'
keep_ckpts: 0
#############################
dist_config:
dist_backend: "nccl"
dist_url: "tcp://localhost:54321"
world_size: 1

8 changes: 8 additions & 0 deletions paddlemix/models/vits-svc/crepe/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from . import decode
from .core import *
from .model import Crepe
from . import convert
from . import filter
from . import load
# from . import loudness
# from . import threshold
58 changes: 58 additions & 0 deletions paddlemix/models/vits-svc/crepe/convert.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import scipy
import paddle
import math
import crepe


###############################################################################
# Pitch unit conversions
###############################################################################


def bins_to_cents(bins):
    """Map pitch bin indices onto the cents scale.

    The linear mapping is ``crepe.CENTS_PER_BIN * bins + 1997.3794084376191``;
    the result is then passed through ``dither`` before being returned.
    """
    offset_cents = 1997.3794084376191
    undithered = crepe.CENTS_PER_BIN * bins + offset_cents

    # Dithering trades quantization error for noise.
    return dither(undithered)


def bins_to_frequency(bins):
    """Map pitch bin indices to frequency in Hz.

    Goes through the cents scale: ``bins_to_cents`` followed by
    ``cents_to_frequency``.
    """
    cents = bins_to_cents(bins)
    return cents_to_frequency(cents)


def cents_to_bins(cents, quantize_fn=math.floor):
    """Map a value in cents onto a pitch bin index.

    Args:
        cents: Pitch in cents.
        quantize_fn: Callable applied to the fractional bin position to
            produce the returned bin (defaults to ``math.floor``).

    Returns:
        Whatever ``quantize_fn`` returns for the fractional bin position.
    """
    offset_cents = 1997.3794084376191
    fractional_bin = (cents - offset_cents) / crepe.CENTS_PER_BIN
    return quantize_fn(fractional_bin)


def cents_to_frequency(cents):
    """Map a value in cents to frequency in Hz.

    Computes ``10 * 2 ** (cents / 1200)``: 0 cents corresponds to 10 Hz and
    every 1200 cents doubles the frequency.
    """
    octaves = cents / 1200
    return 10 * 2 ** octaves


def frequency_to_bins(frequency, quantize_fn=math.floor):
    """Map a frequency in Hz onto a pitch bin index.

    Args:
        frequency: Frequency in Hz.
        quantize_fn: Callable forwarded to ``cents_to_bins`` to quantize the
            fractional bin position (defaults to ``math.floor``).

    Returns:
        The result of ``cents_to_bins`` for the frequency expressed in cents.
    """
    cents = frequency_to_cents(frequency)
    return cents_to_bins(cents, quantize_fn)


def frequency_to_cents(frequency):
    """Map a frequency in Hz onto the cents scale.

    Computes ``1200 * log2(frequency / 10)``, i.e. cents relative to 10 Hz.
    The logarithm is taken with ``math.log2``.
    """
    ratio_to_reference = frequency / 10.
    return 1200 * math.log2(ratio_to_reference)


# ###############################################################################
# # Utilities
# ###############################################################################


def dither(cents):
    """Dither the predicted pitch in cents to remove quantization error.

    One noise sample per element of ``cents`` is drawn from a triangular
    distribution (``c=0.5``, ``loc=-crepe.CENTS_PER_BIN``,
    ``scale=2 * crepe.CENTS_PER_BIN``), i.e. symmetric around zero and
    spanning one bin width on either side, and added to ``cents``.

    Args:
        cents: Paddle tensor of pitch values in cents. Its ``shape``,
            ``dtype`` and ``stop_gradient`` attributes are read.

    Returns:
        ``cents`` plus the noise, with the noise converted to a tensor that
        has the same ``dtype`` and ``stop_gradient`` setting as ``cents``.
    """
    # A bare `import scipy` does not guarantee that the `stats` submodule is
    # loaded (SciPy releases without lazy submodule loading raise
    # AttributeError on `scipy.stats`), so import it explicitly here.
    import scipy.stats

    bin_width = crepe.CENTS_PER_BIN
    noise = scipy.stats.triang.rvs(c=0.5,
                                   loc=-bin_width,
                                   scale=2 * bin_width,
                                   size=cents.shape)
    return cents + paddle.to_tensor(noise,
                                    dtype=cents.dtype,
                                    stop_gradient=cents.stop_gradient)
Loading