Add custom nodes, Civitai loras (LFS), and vast.ai setup script
Some checks failed
Python Linting / Run Ruff (push) Has been cancelled
Python Linting / Run Pylint (push) Has been cancelled
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.10, [self-hosted Linux], stable) (push) Has been cancelled
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.11, [self-hosted Linux], stable) (push) Has been cancelled
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.12, [self-hosted Linux], stable) (push) Has been cancelled
Full Comfy CI Workflow Runs / test-unix-nightly (12.1, , linux, 3.11, [self-hosted Linux], nightly) (push) Has been cancelled
Execution Tests / test (macos-latest) (push) Has been cancelled
Execution Tests / test (ubuntu-latest) (push) Has been cancelled
Execution Tests / test (windows-latest) (push) Has been cancelled
Test server launches without errors / test (push) Has been cancelled
Unit Tests / test (macos-latest) (push) Has been cancelled
Unit Tests / test (ubuntu-latest) (push) Has been cancelled
Unit Tests / test (windows-2022) (push) Has been cancelled

Includes 30 custom nodes committed directly, 7 Civitai-exclusive
loras stored via Git LFS, and a setup script that installs all
dependencies and downloads HuggingFace-hosted models on vast.ai.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-09 00:55:26 +00:00
parent 2b70ab9ad0
commit f09734b0ee
2274 changed files with 748556 additions and 3 deletions

View File

@@ -0,0 +1,5 @@
# Copyright (c) 2023-2025 Fannovel16 and contributors
# See LICENSES/MIT-ComfyUI-Frame-Interpolation.txt for the full text.
# Please don't delete this file; just edit it when necessary.
ckpts_path: "./ckpts"
ops_backend: "cupy" #Either "taichi" or "cupy"

View File

@@ -0,0 +1,140 @@
# Copyright (c) 2023-2025 Fannovel16 and contributors
# See LICENSES/MIT-ComfyUI-Frame-Interpolation.txt for the full text.
import torch
from torch.utils.data import DataLoader
import pathlib
from vfi_utils import (
load_file_from_github_release,
preprocess_frames,
postprocess_frames,
generic_frame_loop,
InterpolationStateList,
)
import typing
from comfy.model_management import get_torch_device
import re
from functools import cmp_to_key
from packaging import version
# Name of the directory that contains this module; it is passed as the model
# type when the checkpoint file is resolved/downloaded.
MODEL_TYPE = pathlib.Path(__file__).parent.name
# Maps every selectable checkpoint file name to the IFNet architecture version
# (``arch_ver``) that is used to build the network for that checkpoint.
# Several checkpoints share one architecture version.
CKPT_NAME_VER_DICT = {
    "rife40.pth": "4.0",
    "rife41.pth": "4.0",
    "rife42.pth": "4.2",
    "rife43.pth": "4.3",
    "rife44.pth": "4.3",
    "rife45.pth": "4.5",
    "rife46.pth": "4.6",
    "rife47.pth": "4.7",
    "rife48.pth": "4.7",
    "rife49.pth": "4.7",
    "sudo_rife4_269.662_testV1_scale1.pth": "4.0",
    # Arch 4.10 is disabled: loading fails with a state dict mismatch.
    # TODO: investigate and fix it, then re-enable the entries below.
    # "rife410.pth": "4.10",
    # "rife411.pth": "4.10",
    # "rife412.pth": "4.10"
}
class RIFE_VFI:
    """ComfyUI node that interpolates frames with a RIFE 4.x checkpoint."""

    @classmethod
    def INPUT_TYPES(s):
        """Describe the node inputs.

        Checkpoint names are listed in ascending order of the architecture
        version they map to in ``CKPT_NAME_VER_DICT``.
        """
        return {
            "required": {
                "ckpt_name": (
                    sorted(
                        CKPT_NAME_VER_DICT,
                        key=lambda ckpt_name: version.parse(
                            CKPT_NAME_VER_DICT[ckpt_name]
                        ),
                    ),
                    {"default": "rife47.pth"},
                ),
                "frames": ("IMAGE",),
                "clear_cache_after_n_frames": (
                    "INT",
                    {"default": 10, "min": 1, "max": 1000},
                ),
                "multiplier": ("INT", {"default": 2, "min": 1}),
                "fast_mode": ("BOOLEAN", {"default": True}),
                "ensemble": ("BOOLEAN", {"default": True}),
                "scale_factor": ([0.25, 0.5, 1.0, 2.0, 4.0], {"default": 1.0}),
            },
            "optional": {"optional_interpolation_states": ("INTERPOLATION_STATES",)},
        }

    RETURN_TYPES = ("IMAGE",)
    FUNCTION = "vfi"
    CATEGORY = "ComfyUI-Frame-Interpolation/VFI"

    def vfi(
        self,
        ckpt_name: typing.AnyStr,
        frames: torch.Tensor,
        clear_cache_after_n_frames=10,
        multiplier: typing.SupportsInt = 2,
        fast_mode=False,
        ensemble=False,
        scale_factor=1.0,
        optional_interpolation_states: InterpolationStateList = None,
        **kwargs
    ):
        """
        Perform video frame interpolation using a given checkpoint model.

        Args:
            ckpt_name (str): The name of the checkpoint model to use. Must be a key of
                ``CKPT_NAME_VER_DICT``.
            frames (torch.Tensor): A tensor containing input video frames.
            clear_cache_after_n_frames (int, optional): The number of frames to process before clearing CUDA cache
                to prevent memory overflow. Defaults to 10. Lower numbers are safer but mean more processing time.
                How high you should set it depends on how many input frames there are, input resolution (after upscaling),
                how many times you want to multiply them, and how long you're willing to wait for the process to complete.
            multiplier (int, optional): The multiplier for each input frame. 60 input frames * 2 = 120 output frames. Defaults to 2.
            fast_mode (bool, optional): Forwarded to the network as its ``fastmode`` flag.
            ensemble (bool, optional): Forwarded to the network as its ``ensemble`` flag.
            scale_factor (float, optional): Divides the base scale list ``[8, 4, 2, 1]`` handed to the network.
            optional_interpolation_states (InterpolationStateList, optional): Passed through to the
                frame loop as ``interpolation_states``.

        Returns:
            tuple: A tuple containing the output interpolated frames.

        Note:
            This method interpolates frames in a video sequence using a specified checkpoint model.
            It processes each frame sequentially, generating interpolated frames between them.
            To prevent memory overflow, it clears the CUDA cache after processing a specified number of frames.
        """
        from .rife_arch import IFNet

        model_path = load_file_from_github_release(MODEL_TYPE, ckpt_name)
        arch_ver = CKPT_NAME_VER_DICT[ckpt_name]
        interpolation_model = IFNet(arch_ver=arch_ver)
        # Load onto the CPU first so a checkpoint saved from a CUDA device can
        # still be opened on hosts without CUDA; the model is moved afterwards.
        # NOTE(review): torch.load unpickles the downloaded file - only use
        # checkpoints from trusted release endpoints.
        interpolation_model.load_state_dict(
            torch.load(model_path, map_location="cpu")
        )
        interpolation_model.eval().to(get_torch_device())
        frames = preprocess_frames(frames)

        def return_middle_frame(
            frame_0, frame_1, timestep, model, scale_list, in_fast_mode, in_ensemble
        ):
            # The flags are passed by keyword: IFNet.forward takes ``training``
            # before ``fastmode``/``ensemble``, so positional passing would
            # shift every flag by one slot. A fresh copy of the scale list is
            # handed over because the network may rescale its list in place.
            return model(
                frame_0,
                frame_1,
                timestep,
                list(scale_list),
                training=False,
                fastmode=in_fast_mode,
                ensemble=in_ensemble,
            )

        scale_list = [
            8 / scale_factor,
            4 / scale_factor,
            2 / scale_factor,
            1 / scale_factor,
        ]
        args = [interpolation_model, scale_list, fast_mode, ensemble]
        out = postprocess_frames(
            generic_frame_loop(
                type(self).__name__,
                frames,
                clear_cache_after_n_frames,
                multiplier,
                return_middle_frame,
                *args,
                interpolation_states=optional_interpolation_states,
                dtype=torch.float32
            )
        )
        return (out,)

View File

@@ -0,0 +1,588 @@
# Copyright (c) 2023-2025 Fannovel16 and contributors
# See LICENSES/MIT-ComfyUI-Frame-Interpolation.txt for the full text.
"""
26-Dez-21
https://github.com/hzwer/Practical-RIFE
https://github.com/hzwer/Practical-RIFE/blob/main/model/warplayer.py
https://github.com/HolyWu/vs-rife/blob/master/vsrife/__init__.py
"""
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.optim import AdamW
import torch
import torch.nn.functional as F
import torch.nn as nn
import torch.optim as optim
import warnings
from comfy.model_management import get_torch_device
# Torch device reported by ComfyUI's model management, resolved once at import.
device = get_torch_device()
# Cache of base sampling grids used by warp(), keyed by
# (str(flow device), str(flow size)). Entries are never evicted.
backwarp_tenGrid = {}
class ResConv(nn.Module):
    """Residual 3x3 convolution with a learnable per-channel scale.

    Computes ``LeakyReLU(conv(x) * beta + x)``. Padding equals the dilation,
    so the spatial size of the input is preserved.
    """

    def __init__(self, c, dilation=1):
        super().__init__()
        self.conv = nn.Conv2d(
            c,
            c,
            kernel_size=3,
            stride=1,
            padding=dilation,
            dilation=dilation,
            groups=1,
        )
        # One scale factor per channel, initialised to 1.
        self.beta = nn.Parameter(torch.ones((1, c, 1, 1)), requires_grad=True)
        self.relu = nn.LeakyReLU(0.2, True)

    def forward(self, x):
        """Return the activated sum of the scaled convolution and the input."""
        scaled = self.conv(x) * self.beta
        return self.relu(scaled + x)
def warp(tenInput, tenFlow):
    """Backward-warp ``tenInput`` along ``tenFlow`` using bilinear sampling.

    A base grid spanning [-1, 1] in both axes is built once per
    (flow device, flow size) and cached in ``backwarp_tenGrid``. The flow is
    divided by half of (input size - 1) per axis before it is added to the
    grid, i.e. it is converted to the normalised coordinates that
    ``grid_sample`` expects with ``align_corners=True``.

    Args:
        tenInput: 4-D tensor to sample from (indexed as [:, :, H, W]).
        tenFlow: 4-D flow tensor; channel 0 is used as the horizontal and
            channel 1 as the vertical displacement.

    Returns:
        The sampled tensor produced by ``torch.nn.functional.grid_sample``.
    """
    k = (str(tenFlow.device), str(tenFlow.size()))
    if k not in backwarp_tenGrid:
        batch, height, width = tenFlow.shape[0], tenFlow.shape[2], tenFlow.shape[3]
        # Allocate the grid on the flow's own device: the cache key is per
        # device, so the cached tensor has to live on that device as well.
        tenHorizontal = (
            torch.linspace(-1.0, 1.0, width, device=tenFlow.device)
            .view(1, 1, 1, width)
            .expand(batch, -1, height, -1)
        )
        tenVertical = (
            torch.linspace(-1.0, 1.0, height, device=tenFlow.device)
            .view(1, 1, height, 1)
            .expand(batch, -1, -1, width)
        )
        backwarp_tenGrid[k] = torch.cat([tenHorizontal, tenVertical], 1)
    tenFlow = torch.cat(
        [
            tenFlow[:, 0:1, :, :] / ((tenInput.shape[3] - 1.0) / 2.0),
            tenFlow[:, 1:2, :, :] / ((tenInput.shape[2] - 1.0) / 2.0),
        ],
        1,
    )
    g = (backwarp_tenGrid[k] + tenFlow).permute(0, 2, 3, 1)
    if tenInput.type() == "torch.cuda.HalfTensor":
        g = g.half()
    padding_mode = "border"
    if tenInput.device.type == "mps":
        # https://github.com/pytorch/pytorch/issues/125098
        # "border" padding is avoided on MPS; clamping the grid to [-1, 1]
        # keeps every sample position inside the image instead.
        padding_mode = "zeros"
        g = g.clamp(-1, 1)
    return torch.nn.functional.grid_sample(
        input=tenInput,
        grid=g,
        mode="bilinear",
        padding_mode=padding_mode,
        align_corners=True,
    )
def conv(
    in_planes,
    out_planes,
    kernel_size=3,
    stride=1,
    padding=1,
    dilation=1,
    arch_ver="4.0",
):
    """Build a biased ``Conv2d`` followed by the activation used by ``arch_ver``.

    Args:
        in_planes: Number of input channels.
        out_planes: Number of output channels.
        kernel_size, stride, padding, dilation: Forwarded to ``nn.Conv2d``.
        arch_ver: Architecture version. "4.0" uses a per-channel ``PReLU``;
            "4.2", "4.3", "4.5", "4.6", "4.7" and "4.10" use an in-place
            ``LeakyReLU(0.2)``.

    Returns:
        ``nn.Sequential(conv, activation)``.

    Raises:
        ValueError: If ``arch_ver`` is not one of the versions listed above
            (instead of silently returning ``None``).
    """
    leaky_archs = ("4.2", "4.3", "4.5", "4.6", "4.7", "4.10")
    if arch_ver != "4.0" and arch_ver not in leaky_archs:
        raise ValueError(f"Unsupported RIFE arch_ver: {arch_ver!r}")
    convolution = nn.Conv2d(
        in_planes,
        out_planes,
        kernel_size=kernel_size,
        stride=stride,
        padding=padding,
        dilation=dilation,
        bias=True,
    )
    if arch_ver == "4.0":
        activation = nn.PReLU(out_planes)
    else:
        activation = nn.LeakyReLU(0.2, True)
    return nn.Sequential(convolution, activation)
def conv_woact(in_planes, out_planes, kernel_size=3, stride=1, padding=1, dilation=1):
    """Build a biased ``Conv2d`` wrapped in ``nn.Sequential`` with no activation.

    The wrapper is kept so the convolution's parameters stay under index
    ``0`` of the returned container.

    Args:
        in_planes: Number of input channels.
        out_planes: Number of output channels.
        kernel_size, stride, padding, dilation: Forwarded to ``nn.Conv2d``.

    Returns:
        ``nn.Sequential`` holding the single convolution.
    """
    # This function used to be defined twice with identical bodies; one
    # definition is enough.
    return nn.Sequential(
        nn.Conv2d(
            in_planes,
            out_planes,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            bias=True,
        ),
    )
def deconv(in_planes, out_planes, kernel_size=4, stride=2, padding=1, arch_ver="4.0"):
    """Build a biased ``ConvTranspose2d`` followed by the activation of ``arch_ver``.

    Args:
        in_planes: Number of input channels.
        out_planes: Number of output channels.
        kernel_size, stride, padding: Forwarded to ``ConvTranspose2d``. The
            defaults (4, 2, 1) double the spatial size.
        arch_ver: Architecture version. "4.0" uses a per-channel ``PReLU``;
            "4.2", "4.3", "4.5", "4.6", "4.7" and "4.10" use an in-place
            ``LeakyReLU(0.2)``.

    Returns:
        ``nn.Sequential(transposed_conv, activation)``.

    Raises:
        ValueError: If ``arch_ver`` is not one of the versions listed above
            (instead of silently returning ``None``).
    """
    leaky_archs = ("4.2", "4.3", "4.5", "4.6", "4.7", "4.10")
    if arch_ver != "4.0" and arch_ver not in leaky_archs:
        raise ValueError(f"Unsupported RIFE arch_ver: {arch_ver!r}")
    # Honour the geometry arguments; they were previously accepted but
    # replaced by the hard-coded values 4 / 2 / 1.
    upsample = torch.nn.ConvTranspose2d(
        in_channels=in_planes,
        out_channels=out_planes,
        kernel_size=kernel_size,
        stride=stride,
        padding=padding,
        bias=True,
    )
    if arch_ver == "4.0":
        activation = nn.PReLU(out_planes)
    else:
        activation = nn.LeakyReLU(0.2, True)
    return nn.Sequential(upsample, activation)
class Conv2(nn.Module):
    """Two stacked 3x3 ``conv`` layers; only the first one applies ``stride``."""

    def __init__(self, in_planes, out_planes, stride=2, arch_ver="4.0"):
        super().__init__()
        self.conv1 = conv(
            in_planes, out_planes, kernel_size=3, stride=stride, padding=1,
            arch_ver=arch_ver,
        )
        self.conv2 = conv(
            out_planes, out_planes, kernel_size=3, stride=1, padding=1,
            arch_ver=arch_ver,
        )

    def forward(self, x):
        """Apply ``conv1`` and then ``conv2``."""
        return self.conv2(self.conv1(x))
class IFBlock(nn.Module):
    """One flow-estimation stage whose layout depends on ``arch_ver``.

    * "4.0", "4.2", "4.3": eight plain ``conv`` layers and a 5-channel
      transposed-convolution head.
    * "4.5": eight ``ResConv`` layers and a 4*5-channel head followed by
      ``PixelShuffle(2)``.
    * "4.6", "4.7", "4.10": eight ``ResConv`` layers and a 4*6-channel head
      followed by ``PixelShuffle(2)``.
    """

    def __init__(self, in_planes, c=64, arch_ver="4.0"):
        super().__init__()
        self.arch_ver = arch_ver
        # Two stride-2 convolutions: the features are at 1/4 resolution.
        self.conv0 = nn.Sequential(
            conv(in_planes, c // 2, 3, 2, 1, arch_ver=arch_ver),
            conv(c // 2, c, 3, 2, 1, arch_ver=arch_ver),
        )
        if arch_ver in ["4.0", "4.2", "4.3"]:
            self.convblock = nn.Sequential(
                *[conv(c, c, arch_ver=arch_ver) for _ in range(8)]
            )
            self.lastconv = nn.ConvTranspose2d(c, 5, 4, 2, 1)
        if arch_ver in ["4.5", "4.6", "4.7", "4.10"]:
            self.convblock = nn.Sequential(*[ResConv(c) for _ in range(8)])
        if arch_ver == "4.5":
            self.lastconv = nn.Sequential(
                nn.ConvTranspose2d(c, 4 * 5, 4, 2, 1), nn.PixelShuffle(2)
            )
        if arch_ver in ["4.6", "4.7", "4.10"]:
            self.lastconv = nn.Sequential(
                nn.ConvTranspose2d(c, 4 * 6, 4, 2, 1), nn.PixelShuffle(2)
            )

    def forward(self, x, flow=None, scale=1):
        """Estimate flow and mask at ``1 / scale`` resolution.

        Args:
            x: Input feature tensor; it is resized by ``1 / scale``.
            flow: Optional previous flow. When given it is resized, divided by
                ``scale`` and concatenated to ``x`` along the channel axis.
            scale: Working-resolution divisor.

        Returns:
            ``(flow, mask)``: the first four output channels (multiplied by
            the upsampling factor) and output channel 4.
        """
        shrink = 1.0 / scale
        x = F.interpolate(
            x, scale_factor=shrink, mode="bilinear", align_corners=False
        )
        if flow is not None:
            resized_flow = F.interpolate(
                flow, scale_factor=shrink, mode="bilinear", align_corners=False
            )
            flow = resized_flow * 1.0 / scale
            x = torch.cat((x, flow), 1)
        feat = self.conv0(x)
        if self.arch_ver == "4.0":
            # Only 4.0 adds a skip connection around the conv stack.
            feat = self.convblock(feat) + feat
        elif self.arch_ver in ["4.2", "4.3", "4.5", "4.6", "4.7", "4.10"]:
            feat = self.convblock(feat)
        tmp = self.lastconv(feat)
        if self.arch_ver in ["4.0", "4.2", "4.3"]:
            upscale = scale * 2
            tmp = F.interpolate(
                tmp, scale_factor=upscale, mode="bilinear", align_corners=False
            )
            flow = tmp[:, :4] * scale * 2
        elif self.arch_ver in ["4.5", "4.6", "4.7", "4.10"]:
            tmp = F.interpolate(
                tmp, scale_factor=scale, mode="bilinear", align_corners=False
            )
            flow = tmp[:, :4] * scale
        mask = tmp[:, 4:5]
        return flow, mask
class Contextnet(nn.Module):
    """Four-level feature pyramid whose levels are warped by the given flow."""

    def __init__(self, arch_ver="4.0"):
        super().__init__()
        c = 16
        self.conv1 = Conv2(3, c, arch_ver=arch_ver)
        self.conv2 = Conv2(c, 2 * c, arch_ver=arch_ver)
        self.conv3 = Conv2(2 * c, 4 * c, arch_ver=arch_ver)
        self.conv4 = Conv2(4 * c, 8 * c, arch_ver=arch_ver)

    def forward(self, x, flow):
        """Return ``[f1, f2, f3, f4]``, the warped features of each level.

        At every level the features pass through the next ``Conv2`` stage and
        the flow is resized by 0.5 and multiplied by 0.5 before warping.
        """
        warped_levels = []
        for stage in (self.conv1, self.conv2, self.conv3, self.conv4):
            x = stage(x)
            flow = (
                F.interpolate(
                    flow, scale_factor=0.5, mode="bilinear", align_corners=False
                )
                * 0.5
            )
            warped_levels.append(warp(x, flow))
        return warped_levels
class Unet(nn.Module):
    """Encoder/decoder that produces a 3-channel map squashed by a sigmoid."""

    def __init__(self, arch_ver="4.0"):
        super().__init__()
        c = 16
        self.down0 = Conv2(17, 2 * c, arch_ver=arch_ver)
        self.down1 = Conv2(4 * c, 4 * c, arch_ver=arch_ver)
        self.down2 = Conv2(8 * c, 8 * c, arch_ver=arch_ver)
        self.down3 = Conv2(16 * c, 16 * c, arch_ver=arch_ver)
        self.up0 = deconv(32 * c, 8 * c, arch_ver=arch_ver)
        self.up1 = deconv(16 * c, 4 * c, arch_ver=arch_ver)
        self.up2 = deconv(8 * c, 2 * c, arch_ver=arch_ver)
        self.up3 = deconv(4 * c, c, arch_ver=arch_ver)
        self.conv = nn.Conv2d(c, 3, 3, 1, 1)

    def forward(self, img0, img1, warped_img0, warped_img1, mask, flow, c0, c1):
        """Run the U-Net.

        ``c0`` and ``c1`` are indexed 0..3; entry ``i`` of each is
        concatenated to the encoder features before the next stage.
        """
        stacked_inputs = torch.cat(
            (img0, img1, warped_img0, warped_img1, mask, flow), 1
        )
        # Encoder: each stage also consumes one level of both context lists.
        s0 = self.down0(stacked_inputs)
        s1 = self.down1(torch.cat((s0, c0[0], c1[0]), 1))
        s2 = self.down2(torch.cat((s1, c0[1], c1[1]), 1))
        s3 = self.down3(torch.cat((s2, c0[2], c1[2]), 1))
        # Decoder: every stage after the first concatenates the matching
        # encoder output.
        x = self.up0(torch.cat((s3, c0[3], c1[3]), 1))
        for upsample, skip in ((self.up1, s2), (self.up2, s1), (self.up3, s0)):
            x = upsample(torch.cat((x, skip), 1))
        return torch.sigmoid(self.conv(x))
"""
currently supports 4.0-4.12
4.0: 4.0, 4.1
4.2: 4.2
4.3: 4.3, 4.4
4.5: 4.5
4.6: 4.6
4.7: 4.7, 4.8, 4.9
4.10: 4.10 4.11 4.12
"""
class IFNet(nn.Module):
    """RIFE 4.x interpolation network made of four ``IFBlock`` stages.

    Two architecture families are handled:

    * "4.0", "4.2", "4.3", "4.5", "4.6": each stage sees the (warped) images,
      the timestep and the running mask.
    * "4.7", "4.10": each stage additionally sees features produced by
      ``self.encode`` for both images.

    "4.0", "4.2" and "4.3" also own a ``Contextnet``/``Unet`` pair that is
    applied when ``fastmode`` is false.
    """

    _FLOW_ONLY_ARCHS = ("4.0", "4.2", "4.3", "4.5", "4.6")
    _FEATURE_ARCHS = ("4.7", "4.10")

    def __init__(self, arch_ver="4.0"):
        super(IFNet, self).__init__()
        self.arch_ver = arch_ver
        if arch_ver in ["4.0", "4.2", "4.3", "4.5", "4.6"]:
            self.block0 = IFBlock(7, c=192, arch_ver=arch_ver)
            self.block1 = IFBlock(8 + 4, c=128, arch_ver=arch_ver)
            self.block2 = IFBlock(8 + 4, c=96, arch_ver=arch_ver)
            self.block3 = IFBlock(8 + 4, c=64, arch_ver=arch_ver)
        if arch_ver in ["4.7"]:
            self.block0 = IFBlock(7 + 8, c=192, arch_ver=arch_ver)
            self.block1 = IFBlock(8 + 4 + 8, c=128, arch_ver=arch_ver)
            self.block2 = IFBlock(8 + 4 + 8, c=96, arch_ver=arch_ver)
            self.block3 = IFBlock(8 + 4 + 8, c=64, arch_ver=arch_ver)
            self.encode = nn.Sequential(
                nn.Conv2d(3, 16, 3, 2, 1), nn.ConvTranspose2d(16, 4, 4, 2, 1)
            )
        if arch_ver in ["4.10"]:
            # arch_ver must be forwarded: IFBlock defaults to the 4.0 layout,
            # which has different layers and a different forward path.
            # NOTE(review): whether this alone lets 4.10 checkpoints load has
            # not been verified - confirm against a real state dict.
            self.block0 = IFBlock(7 + 16, c=192, arch_ver=arch_ver)
            self.block1 = IFBlock(8 + 4 + 16, c=128, arch_ver=arch_ver)
            self.block2 = IFBlock(8 + 4 + 16, c=96, arch_ver=arch_ver)
            self.block3 = IFBlock(8 + 4 + 16, c=64, arch_ver=arch_ver)
            self.encode = nn.Sequential(
                nn.Conv2d(3, 32, 3, 2, 1),
                nn.LeakyReLU(0.2, True),
                nn.Conv2d(32, 32, 3, 1, 1),
                nn.LeakyReLU(0.2, True),
                nn.Conv2d(32, 32, 3, 1, 1),
                nn.LeakyReLU(0.2, True),
                nn.ConvTranspose2d(32, 8, 4, 2, 1),
            )
        if arch_ver in ["4.0", "4.2", "4.3"]:
            self.contextnet = Contextnet(arch_ver=arch_ver)
            self.unet = Unet(arch_ver=arch_ver)

    def forward(
        self,
        img0,
        img1,
        timestep=0.5,
        scale_list=(8, 4, 2, 1),
        training=True,
        fastmode=True,
        ensemble=False,
        return_flow=False,
    ):
        """Interpolate one frame between ``img0`` and ``img1``.

        Args:
            img0, img1: 4-D image batches; values are clamped to [0, 1] and
                padded on the right/bottom to a multiple of 64.
            timestep: Float or tensor; a float is broadcast to a one-channel
                map with the padded spatial size.
            scale_list: Four scales, one per stage. The sequence is copied,
                so the caller's object is never modified.
            training: When false, arch "4.0" may double all scales and
                restart from stage 0 if the stage-1 flow update is large.
            fastmode: When false, archs "4.0"/"4.2"/"4.3" refine the result
                with ``contextnet`` and ``unet``.
            ensemble: When true, every stage is also evaluated with the two
                images swapped and both estimates are averaged.
            return_flow: Unused by this implementation.

        Returns:
            The blended frame cropped back to the input height and width.
        """
        # Private copy: the large-motion fallback below rescales in place and
        # must not leak into the caller's list or later calls.
        scale_list = list(scale_list)
        flow_only = self.arch_ver in self._FLOW_ONLY_ARCHS
        with_features = self.arch_ver in self._FEATURE_ARCHS

        img0 = torch.clamp(img0, 0, 1)
        img1 = torch.clamp(img1, 0, 1)
        n, c, h, w = img0.shape
        ph = ((h - 1) // 64 + 1) * 64
        pw = ((w - 1) // 64 + 1) * 64
        padding = (0, pw - w, 0, ph - h)
        img0 = F.pad(img0, padding)
        img1 = F.pad(img1, padding)
        x = torch.cat((img0, img1), 1)
        if not training:
            channel = x.shape[1] // 2
            img0 = x[:, :channel]
            img1 = x[:, channel:]
        if not torch.is_tensor(timestep):
            timestep = (x[:, :1].clone() * 0 + 1) * timestep
        else:
            timestep = timestep.repeat(1, 1, img0.shape[2], img0.shape[3])
        flow_list = []
        merged = []
        mask_list = []
        if with_features:
            f0 = self.encode(img0[:, :3])
            f1 = self.encode(img1[:, :3])
        warped_img0 = img0
        warped_img1 = img1
        flow = None
        mask = None
        block = [self.block0, self.block1, self.block2, self.block3]
        for i in range(4):
            if flow is None:
                # First stage: nothing to refine yet.
                # 4.0-4.6
                if flow_only:
                    flow, mask = block[i](
                        torch.cat((img0[:, :3], img1[:, :3], timestep), 1),
                        None,
                        scale=scale_list[i],
                    )
                    if ensemble:
                        f_rev, m_rev = block[i](
                            torch.cat((img1[:, :3], img0[:, :3], 1 - timestep), 1),
                            None,
                            scale=scale_list[i],
                        )
                        flow = (flow + torch.cat((f_rev[:, 2:4], f_rev[:, :2]), 1)) / 2
                        mask = (mask + (-m_rev)) / 2
                # 4.7+
                if with_features:
                    flow, mask = block[i](
                        torch.cat((img0[:, :3], img1[:, :3], f0, f1, timestep), 1),
                        None,
                        scale=scale_list[i],
                    )
                    if ensemble:
                        f_, m_ = block[i](
                            torch.cat(
                                (img1[:, :3], img0[:, :3], f1, f0, 1 - timestep), 1
                            ),
                            None,
                            scale=scale_list[i],
                        )
                        flow = (flow + torch.cat((f_[:, 2:4], f_[:, :2]), 1)) / 2
                        mask = (mask + (-m_)) / 2
            else:
                # 4.0-4.6: the stage predicts residuals for flow and mask.
                if flow_only:
                    fd, m0 = block[i](
                        torch.cat(
                            (warped_img0[:, :3], warped_img1[:, :3], timestep, mask), 1
                        ),
                        flow,
                        scale=scale_list[i],
                    )
                    if self.arch_ver in ["4.0"]:
                        if (
                            i == 1
                            and fd[:, :2].abs().max() > 32
                            and fd[:, 2:4].abs().max() > 32
                            and not training
                        ):
                            # Large motion: double every scale, redo stage 0
                            # and then repeat this stage.
                            for k in range(4):
                                scale_list[k] *= 2
                            flow, mask = block[0](
                                torch.cat((img0[:, :3], img1[:, :3], timestep), 1),
                                None,
                                scale=scale_list[0],
                            )
                            warped_img0 = warp(img0, flow[:, :2])
                            warped_img1 = warp(img1, flow[:, 2:4])
                            fd, m0 = block[i](
                                torch.cat(
                                    (
                                        warped_img0[:, :3],
                                        warped_img1[:, :3],
                                        timestep,
                                        mask,
                                    ),
                                    1,
                                ),
                                flow,
                                scale=scale_list[i],
                            )
                    if ensemble:
                        f_rev, m_rev = block[i](
                            torch.cat(
                                (
                                    warped_img1[:, :3],
                                    warped_img0[:, :3],
                                    1 - timestep,
                                    -mask,
                                ),
                                1,
                            ),
                            torch.cat((flow[:, 2:4], flow[:, :2]), 1),
                            scale=scale_list[i],
                        )
                        fd = (fd + torch.cat((f_rev[:, 2:4], f_rev[:, :2]), 1)) / 2
                        m0 = (m0 + (-m_rev)) / 2
                    flow = flow + fd
                    mask = mask + m0
                # 4.7+: the stage predicts a flow residual and a new mask.
                if with_features:
                    wf0 = warp(f0, flow[:, :2])
                    wf1 = warp(f1, flow[:, 2:4])
                    fd, m0 = block[i](
                        torch.cat(
                            (
                                warped_img0[:, :3],
                                warped_img1[:, :3],
                                wf0,
                                wf1,
                                timestep,
                                mask,
                            ),
                            1,
                        ),
                        flow,
                        scale=scale_list[i],
                    )
                    if ensemble:
                        f_, m_ = block[i](
                            torch.cat(
                                (
                                    warped_img1[:, :3],
                                    warped_img0[:, :3],
                                    wf1,
                                    wf0,
                                    1 - timestep,
                                    -mask,
                                ),
                                1,
                            ),
                            torch.cat((flow[:, 2:4], flow[:, :2]), 1),
                            scale=scale_list[i],
                        )
                        fd = (fd + torch.cat((f_[:, 2:4], f_[:, :2]), 1)) / 2
                        mask = (m0 + (-m_)) / 2
                    else:
                        mask = m0
                    # Apply the residual only after the optional ensemble
                    # averaging, otherwise the averaged value is never used.
                    flow = flow + fd
            mask_list.append(mask)
            flow_list.append(flow)
            warped_img0 = warp(img0, flow[:, :2])
            warped_img1 = warp(img1, flow[:, 2:4])
            merged.append((warped_img0, warped_img1))
        if flow_only:
            mask_list[3] = torch.sigmoid(mask_list[3])
            merged[3] = merged[3][0] * mask_list[3] + merged[3][1] * (1 - mask_list[3])
        if with_features:
            mask = torch.sigmoid(mask)
            merged[3] = warped_img0 * mask + warped_img1 * (1 - mask)
        if not fastmode and self.arch_ver in ["4.0", "4.2", "4.3"]:
            c0 = self.contextnet(img0, flow[:, :2])
            c1 = self.contextnet(img1, flow[:, 2:4])
            tmp = self.unet(img0, img1, warped_img0, warped_img1, mask, flow, c0, c1)
            # Map the sigmoid output from [0, 1] to a residual in [-1, 1].
            res = tmp[:, :3] * 2 - 1
            merged[3] = torch.clamp(merged[3] + res, 0, 1)
        # Remove the padding added at the top of the method.
        return merged[3][:, :, :h, :w]

View File

@@ -0,0 +1,358 @@
# Copyright (c) 2023-2025 Fannovel16 and contributors
# See LICENSES/MIT-ComfyUI-Frame-Interpolation.txt for the full text.
import yaml
import os
from torch.hub import download_url_to_file, get_dir
from urllib.parse import urlparse
import torch
import typing
import traceback
import einops
import gc
import torchvision.transforms.functional as transform
from comfy.model_management import soft_empty_cache, get_torch_device
import numpy as np
# Release endpoints that are tried in order when a checkpoint has to be
# downloaded; the checkpoint file name is appended to each base URL.
BASE_MODEL_DOWNLOAD_URLS = [
    "https://github.com/styler00dollar/VSGAN-tensorrt-docker/releases/download/models/",
    "https://github.com/Fannovel16/ComfyUI-Frame-Interpolation/releases/download/models/",
    "https://github.com/dajes/frame-interpolation-pytorch/releases/download/v1.0.0/",
]
# config.yaml sits next to this module and is required at import time.
config_path = os.path.join(os.path.dirname(__file__), "./config.yaml")
if os.path.exists(config_path):
    # Use a context manager so the file handle is closed deterministically.
    with open(config_path, "r", encoding="utf-8") as config_file:
        config = yaml.load(config_file, Loader=yaml.FullLoader)
else:
    raise Exception(
        "config.yaml file is neccessary, plz recreate the config file by downloading it from https://github.com/Fannovel16/ComfyUI-Frame-Interpolation"
    )
# Device on which inference inputs are placed, resolved once at import.
DEVICE = get_torch_device()
class InterpolationStateList:
    """Frame-index list that acts either as a skip list or as a keep list."""

    def __init__(self, frame_indices: typing.List[int], is_skip_list: bool):
        # is_skip_list=True: the listed frames are skipped.
        # is_skip_list=False: every frame that is NOT listed is skipped.
        self.is_skip_list = is_skip_list
        self.frame_indices = frame_indices

    def is_frame_skipped(self, frame_index):
        """Return True when ``frame_index`` is to be skipped."""
        listed = frame_index in self.frame_indices
        if self.is_skip_list:
            return listed
        return not listed
class MakeInterpolationStateList:
    """ComfyUI node that builds an ``InterpolationStateList`` from text."""

    @classmethod
    def INPUT_TYPES(s):
        """Describe the node inputs: an index string and the list mode."""
        return {
            "required": {
                "frame_indices": ("STRING", {"multiline": True, "default": "1,2,3"}),
                "is_skip_list": (
                    "BOOLEAN",
                    {"default": True},
                ),
            },
        }

    RETURN_TYPES = ("INTERPOLATION_STATES",)
    FUNCTION = "create_options"
    CATEGORY = "ComfyUI-Frame-Interpolation/VFI"

    def create_options(self, frame_indices: str, is_skip_list: bool):
        """Parse ``frame_indices`` and wrap the result.

        Entries may be separated by commas and/or line breaks (the input
        widget is multiline). Blank entries, such as those left by a trailing
        comma or an empty field, are ignored.

        Returns:
            A one-element tuple holding the ``InterpolationStateList``.

        Raises:
            ValueError: If a non-blank entry is not an integer.
        """
        tokens = frame_indices.replace("\n", ",").split(",")
        # int() tolerates surrounding whitespace, so only blank entries have
        # to be filtered out.
        frame_indices_list = [int(token) for token in tokens if token.strip()]
        interpolation_state_list = InterpolationStateList(
            frame_indices=frame_indices_list,
            is_skip_list=is_skip_list,
        )
        return (interpolation_state_list,)
def get_ckpt_container_path(model_type):
    """Return the absolute checkpoint directory for ``model_type``.

    The directory is ``<this module's dir>/<config["ckpts_path"]>/<model_type>``.
    """
    module_dir = os.path.dirname(__file__)
    container = os.path.join(module_dir, config["ckpts_path"], model_type)
    return os.path.abspath(container)
def load_file_from_url(url, model_dir=None, progress=True, file_name=None):
    """Load file from http url, will download models if necessary.

    Ref:https://github.com/1adrianb/face-alignment/blob/master/face_alignment/utils.py

    Args:
        url (str): URL to be downloaded.
        model_dir (str): The path to save the downloaded model. Should be a full path. If None, use pytorch hub_dir.
            Default: None.
        progress (bool): Whether to show the download progress. Default: True.
        file_name (str): The downloaded file name. If None, use the file name in the url. Default: None.

    Returns:
        str: The path to the downloaded file.
    """
    if model_dir is None:  # use the pytorch hub_dir
        hub_dir = get_dir()
        model_dir = os.path.join(hub_dir, "checkpoints")
    os.makedirs(model_dir, exist_ok=True)
    # Fall back to the URL's basename only when the caller gave no explicit
    # name; an explicit file_name must not be overwritten.
    if file_name is None:
        file_name = os.path.basename(urlparse(url).path)
    cached_file = os.path.abspath(os.path.join(model_dir, file_name))
    # A file that already exists is reused without touching the network.
    if not os.path.exists(cached_file):
        print(f'Downloading: "{url}" to {cached_file}\n')
        download_url_to_file(url, cached_file, hash_prefix=None, progress=progress)
    return cached_file
def load_file_from_github_release(model_type, ckpt_name):
    """Fetch ``ckpt_name`` by trying each entry of ``BASE_MODEL_DOWNLOAD_URLS``.

    Returns the path from the first endpoint that succeeds. If every endpoint
    fails, raises ``Exception`` with the traceback of each attempt.
    """
    failures = []
    last_index = len(BASE_MODEL_DOWNLOAD_URLS) - 1
    for index, base_url in enumerate(BASE_MODEL_DOWNLOAD_URLS):
        try:
            return load_file_from_url(
                base_url + ckpt_name, get_ckpt_container_path(model_type)
            )
        except Exception:
            trace = traceback.format_exc()
            # Announce a retry only while another endpoint is left.
            if index < last_index:
                print("Failed! Trying another endpoint.")
            failures.append(
                f"Error when downloading from: {base_url + ckpt_name}\n\n{trace}"
            )
    error_str = "\n\n".join(failures)
    raise Exception(
        f"Tried all GitHub base urls to download {ckpt_name} but no suceess. Below is the error log:\n\n{error_str}"
    )
def load_file_from_direct_url(model_type, url):
    """Download ``url`` into the checkpoint directory of ``model_type``."""
    target_dir = get_ckpt_container_path(model_type)
    return load_file_from_url(url, target_dir)
def preprocess_frames(frames):
    """Keep the first three channels and reorder "n h w c" to "n c h w"."""
    first_three_channels = frames[..., :3]
    return einops.rearrange(first_three_channels, "n h w c -> n c h w")
def postprocess_frames(frames):
    """Reorder "n c h w" to "n h w c", keep three channels, move to the CPU."""
    channels_last = einops.rearrange(frames, "n c h w -> n h w c")
    return channels_last[..., :3].cpu()
def assert_batch_size(frames, batch_size=2, vfi_name=None):
    """Ensure that at least ``batch_size`` frames were supplied.

    Args:
        frames: Any sized collection of frames (``len()`` is used).
        batch_size: Minimum number of frames required.
        vfi_name: Optional model name used in the error message.

    Raises:
        AssertionError: If fewer than ``batch_size`` frames are present. The
            error is raised explicitly so the check is not stripped when
            Python runs with ``-O``.
    """
    frame_count = len(frames)
    if frame_count >= batch_size:
        return
    subject_verb = (
        "Most VFI models require"
        if vfi_name is None
        else f"VFI model {vfi_name} requires"
    )
    # Report the same quantity that was checked (len), so inputs without a
    # ``.shape`` attribute produce the intended message as well.
    raise AssertionError(
        f"{subject_verb} at least {batch_size} frames to work with, only found {frame_count}. Please check the frame input using PreviewImage."
    )
def _generic_frame_loop(
    frames,
    clear_cache_after_n_frames,
    multiplier: typing.Union[typing.SupportsInt, typing.List],
    return_middle_frame_function,
    *return_middle_frame_function_args,
    interpolation_states: InterpolationStateList = None,
    use_timestep=True,
    dtype=torch.float16,
    final_logging=True,
):
    """Interpolate between every pair of consecutive frames.

    Args:
        frames: Frame tensor indexed along dim 0.
            # assumes layout (n, c, h, w) as produced by preprocess_frames - TODO confirm
        clear_cache_after_n_frames: The cache is cleared once this many frame
            pairs were processed since the last clearing.
        multiplier: Number of output frames per input frame; ``multiplier - 1``
            frames are generated between two inputs.
        return_middle_frame_function: Called as
            ``fn(frame0, frame1, timestep, *return_middle_frame_function_args)``;
            ``timestep`` is ``None`` when ``use_timestep`` is false.
        interpolation_states: Optional skip/keep list; for a skipped index
            only the input frame itself is emitted.
        use_timestep: Query the model once per timestep ``i / multiplier``
            instead of bisecting recursively.
        dtype: dtype of the returned tensor.
        final_logging: Print the summary and final cache messages.

    Returns:
        CPU tensor with the produced frames, ending with the last input frame.
    """

    # https://github.com/hzwer/Practical-RIFE/blob/main/inference_video.py#L169
    def non_timestep_inference(frame0, frame1, n):
        middle = return_middle_frame_function(
            frame0, frame1, None, *return_middle_frame_function_args
        )
        if n == 1:
            return [middle]
        first_half = non_timestep_inference(frame0, middle, n=n // 2)
        second_half = non_timestep_inference(middle, frame1, n=n // 2)
        if n % 2:
            return [*first_half, middle, *second_half]
        else:
            return [*first_half, *second_half]

    # Upper bound for the output; the result is trimmed to out_len at the end.
    output_frames = torch.zeros(
        multiplier * frames.shape[0], *frames.shape[1:], dtype=dtype, device="cpu"
    )
    out_len = 0
    number_of_frames_processed_since_last_cleared_cuda_cache = 0
    for frame_itr in range(
        len(frames) - 1
    ):  # Skip the final frame since there are no frames after it
        frame0 = frames[frame_itr : frame_itr + 1]
        output_frames[out_len] = frame0  # Start with first frame
        out_len += 1
        # Ensure that input frames are in fp32 - the same dtype as model
        frame0 = frame0.to(dtype=torch.float32)
        frame1 = frames[frame_itr + 1 : frame_itr + 2].to(dtype=torch.float32)
        if interpolation_states is not None and interpolation_states.is_frame_skipped(
            frame_itr
        ):
            continue
        # Generate and append a batch of middle frames
        middle_frame_batches = []
        if use_timestep:
            for middle_i in range(1, multiplier):
                timestep = middle_i / multiplier
                middle_frame = (
                    return_middle_frame_function(
                        frame0.to(DEVICE),
                        frame1.to(DEVICE),
                        timestep,
                        *return_middle_frame_function_args,
                    )
                    .detach()
                    .cpu()
                )
                middle_frame_batches.append(middle_frame.to(dtype=dtype))
        elif multiplier > 1:
            # With multiplier == 1 there is nothing to generate; calling the
            # recursive helper with n == 0 would never reach its base case.
            middle_frames = non_timestep_inference(
                frame0.to(DEVICE), frame1.to(DEVICE), multiplier - 1
            )
            middle_frame_batches.extend(
                torch.cat(middle_frames, dim=0).detach().cpu().to(dtype=dtype)
            )
        # Copy middle frames to output
        for middle_frame in middle_frame_batches:
            output_frames[out_len] = middle_frame
            out_len += 1
        number_of_frames_processed_since_last_cleared_cuda_cache += 1
        # Try to avoid a memory overflow by clearing cuda cache regularly
        if (
            number_of_frames_processed_since_last_cleared_cuda_cache
            >= clear_cache_after_n_frames
        ):
            print("Comfy-VFI: Clearing cache...", end=" ")
            soft_empty_cache()
            number_of_frames_processed_since_last_cleared_cuda_cache = 0
            print("Done cache clearing")
        # NOTE(review): run after every processed pair; confirm this matches
        # the intended placement relative to the cache-clearing branch.
        gc.collect()
    # Append final frame
    output_frames[out_len] = frames[-1:]
    out_len += 1
    if final_logging:
        # Report the frames actually produced, not the preallocated capacity.
        print(
            f"Comfy-VFI done! {out_len} frames generated at resolution: {output_frames[0].shape}"
        )
    # clear cache for courtesy
    if final_logging:
        print("Comfy-VFI: Final clearing cache...", end=" ")
    soft_empty_cache()
    if final_logging:
        print("Done cache clearing")
    return output_frames[:out_len]
def generic_frame_loop(
    model_name,
    frames,
    clear_cache_after_n_frames,
    multiplier: typing.Union[typing.SupportsInt, typing.List],
    return_middle_frame_function,
    *return_middle_frame_function_args,
    interpolation_states: InterpolationStateList = None,
    use_timestep=True,
    dtype=torch.float32,
):
    """Validate the input and interpolate with a fixed or per-pair multiplier.

    Args:
        model_name: Node class name; "_" and "VFI" are stripped for messages.
        frames: Frame tensor indexed along dim 0; at least two frames.
        clear_cache_after_n_frames: Forwarded to ``_generic_frame_loop``.
        multiplier: An ``int`` applied to every pair, or a ``list`` with one
            multiplier per consecutive pair. A short list is padded with 2;
            pairs whose multiplier is 0 are left out entirely.
        return_middle_frame_function: Model callback, forwarded together with
            ``return_middle_frame_function_args``.
        interpolation_states: Optional skip/keep list.
        use_timestep: Forwarded to ``_generic_frame_loop``.
        dtype: dtype of the returned tensor.

    Returns:
        Tensor with the produced frames.

    Raises:
        AssertionError: If fewer than two frames are given.
        NotImplementedError: If ``multiplier`` is neither an int nor a list.
    """
    assert_batch_size(frames, vfi_name=model_name.replace("_", " ").replace("VFI", ""))
    # isinstance (rather than an exact type comparison) also accepts
    # subclasses of int and list.
    if isinstance(multiplier, int):
        return _generic_frame_loop(
            frames,
            clear_cache_after_n_frames,
            multiplier,
            return_middle_frame_function,
            *return_middle_frame_function_args,
            interpolation_states=interpolation_states,
            use_timestep=use_timestep,
            dtype=dtype,
        )
    if isinstance(multiplier, list):
        multipliers = list(map(int, multiplier))
        multipliers += [2] * (len(frames) - len(multipliers) - 1)
        frame_batches = []
        last_pair = len(frames) - 2
        for frame_itr in range(len(frames) - 1):
            pair_multiplier = multipliers[frame_itr]
            if pair_multiplier == 0:
                continue
            # NOTE(review): each call receives a two-frame slice, so the inner
            # loop consults interpolation_states with index 0 for every pair
            # rather than with frame_itr - confirm whether that is intended.
            frame_batch = _generic_frame_loop(
                frames[frame_itr : frame_itr + 2],
                clear_cache_after_n_frames,
                pair_multiplier,
                return_middle_frame_function,
                *return_middle_frame_function_args,
                interpolation_states=interpolation_states,
                use_timestep=use_timestep,
                dtype=dtype,
                final_logging=False,
            )
            if (
                frame_itr != last_pair
            ):  # Not append last frame unless this batch is the last one
                frame_batch = frame_batch[:-1]
            frame_batches.append(frame_batch)
        output_frames = torch.cat(frame_batches)
        print(
            f"Comfy-VFI done! {len(output_frames)} frames generated at resolution: {output_frames[0].shape}"
        )
        return output_frames
    raise NotImplementedError(f"multipiler of {type(multiplier)}")
class FloatToInt:
    """ComfyUI node that truncates a float, or each item of an iterable, to int."""

    @classmethod
    def INPUT_TYPES(s):
        """Describe the single required FLOAT input."""
        float_spec = ("FLOAT", {"default": 0, "min": 0, "step": 0.01})
        return {"required": {"float": float_spec}}

    RETURN_TYPES = ("INT",)
    FUNCTION = "convert"
    CATEGORY = "ComfyUI-Frame-Interpolation"

    def convert(self, float):
        """Return ``(int(float),)``, or ``([int(v), ...],)`` for an iterable.

        The parameter keeps the name ``float`` because it is the input key
        declared in ``INPUT_TYPES``.
        """
        if not hasattr(float, "__iter__"):
            return (int(float),)
        return ([int(item) for item in float],)
""" def generic_4frame_loop(
frames,
clear_cache_after_n_frames,
multiplier: typing.SupportsInt,
return_middle_frame_function,
*return_middle_frame_function_args,
interpolation_states: InterpolationStateList = None,
use_timestep=False):
if use_timestep: raise NotImplementedError("Timestep 4 frame VFI model")
def non_timestep_inference(frame_0, frame_1, frame_2, frame_3, n):
middle = return_middle_frame_function(frame_0, frame_1, None, *return_middle_frame_function_args)
if n == 1:
return [middle]
first_half = non_timestep_inference(frame_0, middle, n=n//2)
second_half = non_timestep_inference(middle, frame_1, n=n//2)
if n%2:
return [*first_half, middle, *second_half]
else:
return [*first_half, *second_half] """