Speed up lora compute and lower memory usage by doing it in fp16. (#11161)

Author: comfyanonymous
Date: 2025-12-06 15:36:20 -08:00
Committed by: GitHub
Parent commit: 7ac7d69d94
Commit: 50ca97e776
2 changed files with 17 additions and 2 deletions


@@ -1492,6 +1492,20 @@ def extended_fp16_support():
    return True

LORA_COMPUTE_DTYPES = {}

def lora_compute_dtype(device):
    dtype = LORA_COMPUTE_DTYPES.get(device, None)
    if dtype is not None:
        return dtype

    if should_use_fp16(device):
        dtype = torch.float16
    else:
        dtype = torch.float32

    LORA_COMPUTE_DTYPES[device] = dtype
    return dtype

def soft_empty_cache(force=False):
    global cpu_state
    if cpu_state == CPUState.MPS:
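
For context, a minimal sketch of how a per-device LoRA compute dtype like the one added here could be used when merging a LoRA delta into a weight: the low-rank matmul runs in the cheaper dtype (fp16 where supported) and the result is cast back to the weight's own dtype. The apply_lora_delta helper and its tensor names below are hypothetical illustrations, not code from this commit.

import torch

def apply_lora_delta(weight, lora_down, lora_up, alpha, compute_dtype):
    # Hypothetical helper: do the low-rank product in the chosen compute
    # dtype, then cast the delta back to the weight's dtype before adding.
    up = lora_up.to(compute_dtype)
    down = lora_down.to(compute_dtype)
    delta = torch.mm(up, down) * alpha  # (out, rank) @ (rank, in) -> (out, in)
    return weight + delta.to(weight.dtype)

# fp16 is only assumed to be a safe compute dtype on GPU here; fall back to fp32 on CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
compute_dtype = torch.float16 if device == "cuda" else torch.float32

weight = torch.randn(64, 32, device=device)     # fp32 base weight
lora_down = torch.randn(8, 32, device=device)   # rank-8 LoRA factors
lora_up = torch.randn(64, 8, device=device)

merged = apply_lora_delta(weight, lora_down, lora_up, alpha=0.5, compute_dtype=compute_dtype)
print(merged.dtype, merged.shape)               # torch.float32 torch.Size([64, 32])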