Spaces:
Running
on
Zero
Switch torch.compile mode from 'reduce-overhead' to 'max-autotune' for faster inference
Browse files
app.py
CHANGED
|
@@ -365,7 +365,7 @@ def _apply_torch_compile_optimizations():
|
|
| 365 |
set_torch_compile_wrapper(
|
| 366 |
model=standard_model,
|
| 367 |
backend="inductor",
|
| 368 |
-
mode="reduce-overhead",
|
| 369 |
fullgraph=False, # Allow SAG to capture attention maps (disabled in SAG code)
|
| 370 |
dynamic=True, # Handle variable batch sizes during CFG without recompiling
|
| 371 |
keys=["diffusion_model"], # Compile UNet only
|
|
@@ -377,7 +377,7 @@ def _apply_torch_compile_optimizations():
|
|
| 377 |
set_torch_compile_wrapper(
|
| 378 |
model=artistic_model,
|
| 379 |
backend="inductor",
|
| 380 |
-
mode="reduce-overhead",
|
| 381 |
fullgraph=False, # Allow SAG to capture attention maps (disabled in SAG code)
|
| 382 |
dynamic=True, # Handle variable batch sizes during CFG without recompiling
|
| 383 |
keys=["diffusion_model"], # Compile UNet only
|
|
|
|
| 365 |
set_torch_compile_wrapper(
|
| 366 |
model=standard_model,
|
| 367 |
backend="inductor",
|
| 368 |
+
mode="max-autotune", # Maximum runtime speed (longer compile time is OK during warmup)
|
| 369 |
fullgraph=False, # Allow SAG to capture attention maps (disabled in SAG code)
|
| 370 |
dynamic=True, # Handle variable batch sizes during CFG without recompiling
|
| 371 |
keys=["diffusion_model"], # Compile UNet only
|
|
|
|
| 377 |
set_torch_compile_wrapper(
|
| 378 |
model=artistic_model,
|
| 379 |
backend="inductor",
|
| 380 |
+
mode="max-autotune", # Maximum runtime speed (longer compile time is OK during warmup)
|
| 381 |
fullgraph=False, # Allow SAG to capture attention maps (disabled in SAG code)
|
| 382 |
dynamic=True, # Handle variable batch sizes during CFG without recompiling
|
| 383 |
keys=["diffusion_model"], # Compile UNet only
|