Oysiyl committed on
Commit
3490b07
·
1 Parent(s): 1652ae0

Switch torch.compile mode from 'reduce-overhead' to 'max-autotune' for faster inference

Browse files
Files changed (1) hide show
  1. app.py +2 -2
app.py CHANGED
@@ -365,7 +365,7 @@ def _apply_torch_compile_optimizations():
365
  set_torch_compile_wrapper(
366
  model=standard_model,
367
  backend="inductor",
368
- mode="reduce-overhead", # Best for iterative sampling
369
  fullgraph=False, # Allow SAG to capture attention maps (disabled in SAG code)
370
  dynamic=True, # Handle variable batch sizes during CFG without recompiling
371
  keys=["diffusion_model"], # Compile UNet only
@@ -377,7 +377,7 @@ def _apply_torch_compile_optimizations():
377
  set_torch_compile_wrapper(
378
  model=artistic_model,
379
  backend="inductor",
380
- mode="reduce-overhead",
381
  fullgraph=False, # Allow SAG to capture attention maps (disabled in SAG code)
382
  dynamic=True, # Handle variable batch sizes during CFG without recompiling
383
  keys=["diffusion_model"], # Compile UNet only
 
365
  set_torch_compile_wrapper(
366
  model=standard_model,
367
  backend="inductor",
368
+ mode="max-autotune", # Maximum runtime speed (longer compile time is OK during warmup)
369
  fullgraph=False, # Allow SAG to capture attention maps (disabled in SAG code)
370
  dynamic=True, # Handle variable batch sizes during CFG without recompiling
371
  keys=["diffusion_model"], # Compile UNet only
 
377
  set_torch_compile_wrapper(
378
  model=artistic_model,
379
  backend="inductor",
380
+ mode="max-autotune", # Maximum runtime speed (longer compile time is OK during warmup)
381
  fullgraph=False, # Allow SAG to capture attention maps (disabled in SAG code)
382
  dynamic=True, # Handle variable batch sizes during CFG without recompiling
383
  keys=["diffusion_model"], # Compile UNet only