Spaces: Running on Zero
add archive unpacking plus enable torch.compile skip if mps
app.py CHANGED
@@ -1,5 +1,16 @@
-import json
 import os
+import tarfile
+
+# Extract pre-compiled Triton kernels if they exist
+if os.path.exists("triton_cache.tar.gz") and not os.path.exists(
+    os.path.expanduser("~/.triton/cache")
+):
+    print("📦 Extracting pre-compiled Triton kernels...")
+    with tarfile.open("triton_cache.tar.gz", "r:gz") as tar:
+        tar.extractall(path=os.path.expanduser("~"))
+    print("✅ Triton kernels ready!")
+
+import json
 import random
 import sys
 import warnings
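The startup hook above expects a triton_cache.tar.gz shipped alongside app.py. The commit does not show how that archive is produced; the sketch below is one plausible packing step, run locally after a warm-up inference has populated the cache. Only the archive name and cache path are taken from the commit; the rest is illustrative.

import os
import tarfile

# Pack the warmed Triton cache. Members are stored relative to $HOME so
# that tar.extractall(path=os.path.expanduser("~")) above restores them
# to ~/.triton/cache on the Space.
cache_dir = os.path.expanduser("~/.triton/cache")
if os.path.isdir(cache_dir):
    with tarfile.open("triton_cache.tar.gz", "w:gz") as tar:
        tar.add(cache_dir, arcname=".triton/cache")

Since the archive ships with the Space itself, the plain extractall above is acceptable; on Python 3.12+ passing filter="data" to extractall hardens it against malicious member paths.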
@@ -355,8 +366,8 @@ def _apply_torch_compile_optimizations():
             model=standard_model,
             backend="inductor",
             mode="reduce-overhead", # Best for iterative sampling
-            fullgraph=
-            dynamic=
+            fullgraph=False, # Allow SAG to capture attention maps
+            dynamic=False, # Support all sizes (512-1024, step 64) with one kernel
             keys=["diffusion_model"], # Compile UNet only
         )
         print(" ✓ Compiled standard pipeline diffusion model")
@@ -367,8 +378,8 @@ def _apply_torch_compile_optimizations():
             model=artistic_model,
             backend="inductor",
             mode="reduce-overhead",
-            fullgraph=
-            dynamic=
+            fullgraph=False, # Allow SAG to capture attention maps
+            dynamic=False, # Support all sizes (512-1024, step 64) with one kernel
             keys=["diffusion_model"],
         )
         print(" ✓ Compiled artistic pipeline diffusion model")
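The keys=["diffusion_model"] argument points to a project-specific compile helper; plain torch.compile has no keys parameter. As a rough equivalent, here is a sketch that compiles a stand-in UNet directly with the same flags (the toy module is illustrative; the real model in app.py differs):

import torch

# Stand-in for the pipeline's UNet.
unet = torch.nn.Conv2d(4, 4, kernel_size=3, padding=1)

# Same flags the commit settles on: fullgraph=False tolerates graph breaks,
# which keeps SAG's attention-map hooks working; dynamic=False lets the
# compiler specialize kernels to the concrete shapes seen at runtime.
compiled_unet = torch.compile(
    unet,
    backend="inductor",
    mode="reduce-overhead",
    fullgraph=False,
    dynamic=False,
)

mode="reduce-overhead" makes Inductor use CUDA graphs to cut per-step launch overhead, which is why it suits iterative diffusion sampling.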
@@ -379,14 +390,15 @@ def _apply_torch_compile_optimizations():
         print(" Continuing without compilation (slower but functional)\n")


-# torch.compile
-#
-#
-
-
-
-print(
-
+# Enable torch.compile optimizations (timestep_embedding fixed!)
+# Now works with fullgraph=False for compatibility with SAG
+# Skip on MPS (MacBooks) - torch.compile with MPS can cause issues
+if not torch.backends.mps.is_available():
+    _apply_torch_compile_optimizations()
+else:
+    print(
+        "ℹ️ torch.compile skipped on MPS (MacBook) - using fp32 optimizations instead"
+    )


 @spaces.GPU(duration=60)
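The new module-level guard keys only off torch.backends.mps.is_available(). For context, here is the same check composed into a common device-selection pattern; none of this code is in the commit, it is only an illustration of how the policy generalizes:

import torch

# Pick the best available device, then decide whether compiling is safe.
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")

# Mirror of the commit's policy: compile everywhere except Apple's MPS
# backend, where torch.compile is still prone to issues.
use_compile = device.type != "mps"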