gagndeep committed
Commit 46d271d · 0 Parent(s)

Clean repo without large files
.gitattributes ADDED
@@ -0,0 +1,38 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
+ *.ply filter=lfs diff=lfs merge=lfs -text
+ *.jpg filter=lfs diff=lfs merge=lfs -text
+ *.mp4 filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,219 @@
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[codz]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py.cover
+ .hypothesis/
+ .pytest_cache/
+ cover/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+ db.sqlite3-journal
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ .pybuilder/
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # IPython
+ profile_default/
+ ipython_config.py
+
+ # pyenv
+ # For a library or package, you might want to ignore these files since the code is
+ # intended to run in multiple environments; otherwise, check them in:
+ # .python-version
+
+ # pipenv
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
+ # install all needed dependencies.
+ # Pipfile.lock
+
+ # UV
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
+ # commonly ignored for libraries.
+ # uv.lock
+
+ # poetry
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
+ # commonly ignored for libraries.
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+ # poetry.lock
+ # poetry.toml
+
+ # pdm
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+ # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
+ # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
+ # pdm.lock
+ # pdm.toml
+ .pdm-python
+ .pdm-build/
+
+ # pixi
+ # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
+ # pixi.lock
+ # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
+ # in the .venv directory. It is recommended not to include this directory in version control.
+ .pixi
+
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+ __pypackages__/
+
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+
+ # Redis
+ *.rdb
+ *.aof
+ *.pid
+
+ # RabbitMQ
+ mnesia/
+ rabbitmq/
+ rabbitmq-data/
+
+ # ActiveMQ
+ activemq-data/
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .env
+ .envrc
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+
+ # Pyre type checker
+ .pyre/
+
+ # pytype static type analyzer
+ .pytype/
+
+ # Cython debug symbols
+ cython_debug/
+
+ # PyCharm
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
+ # .idea/
+
+ # Abstra
+ # Abstra is an AI-powered process automation framework.
+ # Ignore directories containing user credentials, local state, and settings.
+ # Learn more at https://abstra.io/docs
+ .abstra/
+
+ # Visual Studio Code
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
+ # you could uncomment the following to ignore the entire vscode folder
+ # .vscode/
+
+ # Ruff stuff:
+ .ruff_cache/
+
+ # PyPI configuration file
+ .pypirc
+
+ # Marimo
+ marimo/_static/
+ marimo/_lsp/
+ __marimo__/
+
+ # Streamlit
+ .streamlit/secrets.toml
+
+ # Kilo Code
+ .kilocode/
.python-version ADDED
@@ -0,0 +1 @@
+ 3.13
README.md ADDED
@@ -0,0 +1,17 @@
+ ---
+ title: "SHARP - 3D Gaussian Scene Prediction"
+ emoji: 🔪
+ colorFrom: purple
+ colorTo: indigo
+ sdk: gradio
+ sdk_version: 6.1.0
+ python_version: 3.13
+ app_file: app.py
+ pinned: false
+ short_description: "Sharp Monocular View Synthesis in Less Than a Second"
+ models:
+   - apple/Sharp
+ startup_duration_timeout: 1h
+ preload_from_hub:
+   - apple/Sharp sharp_2572gikvuh.pt
+ ---
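
The `preload_from_hub` entry above bakes the checkpoint into the Space's HF cache at build time. As a hedged sketch (not part of this commit), the same file can be resolved from Python via `huggingface_hub`; the `repo_id` and `filename` mirror the front matter:

```python
# Sketch: resolve the preloaded SHARP checkpoint from the local HF cache,
# downloading it only if it is not already cached.
from huggingface_hub import hf_hub_download

ckpt_path = hf_hub_download(repo_id="apple/Sharp", filename="sharp_2572gikvuh.pt")
print(ckpt_path)  # e.g. a path under ~/.cache/huggingface/hub/
```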
app.py ADDED
@@ -0,0 +1,274 @@
+ """SHARP Gradio demo (Modern, Single-Page UI).
+
+ This Space:
+ - Runs Apple's SHARP model to predict a 3D Gaussian scene from a single image.
+ - Exports a canonical `.ply` file for download.
+ - Optionally renders a camera trajectory `.mp4` (CUDA / ZeroGPU only).
+ """
+
+ from __future__ import annotations
+
+ import json
+ from pathlib import Path
+ from typing import Final
+
+ import gradio as gr
+
+ # Local module; model_utils.py ships alongside this app.
+ from model_utils import TrajectoryType, predict_and_maybe_render_gpu
+
+ # -----------------------------------------------------------------------------
+ # Paths & Configuration
+ # -----------------------------------------------------------------------------
+
+ APP_DIR: Final[Path] = Path(__file__).resolve().parent
+ OUTPUTS_DIR: Final[Path] = APP_DIR / "outputs"
+ ASSETS_DIR: Final[Path] = APP_DIR / "assets"
+ EXAMPLES_DIR: Final[Path] = ASSETS_DIR / "examples"
+
+ # Valid image extensions for discovery
+ IMAGE_EXTS: Final[tuple[str, ...]] = (".png", ".jpg", ".jpeg", ".webp")
+
+ # CSS for a responsive, contained layout
+ CSS: Final[str] = """
+ .gradio-container {
+     max-width: 1400px !important;
+     margin: 0 auto;
+ }
+ /* constrain media height so it doesn't take up the whole screen */
+ #input-image img, #output-video video {
+     max-height: 500px;
+     width: 100%;
+     object-fit: contain;
+ }
+ /* Make the generate button pop slightly */
+ #run-btn {
+     font-size: 1.1em;
+     font-weight: bold;
+ }
+ """
+
+ # -----------------------------------------------------------------------------
+ # Logic & Helpers
+ # -----------------------------------------------------------------------------
+
+ def _ensure_dir(path: Path) -> Path:
+     path.mkdir(parents=True, exist_ok=True)
+     return path
+
+ def get_example_files() -> list[list[str]]:
+     """
+     Scan assets/examples for images to populate the gr.Examples component.
+     Returns a list of lists: [['path/to/img1.jpg'], ['path/to/img2.png']].
+     """
+     _ensure_dir(EXAMPLES_DIR)
+
+     # Priority 1: Check manifest.json
+     manifest_path = EXAMPLES_DIR / "manifest.json"
+     if manifest_path.exists():
+         try:
+             data = json.loads(manifest_path.read_text(encoding="utf-8"))
+             examples = []
+             for entry in data:
+                 if "image" in entry:
+                     img_path = EXAMPLES_DIR / entry["image"]
+                     if img_path.exists():
+                         examples.append([str(img_path)])
+             if examples:
+                 return examples
+         except Exception as e:
+             print(f"Error reading manifest: {e}")
+
+     # Priority 2: Auto-discovery
+     examples = []
+     for ext in IMAGE_EXTS:
+         for img in sorted(EXAMPLES_DIR.glob(f"*{ext}")):
+             examples.append([str(img)])
+
+     return examples
+
+ def run_sharp(
+     image_path: str | None,
+     trajectory_type: str,
+     output_long_side: int,
+     num_frames: int,
+     fps: int,
+     render_video: bool,
+     progress=gr.Progress(),
+ ) -> tuple[str | None, str | None, str]:
+     """
+     Main inference wrapper.
+     """
+     if not image_path:
+         raise gr.Error("Please upload or select an input image first.")
+
+     # Validate output resolution
+     out_long_side_val: int | None = (
+         None if int(output_long_side) <= 0 else int(output_long_side)
+     )
+
+     try:
+         progress(0.1, desc="Initializing model...")
+
+         # TrajectoryType in model_utils is a Literal of plain strings, so the
+         # dropdown value is passed through as-is; no Enum conversion is needed.
+         traj_type: TrajectoryType = trajectory_type  # values match the dropdown choices
+
+         progress(0.3, desc="Predicting Gaussians...")
+
+         video_path, ply_path = predict_and_maybe_render_gpu(
+             image_path,
+             trajectory_type=traj_type,
+             num_frames=int(num_frames),
+             fps=int(fps),
+             output_long_side=out_long_side_val,
+             render_video=bool(render_video),
+         )
+
+         progress(0.9, desc="Finalizing...")
+
+         status_msg = f"✅ **Success**\n\nPLY: `{ply_path.name}`"
+         if video_path:
+             status_msg += f"\nVideo: `{video_path.name}`"
+         else:
+             status_msg += "\n(Video rendering skipped or unavailable)"
+
+         return (
+             str(video_path) if video_path else None,
+             str(ply_path),
+             status_msg,
+         )
+
+     except Exception as e:
+         raise gr.Error(f"Generation failed: {e}") from e
+
+ # -----------------------------------------------------------------------------
+ # UI Construction
+ # -----------------------------------------------------------------------------
+
+ def build_demo() -> gr.Blocks:
+     # Use the Default theme for a clean, modern look
+     theme = gr.themes.Default()
+
+     with gr.Blocks(theme=theme, css=CSS, title="SHARP 3D") as demo:
+
+         # --- Header ---
+         with gr.Row():
+             with gr.Column(scale=1):
+                 gr.Markdown(
+                     """
+                     # SHARP: Single-Image 3D
+                     Convert any static image into a 3D Gaussian Splat scene in seconds.
+                     """
+                 )
+
+         # --- Main Interface ---
+         with gr.Row():
+
+             # --- Left Column: Input & Controls ---
+             with gr.Column(scale=1):
+                 image_in = gr.Image(
+                     label="Input Image",
+                     type="filepath",
+                     sources=["upload", "clipboard"],
+                     elem_id="input-image",
+                     height=400,
+                 )
+
+                 # Collapsible Advanced Settings for a cleaner UI
+                 with gr.Accordion("⚙️ Advanced Configuration", open=False):
+                     with gr.Row():
+                         trajectory = gr.Dropdown(
+                             label="Camera Trajectory",
+                             choices=["swipe", "shake", "rotate", "rotate_forward"],
+                             value="rotate_forward",
+                             info="Camera movement for video preview",
+                         )
+                         output_res = gr.Dropdown(
+                             label="Resolution (Long Side)",
+                             choices=[("Match Input", 0), ("512", 512), ("1024", 1024)],
+                             value=0,
+                         )
+                     with gr.Row():
+                         frames = gr.Slider(
+                             label="Frames", minimum=24, maximum=120, step=1, value=60
+                         )
+                         fps_in = gr.Slider(
+                             label="FPS", minimum=8, maximum=60, step=1, value=30
+                         )
+                     render_toggle = gr.Checkbox(
+                         label="Render Video Preview (Requires GPU)", value=True
+                     )
+
+                 run_btn = gr.Button("✨ Generate 3D Scene", variant="primary", elem_id="run-btn")
+
+             # --- Right Column: Output ---
+             with gr.Column(scale=1):
+                 video_out = gr.Video(
+                     label="Preview Trajectory",
+                     elem_id="output-video",
+                     autoplay=True,
+                     height=400,
+                 )
+                 with gr.Group():
+                     ply_download = gr.DownloadButton(
+                         label="Download .PLY Scene",
+                         variant="secondary",
+                     )
+                     status_md = gr.Markdown("Ready to run.")
+
+         # --- Footer: Examples ---
+         # Standard Gradio Examples component
+         example_files = get_example_files()
+         if example_files:
+             gr.Examples(
+                 examples=example_files,
+                 inputs=[image_in],
+                 label="Try an Example",
+                 examples_per_page=5,
+             )
+
+         # --- Event Handlers ---
+         run_btn.click(
+             fn=run_sharp,
+             inputs=[
+                 image_in,
+                 trajectory,
+                 output_res,
+                 frames,
+                 fps_in,
+                 render_toggle,
+             ],
+             outputs=[video_out, ply_download, status_md],
+             concurrency_limit=1,
+         )
+
+         # --- Citation ---
+         with gr.Accordion("About & Citation", open=False):
+             gr.Markdown(
+                 """
+                 **SHARP: Sharp Monocular View Synthesis in Less Than a Second** (Apple, 2025).
+
+                 If you use this model, please cite:
+                 ```bibtex
+                 @article{Sharp2025:arxiv,
+                     title   = {Sharp Monocular View Synthesis in Less Than a Second},
+                     author  = {Mescheder and Dong and Li and Bai and others},
+                     year    = {2025},
+                     journal = {arXiv preprint arXiv:2512.10685}
+                 }
+                 ```
+                 """
+             )
+
+     return demo
+
+ # -----------------------------------------------------------------------------
+ # Entry Point
+ # -----------------------------------------------------------------------------
+
+ _ensure_dir(OUTPUTS_DIR)
+
+ if __name__ == "__main__":
+     demo = build_demo()
+     demo.queue().launch(allowed_paths=[str(ASSETS_DIR)])
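
For reference, the click handler above is also reachable over the Gradio API. A hedged sketch with `gradio_client` (the Space id is a placeholder, and `api_name="/run_sharp"` assumes Gradio's default of naming the endpoint after the wrapped function):

```python
# Sketch: drive the Space programmatically; argument order mirrors run_sharp's inputs.
from gradio_client import Client, handle_file

client = Client("user/sharp-space")  # placeholder Space id
video, ply, status = client.predict(
    handle_file("photo.jpg"),  # image_in
    "rotate_forward",          # trajectory
    0,                         # output_res (0 = match input)
    60,                        # frames
    30,                        # fps_in
    True,                      # render_toggle
    api_name="/run_sharp",
)
print(status)
```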
assets/examples/Booster_train_balanced_Bathroom_camera_00_im0_png_00000_0000-0001.jpg ADDED

Git LFS Details

  • SHA256: 819880be5ee569c066aac4f20b5cb08c450c683eda7e188981b8f30bf25cfd72
  • Pointer size: 131 Bytes
  • Size of remote file: 137 kB
assets/examples/Booster_train_balanced_Bathroom_camera_00_im0_png_00000_0000-0001.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:23946e8345738bec5052c11ef259490e8fa003a9f0c87c5cae4b0434d6b0b211
+ size 506496
assets/examples/Booster_train_balanced_Bathroom_camera_00_im0_png_00000_0000-0001.ply ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:54d28194b0ae41fd2a2f09b07de28d2305c5181b0664cd25ce29f9e114ba2ea8
+ size 66061086
assets/examples/ETH3D_courtyard_00000_0000-0001.jpg ADDED

Git LFS Details

  • SHA256: 806be6fcaa6064a7a880835e20aafa4b509fa4d2dee42c7b4d58631f0bed1cd5
  • Pointer size: 131 Bytes
  • Size of remote file: 261 kB
assets/examples/ETH3D_courtyard_00000_0000-0001.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a5bdbcc32493174b684aff6b2ab0701f4c037e40929991948a379c9d7c323792
+ size 538810
assets/examples/ETH3D_courtyard_00000_0000-0001.ply ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:282fe4719d2822ea66cf3ab02160ec6bc030c7a68bff8849033d679a9d522438
+ size 66061086
assets/examples/manifest.json ADDED
@@ -0,0 +1,50 @@
+ [
+   {
+     "label": "Bathroom",
+     "image": "Booster_train_balanced_Bathroom_camera_00_im0_png_00000_0000-0001.jpg",
+     "video": "Booster_train_balanced_Bathroom_camera_00_im0_png_00000_0000-0001.mp4",
+     "ply": "Booster_train_balanced_Bathroom_camera_00_im0_png_00000_0000-0001.ply"
+   },
+   {
+     "label": "Courtyard",
+     "image": "ETH3D_courtyard_00000_0000-0001.jpg",
+     "video": "ETH3D_courtyard_00000_0000-0001.mp4",
+     "ply": "ETH3D_courtyard_00000_0000-0001.ply"
+   },
+   {
+     "label": "Bicycle",
+     "image": "Middlebury_49b2bcfdd9_000_0000-0001.jpg",
+     "video": "Middlebury_49b2bcfdd9_000_0000-0001.mp4",
+     "ply": "Middlebury_49b2bcfdd9_000_0000-0001.ply"
+   },
+   {
+     "label": "Interior",
+     "image": "ScanNetPP_09c1414f1b_00000_0000-0001.jpg",
+     "video": "ScanNetPP_09c1414f1b_00000_0000-0001.mp4",
+     "ply": "ScanNetPP_09c1414f1b_00000_0000-0001.ply"
+   },
+   {
+     "label": "Church",
+     "image": "TanksAndTemples_Church_00022_0000-0002.jpg",
+     "video": "TanksAndTemples_Church_00022_0000-0002.mp4",
+     "ply": "TanksAndTemples_Church_00022_0000-0002.ply"
+   },
+   {
+     "label": "Flowers",
+     "image": "Unsplash_-591oIJnyEQ_0000-0001.jpg",
+     "video": "Unsplash_-591oIJnyEQ_0000-0001.mp4",
+     "ply": "Unsplash_-591oIJnyEQ_0000-0001.ply"
+   },
+   {
+     "label": "Horse",
+     "image": "Unsplash_SharpPaperVideo_-B_lu05yfgE_0000-0001.jpg",
+     "video": "Unsplash_SharpPaperVideo_-B_lu05yfgE_0000-0001.mp4",
+     "ply": "Unsplash_SharpPaperVideo_-B_lu05yfgE_0000-0001.ply"
+   },
+   {
+     "label": "TV",
+     "image": "WildRGBD_TV_scene_000_00028_0000-0002.jpg",
+     "video": "WildRGBD_TV_scene_000_00028_0000-0002.mp4",
+     "ply": "WildRGBD_TV_scene_000_00028_0000-0002.ply"
+   }
+ ]
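
`get_example_files()` in app.py only reads the `image` field of each entry; the `video` and `ply` fields point at precomputed outputs shipped for reference. A small hypothetical check (not part of this commit) that every referenced asset exists on disk:

```python
# Sketch: flag manifest entries whose referenced files are missing.
import json
from pathlib import Path

EXAMPLES_DIR = Path("assets/examples")  # assumes execution from the repo root

def missing_manifest_assets(manifest: Path = EXAMPLES_DIR / "manifest.json") -> list[str]:
    entries = json.loads(manifest.read_text(encoding="utf-8"))
    return [
        entry[key]
        for entry in entries
        for key in ("image", "video", "ply")
        if key in entry and not (EXAMPLES_DIR / entry[key]).exists()
    ]

if __name__ == "__main__":
    print(missing_manifest_assets() or "all manifest assets present")
```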
model_utils.py ADDED
@@ -0,0 +1,612 @@
+ """SHARP inference + optional CUDA video rendering utilities.
+
+ Design goals:
+ - Reuse SHARP's own predict/render pipeline (no subprocess calls).
+ - Be robust on Hugging Face Spaces + ZeroGPU.
+ - Cache model weights and predictor construction across requests.
+
+ Public API (used by the Gradio app):
+ - TrajectoryType
+ - predict_and_maybe_render_gpu(...)
+ """
+
+ from __future__ import annotations
+
+ import os
+ import threading
+ import time
+ import uuid
+ from contextlib import contextmanager
+ from dataclasses import dataclass
+ from pathlib import Path
+ from typing import Final, Literal
+
+ import torch
+
+ try:
+     import spaces
+ except Exception:  # pragma: no cover
+     spaces = None  # type: ignore[assignment]
+
+ try:
+     # Prefer HF cache / Hub downloads (works with Spaces `preload_from_hub`).
+     from huggingface_hub import hf_hub_download, try_to_load_from_cache
+ except Exception:  # pragma: no cover
+     hf_hub_download = None  # type: ignore[assignment]
+     try_to_load_from_cache = None  # type: ignore[assignment]
+
+ from sharp.cli.predict import DEFAULT_MODEL_URL, predict_image
+ from sharp.cli.render import render_gaussians as sharp_render_gaussians
+ from sharp.models import PredictorParams, create_predictor
+ from sharp.utils import camera, io
+ from sharp.utils.gaussians import Gaussians3D, SceneMetaData, save_ply
+ from sharp.utils.gsplat import GSplatRenderer
+
+ TrajectoryType = Literal["swipe", "shake", "rotate", "rotate_forward"]
+
+ # -----------------------------------------------------------------------------
+ # Helpers
+ # -----------------------------------------------------------------------------
+
+
+ def _now_ms() -> int:
+     return int(time.time() * 1000)
+
+
+ def _ensure_dir(path: Path) -> Path:
+     path.mkdir(parents=True, exist_ok=True)
+     return path
+
+
+ def _make_even(x: int) -> int:
+     return x if x % 2 == 0 else x + 1
+
+
+ def _select_device(preference: str = "auto") -> torch.device:
+     """Select the best available device for inference (CPU/CUDA/MPS)."""
+     if preference not in {"auto", "cpu", "cuda", "mps"}:
+         raise ValueError("device preference must be one of: auto|cpu|cuda|mps")
+
+     if preference == "cpu":
+         return torch.device("cpu")
+     if preference == "cuda":
+         return torch.device("cuda" if torch.cuda.is_available() else "cpu")
+     if preference == "mps":
+         return torch.device("mps" if torch.backends.mps.is_available() else "cpu")
+
+     # auto
+     if torch.cuda.is_available():
+         return torch.device("cuda")
+     if torch.backends.mps.is_available():
+         return torch.device("mps")
+     return torch.device("cpu")
+
+
+ # -----------------------------------------------------------------------------
+ # Prediction outputs
+ # -----------------------------------------------------------------------------
+
+
+ @dataclass(frozen=True, slots=True)
+ class PredictionOutputs:
+     """Outputs of SHARP inference (plus derived metadata for rendering)."""
+
+     ply_path: Path
+     gaussians: Gaussians3D
+     metadata_for_render: SceneMetaData
+     input_resolution_hw: tuple[int, int]
+     focal_length_px: float
+
+
+ # -----------------------------------------------------------------------------
+ # Patch SHARP VideoWriter to properly close the optional depth writer
+ # -----------------------------------------------------------------------------
+
+
+ class _PatchedVideoWriter(io.VideoWriter):
+     """Ensure depth writer is closed so files can be safely cleaned up."""
+
+     def __init__(
+         self, output_path: Path, fps: float = 30.0, render_depth: bool = True
+     ) -> None:
+         super().__init__(output_path, fps=fps, render_depth=render_depth)
+         # Ensure attribute exists for downstream code paths.
+         if not hasattr(self, "depth_writer"):
+             self.depth_writer = None  # type: ignore[attribute-defined-outside-init]
+
+     def close(self):
+         super().close()
+         depth_writer = getattr(self, "depth_writer", None)
+         try:
+             if depth_writer is not None:
+                 depth_writer.close()
+         except Exception:
+             pass
+
+
+ @contextmanager
+ def _patched_sharp_videowriter():
+     """Temporarily patch `sharp.utils.io.VideoWriter` used by `sharp.cli.render`."""
+     original = io.VideoWriter
+     io.VideoWriter = _PatchedVideoWriter  # type: ignore[assignment]
+     try:
+         yield
+     finally:
+         io.VideoWriter = original  # type: ignore[assignment]
+
+
+ # -----------------------------------------------------------------------------
+ # Model wrapper
+ # -----------------------------------------------------------------------------
+
+
+ class ModelWrapper:
+     """Cached SHARP model wrapper for Gradio/Spaces."""
+
+     def __init__(
+         self,
+         *,
+         outputs_dir: str | Path = "outputs",
+         checkpoint_url: str = DEFAULT_MODEL_URL,
+         checkpoint_path: str | Path | None = None,
+         device_preference: str = "auto",
+         keep_model_on_device: bool | None = None,
+         hf_repo_id: str | None = None,
+         hf_filename: str | None = None,
+         hf_revision: str | None = None,
+     ) -> None:
+         self.outputs_dir = _ensure_dir(Path(outputs_dir))
+         self.checkpoint_url = checkpoint_url
+
+         env_ckpt = os.getenv("SHARP_CHECKPOINT_PATH") or os.getenv("SHARP_CHECKPOINT")
+         if checkpoint_path:
+             self.checkpoint_path = Path(checkpoint_path)
+         elif env_ckpt:
+             self.checkpoint_path = Path(env_ckpt)
+         else:
+             self.checkpoint_path = None
+
+         # Optional Hugging Face Hub fallback (useful when direct CDN download fails).
+         self.hf_repo_id = hf_repo_id or os.getenv("SHARP_HF_REPO_ID", "apple/Sharp")
+         self.hf_filename = hf_filename or os.getenv(
+             "SHARP_HF_FILENAME", "sharp_2572gikvuh.pt"
+         )
+         self.hf_revision = hf_revision or os.getenv("SHARP_HF_REVISION") or None
+
+         self.device_preference = device_preference
+
+         # For ZeroGPU, it's safer to not keep large tensors on CUDA across calls.
+         if keep_model_on_device is None:
+             keep_env = (
+                 os.getenv("SHARP_KEEP_MODEL_ON_DEVICE")
+             )
+             self.keep_model_on_device = keep_env == "1"
+         else:
+             self.keep_model_on_device = keep_model_on_device
+
+         self._lock = threading.RLock()
+         self._predictor: torch.nn.Module | None = None
+         self._predictor_device: torch.device | None = None
+         self._state_dict: dict | None = None
+
+     def has_cuda(self) -> bool:
+         return torch.cuda.is_available()
+
+     def _load_state_dict(self) -> dict:
+         with self._lock:
+             if self._state_dict is not None:
+                 return self._state_dict
+
+             # 1) Explicit local checkpoint path
+             if self.checkpoint_path is not None:
+                 try:
+                     self._state_dict = torch.load(
+                         self.checkpoint_path,
+                         weights_only=True,
+                         map_location="cpu",
+                     )
+                     return self._state_dict
+                 except Exception as e:
+                     raise RuntimeError(
+                         "Failed to load SHARP checkpoint from local path.\n\n"
+                         f"Path:\n  {self.checkpoint_path}\n\n"
+                         f"Original error:\n  {type(e).__name__}: {e}"
+                     ) from e
+
+             # 2) HF cache (no-network): best match for Spaces `preload_from_hub`.
+             hf_cache_error: Exception | None = None
+             if try_to_load_from_cache is not None:
+                 try:
+                     cached = try_to_load_from_cache(
+                         repo_id=self.hf_repo_id,
+                         filename=self.hf_filename,
+                         revision=self.hf_revision,
+                         repo_type="model",
+                     )
+                 except TypeError:
+                     cached = try_to_load_from_cache(self.hf_repo_id, self.hf_filename)  # type: ignore[misc]
+
+                 try:
+                     if isinstance(cached, str) and Path(cached).exists():
+                         self._state_dict = torch.load(
+                             cached, weights_only=True, map_location="cpu"
+                         )
+                         return self._state_dict
+                 except Exception as e:
+                     hf_cache_error = e
+
+             # 3) HF Hub download (reuse cache when available; may download otherwise).
+             hf_error: Exception | None = None
+             if hf_hub_download is not None:
+                 # Attempt "local only" mode if supported (avoids network).
+                 try:
+                     import inspect
+
+                     if "local_files_only" in inspect.signature(hf_hub_download).parameters:
+                         ckpt_path = hf_hub_download(
+                             repo_id=self.hf_repo_id,
+                             filename=self.hf_filename,
+                             revision=self.hf_revision,
+                             local_files_only=True,
+                         )
+                         if Path(ckpt_path).exists():
+                             self._state_dict = torch.load(
+                                 ckpt_path, weights_only=True, map_location="cpu"
+                             )
+                             return self._state_dict
+                 except Exception:
+                     pass
+
+                 try:
+                     ckpt_path = hf_hub_download(
+                         repo_id=self.hf_repo_id,
+                         filename=self.hf_filename,
+                         revision=self.hf_revision,
+                     )
+                     self._state_dict = torch.load(
+                         ckpt_path,
+                         weights_only=True,
+                         map_location="cpu",
+                     )
+                     return self._state_dict
+                 except Exception as e:
+                     hf_error = e
+
+             # 4) Default upstream CDN (torch hub cache). Last resort.
+             url_error: Exception | None = None
+             try:
+                 self._state_dict = torch.hub.load_state_dict_from_url(
+                     self.checkpoint_url,
+                     progress=True,
+                     map_location="cpu",
+                 )
+                 return self._state_dict
+             except Exception as e:
+                 url_error = e
+
+             # If we got here: all options failed.
+             hint_lines = [
+                 "Failed to load SHARP checkpoint.",
+                 "",
+                 "Tried (in order):",
+                 f"  1) HF cache (preload_from_hub): repo_id={self.hf_repo_id}, filename={self.hf_filename}, revision={self.hf_revision or 'None'}",
+                 f"  2) HF Hub download: repo_id={self.hf_repo_id}, filename={self.hf_filename}, revision={self.hf_revision or 'None'}",
+                 f"  3) URL (torch hub): {self.checkpoint_url}",
+                 "",
+                 "If network access is restricted, set a local checkpoint path:",
+                 "  - SHARP_CHECKPOINT_PATH=/path/to/sharp_2572gikvuh.pt",
+                 "",
+                 "Original errors:",
+             ]
+             if try_to_load_from_cache is None:
+                 hint_lines.append("  HF cache: huggingface_hub not installed")
+             elif hf_cache_error is not None:
+                 hint_lines.append(
+                     f"  HF cache: {type(hf_cache_error).__name__}: {hf_cache_error}"
+                 )
+             else:
+                 hint_lines.append("  HF cache: (not found in cache)")
+
+             if hf_hub_download is None:
+                 hint_lines.append("  HF download: huggingface_hub not installed")
+             else:
+                 hint_lines.append(f"  HF download: {type(hf_error).__name__}: {hf_error}")
+
+             hint_lines.append(f"  URL: {type(url_error).__name__}: {url_error}")
+
+             raise RuntimeError("\n".join(hint_lines))
+
+     def _get_predictor(self, device: torch.device) -> torch.nn.Module:
+         with self._lock:
+             if self._predictor is None:
+                 state_dict = self._load_state_dict()
+                 predictor = create_predictor(PredictorParams())
+                 predictor.load_state_dict(state_dict)
+                 predictor.eval()
+                 self._predictor = predictor
+                 self._predictor_device = torch.device("cpu")
+
+             assert self._predictor is not None
+             assert self._predictor_device is not None
+
+             if self._predictor_device != device:
+                 self._predictor.to(device)
+                 self._predictor_device = device
+
+             return self._predictor
+
+     def _maybe_move_model_back_to_cpu(self) -> None:
+         if self.keep_model_on_device:
+             return
+         with self._lock:
+             if self._predictor is not None and self._predictor_device is not None:
+                 if self._predictor_device.type != "cpu":
+                     self._predictor.to("cpu")
+                     self._predictor_device = torch.device("cpu")
+                     if torch.cuda.is_available():
+                         torch.cuda.empty_cache()
+
+     def _make_output_stem(self, input_path: Path) -> str:
+         return f"{input_path.stem}-{_now_ms()}-{uuid.uuid4().hex[:8]}"
+
+     def predict_to_ply(self, image_path: str | Path) -> PredictionOutputs:
+         """Run SHARP inference and export a .ply file."""
+         image_path = Path(image_path)
+         if not image_path.exists():
+             raise FileNotFoundError(f"Image does not exist: {image_path}")
+
+         device = _select_device(self.device_preference)
+         predictor = self._get_predictor(device)
+
+         image_np, _, f_px = io.load_rgb(image_path)
+         height, width = image_np.shape[:2]
+
+         with torch.no_grad():
+             gaussians = predict_image(predictor, image_np, f_px, device)
+
+         stem = self._make_output_stem(image_path)
+         ply_path = self.outputs_dir / f"{stem}.ply"
+
+         # save_ply expects (height, width).
+         save_ply(gaussians, f_px, (height, width), ply_path)
+
+         # SceneMetaData expects (width, height) for resolution.
+         metadata_for_render = SceneMetaData(
+             focal_length_px=float(f_px),
+             resolution_px=(int(width), int(height)),
+             color_space="linearRGB",
+         )
+
+         self._maybe_move_model_back_to_cpu()
+
+         return PredictionOutputs(
+             ply_path=ply_path,
+             gaussians=gaussians,
+             metadata_for_render=metadata_for_render,
+             input_resolution_hw=(int(height), int(width)),
+             focal_length_px=float(f_px),
+         )
+
+     def _render_video_impl(
+         self,
+         *,
+         gaussians: Gaussians3D,
+         metadata: SceneMetaData,
+         output_path: Path,
+         trajectory_type: TrajectoryType,
+         num_frames: int,
+         fps: int,
+         output_long_side: int | None,
+     ) -> Path:
+         if not torch.cuda.is_available():
+             raise RuntimeError("Rendering requires CUDA (gsplat).")
+
+         if num_frames < 2:
+             raise ValueError("num_frames must be >= 2")
+         if fps < 1:
+             raise ValueError("fps must be >= 1")
+
+         # Keep aligned with upstream CLI pipeline where possible.
+         if output_long_side is None and int(fps) == 30:
+             params = camera.TrajectoryParams(
+                 type=trajectory_type,
+                 num_steps=int(num_frames),
+                 num_repeats=1,
+             )
+             with _patched_sharp_videowriter():
+                 sharp_render_gaussians(
+                     gaussians=gaussians,
+                     metadata=metadata,
+                     params=params,
+                     output_path=output_path,
+                 )
+             depth_path = output_path.with_suffix(".depth.mp4")
+             try:
+                 if depth_path.exists():
+                     depth_path.unlink()
+             except Exception:
+                 pass
+             return output_path
+
+         # Adapted pipeline for custom output resolution / FPS.
+         src_w, src_h = metadata.resolution_px
+         src_f = float(metadata.focal_length_px)
+
+         if output_long_side is None:
+             out_w, out_h, out_f = src_w, src_h, src_f
+         else:
+             long_side = max(src_w, src_h)
+             scale = float(output_long_side) / float(long_side)
+             out_w = _make_even(max(2, int(round(src_w * scale))))
+             out_h = _make_even(max(2, int(round(src_h * scale))))
+             out_f = src_f * scale
+
+         traj_params = camera.TrajectoryParams(
+             type=trajectory_type,
+             num_steps=int(num_frames),
+             num_repeats=1,
+         )
+
+         device = torch.device("cuda")
+         gaussians_cuda = gaussians.to(device)
+
+         intrinsics = torch.tensor(
+             [
+                 [out_f, 0.0, (out_w - 1) / 2.0, 0.0],
+                 [0.0, out_f, (out_h - 1) / 2.0, 0.0],
+                 [0.0, 0.0, 1.0, 0.0],
+                 [0.0, 0.0, 0.0, 1.0],
+             ],
+             device=device,
+             dtype=torch.float32,
+         )
+
+         cam_model = camera.create_camera_model(
+             gaussians_cuda,
+             intrinsics,
+             resolution_px=(out_w, out_h),
+             lookat_mode=traj_params.lookat_mode,
+         )
+
+         trajectory = camera.create_eye_trajectory(
+             gaussians_cuda,
+             traj_params,
+             resolution_px=(out_w, out_h),
+             f_px=out_f,
+         )
+
+         renderer = GSplatRenderer(color_space=metadata.color_space)
+
+         # IMPORTANT: Keep render_depth=True (avoids upstream AttributeError).
+         video_writer = _PatchedVideoWriter(output_path, fps=float(fps), render_depth=True)
+
+         for eye_position in trajectory:
+             cam_info = cam_model.compute(eye_position)
+             rendering = renderer(
+                 gaussians_cuda,
+                 extrinsics=cam_info.extrinsics[None].to(device),
+                 intrinsics=cam_info.intrinsics[None].to(device),
+                 image_width=cam_info.width,
+                 image_height=cam_info.height,
+             )
+             color = (rendering.color[0].permute(1, 2, 0) * 255.0).to(dtype=torch.uint8)
+             depth = rendering.depth[0]
+             video_writer.add_frame(color, depth)
+
+         video_writer.close()
+
+         depth_path = output_path.with_suffix(".depth.mp4")
+         try:
+             if depth_path.exists():
+                 depth_path.unlink()
+         except Exception:
+             pass
+
+         return output_path
+
+     def render_video(
+         self,
+         *,
+         gaussians: Gaussians3D,
+         metadata: SceneMetaData,
+         output_stem: str,
+         trajectory_type: TrajectoryType = "rotate_forward",
+         num_frames: int = 60,
+         fps: int = 30,
+         output_long_side: int | None = None,
+     ) -> Path:
+         """Render a camera trajectory as an MP4 (CUDA-only)."""
+         output_path = self.outputs_dir / f"{output_stem}.mp4"
+         return self._render_video_impl(
+             gaussians=gaussians,
+             metadata=metadata,
+             output_path=output_path,
+             trajectory_type=trajectory_type,
+             num_frames=num_frames,
+             fps=fps,
+             output_long_side=output_long_side,
+         )
+
+     def predict_and_maybe_render(
+         self,
+         image_path: str | Path,
+         *,
+         trajectory_type: TrajectoryType,
+         num_frames: int,
+         fps: int,
+         output_long_side: int | None,
+         render_video: bool = True,
+     ) -> tuple[Path | None, Path]:
+         """One-shot helper for the UI: returns (video_path, ply_path)."""
+         pred = self.predict_to_ply(image_path)
+
+         if not render_video:
+             return None, pred.ply_path
+
+         if not torch.cuda.is_available():
+             return None, pred.ply_path
+
+         output_stem = pred.ply_path.with_suffix("").name
+         video_path = self.render_video(
+             gaussians=pred.gaussians,
+             metadata=pred.metadata_for_render,
+             output_stem=output_stem,
+             trajectory_type=trajectory_type,
+             num_frames=num_frames,
+             fps=fps,
+             output_long_side=output_long_side,
+         )
+         return video_path, pred.ply_path
+
+
+ # -----------------------------------------------------------------------------
+ # ZeroGPU entrypoints
+ # -----------------------------------------------------------------------------
+ #
+ # IMPORTANT: Do NOT decorate bound instance methods with `@spaces.GPU` on ZeroGPU.
+ # The wrapper uses multiprocessing queues and pickles args/kwargs. If `self` is
+ # included, Python will try to pickle the whole instance. ModelWrapper contains
+ # a threading.RLock (not pickleable) and the model itself should not be pickled.
+ #
+ # Expose module-level functions that accept only pickleable arguments and
+ # create/cache the ModelWrapper inside the GPU worker process.
+
+ DEFAULT_OUTPUTS_DIR: Final[Path] = _ensure_dir(Path(__file__).resolve().parent / "outputs")
+
+ _GLOBAL_MODEL: ModelWrapper | None = None
+ _GLOBAL_MODEL_INIT_LOCK: Final[threading.Lock] = threading.Lock()
+
+
+ def get_global_model(*, outputs_dir: str | Path = DEFAULT_OUTPUTS_DIR) -> ModelWrapper:
+     global _GLOBAL_MODEL
+     with _GLOBAL_MODEL_INIT_LOCK:
+         if _GLOBAL_MODEL is None:
+             _GLOBAL_MODEL = ModelWrapper(outputs_dir=outputs_dir)
+         return _GLOBAL_MODEL
+
+
+ def predict_and_maybe_render(
+     image_path: str | Path,
+     *,
+     trajectory_type: TrajectoryType,
+     num_frames: int,
+     fps: int,
+     output_long_side: int | None,
+     render_video: bool = True,
+ ) -> tuple[Path | None, Path]:
+     model = get_global_model()
+     return model.predict_and_maybe_render(
+         image_path,
+         trajectory_type=trajectory_type,
+         num_frames=num_frames,
+         fps=fps,
+         output_long_side=output_long_side,
+         render_video=render_video,
+     )
+
+
+ # Export the GPU-wrapped callable (or a no-op wrapper locally).
+ if spaces is not None:
+     predict_and_maybe_render_gpu = spaces.GPU(duration=180)(predict_and_maybe_render)
+ else:  # pragma: no cover
+     predict_and_maybe_render_gpu = predict_and_maybe_render
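
Outside of Spaces, the undecorated `predict_and_maybe_render` can be called directly. A minimal local-usage sketch, assuming the `sharp` package is installed and a checkpoint is reachable (e.g. via `SHARP_CHECKPOINT_PATH`); the input path is illustrative:

```python
# Sketch: CPU-friendly local run that skips the CUDA-only MP4 render.
from model_utils import predict_and_maybe_render

video_path, ply_path = predict_and_maybe_render(
    "assets/examples/ETH3D_courtyard_00000_0000-0001.jpg",
    trajectory_type="rotate_forward",
    num_frames=60,
    fps=30,
    output_long_side=None,  # keep the input resolution
    render_video=False,     # video_path will be None
)
print(ply_path)
```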
pyproject.toml ADDED
@@ -0,0 +1,23 @@
+ [project]
+ name = "ml-sharp"
+ version = "1.0.0"
+ description = "Sharp Monocular View Synthesis in Less Than a Second"
+ readme = "README.md"
+ requires-python = ">=3.13"
+ dependencies = [
+     "gradio==6.1.0",
+     "huggingface-hub>=1.2.3",
+     "sharp",
+     "spaces==0.44.0",
+     "torch>=2.9.1",
+     "torchvision>=0.24.1",
+ ]
+
+ [tool.uv.sources]
+ sharp = { git = "https://github.com/apple/ml-sharp.git", rev = "cdb4ddc6796402bee5487c7312260f2edd8bd5f0" }
+
+ [dependency-groups]
+ dev = [
+     "hf>=1.2.3",
+     "ruff>=0.14.9",
+ ]
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ gradio==6.1.0
+ spaces==0.44.0
+ huggingface_hub>=1.2.3
+ torch
+ torchvision
+ sharp @ git+https://github.com/apple/ml-sharp.git@cdb4ddc6796402bee5487c7312260f2edd8bd5f0
uv.lock ADDED
The diff for this file is too large to render. See raw diff