gagndeep committed
Commit 46d271d · 0 Parent(s)

Clean repo without large files
.gitattributes ADDED
@@ -0,0 +1,38 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
+ *.ply filter=lfs diff=lfs merge=lfs -text
+ *.jpg filter=lfs diff=lfs merge=lfs -text
+ *.mp4 filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,219 @@
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[codz]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py.cover
+ .hypothesis/
+ .pytest_cache/
+ cover/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+ db.sqlite3-journal
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ .pybuilder/
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # IPython
+ profile_default/
+ ipython_config.py
+
+ # pyenv
+ # For a library or package, you might want to ignore these files since the code is
+ # intended to run in multiple environments; otherwise, check them in:
+ # .python-version
+
+ # pipenv
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
+ # install all needed dependencies.
+ # Pipfile.lock
+
+ # UV
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
+ # commonly ignored for libraries.
+ # uv.lock
+
+ # poetry
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
+ # commonly ignored for libraries.
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+ # poetry.lock
+ # poetry.toml
+
+ # pdm
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+ # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
+ # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
+ # pdm.lock
+ # pdm.toml
+ .pdm-python
+ .pdm-build/
+
+ # pixi
+ # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
+ # pixi.lock
+ # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
+ # in the .venv directory. It is recommended not to include this directory in version control.
+ .pixi
+
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+ __pypackages__/
+
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+
+ # Redis
+ *.rdb
+ *.aof
+ *.pid
+
+ # RabbitMQ
+ mnesia/
+ rabbitmq/
+ rabbitmq-data/
+
+ # ActiveMQ
+ activemq-data/
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .env
+ .envrc
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+
+ # Pyre type checker
+ .pyre/
+
+ # pytype static type analyzer
+ .pytype/
+
+ # Cython debug symbols
+ cython_debug/
+
+ # PyCharm
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
+ # .idea/
+
+ # Abstra
+ # Abstra is an AI-powered process automation framework.
+ # Ignore directories containing user credentials, local state, and settings.
+ # Learn more at https://abstra.io/docs
+ .abstra/
+
+ # Visual Studio Code
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
+ # you could uncomment the following to ignore the entire vscode folder
+ # .vscode/
+
+ # Ruff stuff:
+ .ruff_cache/
+
+ # PyPI configuration file
+ .pypirc
+
+ # Marimo
+ marimo/_static/
+ marimo/_lsp/
+ __marimo__/
+
+ # Streamlit
+ .streamlit/secrets.toml
+
+ # Kilo Code
+ .kilocode/
.python-version ADDED
@@ -0,0 +1 @@
+ 3.13
README.md ADDED
@@ -0,0 +1,17 @@
+ ---
+ title: "SHARP - 3D Gaussian Scene Prediction"
+ emoji: 🔪
+ colorFrom: purple
+ colorTo: indigo
+ sdk: gradio
+ sdk_version: 6.1.0
+ python_version: 3.13
+ app_file: app.py
+ pinned: false
+ short_description: "Sharp Monocular View Synthesis in Less Than a Second"
+ models:
+   - apple/Sharp
+ startup_duration_timeout: 1h
+ preload_from_hub:
+   - apple/Sharp sharp_2572gikvuh.pt
+ ---
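
The `preload_from_hub` entry above bakes the checkpoint into the Space's HF cache at build time. As a hedged sketch (not part of this commit), the same file can be resolved from Python via `huggingface_hub`; the `repo_id` and `filename` mirror the front matter:

```python
# Sketch: resolve the preloaded SHARP checkpoint from the local HF cache,
# downloading it only if it is not already cached.
from huggingface_hub import hf_hub_download

ckpt_path = hf_hub_download(repo_id="apple/Sharp", filename="sharp_2572gikvuh.pt")
print(ckpt_path)  # e.g. a path under ~/.cache/huggingface/hub/
```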
app.py ADDED
@@ -0,0 +1,274 @@
+ """SHARP Gradio demo (Modern, Single-Page UI).
+
+ This Space:
+ - Runs Apple's SHARP model to predict a 3D Gaussian scene from a single image.
+ - Exports a canonical `.ply` file for download.
+ - Optionally renders a camera trajectory `.mp4` (CUDA / ZeroGPU only).
+ """
+
+ from __future__ import annotations
+
+ import json
+ from pathlib import Path
+ from typing import Final
+
+ import gradio as gr
+
+ # Local module; model_utils.py ships alongside this app.
+ from model_utils import TrajectoryType, predict_and_maybe_render_gpu
+
+ # -----------------------------------------------------------------------------
+ # Paths & Configuration
+ # -----------------------------------------------------------------------------
+
+ APP_DIR: Final[Path] = Path(__file__).resolve().parent
+ OUTPUTS_DIR: Final[Path] = APP_DIR / "outputs"
+ ASSETS_DIR: Final[Path] = APP_DIR / "assets"
+ EXAMPLES_DIR: Final[Path] = ASSETS_DIR / "examples"
+
+ # Valid image extensions for discovery
+ IMAGE_EXTS: Final[tuple[str, ...]] = (".png", ".jpg", ".jpeg", ".webp")
+
+ # CSS for a responsive, contained layout
+ CSS: Final[str] = """
+ .gradio-container {
+     max-width: 1400px !important;
+     margin: 0 auto;
+ }
+ /* constrain media height so it doesn't take up the whole screen */
+ #input-image img, #output-video video {
+     max-height: 500px;
+     width: 100%;
+     object-fit: contain;
+ }
+ /* Make the generate button pop slightly */
+ #run-btn {
+     font-size: 1.1em;
+     font-weight: bold;
+ }
+ """
+
+ # -----------------------------------------------------------------------------
+ # Logic & Helpers
+ # -----------------------------------------------------------------------------
+
+ def _ensure_dir(path: Path) -> Path:
+     path.mkdir(parents=True, exist_ok=True)
+     return path
+
+ def get_example_files() -> list[list[str]]:
+     """
+     Scan assets/examples for images to populate the gr.Examples component.
+     Returns a list of lists: [['path/to/img1.jpg'], ['path/to/img2.png']].
+     """
+     _ensure_dir(EXAMPLES_DIR)
+
+     # Priority 1: Check manifest.json
+     manifest_path = EXAMPLES_DIR / "manifest.json"
+     if manifest_path.exists():
+         try:
+             data = json.loads(manifest_path.read_text(encoding="utf-8"))
+             examples = []
+             for entry in data:
+                 if "image" in entry:
+                     img_path = EXAMPLES_DIR / entry["image"]
+                     if img_path.exists():
+                         examples.append([str(img_path)])
+             if examples:
+                 return examples
+         except Exception as e:
+             print(f"Error reading manifest: {e}")
+
+     # Priority 2: Auto-discovery
+     examples = []
+     for ext in IMAGE_EXTS:
+         for img in sorted(EXAMPLES_DIR.glob(f"*{ext}")):
+             examples.append([str(img)])
+
+     return examples
+
+ def run_sharp(
+     image_path: str | None,
+     trajectory_type: str,
+     output_long_side: int,
+     num_frames: int,
+     fps: int,
+     render_video: bool,
+     progress=gr.Progress(),
+ ) -> tuple[str | None, str | None, str]:
+     """
+     Main inference wrapper.
+     """
+     if not image_path:
+         raise gr.Error("Please upload or select an input image first.")
+
+     # Validate output resolution
+     out_long_side_val: int | None = (
+         None if int(output_long_side) <= 0 else int(output_long_side)
+     )
+
+     try:
+         progress(0.1, desc="Initializing model...")
+
+         # TrajectoryType in model_utils is a Literal of plain strings, so the
+         # dropdown value is passed through as-is; no Enum conversion is needed.
+         traj_type: TrajectoryType = trajectory_type  # values match the dropdown choices
+
+         progress(0.3, desc="Predicting Gaussians...")
+
+         video_path, ply_path = predict_and_maybe_render_gpu(
+             image_path,
+             trajectory_type=traj_type,
+             num_frames=int(num_frames),
+             fps=int(fps),
+             output_long_side=out_long_side_val,
+             render_video=bool(render_video),
+         )
+
+         progress(0.9, desc="Finalizing...")
+
+         status_msg = f"✅ **Success**\n\nPLY: `{ply_path.name}`"
+         if video_path:
+             status_msg += f"\nVideo: `{video_path.name}`"
+         else:
+             status_msg += "\n(Video rendering skipped or unavailable)"
+
+         return (
+             str(video_path) if video_path else None,
+             str(ply_path),
+             status_msg,
+         )
+
+     except Exception as e:
+         raise gr.Error(f"Generation failed: {e}") from e
+
+ # -----------------------------------------------------------------------------
+ # UI Construction
+ # -----------------------------------------------------------------------------
+
+ def build_demo() -> gr.Blocks:
+     # Use the Default theme for a clean, modern look
+     theme = gr.themes.Default()
+
+     with gr.Blocks(theme=theme, css=CSS, title="SHARP 3D") as demo:
+
+         # --- Header ---
+         with gr.Row():
+             with gr.Column(scale=1):
+                 gr.Markdown(
+                     """
+                     # SHARP: Single-Image 3D
+                     Convert any static image into a 3D Gaussian Splat scene in seconds.
+                     """
+                 )
+
+         # --- Main Interface ---
+         with gr.Row():
+
+             # --- Left Column: Input & Controls ---
+             with gr.Column(scale=1):
+                 image_in = gr.Image(
+                     label="Input Image",
+                     type="filepath",
+                     sources=["upload", "clipboard"],
+                     elem_id="input-image",
+                     height=400,
+                 )
+
+                 # Collapsible Advanced Settings for a cleaner UI
+                 with gr.Accordion("⚙️ Advanced Configuration", open=False):
+                     with gr.Row():
+                         trajectory = gr.Dropdown(
+                             label="Camera Trajectory",
+                             choices=["swipe", "shake", "rotate", "rotate_forward"],
+                             value="rotate_forward",
+                             info="Camera movement for video preview",
+                         )
+                         output_res = gr.Dropdown(
+                             label="Resolution (Long Side)",
+                             choices=[("Match Input", 0), ("512", 512), ("1024", 1024)],
+                             value=0,
+                         )
+                     with gr.Row():
+                         frames = gr.Slider(
+                             label="Frames", minimum=24, maximum=120, step=1, value=60
+                         )
+                         fps_in = gr.Slider(
+                             label="FPS", minimum=8, maximum=60, step=1, value=30
+                         )
+                     render_toggle = gr.Checkbox(
+                         label="Render Video Preview (Requires GPU)", value=True
+                     )
+
+                 run_btn = gr.Button("✨ Generate 3D Scene", variant="primary", elem_id="run-btn")
+
+             # --- Right Column: Output ---
+             with gr.Column(scale=1):
+                 video_out = gr.Video(
+                     label="Preview Trajectory",
+                     elem_id="output-video",
+                     autoplay=True,
+                     height=400,
+                 )
+                 with gr.Group():
+                     ply_download = gr.DownloadButton(
+                         label="Download .PLY Scene",
+                         variant="secondary",
+                     )
+                     status_md = gr.Markdown("Ready to run.")
+
+         # --- Footer: Examples ---
+         # Standard Gradio Examples component
+         example_files = get_example_files()
+         if example_files:
+             gr.Examples(
+                 examples=example_files,
+                 inputs=[image_in],
+                 label="Try an Example",
+                 examples_per_page=5,
+             )
+
+         # --- Event Handlers ---
+         run_btn.click(
+             fn=run_sharp,
+             inputs=[
+                 image_in,
+                 trajectory,
+                 output_res,
+                 frames,
+                 fps_in,
+                 render_toggle,
+             ],
+             outputs=[video_out, ply_download, status_md],
+             concurrency_limit=1,
+         )
+
+         # --- Citation ---
+         with gr.Accordion("About & Citation", open=False):
+             gr.Markdown(
+                 """
+                 **SHARP: Sharp Monocular View Synthesis in Less Than a Second** (Apple, 2025).
+
+                 If you use this model, please cite:
+                 ```bibtex
+                 @article{Sharp2025:arxiv,
+                     title   = {Sharp Monocular View Synthesis in Less Than a Second},
+                     author  = {Mescheder and Dong and Li and Bai and others},
+                     year    = {2025},
+                     journal = {arXiv preprint arXiv:2512.10685}
+                 }
+                 ```
+                 """
+             )
+
+     return demo
+
+ # -----------------------------------------------------------------------------
+ # Entry Point
+ # -----------------------------------------------------------------------------
+
+ _ensure_dir(OUTPUTS_DIR)
+
+ if __name__ == "__main__":
+     demo = build_demo()
+     demo.queue().launch(allowed_paths=[str(ASSETS_DIR)])
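
For reference, the click handler above is also reachable over the Gradio API. A hedged sketch with `gradio_client` (the Space id is a placeholder, and `api_name="/run_sharp"` assumes Gradio's default of naming the endpoint after the wrapped function):

```python
# Sketch: drive the Space programmatically; argument order mirrors run_sharp's inputs.
from gradio_client import Client, handle_file

client = Client("user/sharp-space")  # placeholder Space id
video, ply, status = client.predict(
    handle_file("photo.jpg"),  # image_in
    "rotate_forward",          # trajectory
    0,                         # output_res (0 = match input)
    60,                        # frames
    30,                        # fps_in
    True,                      # render_toggle
    api_name="/run_sharp",
)
print(status)
```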
assets/examples/Booster_train_balanced_Bathroom_camera_00_im0_png_00000_0000-0001.jpg ADDED

Git LFS Details

  • SHA256: 819880be5ee569c066aac4f20b5cb08c450c683eda7e188981b8f30bf25cfd72
  • Pointer size: 131 Bytes
  • Size of remote file: 137 kB
assets/examples/Booster_train_balanced_Bathroom_camera_00_im0_png_00000_0000-0001.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:23946e8345738bec5052c11ef259490e8fa003a9f0c87c5cae4b0434d6b0b211
+ size 506496
assets/examples/Booster_train_balanced_Bathroom_camera_00_im0_png_00000_0000-0001.ply ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:54d28194b0ae41fd2a2f09b07de28d2305c5181b0664cd25ce29f9e114ba2ea8
+ size 66061086
assets/examples/ETH3D_courtyard_00000_0000-0001.jpg ADDED

Git LFS Details

  • SHA256: 806be6fcaa6064a7a880835e20aafa4b509fa4d2dee42c7b4d58631f0bed1cd5
  • Pointer size: 131 Bytes
  • Size of remote file: 261 kB
assets/examples/ETH3D_courtyard_00000_0000-0001.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a5bdbcc32493174b684aff6b2ab0701f4c037e40929991948a379c9d7c323792
+ size 538810
assets/examples/ETH3D_courtyard_00000_0000-0001.ply ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:282fe4719d2822ea66cf3ab02160ec6bc030c7a68bff8849033d679a9d522438
+ size 66061086
assets/examples/manifest.json ADDED
@@ -0,0 +1,50 @@
+ [
+   {
+     "label": "Bathroom",
+     "image": "Booster_train_balanced_Bathroom_camera_00_im0_png_00000_0000-0001.jpg",
+     "video": "Booster_train_balanced_Bathroom_camera_00_im0_png_00000_0000-0001.mp4",
+     "ply": "Booster_train_balanced_Bathroom_camera_00_im0_png_00000_0000-0001.ply"
+   },
+   {
+     "label": "Courtyard",
+     "image": "ETH3D_courtyard_00000_0000-0001.jpg",
+     "video": "ETH3D_courtyard_00000_0000-0001.mp4",
+     "ply": "ETH3D_courtyard_00000_0000-0001.ply"
+   },
+   {
+     "label": "Bicycle",
+     "image": "Middlebury_49b2bcfdd9_000_0000-0001.jpg",
+     "video": "Middlebury_49b2bcfdd9_000_0000-0001.mp4",
+     "ply": "Middlebury_49b2bcfdd9_000_0000-0001.ply"
+   },
+   {
+     "label": "Interior",
+     "image": "ScanNetPP_09c1414f1b_00000_0000-0001.jpg",
+     "video": "ScanNetPP_09c1414f1b_00000_0000-0001.mp4",
+     "ply": "ScanNetPP_09c1414f1b_00000_0000-0001.ply"
+   },
+   {
+     "label": "Church",
+     "image": "TanksAndTemples_Church_00022_0000-0002.jpg",
+     "video": "TanksAndTemples_Church_00022_0000-0002.mp4",
+     "ply": "TanksAndTemples_Church_00022_0000-0002.ply"
+   },
+   {
+     "label": "Flowers",
+     "image": "Unsplash_-591oIJnyEQ_0000-0001.jpg",
+     "video": "Unsplash_-591oIJnyEQ_0000-0001.mp4",
+     "ply": "Unsplash_-591oIJnyEQ_0000-0001.ply"
+   },
+   {
+     "label": "Horse",
+     "image": "Unsplash_SharpPaperVideo_-B_lu05yfgE_0000-0001.jpg",
+     "video": "Unsplash_SharpPaperVideo_-B_lu05yfgE_0000-0001.mp4",
+     "ply": "Unsplash_SharpPaperVideo_-B_lu05yfgE_0000-0001.ply"
+   },
+   {
+     "label": "TV",
+     "image": "WildRGBD_TV_scene_000_00028_0000-0002.jpg",
+     "video": "WildRGBD_TV_scene_000_00028_0000-0002.mp4",
+     "ply": "WildRGBD_TV_scene_000_00028_0000-0002.ply"
+   }
+ ]
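
`get_example_files()` in app.py only reads the `image` field of each entry; the `video` and `ply` fields point at precomputed outputs shipped for reference. A small hypothetical check (not part of this commit) that every referenced asset exists on disk:

```python
# Sketch: flag manifest entries whose referenced files are missing.
import json
from pathlib import Path

EXAMPLES_DIR = Path("assets/examples")  # assumes execution from the repo root

def missing_manifest_assets(manifest: Path = EXAMPLES_DIR / "manifest.json") -> list[str]:
    entries = json.loads(manifest.read_text(encoding="utf-8"))
    return [
        entry[key]
        for entry in entries
        for key in ("image", "video", "ply")
        if key in entry and not (EXAMPLES_DIR / entry[key]).exists()
    ]

if __name__ == "__main__":
    print(missing_manifest_assets() or "all manifest assets present")
```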
model_utils.py ADDED
@@ -0,0 +1,612 @@
+ """SHARP inference + optional CUDA video rendering utilities.
+
+ Design goals:
+ - Reuse SHARP's own predict/render pipeline (no subprocess calls).
+ - Be robust on Hugging Face Spaces + ZeroGPU.
+ - Cache model weights and predictor construction across requests.
+
+ Public API (used by the Gradio app):
+ - TrajectoryType
+ - predict_and_maybe_render_gpu(...)
+ """
+
+ from __future__ import annotations
+
+ import os
+ import threading
+ import time
+ import uuid
+ from contextlib import contextmanager
+ from dataclasses import dataclass
+ from pathlib import Path
+ from typing import Final, Literal
+
+ import torch
+
+ try:
+     import spaces
+ except Exception:  # pragma: no cover
+     spaces = None  # type: ignore[assignment]
+
+ try:
+     # Prefer HF cache / Hub downloads (works with Spaces `preload_from_hub`).
+     from huggingface_hub import hf_hub_download, try_to_load_from_cache
+ except Exception:  # pragma: no cover
+     hf_hub_download = None  # type: ignore[assignment]
+     try_to_load_from_cache = None  # type: ignore[assignment]
+
+ from sharp.cli.predict import DEFAULT_MODEL_URL, predict_image
+ from sharp.cli.render import render_gaussians as sharp_render_gaussians
+ from sharp.models import PredictorParams, create_predictor
+ from sharp.utils import camera, io
+ from sharp.utils.gaussians import Gaussians3D, SceneMetaData, save_ply
+ from sharp.utils.gsplat import GSplatRenderer
+
+ TrajectoryType = Literal["swipe", "shake", "rotate", "rotate_forward"]
+
+ # -----------------------------------------------------------------------------
+ # Helpers
+ # -----------------------------------------------------------------------------
+
+
+ def _now_ms() -> int:
+     return int(time.time() * 1000)
+
+
+ def _ensure_dir(path: Path) -> Path:
+     path.mkdir(parents=True, exist_ok=True)
+     return path
+
+
+ def _make_even(x: int) -> int:
+     return x if x % 2 == 0 else x + 1
+
+
+ def _select_device(preference: str = "auto") -> torch.device:
+     """Select the best available device for inference (CPU/CUDA/MPS)."""
+     if preference not in {"auto", "cpu", "cuda", "mps"}:
+         raise ValueError("device preference must be one of: auto|cpu|cuda|mps")
+
+     if preference == "cpu":
+         return torch.device("cpu")
+     if preference == "cuda":
+         return torch.device("cuda" if torch.cuda.is_available() else "cpu")
+     if preference == "mps":
+         return torch.device("mps" if torch.backends.mps.is_available() else "cpu")
+
+     # auto
+     if torch.cuda.is_available():
+         return torch.device("cuda")
+     if torch.backends.mps.is_available():
+         return torch.device("mps")
+     return torch.device("cpu")
+
+
+ # -----------------------------------------------------------------------------
+ # Prediction outputs
+ # -----------------------------------------------------------------------------
+
+
+ @dataclass(frozen=True, slots=True)
+ class PredictionOutputs:
+     """Outputs of SHARP inference (plus derived metadata for rendering)."""
+
+     ply_path: Path
+     gaussians: Gaussians3D
+     metadata_for_render: SceneMetaData
+     input_resolution_hw: tuple[int, int]
+     focal_length_px: float
+
+
+ # -----------------------------------------------------------------------------
+ # Patch SHARP VideoWriter to properly close the optional depth writer
+ # -----------------------------------------------------------------------------
+
+
+ class _PatchedVideoWriter(io.VideoWriter):
+     """Ensure depth writer is closed so files can be safely cleaned up."""
+
+     def __init__(
+         self, output_path: Path, fps: float = 30.0, render_depth: bool = True
+     ) -> None:
+         super().__init__(output_path, fps=fps, render_depth=render_depth)
+         # Ensure attribute exists for downstream code paths.
+         if not hasattr(self, "depth_writer"):
+             self.depth_writer = None  # type: ignore[attribute-defined-outside-init]
+
+     def close(self):
+         super().close()
+         depth_writer = getattr(self, "depth_writer", None)
+         try:
+             if depth_writer is not None:
+                 depth_writer.close()
+         except Exception:
+             pass
+
+
+ @contextmanager
+ def _patched_sharp_videowriter():
+     """Temporarily patch `sharp.utils.io.VideoWriter` used by `sharp.cli.render`."""
+     original = io.VideoWriter
+     io.VideoWriter = _PatchedVideoWriter  # type: ignore[assignment]
+     try:
+         yield
+     finally:
+         io.VideoWriter = original  # type: ignore[assignment]
+
+
+ # -----------------------------------------------------------------------------
+ # Model wrapper
+ # -----------------------------------------------------------------------------
+
+
+ class ModelWrapper:
+     """Cached SHARP model wrapper for Gradio/Spaces."""
+
+     def __init__(
+         self,
+         *,
+         outputs_dir: str | Path = "outputs",
+         checkpoint_url: str = DEFAULT_MODEL_URL,
+         checkpoint_path: str | Path | None = None,
+         device_preference: str = "auto",
+         keep_model_on_device: bool | None = None,
+         hf_repo_id: str | None = None,
+         hf_filename: str | None = None,
+         hf_revision: str | None = None,
+     ) -> None:
+         self.outputs_dir = _ensure_dir(Path(outputs_dir))
+         self.checkpoint_url = checkpoint_url
+
+         env_ckpt = os.getenv("SHARP_CHECKPOINT_PATH") or os.getenv("SHARP_CHECKPOINT")
+         if checkpoint_path:
+             self.checkpoint_path = Path(checkpoint_path)
+         elif env_ckpt:
+             self.checkpoint_path = Path(env_ckpt)
+         else:
+             self.checkpoint_path = None
+
+         # Optional Hugging Face Hub fallback (useful when direct CDN download fails).
+         self.hf_repo_id = hf_repo_id or os.getenv("SHARP_HF_REPO_ID", "apple/Sharp")
+         self.hf_filename = hf_filename or os.getenv(
+             "SHARP_HF_FILENAME", "sharp_2572gikvuh.pt"
+         )
+         self.hf_revision = hf_revision or os.getenv("SHARP_HF_REVISION") or None
+
+         self.device_preference = device_preference
+
+         # For ZeroGPU, it's safer to not keep large tensors on CUDA across calls.
+         if keep_model_on_device is None:
+             keep_env = (
+                 os.getenv("SHARP_KEEP_MODEL_ON_DEVICE")
+             )
+             self.keep_model_on_device = keep_env == "1"
+         else:
+             self.keep_model_on_device = keep_model_on_device
+
+         self._lock = threading.RLock()
+         self._predictor: torch.nn.Module | None = None
+         self._predictor_device: torch.device | None = None
+         self._state_dict: dict | None = None
+
+     def has_cuda(self) -> bool:
+         return torch.cuda.is_available()
+
+     def _load_state_dict(self) -> dict:
+         with self._lock:
+             if self._state_dict is not None:
+                 return self._state_dict
+
+             # 1) Explicit local checkpoint path
+             if self.checkpoint_path is not None:
+                 try:
+                     self._state_dict = torch.load(
+                         self.checkpoint_path,
+                         weights_only=True,
+                         map_location="cpu",
+                     )
+                     return self._state_dict
+                 except Exception as e:
+                     raise RuntimeError(
+                         "Failed to load SHARP checkpoint from local path.\n\n"
+                         f"Path:\n  {self.checkpoint_path}\n\n"
+                         f"Original error:\n  {type(e).__name__}: {e}"
+                     ) from e
+
+             # 2) HF cache (no-network): best match for Spaces `preload_from_hub`.
+             hf_cache_error: Exception | None = None
+             if try_to_load_from_cache is not None:
+                 try:
+                     cached = try_to_load_from_cache(
+                         repo_id=self.hf_repo_id,
+                         filename=self.hf_filename,
+                         revision=self.hf_revision,
+                         repo_type="model",
+                     )
+                 except TypeError:
+                     cached = try_to_load_from_cache(self.hf_repo_id, self.hf_filename)  # type: ignore[misc]
+
+                 try:
+                     if isinstance(cached, str) and Path(cached).exists():
+                         self._state_dict = torch.load(
+                             cached, weights_only=True, map_location="cpu"
+                         )
+                         return self._state_dict
+                 except Exception as e:
+                     hf_cache_error = e
+
+             # 3) HF Hub download (reuse cache when available; may download otherwise).
+             hf_error: Exception | None = None
+             if hf_hub_download is not None:
+                 # Attempt "local only" mode if supported (avoids network).
+                 try:
+                     import inspect
+
+                     if "local_files_only" in inspect.signature(hf_hub_download).parameters:
+                         ckpt_path = hf_hub_download(
+                             repo_id=self.hf_repo_id,
+                             filename=self.hf_filename,
+                             revision=self.hf_revision,
+                             local_files_only=True,
+                         )
+                         if Path(ckpt_path).exists():
+                             self._state_dict = torch.load(
+                                 ckpt_path, weights_only=True, map_location="cpu"
+                             )
+                             return self._state_dict
+                 except Exception:
+                     pass
+
+                 try:
+                     ckpt_path = hf_hub_download(
+                         repo_id=self.hf_repo_id,
+                         filename=self.hf_filename,
+                         revision=self.hf_revision,
+                     )
+                     self._state_dict = torch.load(
+                         ckpt_path,
+                         weights_only=True,
+                         map_location="cpu",
+                     )
+                     return self._state_dict
+                 except Exception as e:
+                     hf_error = e
+
+             # 4) Default upstream CDN (torch hub cache). Last resort.
+             url_error: Exception | None = None
+             try:
+                 self._state_dict = torch.hub.load_state_dict_from_url(
+                     self.checkpoint_url,
+                     progress=True,
+                     map_location="cpu",
+                 )
+                 return self._state_dict
+             except Exception as e:
+                 url_error = e
+
+             # If we got here: all options failed.
+             hint_lines = [
+                 "Failed to load SHARP checkpoint.",
+                 "",
+                 "Tried (in order):",
+                 f"  1) HF cache (preload_from_hub): repo_id={self.hf_repo_id}, filename={self.hf_filename}, revision={self.hf_revision or 'None'}",
+                 f"  2) HF Hub download: repo_id={self.hf_repo_id}, filename={self.hf_filename}, revision={self.hf_revision or 'None'}",
+                 f"  3) URL (torch hub): {self.checkpoint_url}",
+                 "",
+                 "If network access is restricted, set a local checkpoint path:",
+                 "  - SHARP_CHECKPOINT_PATH=/path/to/sharp_2572gikvuh.pt",
+                 "",
+                 "Original errors:",
+             ]
+             if try_to_load_from_cache is None:
+                 hint_lines.append("  HF cache: huggingface_hub not installed")
+             elif hf_cache_error is not None:
+                 hint_lines.append(
+                     f"  HF cache: {type(hf_cache_error).__name__}: {hf_cache_error}"
+                 )
+             else:
+                 hint_lines.append("  HF cache: (not found in cache)")
+
+             if hf_hub_download is None:
+                 hint_lines.append("  HF download: huggingface_hub not installed")
+             else:
+                 hint_lines.append(f"  HF download: {type(hf_error).__name__}: {hf_error}")
+
+             hint_lines.append(f"  URL: {type(url_error).__name__}: {url_error}")
+
+             raise RuntimeError("\n".join(hint_lines))
+
+     def _get_predictor(self, device: torch.device) -> torch.nn.Module:
+         with self._lock:
+             if self._predictor is None:
+                 state_dict = self._load_state_dict()
+                 predictor = create_predictor(PredictorParams())
+                 predictor.load_state_dict(state_dict)
+                 predictor.eval()
+                 self._predictor = predictor
+                 self._predictor_device = torch.device("cpu")
+
+             assert self._predictor is not None
+             assert self._predictor_device is not None
+
+             if self._predictor_device != device:
+                 self._predictor.to(device)
+                 self._predictor_device = device
+
+             return self._predictor
+
+     def _maybe_move_model_back_to_cpu(self) -> None:
+         if self.keep_model_on_device:
+             return
+         with self._lock:
+             if self._predictor is not None and self._predictor_device is not None:
+                 if self._predictor_device.type != "cpu":
+                     self._predictor.to("cpu")
+                     self._predictor_device = torch.device("cpu")
+                     if torch.cuda.is_available():
+                         torch.cuda.empty_cache()
+
+     def _make_output_stem(self, input_path: Path) -> str:
+         return f"{input_path.stem}-{_now_ms()}-{uuid.uuid4().hex[:8]}"
+
+     def predict_to_ply(self, image_path: str | Path) -> PredictionOutputs:
+         """Run SHARP inference and export a .ply file."""
+         image_path = Path(image_path)
+         if not image_path.exists():
+             raise FileNotFoundError(f"Image does not exist: {image_path}")
+
+         device = _select_device(self.device_preference)
+         predictor = self._get_predictor(device)
+
+         image_np, _, f_px = io.load_rgb(image_path)
+         height, width = image_np.shape[:2]
+
+         with torch.no_grad():
+             gaussians = predict_image(predictor, image_np, f_px, device)
+
+         stem = self._make_output_stem(image_path)
+         ply_path = self.outputs_dir / f"{stem}.ply"
+
+         # save_ply expects (height, width).
+         save_ply(gaussians, f_px, (height, width), ply_path)
+
+         # SceneMetaData expects (width, height) for resolution.
+         metadata_for_render = SceneMetaData(
+             focal_length_px=float(f_px),
+             resolution_px=(int(width), int(height)),
+             color_space="linearRGB",
+         )
+
+         self._maybe_move_model_back_to_cpu()
+
+         return PredictionOutputs(
+             ply_path=ply_path,
+             gaussians=gaussians,
+             metadata_for_render=metadata_for_render,
+             input_resolution_hw=(int(height), int(width)),
+             focal_length_px=float(f_px),
+         )
+
+     def _render_video_impl(
+         self,
+         *,
+         gaussians: Gaussians3D,
+         metadata: SceneMetaData,
+         output_path: Path,
+         trajectory_type: TrajectoryType,
+         num_frames: int,
+         fps: int,
+         output_long_side: int | None,
+     ) -> Path:
+         if not torch.cuda.is_available():
+             raise RuntimeError("Rendering requires CUDA (gsplat).")
+
+         if num_frames < 2:
+             raise ValueError("num_frames must be >= 2")
+         if fps < 1:
+             raise ValueError("fps must be >= 1")
+
+         # Keep aligned with upstream CLI pipeline where possible.
+         if output_long_side is None and int(fps) == 30:
+             params = camera.TrajectoryParams(
+                 type=trajectory_type,
+                 num_steps=int(num_frames),
+                 num_repeats=1,
+             )
+             with _patched_sharp_videowriter():
+                 sharp_render_gaussians(
+                     gaussians=gaussians,
+                     metadata=metadata,
+                     params=params,
+                     output_path=output_path,
+                 )
+             depth_path = output_path.with_suffix(".depth.mp4")
+             try:
+                 if depth_path.exists():
+                     depth_path.unlink()
+             except Exception:
+                 pass
+             return output_path
+
+         # Adapted pipeline for custom output resolution / FPS.
+         src_w, src_h = metadata.resolution_px
+         src_f = float(metadata.focal_length_px)
+
+         if output_long_side is None:
+             out_w, out_h, out_f = src_w, src_h, src_f
+         else:
+             long_side = max(src_w, src_h)
+             scale = float(output_long_side) / float(long_side)
+             out_w = _make_even(max(2, int(round(src_w * scale))))
+             out_h = _make_even(max(2, int(round(src_h * scale))))
+             out_f = src_f * scale
+
+         traj_params = camera.TrajectoryParams(
+             type=trajectory_type,
+             num_steps=int(num_frames),
+             num_repeats=1,
+         )
+
+         device = torch.device("cuda")
+         gaussians_cuda = gaussians.to(device)
+
+         intrinsics = torch.tensor(
+             [
+                 [out_f, 0.0, (out_w - 1) / 2.0, 0.0],
+                 [0.0, out_f, (out_h - 1) / 2.0, 0.0],
+                 [0.0, 0.0, 1.0, 0.0],
+                 [0.0, 0.0, 0.0, 1.0],
+             ],
+             device=device,
+             dtype=torch.float32,
+         )
+
+         cam_model = camera.create_camera_model(
+             gaussians_cuda,
+             intrinsics,
+             resolution_px=(out_w, out_h),
+             lookat_mode=traj_params.lookat_mode,
+         )
+
+         trajectory = camera.create_eye_trajectory(
+             gaussians_cuda,
+             traj_params,
+             resolution_px=(out_w, out_h),
+             f_px=out_f,
+         )
+
+         renderer = GSplatRenderer(color_space=metadata.color_space)
+
+         # IMPORTANT: Keep render_depth=True (avoids upstream AttributeError).
+         video_writer = _PatchedVideoWriter(output_path, fps=float(fps), render_depth=True)
+
+         for eye_position in trajectory:
+             cam_info = cam_model.compute(eye_position)
+             rendering = renderer(
+                 gaussians_cuda,
+                 extrinsics=cam_info.extrinsics[None].to(device),
+                 intrinsics=cam_info.intrinsics[None].to(device),
+                 image_width=cam_info.width,
+                 image_height=cam_info.height,
+             )
+             color = (rendering.color[0].permute(1, 2, 0) * 255.0).to(dtype=torch.uint8)
+             depth = rendering.depth[0]
+             video_writer.add_frame(color, depth)
+
+         video_writer.close()
+
+         depth_path = output_path.with_suffix(".depth.mp4")
+         try:
+             if depth_path.exists():
+                 depth_path.unlink()
+         except Exception:
+             pass
+
+         return output_path
+
+     def render_video(
+         self,
+         *,
+         gaussians: Gaussians3D,
+         metadata: SceneMetaData,
+         output_stem: str,
+         trajectory_type: TrajectoryType = "rotate_forward",
+         num_frames: int = 60,
+         fps: int = 30,
+         output_long_side: int | None = None,
+     ) -> Path:
+         """Render a camera trajectory as an MP4 (CUDA-only)."""
+         output_path = self.outputs_dir / f"{output_stem}.mp4"
+         return self._render_video_impl(
+             gaussians=gaussians,
+             metadata=metadata,
+             output_path=output_path,
+             trajectory_type=trajectory_type,
+             num_frames=num_frames,
+             fps=fps,
+             output_long_side=output_long_side,
+         )
+
+     def predict_and_maybe_render(
+         self,
+         image_path: str | Path,
+         *,
+         trajectory_type: TrajectoryType,
+         num_frames: int,
+         fps: int,
+         output_long_side: int | None,
+         render_video: bool = True,
+     ) -> tuple[Path | None, Path]:
+         """One-shot helper for the UI: returns (video_path, ply_path)."""
+         pred = self.predict_to_ply(image_path)
+
+         if not render_video:
+             return None, pred.ply_path
+
+         if not torch.cuda.is_available():
+             return None, pred.ply_path
+
+         output_stem = pred.ply_path.with_suffix("").name
+         video_path = self.render_video(
+             gaussians=pred.gaussians,
+             metadata=pred.metadata_for_render,
+             output_stem=output_stem,
+             trajectory_type=trajectory_type,
+             num_frames=num_frames,
+             fps=fps,
+             output_long_side=output_long_side,
+         )
+         return video_path, pred.ply_path
+
+
+ # -----------------------------------------------------------------------------
+ # ZeroGPU entrypoints
+ # -----------------------------------------------------------------------------
+ #
+ # IMPORTANT: Do NOT decorate bound instance methods with `@spaces.GPU` on ZeroGPU.
+ # The wrapper uses multiprocessing queues and pickles args/kwargs. If `self` is
+ # included, Python will try to pickle the whole instance. ModelWrapper contains
+ # a threading.RLock (not pickleable) and the model itself should not be pickled.
+ #
+ # Expose module-level functions that accept only pickleable arguments and
+ # create/cache the ModelWrapper inside the GPU worker process.
+
+ DEFAULT_OUTPUTS_DIR: Final[Path] = _ensure_dir(Path(__file__).resolve().parent / "outputs")
+
+ _GLOBAL_MODEL: ModelWrapper | None = None
+ _GLOBAL_MODEL_INIT_LOCK: Final[threading.Lock] = threading.Lock()
+
+
+ def get_global_model(*, outputs_dir: str | Path = DEFAULT_OUTPUTS_DIR) -> ModelWrapper:
+     global _GLOBAL_MODEL
+     with _GLOBAL_MODEL_INIT_LOCK:
+         if _GLOBAL_MODEL is None:
+             _GLOBAL_MODEL = ModelWrapper(outputs_dir=outputs_dir)
+         return _GLOBAL_MODEL
+
+
+ def predict_and_maybe_render(
+     image_path: str | Path,
+     *,
+     trajectory_type: TrajectoryType,
+     num_frames: int,
+     fps: int,
+     output_long_side: int | None,
+     render_video: bool = True,
+ ) -> tuple[Path | None, Path]:
+     model = get_global_model()
+     return model.predict_and_maybe_render(
+         image_path,
+         trajectory_type=trajectory_type,
+         num_frames=num_frames,
+         fps=fps,
+         output_long_side=output_long_side,
+         render_video=render_video,
+     )
+
+
+ # Export the GPU-wrapped callable (or a no-op wrapper locally).
+ if spaces is not None:
+     predict_and_maybe_render_gpu = spaces.GPU(duration=180)(predict_and_maybe_render)
+ else:  # pragma: no cover
+     predict_and_maybe_render_gpu = predict_and_maybe_render
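
Outside of Spaces, the undecorated `predict_and_maybe_render` can be called directly. A minimal local-usage sketch, assuming the `sharp` package is installed and a checkpoint is reachable (e.g. via `SHARP_CHECKPOINT_PATH`); the input path is illustrative:

```python
# Sketch: CPU-friendly local run that skips the CUDA-only MP4 render.
from model_utils import predict_and_maybe_render

video_path, ply_path = predict_and_maybe_render(
    "assets/examples/ETH3D_courtyard_00000_0000-0001.jpg",
    trajectory_type="rotate_forward",
    num_frames=60,
    fps=30,
    output_long_side=None,  # keep the input resolution
    render_video=False,     # video_path will be None
)
print(ply_path)
```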
pyproject.toml ADDED
@@ -0,0 +1,23 @@
+ [project]
+ name = "ml-sharp"
+ version = "1.0.0"
+ description = "Sharp Monocular View Synthesis in Less Than a Second"
+ readme = "README.md"
+ requires-python = ">=3.13"
+ dependencies = [
+     "gradio==6.1.0",
+     "huggingface-hub>=1.2.3",
+     "sharp",
+     "spaces==0.44.0",
+     "torch>=2.9.1",
+     "torchvision>=0.24.1",
+ ]
+
+ [tool.uv.sources]
+ sharp = { git = "https://github.com/apple/ml-sharp.git", rev = "cdb4ddc6796402bee5487c7312260f2edd8bd5f0" }
+
+ [dependency-groups]
+ dev = [
+     "hf>=1.2.3",
+     "ruff>=0.14.9",
+ ]
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ gradio==6.1.0
+ spaces==0.44.0
+ huggingface_hub>=1.2.3
+ torch
+ torchvision
+ sharp @ git+https://github.com/apple/ml-sharp.git@cdb4ddc6796402bee5487c7312260f2edd8bd5f0
uv.lock ADDED
The diff for this file is too large to render. See raw diff