Spaces: Running on Zero

aknapitsch committed eb74057 · refactoring
Parent(s): 3d74194
app.py
CHANGED
@@ -5,7 +5,6 @@
 # LICENSE file in the root directory of this source tree.
 # conda activate hf3.10
 
-import base64
 import gc
 import os
 import shutil
@@ -21,22 +20,24 @@ import torch
 
 sys.path.append("mapanything/")
 
-from hf_utils.css_and_html import (
-    GRADIO_CSS,
-    MEASURE_INSTRUCTIONS_HTML,
+from mapanything.utils.geometry import depthmap_to_world_frame, points_to_normals
+from mapanything.utils.hf_utils.css_and_html import (
     get_acknowledgements_html,
     get_description_html,
     get_gradio_theme,
     get_header_html,
+    GRADIO_CSS,
+    MEASURE_INSTRUCTIONS_HTML,
 )
-from hf_utils.visual_util import predictions_to_glb
-from mapanything.models import MapAnything
-from mapanything.utils.geometry import depthmap_to_world_frame, points_to_normals
+from mapanything.utils.hf_utils.hf_helpers import initialize_mapanything_model
+from mapanything.utils.hf_utils.visual_util import predictions_to_glb
 from mapanything.utils.image import load_images, rgb
 
 
 def get_logo_base64():
     """Convert WAI logo to base64 for embedding in HTML"""
+    import base64
+
     logo_path = "examples/wai_logo/wai_logo.png"
     try:
         with open(logo_path, "rb") as img_file:
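The hunk above ends inside `get_logo_base64()`. For reference, a minimal sketch of how the full function plausibly reads after this commit (the encode-and-return lines are assumptions; the diff only shows the function through the `open()` call plus a `return None` in the next hunk's context):

    def get_logo_base64():
        """Convert WAI logo to base64 for embedding in HTML"""
        import base64  # moved from a module-level import in this commit

        logo_path = "examples/wai_logo/wai_logo.png"
        try:
            with open(logo_path, "rb") as img_file:
                # Inline the PNG bytes as a data URI for the header HTML
                encoded = base64.b64encode(img_file.read()).decode("utf-8")
            return f"data:image/png;base64,{encoded}"
        except (FileNotFoundError, OSError):
            return None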
@@ -47,72 +48,39 @@ def get_logo_base64():
     return None
 
 
-
-
-
-def load_hf_token():
-    """Load HuggingFace access token from local file"""
-    token_file_paths = [
-        "/home/aknapitsch/hf_token.txt",
-    ]
-
-    for token_path in token_file_paths:
-        if os.path.exists(token_path):
-            try:
-                with open(token_path, "r") as f:
-                    token = f.read().strip()
-                print(f"Loaded HuggingFace token from: {token_path}")
-                return token
-            except Exception as e:
-                print(f"Error reading token from {token_path}: {e}")
-                continue
-        else:
-            print(token_path, "token_path doesnt exist")
-
-    # Also try environment variable
-    # see https://huggingface.co/docs/hub/spaces-overview#managing-secrets on options
-    token = (
-        os.getenv("HF_TOKEN")
-        or os.getenv("HUGGING_FACE_HUB_TOKEN")
-        or os.getenv("HUGGING_FACE_MODEL_TOKEN")
-    )
-    if token:
-        print("Loaded HuggingFace token from environment variable")
-        return token
-
-    print(
-        "Warning: No HuggingFace token found. Model loading may fail for private repositories."
-    )
-    return None
-
-
-def init_hydra_config(config_path, overrides=None):
-    "Initialize Hydra config"
-    import hydra
-
-    config_dir = os.path.dirname(config_path)
-    config_name = os.path.basename(config_path).split(".")[0]
-    relative_path = os.path.relpath(config_dir, os.path.dirname(__file__))
-    hydra.core.global_hydra.GlobalHydra.instance().clear()
-    hydra.initialize(version_base=None, config_path=relative_path)
-    if overrides is not None:
-        cfg = hydra.compose(config_name=config_name, overrides=overrides)
-    else:
-        cfg = hydra.compose(config_name=config_name)
-    return cfg
-
+# MapAnything Configuration
+# high_level_config = {
+#     "path": "configs/train.yaml",
+#     "hf_model_name": "facebook/MapAnything",
+#     "model_str": "mapanything",
+#     "config_overrides": [
+#         "machine=aws",
+#         "model=mapanything",
+#         "model/task=images_only",
+#         "model.encoder.uses_torch_hub=false",
+#     ],
+#     "checkpoint_name": "mapa_curri_24v_13d_48ipg_64g.pth",
+#     "config_name": "config.json",
+#     "trained_with_amp": True,
+#     "trained_with_amp_dtype": "fp16",
+#     "data_norm_type": "dinov2",
+#     "patch_size": 14,
+#     "resolution": 518,
+# }
 
 # MapAnything Configuration
 high_level_config = {
     "path": "configs/train.yaml",
     "hf_model_name": "facebook/map-anything-apache",
+    "model_str": "mapanything",
     "config_overrides": [
         "machine=aws",
         "model=mapanything",
         "model/task=images_only",
         "model.encoder.uses_torch_hub=false",
     ],
-    "
+    "checkpoint_name": "model.safetensors",
+    "config_name": "config.json",
     "trained_with_amp": True,
     "trained_with_amp_dtype": "fp16",
     "data_norm_type": "dinov2",
@@ -128,7 +96,7 @@ model = None
 # 1) Core model inference
 # -------------------------------------------------------------------------
 @spaces.GPU(duration=120)
-def run_model(target_dir, model_placeholder, apply_mask=True, mask_edges=True):
+def run_model(target_dir, apply_mask=True, mask_edges=True):
     """
     Run the MapAnything model on images in the 'target_dir/images' folder and return predictions.
     """
@@ -143,87 +111,7 @@ def run_model(target_dir, model_placeholder, apply_mask=True, mask_edges=True):
 
     # Initialize model if not already done
     if model is None:
-
-
-        # Initialize Hydra config and create model from configuration
-        cfg = init_hydra_config(
-            high_level_config["path"], overrides=high_level_config["config_overrides"]
-        )
-
-        # Try using from_pretrained first
-        try:
-            print("Loading MapAnything model from_pretrained...")
-            model = MapAnything.from_pretrained(high_level_config["hf_model_name"]).to(
-                device
-            )
-            print("Loading MapAnything model from_pretrained succeeded...")
-        except Exception as e:
-            print(f"from_pretrained failed: {e}")
-            print("Falling back to local configuration approach...")
-
-            # Create model from local configuration instead of using from_pretrained
-            from mapanything.models import init_model
-
-            model = init_model(
-                model_str=cfg.model.model_str,
-                model_config=cfg.model.model_config,
-                torch_hub_force_reload=high_level_config.get(
-                    "torch_hub_force_reload", False
-                ),
-            )
-
-            # Load the pretrained weights from HuggingFace Hub
-            try:
-                from huggingface_hub import hf_hub_download, list_repo_files
-
-                # First, let's see what files are available in the repository
-                try:
-                    repo_files = list_repo_files(
-                        repo_id=high_level_config["hf_model_name"], token=load_hf_token()
-                    )
-                    print(f"Available files in repository: {repo_files}")
-
-                    checkpoint_filename = "model.safetensors"
-
-                    # Download the model weights
-                    checkpoint_path = hf_hub_download(
-                        repo_id=high_level_config["hf_model_name"],
-                        filename=checkpoint_filename,
-                        token=load_hf_token(),
-                    )
-
-                    # Load the weights
-                    print("start loading checkpoint")
-                    if checkpoint_filename.endswith(".safetensors"):
-                        from safetensors.torch import load_file
-
-                        checkpoint = load_file(checkpoint_path)
-                    else:
-                        checkpoint = torch.load(
-                            checkpoint_path, map_location="cpu", weights_only=True
-                        )
-
-                    print("start loading state_dict")
-                    if "model" in checkpoint:
-                        model.load_state_dict(checkpoint["model"])
-                    elif "state_dict" in checkpoint:
-                        model.load_state_dict(checkpoint["state_dict"])
-                    else:
-                        model.load_state_dict(checkpoint)
-
-                    print(
-                        f"Successfully loaded pretrained weights from HuggingFace Hub ({checkpoint_filename})"
-                    )
-
-                except Exception as inner_e:
-                    print(f"Error listing repository files or loading weights: {inner_e}")
-                    raise inner_e
-
-            except Exception as e:
-                print(f"Warning: Could not load pretrained weights: {e}")
-                print("Proceeding with randomly initialized model...")
-
-        model = model.to(device)
+        model = initialize_mapanything_model(high_level_config, device)
 
     else:
         model = model.to(device)
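The net effect of this hunk: roughly eighty lines of inline loading logic collapse into a single call into the new helper module. A minimal sketch of the resulting lazy-initialization pattern (the function name `get_model` and the `device` fallback are illustrative assumptions; app.py keeps this logic inside `run_model`):

    import torch

    from mapanything.utils.hf_utils.hf_helpers import initialize_mapanything_model

    model = None  # module-level cache, as in app.py

    def get_model(high_level_config):
        """Load the model on first use; afterwards just move the cached one."""
        global model
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        if model is None:
            model = initialize_mapanything_model(high_level_config, device)
        else:
            model = model.to(device)
        return model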
@@ -400,7 +288,7 @@ def update_measure_view(processed_data, view_index):
 
         if invalid_mask.any():
             # Create a light grey overlay (RGB: 192, 192, 192)
-            overlay_color = np.array([192, 192, 192], dtype=np.uint8)
+            overlay_color = np.array([255, 220, 220], dtype=np.uint8)
 
             # Apply overlay with some transparency
             alpha = 0.5  # Transparency level
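Immediately after this hunk the overlay is presumably alpha-blended into the displayed view. A self-contained sketch of that step (only `overlay_color`, `alpha`, and `invalid_mask` appear in the diff; the blend line itself is an assumption):

    import numpy as np

    image = np.zeros((4, 4, 3), dtype=np.uint8)      # stand-in for the rendered view
    invalid_mask = np.zeros((4, 4), dtype=bool)      # pixels without depth
    invalid_mask[0, 0] = True

    overlay_color = np.array([255, 220, 220], dtype=np.uint8)
    alpha = 0.5  # Transparency level
    # Blend the flat overlay color into the image wherever the mask is set
    image[invalid_mask] = (
        alpha * overlay_color + (1 - alpha) * image[invalid_mask]
    ).astype(np.uint8)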
@@ -619,7 +507,7 @@ def gradio_demo(
     print("Running MapAnything model...")
     with torch.no_grad():
         predictions, processed_data = run_model(
-            target_dir,
+            target_dir, apply_mask, mask_edges
         )
 
     # Save predictions
@@ -768,18 +656,9 @@ def process_predictions_for_visualization(predictions, views, high_level_config)
 
     # Check if confidence data is available in any view
    has_confidence_data = False
-    # for view_idx, view in enumerate(views):
-    #     view_key = f"pred{view_idx + 1}"
-    #     if view_key in pred_result and "conf" in pred_result[view_key]:
-    #         has_confidence_data = True
-    #         break
 
     # Process each view
     for view_idx, view in enumerate(views):
-        # view_key = f"pred{view_idx + 1}"
-        # if view_key not in pred_result:
-        #     continue
-
         # Get image
         image = rgb(view["img"], norm_type=high_level_config["data_norm_type"])
         # image = rgb(view["img"], norm_type=high_level_config["data_norm_type"])
@@ -1241,7 +1120,7 @@ with gr.Blocks(theme=theme, css=GRADIO_CSS) as demo:
                     sources=[],
                 )
                 gr.Markdown(
-                    "**Note:**
+                    "**Note:** Light-grey areas indicate regions with no depth information where measurements cannot be taken."
                 )
                 measure_text = gr.Markdown("")
mapanything/utils/hf_utils/__init__.py ADDED
File without changes
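The empty `__init__.py` is what turns `hf_utils` into an importable subpackage of `mapanything.utils`. For illustration, the import paths this commit establishes (taken from the app.py hunk above; assumes the mapanything package is on `sys.path`, as app.py arranges):

    from mapanything.utils.hf_utils.css_and_html import (
        GRADIO_CSS,
        MEASURE_INSTRUCTIONS_HTML,
    )
    from mapanything.utils.hf_utils.hf_helpers import initialize_mapanything_model
    from mapanything.utils.hf_utils.visual_util import predictions_to_glb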
{hf_utils → mapanything/utils/hf_utils}/css_and_html.py RENAMED
@@ -164,8 +164,8 @@ def get_acknowledgements_html():
     <h3>Acknowledgements</h3>
     <p>This site builds upon code from:</p>
     <ul style="list-style: none; padding: 0;">
-        <li>🔗 <a href="https://
-        <li>🔗 <a href="https://
+        <li>🔗 <a href="https://github.com/microsoft/MoGe">MoGe (and MoGe2) on GitHub (and HuggingFace)</a></li>
+        <li>🔗 <a href="https://github.com/facebookresearch/vggt">VGGT on GitHub</a></li>
         <li>🔗 <a href="https://github.com/xiongzhu666/Sky-Segmentation-and-Post-processing">ONNX for sky segmentation</a></li>
     </ul>
     <p>We extend our gratitude to these projects for their valuable contributions to the research community.</p>
@@ -200,38 +200,6 @@ def get_gradio_theme():
     )
 
 
-# Example scene thumbnail grid CSS (if needed separately)
-THUMBNAIL_CSS = """
-/* Make thumbnails clickable with pointer cursor */
-.clickable-thumbnail img {
-    cursor: pointer !important;
-}
-
-.clickable-thumbnail:hover img {
-    cursor: pointer !important;
-    opacity: 0.8;
-    transition: opacity 0.3s ease;
-}
-
-/* Make thumbnail containers narrower horizontally */
-.clickable-thumbnail {
-    padding: 5px 2px !important;
-    margin: 0 2px !important;
-}
-
-.clickable-thumbnail .image-container {
-    margin: 0 !important;
-    padding: 0 !important;
-}
-
-.scene-info {
-    text-align: center !important;
-    padding: 5px 2px !important;
-    margin: 0 !important;
-}
-"""
-
-
 # Measure tab instructions HTML
 MEASURE_INSTRUCTIONS_HTML = """
 ### Click on the image to measure the distance between two points.
mapanything/utils/hf_utils/hf_helpers.py ADDED
@@ -0,0 +1,186 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+Helper functions for HuggingFace integration and model initialization.
+"""
+
+import json
+import os
+
+
+def load_hf_token():
+    """Load HuggingFace access token from local file"""
+    token_file_paths = [
+        "/home/aknapitsch/hf_token.txt",
+    ]
+
+    for token_path in token_file_paths:
+        if os.path.exists(token_path):
+            try:
+                with open(token_path, "r") as f:
+                    token = f.read().strip()
+                print(f"Loaded HuggingFace token from: {token_path}")
+                return token
+            except Exception as e:
+                print(f"Error reading token from {token_path}: {e}")
+                continue
+        else:
+            print(token_path, "token_path doesnt exist")
+
+    # Also try environment variable
+    # see https://huggingface.co/docs/hub/spaces-overview#managing-secrets on options
+    token = (
+        os.getenv("HF_TOKEN")
+        or os.getenv("HUGGING_FACE_HUB_TOKEN")
+        or os.getenv("HUGGING_FACE_MODEL_TOKEN")
+    )
+    if token:
+        print("Loaded HuggingFace token from environment variable")
+        return token
+
+    print(
+        "Warning: No HuggingFace token found. Model loading may fail for private repositories."
+    )
+    return None
+
+
+def init_hydra_config(config_path, overrides=None):
+    """Initialize Hydra config"""
+    import hydra
+
+    config_dir = os.path.dirname(config_path)
+    config_name = os.path.basename(config_path).split(".")[0]
+    relative_path = os.path.relpath(config_dir, os.path.dirname(__file__))
+    hydra.core.global_hydra.GlobalHydra.instance().clear()
+    hydra.initialize(version_base=None, config_path=relative_path)
+    if overrides is not None:
+        cfg = hydra.compose(config_name=config_name, overrides=overrides)
+    else:
+        cfg = hydra.compose(config_name=config_name)
+    return cfg
+
+
+def initialize_mapanything_model(high_level_config, device):
+    """
+    Initialize MapAnything model with three-tier fallback approach:
+    1. Try HuggingFace from_pretrained()
+    2. Download HF config + use local model factory + load HF weights
+    3. Pure local configuration fallback
+
+    Args:
+        high_level_config (dict): Configuration dictionary containing model settings
+        device (torch.device): Device to load the model on
+
+    Returns:
+        torch.nn.Module: Initialized MapAnything model
+    """
+    import torch
+    from huggingface_hub import hf_hub_download
+    from mapanything.models import init_model, MapAnything
+
+    print("Initializing MapAnything model...")
+
+    # Initialize Hydra config and create model from configuration
+    cfg = init_hydra_config(
+        high_level_config["path"], overrides=high_level_config["config_overrides"]
+    )
+
+    # Try using from_pretrained first
+    try:
+        print("Loading MapAnything model from_pretrained...")
+        model = MapAnything.from_pretrained(high_level_config["hf_model_name"]).to(
+            device
+        )
+        print("Loading MapAnything model from_pretrained succeeded...")
+        return model
+    except Exception as e:
+        print(f"from_pretrained failed: {e}")
+        print("Falling back to local configuration approach using hf_hub_download...")
+
+    # Create model from local configuration instead of using from_pretrained
+    # Try to download and use the config from HuggingFace Hub
+    try:
+        print("Downloading model configuration from HuggingFace Hub...")
+        config_path = hf_hub_download(
+            repo_id=high_level_config["hf_model_name"],
+            filename=high_level_config["config_name"],
+            token=load_hf_token(),
+        )
+
+        # Load the config from the downloaded file
+        with open(config_path, "r") as f:
+            downloaded_config = json.load(f)
+
+        print("Using downloaded configuration for model initialization")
+        model = init_model(
+            model_str=downloaded_config.get(
+                "model_str", high_level_config["model_str"]
+            ),
+            model_config=downloaded_config.get(
+                "model_config", cfg.model.model_config
+            ),
+            torch_hub_force_reload=high_level_config.get(
+                "torch_hub_force_reload", False
+            ),
+        )
+    except Exception as config_e:
+        print(f"Failed to download/use HuggingFace config: {config_e}")
+        print("Falling back to local configuration...")
+        # Fall back to local configuration as before
+        model = init_model(
+            model_str=cfg.model.model_str,
+            model_config=cfg.model.model_config,
+            torch_hub_force_reload=high_level_config.get(
+                "torch_hub_force_reload", False
+            ),
+        )
+
+    # Load the pretrained weights from HuggingFace Hub
+    try:
+        # First, let's see what files are available in the repository
+        try:
+            checkpoint_filename = high_level_config["checkpoint_name"]
+            # Download the model weights
+            checkpoint_path = hf_hub_download(
+                repo_id=high_level_config["hf_model_name"],
+                filename=checkpoint_filename,
+                token=load_hf_token(),
+            )
+
+            # Load the weights
+            print("start loading checkpoint")
+            if checkpoint_filename.endswith(".safetensors"):
+                from safetensors.torch import load_file
+
+                checkpoint = load_file(checkpoint_path)
+            else:
+                checkpoint = torch.load(
+                    checkpoint_path, map_location="cpu", weights_only=False
+                )
+
+            print("start loading state_dict")
+            if "model" in checkpoint:
+                model.load_state_dict(checkpoint["model"], strict=False)
+            elif "state_dict" in checkpoint:
+                model.load_state_dict(checkpoint["state_dict"], strict=False)
+            else:
+                model.load_state_dict(checkpoint, strict=False)
+
+            print(
+                f"Successfully loaded pretrained weights from HuggingFace Hub ({checkpoint_filename})"
+            )
+
+        except Exception as inner_e:
+            print(f"Error listing repository files or loading weights: {inner_e}")
+            raise inner_e
+
+    except Exception as e:
+        print(f"Warning: Could not load pretrained weights: {e}")
+        print("Proceeding with randomly initialized model...")
+
+    model = model.to(device)
+    return model
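A hedged usage sketch for the new helper, wiring it to the config keys app.py defines in this commit (the device fallback below is an assumption; on the Space, initialization actually happens inside the `@spaces.GPU`-decorated `run_model`):

    import torch

    from mapanything.utils.hf_utils.hf_helpers import initialize_mapanything_model

    high_level_config = {
        "path": "configs/train.yaml",
        "hf_model_name": "facebook/map-anything-apache",
        "model_str": "mapanything",
        "config_overrides": [
            "machine=aws",
            "model=mapanything",
            "model/task=images_only",
            "model.encoder.uses_torch_hub=false",
        ],
        "checkpoint_name": "model.safetensors",
        "config_name": "config.json",
    }

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = initialize_mapanything_model(high_level_config, device)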
{hf_utils → mapanything/utils/hf_utils}/visual_util.py RENAMED
File without changes
requirements.txt CHANGED
@@ -1,130 +1,22 @@
-filelock==3.19.1
-flatbuffers==25.2.10
-fonttools==4.59.2
-fsspec==2025.9.0
-gradio==5.44.1
-gradio_client==1.12.1
-groovy==0.1.2
-h11==0.16.0
-hf-xet==1.1.9
-httpcore==1.0.9
-httpx==0.28.1
-huggingface-hub==0.34.4
-humanfriendly==10.0
-hydra-core==1.3.2
-idna==3.10
-ipdb==0.13.13
-ipython==8.37.0
-jaxtyping==0.3.2
-jedi==0.19.2
-Jinja2==3.1.6
-joblib==1.5.2
-kiwisolver==1.4.9
-markdown-it-py==4.0.0
-MarkupSafe==3.0.2
-matplotlib==3.10.6
-matplotlib-inline==0.1.7
-mdurl==0.1.2
-minio==7.2.16
-mpmath==1.3.0
-networkx==3.4.2
-numpy==2.2.6
-nvidia-cublas-cu12==12.8.4.1
-nvidia-cuda-cupti-cu12==12.8.90
-nvidia-cuda-nvrtc-cu12==12.8.93
-nvidia-cuda-runtime-cu12==12.8.90
-nvidia-cudnn-cu12==9.10.2.21
-nvidia-cufft-cu12==11.3.3.83
-nvidia-cufile-cu12==1.13.1.3
-nvidia-curand-cu12==10.3.9.90
-nvidia-cusolver-cu12==11.7.3.90
-nvidia-cusparse-cu12==12.5.8.93
-nvidia-cusparselt-cu12==0.7.1
-nvidia-nccl-cu12==2.27.3
-nvidia-nvjitlink-cu12==12.8.93
-nvidia-nvtx-cu12==12.8.90
-omegaconf==2.3.0
-onnxruntime==1.22.1
-opencv-python-headless==4.12.0.88
-orjson==3.11.3
-packaging==25.0
-pandas==2.3.2
-parso==0.8.5
-pexpect==4.9.0
-pillow==11.3.0
-prompt_toolkit==3.0.52
-protobuf==6.32.0
-psutil==5.9.8
-ptyprocess==0.7.0
-pure_eval==0.2.3
-pyarrow==21.0.0
-pycparser==2.22
-pycryptodome==3.23.0
-pydantic==2.11.7
-pydantic_core==2.33.2
-pydub==0.25.1
-Pygments==2.19.2
-pyparsing==3.2.3
-python-dateutil==2.9.0.post0
-python-multipart==0.0.20
-pytz==2025.2
-PyYAML==6.0.2
-requests==2.32.5
-rerun-sdk==0.24.1
-rich==14.1.0
-ruff==0.12.11
-safehttpx==0.1.6
-safetensors==0.6.2
-scikit-learn==1.7.1
-scipy==1.15.3
-semantic-version==2.10.0
-shellingham==1.5.4
-six==1.17.0
-sniffio==1.3.1
-spaces==0.40.1
-stack-data==0.6.3
-starlette==0.47.3
-sympy==1.14.0
-threadpoolctl==3.6.0
-tomli==2.2.1
-tomlkit==0.13.3
-torch==2.8.0
-torchaudio==2.8.0
-torchvision==0.23.0
-tqdm==4.67.1
-traitlets==5.14.3
-trimesh==4.8.1
-triton==3.4.0
-turm==0.7.3
-typer==0.17.3
-typing-inspection==0.4.1
-typing_extensions==4.15.0
-tzdata==2025.2
-uniception==0.1.4
-urllib3==2.5.0
-uvicorn==0.35.0
-wadler_lindig==0.1.7
-wcwidth==0.2.13
-websockets==15.0.1
+--extra-index-url https://download.pytorch.org/whl/cu113
+torch
+torchvision
+torchaudio
+gradio
+huggingface-hub
+numpy
+opencv-python-headless
+Pillow
+matplotlib
+scikit-learn
+scipy
+spaces
+hydra-core
+omegaconf
+trimesh
+einops
+requests
+psutil
+tqdm
+safetensors
+uniception==0.1.4