Spaces:

facebook
/

map-anything

Running on Zero

App Files Files Community

nkeetha user committed 5 days ago

Commit

9b4efbe

1 Parent(s): 7111539

MapAnything V1.1

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

configs/dataset/bedlam_wai/default.yaml +0 -3
configs/dataset/bedlam_wai/train/default.yaml +0 -26
configs/dataset/bedlam_wai/val/default.yaml +0 -26
configs/dataset/{benchmark_518_snpp_tav2.yaml → benchmark_504_eth3d_snpp_tav2.yaml} +6 -3
configs/dataset/benchmark_512_snpp_tav2.yaml +0 -17
configs/dataset/bmvs_518_many_ar_48ipg_8g.yaml +23 -0
configs/dataset/default.yaml +0 -6
configs/dataset/dtu_wai/default.yaml +0 -2
configs/dataset/dtu_wai/test/default.yaml +0 -22
configs/dataset/gta_sfm_wai/default.yaml +0 -3
configs/dataset/gta_sfm_wai/train/default.yaml +0 -26
configs/dataset/gta_sfm_wai/val/default.yaml +0 -26
configs/dataset/matrixcity_wai/default.yaml +0 -3
configs/dataset/matrixcity_wai/train/default.yaml +0 -26
configs/dataset/matrixcity_wai/val/default.yaml +0 -26
configs/dataset/{megatrain_12d_518_many_ar_24ipg_16g.yaml → megatrain_13d_518_many_ar_24ipg_8g.yaml} +15 -12
configs/dataset/{megatrain_11d_se_518_many_ar_48ipg_64g.yaml → megatrain_13d_518_many_ar_36ipg_64g.yaml} +17 -11
configs/dataset/megatrain_13d_518_many_ar_48ipg_8g_mono.yaml +59 -0
configs/dataset/megatrain_6d_518_many_ar_36ipg_64g.yaml +38 -0
configs/dataset/structured3d_wai/default.yaml +0 -3
configs/dataset/structured3d_wai/train/default.yaml +0 -26
configs/dataset/structured3d_wai/val/default.yaml +0 -26
configs/dataset/xrooms_wai/default.yaml +0 -3
configs/dataset/xrooms_wai/train/default.yaml +0 -26
configs/dataset/xrooms_wai/val/default.yaml +0 -26
configs/loss/moge2_loss.yaml +4 -0
configs/loss/overall_loss_highpm_plus_rel_pose.yaml +4 -0
configs/loss/overall_loss_highpm_plus_rel_pose_no_conf.yaml +4 -0
configs/loss/overall_loss_highpm_rel_pose_no_ref_view.yaml +4 -0
configs/loss/pi3_loss.yaml +4 -0
configs/machine/aws.yaml +7 -5
configs/machine/default.yaml +2 -0
configs/machine/psc.yaml +5 -3
configs/machine/psc_yuchen.yaml +0 -13
configs/machine/xri_dgx.yaml +4 -2
configs/model/da3.yaml +13 -0
configs/model/da3_nested.yaml +13 -0
configs/model/encoder/dinov2_giant_24_layers.yaml +18 -0
configs/model/info_sharing/aat_ifr_16_layers_dinov2_vitg_init.yaml +33 -0
configs/model/info_sharing/aat_ifr_16_layers_vitg_dim.yaml +31 -0
configs/model/mapanything.yaml +4 -2
configs/model/{mapanything_large_inference.yaml → mapanything_dino_init.yaml} +4 -2
configs/model/mapanything_inference.yaml +0 -18
configs/model/{mapanything_large.yaml → mapanything_v1.yaml} +1 -1
configs/rmvd_benchmark.yaml +1 -1
configs/train_params/moge2_finetune.yaml +6 -0
configs/train_params/pi3_finetune.yaml +16 -0
configs/train_params/vggt_finetune.yaml +1 -1
mapanything/datasets/__init__.py +5 -6
mapanything/datasets/base/base_dataset.py +7 -2

configs/dataset/bedlam_wai/default.yaml DELETED Viewed

@@ -1,3 +0,0 @@
-defaults:
-  - train: default
-  - val: default

configs/dataset/bedlam_wai/train/default.yaml DELETED Viewed

@@ -1,26 +0,0 @@
-dataset_str:
-  "BedlamWAI(
-    split='${dataset.bedlam_wai.train.split}',
-    resolution=${dataset.bedlam_wai.train.dataset_resolution},
-    principal_point_centered=${dataset.bedlam_wai.train.principal_point_centered},
-    aug_crop=${dataset.bedlam_wai.train.aug_crop},
-    transform='${dataset.bedlam_wai.train.transform}',
-    data_norm_type='${dataset.bedlam_wai.train.data_norm_type}',
-    ROOT='${dataset.bedlam_wai.train.ROOT}',
-    dataset_metadata_dir='${dataset.bedlam_wai.train.dataset_metadata_dir}',
-    overfit_num_sets=${dataset.bedlam_wai.train.overfit_num_sets},
-    variable_num_views=${dataset.bedlam_wai.train.variable_num_views},
-    num_views=${dataset.bedlam_wai.train.num_views},
-    covisibility_thres=${dataset.bedlam_wai.train.covisibility_thres})"
-split: 'train'
-dataset_resolution: ${dataset.resolution_train}
-principal_point_centered: ${dataset.principal_point_centered}
-aug_crop: 16
-transform: 'colorjitter+grayscale+gaublur'
-data_norm_type: ${model.data_norm_type}
-ROOT: ${root_data_dir}/bedlam
-dataset_metadata_dir: ${mapanything_dataset_metadata_dir}
-overfit_num_sets: null
-variable_num_views: ${dataset.train.variable_num_views}
-num_views: ${dataset.num_views}
-covisibility_thres: 0.25

configs/dataset/bedlam_wai/val/default.yaml DELETED Viewed

@@ -1,26 +0,0 @@
-dataset_str:
-  "BedlamWAI(
-    split='${dataset.bedlam_wai.val.split}',
-    resolution=${dataset.bedlam_wai.val.dataset_resolution},
-    principal_point_centered=${dataset.bedlam_wai.val.principal_point_centered},
-    seed=${dataset.bedlam_wai.val.seed},
-    transform='${dataset.bedlam_wai.val.transform}',
-    data_norm_type='${dataset.bedlam_wai.val.data_norm_type}',
-    ROOT='${dataset.bedlam_wai.val.ROOT}',
-    dataset_metadata_dir='${dataset.bedlam_wai.val.dataset_metadata_dir}',
-    overfit_num_sets=${dataset.bedlam_wai.val.overfit_num_sets},
-    variable_num_views=${dataset.bedlam_wai.val.variable_num_views},
-    num_views=${dataset.bedlam_wai.val.num_views},
-    covisibility_thres=${dataset.bedlam_wai.val.covisibility_thres})"
-split: 'val'
-dataset_resolution: ${dataset.resolution_val_bedlam}
-principal_point_centered: ${dataset.principal_point_centered}
-seed: 777
-transform: 'imgnorm'
-data_norm_type: ${model.data_norm_type}
-ROOT: ${root_data_dir}/bedlam
-dataset_metadata_dir: ${mapanything_dataset_metadata_dir}
-overfit_num_sets: null
-variable_num_views: ${dataset.val.variable_num_views}
-num_views: ${dataset.num_views}
-covisibility_thres: 0.25

configs/dataset/{benchmark_518_snpp_tav2.yaml → benchmark_504_eth3d_snpp_tav2.yaml} RENAMED Viewed

@@ -5,13 +5,16 @@ defaults:
 num_views: 2
 # Test Resolution
-resolution_test_scannetpp: ${dataset.resolution_options.518_1_52_ar}
-resolution_test_tav2_wb: ${dataset.resolution_options.518_1_00_ar}
 # Test Set
 # Sample 10 multi-view sets from each scene
 # ScanNet++V2: 30 scenes
 # TartanAirV2-WB: 5 scenes
 test_dataset:
-  "+ 300 @ ${dataset.scannetpp_wai.test.dataset_str}
   + 50 @ ${dataset.tav2_wb_wai.test.dataset_str}"

 num_views: 2
 # Test Resolution
+resolution_test_eth3d: ${dataset.resolution_options.504_1_52_ar}
+resolution_test_scannetpp: ${dataset.resolution_options.504_1_52_ar}
+resolution_test_tav2_wb: ${dataset.resolution_options.504_1_00_ar}
 # Test Set
 # Sample 10 multi-view sets from each scene
+# ETH3D: 13 scenes
 # ScanNet++V2: 30 scenes
 # TartanAirV2-WB: 5 scenes
 test_dataset:
+  "+ 130 @ ${dataset.eth3d_wai.test.dataset_str}
+  + 300 @ ${dataset.scannetpp_wai.test.dataset_str}
   + 50 @ ${dataset.tav2_wb_wai.test.dataset_str}"

configs/dataset/benchmark_512_snpp_tav2.yaml DELETED Viewed

@@ -1,17 +0,0 @@
-defaults:
-  - default
-# Number of views parameter for the multi-view datasets
-num_views: 2
-# Test Resolution
-resolution_test_scannetpp: ${dataset.resolution_options.512_1_52_ar}
-resolution_test_tav2_wb: ${dataset.resolution_options.512_1_00_ar}
-# Test Set
-# Sample 10 multi-view sets from each scene
-# ScanNet++V2: 30 scenes
-# TartanAirV2-WB: 5 scenes
-test_dataset:
-  "+ 300 @ ${dataset.scannetpp_wai.test.dataset_str}
-  + 50 @ ${dataset.tav2_wb_wai.test.dataset_str}"

configs/dataset/bmvs_518_many_ar_48ipg_8g.yaml ADDED Viewed

	@@ -0,0 +1,23 @@

+defaults:
+  - default
+# Number of views parameter for the multi-view datasets
+num_views: 4
+train:
+  # If True, the number of views can vary from batch to batch. The maximum number of views is num_views and minimum is 2. (On by default for N-view training)
+  variable_num_views: true
+# Train Resolution
+resolution_train: ${dataset.resolution_options.518_many_ar}
+# Validation Resolution
+resolution_val_blendedmvs: ${dataset.resolution_options.518_1_33_ar}
+# Training Set
+train_dataset:
+  "+ 140_000 @ ${dataset.blendedmvs_wai.train.dataset_str}"
+# Validation Set
+test_dataset:
+  "+ 4_000 @ ${dataset.blendedmvs_wai.val.dataset_str}"

configs/dataset/default.yaml CHANGED Viewed

@@ -1,14 +1,10 @@
 defaults:
   - resolution_options: default
   - ase_wai: default
-  - bedlam_wai: default
   - blendedmvs_wai: default
   - dl3dv_wai: default
-  - dtu_wai: default
   - dynamicreplica_wai: default
   - eth3d_wai: default
-  - gta_sfm_wai: default
-  - matrixcity_wai: default
   - megadepth_wai: default
   - mpsd_wai: default
   - mvs_synth_wai: default
@@ -16,10 +12,8 @@ defaults:
   - sailvos3d_wai: default
   - scannetpp_wai: default
   - spring_wai: default
-  - structured3d_wai: default
   - tav2_wb_wai: default
   - unrealstereo4k_wai: default
-  - xrooms_wai: default
 # Training Set, For example: BlendedMVS(split='train', resolution=(512, 384), transform=...)
 train_dataset: ???

 defaults:
   - resolution_options: default
   - ase_wai: default
   - blendedmvs_wai: default
   - dl3dv_wai: default
   - dynamicreplica_wai: default
   - eth3d_wai: default
   - megadepth_wai: default
   - mpsd_wai: default
   - mvs_synth_wai: default
   - sailvos3d_wai: default
   - scannetpp_wai: default
   - spring_wai: default
   - tav2_wb_wai: default
   - unrealstereo4k_wai: default
 # Training Set, For example: BlendedMVS(split='train', resolution=(512, 384), transform=...)
 train_dataset: ???

configs/dataset/dtu_wai/default.yaml DELETED Viewed

@@ -1,2 +0,0 @@
-defaults:
-  - test: default

configs/dataset/dtu_wai/test/default.yaml DELETED Viewed

@@ -1,22 +0,0 @@
-dataset_str:
-  "DTUWAI(
-    resolution=${dataset.dtu_wai.test.dataset_resolution},
-    principal_point_centered=${dataset.dtu_wai.test.principal_point_centered},
-    seed=${dataset.dtu_wai.test.seed},
-    transform='${dataset.dtu_wai.test.transform}',
-    data_norm_type='${dataset.dtu_wai.test.data_norm_type}',
-    ROOT='${dataset.dtu_wai.test.ROOT}',
-    dataset_metadata_dir='${dataset.dtu_wai.test.dataset_metadata_dir}',
-    variable_num_views=${dataset.dtu_wai.test.variable_num_views},
-    num_views=${dataset.dtu_wai.test.num_views},
-    covisibility_thres=${dataset.dtu_wai.test.covisibility_thres})"
-dataset_resolution: ${dataset.resolution_test_dtu}
-principal_point_centered: ${dataset.principal_point_centered}
-seed: 777
-transform: 'imgnorm'
-data_norm_type: ${model.data_norm_type}
-ROOT: ${root_data_dir}/dtu
-dataset_metadata_dir: ${mapanything_dataset_metadata_dir}
-variable_num_views: ${dataset.test.variable_num_views}
-num_views: ${dataset.num_views}
-covisibility_thres: 0.25

configs/dataset/gta_sfm_wai/default.yaml DELETED Viewed

@@ -1,3 +0,0 @@
-defaults:
-  - train: default
-  - val: default

configs/dataset/gta_sfm_wai/train/default.yaml DELETED Viewed

@@ -1,26 +0,0 @@
-dataset_str:
-  "GTASfMWAI(
-    split='${dataset.gta_sfm_wai.train.split}',
-    resolution=${dataset.gta_sfm_wai.train.dataset_resolution},
-    principal_point_centered=${dataset.gta_sfm_wai.train.principal_point_centered},
-    aug_crop=${dataset.gta_sfm_wai.train.aug_crop},
-    transform='${dataset.gta_sfm_wai.train.transform}',
-    data_norm_type='${dataset.gta_sfm_wai.train.data_norm_type}',
-    ROOT='${dataset.gta_sfm_wai.train.ROOT}',
-    dataset_metadata_dir='${dataset.gta_sfm_wai.train.dataset_metadata_dir}',
-    overfit_num_sets=${dataset.gta_sfm_wai.train.overfit_num_sets},
-    variable_num_views=${dataset.gta_sfm_wai.train.variable_num_views},
-    num_views=${dataset.gta_sfm_wai.train.num_views},
-    covisibility_thres=${dataset.gta_sfm_wai.train.covisibility_thres})"
-split: 'train'
-dataset_resolution: ${dataset.resolution_train}
-principal_point_centered: ${dataset.principal_point_centered}
-aug_crop: 16
-transform: 'colorjitter+grayscale+gaublur'
-data_norm_type: ${model.data_norm_type}
-ROOT: ${root_data_dir}/gta_sfm
-dataset_metadata_dir: ${mapanything_dataset_metadata_dir}
-overfit_num_sets: null
-variable_num_views: ${dataset.train.variable_num_views}
-num_views: ${dataset.num_views}
-covisibility_thres: 0.25

configs/dataset/gta_sfm_wai/val/default.yaml DELETED Viewed

@@ -1,26 +0,0 @@
-dataset_str:
-  "GTASfMWAI(
-    split='${dataset.gta_sfm_wai.val.split}',
-    resolution=${dataset.gta_sfm_wai.val.dataset_resolution},
-    principal_point_centered=${dataset.gta_sfm_wai.val.principal_point_centered},
-    seed=${dataset.gta_sfm_wai.val.seed},
-    transform='${dataset.gta_sfm_wai.val.transform}',
-    data_norm_type='${dataset.gta_sfm_wai.val.data_norm_type}',
-    ROOT='${dataset.gta_sfm_wai.val.ROOT}',
-    dataset_metadata_dir='${dataset.gta_sfm_wai.val.dataset_metadata_dir}',
-    overfit_num_sets=${dataset.gta_sfm_wai.val.overfit_num_sets},
-    variable_num_views=${dataset.gta_sfm_wai.val.variable_num_views},
-    num_views=${dataset.gta_sfm_wai.val.num_views},
-    covisibility_thres=${dataset.gta_sfm_wai.val.covisibility_thres})"
-split: 'val'
-dataset_resolution: ${dataset.resolution_val_gta_sfm}
-principal_point_centered: ${dataset.principal_point_centered}
-seed: 777
-transform: 'imgnorm'
-data_norm_type: ${model.data_norm_type}
-ROOT: ${root_data_dir}/gta_sfm
-dataset_metadata_dir: ${mapanything_dataset_metadata_dir}
-overfit_num_sets: null
-variable_num_views: ${dataset.val.variable_num_views}
-num_views: ${dataset.num_views}
-covisibility_thres: 0.25

configs/dataset/matrixcity_wai/default.yaml DELETED Viewed

@@ -1,3 +0,0 @@
-defaults:
-  - train: default
-  - val: default

configs/dataset/matrixcity_wai/train/default.yaml DELETED Viewed

@@ -1,26 +0,0 @@
-dataset_str:
-  "MatrixCityWAI(
-    split='${dataset.matrixcity_wai.train.split}',
-    resolution=${dataset.matrixcity_wai.train.dataset_resolution},
-    principal_point_centered=${dataset.matrixcity_wai.train.principal_point_centered},
-    aug_crop=${dataset.matrixcity_wai.train.aug_crop},
-    transform='${dataset.matrixcity_wai.train.transform}',
-    data_norm_type='${dataset.matrixcity_wai.train.data_norm_type}',
-    ROOT='${dataset.matrixcity_wai.train.ROOT}',
-    dataset_metadata_dir='${dataset.matrixcity_wai.train.dataset_metadata_dir}',
-    overfit_num_sets=${dataset.matrixcity_wai.train.overfit_num_sets},
-    variable_num_views=${dataset.matrixcity_wai.train.variable_num_views},
-    num_views=${dataset.matrixcity_wai.train.num_views},
-    covisibility_thres=${dataset.matrixcity_wai.train.covisibility_thres})"
-split: 'train'
-dataset_resolution: ${dataset.resolution_train}
-principal_point_centered: ${dataset.principal_point_centered}
-aug_crop: 16
-transform: 'colorjitter+grayscale+gaublur'
-data_norm_type: ${model.data_norm_type}
-ROOT: ${root_data_dir}/matrixcity
-dataset_metadata_dir: ${mapanything_dataset_metadata_dir}
-overfit_num_sets: null
-variable_num_views: ${dataset.train.variable_num_views}
-num_views: ${dataset.num_views}
-covisibility_thres: 0.25

configs/dataset/matrixcity_wai/val/default.yaml DELETED Viewed

@@ -1,26 +0,0 @@
-dataset_str:
-  "MatrixCityWAI(
-    split='${dataset.matrixcity_wai.val.split}',
-    resolution=${dataset.matrixcity_wai.val.dataset_resolution},
-    principal_point_centered=${dataset.matrixcity_wai.val.principal_point_centered},
-    seed=${dataset.matrixcity_wai.val.seed},
-    transform='${dataset.matrixcity_wai.val.transform}',
-    data_norm_type='${dataset.matrixcity_wai.val.data_norm_type}',
-    ROOT='${dataset.matrixcity_wai.val.ROOT}',
-    dataset_metadata_dir='${dataset.matrixcity_wai.val.dataset_metadata_dir}',
-    overfit_num_sets=${dataset.matrixcity_wai.val.overfit_num_sets},
-    variable_num_views=${dataset.matrixcity_wai.val.variable_num_views},
-    num_views=${dataset.matrixcity_wai.val.num_views},
-    covisibility_thres=${dataset.matrixcity_wai.val.covisibility_thres})"
-split: 'val'
-dataset_resolution: ${dataset.resolution_val_matrixcity}
-principal_point_centered: ${dataset.principal_point_centered}
-seed: 777
-transform: 'imgnorm'
-data_norm_type: ${model.data_norm_type}
-ROOT: ${root_data_dir}/matrixcity
-dataset_metadata_dir: ${mapanything_dataset_metadata_dir}
-overfit_num_sets: null
-variable_num_views: ${dataset.val.variable_num_views}
-num_views: ${dataset.num_views}
-covisibility_thres: 0.25

configs/dataset/{megatrain_12d_518_many_ar_24ipg_16g.yaml → megatrain_13d_518_many_ar_24ipg_8g.yaml} RENAMED Viewed

@@ -14,6 +14,7 @@ resolution_train: ${dataset.resolution_options.518_many_ar}
 # Validation Resolution
 resolution_val_ase: ${dataset.resolution_options.518_1_00_ar}
 resolution_val_blendedmvs: ${dataset.resolution_options.518_1_33_ar}
 resolution_val_dynamicreplica: ${dataset.resolution_options.518_1_77_ar}
 resolution_val_megadepth: ${dataset.resolution_options.518_1_52_ar}
 resolution_val_mpsd: ${dataset.resolution_options.518_1_77_ar}
@@ -27,23 +28,25 @@ resolution_val_unrealstereo4k: ${dataset.resolution_options.518_1_77_ar}
 # Training Set
 train_dataset:
-  "+ 58_000 @ ${dataset.ase_wai.train.dataset_str}
-  + 58_000 @ ${dataset.blendedmvs_wai.train.dataset_str}
-  + 45_000 @ ${dataset.dynamicreplica_wai.train.dataset_str}
-  + 58_000 @ ${dataset.megadepth_wai.train.dataset_str}
-  + 58_000 @ ${dataset.mpsd_wai.train.dataset_str}
-  + 58_000 @ ${dataset.mvs_synth_wai.train.dataset_str}
-  + 58_000 @ ${dataset.paralleldomain4d_wai.train.dataset_str}
-  + 58_000 @ ${dataset.sailvos3d_wai.train.dataset_str}
-  + 58_000 @ ${dataset.scannetpp_wai.train.dataset_str}
-  + 2_000 @ ${dataset.spring_wai.train.dataset_str}
-  + 58_000 @ ${dataset.tav2_wb_wai.train.dataset_str}
-  + 5_500 @ ${dataset.unrealstereo4k_wai.train.dataset_str}"
 # Validation Set
 test_dataset:
   "+ 4_000 @ ${dataset.ase_wai.val.dataset_str}
   + 4_000 @ ${dataset.blendedmvs_wai.val.dataset_str}
   + 4_000 @ ${dataset.dynamicreplica_wai.val.dataset_str}
   + 4_000 @ ${dataset.megadepth_wai.val.dataset_str}
   + 4_000 @ ${dataset.mpsd_wai.val.dataset_str}

 # Validation Resolution
 resolution_val_ase: ${dataset.resolution_options.518_1_00_ar}
 resolution_val_blendedmvs: ${dataset.resolution_options.518_1_33_ar}
+resolution_val_dl3dv: ${dataset.resolution_options.518_1_77_ar}
 resolution_val_dynamicreplica: ${dataset.resolution_options.518_1_77_ar}
 resolution_val_megadepth: ${dataset.resolution_options.518_1_52_ar}
 resolution_val_mpsd: ${dataset.resolution_options.518_1_77_ar}
 # Training Set
 train_dataset:
+  "+ 26_250 @ ${dataset.ase_wai.train.dataset_str}
+  + 26_250 @ ${dataset.blendedmvs_wai.train.dataset_str}
+  + 26_250 @ ${dataset.dl3dv_wai.train.dataset_str}
+  + 20_000 @ ${dataset.dynamicreplica_wai.train.dataset_str}
+  + 26_250 @ ${dataset.megadepth_wai.train.dataset_str}
+  + 26_250 @ ${dataset.mpsd_wai.train.dataset_str}
+  + 26_250 @ ${dataset.mvs_synth_wai.train.dataset_str}
+  + 26_250 @ ${dataset.paralleldomain4d_wai.train.dataset_str}
+  + 26_250 @ ${dataset.sailvos3d_wai.train.dataset_str}
+  + 26_250 @ ${dataset.scannetpp_wai.train.dataset_str}
+  + 1_000 @ ${dataset.spring_wai.train.dataset_str}
+  + 26_250 @ ${dataset.tav2_wb_wai.train.dataset_str}
+  + 2_750 @ ${dataset.unrealstereo4k_wai.train.dataset_str}"
 # Validation Set
 test_dataset:
   "+ 4_000 @ ${dataset.ase_wai.val.dataset_str}
   + 4_000 @ ${dataset.blendedmvs_wai.val.dataset_str}
+  + 4_000 @ ${dataset.dl3dv_wai.val.dataset_str}
   + 4_000 @ ${dataset.dynamicreplica_wai.val.dataset_str}
   + 4_000 @ ${dataset.megadepth_wai.val.dataset_str}
   + 4_000 @ ${dataset.mpsd_wai.val.dataset_str}

configs/dataset/{megatrain_11d_se_518_many_ar_48ipg_64g.yaml → megatrain_13d_518_many_ar_36ipg_64g.yaml} RENAMED Viewed

@@ -13,8 +13,10 @@ resolution_train: ${dataset.resolution_options.518_many_ar}
 # Validation Resolution
 resolution_val_ase: ${dataset.resolution_options.518_1_00_ar}
 resolution_val_dl3dv: ${dataset.resolution_options.518_1_77_ar}
 resolution_val_dynamicreplica: ${dataset.resolution_options.518_1_77_ar}
 resolution_val_mpsd: ${dataset.resolution_options.518_1_77_ar}
 resolution_val_mvs_synth: ${dataset.resolution_options.518_1_77_ar}
 resolution_val_paralleldomain4d: ${dataset.resolution_options.518_1_33_ar}
@@ -26,23 +28,27 @@ resolution_val_unrealstereo4k: ${dataset.resolution_options.518_1_77_ar}
 # Training Set
 train_dataset:
-  "+ 2_450_000 @ ${dataset.ase_wai.train.dataset_str}
-  + 250_000 @ ${dataset.dl3dv_wai.train.dataset_str}
-  + 12_400 @ ${dataset.dynamicreplica_wai.train.dataset_str}
-  + 1_675_000 @ ${dataset.mpsd_wai.train.dataset_str}
-  + 3_000 @ ${dataset.mvs_synth_wai.train.dataset_str}
-  + 36_000 @ ${dataset.paralleldomain4d_wai.train.dataset_str}
-  + 4_000 @ ${dataset.sailvos3d_wai.train.dataset_str}
-  + 22_600 @ ${dataset.scannetpp_wai.train.dataset_str}
-  + 800 @ ${dataset.spring_wai.train.dataset_str}
-  + 4_000 @ ${dataset.tav2_wb_wai.train.dataset_str}
-  + 200 @ ${dataset.unrealstereo4k_wai.train.dataset_str}"
 # Validation Set
 test_dataset:
   "+ 4_000 @ ${dataset.ase_wai.val.dataset_str}
   + 4_000 @ ${dataset.dl3dv_wai.val.dataset_str}
   + 4_000 @ ${dataset.dynamicreplica_wai.val.dataset_str}
   + 4_000 @ ${dataset.mpsd_wai.val.dataset_str}
   + 4_000 @ ${dataset.mvs_synth_wai.val.dataset_str}
   + 4_000 @ ${dataset.paralleldomain4d_wai.val.dataset_str}

 # Validation Resolution
 resolution_val_ase: ${dataset.resolution_options.518_1_00_ar}
+resolution_val_blendedmvs: ${dataset.resolution_options.518_1_33_ar}
 resolution_val_dl3dv: ${dataset.resolution_options.518_1_77_ar}
 resolution_val_dynamicreplica: ${dataset.resolution_options.518_1_77_ar}
+resolution_val_megadepth: ${dataset.resolution_options.518_1_52_ar}
 resolution_val_mpsd: ${dataset.resolution_options.518_1_77_ar}
 resolution_val_mvs_synth: ${dataset.resolution_options.518_1_77_ar}
 resolution_val_paralleldomain4d: ${dataset.resolution_options.518_1_33_ar}
 # Training Set
 train_dataset:
+  "+ 315_000 @ ${dataset.ase_wai.train.dataset_str}
+  + 315_000 @ ${dataset.blendedmvs_wai.train.dataset_str}
+  + 315_000 @ ${dataset.dl3dv_wai.train.dataset_str}
+  + 240_000 @ ${dataset.dynamicreplica_wai.train.dataset_str}
+  + 315_000 @ ${dataset.megadepth_wai.train.dataset_str}
+  + 315_000 @ ${dataset.mpsd_wai.train.dataset_str}
+  + 315_000 @ ${dataset.mvs_synth_wai.train.dataset_str}
+  + 315_000 @ ${dataset.paralleldomain4d_wai.train.dataset_str}
+  + 315_000 @ ${dataset.sailvos3d_wai.train.dataset_str}
+  + 315_000 @ ${dataset.scannetpp_wai.train.dataset_str}
+  + 12_000 @ ${dataset.spring_wai.train.dataset_str}
+  + 315_000 @ ${dataset.tav2_wb_wai.train.dataset_str}
+  + 33_000 @ ${dataset.unrealstereo4k_wai.train.dataset_str}"
 # Validation Set
 test_dataset:
   "+ 4_000 @ ${dataset.ase_wai.val.dataset_str}
+  + 4_000 @ ${dataset.blendedmvs_wai.val.dataset_str}
   + 4_000 @ ${dataset.dl3dv_wai.val.dataset_str}
   + 4_000 @ ${dataset.dynamicreplica_wai.val.dataset_str}
+  + 4_000 @ ${dataset.megadepth_wai.val.dataset_str}
   + 4_000 @ ${dataset.mpsd_wai.val.dataset_str}
   + 4_000 @ ${dataset.mvs_synth_wai.val.dataset_str}
   + 4_000 @ ${dataset.paralleldomain4d_wai.val.dataset_str}

configs/dataset/megatrain_13d_518_many_ar_48ipg_8g_mono.yaml ADDED Viewed

	@@ -0,0 +1,59 @@

+defaults:
+  - default
+# Number of views parameter for the multi-view datasets
+num_views: 1
+train:
+  # If True, the number of views can vary from batch to batch. The maximum number of views is num_views and minimum is 2. (On by default for N-view training)
+  variable_num_views: true
+# Train Resolution
+resolution_train: ${dataset.resolution_options.518_many_ar}
+# Validation Resolution
+resolution_val_ase: ${dataset.resolution_options.518_1_00_ar}
+resolution_val_blendedmvs: ${dataset.resolution_options.518_1_33_ar}
+resolution_val_dl3dv: ${dataset.resolution_options.518_1_77_ar}
+resolution_val_dynamicreplica: ${dataset.resolution_options.518_1_77_ar}
+resolution_val_megadepth: ${dataset.resolution_options.518_1_52_ar}
+resolution_val_mpsd: ${dataset.resolution_options.518_1_77_ar}
+resolution_val_mvs_synth: ${dataset.resolution_options.518_1_77_ar}
+resolution_val_paralleldomain4d: ${dataset.resolution_options.518_1_33_ar}
+resolution_val_sailvos3d: ${dataset.resolution_options.518_1_52_ar}
+resolution_val_scannetpp: ${dataset.resolution_options.518_1_52_ar}
+resolution_val_spring: ${dataset.resolution_options.518_1_77_ar}
+resolution_val_tav2_wb: ${dataset.resolution_options.518_1_00_ar}
+resolution_val_unrealstereo4k: ${dataset.resolution_options.518_1_77_ar}
+# Training Set
+train_dataset:
+  "+ 105_000 @ ${dataset.ase_wai.train.dataset_str}
+  + 105_000 @ ${dataset.blendedmvs_wai.train.dataset_str}
+  + 105_000 @ ${dataset.dl3dv_wai.train.dataset_str}
+  + 80_000 @ ${dataset.dynamicreplica_wai.train.dataset_str}
+  + 105_000 @ ${dataset.megadepth_wai.train.dataset_str}
+  + 105_000 @ ${dataset.mpsd_wai.train.dataset_str}
+  + 105_000 @ ${dataset.mvs_synth_wai.train.dataset_str}
+  + 105_000 @ ${dataset.paralleldomain4d_wai.train.dataset_str}
+  + 105_000 @ ${dataset.sailvos3d_wai.train.dataset_str}
+  + 105_000 @ ${dataset.scannetpp_wai.train.dataset_str}
+  + 4_000 @ ${dataset.spring_wai.train.dataset_str}
+  + 105_000 @ ${dataset.tav2_wb_wai.train.dataset_str}
+  + 11_000 @ ${dataset.unrealstereo4k_wai.train.dataset_str}"
+# Validation Set
+test_dataset:
+  "+ 4_000 @ ${dataset.ase_wai.val.dataset_str}
+  + 4_000 @ ${dataset.blendedmvs_wai.val.dataset_str}
+  + 4_000 @ ${dataset.dl3dv_wai.val.dataset_str}
+  + 4_000 @ ${dataset.dynamicreplica_wai.val.dataset_str}
+  + 4_000 @ ${dataset.megadepth_wai.val.dataset_str}
+  + 4_000 @ ${dataset.mpsd_wai.val.dataset_str}
+  + 4_000 @ ${dataset.mvs_synth_wai.val.dataset_str}
+  + 4_000 @ ${dataset.paralleldomain4d_wai.val.dataset_str}
+  + 4_000 @ ${dataset.sailvos3d_wai.val.dataset_str}
+  + 4_000 @ ${dataset.scannetpp_wai.val.dataset_str}
+  + 500 @ ${dataset.spring_wai.val.dataset_str}
+  + 4_000 @ ${dataset.tav2_wb_wai.val.dataset_str}
+  + 500 @ ${dataset.unrealstereo4k_wai.val.dataset_str}"

configs/dataset/megatrain_6d_518_many_ar_36ipg_64g.yaml ADDED Viewed

	@@ -0,0 +1,38 @@

+defaults:
+  - default
+# Number of views parameter for the multi-view datasets
+num_views: 4
+train:
+  # If True, the number of views can vary from batch to batch. The maximum number of views is num_views and minimum is 2. (On by default for N-view training)
+  variable_num_views: true
+# Train Resolution
+resolution_train: ${dataset.resolution_options.518_many_ar}
+# Validation Resolution
+resolution_val_blendedmvs: ${dataset.resolution_options.518_1_33_ar}
+resolution_val_mpsd: ${dataset.resolution_options.518_1_77_ar}
+resolution_val_scannetpp: ${dataset.resolution_options.518_1_52_ar}
+resolution_val_spring: ${dataset.resolution_options.518_1_77_ar}
+resolution_val_tav2_wb: ${dataset.resolution_options.518_1_00_ar}
+resolution_val_unrealstereo4k: ${dataset.resolution_options.518_1_77_ar}
+# Training Set
+train_dataset:
+  "+ 840_000 @ ${dataset.blendedmvs_wai.train.dataset_str}
+  + 840_000 @ ${dataset.mpsd_wai.train.dataset_str}
+  + 840_000 @ ${dataset.scannetpp_wai.train.dataset_str}
+  + 33_000 @ ${dataset.spring_wai.train.dataset_str}
+  + 840_000 @ ${dataset.tav2_wb_wai.train.dataset_str}
+  + 87_000 @ ${dataset.unrealstereo4k_wai.train.dataset_str}"
+# Validation Set
+test_dataset:
+  "+ 4_000 @ ${dataset.blendedmvs_wai.val.dataset_str}
+  + 4_000 @ ${dataset.mpsd_wai.val.dataset_str}
+  + 4_000 @ ${dataset.scannetpp_wai.val.dataset_str}
+  + 500 @ ${dataset.spring_wai.val.dataset_str}
+  + 4_000 @ ${dataset.tav2_wb_wai.val.dataset_str}
+  + 500 @ ${dataset.unrealstereo4k_wai.val.dataset_str}"

configs/dataset/structured3d_wai/default.yaml DELETED Viewed

@@ -1,3 +0,0 @@
-defaults:
-  - train: default
-  - val: default

configs/dataset/structured3d_wai/train/default.yaml DELETED Viewed

@@ -1,26 +0,0 @@
-dataset_str:
-  "Structured3DWAI(
-    split='${dataset.structured3d_wai.train.split}',
-    resolution=${dataset.structured3d_wai.train.dataset_resolution},
-    principal_point_centered=${dataset.structured3d_wai.train.principal_point_centered},
-    aug_crop=${dataset.structured3d_wai.train.aug_crop},
-    transform='${dataset.structured3d_wai.train.transform}',
-    data_norm_type='${dataset.structured3d_wai.train.data_norm_type}',
-    ROOT='${dataset.structured3d_wai.train.ROOT}',
-    dataset_metadata_dir='${dataset.structured3d_wai.train.dataset_metadata_dir}',
-    overfit_num_sets=${dataset.structured3d_wai.train.overfit_num_sets},
-    variable_num_views=${dataset.structured3d_wai.train.variable_num_views},
-    num_views=${dataset.structured3d_wai.train.num_views},
-    covisibility_thres=${dataset.structured3d_wai.train.covisibility_thres})"
-split: 'train'
-dataset_resolution: ${dataset.resolution_train}
-principal_point_centered: ${dataset.principal_point_centered}
-aug_crop: 16
-transform: 'colorjitter+grayscale+gaublur'
-data_norm_type: ${model.data_norm_type}
-ROOT: ${root_data_dir}/structured3d
-dataset_metadata_dir: ${mapanything_dataset_metadata_dir}
-overfit_num_sets: null
-variable_num_views: ${dataset.train.variable_num_views}
-num_views: ${dataset.num_views}
-covisibility_thres: 0.25

configs/dataset/structured3d_wai/val/default.yaml DELETED Viewed

@@ -1,26 +0,0 @@
-dataset_str:
-  "Structured3DWAI(
-    split='${dataset.structured3d_wai.val.split}',
-    resolution=${dataset.structured3d_wai.val.dataset_resolution},
-    principal_point_centered=${dataset.structured3d_wai.val.principal_point_centered},
-    seed=${dataset.structured3d_wai.val.seed},
-    transform='${dataset.structured3d_wai.val.transform}',
-    data_norm_type='${dataset.structured3d_wai.val.data_norm_type}',
-    ROOT='${dataset.structured3d_wai.val.ROOT}',
-    dataset_metadata_dir='${dataset.structured3d_wai.val.dataset_metadata_dir}',
-    overfit_num_sets=${dataset.structured3d_wai.val.overfit_num_sets},
-    variable_num_views=${dataset.structured3d_wai.val.variable_num_views},
-    num_views=${dataset.structured3d_wai.val.num_views},
-    covisibility_thres=${dataset.structured3d_wai.val.covisibility_thres})"
-split: 'val'
-dataset_resolution: ${dataset.resolution_val_structured3d}
-principal_point_centered: ${dataset.principal_point_centered}
-seed: 777
-transform: 'imgnorm'
-data_norm_type: ${model.data_norm_type}
-ROOT: ${root_data_dir}/structured3d
-dataset_metadata_dir: ${mapanything_dataset_metadata_dir}
-overfit_num_sets: null
-variable_num_views: ${dataset.val.variable_num_views}
-num_views: ${dataset.num_views}
-covisibility_thres: 0.25

configs/dataset/xrooms_wai/default.yaml DELETED Viewed

@@ -1,3 +0,0 @@
-defaults:
-  - train: default
-  - val: default

configs/dataset/xrooms_wai/train/default.yaml DELETED Viewed

@@ -1,26 +0,0 @@
-dataset_str:
-  "XRoomsWAI(
-    split='${dataset.xrooms_wai.train.split}',
-    resolution=${dataset.xrooms_wai.train.dataset_resolution},
-    principal_point_centered=${dataset.xrooms_wai.train.principal_point_centered},
-    aug_crop=${dataset.xrooms_wai.train.aug_crop},
-    transform='${dataset.xrooms_wai.train.transform}',
-    data_norm_type='${dataset.xrooms_wai.train.data_norm_type}',
-    ROOT='${dataset.xrooms_wai.train.ROOT}',
-    dataset_metadata_dir='${dataset.xrooms_wai.train.dataset_metadata_dir}',
-    overfit_num_sets=${dataset.xrooms_wai.train.overfit_num_sets},
-    variable_num_views=${dataset.xrooms_wai.train.variable_num_views},
-    num_views=${dataset.xrooms_wai.train.num_views},
-    covisibility_thres=${dataset.xrooms_wai.train.covisibility_thres})"
-split: 'train'
-dataset_resolution: ${dataset.resolution_train}
-principal_point_centered: ${dataset.principal_point_centered}
-aug_crop: 16
-transform: 'colorjitter+grayscale+gaublur'
-data_norm_type: ${model.data_norm_type}
-ROOT: ${root_data_dir}/xrooms
-dataset_metadata_dir: ${mapanything_dataset_metadata_dir}
-overfit_num_sets: null
-variable_num_views: ${dataset.train.variable_num_views}
-num_views: ${dataset.num_views}
-covisibility_thres: 0.25

configs/dataset/xrooms_wai/val/default.yaml DELETED Viewed

@@ -1,26 +0,0 @@
-dataset_str:
-  "XRoomsWAI(
-    split='${dataset.xrooms_wai.val.split}',
-    resolution=${dataset.xrooms_wai.val.dataset_resolution},
-    principal_point_centered=${dataset.xrooms_wai.val.principal_point_centered},
-    seed=${dataset.xrooms_wai.val.seed},
-    transform='${dataset.xrooms_wai.val.transform}',
-    data_norm_type='${dataset.xrooms_wai.val.data_norm_type}',
-    ROOT='${dataset.xrooms_wai.val.ROOT}',
-    dataset_metadata_dir='${dataset.xrooms_wai.val.dataset_metadata_dir}',
-    overfit_num_sets=${dataset.xrooms_wai.val.overfit_num_sets},
-    variable_num_views=${dataset.xrooms_wai.val.variable_num_views},
-    num_views=${dataset.xrooms_wai.val.num_views},
-    covisibility_thres=${dataset.xrooms_wai.val.covisibility_thres})"
-split: 'val'
-dataset_resolution: ${dataset.resolution_val_xrooms}
-principal_point_centered: ${dataset.principal_point_centered}
-seed: 777
-transform: 'imgnorm'
-data_norm_type: ${model.data_norm_type}
-ROOT: ${root_data_dir}/xrooms
-dataset_metadata_dir: ${mapanything_dataset_metadata_dir}
-overfit_num_sets: null
-variable_num_views: ${dataset.val.variable_num_views}
-num_views: ${dataset.num_views}
-covisibility_thres: 0.25

configs/loss/moge2_loss.yaml ADDED Viewed

	@@ -0,0 +1,4 @@

+# Training Loss
+train_criterion: "ExcludeTopNPercentPixelLoss(Regr3D(RobustRegressionLoss(alpha=0.5, scaling_c=0.05), norm_mode='?avg_dis', loss_in_log=True, flatten_across_image_only=True), top_n_percent=5, apply_to_real_data_only=True) + 3.0 * NormalGMLoss(norm_mode='avg_dis', apply_normal_and_gm_loss_to_synthetic_data_only=True)"
+# Validation Loss
+test_criterion: "ExcludeTopNPercentPixelLoss(Regr3D(RobustRegressionLoss(alpha=0.5, scaling_c=0.05), norm_mode='?avg_dis', loss_in_log=True, flatten_across_image_only=True), top_n_percent=5, apply_to_real_data_only=True) + 3.0 * NormalGMLoss(norm_mode='avg_dis', apply_normal_and_gm_loss_to_synthetic_data_only=True)"

configs/loss/overall_loss_highpm_plus_rel_pose.yaml ADDED Viewed

	@@ -0,0 +1,4 @@

+# Training Loss
+train_criterion: "ConfAndExcludeTopNPercentPixelLoss(FactoredGeometryScaleRegr3DPlusNormalGMLoss(RobustRegressionLoss(alpha=0.5, scaling_c=0.05), norm_mode='avg_dis', depth_type_for_loss='depth_along_ray', loss_in_log=True, flatten_across_image_only=True, compute_absolute_pose_loss=True, compute_pairwise_relative_pose_loss=True, convert_predictions_to_view0_frame=False, compute_world_frame_points_loss=True, apply_normal_and_gm_loss_to_synthetic_data_only=True, cam_frame_points_loss_weight=0.1, depth_loss_weight=0.1, ray_directions_loss_weight=0.1, pose_quats_loss_weight=0.1, pose_trans_loss_weight=0.1, scale_loss_weight=0.1, world_frame_points_loss_weight=1, normal_loss_weight=0.3, gm_loss_weight=0.3), conf_alpha=0.2, top_n_percent=5, apply_to_real_data_only=True, conf_loss_set_indices=[0], exclude_loss_set_indices=[1, 2]) + 0.03 * NonAmbiguousMaskLoss(BCELoss())"
+# Validation Loss
+test_criterion: "ExcludeTopNPercentPixelLoss(FactoredGeometryScaleRegr3DPlusNormalGMLoss(RobustRegressionLoss(alpha=0.5, scaling_c=0.05), norm_mode='avg_dis', depth_type_for_loss='depth_along_ray', loss_in_log=True, flatten_across_image_only=True, compute_absolute_pose_loss=True, compute_pairwise_relative_pose_loss=True, convert_predictions_to_view0_frame=False, compute_world_frame_points_loss=True, apply_normal_and_gm_loss_to_synthetic_data_only=True, cam_frame_points_loss_weight=0.1, depth_loss_weight=0.1, ray_directions_loss_weight=0.1, pose_quats_loss_weight=0.1, pose_trans_loss_weight=0.1, scale_loss_weight=0.1, world_frame_points_loss_weight=1, normal_loss_weight=0.3, gm_loss_weight=0.3), top_n_percent=5, apply_to_real_data_only=True, loss_set_indices=[0, 1, 2]) + 0.03 * NonAmbiguousMaskLoss(BCELoss())"

configs/loss/overall_loss_highpm_plus_rel_pose_no_conf.yaml ADDED Viewed

	@@ -0,0 +1,4 @@

+# Training Loss
+train_criterion: "ExcludeTopNPercentPixelLoss(FactoredGeometryScaleRegr3DPlusNormalGMLoss(RobustRegressionLoss(alpha=0.5, scaling_c=0.05), norm_mode='avg_dis', depth_type_for_loss='depth_along_ray', loss_in_log=True, flatten_across_image_only=True, compute_absolute_pose_loss=True, compute_pairwise_relative_pose_loss=True, convert_predictions_to_view0_frame=False, compute_world_frame_points_loss=True, apply_normal_and_gm_loss_to_synthetic_data_only=True, cam_frame_points_loss_weight=0.1, depth_loss_weight=0.1, ray_directions_loss_weight=0.1, pose_quats_loss_weight=0.1, pose_trans_loss_weight=0.1, scale_loss_weight=0.1, world_frame_points_loss_weight=1, normal_loss_weight=0.3, gm_loss_weight=0.3), top_n_percent=5, apply_to_real_data_only=True, loss_set_indices=[0, 1, 2]) + 0.03 * NonAmbiguousMaskLoss(BCELoss())"
+# Validation Loss
+test_criterion: "ExcludeTopNPercentPixelLoss(FactoredGeometryScaleRegr3DPlusNormalGMLoss(RobustRegressionLoss(alpha=0.5, scaling_c=0.05), norm_mode='avg_dis', depth_type_for_loss='depth_along_ray', loss_in_log=True, flatten_across_image_only=True, compute_absolute_pose_loss=True, compute_pairwise_relative_pose_loss=True, convert_predictions_to_view0_frame=False, compute_world_frame_points_loss=True, apply_normal_and_gm_loss_to_synthetic_data_only=True, cam_frame_points_loss_weight=0.1, depth_loss_weight=0.1, ray_directions_loss_weight=0.1, pose_quats_loss_weight=0.1, pose_trans_loss_weight=0.1, scale_loss_weight=0.1, world_frame_points_loss_weight=1, normal_loss_weight=0.3, gm_loss_weight=0.3), top_n_percent=5, apply_to_real_data_only=True, loss_set_indices=[0, 1, 2]) + 0.03 * NonAmbiguousMaskLoss(BCELoss())"

configs/loss/overall_loss_highpm_rel_pose_no_ref_view.yaml ADDED Viewed

	@@ -0,0 +1,4 @@

+# Training Loss
+train_criterion: "ConfAndExcludeTopNPercentPixelLoss(FactoredGeometryScaleRegr3DPlusNormalGMLoss(RobustRegressionLoss(alpha=0.5, scaling_c=0.05), norm_mode='avg_dis', depth_type_for_loss='depth_along_ray', loss_in_log=True, flatten_across_image_only=True, compute_absolute_pose_loss=False, compute_pairwise_relative_pose_loss=True, convert_predictions_to_view0_frame=True, compute_world_frame_points_loss=True, apply_normal_and_gm_loss_to_synthetic_data_only=True, cam_frame_points_loss_weight=0.1, depth_loss_weight=0.1, ray_directions_loss_weight=0.1, pose_quats_loss_weight=0.1, pose_trans_loss_weight=0.1, scale_loss_weight=0.1, world_frame_points_loss_weight=1, normal_loss_weight=0.3, gm_loss_weight=0.3), conf_alpha=0.2, top_n_percent=5, apply_to_real_data_only=True, conf_loss_set_indices=[0], exclude_loss_set_indices=[1, 2]) + 0.03 * NonAmbiguousMaskLoss(BCELoss())"
+# Validation Loss
+test_criterion: "ExcludeTopNPercentPixelLoss(FactoredGeometryScaleRegr3DPlusNormalGMLoss(RobustRegressionLoss(alpha=0.5, scaling_c=0.05), norm_mode='avg_dis', depth_type_for_loss='depth_along_ray', loss_in_log=True, flatten_across_image_only=True, compute_absolute_pose_loss=False, compute_pairwise_relative_pose_loss=True, convert_predictions_to_view0_frame=True, compute_world_frame_points_loss=True, apply_normal_and_gm_loss_to_synthetic_data_only=True, cam_frame_points_loss_weight=0.1, depth_loss_weight=0.1, ray_directions_loss_weight=0.1, pose_quats_loss_weight=0.1, pose_trans_loss_weight=0.1, scale_loss_weight=0.1, world_frame_points_loss_weight=1, normal_loss_weight=0.3, gm_loss_weight=0.3), top_n_percent=5, apply_to_real_data_only=True, loss_set_indices=[0, 1, 2]) + 0.03 * NonAmbiguousMaskLoss(BCELoss())"

configs/loss/pi3_loss.yaml ADDED Viewed

	@@ -0,0 +1,4 @@

+# Training Loss
+train_criterion: "ExcludeTopNPercentPixelLoss(FactoredGeometryRegr3DPlusNormalGMLoss(RobustRegressionLoss(alpha=0.5, scaling_c=0.05), norm_mode='avg_dis', depth_type_for_loss='depth_z', loss_in_log=True, flatten_across_image_only=True, compute_pairwise_relative_pose_loss=True, convert_predictions_to_view0_frame=True, compute_world_frame_points_loss=True, apply_normal_and_gm_loss_to_synthetic_data_only=True, normal_loss_weight=3.0, gm_loss_weight=3.0), top_n_percent=5, apply_to_real_data_only=True, loss_set_indices=[0, 1, 2])"
+# Validation Loss
+test_criterion: "ExcludeTopNPercentPixelLoss(FactoredGeometryRegr3DPlusNormalGMLoss(RobustRegressionLoss(alpha=0.5, scaling_c=0.05), norm_mode='avg_dis', depth_type_for_loss='depth_z', loss_in_log=True, flatten_across_image_only=True, compute_pairwise_relative_pose_loss=True, convert_predictions_to_view0_frame=True, compute_world_frame_points_loss=True, apply_normal_and_gm_loss_to_synthetic_data_only=True, normal_loss_weight=3.0, gm_loss_weight=3.0), top_n_percent=5, apply_to_real_data_only=True, loss_set_indices=[0, 1, 2])"

configs/machine/aws.yaml CHANGED Viewed

@@ -2,12 +2,14 @@ defaults:
   - default
 # Root directory containing all datasets
-root_data_dir: "/fsx/xrtech/data"
 # Dataset metadata directory
-mapanything_dataset_metadata_dir: "/fsx/nkeetha/mapanything_dataset_metadata"
 # Root directory containing pretrained checkpoints for custom models
-root_pretrained_checkpoints_dir: "/fsx/nkeetha/mapanything_checkpoints"
 # Root directory to log experiments
-root_experiments_dir: "/fsx/nkeetha/experiments"
 # Root directory containing UniCeption pretrained checkpoints
-root_uniception_pretrained_checkpoints_dir: "/fsx/nkeetha/uniception_checkpoints"

   - default
 # Root directory containing all datasets
+root_data_dir: "/ai4rl/fsx/xrtech/data"
 # Dataset metadata directory
+mapanything_dataset_metadata_dir: "/ai4rl/fsx/nkeetha/mapanything_dataset_metadata"
 # Root directory containing pretrained checkpoints for custom models
+root_pretrained_checkpoints_dir: "/ai4rl/fsx/nkeetha/mapanything_checkpoints"
 # Root directory to log experiments
+root_experiments_dir: "/ai4rl/fsx/nkeetha/experiments"
 # Root directory containing UniCeption pretrained checkpoints
+root_uniception_pretrained_checkpoints_dir: "/ai4rl/fsx/nkeetha/uniception_checkpoints"
+# Root directory containing external benchmark data
+external_benchmark_data_root_data_dir: "/ai4rl/fsx/xrtech/external_benchmark_data/rmvd_mvs_benchmark/rmvd_test_data"

configs/machine/default.yaml CHANGED Viewed

@@ -8,3 +8,5 @@ root_pretrained_checkpoints_dir: ???
 root_experiments_dir: ???
 # Root directory containing UniCeption pretrained checkpoints
 root_uniception_pretrained_checkpoints_dir: ???

 root_experiments_dir: ???
 # Root directory containing UniCeption pretrained checkpoints
 root_uniception_pretrained_checkpoints_dir: ???
+# Root directory containing external benchmark data
+external_benchmark_data_root_data_dir: ???

configs/machine/psc.yaml CHANGED Viewed

@@ -6,8 +6,10 @@ root_data_dir: "/ocean/projects/cis220039p/shared/datasets"
 # Dataset metadata directory
 mapanything_dataset_metadata_dir: "/ocean/projects/cis220039p/shared/mapanything_dataset_metadata"
 # Root directory containing pretrained checkpoints for custom models
-root_pretrained_checkpoints_dir: "/ocean/projects/cis220039p/nkeetha/code/AnyMap/checkpoints"
 # Root directory to log experiments
-root_experiments_dir: "/ocean/projects/cis220039p/nkeetha/experiments"
 # Root directory containing UniCeption pretrained checkpoints
-root_uniception_pretrained_checkpoints_dir: "/ocean/projects/cis220039p/nkeetha/code/AnyMap/UniCeption/checkpoints"

 # Dataset metadata directory
 mapanything_dataset_metadata_dir: "/ocean/projects/cis220039p/shared/mapanything_dataset_metadata"
 # Root directory containing pretrained checkpoints for custom models
+root_pretrained_checkpoints_dir: "/jet/home/yzhang25/mapanything/checkpoints"
 # Root directory to log experiments
+root_experiments_dir: "/jet/home/yzhang25/mapanything/outputs"
 # Root directory containing UniCeption pretrained checkpoints
+root_uniception_pretrained_checkpoints_dir: "/ocean/projects/cis220039p/shared/uniception/checkpoints/"
+# Root directory containing external benchmark data
+external_benchmark_data_root_data_dir: "/jet/home/yzhang25/mapanything/benchmarking/rmvd_mvs_benchmark/rmvd_test_data"

configs/machine/psc_yuchen.yaml DELETED Viewed

@@ -1,13 +0,0 @@
-defaults:
-  - default
-# Root directory containing all datasets
-root_data_dir: "/ocean/projects/cis220039p/shared/datasets"
-# Dataset metadata directory
-mapanything_dataset_metadata_dir: "/ocean/projects/cis220039p/shared/mapanything_dataset_metadata"
-# Root directory containing pretrained checkpoints for custom models
-root_pretrained_checkpoints_dir: "/jet/home/yzhang25/AnyMap/checkpoints"
-# Root directory to log experiments
-root_experiments_dir: "/jet/home/yzhang25/AnyMap/outputs"
-# Root directory containing UniCeption pretrained checkpoints
-root_uniception_pretrained_checkpoints_dir: "/ocean/projects/cis220039p/shared/uniception/checkpoints/"

configs/machine/xri_dgx.yaml CHANGED Viewed

@@ -6,8 +6,10 @@ root_data_dir: "/mnt/xri_mapsresearch/data/nkeetha"
 # Dataset metadata directory
 mapanything_dataset_metadata_dir: "/mnt/xri_mapsresearch/data/nkeetha/mapanything_dataset_metadata"
 # Root directory containing pretrained checkpoints for custom models
-root_pretrained_checkpoints_dir: "/mnt/xri_mapsresearch/code/nkeetha/AnyMap/checkpoints"
 # Root directory to log experiments
 root_experiments_dir: "/mnt/xri_mapsresearch/experiments/nkeetha"
 # Root directory containing UniCeption pretrained checkpoints
-root_uniception_pretrained_checkpoints_dir: "/mnt/xri_mapsresearch/code/nkeetha/AnyMap/UniCeption/checkpoints"

 # Dataset metadata directory
 mapanything_dataset_metadata_dir: "/mnt/xri_mapsresearch/data/nkeetha/mapanything_dataset_metadata"
 # Root directory containing pretrained checkpoints for custom models
+root_pretrained_checkpoints_dir: "/mnt/xri_mapsresearch/code/nkeetha/mapanything/checkpoints"
 # Root directory to log experiments
 root_experiments_dir: "/mnt/xri_mapsresearch/experiments/nkeetha"
 # Root directory containing UniCeption pretrained checkpoints
+root_uniception_pretrained_checkpoints_dir: "/mnt/xri_mapsresearch/code/nkeetha/mapanything/UniCeption/checkpoints"
+# Root directory containing external benchmark data
+external_benchmark_data_root_data_dir: "/mnt/xri_mapsresearch/data/nkeetha/rmvd_mvs_benchmark/rmvd_test_data"

configs/model/da3.yaml ADDED Viewed

	@@ -0,0 +1,13 @@

+# String for model factory
+model_str: "da3"
+# Model config
+model_config:
+  name: "da3"
+  # HF model string
+  hf_model_name: "depth-anything/DA3-GIANT"
+# Image Normalization Type
+data_norm_type: "dinov2"
+# DA3 checkpoint is already loaded in the inference wrapper
+pretrained: null
+# Torch hub force reload
+torch_hub_force_reload: False

configs/model/da3_nested.yaml ADDED Viewed

	@@ -0,0 +1,13 @@

+# String for model factory
+model_str: "da3"
+# Model config
+model_config:
+  name: "da3_nested"
+  # HF model string
+  hf_model_name: "depth-anything/DA3NESTED-GIANT-LARGE"
+# Image Normalization Type
+data_norm_type: "dinov2"
+# DA3 checkpoint is already loaded in the inference wrapper
+pretrained: null
+# Torch hub force reload
+torch_hub_force_reload: False

configs/model/encoder/dinov2_giant_24_layers.yaml ADDED Viewed

	@@ -0,0 +1,18 @@

+# UniCeption encoder string used for selecting encoder class (python3 -m uniception.models.encoders.list)
+encoder_str: "dinov2"
+# Name of the encoder
+name: "dinov2_giant_24_layers"
+# Data normalization type
+data_norm_type: "dinov2"
+# ViT size
+size: "giant"
+# Registers
+with_registers: False
+# Flag to indicate whether model class uses torch hub
+uses_torch_hub: True
+# Flag to indicate whether to use gradient checkpointing for encoder
+gradient_checkpointing: False
+# Turn off final normalization so that the features can be passed to DINOv2 init multi-view transformer
+norm_returned_features: False
+# Keep only the first 24 layers of DINOv2 ViT-G (other 16 layers are in multi-view transformer)
+keep_first_n_layers: 24

configs/model/info_sharing/aat_ifr_16_layers_dinov2_vitg_init.yaml ADDED Viewed

	@@ -0,0 +1,33 @@

+# Model type (Options: ["cross_attention", "global_attention", "alternating_attention"])
+model_type: "alternating_attention"
+# Model class type (Options: ["no_intermediate_features", "intermediate_features"])
+model_return_type: "intermediate_features"
+# Custom positional encoding (Options: ["RoPEfreq"], Callable Function, null)
+custom_positional_encoding: null
+# Module arguments
+module_args:
+  # Name of the info sharing module
+  name: "aat_16_layers_dinov2_vitg_init"
+  # Indices of the intermediate features to be shared (indices start from 0)
+  indices: [7, 11]
+  # Normalize intermediate features
+  norm_intermediate: True
+  # Size string
+  size: "16_layers"
+  # Depth (this includes both frame-wise and global attention layers)
+  depth: 16
+  # Distinguish Reference and Non-Reference Views
+  distinguish_ref_and_non_ref_views: True
+  # Flag to indicate whether to use gradient checkpointing
+  gradient_checkpointing: False
+  # Feature dim (similar to ViT-Giant)
+  dim: 1536
+  # Number of heads (similar to ViT-Giant)
+  num_heads: 24
+  # Set transformer parameters similar to DINOv2
+  mlp_ratio: 4
+  qkv_bias: True
+  qk_norm: False
+  init_values: 1e-5
+  # Load layers 24 to 40 from DINOv2 ViT-G as init
+  pretrained_checkpoint_path: '${machine.root_pretrained_checkpoints_dir}/aat_init_w_dinov2_vitg_layers_24_to_40.pth'

configs/model/info_sharing/aat_ifr_16_layers_vitg_dim.yaml ADDED Viewed

	@@ -0,0 +1,31 @@

+# Model type (Options: ["cross_attention", "global_attention", "alternating_attention"])
+model_type: "alternating_attention"
+# Model class type (Options: ["no_intermediate_features", "intermediate_features"])
+model_return_type: "intermediate_features"
+# Custom positional encoding (Options: ["RoPEfreq"], Callable Function, null)
+custom_positional_encoding: null
+# Module arguments
+module_args:
+  # Name of the info sharing module
+  name: "aat_16_layers_vitg_dim_ifr"
+  # Indices of the intermediate features to be shared (indices start from 0)
+  indices: [7, 11]
+  # Normalize intermediate features
+  norm_intermediate: True
+  # Size string
+  size: "16_layers"
+  # Depth (this includes both frame-wise and global attention layers)
+  depth: 16
+  # Distinguish Reference and Non-Reference Views
+  distinguish_ref_and_non_ref_views: True
+  # Flag to indicate whether to use gradient checkpointing
+  gradient_checkpointing: False
+  # Feature dim (similar to ViT-Giant)
+  dim: 1536
+  # Number of heads (similar to ViT-Giant)
+  num_heads: 24
+  # Set transformer parameters similar to DINOv2
+  mlp_ratio: 4
+  qkv_bias: True
+  qk_norm: False
+  init_values: 1e-5

configs/model/mapanything.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 defaults:
   - default
-  - encoder: dinov2_large
-  - info_sharing: aat_ifr_24_layers
   - pred_head: dpt_pose_scale
   - task: images_only
@@ -14,5 +14,7 @@ model_config:
   info_sharing_config: ${model.info_sharing}
   pred_head_config: ${model.pred_head}
   geometric_input_config: ${model.task}
 # Image Normalization Type
 data_norm_type: ${model.encoder.data_norm_type}

 defaults:
   - default
+  - encoder: dinov2_giant_24_layers
+  - info_sharing: aat_ifr_16_layers_vitg_dim
   - pred_head: dpt_pose_scale
   - task: images_only
   info_sharing_config: ${model.info_sharing}
   pred_head_config: ${model.pred_head}
   geometric_input_config: ${model.task}
+  use_register_tokens_from_encoder: True
+  info_sharing_mlp_layer_str: "swiglufused"
 # Image Normalization Type
 data_norm_type: ${model.encoder.data_norm_type}

configs/model/{mapanything_large_inference.yaml → mapanything_dino_init.yaml} RENAMED Viewed

@@ -1,7 +1,7 @@
 defaults:
   - default
-  - encoder: dinov2_large
-  - info_sharing: aat_ifr_48_layers_escaling
   - pred_head: dpt_pose_scale
   - task: images_only
@@ -14,5 +14,7 @@ model_config:
   info_sharing_config: ${model.info_sharing}
   pred_head_config: ${model.pred_head}
   geometric_input_config: ${model.task}
 # Image Normalization Type
 data_norm_type: ${model.encoder.data_norm_type}

 defaults:
   - default
+  - encoder: dinov2_giant_24_layers
+  - info_sharing: aat_ifr_16_layers_dinov2_vitg_init
   - pred_head: dpt_pose_scale
   - task: images_only
   info_sharing_config: ${model.info_sharing}
   pred_head_config: ${model.pred_head}
   geometric_input_config: ${model.task}
+  use_register_tokens_from_encoder: True
+  info_sharing_mlp_layer_str: "swiglufused"
 # Image Normalization Type
 data_norm_type: ${model.encoder.data_norm_type}

configs/model/mapanything_inference.yaml DELETED Viewed

@@ -1,18 +0,0 @@
-defaults:
-  - default
-  - encoder: dinov2_large
-  - info_sharing: aat_ifr_24_layers_escaling
-  - pred_head: dpt_pose_scale
-  - task: images_only
-# String for model factory
-model_str: "mapanything"
-# Model config
-model_config:
-  name: "mapanything"
-  encoder_config: ${model.encoder}
-  info_sharing_config: ${model.info_sharing}
-  pred_head_config: ${model.pred_head}
-  geometric_input_config: ${model.task}
-# Image Normalization Type
-data_norm_type: ${model.encoder.data_norm_type}

configs/model/{mapanything_large.yaml → mapanything_v1.yaml} RENAMED Viewed

@@ -1,7 +1,7 @@
 defaults:
   - default
   - encoder: dinov2_large
-  - info_sharing: aat_ifr_48_layers
   - pred_head: dpt_pose_scale
   - task: images_only

 defaults:
   - default
   - encoder: dinov2_large
+  - info_sharing: aat_ifr_24_layers
   - pred_head: dpt_pose_scale
   - task: images_only

configs/rmvd_benchmark.yaml CHANGED Viewed

@@ -6,7 +6,7 @@ defaults:
 # Path Settings
 output_dir: ${hydra:run.dir}
-root_data_dir: ${machine.root_data_dir}
 mapanything_dataset_metadata_dir: ${machine.mapanything_dataset_metadata_dir}
 root_pretrained_checkpoints_dir: ${machine.root_pretrained_checkpoints_dir}
 root_experiments_dir: ${machine.root_experiments_dir}

 # Path Settings
 output_dir: ${hydra:run.dir}
+external_benchmark_data_root_data_dir: ${machine.external_benchmark_data_root_data_dir}
 mapanything_dataset_metadata_dir: ${machine.mapanything_dataset_metadata_dir}
 root_pretrained_checkpoints_dir: ${machine.root_pretrained_checkpoints_dir}
 root_experiments_dir: ${machine.root_experiments_dir}

configs/train_params/moge2_finetune.yaml ADDED Viewed

	@@ -0,0 +1,6 @@

+defaults:
+  - default
+# Use lower lr for finetuning
+lr: 1e-05
+min_lr: 1e-07

configs/train_params/pi3_finetune.yaml ADDED Viewed

	@@ -0,0 +1,16 @@

+defaults:
+  - default
+# Use lower lr for finetuning
+lr: 1e-05
+min_lr: 1e-07
+# Optimizer parameters specific to submodules
+submodule_configs:
+  # DINOv2
+  model.encoder:
+    lr: 5e-07
+    min_lr: 5e-09
+    warmup_epochs: ${train_params.warmup_epochs}
+    weight_decay: ${train_params.weight_decay}
+    schedule_type: ${train_params.schedule_type}

configs/train_params/vggt_finetune.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 defaults:
   - default
-# Use 10x lower lr for finetuning
 lr: 1e-05
 min_lr: 1e-07

 defaults:
   - default
+# Use lower lr for finetuning
 lr: 1e-05
 min_lr: 1e-07

mapanything/datasets/__init__.py CHANGED Viewed

@@ -1,3 +1,8 @@
 """
 MapAnything Datasets
 """
@@ -5,14 +10,10 @@ MapAnything Datasets
 import torch
 from mapanything.datasets.wai.ase import ASEWAI  # noqa
-from mapanything.datasets.wai.bedlam import BedlamWAI  # noqa
 from mapanything.datasets.wai.blendedmvs import BlendedMVSWAI  # noqa
 from mapanything.datasets.wai.dl3dv import DL3DVWAI  # noqa
-from mapanything.datasets.wai.dtu import DTUWAI  # noqa
 from mapanything.datasets.wai.dynamicreplica import DynamicReplicaWAI  # noqa
 from mapanything.datasets.wai.eth3d import ETH3DWAI  # noqa
-from mapanything.datasets.wai.gta_sfm import GTASfMWAI  # noqa
-from mapanything.datasets.wai.matrixcity import MatrixCityWAI  # noqa
 from mapanything.datasets.wai.megadepth import MegaDepthWAI  # noqa
 from mapanything.datasets.wai.mpsd import MPSDWAI  # noqa
 from mapanything.datasets.wai.mvs_synth import MVSSynthWAI  # noqa
@@ -20,10 +21,8 @@ from mapanything.datasets.wai.paralleldomain4d import ParallelDomain4DWAI  # noq
 from mapanything.datasets.wai.sailvos3d import SAILVOS3DWAI  # noqa
 from mapanything.datasets.wai.scannetpp import ScanNetPPWAI  # noqa
 from mapanything.datasets.wai.spring import SpringWAI  # noqa
-from mapanything.datasets.wai.structured3d import Structured3DWAI  # noqa
 from mapanything.datasets.wai.tav2_wb import TartanAirV2WBWAI  # noqa
 from mapanything.datasets.wai.unrealstereo4k import UnrealStereo4KWAI  # noqa
-from mapanything.datasets.wai.xrooms import XRoomsWAI  # noqa
 from mapanything.utils.train_tools import get_rank, get_world_size

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the Apache License, Version 2.0
+# found in the LICENSE file in the root directory of this source tree.
 """
 MapAnything Datasets
 """
 import torch
 from mapanything.datasets.wai.ase import ASEWAI  # noqa
 from mapanything.datasets.wai.blendedmvs import BlendedMVSWAI  # noqa
 from mapanything.datasets.wai.dl3dv import DL3DVWAI  # noqa
 from mapanything.datasets.wai.dynamicreplica import DynamicReplicaWAI  # noqa
 from mapanything.datasets.wai.eth3d import ETH3DWAI  # noqa
 from mapanything.datasets.wai.megadepth import MegaDepthWAI  # noqa
 from mapanything.datasets.wai.mpsd import MPSDWAI  # noqa
 from mapanything.datasets.wai.mvs_synth import MVSSynthWAI  # noqa
 from mapanything.datasets.wai.sailvos3d import SAILVOS3DWAI  # noqa
 from mapanything.datasets.wai.scannetpp import ScanNetPPWAI  # noqa
 from mapanything.datasets.wai.spring import SpringWAI  # noqa
 from mapanything.datasets.wai.tav2_wb import TartanAirV2WBWAI  # noqa
 from mapanything.datasets.wai.unrealstereo4k import UnrealStereo4KWAI  # noqa
 from mapanything.utils.train_tools import get_rank, get_world_size

mapanything/datasets/base/base_dataset.py CHANGED Viewed

@@ -1,3 +1,8 @@
 """
 Base class for MapAnything datasets.
 """
@@ -314,7 +319,7 @@ class BaseDataset(EasyDataset):
         use_bidirectional_covis=True,
     ):
         """
-        Randomly samples S indices from an N x N covisbility matrix by forming adjacency edges such that the resulting subgraph (given by the indices) is connected.
         If the current node has no new unvisited neighbors, backtracking occurs.
         Retries with different starting indices if the desired number of samples is not reached, excluding previously visited components.
@@ -569,7 +574,7 @@ class BaseDataset(EasyDataset):
             if "non_ambiguous_mask" in view:
                 assert view["depthmap"].shape == view["non_ambiguous_mask"].shape
-            # Expand the last dimennsion of the depthmap
             view["depthmap"] = view["depthmap"][..., None]
             # Append RNG state to the views, this allows to check whether the RNG is in the same state each time

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the Apache License, Version 2.0
+# found in the LICENSE file in the root directory of this source tree.
 """
 Base class for MapAnything datasets.
 """
         use_bidirectional_covis=True,
     ):
         """
+        Randomly samples S indices from an N x N covisibility matrix by forming adjacency edges such that the resulting subgraph (given by the indices) is connected.
         If the current node has no new unvisited neighbors, backtracking occurs.
         Retries with different starting indices if the desired number of samples is not reached, excluding previously visited components.
             if "non_ambiguous_mask" in view:
                 assert view["depthmap"].shape == view["non_ambiguous_mask"].shape
+            # Expand the last dimension of the depthmap
             view["depthmap"] = view["depthmap"][..., None]
             # Append RNG state to the views, this allows to check whether the RNG is in the same state each time