diff --git a/lamar/tasks/feature_matching.py b/lamar/tasks/feature_matching.py
index 5a36347..bfba8c1 100644
--- a/lamar/tasks/feature_matching.py
+++ b/lamar/tasks/feature_matching.py
@@ -33,6 +33,15 @@ class FeatureMatching:
                 },
             },
         },
+        'lightglue': {
+            'name': 'lightglue',
+            'hloc': {
+                'model': {
+                    'name': 'lightglue',
+                    'features': 'superpoint',
+                },
+            },
+        },
         'mnn': {
             'name': 'mnn',
             'hloc': {
diff --git a/lamar/tasks/mapping.py b/lamar/tasks/mapping.py
index 1406bbe..298e28d 100644
--- a/lamar/tasks/mapping.py
+++ b/lamar/tasks/mapping.py
@@ -17,6 +17,7 @@

 logger = logging.getLogger(__name__)

+invalid_point3d_id = pycolmap.Point2D().point3D_id


 class MappingPaths:
@@ -94,6 +95,10 @@ def __init__(self, config, outputs, capture, session_id,
             self.name2key[image.name]: image.image_id
             for image in self.reconstruction.images.values()
         }
+        self.imageid_to_point3dids = {
+            image_id: np.array([p.point3D_id for p in image.points2D], np.uint64)
+            for image_id, image in self.reconstruction.images.items()
+        }

     def run(self, capture):
         run_capture_to_empty_colmap.run(capture, [self.session_id], self.paths.sfm_empty)
@@ -107,18 +112,20 @@ def run(self, capture):
         )

     def get_points3D(self, key, point2D_indices):
-        image = self.reconstruction.images[self.key2imageid[key]]
-        valid = []
-        xyz = []
-        ids = []
-        if len(image.points2D) > 0:
-            for idx in point2D_indices:
-                p = image.points2D[idx]
-                valid.append(p.has_point3D())
-                if valid[-1]:
-                    ids.append(p.point3D_id)
-                    xyz.append(self.reconstruction.points3D[ids[-1]].xyz)
-        return np.array(valid, bool), xyz, ids
+        image_id = self.key2imageid[key]
+        all_ids = self.imageid_to_point3dids[image_id]
+        if len(all_ids) > 0:
+            ids = all_ids[point2D_indices]
+            valid = ids != invalid_point3d_id
+            ids = ids[valid]
+        else:
+            valid = np.empty(0, bool)
+            ids = np.empty(0, int)
+        if len(ids) > 0:
+            xyz = np.stack([self.reconstruction.points3D[i].xyz for i in ids])
+        else:
+            xyz = np.empty((0, 3), float)
+        return valid, xyz, ids


 class MeshLifting(Mapping):
diff --git a/lamar/tasks/pose_estimation.py b/lamar/tasks/pose_estimation.py
index 585136a..b1a17f5 100644
--- a/lamar/tasks/pose_estimation.py
+++ b/lamar/tasks/pose_estimation.py
@@ -33,6 +33,7 @@ def __init__(self, root, config, query_id, ref_id, override_workdir_root=None):
             root / 'pose_estimation' / query_id / ref_id
             / config['features']['name'] / config['matches']['name']
             / config['pairs']['name'] / config['mapping']['name'] / config['name']
+            / config['estimator']
         )
         self.poses = self.workdir / 'poses.txt'
         self.config = self.workdir / 'configuration.json'
@@ -145,6 +146,7 @@ def convert_poses_for_eval(self, T_c2w):

 class SingleImagePoseEstimation(PoseEstimation):
     method = {
         'name': 'single_image',
+        'estimator': 'pycolmap',
         'pnp_error_multiplier': 3.0,
     }
@@ -167,7 +169,7 @@ def _worker_fn(idx: int):
                 camera, ref_key_names,
                 self.recover_matches_2d3d,
-                self.config['pnp_error_multiplier'],
+                self.config,
                 return_covariance=self.return_covariance
             )
             if pose is not None:
@@ -185,6 +187,7 @@ def convert_poses_for_eval(self, T_c2w):

 class RigPoseEstimation(PoseEstimation):
     method = {
         'name': 'rig',
+        'estimator': 'pycolmap',
         'pnp_error_multiplier': 1.0
     }
@@ -211,7 +214,7 @@ def _worker_fn(idx: int):
                 query_names, cameras, T_cams2rig, ref_key_names,
                 self.recover_matches_2d3d,
-                self.config['pnp_error_multiplier'],
+                self.config,
                 return_covariance=self.return_covariance
             )
             if pose is not None:
@@ -229,6 +232,7 @@ def convert_poses_for_eval(self, T_c2w):

 class RigSinglePoseEstimation(SingleImagePoseEstimation):
     method = {
         'name': 'rig_single',
+        'estimator': 'pycolmap',
         'pnp_error_multiplier': 1.0
     }
@@ -303,11 +307,13 @@ def recover_matches_2d3d(self, query: str, ref_key_names: List[Tuple[KeyType, st
 class SingleImageDensePoseEstimation(DensePoseEstimation, SingleImagePoseEstimation):
     method = {
         'name': 'dense_single_image',
+        'estimator': 'pycolmap',
         'pnp_error_multiplier': 3.0,
     }

 class RigDensePoseEstimation(DensePoseEstimation, RigPoseEstimation):
     method = {
         'name': 'dense_rig',
+        'estimator': 'pycolmap',
         'pnp_error_multiplier': 1.0,
     }
diff --git a/lamar/utils/localization.py b/lamar/utils/localization.py
index fa281d6..516b35b 100644
--- a/lamar/utils/localization.py
+++ b/lamar/utils/localization.py
@@ -1,9 +1,12 @@
 from pathlib import Path
-from typing import List, Tuple, Callable
-from collections import defaultdict
+from typing import Any, Dict, List, Tuple, Callable

 import numpy as np
 import pycolmap
+try:
+    import poselib
+except ImportError:
+    poselib = None

 from scantools.capture import Camera, Pose, Trajectories
 from scantools.proc.alignment.image_matching import get_keypoints, get_matches
@@ -14,8 +17,6 @@ def recover_matches_2d3d(query: str, ref_key_names: List[Tuple[KeyType, str]],
                          mapping, query_features: Path, match_file: Path):
     (p2d,), (noise,) = get_keypoints(query_features, [query])
-    p2d_to_p3d = defaultdict(list)
-    num_matches = 0

     if len(ref_key_names) == 0:
         ref_keys = ref_names = []
@@ -29,32 +30,30 @@ def recover_matches_2d3d(query: str, ref_key_names: List[Tuple[KeyType, str]],
         'indices': [np.empty((0,), int)],
         'node_ids_ref': [np.empty((0, 2), object)]
     }
+    p2d_to_p3d = set()
     for idx, (ref_key, matches) in enumerate(zip(ref_keys, all_matches)):
         if len(matches) == 0:
             continue
         valid, p3ds, p3d_ids = mapping.get_points3D(ref_key, matches[:, 1])
         matches = matches[valid]
-        num_matches += len(matches)
-        p2d_q = []
-        p3d = []
-        indices = []
+        p2d_ids = []
+        match_indices = []
         node_ids_ref = []
-        for (i, j), p3d_id, xyz in zip(matches, p3d_ids, p3ds):
-            # avoid duplicate observations
-            if p3d_id != -1 and p3d_id in p2d_to_p3d[i]:
+        for match_idx, ((i, j), p3d_id) in enumerate(zip(matches, p3d_ids)):
+            if p3d_id != -1 and (i, p3d_id) in p2d_to_p3d:
+                # Avoid duplicate observations.
                 continue
-            p2d_to_p3d[i].append(p3d_id)
-            p2d_q.append(p2d[i])
-            p3d.append(xyz)
-            indices.append(idx)
+            p2d_to_p3d.add((i, p3d_id))
+            p2d_ids.append(i)
+            match_indices.append(match_idx)
             node_ids_ref.append((ref_key, j))
-        if len(p2d_q) == 0:
+        if len(p2d_ids) == 0:
             continue
-        ret['kp_q'].append(np.array(p2d_q))
-        ret['p3d'].append(np.array(p3d))
-        ret['indices'].append(np.array(indices))
+        ret['kp_q'].append(p2d[p2d_ids])
+        ret['p3d'].append(p3ds[match_indices])
+        ret['indices'].append(np.full(len(match_indices), idx))
         ret['node_ids_ref'].append(np.array(node_ids_ref, dtype=object))

     ret = {k: np.concatenate(v, 0) for k, v in ret.items()}
@@ -64,26 +63,39 @@ def recover_matches_2d3d(query: str, ref_key_names: List[Tuple[KeyType, str]],
 def estimate_camera_pose(query: str, camera: Camera,
                          ref_key_names: List[Tuple[KeyType, str]],
                          recover_matches: Callable,
-                         pnp_error_multiplier: float,
-                         return_covariance: bool) -> Pose:
+                         config: Dict[str, Any],
+                         return_covariance: bool) -> Tuple[Pose, Dict[str, Any]]:
     matches_2d3d = recover_matches(query, ref_key_names)
     keypoint_noise = matches_2d3d['keypoint_noise']
-
-    ret = pycolmap.absolute_pose_estimation(
-        matches_2d3d['kp_q'], matches_2d3d['p3d'],
-        camera.asdict, pnp_error_multiplier * keypoint_noise,
-        return_covariance=return_covariance)
-
-    if ret['success']:
-        if return_covariance:
-            ret['covariance'] *= keypoint_noise ** 2
-            # the covariance returned by pycolmap is on the left side,
-            # which is the right side of the inverse.
-            pose = Pose(*Pose(ret['qvec'], ret['tvec']).inv.qt, ret['covariance'])
+    points_2d = matches_2d3d['kp_q']
+    points_3d = matches_2d3d['p3d']
+    inlier_threshold = config['pnp_error_multiplier'] * keypoint_noise
+
+    if config['estimator'] == 'pycolmap':
+        ret = pycolmap.absolute_pose_estimation(
+            points_2d, points_3d, camera.asdict, inlier_threshold,
+            return_covariance=return_covariance)
+        if ret['success']:
+            if return_covariance:
+                ret['covariance'] *= keypoint_noise ** 2
+                # the covariance returned by pycolmap is on the left side,
+                # which is the right side of the inverse.
+                pose = Pose(*Pose(ret['qvec'], ret['tvec']).inv.qt, ret['covariance'])
+            else:
+                pose = Pose(ret['qvec'], ret['tvec']).inv
         else:
-            pose = Pose(ret['qvec'], ret['tvec']).inv
+            pose = None
+    elif config['estimator'] == 'poselib':
+        if poselib is None:
+            raise ImportError('Could not import PoseLib - did you forget to install it?')
+        if return_covariance:
+            raise ValueError('PoseLib does not support return_covariance=True')
+        pose, ret = poselib.estimate_absolute_pose(
+            points_2d, points_3d, camera.asdict,
+            {'max_reproj_error': inlier_threshold}, {})
+        pose = Pose(pose.q, pose.t).inv
     else:
-        pose = None
+        raise NotImplementedError(f'Unknown estimator: {config["estimator"]}')

     ret = {**ret, 'matches_2d3d_list': [matches_2d3d]}
     return pose, ret
@@ -92,7 +104,8 @@ def estimate_camera_pose(query: str, camera: Camera,
 def estimate_camera_pose_rig(queries: List[str], cameras: List[Camera], T_cams2rig: List[Pose],
                              refs_key_names: List[List[Tuple[KeyType, str]]],
                              recover_matches: Callable,
-                             pnp_error_multiplier: float, return_covariance: bool) -> Pose:
+                             config: Dict[str, Any],
+                             return_covariance: bool) -> Tuple[Pose, Dict[str, Any]]:
     matches_2d3d_list = []
     keypoint_noises = []
     for query, ref_key_names in zip(queries, refs_key_names):
@@ -100,29 +113,46 @@ def estimate_camera_pose_rig(queries: List[str], cameras: List[Camera], T_cams2r
         matches_2d3d_list.append(matches_2d3d)
         keypoint_noises.append(matches_2d3d['keypoint_noise'])

-    p2d_m_list = [m['kp_q'] for m in matches_2d3d_list]
-    p3d_m_list = [m['p3d'] for m in matches_2d3d_list]
+    points_2d = [m['kp_q'] for m in matches_2d3d_list]
+    points_3d = [m['p3d'] for m in matches_2d3d_list]
     camera_dicts = [camera.asdict for camera in cameras]
     rel_poses = [T.inverse() for T in T_cams2rig]
     qvecs = [p.qvec for p in rel_poses]
     tvecs = [p.t for p in rel_poses]
     keypoint_noise = np.mean(keypoint_noises)
-
-    ret = pycolmap.rig_absolute_pose_estimation(
-        p2d_m_list, p3d_m_list, camera_dicts, qvecs,
-        tvecs, pnp_error_multiplier * keypoint_noise,
-        return_covariance=return_covariance)
-
-    if ret['success']:
-        if return_covariance:
-            ret['covariance'] *= keypoint_noise ** 2
-            # the covariance returned by pycolmap is on the left side,
-            # which is the right side of the inverse.
-            pose = Pose(*Pose(ret['qvec'], ret['tvec']).inv.qt, ret['covariance'])
+    inlier_threshold = config['pnp_error_multiplier'] * keypoint_noise
+
+    if config['estimator'] == 'pycolmap':
+        ret = pycolmap.rig_absolute_pose_estimation(
+            points_2d, points_3d, camera_dicts, qvecs, tvecs, inlier_threshold,
+            return_covariance=return_covariance)
+        if ret['success']:
+            if return_covariance:
+                ret['covariance'] *= keypoint_noise ** 2
+                # the covariance returned by pycolmap is on the left side,
+                # which is the right side of the inverse.
+                pose = Pose(*Pose(ret['qvec'], ret['tvec']).inv.qt, ret['covariance'])
+            else:
+                pose = Pose(ret['qvec'], ret['tvec']).inv
         else:
-            pose = Pose(ret['qvec'], ret['tvec']).inv
+            pose = None
+    elif config['estimator'] == 'poselib':
+        if poselib is None:
+            raise ImportError('Could not import PoseLib - did you forget to install it?')
+        if return_covariance:
+            raise ValueError('PoseLib does not support return_covariance=True')
+        rel_poses_poselib = []
+        for q, t in zip(qvecs, tvecs):
+            p = poselib.CameraPose()
+            p.q = q
+            p.t = t
+            rel_poses_poselib.append(p)
+        pose, ret = poselib.estimate_generalized_absolute_pose(
+            points_2d, points_3d, rel_poses_poselib, camera_dicts,
+            {'max_reproj_error': inlier_threshold}, {})
+        pose = Pose(pose.q, pose.t).inv
     else:
-        pose = None
+        raise NotImplementedError(f'Unknown estimator: {config["estimator"]}')

     ret = {**ret, 'matches_2d3d_list': matches_2d3d_list}
     return pose, ret
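Reviewer note (not part of the diff): a minimal sketch of the call pattern the new `poselib` branch relies on, for sanity-checking the backend locally. It assumes PoseLib is installed; the PINHOLE camera dict, the synthetic points, and the `max_reproj_error` value are illustrative only. In `localization.py` the threshold is `config['pnp_error_multiplier'] * keypoint_noise` and the camera dict comes from `camera.asdict`.

```python
# Hedged usage sketch: synthetic data with an identity ground-truth pose.
import numpy as np
import poselib  # assumed installed; localization.py falls back gracefully if it is not

rng = np.random.default_rng(0)
# Points in front of a camera at the origin looking down +z.
points_3d = rng.uniform(-1.0, 1.0, (200, 3)) + np.array([0.0, 0.0, 5.0])
camera = {'model': 'PINHOLE', 'width': 640, 'height': 480,
          'params': [500.0, 500.0, 320.0, 240.0]}
# Project with the identity pose, so the estimator should recover ~identity.
points_2d = 500.0 * points_3d[:, :2] / points_3d[:, 2:] + np.array([320.0, 240.0])

pose, info = poselib.estimate_absolute_pose(
    points_2d, points_3d, camera,
    {'max_reproj_error': 3.0},  # plays the role of the scaled inlier threshold
    {})
print(pose.q, pose.t)  # CameraPose exposes .q/.t, as used by Pose(pose.q, pose.t).inv above
```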