diff --git a/inference.py b/inference.py index bd1b661..22ef8f4 100644 --- a/inference.py +++ b/inference.py @@ -4,6 +4,7 @@ import torch import numpy as np +import open3d as o3d from tqdm import tqdm from threading import Thread from transformers import AutoTokenizer, AutoModelForCausalLM @@ -19,6 +20,294 @@ } +def save_processed_pointcloud(point_cloud_tensor, output_path, original_min_extent=None): + """ + Save the processed point cloud tensor as a PLY file to visualize what the model sees. + + Args: + point_cloud_tensor: The processed point cloud tensor from preprocess_point_cloud + output_path: Path to save the PLY file + original_min_extent: Original minimum extent for coordinate restoration + """ + # Extract the point cloud data (remove batch dimension) + point_data = point_cloud_tensor.squeeze(0).cpu().numpy() + + # The tensor structure is [grid_coord(3), xyz(3), rgb(3)] = 9 columns + if point_data.shape[1] >= 9: + grid_coords = point_data[:, 0:3] # Discretized grid coordinates + xyz_coords = point_data[:, 3:6] # Actual XYZ coordinates (after PositiveShift) + rgb_colors = point_data[:, 6:9] # RGB colors (normalized) + + # Use actual XYZ coordinates (these are the shifted/processed coordinates) + points = xyz_coords + colors = rgb_colors + + # Restore original coordinate system if min_extent provided + if original_min_extent is not None: + points = points + original_min_extent + + # Ensure colors are in [0,1] range + colors = np.clip(colors, 0, 1) + + # Create Open3D point cloud + pcd = o3d.geometry.PointCloud() + pcd.points = o3d.utility.Vector3dVector(points) + pcd.colors = o3d.utility.Vector3dVector(colors) + + # Save as PLY + o3d.io.write_point_cloud(output_path, pcd) + print(f"Saved processed point cloud to: {output_path}") + print(f" - Number of points: {len(points)}") + print(f" - Coordinate range: [{points.min(axis=0)}, {points.max(axis=0)}]") + print(f" - Grid coordinate range: [{grid_coords.min(axis=0)}, {grid_coords.max(axis=0)}]") + else: + print(f"Warning: Unexpected tensor shape {point_data.shape}, cannot save PLY") + + +def save_grid_coordinates_pointcloud(point_cloud_tensor, output_path, grid_size_m=None, min_extent=None): + """ + Save a point cloud using the discretized grid coordinates to see the voxel grid structure. + """ + point_data = point_cloud_tensor.squeeze(0).cpu().numpy() + + if point_data.shape[1] >= 9: + grid_coords = point_data[:, 0:3] # Discretized grid coordinates + rgb_colors = point_data[:, 6:9] # RGB colors + + # Use grid coordinates as positions (voxel grid). Optionally convert to meters for downstream use + if grid_size_m is not None: + points = grid_coords.astype(float) * float(grid_size_m) + if min_extent is not None: + points = points + np.array(min_extent, dtype=float) + else: + points = grid_coords + colors = np.clip(rgb_colors, 0, 1) + + # Create Open3D point cloud + pcd = o3d.geometry.PointCloud() + pcd.points = o3d.utility.Vector3dVector(points.astype(float)) + pcd.colors = o3d.utility.Vector3dVector(colors) + + # Save as PLY + o3d.io.write_point_cloud(output_path, pcd) + print(f"Saved grid coordinate point cloud to: {output_path}") + print(f" - Number of voxels: {len(points)}") + print(f" - Grid coordinate range: [{points.min(axis=0)}, {points.max(axis=0)}]") + + +def preprocess_point_cloud_adaptive_unified(points, colors, base_grid_size, num_bins, density_levels=3): + """ + Unified adaptive preprocessing that maintains single coordinate system. + Uses density-weighted sampling within a single grid. 
+ """ + from sklearn.neighbors import NearestNeighbors + + print(f"Unified adaptive preprocessing with {density_levels} density levels...") + + # Calculate local point density + k_neighbors = min(20, len(points) // 10) + nbrs = NearestNeighbors(n_neighbors=k_neighbors).fit(points) + distances, _ = nbrs.kneighbors(points) + local_density = 1.0 / (distances[:, -1] + 1e-6) + + # Normalize density to [0, 1] range + min_density = np.min(local_density) + max_density = np.max(local_density) + normalized_density = (local_density - min_density) / (max_density - min_density + 1e-6) + + # Create density-based sampling weights + # Higher density points get higher sampling probability + sampling_weights = 0.1 + 0.9 * normalized_density # Range [0.1, 1.0] + + # Use adaptive grid size based on overall density distribution + density_percentile_75 = np.percentile(normalized_density, 75) + adaptive_grid_scale = 1.0 / (1.0 + density_percentile_75) # Smaller grid for denser clouds + final_grid_size = base_grid_size * adaptive_grid_scale + + print(f" Adaptive grid size: {final_grid_size:.6f} (scale: {adaptive_grid_scale:.3f})") + print(f" Density range: [{min_density:.2f}, {max_density:.2f}]") + print(f" Sampling weights range: [{np.min(sampling_weights):.2f}, {np.max(sampling_weights):.2f}]") + + # Apply standard preprocessing with adaptive grid + transform = Compose([ + dict(type="PositiveShift"), + dict(type="NormalizeColor"), + dict(type="DensityWeightedGridSample", + grid_size=final_grid_size, + hash_type="fnv", + mode="test", + keys=("coord", "color"), + return_grid_coord=True, + max_grid_coord=num_bins, + sampling_weights=sampling_weights), # Custom transform + ]) + + # Fallback to regular GridSample if custom transform not available + try: + point_cloud = transform({ + "coord": points.copy(), + "color": colors.copy(), + }) + except: + print(" Fallback to regular GridSample with adaptive grid size") + fallback_transform = Compose([ + dict(type="PositiveShift"), + dict(type="NormalizeColor"), + dict(type="GridSample", + grid_size=final_grid_size, + hash_type="fnv", + mode="test", + keys=("coord", "color"), + return_grid_coord=True, + max_grid_coord=num_bins), + ]) + point_cloud = fallback_transform({ + "coord": points.copy(), + "color": colors.copy(), + }) + + print(f" Processed points: {len(point_cloud['grid_coord']):,}") + + # Convert to tensor format + coord = point_cloud["grid_coord"] + xyz = point_cloud["coord"] + rgb = point_cloud["color"] + point_cloud_data = np.concatenate([coord, xyz, rgb], axis=1) + return torch.as_tensor(np.stack([point_cloud_data], axis=0)) + + +def preprocess_point_cloud_adaptive(points, colors, base_grid_size, num_bins, density_levels=3): + """ + Adaptive point cloud preprocessing with variable voxel density based on local point density. 
+ + Args: + points: Original point coordinates + colors: Original point colors + base_grid_size: Base voxel size for sparse regions + num_bins: Number of bins for coordinate discretization + density_levels: Number of different density levels to use + """ + from sklearn.neighbors import NearestNeighbors + + print(f"Adaptive preprocessing with {density_levels} density levels...") + + # Calculate local point density for each point + k_neighbors = min(20, len(points) // 10) # Adaptive k based on point cloud size + nbrs = NearestNeighbors(n_neighbors=k_neighbors).fit(points) + distances, _ = nbrs.kneighbors(points) + + # Use average distance to k-th neighbor as density measure (smaller = denser) + local_density = 1.0 / (distances[:, -1] + 1e-6) # Inverse distance = density + + # Create density-based regions + density_percentiles = np.linspace(0, 100, density_levels + 1) + density_thresholds = np.percentile(local_density, density_percentiles) + + processed_clouds = [] + total_points = 0 + + for level in range(density_levels): + # Define density range for this level + min_density = density_thresholds[level] + max_density = density_thresholds[level + 1] + + # Select points in this density range + if level == density_levels - 1: # Last level includes maximum + mask = (local_density >= min_density) + else: + mask = (local_density >= min_density) & (local_density < max_density) + + if not np.any(mask): + continue + + level_points = points[mask] + level_colors = colors[mask] + + # Calculate grid size for this density level + # Higher density regions get smaller voxels (more detail) + density_factor = (level + 1) / density_levels # 0.33, 0.67, 1.0 for 3 levels + level_grid_size = base_grid_size * (1.0 / (density_factor ** 0.5)) # Square root scaling + + print(f" Level {level + 1}: {np.sum(mask):,} points, grid_size={level_grid_size:.6f}") + + # Process this density level + transform = Compose([ + dict(type="PositiveShift"), + dict(type="NormalizeColor"), + dict( + type="GridSample", + grid_size=level_grid_size, + hash_type="fnv", + mode="test", + keys=("coord", "color"), + return_grid_coord=True, + max_grid_coord=num_bins, # Use same coordinate space for all levels + ), + ]) + + level_cloud = transform({ + "coord": level_points.copy(), + "color": level_colors.copy(), + }) + + processed_clouds.append(level_cloud) + total_points += len(level_cloud["grid_coord"]) + + # Combine all density levels + if len(processed_clouds) == 1: + final_cloud = processed_clouds[0] + else: + final_cloud = combine_density_levels(processed_clouds, num_bins) + + print(f" Total processed points: {total_points:,}") + + # Convert to tensor format + coord = final_cloud["grid_coord"] + xyz = final_cloud["coord"] + rgb = final_cloud["color"] + point_cloud = np.concatenate([coord, xyz, rgb], axis=1) + return torch.as_tensor(np.stack([point_cloud], axis=0)) + + +def combine_density_levels(processed_clouds, num_bins): + """Combine multiple density-level point clouds into one unified coordinate system.""" + all_coords = [] + all_xyz = [] + all_colors = [] + + for i, cloud in enumerate(processed_clouds): + all_coords.append(cloud["grid_coord"]) + all_xyz.append(cloud["coord"]) + all_colors.append(cloud["color"]) + + combined_cloud = { + "grid_coord": np.vstack(all_coords), + "coord": np.vstack(all_xyz), + "color": np.vstack(all_colors), + } + + # Remove duplicate voxels (keep the one from the highest density level) + # Since we process from low to high density, later entries have higher priority + grid_coords = 
combined_cloud["grid_coord"] + + # Create unique voxel keys + voxel_keys = grid_coords[:, 0] * (num_bins ** 2) + grid_coords[:, 1] * num_bins + grid_coords[:, 2] + + # Keep last occurrence (highest density level) for each voxel + _, unique_indices = np.unique(voxel_keys[::-1], return_index=True) + unique_indices = len(voxel_keys) - 1 - unique_indices # Reverse back to original indices + unique_indices = np.sort(unique_indices) # Sort to maintain order + + # Keep only unique voxels (prioritizing higher density levels) + combined_cloud["grid_coord"] = combined_cloud["grid_coord"][unique_indices] + combined_cloud["coord"] = combined_cloud["coord"][unique_indices] + combined_cloud["color"] = combined_cloud["color"][unique_indices] + + print(f" Combined: {len(unique_indices):,} unique voxels after deduplication") + + return combined_cloud + + def preprocess_point_cloud(points, colors, grid_size, num_bins): transform = Compose( [ @@ -78,47 +367,70 @@ def generate_layout( prompt = f"<|point_start|><|point_pad|><|point_end|>{task_prompt} The reference code is as followed: {code_template}" # prepare the conversation data - if model.config.model_type == "spatiallm_qwen": - conversation = [ - {"role": "system", "content": "You are a helpful assistant."}, - {"role": "user", "content": prompt}, - ] - else: - conversation = [{"role": "user", "content": prompt}] + conversation = [{"role": "user", "content": prompt}] input_ids = tokenizer.apply_chat_template( conversation, add_generation_prompt=True, return_tensors="pt" ) input_ids = input_ids.to(model.device) - streamer = TextIteratorStreamer( - tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True - ) + # Choose between streaming and non-streaming based on num_beams + if num_beams == 1: + # Use streaming for single beam (default behavior) + print("Using streaming generation (num_beams=1)") + + streamer = TextIteratorStreamer( + tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True + ) - generate_kwargs = dict( - {"input_ids": input_ids, "point_clouds": point_cloud}, - streamer=streamer, - max_new_tokens=max_new_tokens, - do_sample=True, - use_cache=True, - temperature=temperature, - top_p=top_p, - top_k=top_k, - num_beams=num_beams, - ) - t = Thread(target=model.generate, kwargs=generate_kwargs) - t.start() + generate_kwargs = dict( + {"input_ids": input_ids, "point_clouds": point_cloud}, + streamer=streamer, + max_new_tokens=max_new_tokens, + do_sample=True, + use_cache=True, + temperature=temperature, + top_p=top_p, + top_k=top_k, + num_beams=num_beams, + ) + t = Thread(target=model.generate, kwargs=generate_kwargs) + t.start() - print("Generating layout...\n") - generate_texts = [] - for text in streamer: - generate_texts.append(text) - print(text, end="", flush=True) - print("\nDone!") + print("Generating layout...\n") + generate_texts = [] + for text in streamer: + generate_texts.append(text) + print(text, end="", flush=True) + print("\nDone!") + + layout_str = "".join(generate_texts) + + else: + # Use non-streaming for multiple beams (num_beams > 1) + print(f"Using non-streaming generation (num_beams={num_beams})") + + generate_kwargs = dict( + input_ids=input_ids, + point_clouds=point_cloud, + max_new_tokens=max_new_tokens, + do_sample=True, + use_cache=True, + temperature=temperature, + top_p=top_p, + top_k=top_k, + num_beams=num_beams, + ) + + print("Generating layout...\n") + outputs = model.generate(**generate_kwargs) + print("Done!") + + generated_ids = outputs[:, input_ids.shape[-1]:] + layout_str = 
tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0] - layout_str = "".join(generate_texts) layout = Layout(layout_str) - layout.undiscretize_and_unnormalize(num_bins=model.config.point_config["num_bins"]) + layout.undiscretize_and_unnormalize(num_bins=num_bins) return layout @@ -267,9 +579,47 @@ def generate_layout( parser.add_argument( "--seed", type=int, - default=-1, + default=1, help="The seed to use during inference, negative value means no seed", ) + parser.add_argument( + "--save_processed_pcd", + action="store_true", + help="Save the processed point cloud as PLY to see what the model sees", + ) + parser.add_argument( + "--save_grid_pcd", + action="store_true", + help="Save the grid coordinate point cloud as PLY to see the voxel structure", + ) + parser.add_argument( + "--voxel_density_multiplier", + type=float, + default=1.0, + help="Multiply voxel count by this factor (2.0 = double voxels, default: 2.0)", + ) + parser.add_argument( + "--adaptive_sampling", + action="store_true", + help="Use adaptive voxel density based on local point density", + ) + parser.add_argument( + "--unified_adaptive", + action="store_true", + help="Use unified adaptive sampling (single coordinate system, recommended)", + ) + parser.add_argument( + "--density_levels", + type=int, + default=1, + help="Number of density levels for adaptive sampling (default: 3)", + ) + parser.add_argument("--scale", action="store_true", help="Optimize wall scales to fit grid.ply perimeter") + parser.add_argument( + "--cleanup", + action="store_true", + help="Remove intermediate artifacts (processed/grid PLYs, scale PNG). Final TXT remains.", + ) args = parser.parse_args() # load the model @@ -283,6 +633,7 @@ def generate_layout( # number of bins used for discretization num_bins = model.config.point_config["num_bins"] + print(f"Number of bins: {num_bins}") # check if the input is a single point cloud file or a folder containing multiple point cloud files if os.path.isfile(args.point_cloud): @@ -291,18 +642,48 @@ def generate_layout( point_cloud_files = glob.glob(os.path.join(args.point_cloud, "*.ply")) for point_cloud_file in tqdm(point_cloud_files): + # Track intermediates for optional cleanup + intermediates = [] + processed_ply_path = None + grid_ply_path = None + diagram_path = None + base_filename = os.path.splitext(os.path.basename(point_cloud_file))[0] # load the point cloud point_cloud = load_o3d_pcd(point_cloud_file) + # Use the standard grid size for coordinate system consistency + # The model was trained with this grid size and expects this coordinate scaling grid_size = Layout.get_grid_size(num_bins) + + # For higher voxel density, we'll modify the cleanup_pcd voxel_size instead + # This maintains coordinate system consistency while increasing point density + cleanup_voxel_size = grid_size / args.voxel_density_multiplier + print(f"Grid size: {grid_size:.6f} (standard), Cleanup voxel size: {cleanup_voxel_size:.6f} (density multiplier: {args.voxel_density_multiplier})") if not args.no_cleanup: - point_cloud = cleanup_pcd(point_cloud, voxel_size=grid_size) + point_cloud = cleanup_pcd(point_cloud, voxel_size=cleanup_voxel_size) points, colors = get_points_and_colors(point_cloud) min_extent = np.min(points, axis=0) # preprocess the point cloud to tensor features - input_pcd = preprocess_point_cloud(points, colors, grid_size, num_bins) + if args.unified_adaptive: + input_pcd = preprocess_point_cloud_adaptive_unified(points, colors, grid_size, num_bins, args.density_levels) + elif args.adaptive_sampling: + 
input_pcd = preprocess_point_cloud_adaptive(points, colors, grid_size, num_bins, args.density_levels) + else: + input_pcd = preprocess_point_cloud(points, colors, grid_size, num_bins) + + # Save intermediate processed point cloud if requested + if args.save_processed_pcd or args.save_grid_pcd: + if args.save_processed_pcd: + processed_ply_path = f"{base_filename}_processed.ply" + save_processed_pointcloud(input_pcd, processed_ply_path, min_extent) + intermediates.append(processed_ply_path) + + if args.save_grid_pcd: + grid_ply_path = f"{base_filename}_grid.ply" + save_grid_coordinates_pointcloud(input_pcd, grid_ply_path, grid_size_m=grid_size, min_extent=min_extent) + intermediates.append(grid_ply_path) # generate the layout layout = generate_layout( @@ -321,6 +702,36 @@ def generate_layout( layout.translate(min_extent) pred_language_string = layout.to_language_string() + # Perform scale optimization if requested + if args.scale: + if not args.save_grid_pcd: + print("Warning: --scale flag requires --save_grid_pcd to be active. Forcing grid PCD generation.") + grid_ply_path = f"{base_filename}_grid.ply" + save_grid_coordinates_pointcloud(input_pcd, grid_ply_path, grid_size_m=grid_size, min_extent=min_extent) + intermediates.append(grid_ply_path) + + if os.path.exists(grid_ply_path): + from scale_optimizer import run_scale_optimization + output_dir = os.path.dirname(args.output) if os.path.splitext(args.output)[-1] else args.output + if output_dir: + os.makedirs(output_dir, exist_ok=True) + diagram_path = os.path.join(output_dir, f"{base_filename}_scale_optimization.png") + intermediates.append(diagram_path) + + print(f"Running scale optimization with grid file: {grid_ply_path}") + # Prefer using the original input PLY (meters) for perimeter extraction + optimized_layout = run_scale_optimization(layout, grid_ply_path, diagram_path, source_ply_path=point_cloud_file) + + if optimized_layout: + print("Scale optimization successful. Using optimized layout.") + layout = optimized_layout + pred_language_string = layout.to_language_string() + else: + print("Scale optimization failed. Using original layout.") + else: + print(f"Warning: Grid PLY file not found at {grid_ply_path}. 
Skipping scale optimization.") + + # check if the output path is a file or directory if os.path.splitext(args.output)[-1]: with open(args.output, "w") as f: @@ -330,3 +741,13 @@ def generate_layout( os.makedirs(args.output, exist_ok=True) with open(os.path.join(args.output, output_filename), "w") as f: f.write(pred_language_string) + + # Cleanup intermediates if requested + if args.cleanup and intermediates: + for p in set(intermediates): + try: + if p and os.path.exists(p): + os.remove(p) + print(f"Cleaned up: {p}") + except Exception as e: + print(f"Warning: Failed to remove {p}: {e}") diff --git a/inference_original.py b/inference_original.py new file mode 100644 index 0000000..bd1b661 --- /dev/null +++ b/inference_original.py @@ -0,0 +1,332 @@ +import os +import glob +import argparse + +import torch +import numpy as np +from tqdm import tqdm +from threading import Thread +from transformers import AutoTokenizer, AutoModelForCausalLM +from transformers import TextIteratorStreamer, set_seed + +from spatiallm import Layout +from spatiallm.pcd import load_o3d_pcd, get_points_and_colors, cleanup_pcd, Compose + +DETECT_TYPE_PROMPT = { + "all": "Detect walls, doors, windows, boxes.", + "arch": "Detect walls, doors, windows.", + "object": "Detect boxes.", +} + + +def preprocess_point_cloud(points, colors, grid_size, num_bins): + transform = Compose( + [ + dict(type="PositiveShift"), + dict(type="NormalizeColor"), + dict( + type="GridSample", + grid_size=grid_size, + hash_type="fnv", + mode="test", + keys=("coord", "color"), + return_grid_coord=True, + max_grid_coord=num_bins, + ), + ] + ) + point_cloud = transform( + { + "name": "pcd", + "coord": points.copy(), + "color": colors.copy(), + } + ) + coord = point_cloud["grid_coord"] + xyz = point_cloud["coord"] + rgb = point_cloud["color"] + point_cloud = np.concatenate([coord, xyz, rgb], axis=1) + return torch.as_tensor(np.stack([point_cloud], axis=0)) + + +def generate_layout( + model, + point_cloud, + tokenizer, + code_template_file, + top_k=10, + top_p=0.95, + temperature=0.6, + num_beams=1, + seed=-1, + max_new_tokens=4096, + detect_type="all", + categories=[], +): + if seed >= 0: + set_seed(seed) + + # load the code template + with open(code_template_file, "r") as f: + code_template = f.read() + + task_prompt = DETECT_TYPE_PROMPT[detect_type] + if detect_type != "arch" and categories: + task_prompt = task_prompt.replace("boxes", ", ".join(categories)) + print("Task prompt: ", task_prompt) + + prompt = f"<|point_start|><|point_pad|><|point_end|>{task_prompt} The reference code is as followed: {code_template}" + + # prepare the conversation data + if model.config.model_type == "spatiallm_qwen": + conversation = [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": prompt}, + ] + else: + conversation = [{"role": "user", "content": prompt}] + + input_ids = tokenizer.apply_chat_template( + conversation, add_generation_prompt=True, return_tensors="pt" + ) + input_ids = input_ids.to(model.device) + + streamer = TextIteratorStreamer( + tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True + ) + + generate_kwargs = dict( + {"input_ids": input_ids, "point_clouds": point_cloud}, + streamer=streamer, + max_new_tokens=max_new_tokens, + do_sample=True, + use_cache=True, + temperature=temperature, + top_p=top_p, + top_k=top_k, + num_beams=num_beams, + ) + t = Thread(target=model.generate, kwargs=generate_kwargs) + t.start() + + print("Generating layout...\n") + generate_texts = [] + for text in 
streamer: + generate_texts.append(text) + print(text, end="", flush=True) + print("\nDone!") + + layout_str = "".join(generate_texts) + layout = Layout(layout_str) + layout.undiscretize_and_unnormalize(num_bins=model.config.point_config["num_bins"]) + return layout + + +if __name__ == "__main__": + parser = argparse.ArgumentParser("SpatialLM inference script") + parser.add_argument( + "-p", + "--point_cloud", + type=str, + required=True, + help="Path to the input point cloud file or a folder containing multiple point cloud files", + ) + parser.add_argument( + "-o", + "--output", + type=str, + required=True, + help="Path to the output layout txt file or a folder to save multiple layout txt files", + ) + parser.add_argument( + "-m", + "--model_path", + type=str, + default="manycore-research/SpatialLM-Llama-1B", + help="Path to the model checkpoint", + ) + parser.add_argument( + "-d", + "--detect_type", + type=str, + default="all", + choices=["all", "arch", "object"], + help="The type of indoor elements to detect. all: (wall, door, window, box), arch: (wall, door, window), object: (box)", + ) + parser.add_argument( + "-c", + "--category", + nargs="+", + default=[], + choices=[ + "sofa", + "chair", + "dining_chair", + "bar_chair", + "stool", + "bed", + "pillow", + "wardrobe", + "nightstand", + "tv_cabinet", + "wine_cabinet", + "bathroom_cabinet", + "shoe_cabinet", + "entrance_cabinet", + "decorative_cabinet", + "washing_cabinet", + "wall_cabinet", + "sideboard", + "cupboard", + "coffee_table", + "dining_table", + "side_table", + "dressing_table", + "desk", + "integrated_stove", + "gas_stove", + "range_hood", + "micro-wave_oven", + "sink", + "stove", + "refrigerator", + "hand_sink", + "shower", + "shower_room", + "toilet", + "tub", + "illumination", + "chandelier", + "floor-standing_lamp", + "wall_decoration", + "painting", + "curtain", + "carpet", + "plants", + "potted_bonsai", + "tv", + "computer", + "air_conditioner", + "washing_machine", + "clothes_rack", + "mirror", + "bookcase", + "cushion", + "bar", + "screen", + "combination_sofa", + "dining_table_combination", + "leisure_table_and_chair_combination", + "multifunctional_combination_bed", + ], + help="A list of categories of objects to detect. 
If not specified, all categories will be detected.", + ) + parser.add_argument( + "-t", + "--code_template_file", + type=str, + default="code_template.txt", + help="Path to the code template file", + ) + parser.add_argument( + "--top_k", + type=int, + default=10, + help="The number of highest probability vocabulary tokens to keep for top-k filtering", + ) + parser.add_argument( + "--top_p", + type=float, + default=0.95, + help="The smallest set of most probable tokens with probabilities that add up to top_p or higher are kept", + ) + parser.add_argument( + "--temperature", + type=float, + default=0.6, + help="The value used to module the next token probabilities", + ) + parser.add_argument( + "--num_beams", + type=int, + default=1, + help="The number of beams for beam search", + ) + parser.add_argument( + "--inference_dtype", + type=str, + default="bfloat16", + help="The torch dtype to use for inference, bfloat16 or float32", + ) + parser.add_argument( + "--no_cleanup", + default=False, + action="store_true", + help="Whether to not cleanup the point cloud", + ) + parser.add_argument( + "--seed", + type=int, + default=-1, + help="The seed to use during inference, negative value means no seed", + ) + args = parser.parse_args() + + # load the model + tokenizer = AutoTokenizer.from_pretrained(args.model_path) + model = AutoModelForCausalLM.from_pretrained( + args.model_path, torch_dtype=getattr(torch, args.inference_dtype) + ) + model.to("cuda") + model.set_point_backbone_dtype(torch.float32) + model.eval() + + # number of bins used for discretization + num_bins = model.config.point_config["num_bins"] + + # check if the input is a single point cloud file or a folder containing multiple point cloud files + if os.path.isfile(args.point_cloud): + point_cloud_files = [args.point_cloud] + else: + point_cloud_files = glob.glob(os.path.join(args.point_cloud, "*.ply")) + + for point_cloud_file in tqdm(point_cloud_files): + # load the point cloud + point_cloud = load_o3d_pcd(point_cloud_file) + grid_size = Layout.get_grid_size(num_bins) + + if not args.no_cleanup: + point_cloud = cleanup_pcd(point_cloud, voxel_size=grid_size) + + points, colors = get_points_and_colors(point_cloud) + min_extent = np.min(points, axis=0) + + # preprocess the point cloud to tensor features + input_pcd = preprocess_point_cloud(points, colors, grid_size, num_bins) + + # generate the layout + layout = generate_layout( + model, + input_pcd, + tokenizer, + args.code_template_file, + top_k=args.top_k, + top_p=args.top_p, + temperature=args.temperature, + num_beams=args.num_beams, + seed=args.seed, + detect_type=args.detect_type, + categories=args.category, + ) + layout.translate(min_extent) + pred_language_string = layout.to_language_string() + + # check if the output path is a file or directory + if os.path.splitext(args.output)[-1]: + with open(args.output, "w") as f: + f.write(pred_language_string) + else: + output_filename = os.path.basename(point_cloud_file).replace(".ply", ".txt") + os.makedirs(args.output, exist_ok=True) + with open(os.path.join(args.output, output_filename), "w") as f: + f.write(pred_language_string) diff --git a/scale_optimizer.py b/scale_optimizer.py new file mode 100644 index 0000000..c3b9941 --- /dev/null +++ b/scale_optimizer.py @@ -0,0 +1,780 @@ +import numpy as np +import open3d as o3d +from scipy.optimize import minimize, Bounds, LinearConstraint +from scipy.spatial import cKDTree + +# Make matplotlib optional - if it's not available, diagrams won't be generated +try: + import matplotlib.pyplot 
as plt + HAS_MATPLOTLIB = True +except ImportError: + HAS_MATPLOTLIB = False + print("Warning: matplotlib not found. Diagrams will not be generated.") + +from copy import deepcopy + +from spatiallm.layout.layout import Layout +from spatiallm.layout.entity import Wall + +def huber_loss(distance: float, delta: float) -> float: + """Huber loss for non-negative distance.""" + if distance <= delta: + return 0.5 * distance * distance + return delta * (distance - 0.5 * delta) + +def point_to_segment_distance(point: np.ndarray, seg_start: np.ndarray, seg_end: np.ndarray) -> float: + """Distance from a point to a 2D segment.""" + edge = seg_end - seg_start + denom = float(np.dot(edge, edge)) + if denom == 0.0: + return float(np.linalg.norm(point - seg_start)) + t = float(np.dot(point - seg_start, edge) / denom) + t = max(0.0, min(1.0, t)) + closest = seg_start + t * edge + return float(np.linalg.norm(point - closest)) + +def compute_principal_angle(points: np.ndarray) -> float: + centered = points - np.mean(points, axis=0) + cov = np.cov(centered.T) + vals, vecs = np.linalg.eigh(cov) + principal_vec = vecs[:, int(np.argmax(vals))] + return float(np.arctan2(principal_vec[1], principal_vec[0])) + +def rotate_vectors(vectors: np.ndarray, angle: float) -> np.ndarray: + c, s = np.cos(angle), np.sin(angle) + R = np.array([[c, -s], [s, c]]) + return (R @ vectors.T).T + +def reconstruct_vertices_from_scales(scales: np.ndarray, vectors: np.ndarray) -> np.ndarray: + vertices = [np.zeros(2, dtype=float)] + for i in range(len(scales)): + vertices.append(vertices[-1] + scales[i] * vectors[i]) + return np.array(vertices[:-1]) + +def build_closure_matrix(vectors: np.ndarray) -> np.ndarray: + if vectors.size == 0: + return np.zeros((2, 0)) + return np.vstack([vectors[:, 0], vectors[:, 1]]) + +def compute_hull_edges(hull_points: np.ndarray): + edges = [] + dirs = [] + angles = [] + H = len(hull_points) + for i in range(H): + p0 = hull_points[i] + p1 = hull_points[(i + 1) % H] + e = p1 - p0 + L = float(np.linalg.norm(e)) + if L == 0.0: + u = np.array([1.0, 0.0]) + else: + u = e / L + edges.append((p0, p1)) + dirs.append(u) + angles.append(float(np.arctan2(u[1], u[0]))) + return edges, np.array(dirs), np.array(angles) + +def select_candidate_edges(vectors: np.ndarray, hull_dirs: np.ndarray, angle_threshold_deg: float = 20.0): + candidates = [] + cos_thresh = float(np.cos(np.deg2rad(angle_threshold_deg))) + for v in vectors: + Lv = float(np.linalg.norm(v)) + if Lv == 0.0: + uv = np.array([1.0, 0.0]) + else: + uv = v / Lv + dots = hull_dirs @ uv + idx = np.where(np.abs(dots) >= cos_thresh)[0] + if idx.size == 0: + # fallback to best 3 by angle similarity + idx = np.argsort(-np.abs(dots))[:3] + candidates.append(idx.astype(int)) + return candidates + +def make_objective( + vectors: np.ndarray, + hull_points: np.ndarray, + num_samples_per_edge: int = 15, + huber_delta: float = 0.04, + reg_lambda: float = 3e-4, + smooth_lambda: float = 1e-3, + hull_edges: list | None = None, + candidate_edges: list | None = None, + use_kdtree: bool = True, + k_neighbors: int = 8, +): + # Precompute hull edges and candidates + if hull_edges is None: + hull_edges, hull_dirs, _ = compute_hull_edges(hull_points) + else: + # Recompute dirs for safety + hull_dirs = [] + for (p0, p1) in hull_edges: + e = p1 - p0 + L = float(np.linalg.norm(e)) + hull_dirs.append(e / L if L != 0.0 else np.array([1.0, 0.0])) + hull_dirs = np.array(hull_dirs) + if candidate_edges is None: + candidate_edges = select_candidate_edges(vectors, hull_dirs) + + hull_center 
= np.mean(hull_points, axis=0) + sample_ts = np.linspace(0.0, 1.0, num_samples_per_edge, dtype=float) + + # KD-tree over hull edge midpoints for spatial filtering + if use_kdtree: + mids = np.array([(e[0] + e[1]) * 0.5 for e in hull_edges]) + tree = cKDTree(mids) + + def objective(scales: np.ndarray) -> float: + vertices = reconstruct_vertices_from_scales(scales, vectors) + vertices_center = np.mean(vertices, axis=0) + translation = hull_center - vertices_center + vertices_aligned = vertices + translation + + total = 0.0 + # data term + for i in range(len(scales)): + start = vertices_aligned[i] + end = start + scales[i] * vectors[i] + cands = candidate_edges[i] if candidate_edges is not None else range(len(hull_edges)) + if use_kdtree: + # spatial filter: nearest k edge midpoints to the segment midpoint + seg_mid = (start + end) * 0.5 + _, idxs = tree.query(seg_mid, k=min(k_neighbors, len(hull_edges))) + if np.isscalar(idxs): + idxs = np.array([int(idxs)]) + # intersect orientation and spatial candidates + cands = np.intersect1d(np.array(cands, dtype=int), idxs.astype(int), assume_unique=False) + if cands.size == 0: + cands = idxs.astype(int) + for t in sample_ts: + pt = (1.0 - t) * start + t * end + min_d = float('inf') + for j in cands: + h0, h1 = hull_edges[j] + d = point_to_segment_distance(pt, h0, h1) + if d < min_d: + min_d = d + total += huber_loss(min_d, huber_delta) + # L2 on deviation from 1 + diff = scales - 1.0 + total += reg_lambda * float(np.dot(diff, diff)) + # Smoothness across adjacent scales (circular) + if smooth_lambda > 0.0 and len(scales) > 2: + smooth = 0.0 + for i in range(len(scales)): + prev_i = (i - 1) % len(scales) + d = scales[i] - scales[prev_i] + smooth += d * d + total += smooth_lambda * float(smooth) + return float(total) + + return objective + +def transform_points_similarity(points: np.ndarray, theta: float, scale: float, translation: np.ndarray) -> np.ndarray: + """Apply 2D similarity transform x' = s R x + t to a set of points. + + points: (N,2), translation: (2,) + """ + c, s = np.cos(theta), np.sin(theta) + R = np.array([[c, -s], [s, c]]) + return (scale * (R @ points.T)).T + translation + +def transform_points_anisotropic(points: np.ndarray, theta: float, scale_x: float, scale_y: float, translation: np.ndarray) -> np.ndarray: + """Apply 2D anisotropic similarity transform x' = R diag(sx, sy) x + t.""" + c, s = np.cos(theta), np.sin(theta) + R = np.array([[c, -s], [s, c]]) + D = np.array([[scale_x, 0.0], [0.0, scale_y]]) + return (R @ (D @ points.T)).T + translation + +def build_segments_from_vertices(vertices: np.ndarray) -> np.ndarray: + """Return segments as (M,2,2) array from polygon vertices.""" + if vertices.shape[0] < 2: + return np.zeros((0, 2, 2)) + M = vertices.shape[0] + segs = [] + for i in range(M): + p0 = vertices[i] + p1 = vertices[(i + 1) % M] + segs.append([p0, p1]) + return np.array(segs) + +def robust_align_hull_to_walls(hull_points: np.ndarray, orig_vertices: np.ndarray, huber_delta: float = 0.05): + """Estimate similarity transform (theta, scale, tx, ty) aligning noisy hull to straight-line walls. + + Minimizes Huber-robust sum of distances from transformed hull points to the nearest original wall segment. 
+ """ + # Downsample hull for speed + if len(hull_points) > 600: + step = max(1, len(hull_points) // 600) + hull_sample = hull_points[::step] + else: + hull_sample = hull_points + + segments = build_segments_from_vertices(orig_vertices) + + # Initial guess from PCA-based angle and RMS radius ratio + hull_center = np.mean(hull_sample, axis=0) + orig_center = np.mean(orig_vertices, axis=0) + try: + angle_h = compute_principal_angle(hull_sample) + angle_o = compute_principal_angle(orig_vertices) + theta0 = angle_o - angle_h + except Exception: + theta0 = 0.0 + r_o = float(np.sqrt(np.mean(np.sum((orig_vertices - orig_center) ** 2, axis=1)))) + r_h = float(np.sqrt(np.mean(np.sum((hull_sample - hull_center) ** 2, axis=1)))) + s0 = (r_o / r_h) if r_h > 1e-9 else 1.0 + # Translation to align centroids under initial rot-scale + c0, s_ = np.cos(theta0), np.sin(theta0) + R0 = np.array([[c0, -s_], [s_, c0]]) + t0 = orig_center - s0 * (R0 @ hull_center) + + def objective(p: np.ndarray) -> float: + theta, scale, tx, ty = p + transformed = transform_points_similarity(hull_sample, theta, max(scale, 1e-6), np.array([tx, ty])) + total = 0.0 + for pt in transformed: + # distance to nearest wall segment + dmin = float('inf') + for seg in segments: + d = point_to_segment_distance(pt, seg[0], seg[1]) + if d < dmin: + dmin = d + total += huber_loss(dmin, huber_delta) + # small regularizer to keep scale near 1 + total += 1e-3 * (scale - 1.0) * (scale - 1.0) + return float(total) + + # Bounds: theta free in [-pi, pi], scale in [0.5, 2.0], tx,ty wide bounds + bounds = [(-np.pi, np.pi), (0.5, 2.0), (-1e6, 1e6), (-1e6, 1e6)] + x0 = np.array([theta0, s0, t0[0], t0[1]], dtype=float) + res = minimize(objective, x0, method='L-BFGS-B', bounds=bounds, options={'maxiter': 300, 'ftol': 1e-12}) + theta_opt, scale_opt, tx_opt, ty_opt = res.x + return float(theta_opt), float(scale_opt), np.array([tx_opt, ty_opt]), res + +def robust_align_hull_to_walls_anisotropic(hull_points: np.ndarray, orig_vertices: np.ndarray, huber_delta: float = 0.05): + """Estimate anisotropic similarity (theta, sx, sy, tx, ty) aligning hull to straight walls. + + Minimizes Huber-robust sum of distances from transformed hull points to nearest wall segments. 
+ """ + # Downsample hull for speed + if len(hull_points) > 800: + step = max(1, len(hull_points) // 800) + hull_sample = hull_points[::step] + else: + hull_sample = hull_points + + segments = build_segments_from_vertices(orig_vertices) + + hull_center = np.mean(hull_sample, axis=0) + orig_center = np.mean(orig_vertices, axis=0) + try: + angle_h = compute_principal_angle(hull_sample) + angle_o = compute_principal_angle(orig_vertices) + theta0 = angle_o - angle_h + except Exception: + theta0 = 0.0 + + # Start from isotropic guess + r_o = float(np.sqrt(np.mean(np.sum((orig_vertices - orig_center) ** 2, axis=1)))) + r_h = float(np.sqrt(np.mean(np.sum((hull_sample - hull_center) ** 2, axis=1)))) + s0 = (r_o / r_h) if r_h > 1e-9 else 1.0 + sx0, sy0 = s0, s0 + c0, s_ = np.cos(theta0), np.sin(theta0) + R0 = np.array([[c0, -s_], [s_, c0]]) + t0 = orig_center - (R0 @ (np.array([[sx0, 0.0],[0.0, sy0]]) @ hull_center)) + + def objective(p: np.ndarray) -> float: + theta, log_sx, log_sy, tx, ty = p + sx = float(np.exp(log_sx)) + sy = float(np.exp(log_sy)) + pts = transform_points_anisotropic(hull_sample, theta, sx, sy, np.array([tx, ty])) + total = 0.0 + for pt in pts: + # nearest segment distance + dmin = float('inf') + for seg in segments: + d = point_to_segment_distance(pt, seg[0], seg[1]) + if d < dmin: + dmin = d + total += huber_loss(dmin, huber_delta) + # mild regularization on anisotropy to avoid degenerate scaling + total += 1e-3 * ((np.log(sx) - np.log(sy)) ** 2) + return float(total) + + # Bounds and init + bounds = [(-np.pi, np.pi), (np.log(0.5), np.log(2.0)), (np.log(0.5), np.log(2.0)), (-1e6, 1e6), (-1e6, 1e6)] + x0 = np.array([theta0, np.log(sx0), np.log(sy0), t0[0], t0[1]], dtype=float) + res = minimize(objective, x0, method='L-BFGS-B', bounds=bounds, options={'maxiter': 400, 'ftol': 1e-12}) + theta_opt, log_sx_opt, log_sy_opt, tx_opt, ty_opt = res.x + return float(theta_opt), float(np.exp(log_sx_opt)), float(np.exp(log_sy_opt)), np.array([tx_opt, ty_opt]), res + +def order_walls_connected(walls, tol_primary: float = 0.05, tol_secondary: float = 0.15, allow_reverse: bool = True, inplace: bool = False): + """ + Order walls so they form a connected chain with tolerant matching. + + - Prefer direct start match within tol_primary + - Else allow reversing a wall and match within tol_primary + - Else snap nearest candidate within tol_secondary by translating endpoints equally + + Returns (ordered_walls, is_closed). 
+ """ + if not walls: + return [], False + walls_in = list(walls) if inplace else [deepcopy(w) for w in walls] + n = len(walls_in) + if n == 1: + return [walls_in[0]], True + + def d2(p1, p2): + v = np.array(p1) - np.array(p2) + return float(np.dot(v, v)) + + ordered = [walls_in[0]] + used = {0} + + for step in range(n - 1): + last_wall = ordered[-1] + last_end = np.array([last_wall.bx, last_wall.by]) + + # 1) direct start match + next_idx = -1 + min_dist2 = float('inf') + for i, w in enumerate(walls_in): + if i in used: + continue + dist2 = d2([w.ax, w.ay], last_end) + if dist2 < min_dist2: + min_dist2 = dist2 + next_idx = i + if next_idx != -1 and np.sqrt(min_dist2) <= tol_primary: + ordered.append(walls_in[next_idx]) + used.add(next_idx) + continue + + # 2) reverse match + if allow_reverse: + next_idx_r = -1 + min_dist2_r = float('inf') + for i, w in enumerate(walls_in): + if i in used: + continue + dist2 = d2([w.bx, w.by], last_end) + if dist2 < min_dist2_r: + min_dist2_r = dist2 + next_idx_r = i + if next_idx_r != -1 and np.sqrt(min_dist2_r) <= tol_primary: + wcopy = walls_in[next_idx_r] + wcopy.ax, wcopy.bx = wcopy.bx, wcopy.ax + wcopy.ay, wcopy.by = wcopy.by, wcopy.ay + ordered.append(wcopy) + used.add(next_idx_r) + print(f"Connected by reversing wall at index {next_idx_r} (step {step}).") + continue + + # 3) snap nearest within tol_secondary + candidate_idx = -1 + best_is_start = True + best_dist2 = float('inf') + for i, w in enumerate(walls_in): + if i in used: + continue + d_start = d2([w.ax, w.ay], last_end) + d_end = d2([w.bx, w.by], last_end) + if d_start < best_dist2: + best_dist2 = d_start + candidate_idx = i + best_is_start = True + if allow_reverse and d_end < best_dist2: + best_dist2 = d_end + candidate_idx = i + best_is_start = False + if candidate_idx != -1 and np.sqrt(best_dist2) <= tol_secondary: + wcopy = walls_in[candidate_idx] + if best_is_start: + delta = last_end - np.array([wcopy.ax, wcopy.ay]) + wcopy.ax += delta[0]; wcopy.ay += delta[1] + wcopy.bx += delta[0]; wcopy.by += delta[1] + ordered.append(wcopy) + used.add(candidate_idx) + print(f"Snapped wall at index {candidate_idx} by {np.linalg.norm(delta):.3f} m to connect (step {step}).") + else: + wcopy.ax, wcopy.bx = wcopy.bx, wcopy.ax + wcopy.ay, wcopy.by = wcopy.by, wcopy.ay + delta = last_end - np.array([wcopy.ax, wcopy.ay]) + wcopy.ax += delta[0]; wcopy.ay += delta[1] + wcopy.bx += delta[0]; wcopy.by += delta[1] + ordered.append(wcopy) + used.add(candidate_idx) + print(f"Reversed+snapped wall at index {candidate_idx} by {np.linalg.norm(delta):.3f} m to connect (step {step}).") + continue + + print(f"Warning: Walls are not connected! Missing connection after step {step} (wall id={getattr(last_wall, 'id', step)}).") + return list(walls_in), False + + # Closure check and snap if close + first_start = np.array([ordered[0].ax, ordered[0].ay]) + last_end = np.array([ordered[-1].bx, ordered[-1].by]) + gap = float(np.linalg.norm(first_start - last_end)) + if gap <= tol_secondary: + delta = first_start - last_end + ordered[-1].bx += delta[0]; ordered[-1].by += delta[1] + if gap > tol_primary: + print(f"Closed loop by snapping final gap of {gap:.3f} m.") + return ordered, True + + print(f"Warning: Walls don't form closed loop! Gap: {gap:.6f}") + return ordered, False + +def extract_vectors_from_ordered_walls(ordered_walls): + """ + Extract direction vectors from ordered walls. + Ensures vectors follow the connectivity chain. 
+ """ + vectors = [] + vertices = [] + + if not ordered_walls: + return np.array([]), np.array([]) + + # Start from first wall's start point + start = np.array([ordered_walls[0].ax, ordered_walls[0].ay]) + vertices.append(start) + + for wall in ordered_walls: + # Vector is always from start to end of this wall + vec = np.array([wall.bx - wall.ax, wall.by - wall.ay]) + vectors.append(vec) + # Next vertex is this wall's endpoint + vertices.append(np.array([wall.bx, wall.by])) + + # Remove last vertex (it should equal the first for closed loop) + vertices = vertices[:-1] + + return np.array(vertices), np.array(vectors) + +def solve_for_closure(partial_scales, vectors, opt_indices, solve_indices): + """ + Given N-2 scales, solve for the 2 remaining scales to ensure closed loop. + """ + n = len(vectors) + full_scales = np.ones(n) + full_scales[opt_indices] = partial_scales + + # The constraint is that sum of all scaled vectors = 0 + # We have: sum(s_i * v_i for i in opt_indices) + s_a * v_a + s_b * v_b = 0 + # So: s_a * v_a + s_b * v_b = -sum(s_i * v_i for i in opt_indices) + + sum_opt = np.zeros(2) + for i, idx in enumerate(opt_indices): + sum_opt += partial_scales[i] * vectors[idx] + + # Build 2x2 system + A = np.column_stack([vectors[solve_indices[0]], vectors[solve_indices[1]]]) + b = -sum_opt + + try: + scales_solve = np.linalg.solve(A, b) + full_scales[solve_indices[0]] = scales_solve[0] + full_scales[solve_indices[1]] = scales_solve[1] + except np.linalg.LinAlgError: + print("Warning: Cannot solve for closure (walls may be parallel)") + # Keep default scales of 1 + + return full_scales + +def objective_function(partial_scales, vectors, hull_points, opt_indices, solve_indices): + """ + Minimize distance from wall vertices to hull perimeter. + """ + # Get full scales with closure constraint + scales = solve_for_closure(partial_scales, vectors, opt_indices, solve_indices) + + # Check if solved scales are valid (positive) + if np.any(scales <= 0): + return 1e10 # Large penalty for invalid scales + + # Build vertices from scaled vectors + vertices = [np.zeros(2)] # Start at origin (we'll translate later) + for i, vec in enumerate(vectors): + next_vertex = vertices[-1] + scales[i] * vec + vertices.append(next_vertex) + vertices = np.array(vertices[:-1]) # Remove duplicate last vertex + + # Center vertices to match hull + vertices_center = np.mean(vertices, axis=0) + hull_center = np.mean(hull_points, axis=0) + vertices = vertices - vertices_center + hull_center + + # Calculate distance from vertices to hull + total_dist = 0 + for v in vertices: + min_dist = float('inf') + for i in range(len(hull_points)): + p1 = hull_points[i] + p2 = hull_points[(i + 1) % len(hull_points)] + # Distance from point to line segment + edge = p2 - p1 + t = max(0, min(1, np.dot(v - p1, edge) / (np.dot(edge, edge) + 1e-10))) + closest = p1 + t * edge + dist = np.linalg.norm(v - closest) + min_dist = min(min_dist, dist) + total_dist += min_dist ** 2 + + return total_dist + +def generate_diagram(original_layout, optimized_layout, hull_points, output_path, meta_info: dict = None, hull_points_unscaled: np.ndarray | None = None): + """Generate comparison diagram with overlays and vertex markers. + + meta_info (optional) may contain: 'scale_min', 'scale_mean', 'scale_max', + 'closure_residual', 'objective'. + """ + if not HAS_MATPLOTLIB: + print(f"Skipping diagram generation (matplotlib not available). 
Would have saved to: {output_path}") + return + + plt.figure(figsize=(12, 10)) + + # Plot hull + hull_plot = np.vstack([hull_points, hull_points[0]]) + plt.plot(hull_plot[:, 0], hull_plot[:, 1], 'g.-', linewidth=2, markersize=3, label='Target Perimeter') + + # Plot unscaled/original hull in orange if provided + if hull_points_unscaled is not None: + hull_unscaled_plot = np.vstack([hull_points_unscaled, hull_points_unscaled[0]]) + plt.plot(hull_unscaled_plot[:, 0], hull_unscaled_plot[:, 1], color='orange', linestyle='-', linewidth=1.8, alpha=0.9, label='Target Perimeter (unscaled)') + + # Plot original walls + for wall in original_layout.walls: + plt.plot([wall.ax, wall.bx], [wall.ay, wall.by], 'b-', linewidth=2, alpha=0.7) + plt.plot([], [], 'b-', linewidth=2, label='Original Walls') + + # Plot optimized/scale-applied walls + if optimized_layout: + for wall in optimized_layout.walls: + plt.plot([wall.ax, wall.bx], [wall.ay, wall.by], 'r-', linewidth=2, alpha=0.85) + plt.plot([], [], 'r-', linewidth=2, label='Scale-applied Walls') + + # Vertex markers for visual diff + try: + ow_ordered, _ = order_walls_connected(original_layout.walls) + ov = [] + if ow_ordered: + ov.append([ow_ordered[0].ax, ow_ordered[0].ay]) + for w in ow_ordered: + ov.append([w.bx, w.by]) + ov = np.array(ov[:-1]) if len(ov) > 1 else np.array(ov) + if ov.size: + plt.scatter(ov[:, 0], ov[:, 1], c='b', s=10, alpha=0.6, marker='o', label='Original Vertices') + except Exception: + pass + try: + if optimized_layout: + nw_ordered, _ = order_walls_connected(optimized_layout.walls) + nv = [] + if nw_ordered: + nv.append([nw_ordered[0].ax, nw_ordered[0].ay]) + for w in nw_ordered: + nv.append([w.bx, w.by]) + nv = np.array(nv[:-1]) if len(nv) > 1 else np.array(nv) + if nv.size: + plt.scatter(nv[:, 0], nv[:, 1], c='r', s=12, alpha=0.9, marker='x', label='Scaled Vertices') + except Exception: + pass + + plt.xlabel('X (meters)') + plt.ylabel('Y (meters)') + title = 'Wall Scale Optimization' + if meta_info: + parts = [] + if meta_info.get('scale_min') is not None and meta_info.get('scale_mean') is not None and meta_info.get('scale_max') is not None: + parts.append(f"scales[min/mean/max]={meta_info['scale_min']:.3f}/{meta_info['scale_mean']:.3f}/{meta_info['scale_max']:.3f}") + if meta_info.get('closure_residual') is not None: + parts.append(f"||A s||={meta_info['closure_residual']:.2e}") + if meta_info.get('objective') is not None: + parts.append(f"obj={meta_info['objective']:.3f}") + if parts: + title += ' (' + ', '.join(parts) + ')' + plt.title(title) + plt.legend() + plt.grid(True, alpha=0.3) + plt.axis('equal') + plt.tight_layout() + plt.savefig(output_path) + plt.close() + print(f"Saved diagram to: {output_path}") + +def run_scale_optimization(layout: Layout, grid_ply_path: str, diagram_path: str, source_ply_path: str | None = None): + """Main optimization function. + + If source_ply_path is provided, the perimeter/hull will be extracted from the original input PLY (meters). + Otherwise, it will fall back to the grid PLY path. + """ + # 1. 
Load and process grid PLY + # Prefer original PLY (meters) if available + ply_to_read = source_ply_path if source_ply_path else grid_ply_path + try: + pcd = o3d.io.read_point_cloud(ply_to_read) + points = np.asarray(pcd.points) + print(f"Loaded PLY for perimeter extraction: {ply_to_read} (points={points.shape[0]})") + except Exception as e: + print(f"Error loading PLY '{ply_to_read}': {e}") + return None + + if points.shape[0] < 3: + print("Not enough points in grid PLY") + return None + + # Filter by height to get wall slice + z_coords = points[:, 2] + z_min = np.min(z_coords) + mask = (z_coords > z_min + 0.1) & (z_coords < z_min + 2.5) + points_2d = points[mask, :2] + + if points_2d.shape[0] < 3: + points_2d = points[:, :2] + + try: + # Efficient perimeter sampling using fixed bins across bounding box + n_bins = 100 + x = points_2d[:, 0] + y = points_2d[:, 1] + x_min, x_max = float(np.min(x)), float(np.max(x)) + y_min, y_max = float(np.min(y)), float(np.max(y)) + + # Y-sliced extremes (min/max X per Y bin) + bins_y = np.linspace(y_min, y_max, n_bins + 1) + idx_y = np.digitize(y, bins_y) - 1 + idx_y = np.clip(idx_y, 0, n_bins - 1) + min_x_by_bin = np.full(n_bins, np.inf) + max_x_by_bin = np.full(n_bins, -np.inf) + for b in range(n_bins): + maskb = (idx_y == b) + if not np.any(maskb): + continue + xb = x[maskb] + min_x_by_bin[b] = float(np.min(xb)) + max_x_by_bin[b] = float(np.max(xb)) + y_centers = 0.5 * (bins_y[:-1] + bins_y[1:]) + + perimeter_pts = [] + for b in range(n_bins): + if np.isfinite(min_x_by_bin[b]): + perimeter_pts.append([min_x_by_bin[b], y_centers[b]]) + if np.isfinite(max_x_by_bin[b]): + perimeter_pts.append([max_x_by_bin[b], y_centers[b]]) + + # X-sliced extremes (min/max Y per X bin) + bins_x = np.linspace(x_min, x_max, n_bins + 1) + idx_x = np.digitize(x, bins_x) - 1 + idx_x = np.clip(idx_x, 0, n_bins - 1) + min_y_by_bin = np.full(n_bins, np.inf) + max_y_by_bin = np.full(n_bins, -np.inf) + for b in range(n_bins): + maskb = (idx_x == b) + if not np.any(maskb): + continue + yb = y[maskb] + min_y_by_bin[b] = float(np.min(yb)) + max_y_by_bin[b] = float(np.max(yb)) + x_centers = 0.5 * (bins_x[:-1] + bins_x[1:]) + for b in range(n_bins): + if np.isfinite(min_y_by_bin[b]): + perimeter_pts.append([x_centers[b], min_y_by_bin[b]]) + if np.isfinite(max_y_by_bin[b]): + perimeter_pts.append([x_centers[b], max_y_by_bin[b]]) + + perimeter_pts = np.unique(np.array(perimeter_pts), axis=0) + centroid = np.mean(perimeter_pts, axis=0) + angles = np.arctan2(perimeter_pts[:, 1] - centroid[1], perimeter_pts[:, 0] - centroid[0]) + hull_points = perimeter_pts[np.argsort(angles)] + except Exception as e: + print(f"Error computing perimeter: {e}") + return None + + # 2. Order walls by connectivity + ordered_walls, connected = order_walls_connected(layout.walls) + if not connected: + print("ERROR: Input walls are not properly connected. Cannot optimize.") + generate_diagram(layout, None, hull_points, diagram_path) + return None + + # 3. Extract vertices and vectors from ordered walls + orig_vertices, vectors = extract_vectors_from_ordered_walls(ordered_walls) + if len(vectors) < 3: + print("Need at least 3 walls to optimize") + return None + + n = len(vectors) + base_lengths = np.linalg.norm(vectors, axis=1) + print(f"Connected loop with {n} walls. Base length stats (m): min={base_lengths.min():.3f} mean={base_lengths.mean():.3f} max={base_lengths.max():.3f}") + + # 4. 
Robust anisotropic pre-alignment: rotate + (sx, sy) scale + translate + angle_align, sx_align, sy_align, t_align, res_align = robust_align_hull_to_walls_anisotropic(hull_points, orig_vertices, huber_delta=0.05) + # Preserve unscaled (no scale) but rotated+translated hull for visualization + hull_points_unscaled = transform_points_anisotropic(hull_points, angle_align, 1.0, 1.0, t_align) + hull_points = transform_points_anisotropic(hull_points, angle_align, sx_align, sy_align, t_align) + print(f"Robust anisotropic pre-align hull: rot={np.degrees(angle_align):.2f} deg, sx={sx_align:.4f}, sy={sy_align:.4f}, t=({t_align[0]:.3f},{t_align[1]:.3f}).") + + # 5. Keep wall directions in the original frame for optimization + rotated_vectors = vectors + print("Using original wall directions for optimization (no rotation/scale on vectors).") + + # 6. Build linear closure constraints + A = build_closure_matrix(rotated_vectors) + print(f"Using trivial per-axis scale from alignment; skipping nonlinear optimization.") + + # 7. Trivial per-axis: derive per-wall inverse scale to undo anisotropic factors + # For wall i with unit direction u, inverse scale = sqrt((u_x/sx)^2 + (u_y/sy)^2) + final_scales = np.zeros(n, dtype=float) + for i in range(n): + v = rotated_vectors[i] + L = float(np.linalg.norm(v)) + u = (v / L) if L != 0.0 else np.array([1.0, 0.0]) + final_scales[i] = float(np.sqrt((u[0] / (sx_align if sx_align != 0.0 else 1.0))**2 + (u[1] / (sy_align if sy_align != 0.0 else 1.0))**2)) + eq_residual = A @ final_scales + print(f"Closure residual (A @ s): ||A s||={np.linalg.norm(eq_residual):.3e}, components=({eq_residual[0]:.3e}, {eq_residual[1]:.3e})") + + # 8. Apply scales to layout (preserve original orientation; anchor at original first vertex) + optimized_layout = deepcopy(layout) + optimized_walls, _ = order_walls_connected(optimized_layout.walls, inplace=True) + # Re-translate start to best match the aligned hull centroid to reduce drift + orig_start = np.array([optimized_walls[0].ax, optimized_walls[0].ay, optimized_walls[0].az]) + current_pos = orig_start + print("Applying scales to walls without rotation (preserving original orientation):") + for i, wall in enumerate(optimized_walls): + base_vec = vectors[i] + length = float(np.linalg.norm(base_vec)) + if length == 0.0: + direction = np.array([1.0, 0.0]) + new_length = 0.0 + else: + direction = base_vec / length + new_length = length * final_scales[i] + wall.ax, wall.ay, wall.az = current_pos + next_pos_2d = current_pos[:2] + direction * new_length + wall.bx, wall.by = next_pos_2d + wall.bz = wall.az + current_pos = np.array([wall.bx, wall.by, wall.bz]) + print(f" Wall {i} (id={getattr(wall, 'id', i)}): base_len={length:.3f} m, scale_inv={final_scales[i]:.4f} (undo sx={sx_align:.4f}, sy={sy_align:.4f}), new_len={new_length:.3f} m, start=({wall.ax:.3f},{wall.ay:.3f}), end=({wall.bx:.3f},{wall.by:.3f})") + + first_start = np.array([optimized_walls[0].ax, optimized_walls[0].ay]) + last_end = np.array([optimized_walls[-1].bx, optimized_walls[-1].by]) + closure_error = np.linalg.norm(first_start - last_end) + if closure_error > 1e-3: + print(f"WARNING: Closure error = {closure_error:.6f} meters") + else: + print(f"Closure check passed: residual distance between first/last = {closure_error:.6e} m") + + # 9. 
Diagram with meta + meta = { + 'scale_min': float(final_scales.min()), + 'scale_mean': float(final_scales.mean()), + 'scale_max': float(final_scales.max()), + 'closure_residual': float(np.linalg.norm(eq_residual)), + 'objective': None, + } + generate_diagram(layout, optimized_layout, hull_points, diagram_path, meta_info=meta, hull_points_unscaled=hull_points_unscaled) + return optimized_layout \ No newline at end of file diff --git a/spatiallm/model/sonata_encoder.py b/spatiallm/model/sonata_encoder.py index d4c396e..5077857 100644 --- a/spatiallm/model/sonata_encoder.py +++ b/spatiallm/model/sonata_encoder.py @@ -316,7 +316,7 @@ def __init__( proj_drop=0.0, order_index=0, enable_rpe=False, - enable_flash=True, + enable_flash=False, upcast_attention=True, upcast_softmax=True, ): @@ -523,7 +523,7 @@ def __init__( order_index=0, cpe_indice_key=None, enable_rpe=False, - enable_flash=True, + enable_flash=False, upcast_attention=True, upcast_softmax=True, ): @@ -823,7 +823,7 @@ def __init__( pre_norm=True, shuffle_orders=True, enable_rpe=False, - enable_flash=True, + enable_flash=False, upcast_attention=False, upcast_softmax=False, mask_token=False,
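Reviewer note on the unified adaptive path: preprocess_point_cloud_adaptive_unified() builds per-point sampling_weights and passes them to a DensityWeightedGridSample transform, falling back to plain GridSample when that transform is not registered. The sketch below shows one way such a weighted voxel selection could work (one representative point per occupied voxel, drawn with probability proportional to its weight). It is an illustrative assumption only, not the transform shipped with spatiallm/Pointcept, and the helper name density_weighted_voxel_select is hypothetical.

import numpy as np

def density_weighted_voxel_select(coords, weights, grid_size, rng=None):
    """Return indices of one point per occupied voxel, sampled by weight (sketch)."""
    rng = np.random.default_rng() if rng is None else rng
    voxel = np.floor(coords / grid_size).astype(np.int64)
    # Sort points so that points sharing a voxel become contiguous groups.
    order = np.lexsort((voxel[:, 2], voxel[:, 1], voxel[:, 0]))
    voxel_sorted = voxel[order]
    boundaries = np.any(np.diff(voxel_sorted, axis=0) != 0, axis=1)
    starts = np.concatenate(([0], np.nonzero(boundaries)[0] + 1, [len(order)]))
    picked = []
    for s, e in zip(starts[:-1], starts[1:]):
        idx = order[s:e]
        p = weights[idx] / weights[idx].sum()
        picked.append(rng.choice(idx, p=p))   # higher-weight points win more often
    return np.array(picked)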
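Reviewer note on the closure constraint behind build_closure_matrix()/solve_for_closure() in scale_optimizer.py: for a closed wall loop the scaled direction vectors must sum to zero, so once N-2 scales are proposed the remaining two follow from a 2x2 linear solve. The snippet below is a minimal self-contained illustration of that identity; the rectangle and the proposed scales are made-up example values, not data from this repository.

import numpy as np

# Direction vectors of a 4-wall rectangular loop (closed at scale 1); example data.
vectors = np.array([[4.0, 0.0], [0.0, 3.0], [-4.0, 0.0], [0.0, -3.0]])
partial_scales = np.array([1.10, 0.95])       # scales proposed for the first two walls
opt_indices, solve_indices = [0, 1], [2, 3]

# Closure: s2*v2 + s3*v3 = -(s0*v0 + s1*v1)
rhs = -(partial_scales[:, None] * vectors[opt_indices]).sum(axis=0)
A = np.column_stack([vectors[solve_indices[0]], vectors[solve_indices[1]]])
s2, s3 = np.linalg.solve(A, rhs)

scales = np.array([partial_scales[0], partial_scales[1], s2, s3])
residual = (scales[:, None] * vectors).sum(axis=0)
print(scales, residual)   # residual is ~[0, 0], i.e. the scaled loop stays closed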
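Reviewer note on the per-wall factor computed in step 7 of run_scale_optimization(): for a wall with unit direction u, sqrt((u_x/sx)^2 + (u_y/sy)^2) equals the length change produced by applying the inverse anisotropic map diag(1/sx, 1/sy) along that direction, which is what "undoing" the hull's (sx, sy) factor per wall means here. A small check with illustrative values for sx, sy and the wall direction:

import numpy as np

sx, sy = 1.08, 0.97                 # example anisotropic factors from the hull alignment
v = np.array([2.0, 1.0])            # example wall direction in the original frame
u = v / np.linalg.norm(v)

inverse_scale = np.sqrt((u[0] / sx) ** 2 + (u[1] / sy) ** 2)
check = np.linalg.norm(np.diag([1.0 / sx, 1.0 / sy]) @ u)   # length of diag(1/sx,1/sy) @ u
assert np.isclose(inverse_scale, check)
print(inverse_scale)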