@@ -450,7 +450,7 @@ def _compress_href(self, href: str | None, page_url: str | None = None) -> str:
450450 page_url: Current page URL (to detect same-domain links)
451451
452452 Returns:
453- Compressed href with meaningful path info (e.g., "/dp/ B0FC5SJNQX" or "/s?k= mouse")
453+ Compressed href with meaningful path info (e.g., "B0FC5SJNQX" or "mouse")
454454 """
455455 if not href:
456456 return ""
@@ -469,25 +469,25 @@ def _compress_href(self, href: str | None, page_url: str | None = None) -> str:
469469 )
470470
471471 if parsed.netloc and not is_same_domain:
472- # External link - show domain
472+ # External link - show domain (truncate to 10 chars)
473473 parts = parsed.netloc.split(".")
474474 if len(parts) >= 2:
475- return parts[-2][:15 ]
476- return parsed.netloc[:15 ]
475+ return parts[-2][:10 ]
476+ return parsed.netloc[:10 ]
477477
478478 # Same domain or relative link - extract meaningful path
479479 path = parsed.path or ""
480480
481- # For product pages, extract key identifiers
481+ # For product pages, extract key identifiers (just the ID, not the path prefix)
482482 # Amazon: /dp/XXXXX, /gp/product/XXXXX
483483 # Generic: /product/XXX, /item/XXX, /p/XXX
484484 import re
485485 product_patterns = [
486- r"( /dp/[A-Z0-9]+)", # Amazon product
487- r"( /gp/product/[A-Z0-9]+)", # Amazon alt
488- r"( /product/[^/]+)", # Generic product
489- r"( /item/[^/]+)", # Generic item
490- r"( /p/[^/]+)", # Short product
486+ r"/dp/( [A-Z0-9]+)", # Amazon product
487+ r"/gp/product/( [A-Z0-9]+)", # Amazon alt
488+ r"/product/( [^/]+)", # Generic product
489+ r"/item/( [^/]+)", # Generic item
490+ r"/p/( [^/]+)", # Short product
491491 ]
492492 for pattern in product_patterns:
493493 match = re.search(pattern, path, re.IGNORECASE)
@@ -511,12 +511,11 @@ def _compress_href(self, href: str | None, page_url: str | None = None) -> str:
511511 if "/checkout" in path.lower():
512512 return "/checkout"
513513
514- # Fallback: use last meaningful path segment
514+ # Fallback: use last meaningful path segment only (no leading slash)
515515 segments = [s for s in path.split("/") if s and len(s) > 1]
516516 if segments:
517- # Return last 2 segments for context (max 30 chars)
518- result = "/" + "/".join(segments[-2:])
519- return result[:30]
517+ # Return only the last segment (max 30 chars)
518+ return segments[-1][:30]
520519
521520 return path[:30] if path else ""
522521
0 commit comments