Skip to content

Commit 06b6861

Browse files
author
SentienceDEV
committed
fix tests
1 parent ebc67ef commit 06b6861

File tree

1 file changed

+13
-14
lines changed

1 file changed

+13
-14
lines changed

predicate/backends/sentience_context.py

Lines changed: 13 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -450,7 +450,7 @@ def _compress_href(self, href: str | None, page_url: str | None = None) -> str:
450450
page_url: Current page URL (to detect same-domain links)
451451

452452
Returns:
453-
Compressed href with meaningful path info (e.g., "/dp/B0FC5SJNQX" or "/s?k=mouse")
453+
Compressed href with meaningful path info (e.g., "B0FC5SJNQX" or "mouse")
454454
"""
455455
if not href:
456456
return ""
@@ -469,25 +469,25 @@ def _compress_href(self, href: str | None, page_url: str | None = None) -> str:
469469
)
470470

471471
if parsed.netloc and not is_same_domain:
472-
# External link - show domain
472+
# External link - show domain (truncate to 10 chars)
473473
parts = parsed.netloc.split(".")
474474
if len(parts) >= 2:
475-
return parts[-2][:15]
476-
return parsed.netloc[:15]
475+
return parts[-2][:10]
476+
return parsed.netloc[:10]
477477

478478
# Same domain or relative link - extract meaningful path
479479
path = parsed.path or ""
480480

481-
# For product pages, extract key identifiers
481+
# For product pages, extract key identifiers (just the ID, not the path prefix)
482482
# Amazon: /dp/XXXXX, /gp/product/XXXXX
483483
# Generic: /product/XXX, /item/XXX, /p/XXX
484484
import re
485485
product_patterns = [
486-
r"(/dp/[A-Z0-9]+)", # Amazon product
487-
r"(/gp/product/[A-Z0-9]+)", # Amazon alt
488-
r"(/product/[^/]+)", # Generic product
489-
r"(/item/[^/]+)", # Generic item
490-
r"(/p/[^/]+)", # Short product
486+
r"/dp/([A-Z0-9]+)", # Amazon product
487+
r"/gp/product/([A-Z0-9]+)", # Amazon alt
488+
r"/product/([^/]+)", # Generic product
489+
r"/item/([^/]+)", # Generic item
490+
r"/p/([^/]+)", # Short product
491491
]
492492
for pattern in product_patterns:
493493
match = re.search(pattern, path, re.IGNORECASE)
@@ -511,12 +511,11 @@ def _compress_href(self, href: str | None, page_url: str | None = None) -> str:
511511
if "/checkout" in path.lower():
512512
return "/checkout"
513513

514-
# Fallback: use last meaningful path segment
514+
# Fallback: use last meaningful path segment only (no leading slash)
515515
segments = [s for s in path.split("/") if s and len(s) > 1]
516516
if segments:
517-
# Return last 2 segments for context (max 30 chars)
518-
result = "/" + "/".join(segments[-2:])
519-
return result[:30]
517+
# Return only the last segment (max 30 chars)
518+
return segments[-1][:30]
520519

521520
return path[:30] if path else ""
522521

0 commit comments

Comments
 (0)