From e46fd592645acca301370584d8434b3586cbb85a Mon Sep 17 00:00:00 2001 From: l3d00m Date: Fri, 7 Jun 2024 20:25:46 +0200 Subject: [PATCH 1/3] Add more Berlin companies --- places/berlin.ini | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/places/berlin.ini b/places/berlin.ini index f249dd9..212268a 100644 --- a/places/berlin.ini +++ b/places/berlin.ini @@ -111,3 +111,28 @@ url_path = div/div/div[2]/h2/a/@href title_path = div/div/div[2]/h2/a location_path = div/div/div[2]/address:(?<=,)[^,]* rooms_path = div/div/div[2]/ul/li[1]/span[2] + +[company:www.blp-management.com] +url = https://blp-management.com/immobilien/?post_type=immomakler_object&vermarktungsart=miete&nutzungsart=wohnen&typ=wohnung&ort=berlin&center&objekt-id&collapse=in +ad_path = .//div[@class='property-container'] +url_path = div[2]/h3/a/@href +title_path = div[2]/h3/a +location_path = div[2]/div +rooms_path = div[2]/div[2]/div[2] + +[company:www.immonexxt.com] +url = https://www.immonexxt.com/de/mietangebote-wohnen#filter=.geo-ort-berlin +ad_path = .//div[@id='estate_list']/a +url_path = ./@href +title_path = div/div[2]/h2 +location_path = div/div[2]/ul/li[2]/div[2]/span[1] +rooms_path = div/div[2]/ul/li[5]/div[2] + +[company:www.harry-gerlach.de] +url = https://www.harry-gerlach.de/wohnung-mieten-berlin/ +ad_path = .//div[@class='card__inner--content'] +url_path = div[4]/a/@href +title_path = div[2]/h2 +location_path = div[1]/p +rooms_path = div[3]/dl/dd[4] +rooms_optional = true From 57f862f45c16ef3074d54d8bacba26cbf9b6c2b0 Mon Sep 17 00:00:00 2001 From: l3d00m Date: Mon, 15 Jul 2024 14:24:28 +0000 Subject: [PATCH 2/3] fix: blp location parsing with regex --- places/berlin.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/places/berlin.ini b/places/berlin.ini index 212268a..8a567dc 100644 --- a/places/berlin.ini +++ b/places/berlin.ini @@ -117,7 +117,7 @@ url = https://blp-management.com/immobilien/?post_type=immomakler_object&vermark ad_path = 
.//div[@class='property-container'] url_path = div[2]/h3/a/@href title_path = div[2]/h3/a -location_path = div[2]/div +location_path = div[2]/div:[^,]* rooms_path = div[2]/div[2]/div[2] [company:www.immonexxt.com] From 0aa86ab5120efb6f7b90e97299725e47900d86f5 Mon Sep 17 00:00:00 2001 From: l3d00m Date: Mon, 15 Jul 2024 14:33:41 +0000 Subject: [PATCH 3/3] fix: location filter for immonexxt --- places/berlin.ini | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/places/berlin.ini b/places/berlin.ini index 8a567dc..7f60802 100644 --- a/places/berlin.ini +++ b/places/berlin.ini @@ -121,11 +121,12 @@ location_path = div[2]/div:[^,]* rooms_path = div[2]/div[2]/div[2] [company:www.immonexxt.com] -url = https://www.immonexxt.com/de/mietangebote-wohnen#filter=.geo-ort-berlin +url = https://www.immonexxt.com/de/mietangebote-wohnen ad_path = .//div[@id='estate_list']/a url_path = ./@href title_path = div/div[2]/h2 -location_path = div/div[2]/ul/li[2]/div[2]/span[1] +location_path = div/div[2]/ul/li[2]/div[2] +location_filter = Berlin rooms_path = div/div[2]/ul/li[5]/div[2] [company:www.harry-gerlach.de]