-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathDay16.java
More file actions
73 lines (55 loc) · 2.27 KB
/
Day16.java
File metadata and controls
73 lines (55 loc) · 2.27 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
// ---------------------------------------------------
// Author : Benjamin Kataliko Viranga
// Community : Stunt Business
// Community website : www.stuntbusiness.com
//
// 30 Days - Q&A Java basic
// Day 16 : Web scraping with java
// Day 16 | IG : https://www.instagram.com/benjivrik/
// ----------------------------------------------------
// What would be the output of this program?
// Reference: https://medium.com/hackernoon/introduction-to-web-scraping-with-java-3d8a8d0f250b
import java.util.List;
// import com.fasterxml.jackson.databind.ObjectMapper;
import org.htmlunit.WebClient;
import org.htmlunit.html.HtmlElement;
import org.htmlunit.html.HtmlPage;
/**
 * Day 16 demo: fetches a single web page with HtmlUnit and prints every
 * {@code <h5>} element whose {@code class} attribute is exactly
 * {@code title-description}.
 *
 * <p>JavaScript and CSS processing are switched off on the client, so only
 * the static HTML returned by the server is parsed.
 */
public class Day16 {

    /** URL of the page to scrape. */
    private static final String BASE_URL =
            "https://www.stuntbusiness.ca/fr/legal/terms-and-conditions";

    /** XPath selecting every h5 element with class="title-description". */
    private static final String TITLE_XPATH = "//h5[@class='title-description']";

    /**
     * Program entry point: downloads {@link #BASE_URL}, runs
     * {@link #TITLE_XPATH} against it and prints each match as XML,
     * prefixed with a zero-based index.
     *
     * <p>Any failure while creating the client, fetching or parsing the page
     * is reported on standard error via its stack trace; the program then
     * still prints its closing message.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        System.out.println();

        // WebClient is HtmlUnit's simulated browser (see the HtmlUnit API
        // documentation for org.htmlunit.WebClient). try-with-resources
        // guarantees it is closed even when fetching or parsing throws.
        try (WebClient client = new WebClient()) {
            // Deactivate JavaScript and CSS.
            client.getOptions().setJavaScriptEnabled(false);
            client.getOptions().setCssEnabled(false);

            // Download and parse the HTML page.
            HtmlPage page = client.getPage(BASE_URL);

            // getByXPath is generic, so the element type is inferred from the
            // assignment target; the XPath only matches h5 elements.
            List<HtmlElement> items = page.getByXPath(TITLE_XPATH);

            if (items.isEmpty()) {
                System.out.println("Nothing found with your request.");
            } else {
                // Print every h5 found, numbered from 0.
                int index = 0;
                for (HtmlElement htmlItem : items) {
                    System.out.println("> " + (index++) + ":\n" + htmlItem.asXml());
                }
            }
        } catch (Exception e) {
            // Top-level boundary of a demo program: report the failure and
            // fall through to the closing message instead of crashing.
            e.printStackTrace();
        }

        System.out.println("End of program.");
        System.out.println();
    }
}