-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathParseHTML.java
More file actions
44 lines (36 loc) · 898 Bytes
/
ParseHTML.java
File metadata and controls
44 lines (36 loc) · 898 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
package edu.ucr.cs172.project.partB;
import java.io.File;
import java.io.IOException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
/**
 * Extracts the title and the visible body text from an HTML file using jsoup.
 *
 * <p>Error handling is deliberately best-effort: a file that cannot be read is
 * reported on standard error and treated as an empty document, so
 * {@link #title()} and {@link #body()} return empty strings instead of failing
 * or reporting the content of a previously loaded file.
 */
public class ParseHTML
{
    // All files passed in are expected to be HTML files obtained from our Java crawler.
    // This class never assigns null here: the field starts as an empty shell
    // (html/head/body skeleton) and is replaced on every load.
    Document htmlDocument = Document.createShell("");

    /**
     * Creates a parser that holds an empty document; {@link #title()} and
     * {@link #body()} return empty strings until a file is loaded.
     */
    public ParseHTML()
    {
    }

    /**
     * Creates a parser and loads the given HTML file.
     *
     * @param htmlFile the HTML file to parse, read as UTF-8
     */
    public ParseHTML(File htmlFile)
    {
        // Uses the private helper rather than updateFile so that no overridable
        // method runs while the object is still being constructed.
        htmlDocument = parseOrEmpty(htmlFile);
    }

    /**
     * Replaces the current document with the parsed content of the given file.
     * If the file cannot be read, the current document becomes empty.
     *
     * @param htmlFile the HTML file to parse, read as UTF-8
     */
    public void updateFile(File htmlFile)
    {
        htmlDocument = parseOrEmpty(htmlFile);
    }

    /**
     * Returns the text of the document's {@code <title>} element.
     *
     * @return the document title, or an empty string if there is none
     */
    public String title()
    {
        return htmlDocument.title();
    }

    /**
     * Returns the combined text of the document's {@code <body>} element.
     *
     * @return the body text, or an empty string if the document has no body
     */
    public String body()
    {
        Element bodyElement = htmlDocument.body();
        // Guard for documents without a body element (e.g. frameset pages on
        // jsoup versions where body() may return null).
        return bodyElement == null ? "" : bodyElement.text();
    }

    /**
     * Parses the file as UTF-8 HTML with an empty base URI, falling back to an
     * empty document when the file cannot be read.
     */
    private static Document parseOrEmpty(File htmlFile)
    {
        try
        {
            return Jsoup.parse(htmlFile, "UTF-8", "");
        }
        catch (IOException e)
        {
            // Best-effort: report the failure and continue with an empty document.
            e.printStackTrace();
            return Document.createShell("");
        }
    }
}