diff --git a/.gitignore b/.gitignore
index a187cf9..e9aa7e9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,4 @@
bin/*
testreport/*
examples/backtothefuture/build/*
+target/*
diff --git a/build.xml b/build.xml
deleted file mode 100644
index e9f749d..0000000
--- a/build.xml
+++ /dev/null
@@ -1,84 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- Main target
-
-
\ No newline at end of file
diff --git a/libs/hamcrest-core.jar b/libs/hamcrest-core.jar
deleted file mode 100644
index 9d5fe16..0000000
Binary files a/libs/hamcrest-core.jar and /dev/null differ
diff --git a/libs/htmlcleaner-2.8.jar b/libs/htmlcleaner-2.8.jar
deleted file mode 100644
index 05d0bd3..0000000
Binary files a/libs/htmlcleaner-2.8.jar and /dev/null differ
diff --git a/libs/junit.jar b/libs/junit.jar
deleted file mode 100644
index aaf7444..0000000
Binary files a/libs/junit.jar and /dev/null differ
diff --git a/pom.xml b/pom.xml
new file mode 100644
index 0000000..07c67ca
--- /dev/null
+++ b/pom.xml
@@ -0,0 +1,41 @@
+
+ 4.0.0
+ OpenGraph
+ OpenGraph
+ 0.0.1-SNAPSHOT
+ A Facebook OpenGraph implementation for Java
+ OpenGraph for Java
+
+
+
+ org.hamcrest
+ hamcrest-core
+ 1.3
+
+
+ org.jsoup
+ jsoup
+ 1.9.2
+
+
+ junit
+ junit
+ 4.12
+
+
+
+
+ src
+
+
+ maven-compiler-plugin
+ 3.3
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/run.sh b/run.sh
deleted file mode 100755
index 1b65105..0000000
--- a/run.sh
+++ /dev/null
@@ -1 +0,0 @@
-ant junit
\ No newline at end of file
diff --git a/src/org/opengraph/MetaElement.java b/src/main/java/org/opengraph/MetaElement.java
similarity index 100%
rename from src/org/opengraph/MetaElement.java
rename to src/main/java/org/opengraph/MetaElement.java
diff --git a/src/org/opengraph/OpenGraph.java b/src/main/java/org/opengraph/OpenGraph.java
similarity index 87%
rename from src/org/opengraph/OpenGraph.java
rename to src/main/java/org/opengraph/OpenGraph.java
index 62eda63..56c3e05 100644
--- a/src/org/opengraph/OpenGraph.java
+++ b/src/main/java/org/opengraph/OpenGraph.java
@@ -1,7 +1,5 @@
package org.opengraph;
-import org.htmlcleaner.HtmlCleaner;
-import org.htmlcleaner.TagNode;
import java.io.BufferedReader;
import java.io.InputStreamReader;
@@ -13,6 +11,13 @@
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
+
+
/**
* A Java object representation of an Open Graph enabled webpage.
* A simplified layer over a Hastable.
@@ -88,51 +93,46 @@ public OpenGraph(String url, boolean ignoreSpecErrors) throws java.io.IOExceptio
}
String headContentsStr = headContents.toString();
- HtmlCleaner cleaner = new HtmlCleaner();
- // parse the string HTML
- TagNode pageData = cleaner.clean(headContentsStr);
- // read in the declared namespaces
- boolean hasOGspec = false;
- TagNode headElement = pageData.findElementByName("head", true);
- if (headElement.hasAttribute("prefix"))
- {
- String namespaceData = headElement.getAttributeByName("prefix");
- Pattern pattern = Pattern.compile("(([A-Za-z0-9_]+):\\s+(http:\\/\\/ogp.me\\/ns(\\/\\w+)*#))\\s*");
- Matcher matcher = pattern.matcher(namespaceData);
- while (matcher.find())
- {
+ // parse the string HTML
+ Document parsedDocument = Jsoup.parse(headContentsStr);
+ // read in the declared namespaces
+ Elements headElement = parsedDocument.getElementsByTag("head");
+
+ boolean hasOGspec = false;
+ if (headElement.hasAttr("prefix")) {
+ String namespaceData = headElement.attr("prefix");
+ Pattern pattern = Pattern.compile("(([A-Za-z0-9_]+):\\s+(http:\\/\\/ogp.me\\/ns(\\/\\w+)*#))\\s*");
+ Matcher matcher = pattern.matcher(namespaceData);
+ while (matcher.find()) {
String prefix = matcher.group(2);
- String documentURI = matcher.group(3);
- pageNamespaces.add(new OpenGraphNamespace(prefix, documentURI));
- if (prefix.equals("og"))
- hasOGspec = true;
+ String documentURI = matcher.group(3);
+ pageNamespaces.add(new OpenGraphNamespace(prefix, documentURI));
+ if (prefix.equals("og"))
+ hasOGspec = true;
}
- }
+ }
- // some pages do not include the new OG spec
- // this fixes compatibility
- if (!hasOGspec)
- pageNamespaces.add(new OpenGraphNamespace("og", "http:// ogp.me/ns#"));
+ // some pages do not declare the "og" namespace in the head prefix attribute;
+ // register the default Open Graph namespace so their og: meta tags still match
+ if (!hasOGspec)
+ pageNamespaces.add(new OpenGraphNamespace("og", "http://ogp.me/ns#"));
// open only the meta tags
- TagNode[] metaData = pageData.getElementsByName("meta", true);
- for (TagNode metaElement : metaData)
- {
- for (OpenGraphNamespace namespace : pageNamespaces)
- {
- String target = null;
- if (metaElement.hasAttribute("property"))
- target = "property";
- else if (metaElement.hasAttribute("name"))
- target = "name";
-
- if (target != null && metaElement.getAttributeByName(target).startsWith(namespace.getPrefix() + ":"))
- {
- setProperty(namespace, metaElement.getAttributeByName(target), metaElement.getAttributeByName("content"));
- break;
- }
- }
+ Elements metaData = parsedDocument.getElementsByTag("meta");
+ for (Element metaElement : metaData) {
+ for (OpenGraphNamespace namespace : pageNamespaces) {
+ String target = null;
+ if (metaElement.hasAttr("property"))
+ target = "property";
+ else if (metaElement.hasAttr("name"))
+ target = "name";
+
+ if (target != null && metaElement.attr(target).startsWith(namespace.getPrefix() + ":")) {
+ setProperty(namespace, metaElement.attr(target), metaElement.attr("content"));
+ break;
+ }
+ }
}
/**
diff --git a/src/org/opengraph/OpenGraphNamespace.java b/src/main/java/org/opengraph/OpenGraphNamespace.java
similarity index 100%
rename from src/org/opengraph/OpenGraphNamespace.java
rename to src/main/java/org/opengraph/OpenGraphNamespace.java
diff --git a/src/org/opengraphtests/OpenGraphTest.java b/src/test/main/java/org/opengraph/OpenGraphTest.java
similarity index 81%
rename from src/org/opengraphtests/OpenGraphTest.java
rename to src/test/main/java/org/opengraph/OpenGraphTest.java
index 5dff654..e8d5a19 100644
--- a/src/org/opengraphtests/OpenGraphTest.java
+++ b/src/test/main/java/org/opengraph/OpenGraphTest.java
@@ -1,4 +1,4 @@
-package org.opengraphtests;
+package org.opengraph;
import org.junit.Test;
import org.opengraph.OpenGraph;
@@ -9,7 +9,7 @@ public class OpenGraphTest {
@Test
public void shouldHandleMissingContentType() throws java.lang.Exception {
OpenGraph site = new OpenGraph("http://www.bbc.com/future/story/20140428-the-myth-of-tech-revolutions", true);
- assertEquals("Why it’s time to ditch the word ‘revolution’ in tech", site.getContent("title"));
+ assertEquals("Why it’s time to ditch the word ‘revolution’ in tech", site.getContent("title"));
assertEquals("624", site.getContent("image:width"));
}