diff --git a/.gitignore b/.gitignore index a187cf9..e9aa7e9 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ bin/* testreport/* examples/backtothefuture/build/* +target/* diff --git a/build.xml b/build.xml deleted file mode 100644 index e9f749d..0000000 --- a/build.xml +++ /dev/null @@ -1,84 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Main target - - \ No newline at end of file diff --git a/libs/hamcrest-core.jar b/libs/hamcrest-core.jar deleted file mode 100644 index 9d5fe16..0000000 Binary files a/libs/hamcrest-core.jar and /dev/null differ diff --git a/libs/htmlcleaner-2.8.jar b/libs/htmlcleaner-2.8.jar deleted file mode 100644 index 05d0bd3..0000000 Binary files a/libs/htmlcleaner-2.8.jar and /dev/null differ diff --git a/libs/junit.jar b/libs/junit.jar deleted file mode 100644 index aaf7444..0000000 Binary files a/libs/junit.jar and /dev/null differ diff --git a/pom.xml b/pom.xml new file mode 100644 index 0000000..07c67ca --- /dev/null +++ b/pom.xml @@ -0,0 +1,41 @@ + + 4.0.0 + OpenGraph + OpenGraph + 0.0.1-SNAPSHOT + A Facebook OpenGraph implementation for Java + OpenGraph for Java + + + + org.hamcrest + hamcrest-core + 1.3 + + + org.jsoup + jsoup + 1.9.2 + + + junit + junit + 4.12 + + + + + src + + + maven-compiler-plugin + 3.3 + + + + + + + + \ No newline at end of file diff --git a/run.sh b/run.sh deleted file mode 100755 index 1b65105..0000000 --- a/run.sh +++ /dev/null @@ -1 +0,0 @@ -ant junit \ No newline at end of file diff --git a/src/org/opengraph/MetaElement.java b/src/main/java/org/opengraph/MetaElement.java similarity index 100% rename from src/org/opengraph/MetaElement.java rename to src/main/java/org/opengraph/MetaElement.java diff --git a/src/org/opengraph/OpenGraph.java b/src/main/java/org/opengraph/OpenGraph.java similarity index 87% rename from src/org/opengraph/OpenGraph.java rename to src/main/java/org/opengraph/OpenGraph.java index 62eda63..56c3e05 100644 --- a/src/org/opengraph/OpenGraph.java +++ b/src/main/java/org/opengraph/OpenGraph.java @@ -1,7 +1,5 @@ package org.opengraph; -import org.htmlcleaner.HtmlCleaner; -import org.htmlcleaner.TagNode; import java.io.BufferedReader; import java.io.InputStreamReader; @@ -13,6 +11,13 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; + + + /** * A Java object representation of an Open Graph enabled webpage. * A simplified layer over a Hastable. @@ -88,51 +93,46 @@ public OpenGraph(String url, boolean ignoreSpecErrors) throws java.io.IOExceptio } String headContentsStr = headContents.toString(); - HtmlCleaner cleaner = new HtmlCleaner(); - // parse the string HTML - TagNode pageData = cleaner.clean(headContentsStr); - // read in the declared namespaces - boolean hasOGspec = false; - TagNode headElement = pageData.findElementByName("head", true); - if (headElement.hasAttribute("prefix")) - { - String namespaceData = headElement.getAttributeByName("prefix"); - Pattern pattern = Pattern.compile("(([A-Za-z0-9_]+):\\s+(http:\\/\\/ogp.me\\/ns(\\/\\w+)*#))\\s*"); - Matcher matcher = pattern.matcher(namespaceData); - while (matcher.find()) - { + // parse the string HTML + Document parsedDocument = Jsoup.parse(headContentsStr); + // read in the declared namespaces + Elements headElement = parsedDocument.getElementsByTag("head"); + + boolean hasOGspec = false; + if (headElement.hasAttr("prefix")) { + String namespaceData = headElement.attr("prefix"); + Pattern pattern = Pattern.compile("(([A-Za-z0-9_]+):\\s+(http:\\/\\/ogp.me\\/ns(\\/\\w+)*#))\\s*"); + Matcher matcher = pattern.matcher(namespaceData); + while (matcher.find()) { String prefix = matcher.group(2); - String documentURI = matcher.group(3); - pageNamespaces.add(new OpenGraphNamespace(prefix, documentURI)); - if (prefix.equals("og")) - hasOGspec = true; + String documentURI = matcher.group(3); + pageNamespaces.add(new OpenGraphNamespace(prefix, documentURI)); + if (prefix.equals("og")) + hasOGspec = true; } - } + } - // some pages do not include the new OG spec - // this fixes compatibility - if (!hasOGspec) - pageNamespaces.add(new OpenGraphNamespace("og", "http:// ogp.me/ns#")); + // some pages do not include the new OG spec + // this fixes compatibility + if (!hasOGspec) + pageNamespaces.add(new OpenGraphNamespace("og", "http:// ogp.me/ns#")); // open only the meta tags - TagNode[] metaData = pageData.getElementsByName("meta", true); - for (TagNode metaElement : metaData) - { - for (OpenGraphNamespace namespace : pageNamespaces) - { - String target = null; - if (metaElement.hasAttribute("property")) - target = "property"; - else if (metaElement.hasAttribute("name")) - target = "name"; - - if (target != null && metaElement.getAttributeByName(target).startsWith(namespace.getPrefix() + ":")) - { - setProperty(namespace, metaElement.getAttributeByName(target), metaElement.getAttributeByName("content")); - break; - } - } + Elements metaData = parsedDocument.getElementsByTag("meta"); + for (Element metaElement : metaData) { + for (OpenGraphNamespace namespace : pageNamespaces) { + String target = null; + if (metaElement.hasAttr("property")) + target = "property"; + else if (metaElement.hasAttr("name")) + target = "name"; + + if (target != null && metaElement.attr(target).startsWith(namespace.getPrefix() + ":")) { + setProperty(namespace, metaElement.attr(target), metaElement.attr("content")); + break; + } + } } /** diff --git a/src/org/opengraph/OpenGraphNamespace.java b/src/main/java/org/opengraph/OpenGraphNamespace.java similarity index 100% rename from src/org/opengraph/OpenGraphNamespace.java rename to src/main/java/org/opengraph/OpenGraphNamespace.java diff --git a/src/org/opengraphtests/OpenGraphTest.java b/src/test/main/java/org/opengraph/OpenGraphTest.java similarity index 81% rename from src/org/opengraphtests/OpenGraphTest.java rename to src/test/main/java/org/opengraph/OpenGraphTest.java index 5dff654..e8d5a19 100644 --- a/src/org/opengraphtests/OpenGraphTest.java +++ b/src/test/main/java/org/opengraph/OpenGraphTest.java @@ -1,4 +1,4 @@ -package org.opengraphtests; +package org.opengraph; import org.junit.Test; import org.opengraph.OpenGraph; @@ -9,7 +9,7 @@ public class OpenGraphTest { @Test public void shouldHandleMissingContentType() throws java.lang.Exception { OpenGraph site = new OpenGraph("http://www.bbc.com/future/story/20140428-the-myth-of-tech-revolutions", true); - assertEquals("Why it’s time to ditch the word ‘revolution’ in tech", site.getContent("title")); + assertEquals("Why it’s time to ditch the word ‘revolution’ in tech", site.getContent("title")); assertEquals("624", site.getContent("image:width")); }