Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@
bin/*
testreport/*
examples/backtothefuture/build/*
target/*
84 changes: 0 additions & 84 deletions build.xml

This file was deleted.

Binary file removed libs/hamcrest-core.jar
Binary file not shown.
Binary file removed libs/htmlcleaner-2.8.jar
Binary file not shown.
Binary file removed libs/junit.jar
Binary file not shown.
41 changes: 41 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<!-- Maven build descriptor for the OpenGraph library: project coordinates, three dependencies, and compiler plugin setup. -->
<modelVersion>4.0.0</modelVersion>
<!-- Project coordinates. The SNAPSHOT suffix marks this as an unreleased development version. -->
<groupId>OpenGraph</groupId>
<artifactId>OpenGraph</artifactId>
<version>0.0.1-SNAPSHOT</version>
<name>A Facebook OpenGraph implementation for Java</name>
<description>OpenGraph for Java</description>

<!-- Libraries resolved by Maven. None of the entries below declares a <scope>, so Maven's default scope (compile) applies to all of them. -->
<dependencies>
<!-- Hamcrest matcher library. NOTE(review): presumably needed only by the tests alongside JUnit; confirm whether <scope>test</scope> is intended. -->
<dependency>
<groupId>org.hamcrest</groupId>
<artifactId>hamcrest-core</artifactId>
<version>1.3</version>
</dependency>
<!-- jsoup HTML parser. -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.9.2</version>
</dependency>
<!-- JUnit 4 test framework. NOTE(review): no <scope> is given, so it is on the compile classpath; confirm whether <scope>test</scope> is intended. -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.12</version>
</dependency>
</dependencies>

<build>
<!-- Main sources are read from src/ instead of Maven's conventional src/main/java. -->
<sourceDirectory>src</sourceDirectory>
<plugins>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.3</version>
<configuration>
<!-- NOTE(review): source and target are empty, so no explicit Java language level is pinned here. Confirm the intended version and set both values explicitly. -->
<source />
<target />
</configuration>
</plugin>
</plugins>
</build>
</project>
1 change: 0 additions & 1 deletion run.sh

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
package org.opengraph;

import org.htmlcleaner.HtmlCleaner;
import org.htmlcleaner.TagNode;

import java.io.BufferedReader;
import java.io.InputStreamReader;
Expand All @@ -13,6 +11,13 @@
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;



/**
* A Java object representation of an Open Graph enabled webpage.
* A simplified layer over a Hashtable.
Expand Down Expand Up @@ -88,51 +93,46 @@ public OpenGraph(String url, boolean ignoreSpecErrors) throws java.io.IOExceptio
}

String headContentsStr = headContents.toString();
HtmlCleaner cleaner = new HtmlCleaner();
// parse the string HTML
TagNode pageData = cleaner.clean(headContentsStr);

// read in the declared namespaces
boolean hasOGspec = false;
TagNode headElement = pageData.findElementByName("head", true);
if (headElement.hasAttribute("prefix"))
{
String namespaceData = headElement.getAttributeByName("prefix");
Pattern pattern = Pattern.compile("(([A-Za-z0-9_]+):\\s+(http:\\/\\/ogp.me\\/ns(\\/\\w+)*#))\\s*");
Matcher matcher = pattern.matcher(namespaceData);
while (matcher.find())
{
// parse the string HTML
Document parsedDocument = Jsoup.parse(headContentsStr);
// read in the declared namespaces
Elements headElement = parsedDocument.getElementsByTag("head");

boolean hasOGspec = false;
if (headElement.hasAttr("prefix")) {
String namespaceData = headElement.attr("prefix");
Pattern pattern = Pattern.compile("(([A-Za-z0-9_]+):\\s+(http:\\/\\/ogp.me\\/ns(\\/\\w+)*#))\\s*");
Matcher matcher = pattern.matcher(namespaceData);
while (matcher.find()) {
String prefix = matcher.group(2);
String documentURI = matcher.group(3);
pageNamespaces.add(new OpenGraphNamespace(prefix, documentURI));
if (prefix.equals("og"))
hasOGspec = true;
String documentURI = matcher.group(3);
pageNamespaces.add(new OpenGraphNamespace(prefix, documentURI));
if (prefix.equals("og"))
hasOGspec = true;
}
}
}

// some pages do not include the new OG spec
// this fixes compatibility
if (!hasOGspec)
pageNamespaces.add(new OpenGraphNamespace("og", "http:// ogp.me/ns#"));
// some pages do not include the new OG spec
// this fixes compatibility
if (!hasOGspec)
pageNamespaces.add(new OpenGraphNamespace("og", "http:// ogp.me/ns#"));

// open only the meta tags
TagNode[] metaData = pageData.getElementsByName("meta", true);
for (TagNode metaElement : metaData)
{
for (OpenGraphNamespace namespace : pageNamespaces)
{
String target = null;
if (metaElement.hasAttribute("property"))
target = "property";
else if (metaElement.hasAttribute("name"))
target = "name";

if (target != null && metaElement.getAttributeByName(target).startsWith(namespace.getPrefix() + ":"))
{
setProperty(namespace, metaElement.getAttributeByName(target), metaElement.getAttributeByName("content"));
break;
}
}
Elements metaData = parsedDocument.getElementsByTag("meta");
for (Element metaElement : metaData) {
for (OpenGraphNamespace namespace : pageNamespaces) {
String target = null;
if (metaElement.hasAttr("property"))
target = "property";
else if (metaElement.hasAttr("name"))
target = "name";

if (target != null && metaElement.attr(target).startsWith(namespace.getPrefix() + ":")) {
setProperty(namespace, metaElement.attr(target), metaElement.attr("content"));
break;
}
}
}

/**
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package org.opengraphtests;
package org.opengraph;

import org.junit.Test;
import org.opengraph.OpenGraph;
Expand All @@ -9,7 +9,7 @@ public class OpenGraphTest {
@Test
public void shouldHandleMissingContentType() throws java.lang.Exception {
OpenGraph site = new OpenGraph("http://www.bbc.com/future/story/20140428-the-myth-of-tech-revolutions", true);
assertEquals("Why its time to ditch the word revolution in tech", site.getContent("title"));
assertEquals("Why its time to ditch the word revolution in tech", site.getContent("title"));
assertEquals("624", site.getContent("image:width"));
}

Expand Down