Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 3 additions & 18 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
<groupId>ch.sentric</groupId>
<artifactId>url-normalization</artifactId>
<packaging>jar</packaging>
<version>1.0.0</version>
<version>1.0.0-nodeps</version>
<name>url-normalization</name>
<url>https://github.com/sentric/url-normalization</url>
<scm>
Expand All @@ -13,21 +13,6 @@
<url>https://github.com/sentric/url-normalization.git</url>
</scm>
<dependencies>
<dependency>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
<version>1.2.17</version>
</dependency>
<dependency>
<groupId>commons-collections</groupId>
<artifactId>commons-collections</artifactId>
<version>3.2.1</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.1</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
Expand All @@ -42,8 +27,8 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>2.3.2</version>
<configuration>
<source>1.6</source>
<target>1.6</target>
<source>1.7</source>
<target>1.7</target>
</configuration>
</plugin>
<plugin>
Expand Down
34 changes: 17 additions & 17 deletions src/main/java/ch/sentric/QueryFactory.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,9 @@

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.StringTokenizer;

import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.collections.Predicate;

/**
* <p>
* The QueryFactory parses the query string in order to assemble a list of key and value
Expand All @@ -34,7 +32,7 @@ public class QueryFactory {

/**
* URL query string filters to apply.
*
*
* <ul>
* <li>WebTrends (WT.): see
* http://www.heureka.com/upload/AdministrationUsersGuide.pdf, Chapter 27
Expand Down Expand Up @@ -108,24 +106,26 @@ public Query build(final String q) {
break;
}
}
CollectionUtils.filter(list, new Predicate() {

@Override
public boolean evaluate(final Object object) {
boolean allowedQueryParameter = true;
final QueryKeyValuePair queryKeyValuePair = (QueryKeyValuePair) object;
for (final String filter : filters) {
if (queryKeyValuePair.getKey().startsWith(filter)) {
allowedQueryParameter = false;
}
}
return allowedQueryParameter;
}
});
for (Iterator<QueryKeyValuePair> it = list.iterator(); it.hasNext();) {
if (shouldFilter(it.next())) {
it.remove();
}
}

return new Query(list, '&');
}

/**
 * Decides whether a query parameter has to be removed from the query.
 *
 * @param queryKeyValuePair
 *            the parameter to check
 * @return {@code true} when the key of the parameter starts with one of the
 *         configured {@code filters} prefixes, {@code false} otherwise
 */
private boolean shouldFilter(QueryKeyValuePair queryKeyValuePair) {
    for (final String prefix : filters) {
        // One matching prefix is enough; no need to test the remaining ones.
        if (queryKeyValuePair.getKey().startsWith(prefix)) {
            return true;
        }
    }
    return false;
}

/**
 * States used while parsing a query string.
 * NOTE(review): the parser that drives these states is outside this view;
 * the meaning of each constant is inferred from its name only - confirm
 * against the parsing loop in build().
 */
private enum ParserState {
KEY, VALUE, DELIMITER, EQUAL, START
}
Expand Down
32 changes: 15 additions & 17 deletions src/main/java/ch/sentric/QueryKeyValuePair.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,7 @@
*/
package ch.sentric;

import org.apache.commons.lang3.builder.EqualsBuilder;
import org.apache.commons.lang3.builder.HashCodeBuilder;
import java.util.Objects;

/**
* A {@link Comparable} key value pair, representing a query.
Expand All @@ -27,7 +26,7 @@ class QueryKeyValuePair implements Comparable<QueryKeyValuePair> {

/**
* The constructor, initializing the object.
*
*
* @param key
* the key
* @param value
Expand All @@ -53,24 +52,23 @@ public int compareTo(final QueryKeyValuePair other) {

@Override
public int hashCode() {
return new HashCodeBuilder(17, 31). // two randomly chosen prime numbers
append(getKey()).append(getValue()).toHashCode();
return Objects.hash(key, value);
}

@Override
public boolean equals(final Object obj) {
if (obj == null) {
return false;
}
if (obj == this) {
return true;
}
if (obj.getClass() != getClass()) {
return false;
}
if (obj == null) {
return false;
}
if (obj == this) {
return true;
}
if (obj.getClass() != getClass()) {
return false;
}

final QueryKeyValuePair rhs = (QueryKeyValuePair) obj;
return new EqualsBuilder().append(getKey(), rhs.getKey()).append(getValue(), rhs.getValue()).isEquals();
final QueryKeyValuePair rhs = (QueryKeyValuePair) obj;
return Objects.equals(key, rhs.key) && Objects.equals(value, rhs.value);
}

public String getKey() {
Expand All @@ -80,4 +78,4 @@ public String getKey() {
public String getValue() {
return value;
}
}
}
90 changes: 30 additions & 60 deletions src/main/java/ch/sentric/URL.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,7 @@
import java.net.URI;
import java.net.URISyntaxException;
import java.net.UnknownHostException;

import org.apache.commons.lang3.StringUtils;
import java.util.Objects;

/**
* The url class.
Expand All @@ -37,7 +36,7 @@ public class URL {

/**
* The constructor, initializing a url from {@link String}.
*
*
* @param url
* as string
* @throws MalformedURLException
Expand All @@ -50,7 +49,7 @@ public URL(final String url) throws MalformedURLException {

/**
* The constructor, initializing a url from {@link URI}.
*
*
* @param uri
* as URI
* @throws MalformedURLException
Expand All @@ -75,14 +74,14 @@ private void parse(final String url) throws MalformedURLException {
/**
* Returns a {@link URI} representation of this object or null when not
* valid. All fragments will be removed from the original URL.
*
*
* @return the uri
* @throws URISyntaxException
* when the uri couldn't be parsed
*/
public URI getURI() throws URISyntaxException {
URI uri = null;
if (StringUtils.isNotBlank(getFragment())) {
if (isNotBlank(getFragment())) {
uri = new URI(getUrlWithoutFragment());
} else {
uri = new URI(getGivenInputUrl());
Expand Down Expand Up @@ -121,7 +120,7 @@ public String getFragment() {
/**
* Replaces white spaces with '+' characters, removes jsession and phpsessid
* parameters.
*
*
* @return a url without white spaces and without jsessionid or phpsessid parameters
*/
public String getRepairedUrl() {
Expand All @@ -135,7 +134,7 @@ public String getNormalizedUrl() {

/**
* Resolve the ip address from the authority.
*
*
* @return ip address as string
* @throws UnknownHostException
* when ip can not be resolved
Expand All @@ -150,15 +149,7 @@ public String resolveIp() throws UnknownHostException {

@Override
public int hashCode() {
final int prime = 31;
int result = 1;
result = prime * result + (this.authority == null ? 0 : this.authority.hashCode());
result = prime * result + (this.fragment == null ? 0 : this.fragment.hashCode());
result = prime * result + (this.givenInputUrl == null ? 0 : this.givenInputUrl.hashCode());
result = prime * result + (this.path == null ? 0 : this.path.hashCode());
result = prime * result + (this.query == null ? 0 : this.query.hashCode());
result = prime * result + (this.scheme == null ? 0 : this.scheme.hashCode());
return result;
return Objects.hash(authority, fragment, givenInputUrl, path, query, scheme);
}

@Override
Expand All @@ -173,53 +164,32 @@ public boolean equals(final Object obj) {
return false;
}
final URL other = (URL) obj;
if (this.authority == null) {
if (other.authority != null) {
return false;
}
} else if (!this.authority.equals(other.authority)) {
return false;
}
if (this.fragment == null) {
if (other.fragment != null) {
return false;
}
} else if (!this.fragment.equals(other.fragment)) {
return false;
}
if (this.givenInputUrl == null) {
if (other.givenInputUrl != null) {
return false;
}
} else if (!this.givenInputUrl.equals(other.givenInputUrl)) {
return false;
}
if (this.path == null) {
if (other.path != null) {
return false;
}
} else if (!this.path.equals(other.path)) {
return false;
}
if (this.query == null) {
if (other.query != null) {
return false;
}
} else if (!this.query.equals(other.query)) {
return false;
}
if (this.scheme == null) {
if (other.scheme != null) {
return false;
}
} else if (!this.scheme.equals(other.scheme)) {
return false;
}
return true;

// Compare the same fields that hashCode() hashes, including query.
return Objects.equals(authority, other.authority) &&
        Objects.equals(fragment, other.fragment) &&
        Objects.equals(givenInputUrl, other.givenInputUrl) &&
        Objects.equals(path, other.path) &&
        Objects.equals(query, other.query) &&
        Objects.equals(scheme, other.scheme);
}


@Override
public String toString() {
return this.givenInputUrl;
}

/**
 * Tells whether the given string contains at least one non-whitespace
 * character.
 *
 * @param s
 *            the string to inspect, may be {@code null}
 * @return {@code true} if {@code s} is neither {@code null}, empty, nor made
 *         up solely of characters for which
 *         {@link Character#isWhitespace(char)} is true
 */
private boolean isNotBlank(String s) {
    if (s != null) {
        for (final char current : s.toCharArray()) {
            if (!Character.isWhitespace(current)) {
                return true;
            }
        }
    }
    return false;
}
}
13 changes: 6 additions & 7 deletions src/test/java/ch/sentric/QueryFactoryTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@

import static org.junit.Assert.*;

import org.apache.commons.lang3.StringUtils;
import org.junit.Test;

/**
Expand Down Expand Up @@ -61,13 +60,13 @@ public void buildShouldIgnoreJessionIdWhenOrdering() {
@Test
public void buildShouldReturnNothingWhenJessionIsTheOnlyParameter() {
final Query query = new QueryFactory().build("jsessionid=9ADD207E33B1E66CE6121BC73AADB986");
assertEquals(StringUtils.EMPTY, query.getAsSortedString());
assertEquals("", query.getAsSortedString());
}

@Test
public void buildShouldReturnNothingWhenPhpsessidIsTheOnlyParameter() {
final Query query = new QueryFactory().build("phpsessid=9ADD207E33B1E66CE6121BC73AADB986");
assertEquals(StringUtils.EMPTY, query.getAsSortedString());
assertEquals("", query.getAsSortedString());
}

@Test
Expand All @@ -79,14 +78,14 @@ public void buildShouldSortQueryWithEqualKeys() {
@Test
public void buildShouldRemoveGoogleUrlTrackingParameter() {
final Query query = new QueryFactory().build("utm_campaign=Feed%3A+TheSouthwesternSunRss+%28The+Southwestern+Sun+RSS%29&utm_medium=feed&utm_source=feedburner");
assertEquals(StringUtils.EMPTY, query.getAsSortedString());
assertEquals("", query.getAsSortedString());
}

@Test
public void buildShouldRemoveGoogleGifRequestTrackingParameter() {
final Query query = new QueryFactory()
.build("utmwv=4&utmn=769876874&utmhn=example.com&utmcs=ISO-8859-1&utmsr=1280x1024&utmsc=32-bit&utmul=en-us&utmje=1&utmfl=9.0%20%20r115&utmcn=1&utmdt=GATC012%20setting%20variables&utmhid=2059107202&utmr=0&utmp=/auto/GATC012.html?utm_source=www.gatc012.org&utm_campaign=campaign+gatc012&utm_term=keywords+gatc012&utm_content=content+gatc012&utm_medium=medium+gatc012&utmac=UA-30138-1&utmcc=__utma%3D97315849.1774621898.1207701397.1207701397.1207701397.1%3B... ");
assertEquals(StringUtils.EMPTY, query.getAsSortedString());
assertEquals("", query.getAsSortedString());
}

@Test
Expand All @@ -101,9 +100,9 @@ public void buildShouldRemoveWebtrendsRequestTrackingParameter() {
@Test
public void buildShouldRemoveYahooRequestTrackingParameter() {
Query query = new QueryFactory().build("OVRAW=cheap%20television&OVKEY=television&OVMTC=advanced&OVKWID=4317717511&OVADID=7306185511");
assertEquals(StringUtils.EMPTY, query.getAsSortedString());
assertEquals("", query.getAsSortedString());
query = new QueryFactory().build("YSMCAMPGID=123456&YSMADGRPID=654321");
assertEquals(StringUtils.EMPTY, query.getAsSortedString());
assertEquals("", query.getAsSortedString());
// but not lowercase
query = new QueryFactory().build("ysmcampgid=123456&YSMADGRPID=654321");
assertEquals("ysmcampgid=123456", query.getAsSortedString());
Expand Down