Skip to content

Commit

Permalink
Initial Commit
Browse files Browse the repository at this point in the history
  • Loading branch information
mark-ents24 committed Mar 1, 2017
0 parents commit 590ea7c
Show file tree
Hide file tree
Showing 3 changed files with 360 additions and 0 deletions.
102 changes: 102 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
                             http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.ents24.neo4j</groupId>
    <artifactId>similarity</artifactId>
    <version>1.0.0-SNAPSHOT</version>

    <packaging>jar</packaging>
    <name>Neo4j Similarity Functions</name>
    <description>A container for similarity functions for use natively within Neo4j</description>

    <properties>
        <neo4j.version>3.1.0</neo4j.version>
        <!-- Pin the source/resource encoding so the build does not fall back to
             the default charset of whichever machine happens to run it. -->
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>

    <dependencies>
        <dependency>
            <!-- This gives us the Procedure API our runtime code uses.
                 We have a `provided` scope on it, because when this is
                 deployed in a Neo4j Instance, the API will be provided
                 by Neo4j. If you add non-Neo4j dependencies to this
                 project, their scope should normally be `compile` -->
            <groupId>org.neo4j</groupId>
            <artifactId>neo4j</artifactId>
            <version>${neo4j.version}</version>
            <scope>provided</scope>
        </dependency>

        <dependency>
            <groupId>org.neo4j</groupId>
            <artifactId>neo4j-lucene-index</artifactId>
            <version>${neo4j.version}</version>
            <scope>provided</scope>
        </dependency>

        <dependency>
            <!-- Supplies the Mahout classes the similarity functions call.
                 `compile` scope, so the shade plugin below bundles it into
                 the jar that is deployed to Neo4j. -->
            <groupId>org.apache.mahout</groupId>
            <artifactId>mahout-mr</artifactId>
            <version>0.12.2</version>
            <scope>compile</scope>
        </dependency>

        <!-- Test Dependencies -->
        <dependency>
            <!-- This is used for a utility that lets us start Neo4j with
                 a specific Procedure, which is nice for writing tests. -->
            <groupId>org.neo4j.test</groupId>
            <artifactId>neo4j-harness</artifactId>
            <version>${neo4j.version}</version>
            <scope>test</scope>
        </dependency>

        <dependency>
            <!-- Used to send cypher statements to our procedure. -->
            <groupId>org.neo4j.driver</groupId>
            <artifactId>neo4j-java-driver</artifactId>
            <version>1.1.0</version>
            <scope>test</scope>
        </dependency>

        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
            <scope>test</scope>
        </dependency>

    </dependencies>

    <build>
        <plugins>
            <plugin>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.1</version>
                <configuration>
                    <!-- Neo4j Procedures require Java 8 -->
                    <source>1.8</source>
                    <target>1.8</target>
                </configuration>
            </plugin>
            <plugin>
                <!-- This generates a jar-file with our procedure code,
                     plus any dependencies marked as `compile` scope.
                     This should then be deployed in the `plugins` directory
                     of each Neo4j instance in your deployment.
                     After a restart, the procedure is available for calling. -->
                <artifactId>maven-shade-plugin</artifactId>
                <version>2.4.3</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>
76 changes: 76 additions & 0 deletions src/main/java/similarity/Similarity.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
package similarity;

import org.apache.mahout.math.hadoop.similarity.cooccurrence.measures.LoglikelihoodSimilarity;
import org.neo4j.procedure.Description;
import org.neo4j.procedure.Name;
import org.neo4j.procedure.UserFunction;

/**
 * User-defined Neo4j functions that score how strongly two events, A and B, are
 * associated, given their co-occurrence counts.
 *
 * <p>Every function takes the same four counts:
 * <ul>
 *   <li>{@code AB} - observations containing both A and B</li>
 *   <li>{@code A} - all observations containing A (those in {@code AB} included)</li>
 *   <li>{@code B} - all observations containing B (those in {@code AB} included)</li>
 *   <li>{@code total} - all observations</li>
 * </ul>
 * From these the four cells of a 2x2 contingency table are derived:
 * {@code AB}, {@code A - AB}, {@code B - AB} and {@code total - A - B + AB}.
 * The cells are expected to be non-negative, i.e. {@code AB <= A}, {@code AB <= B}
 * and {@code A + B - AB <= total}.
 */
public class Similarity
{
    /**
     * Computes the log-likelihood ratio of the 2x2 contingency table derived from the counts.
     *
     * @param AB    observations containing both A and B
     * @param A     all observations containing A
     * @param B     all observations containing B
     * @param total all observations
     * @return the log-likelihood ratio as computed by Mahout's {@code LogLikelihood}
     */
    @UserFunction
    @Description("similarity.LLR(AB, A, B, total) - return the log-likelihood ratio of A wrt B")
    public double LLR(
            @Name("both") long AB,
            @Name("all A") long A,
            @Name("all B") long B,
            @Name("total") long total) {

        return org.apache.mahout.math.stats.LogLikelihood.logLikelihoodRatio(AB, A-AB, B-AB, total-A-B+AB);
    }

    /**
     * Computes the log-likelihood similarity of A and B, {@code 1 - 1 / (1 + LLR)}.
     *
     * <p>This is the measure offered by Mahout's {@link LoglikelihoodSimilarity}. That class
     * only accepts the total as an {@code int}, so a {@code long} total had to be narrowed,
     * which silently corrupts totals above {@link Integer#MAX_VALUE}. The formula is therefore
     * evaluated here directly on {@code long} counts.
     *
     * @param AB    observations containing both A and B
     * @param A     all observations containing A
     * @param B     all observations containing B
     * @param total all observations
     * @return the log-likelihood similarity of A and B
     */
    @UserFunction
    @Description("similarity.LLSimilarity(AB, A, B, total) - return the log likelihood similarity of A and B")
    public double LLSimilarity(
            @Name("both") long AB,
            @Name("all A") long A,
            @Name("all B") long B,
            @Name("total") long total) {

        // NOTE(review): the B-AB / A-AB order below is intended to mirror the order used
        // inside Mahout's LoglikelihoodSimilarity, so that results stay bit-for-bit identical
        // to the previous delegation - confirm against the Mahout source.
        double logLikelihood = org.apache.mahout.math.stats.LogLikelihood.logLikelihoodRatio(
                AB, B-AB, A-AB, total-A-B+AB);

        return 1.0 - 1.0 / (1.0 + logLikelihood);
    }

    /**
     * Computes the log-likelihood distance between A and B, the complement of
     * {@link #LLSimilarity(long, long, long, long)}.
     *
     * @param AB    observations containing both A and B
     * @param A     all observations containing A
     * @param B     all observations containing B
     * @param total all observations
     * @return {@code 1.0 - LLSimilarity(AB, A, B, total)}
     */
    @UserFunction
    @Description("similarity.LLDistance(AB, A, B, total) - return the log likelihood distance between A and B")
    public double LLDistance(
            @Name("both") long AB,
            @Name("all A") long A,
            @Name("all B") long B,
            @Name("total") long total) {

        return 1.0 - LLSimilarity(AB, A, B, total);
    }

    /**
     * Computes the mutual information of A and B from the log-likelihood ratio.
     *
     * @param AB    observations containing both A and B
     * @param A     all observations containing A
     * @param B     all observations containing B
     * @param total all observations; used as a divisor, so it must be positive
     * @return the mutual information of A and B
     */
    @UserFunction
    @Description("similarity.mutualInformation(AB, A, B, total) - return the mutual information of A and B")
    public double mutualInformation(
            @Name("both") long AB,
            @Name("all A") long A,
            @Name("all B") long B,
            @Name("total") long total) {

        // LLR = 2 * N * MI
        // MI = LLR / (2 * N)

        return this.LLR(AB, A, B, total) / (2.0 * total);
    }

    /**
     * Computes the normalised mutual information distance between A and B:
     * one minus the mutual information divided by the normalised joint entropy
     * of the contingency table.
     *
     * @param AB    observations containing both A and B
     * @param A     all observations containing A
     * @param B     all observations containing B
     * @param total all observations; used as a divisor, so it must be positive
     * @return the normalised mutual information distance between A and B
     */
    @UserFunction
    @Description("similarity.NMID(AB, A, B, total) - return the normalised mutual information distance between A and B")
    public double NMID(
            @Name("both") long AB,
            @Name("all A") long A,
            @Name("all B") long B,
            @Name("total") long total) {

        // NMID = 1 - MI / H

        double normalisedJointEntropy = org.apache.mahout.math.stats.LogLikelihood.entropy(AB, A-AB, B-AB, total-A-B+AB) / total;

        return 1.0 - (this.mutualInformation(AB, A, B, total) / normalisedJointEntropy);
    }
}
182 changes: 182 additions & 0 deletions src/test/java/similarity/SimilarityTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
package similarity;

import org.junit.Rule;
import org.junit.Test;
import org.neo4j.driver.v1.Config;
import org.neo4j.driver.v1.Driver;
import org.neo4j.driver.v1.GraphDatabase;
import org.neo4j.driver.v1.Session;
import org.neo4j.harness.junit.Neo4jRule;

import static org.hamcrest.CoreMatchers.equalTo;
import static org.junit.Assert.assertThat;

public class SimilarityTest
{
// This rule starts a Neo4j instance
@Rule
public Neo4jRule neo4j = new Neo4jRule()

// This is the function we want to test
.withFunction( Similarity.class );

@Test
public void LLRCorrect() throws Throwable
{
// This is in a try-block, to make sure we close the driver after the test
try( Driver driver = GraphDatabase
.driver( neo4j.boltURI() , Config.build().withEncryptionLevel( Config.EncryptionLevel.NONE ).toConfig() ) )
{
// Given
Session session = driver.session();
double result;

// When - some trackers in common
result = session.run( "RETURN similarity.LLR(1, 4, 3, 11) AS result").single().get("result").asDouble();

// Then
assertThat( result, equalTo( 0.016502205534052905 ) );

// When - no trackers in common (actually with this size data set, that counter-intuitively contains more information
// and the test above!
result = session.run( "RETURN similarity.LLR(0, 3, 2, 10) AS result").single().get("result").asDouble();

// Then
assertThat( result, equalTo( 1.632274230570168 ) );

// When - all trackers in common
result = session.run( "RETURN similarity.LLR(3, 3, 3, 8) AS result").single().get("result").asDouble();

// Then
assertThat( result, equalTo( 10.58501181052771 ) );
}
}

@Test
public void LLSimilarityCorrect() throws Throwable
{
// This is in a try-block, to make sure we close the driver after the test
try( Driver driver = GraphDatabase
.driver( neo4j.boltURI() , Config.build().withEncryptionLevel( Config.EncryptionLevel.NONE ).toConfig() ) )
{
// Given
Session session = driver.session();
double result;

// When - some trackers in common
result = session.run( "RETURN similarity.LLSimilarity(1, 4, 3, 11) AS result").single().get("result").asDouble();

// Then
assertThat( result, equalTo( 0.016234303717406084 ) );

// When - no trackers in common (actually with this size data set, that counter-intuitively contains more information
// and the test above!
result = session.run( "RETURN similarity.LLSimilarity(0, 3, 2, 10) AS result").single().get("result").asDouble();

// Then
assertThat( result, equalTo( 0.6201003723751862 ) );

// When - all trackers in common
result = session.run( "RETURN similarity.LLSimilarity(3, 3, 3, 8) AS result").single().get("result").asDouble();

// Then
assertThat( result, equalTo( 0.9136815726772705 ) );
}
}

@Test
public void LLDistanceCorrect() throws Throwable
{
// This is in a try-block, to make sure we close the driver after the test
try( Driver driver = GraphDatabase
.driver( neo4j.boltURI() , Config.build().withEncryptionLevel( Config.EncryptionLevel.NONE ).toConfig() ) )
{
// Given
Session session = driver.session();
double result;

// When - some trackers in common
result = session.run( "RETURN similarity.LLDistance(1, 4, 3, 11) AS result").single().get("result").asDouble();

// Then
assertThat( result, equalTo( 0.9837656962825939 ) );

// When - no trackers in common (actually with this size data set, that counter-intuitively contains more information
// and the test above!
result = session.run( "RETURN similarity.LLDistance(0, 3, 2, 10) AS result").single().get("result").asDouble();

// Then
assertThat( result, equalTo( 0.3798996276248138 ) );

// When - all trackers in common
result = session.run( "RETURN similarity.LLDistance(3, 3, 3, 8) AS result").single().get("result").asDouble();

// Then
assertThat( result, equalTo( 0.08631842732272954 ) );
}
}

@Test
public void mutualInformationCorrect() throws Throwable
{
// This is in a try-block, to make sure we close the driver after the test
try( Driver driver = GraphDatabase
.driver( neo4j.boltURI() , Config.build().withEncryptionLevel( Config.EncryptionLevel.NONE ).toConfig() ) )
{
// Given
Session session = driver.session();
double result;

// When - some trackers in common
result = session.run( "RETURN similarity.mutualInformation(1, 4, 3, 11) AS result").single().get("result").asDouble();

// Then
assertThat( result, equalTo( 7.501002515478593E-4 ) );

// When - no trackers in common (actually with this size data set, that counter-intuitively contains more information
// and the test above!
result = session.run( "RETURN similarity.mutualInformation(0, 3, 2, 10) AS result").single().get("result").asDouble();

// Then
assertThat( result, equalTo( 0.08161371152850841 ) );

// When - all trackers in common
result = session.run( "RETURN similarity.mutualInformation(3, 3, 3, 8) AS result").single().get("result").asDouble();

// Then
assertThat( result, equalTo( 0.6615632381579819 ) );
}
}

@Test
public void NMIDCorrect() throws Throwable
{
// This is in a try-block, to make sure we close the driver after the test
try( Driver driver = GraphDatabase
.driver( neo4j.boltURI() , Config.build().withEncryptionLevel( Config.EncryptionLevel.NONE ).toConfig() ) )
{
// Given
Session session = driver.session();
double result;

// When - some trackers in common
result = session.run( "RETURN similarity.NMID(1, 4, 3, 11) AS result").single().get("result").asDouble();

// Then
assertThat( result, equalTo( 0.999395414082041 ) );

// When - no trackers in common (actually with this size data set, that counter-intuitively contains more information
// and the test above!
result = session.run( "RETURN similarity.NMID(0, 3, 2, 10) AS result").single().get("result").asDouble();

// Then
assertThat( result, equalTo( 0.9207366846756104 ) );

// When - all trackers in common
result = session.run( "RETURN similarity.NMID(3, 3, 3, 8) AS result").single().get("result").asDouble();

// Then
assertThat( result, equalTo( 0.0 ) );
}
}
}

0 comments on commit 590ea7c

Please sign in to comment.