-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 590ea7c
Showing
3 changed files
with
360 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
                             http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.ents24.neo4j</groupId>
    <artifactId>similarity</artifactId>
    <version>1.0.0-SNAPSHOT</version>

    <packaging>jar</packaging>
    <name>Neo4j Similarity Functions</name>
    <description>A container for similarity functions for use natively within Neo4j</description>

    <properties>
        <neo4j.version>3.1.0</neo4j.version>
        <!-- Pin the source encoding so the build does not depend on the
             platform default charset of whichever machine runs it. -->
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>

    <dependencies>
        <dependency>
            <!-- This gives us the Procedure API our runtime code uses.
                 We have a `provided` scope on it, because when this is
                 deployed in a Neo4j Instance, the API will be provided
                 by Neo4j. If you add non-Neo4j dependencies to this
                 project, their scope should normally be `compile` -->
            <groupId>org.neo4j</groupId>
            <artifactId>neo4j</artifactId>
            <version>${neo4j.version}</version>
            <scope>provided</scope>
        </dependency>

        <dependency>
            <groupId>org.neo4j</groupId>
            <artifactId>neo4j-lucene-index</artifactId>
            <version>${neo4j.version}</version>
            <scope>provided</scope>
        </dependency>

        <dependency>
            <!-- Supplies the log-likelihood implementations used by the
                 similarity functions. `compile` scope, so the shade plugin
                 below bundles it into the deployed jar. -->
            <groupId>org.apache.mahout</groupId>
            <artifactId>mahout-mr</artifactId>
            <version>0.12.2</version>
            <scope>compile</scope>
        </dependency>

        <!-- Test Dependencies -->
        <dependency>
            <!-- This is used for a utility that lets us start Neo4j with
                 a specific Procedure, which is nice for writing tests. -->
            <groupId>org.neo4j.test</groupId>
            <artifactId>neo4j-harness</artifactId>
            <version>${neo4j.version}</version>
            <scope>test</scope>
        </dependency>

        <dependency>
            <!-- Used to send cypher statements to our procedure. -->
            <groupId>org.neo4j.driver</groupId>
            <artifactId>neo4j-java-driver</artifactId>
            <version>1.1.0</version>
            <scope>test</scope>
        </dependency>

        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
            <scope>test</scope>
        </dependency>

    </dependencies>

    <build>
        <plugins>
            <plugin>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.1</version>
                <configuration>
                    <!-- Neo4j Procedures require Java 8 -->
                    <source>1.8</source>
                    <target>1.8</target>
                </configuration>
            </plugin>
            <plugin>
                <!-- This generates a jar-file with our procedure code,
                     plus any dependencies marked as `compile` scope.
                     This should then be deployed in the `plugins` directory
                     of each Neo4j instance in your deployment.
                     After a restart, the procedure is available for calling. -->
                <artifactId>maven-shade-plugin</artifactId>
                <version>2.4.3</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
package similarity; | ||
|
||
import org.apache.mahout.math.hadoop.similarity.cooccurrence.measures.LoglikelihoodSimilarity; | ||
import org.neo4j.procedure.Description; | ||
import org.neo4j.procedure.Name; | ||
import org.neo4j.procedure.UserFunction; | ||
|
||
/** | ||
* This is an example how you can create a simple user-defined function for Neo4j. | ||
*/ | ||
public class Similarity | ||
{ | ||
@UserFunction | ||
@Description("similarity.LLR(AB, A, B, total) - return the log-likelihood ratio of A wrt B") | ||
public double LLR( | ||
@Name("both") long AB, | ||
@Name("all A") long A, | ||
@Name("all B") long B, | ||
@Name("total") long total) { | ||
|
||
return org.apache.mahout.math.stats.LogLikelihood.logLikelihoodRatio(AB, A-AB, B-AB, total-A-B+AB); | ||
} | ||
|
||
@UserFunction | ||
@Description("similarity.LLSimilarity(AB, A, B, total) - return the log likelihood similarity of A and B") | ||
public double LLSimilarity( | ||
@Name("both") long AB, | ||
@Name("all A") long A, | ||
@Name("all B") long B, | ||
@Name("total") long total) { | ||
|
||
LoglikelihoodSimilarity lls = new LoglikelihoodSimilarity(); | ||
|
||
return lls.similarity(AB, A, B, (int) total); | ||
} | ||
|
||
@UserFunction | ||
@Description("similarity.LLDistance(AB, A, B, total) - return the log likelihood distance between A and B") | ||
public double LLDistance( | ||
@Name("both") long AB, | ||
@Name("all A") long A, | ||
@Name("all B") long B, | ||
@Name("total") long total) { | ||
|
||
return 1.0 - LLSimilarity(AB, A, B, total); | ||
} | ||
|
||
@UserFunction | ||
@Description("similarity.mutualInformation(AB, A, B, total) - return the mutual information of A and B") | ||
public double mutualInformation( | ||
@Name("both") long AB, | ||
@Name("all A") long A, | ||
@Name("all B") long B, | ||
@Name("total") long total) { | ||
|
||
// LLR = 2 * N * MI | ||
// MI = LLR / 2 * N | ||
|
||
return this.LLR(AB, A, B, total) / (2.0 * total); | ||
} | ||
|
||
@UserFunction | ||
@Description("similarity.NMID(AB, A, B, total) - return the normalised mutual information distance between A and B") | ||
public double NMID( | ||
@Name("both") long AB, | ||
@Name("all A") long A, | ||
@Name("all B") long B, | ||
@Name("total") long total) { | ||
|
||
// NMID = 1 - MI / H | ||
|
||
double normalisedJointEntropy = org.apache.mahout.math.stats.LogLikelihood.entropy(AB, A-AB, B-AB, total-A-B+AB) / total; | ||
|
||
return 1.0 - (this.mutualInformation(AB, A, B, total) / normalisedJointEntropy); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,182 @@ | ||
package similarity; | ||
|
||
import org.junit.Rule; | ||
import org.junit.Test; | ||
import org.neo4j.driver.v1.Config; | ||
import org.neo4j.driver.v1.Driver; | ||
import org.neo4j.driver.v1.GraphDatabase; | ||
import org.neo4j.driver.v1.Session; | ||
import org.neo4j.harness.junit.Neo4jRule; | ||
|
||
import static org.hamcrest.CoreMatchers.equalTo; | ||
import static org.junit.Assert.assertThat; | ||
|
||
public class SimilarityTest | ||
{ | ||
// This rule starts a Neo4j instance | ||
@Rule | ||
public Neo4jRule neo4j = new Neo4jRule() | ||
|
||
// This is the function we want to test | ||
.withFunction( Similarity.class ); | ||
|
||
@Test | ||
public void LLRCorrect() throws Throwable | ||
{ | ||
// This is in a try-block, to make sure we close the driver after the test | ||
try( Driver driver = GraphDatabase | ||
.driver( neo4j.boltURI() , Config.build().withEncryptionLevel( Config.EncryptionLevel.NONE ).toConfig() ) ) | ||
{ | ||
// Given | ||
Session session = driver.session(); | ||
double result; | ||
|
||
// When - some trackers in common | ||
result = session.run( "RETURN similarity.LLR(1, 4, 3, 11) AS result").single().get("result").asDouble(); | ||
|
||
// Then | ||
assertThat( result, equalTo( 0.016502205534052905 ) ); | ||
|
||
// When - no trackers in common (actually with this size data set, that counter-intuitively contains more information | ||
// and the test above! | ||
result = session.run( "RETURN similarity.LLR(0, 3, 2, 10) AS result").single().get("result").asDouble(); | ||
|
||
// Then | ||
assertThat( result, equalTo( 1.632274230570168 ) ); | ||
|
||
// When - all trackers in common | ||
result = session.run( "RETURN similarity.LLR(3, 3, 3, 8) AS result").single().get("result").asDouble(); | ||
|
||
// Then | ||
assertThat( result, equalTo( 10.58501181052771 ) ); | ||
} | ||
} | ||
|
||
@Test | ||
public void LLSimilarityCorrect() throws Throwable | ||
{ | ||
// This is in a try-block, to make sure we close the driver after the test | ||
try( Driver driver = GraphDatabase | ||
.driver( neo4j.boltURI() , Config.build().withEncryptionLevel( Config.EncryptionLevel.NONE ).toConfig() ) ) | ||
{ | ||
// Given | ||
Session session = driver.session(); | ||
double result; | ||
|
||
// When - some trackers in common | ||
result = session.run( "RETURN similarity.LLSimilarity(1, 4, 3, 11) AS result").single().get("result").asDouble(); | ||
|
||
// Then | ||
assertThat( result, equalTo( 0.016234303717406084 ) ); | ||
|
||
// When - no trackers in common (actually with this size data set, that counter-intuitively contains more information | ||
// and the test above! | ||
result = session.run( "RETURN similarity.LLSimilarity(0, 3, 2, 10) AS result").single().get("result").asDouble(); | ||
|
||
// Then | ||
assertThat( result, equalTo( 0.6201003723751862 ) ); | ||
|
||
// When - all trackers in common | ||
result = session.run( "RETURN similarity.LLSimilarity(3, 3, 3, 8) AS result").single().get("result").asDouble(); | ||
|
||
// Then | ||
assertThat( result, equalTo( 0.9136815726772705 ) ); | ||
} | ||
} | ||
|
||
@Test | ||
public void LLDistanceCorrect() throws Throwable | ||
{ | ||
// This is in a try-block, to make sure we close the driver after the test | ||
try( Driver driver = GraphDatabase | ||
.driver( neo4j.boltURI() , Config.build().withEncryptionLevel( Config.EncryptionLevel.NONE ).toConfig() ) ) | ||
{ | ||
// Given | ||
Session session = driver.session(); | ||
double result; | ||
|
||
// When - some trackers in common | ||
result = session.run( "RETURN similarity.LLDistance(1, 4, 3, 11) AS result").single().get("result").asDouble(); | ||
|
||
// Then | ||
assertThat( result, equalTo( 0.9837656962825939 ) ); | ||
|
||
// When - no trackers in common (actually with this size data set, that counter-intuitively contains more information | ||
// and the test above! | ||
result = session.run( "RETURN similarity.LLDistance(0, 3, 2, 10) AS result").single().get("result").asDouble(); | ||
|
||
// Then | ||
assertThat( result, equalTo( 0.3798996276248138 ) ); | ||
|
||
// When - all trackers in common | ||
result = session.run( "RETURN similarity.LLDistance(3, 3, 3, 8) AS result").single().get("result").asDouble(); | ||
|
||
// Then | ||
assertThat( result, equalTo( 0.08631842732272954 ) ); | ||
} | ||
} | ||
|
||
@Test | ||
public void mutualInformationCorrect() throws Throwable | ||
{ | ||
// This is in a try-block, to make sure we close the driver after the test | ||
try( Driver driver = GraphDatabase | ||
.driver( neo4j.boltURI() , Config.build().withEncryptionLevel( Config.EncryptionLevel.NONE ).toConfig() ) ) | ||
{ | ||
// Given | ||
Session session = driver.session(); | ||
double result; | ||
|
||
// When - some trackers in common | ||
result = session.run( "RETURN similarity.mutualInformation(1, 4, 3, 11) AS result").single().get("result").asDouble(); | ||
|
||
// Then | ||
assertThat( result, equalTo( 7.501002515478593E-4 ) ); | ||
|
||
// When - no trackers in common (actually with this size data set, that counter-intuitively contains more information | ||
// and the test above! | ||
result = session.run( "RETURN similarity.mutualInformation(0, 3, 2, 10) AS result").single().get("result").asDouble(); | ||
|
||
// Then | ||
assertThat( result, equalTo( 0.08161371152850841 ) ); | ||
|
||
// When - all trackers in common | ||
result = session.run( "RETURN similarity.mutualInformation(3, 3, 3, 8) AS result").single().get("result").asDouble(); | ||
|
||
// Then | ||
assertThat( result, equalTo( 0.6615632381579819 ) ); | ||
} | ||
} | ||
|
||
@Test | ||
public void NMIDCorrect() throws Throwable | ||
{ | ||
// This is in a try-block, to make sure we close the driver after the test | ||
try( Driver driver = GraphDatabase | ||
.driver( neo4j.boltURI() , Config.build().withEncryptionLevel( Config.EncryptionLevel.NONE ).toConfig() ) ) | ||
{ | ||
// Given | ||
Session session = driver.session(); | ||
double result; | ||
|
||
// When - some trackers in common | ||
result = session.run( "RETURN similarity.NMID(1, 4, 3, 11) AS result").single().get("result").asDouble(); | ||
|
||
// Then | ||
assertThat( result, equalTo( 0.999395414082041 ) ); | ||
|
||
// When - no trackers in common (actually with this size data set, that counter-intuitively contains more information | ||
// and the test above! | ||
result = session.run( "RETURN similarity.NMID(0, 3, 2, 10) AS result").single().get("result").asDouble(); | ||
|
||
// Then | ||
assertThat( result, equalTo( 0.9207366846756104 ) ); | ||
|
||
// When - all trackers in common | ||
result = session.run( "RETURN similarity.NMID(3, 3, 3, 8) AS result").single().get("result").asDouble(); | ||
|
||
// Then | ||
assertThat( result, equalTo( 0.0 ) ); | ||
} | ||
} | ||
} |