Skip to content

Commit 21dc699

Browse files
committed
add re-named packages
1 parent 7c5f698 commit 21dc699

File tree

286 files changed

+290902
-0
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

286 files changed

+290902
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,174 @@
1+
package commitminer.classify;
2+
3+
import java.io.BufferedReader;
4+
import java.io.File;
5+
import java.io.FileOutputStream;
6+
import java.io.FileReader;
7+
import java.io.PrintStream;
8+
import java.util.HashSet;
9+
import java.util.LinkedList;
10+
import java.util.List;
11+
import java.util.Map;
12+
import java.util.Set;
13+
14+
import org.deri.iris.api.IKnowledgeBase;
15+
import org.deri.iris.api.basics.IQuery;
16+
import org.deri.iris.api.basics.IRule;
17+
import org.deri.iris.api.basics.ITuple;
18+
import org.deri.iris.storage.IRelation;
19+
20+
import commitminer.analysis.Commit;
21+
import commitminer.analysis.DataSet;
22+
23+
/**
24+
* The {@code DataSet} manages the alerts that were generated during the
25+
* analysis.
26+
*/
27+
public class ClassifierDataSet extends DataSet {
28+
29+
/**
30+
* The path to the file where the data set will be cached. This allows us
31+
* to limit our memory use and cache results for the future by storing the
32+
* keyword extraction results on the disk.
33+
*/
34+
private String dataSetPath;
35+
36+
/** The feature vectors generated by the analysis. **/
37+
private List<ClassifierFeatureVector> featureVectors;
38+
39+
/** The queries to run and their transformers to alerts. **/
40+
private Map<IQuery, Transformer> transformers;
41+
42+
/**
43+
* Used to produce a data set of the analysis results.
44+
* @param dataSetPath The file path to store the data set.
45+
* @param supplementaryPath The directory path to store the supplementary
46+
* files.
47+
* @throws Exception Throws an exception when the {@code dataSetPath}
48+
* cannot be read.
49+
*/
50+
public ClassifierDataSet(String dataSetPath, List<IRule> rules,
51+
Map<IQuery, Transformer> transformers) {
52+
super(rules, new LinkedList<IQuery>(transformers.keySet()));
53+
this.featureVectors = new LinkedList<ClassifierFeatureVector>();
54+
this.dataSetPath = dataSetPath;
55+
this.transformers = transformers;
56+
}
57+
58+
/**
59+
* Adds a feature vector to the data set. If a data set file exist
60+
* ({@code dataSetPath}), serializes the feature vector and writes it to
61+
* the file. Otherwise, the feature vector is stored in memory in
62+
* {@code ClassifierDataSet}.
63+
* @param commit The commit that is being analyzed.
64+
* @param knowledgeBase The fact database to query.
65+
*/
66+
@Override
67+
protected void registerAlerts(Commit commit, IKnowledgeBase knowledgeBase)
68+
throws Exception {
69+
70+
Set<ClassifierFeatureVector> featureVectors = new HashSet<ClassifierFeatureVector>();
71+
72+
for(IQuery query : this.queries) {
73+
74+
IRelation results = knowledgeBase.execute(query);
75+
76+
Transformer transformer = this.transformers.get(query);
77+
78+
/* Iterate through the tuples that are members of the relation and add
79+
* them as alerts. */
80+
for(int i = 0; i < results.size(); i++) {
81+
82+
ITuple tuple = results.get(i);
83+
ClassifierFeatureVector featureVector = transformer.transform(commit, tuple);
84+
featureVectors.add(featureVector);
85+
86+
}
87+
88+
}
89+
90+
/* Store the feature vectors. */
91+
for(ClassifierFeatureVector featureVector : featureVectors) {
92+
if(this.dataSetPath != null) {
93+
try {
94+
this.storeClassifierFeatureVector(featureVector);
95+
} catch (Exception e) {
96+
System.err.println("Error while writing feature vector: " + e.getMessage());
97+
}
98+
}
99+
else {
100+
this.featureVectors.add(featureVector);
101+
}
102+
}
103+
104+
}
105+
106+
/**
107+
* Import a data set from a file to this {@code ClassifierDataSet}.
108+
* @param dataSetPath The file path where the data set is stored.
109+
* @throws Exception Occurs when the data set file cannot be read.
110+
*/
111+
public void importDataSet(String dataSetPath) throws Exception {
112+
113+
try(BufferedReader reader = new BufferedReader(new FileReader(dataSetPath))) {
114+
115+
for (String serialClassifierFeatureVector = reader.readLine();
116+
serialClassifierFeatureVector != null;
117+
serialClassifierFeatureVector = reader.readLine()) {
118+
119+
ClassifierFeatureVector featureVector = ClassifierFeatureVector.deSerialize(serialClassifierFeatureVector);
120+
121+
this.featureVectors.add(featureVector);
122+
123+
}
124+
125+
}
126+
catch(Exception e) {
127+
throw e;
128+
}
129+
130+
}
131+
132+
/**
133+
* Prints the feature vectors to the console.
134+
*/
135+
public void printDataSet() {
136+
for(ClassifierFeatureVector featureVector : this.featureVectors) {
137+
System.out.println(featureVector.serialize());
138+
}
139+
}
140+
141+
/**
142+
* @return The list of feature vectors in the data set.
143+
*/
144+
public List<ClassifierFeatureVector> getFeatureVectors() {
145+
return this.featureVectors;
146+
}
147+
148+
/**
149+
* Stores the feature vector in the file specified by {@code dataSetPath}.
150+
* This method is synchronized because it may be used by several
151+
* GitProjectAnalysis thread at the same time, which may cause race
152+
* conditions when writing to the output file.
153+
*
154+
* @param featureVector The feature vector to be managed by this class.
155+
*/
156+
private synchronized void storeClassifierFeatureVector(ClassifierFeatureVector featureVector) throws Exception {
157+
158+
/* The path to the file may not exist. Create it if needed. */
159+
File path = new File(this.dataSetPath);
160+
path.getParentFile().mkdirs();
161+
path.createNewFile();
162+
163+
/* May throw IOException if the path does not exist. */
164+
PrintStream stream = new PrintStream(new FileOutputStream(path, true));
165+
166+
/* Write the data set. */
167+
stream.println(featureVector.serialize());
168+
169+
/* Finished writing the feature vector. */
170+
stream.close();
171+
172+
}
173+
174+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,192 @@
1+
package commitminer.classify;
2+
3+
import commitminer.analysis.Commit;
4+
import commitminer.analysis.FeatureVector;
5+
import commitminer.analysis.Commit.Type;
6+
7+
/**
8+
* Stores a feature vector for a pattern query.
9+
*
10+
* The feature vector includes information that can be used to localize the
11+
* pattern, and a description of the patterns itself.
12+
*
13+
* Patterns must be specified as Datalog queries passed to the constructor.
14+
*/
15+
public class ClassifierFeatureVector extends FeatureVector {
16+
17+
/** The version of the file (SOURCE or DESTINATION) **/
18+
public String version;
19+
20+
/** The class that was analyzed (if at or below class granularity). **/
21+
public String klass;
22+
23+
/** The method that was analyzed (if at or below method granularity). **/
24+
public String method;
25+
26+
/** The line number for the alert. **/
27+
public String line;
28+
29+
/** The absolute position of the alert in the file. **/
30+
public String absolutePosition;
31+
32+
/** The length of the highlighting in the file. **/
33+
public String length;
34+
35+
/** The type of pattern found. **/
36+
public String type;
37+
38+
/** The subtype of pattern found. **/
39+
public String subtype;
40+
41+
/** A description of the pattern found. **/
42+
public String description;
43+
44+
/**
45+
* @param commit The commit that the features were extracted from.
46+
* @param klass The class that the features were extracted from.
47+
* @param method The method that the features were extracted from.
48+
*/
49+
@Deprecated
50+
public ClassifierFeatureVector(Commit commit, String version,
51+
String klass, String method,
52+
String line,
53+
String type, String subtype,
54+
String description) {
55+
super(commit);
56+
this.version = version;
57+
this.klass = klass;
58+
this.method = method;
59+
this.line = line;
60+
this.absolutePosition = "0";
61+
this.length = "0";
62+
this.type = type;
63+
this.subtype = subtype;
64+
this.description = description;
65+
}
66+
67+
/**
68+
* @param commit The commit that the features were extracted from.
69+
* @param klass The class that the features were extracted from.
70+
* @param method The method that the features were extracted from.
71+
*/
72+
public ClassifierFeatureVector(Commit commit, String version,
73+
String klass, String method,
74+
String line,
75+
String absolutePosition,
76+
String length,
77+
String type, String subtype,
78+
String description) {
79+
super(commit);
80+
this.version = version;
81+
this.klass = klass;
82+
this.method = method;
83+
this.line = line;
84+
this.absolutePosition = absolutePosition;
85+
this.length = length;
86+
this.type = type;
87+
this.subtype = subtype;
88+
this.description = description;
89+
}
90+
91+
/**
92+
* This constructor should only be used if making a feature vector from
93+
* serial. Otherwise the other constructor should be used so the ID is
94+
* automatically generated.
95+
* @param commit The commit that the features were extracted from.
96+
* @param klass The class that the features were extracted from.
97+
* @param method The method that the features were extracted from.
98+
* @param id The unique id for the alert.
99+
*/
100+
public ClassifierFeatureVector(Commit commit, String version,
101+
String klass, String method,
102+
String line, String type, String subtype,
103+
String description, int id) {
104+
super(commit, id);
105+
this.version = version;
106+
this.klass = klass;
107+
this.method = method;
108+
this.line = line;
109+
this.type = type;
110+
this.subtype = subtype;
111+
this.description = description;
112+
}
113+
114+
/**
115+
* This method serializes the alert. This is useful when writing
116+
* a data set to the disk.
117+
* @return The serialized version of the alert.
118+
* Has the format [ID, ProjectID, URL, BuggyCommit, RepairedCommit, [KeywordList]]
119+
* where KeywordList = [Type:Context:ChangeType:Package:Keyword:COUNT].
120+
*/
121+
public String serialize() {
122+
123+
String serialized = id + "," + this.commit.projectID
124+
+ "," + this.commit.commitMessageType.toString()
125+
+ "," + this.commit.url + "/commit/" + this.commit.repairedCommitID
126+
+ "," + this.commit.buggyCommitID + "," + this.commit.repairedCommitID
127+
+ "," + this.version
128+
+ "," + this.klass + "," + this.method
129+
+ "," + this.line
130+
+ "," + this.absolutePosition
131+
+ "," + this.length
132+
+ "," + this.type
133+
+ "," + this.subtype + "," + this.description;
134+
135+
return serialized;
136+
137+
}
138+
139+
/**
140+
* This method de-serializes a feature vector. This is useful when reading
141+
* a data set from the disk.
142+
* @param serialized The serialized version of a feature vector.
143+
* @return The feature vector represented by {@code serialized}.
144+
*/
145+
public static ClassifierFeatureVector deSerialize(String serialized) throws Exception {
146+
147+
String[] features = serialized.split(",");
148+
149+
if(features.length < 8) throw new Exception("De-serialization exception. Serial format not recognized.");
150+
151+
Commit commit = new Commit(features[1], features[3], features[4],
152+
features[5], Type.valueOf(features[2]));
153+
154+
ClassifierFeatureVector featureVector = new ClassifierFeatureVector(commit,
155+
features[6], features[7],
156+
features[8], features[9], features[10],
157+
features[11], features[12],
158+
Integer.parseInt(features[0]));
159+
160+
return featureVector;
161+
162+
}
163+
164+
@Override
165+
public String toString() {
166+
return this.serialize();
167+
}
168+
169+
@Override
170+
public boolean equals(Object o) {
171+
if(o instanceof ClassifierFeatureVector) {
172+
ClassifierFeatureVector sa = (ClassifierFeatureVector)o;
173+
if(this.commit.equals(sa.commit)
174+
&& this.version.equals(sa.version)
175+
&& this.klass.equals(sa.klass)
176+
&& this.method.equals(sa.method)
177+
&& this.line.equals(sa.line)
178+
&& this.absolutePosition.equals(sa.absolutePosition)
179+
&& this.length.equals(sa.length)
180+
&& this.type.equals(sa.type)
181+
&& this.subtype.equals(sa.subtype)
182+
&& this.description.equals(sa.description)) return true;
183+
}
184+
return false;
185+
}
186+
187+
@Override
188+
public int hashCode() {
189+
return (this.commit.projectID + this.commit.repairedCommitID + this.klass + this.method).hashCode();
190+
}
191+
192+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
package commitminer.classify;
2+
3+
import org.deri.iris.api.basics.ITuple;
4+
5+
import commitminer.analysis.Commit;
6+
7+
public interface Transformer {
8+
9+
public abstract ClassifierFeatureVector transform(Commit commit,
10+
ITuple tuple);
11+
12+
}

0 commit comments

Comments
 (0)