Skip to content

Commit

Permalink
performance improvement for timestamp lookup, better hostname lookup
Browse files Browse the repository at this point in the history
  • Loading branch information
Cornul11 committed Nov 22, 2023
1 parent 2bc379f commit f90adb2
Show file tree
Hide file tree
Showing 4 changed files with 147 additions and 74 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import nl.tudelft.cornul11.thesis.corpus.jarfile.JarProcessingUtils;
import nl.tudelft.cornul11.thesis.corpus.model.Dependency;
import nl.tudelft.cornul11.thesis.corpus.model.LibraryInfo;
import nl.tudelft.cornul11.thesis.packaging.util.HostNameUtility;
import org.apache.maven.shared.invoker.*;
import org.objectweb.asm.ClassReader;
import org.objectweb.asm.Opcodes;
Expand All @@ -14,6 +15,7 @@
import org.slf4j.LoggerFactory;

import java.io.*;
import java.net.InetAddress;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
Expand Down Expand Up @@ -179,18 +181,16 @@ private boolean hasPublicDefaultConstructor(ClassNode classNode) {

private String getJarLocation(Dependency library) {
Path m2RepositoryPath;
// TODO: find a better way to determine the m2 repository path
// get hostname to determine if we are on the server or not
String hostname = System.getenv("HOSTNAME");
String hostname = HostNameUtility.getHostName();

// sometimes hostname is null
if (hostname == null) {
hostname = "";
}
hostname = Objects.requireNonNullElse(hostname, "");

if (hostname.equals("goteborg")) {
// m2RepositoryPath shoud become /data/.m2/repository
// m2RepositoryPath should become /data/.m2/repository
m2RepositoryPath = Paths.get("/data", ".m2", "repository");

} else {
String userHomeDir = System.getProperty("user.home");
m2RepositoryPath = Paths.get(userHomeDir, ".m2", "repository");
Expand Down
86 changes: 86 additions & 0 deletions util/api_benchmark.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
import json
import statistics
import time
from concurrent.futures import ThreadPoolExecutor
from pprint import pprint

import requests


def analyze_results(results, max_time):
    """Summarize the outcome of a benchmark run.

    Args:
        results: Non-empty sequence of ``(status_code, elapsed_seconds)``
            entries, one per request. Only entries whose status code is 200
            contribute to the latency statistics; every other entry counts
            as an error.
        max_time: Wall-clock duration of the whole run in seconds. Used as
            the denominator of the throughput figure.

    Returns:
        A dict with the keys ``average``, ``median``, ``min``,
        ``max_response_time``, ``std_dev``, ``percentiles`` (a dict with the
        keys ``50th``, ``90th``, ``95th`` and ``99th``), ``error_rate``
        (failed / total) and ``throughput`` (total requests / ``max_time``).

    Raises:
        AssertionError: If ``results`` is empty or holds no 200 response.
        ZeroDivisionError: If ``max_time`` is zero.
    """
    assert len(results) > 0, "no results to analyze"
    response_times = [result[1] for result in results if result[0] == 200]
    error_count = len(results) - len(response_times)
    assert len(response_times) > 0, "no successful (HTTP 200) responses to analyze"

    percentile_labels = ("50th", "90th", "95th", "99th")
    if len(response_times) > 1:
        std_dev = statistics.stdev(response_times)
        # Compute the 99 cut points once instead of once per percentile.
        cut_points = statistics.quantiles(response_times, n=100)
        percentiles = {
            label: cut_points[int(label[:2]) - 1] for label in percentile_labels
        }
    else:
        # A single sample has no spread: stdev() would raise StatisticsError,
        # so report zero deviation and use the sample for every percentile.
        std_dev = 0.0
        percentiles = dict.fromkeys(percentile_labels, response_times[0])

    return {
        "average": statistics.mean(response_times),
        "median": statistics.median(response_times),
        "min": min(response_times),
        "max_response_time": max(response_times),
        "std_dev": std_dev,
        "percentiles": percentiles,
        "error_rate": error_count / len(results),
        "throughput": len(results) / max_time,
    }


def parse_jar_path(jar_path):
    """Split a local-repository jar path into its Maven coordinates.

    The path is split on ``/``. The last segment is ignored, the one before
    it is the version, the one before that the artifact id, and every
    segment between the ``repository`` segment and the artifact id is joined
    with ``.`` to form the group id.

    Returns:
        A ``(groupId, artifactId, version)`` tuple of strings.
    """
    segments = jar_path.strip().split("/")
    artifact_id, release = segments[-3], segments[-2]
    group_start = segments.index("repository") + 1
    group_id = ".".join(segments[group_start:-3])
    return group_id, artifact_id, release


def benchmark_api(
    groupId,
    artifactId,
    version,
    url="http://localhost:8080/lookup",
    timeout=30.0,
):
    """Issue one GET request against the lookup endpoint and time it.

    Args:
        groupId: Value sent as the ``groupId`` query parameter.
        artifactId: Value sent as the ``artifactId`` query parameter.
        version: Value sent as the ``version`` query parameter.
        url: Endpoint to query.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``, so a
            stalled server cannot block a worker thread indefinitely. Pass
            ``None`` to wait without limit.

    Returns:
        A ``(status_code, elapsed_seconds)`` tuple.

    Raises:
        requests.RequestException: Propagated unchanged from ``requests.get``
            (connection failure, timeout, ...).
    """
    params = {
        "groupId": groupId,
        "artifactId": artifactId,
        "version": version,
    }
    # perf_counter() is monotonic and high resolution; time.time() can jump
    # when the system clock is adjusted, which would corrupt the measurement.
    start = time.perf_counter()
    response = requests.get(url, params=params, timeout=timeout)
    elapsed = time.perf_counter() - start
    return response.status_code, elapsed


def main(file_path):
    """Benchmark the lookup API with every jar path listed in a file.

    Reads one jar path per line from ``file_path``, sends a lookup request
    for each through a pool of 10 worker threads, and pretty-prints the
    statistics computed by ``analyze_results``.

    Args:
        file_path: Path of a text file containing one jar path per line.
    """
    with open(file_path, "r") as file:
        # Drop blank lines (typically a trailing newline at end of file);
        # parse_jar_path cannot handle them and would abort the whole run.
        jar_paths = [line for line in file if line.strip()]

    # Monotonic clock: the run duration must not be affected by system clock
    # adjustments.
    start_time = time.perf_counter()

    with ThreadPoolExecutor(max_workers=10) as executor:
        futures = [
            executor.submit(benchmark_api, *parse_jar_path(path)) for path in jar_paths
        ]
        # result() re-raises any exception raised inside a worker.
        results = [future.result() for future in futures]

    max_time = time.perf_counter() - start_time

    analysis = analyze_results(results, max_time)

    pprint(analysis)


if __name__ == "__main__":
    import sys

    # Exit with a usage hint instead of a bare IndexError traceback when the
    # input file argument is missing.
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} <file-with-jar-paths>")

    file_path = sys.argv[1]
    main(file_path)
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,6 @@
import java.time.Duration;
import java.time.Instant;
import java.util.*;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

import static java.util.Objects.requireNonNull;
import static spark.Spark.get;
Expand All @@ -69,14 +66,15 @@ public class MavenIndexTimestampLookup {

private final Map<String, IndexCreator> indexCreators;

private final ExecutorService executorService = Executors.newCachedThreadPool();

public static void main(String[] args) throws IOException {
final com.google.inject.Module app = Main.wire(BeanScanning.INDEX);
MavenIndexTimestampLookup mavenIndexTimestampLookup = Guice.createInjector(app).getInstance(MavenIndexTimestampLookup.class);

if (args.length > 0 && "server".equals(args[0])) {
mavenIndexTimestampLookup.startServer();
mavenIndexTimestampLookup.updateLocalIndex();

IndexingContext centralIndex = mavenIndexTimestampLookup.createCentralContext();
mavenIndexTimestampLookup.startServer(centralIndex);
} else {
mavenIndexTimestampLookup.perform(args);
}
Expand Down Expand Up @@ -113,24 +111,19 @@ private IndexingContext createCentralContext() throws IOException {
indexers);
}

private String lookupArtifactLastModified(String groupId, String artifactId, String version) throws IOException {
IndexingContext centralContext = createCentralContext();
try {
// construct the query for known GA
Query query = constructQuery(groupId, artifactId, version);
private String lookupArtifactLastModified(IndexingContext centralContext, String groupId, String artifactId, String version) throws IOException {
// construct the query for known GA
Query query = constructQuery(groupId, artifactId, version);

IteratorSearchRequest request = new IteratorSearchRequest(query, Collections.singletonList(centralContext));
IteratorSearchResponse response = indexer.searchIterator(request);
IteratorSearchRequest request = new IteratorSearchRequest(query, Collections.singletonList(centralContext));
IteratorSearchResponse response = indexer.searchIterator(request);

if (response.getTotalHitsCount() > 0) {
ArtifactInfo ai = response.iterator().next();
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
return sdf.format(new Date(ai.getLastModified()));
} else {
return "Artifact not found";
}
} finally {
indexer.closeIndexingContext(centralContext, false);
if (response.getTotalHitsCount() > 0) {
ArtifactInfo ai = response.iterator().next();
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
return sdf.format(new Date(ai.getLastModified()));
} else {
return "Artifact not found";
}
}

Expand All @@ -156,10 +149,6 @@ private Query constructQuery(String groupId, String artifactId, String version)
public void perform(String[] args) throws IOException {
IndexingContext centralContext = createCentralContext();
try {
if (args[0].equals("update")) {
updateLocalIndex(centralContext);
return;
}
String groupId = args[0];
String artifactId = args[1];
String version = args[2];
Expand All @@ -182,7 +171,7 @@ private void executeQuery(IndexingContext context, Query query) throws IOExcepti
}
}

private void startServer() {
private void startServer(IndexingContext centralIndex) throws IOException {
port(8080);

get("/lookup", (request, response) -> {
Expand All @@ -195,30 +184,38 @@ private void startServer() {
return "Missing query parameters";
}

CompletableFuture<String> future = CompletableFuture.supplyAsync(() -> {
try {
String lastModified = lookupArtifactLastModified(groupId, artifactId, version);
if (!"Artifact not found".equals(lastModified)) {
return lastModified;
} else {
response.status(404);
return "Artifact not found";
}
} catch (Exception e) {
response.status(500);
return "Internal server error: " + e.getMessage();
try {
String lastModified = lookupArtifactLastModified(centralIndex, groupId, artifactId, version);
if (!"Artifact not found".equals(lastModified)) {
return lastModified;
} else {
response.status(404);
return "Artifact not found";
}
}, executorService);

return future.join();
} catch (Exception e) {
response.status(500);
e.printStackTrace();
return "Internal server error: " + e.getMessage();
}
});

// when the server is stopped, close the indexer
Runtime.getRuntime().addShutdownHook(new Thread(() -> {
try {
indexer.closeIndexingContext(centralIndex, false);
} catch (IOException e) {
e.printStackTrace();
}
}));
}

private void updateLocalIndex(IndexingContext centralContext) throws IOException {
private void updateLocalIndex() throws IOException {
Instant updateStart = Instant.now();
System.out.println("Updating Index...");
System.out.println("Updating index...");
System.out.println("This might take a while on first run, so please be patient!");

IndexingContext centralContext = createCentralContext();

Date centralContextCurrentTimestamp = centralContext.getTimestamp();
IndexUpdateRequest updateRequest = new IndexUpdateRequest(centralContext, new Java11HttpClient());
IndexUpdateResult updateResult = indexUpdater.fetchAndUpdateIndex(updateRequest);
Expand All @@ -231,6 +228,7 @@ private void updateLocalIndex(IndexingContext centralContext) throws IOException
+ " - " + updateResult.getTimestamp() + " period.");
}

indexer.closeIndexingContext(centralContext, false);
System.out.println("Finished in "
+ Duration.between(updateStart, Instant.now()).getSeconds() + " sec");
System.out.println();
Expand Down
37 changes: 13 additions & 24 deletions util/pom_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@
from tqdm.contrib.logging import logging_redirect_tqdm
from tqdm.gui import tqdm as tqdm_gui

total_waiting_for_maven = 0

DEFAULT_ARCHIVE_PATH = "pom_archive.zip"
NS_URL = "http://maven.apache.org/POM/4.0.0"

Expand Down Expand Up @@ -288,12 +286,13 @@ def print_stats(self):
logging.info(
f"Total pom files with parent: {self.total_with_parents} ({self.total_with_parents / self.total_pom_files * 100:.2f}%)"
)
logging.info(
f"Total not found in index: {self.total_not_found_in_index} ({self.total_not_found_in_index / self.total_shade_plugins * 100:.2f}%)"
)
logging.info(
f"Total not found: {self.total_not_found} ({self.total_not_found / self.total_shade_plugins * 100:.2f}%)"
)
if self.total_shade_plugins > 0:
logging.info(
f"Total not found in index: {self.total_not_found_in_index} ({self.total_not_found_in_index / self.total_shade_plugins * 100:.2f}%)"
)
logging.info(
f"Total not found: {self.total_not_found} ({self.total_not_found / self.total_shade_plugins * 100:.2f}%)"
)

def save_stats_if_required(self):
if self.args.save:
Expand All @@ -313,11 +312,8 @@ def save_stats_if_required(self):
}

with open(self.args.save, "w") as f:
json.dump(stats, f)
json.dump(stats, f, indent=4)
logging.info(f"Saved stats to {self.args.save}")
logging.info(
f"Total time waiting for Maven Central: {total_waiting_for_maven}"
)
logging.info(
f"Total time: {(datetime.now() - self.start_time).total_seconds()}"
)
Expand All @@ -333,7 +329,11 @@ def parse_arguments():
default=None,
)
parser.add_argument(
"--file", type=str, help="File containing paths to pom.xml files", default=None
"-f",
"--file",
type=str,
help="File containing paths to pom.xml files",
default=None,
)
parser.add_argument(
"--mode",
Expand Down Expand Up @@ -373,9 +373,6 @@ def get_publication_date_from_maven_repo_header(group_id, artifact_id, version):


def get_publication_date_from_maven_repo(group_id, artifact_id, version):
global total_waiting_for_maven
start_time = datetime.now()

group_id = group_id.replace(".", "/")

url = f"https://repo1.maven.org/maven2/{group_id}/{artifact_id}/{version}"
Expand Down Expand Up @@ -406,14 +403,9 @@ def get_publication_date_from_maven_repo(group_id, artifact_id, version):
except Exception as err:
logging.error(f"Other error occurred: {err}")
return None
finally:
end_time = datetime.now()
total_waiting_for_maven += (end_time - start_time).total_seconds()


def get_publication_date_from_maven_central(group_id, artifact_id, version):
global total_waiting_for_maven
start_time = datetime.now()
try:
base_url = "https://search.maven.org/solrsearch/select"
query = f"g:{group_id} AND a:{artifact_id} AND v:{version}"
Expand All @@ -436,9 +428,6 @@ def get_publication_date_from_maven_central(group_id, artifact_id, version):
f"Error fetching publication date for {group_id}:{artifact_id}:{version} from Maven Central: {e}"
)
return None
finally:
end_time = datetime.now()
total_waiting_for_maven += (end_time - start_time).total_seconds()


def main():
Expand Down

0 comments on commit f90adb2

Please sign in to comment.