Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,22 @@
import org.schabi.newpipe.extractor.subscription.SubscriptionExtractor;
import org.schabi.newpipe.extractor.subscription.SubscriptionItem;

import java.io.BufferedReader;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;

import javax.annotation.Nonnull;

import static org.schabi.newpipe.extractor.subscription.SubscriptionExtractor.ContentSource.INPUT_STREAM;

/**
* Extract subscriptions from a Google takeout export: the JSON or CSV file itself, or the zip
* archive that contains the CSV file
*/
public class YoutubeSubscriptionExtractor extends SubscriptionExtractor {
private static final String BASE_CHANNEL_URL = "https://www.youtube.com/channel/";
Expand All @@ -37,6 +42,30 @@ public String getRelatedUrl() {
/**
 * Parses subscriptions from a stream without a declared content type by treating it as JSON.
 *
 * @param contentInputStream stream to read; passed unchanged to {@code fromJsonInputStream}
 * @return the list returned by {@code fromJsonInputStream} for this stream
 * @throws ExtractionException propagated from {@code fromJsonInputStream}
 */
@Override
public List<SubscriptionItem> fromInputStream(@Nonnull final InputStream contentInputStream)
        throws ExtractionException {
    final List<SubscriptionItem> parsedItems = fromJsonInputStream(contentInputStream);
    return parsedItems;
}

/**
 * Parses subscriptions from the given stream, selecting the parser by the declared content type.
 *
 * <p>Accepted values, matched case-insensitively: {@code json}, {@code application/json},
 * {@code csv}, {@code text/csv}, {@code text/comma-separated-values}, {@code zip} and
 * {@code application/zip}.
 *
 * @param contentInputStream stream holding the export in the declared format
 * @param contentType        file extension or MIME type describing the stream
 * @return the subscriptions produced by the selected parser
 * @throws InvalidSourceException if {@code contentType} is {@code null} or not listed above
 * @throws ExtractionException    if the selected parser fails
 */
@Override
public List<SubscriptionItem> fromInputStream(@Nonnull final InputStream contentInputStream, final String contentType)
        throws ExtractionException {
    // Switching on a null String throws NullPointerException; report it the same way as any
    // other unsupported type so callers only have to handle ExtractionException.
    if (contentType == null) {
        throw new InvalidSourceException("Unsupported content type: " + contentType);
    }

    // MIME types are case-insensitive; Locale.ROOT keeps the lowering locale-independent.
    switch (contentType.toLowerCase(Locale.ROOT)) {
        case "json":
        case "application/json":
            return fromJsonInputStream(contentInputStream);
        case "csv":
        case "text/csv":
        case "text/comma-separated-values":
            return fromCsvInputStream(contentInputStream);
        case "zip":
        case "application/zip":
            return fromZipInputStream(contentInputStream);
        default:
            // Report the value exactly as the caller supplied it
            throw new InvalidSourceException("Unsupported content type: " + contentType);
    }
}

public List<SubscriptionItem> fromJsonInputStream(@Nonnull final InputStream contentInputStream)
throws ExtractionException {
final JsonArray subscriptions;
try {
subscriptions = JsonParser.array().from(contentInputStream);
Expand Down Expand Up @@ -68,4 +97,109 @@ public List<SubscriptionItem> fromInputStream(@Nonnull final InputStream content
}
return subscriptionItems;
}

/**
 * Extracts subscriptions from a zip archive by trying each entry whose name ends in
 * {@code .csv}, in archive order.
 *
 * <p>The first CSV entry that yields at least one subscription is returned. Entries that yield
 * nothing, or that fail with an {@link ExtractionException}, are skipped.
 *
 * @param contentInputStream stream over the zip archive; it is closed before this method returns
 * @return the subscriptions of the first CSV entry that contains any
 * @throws InvalidSourceException if the archive cannot be read, or if no CSV entry yields any
 *                                subscriptions
 */
public List<SubscriptionItem> fromZipInputStream(@Nonnull final InputStream contentInputStream)
        throws ExtractionException {
    try (ZipInputStream zipInputStream = new ZipInputStream(contentInputStream)) {
        ZipEntry zipEntry;
        while ((zipEntry = zipInputStream.getNextEntry()) != null) {
            if (!zipEntry.getName().toLowerCase(Locale.ROOT).endsWith(".csv")) {
                continue;
            }

            // fromCsvInputStream closes the stream it is given. Passing the zip stream itself
            // would close the whole archive after the first CSV entry, making the following
            // getNextEntry() fail instead of moving on to the next file. Shield the archive
            // behind a wrapper whose close() does nothing.
            final InputStream entryStream = new FilterInputStream(zipInputStream) {
                @Override
                public void close() {
                    // Intentionally empty: the archive is closed by the enclosing try block
                }
            };

            try {
                final List<SubscriptionItem> csvItems = fromCsvInputStream(entryStream);

                // Return it only if it has items (it exits early if it's the wrong file format)
                // Otherwise try the next file
                if (!csvItems.isEmpty()) {
                    return csvItems;
                }
            } catch (final ExtractionException ignored) {
                // Best effort: an unreadable CSV entry must not stop the search,
                // so go to the next file (maybe log it?)
            }
        }
    } catch (final IOException e) {
        throw new InvalidSourceException("Error reading contents of zip file", e);
    }

    throw new InvalidSourceException("Unable to find a valid subscriptions.csv file (try extracting and selecting the csv file)");
}

/**
 * Extracts subscriptions from a CSV stream whose rows are
 * {@code channel id, channel url, channel title}.
 *
 * <p>The first line is skipped as a header. A row is kept only if it has at least three columns
 * and its second column, with {@code http://} rewritten to {@code https://}, starts with the
 * YouTube channel base URL. Parsing stops early when no row has been accepted by the time the
 * sixth line is reached, so a wrong file yields an empty list.
 *
 * @param contentInputStream stream over the CSV text; it is closed before this method returns
 * @return the accepted subscriptions in file order, possibly empty
 * @throws InvalidSourceException if reading from the stream fails
 */
public List<SubscriptionItem> fromCsvInputStream(@Nonnull final InputStream contentInputStream)
        throws ExtractionException {
    // Expected format of CSV file:
    //      Channel Id,Channel Url,Channel Title
    //      UC1JTQBa5QxZCpXrFSkMxmPw,http://www.youtube.com/channel/UC1JTQBa5QxZCpXrFSkMxmPw,Raycevick
    //      UCFl7yKfcRcFmIUbKeCA-SJQ,http://www.youtube.com/channel/UCFl7yKfcRcFmIUbKeCA-SJQ,Joji
    //
    // Notes:
    //      It's always 3 columns
    //      The first line is always a header
    //      Header names are different based on the locale
    //      Fortunately the data is always the same order no matter what locale

    // Declared outside the try block so the catch clause can report where reading failed
    int currentLine = 0;
    String line = "";

    // Decode explicitly instead of relying on the platform default charset, so channel titles
    // do not depend on the machine's settings.
    // NOTE(review): assumes the export is written as UTF-8 - confirm
    try (BufferedReader br = new BufferedReader(
            new InputStreamReader(contentInputStream, StandardCharsets.UTF_8))) {
        final List<SubscriptionItem> subscriptionItems = new ArrayList<>();

        // Ignore header
        currentLine = 1;
        line = br.readLine();

        while ((line = br.readLine()) != null) {
            currentLine++;

            // Exit early if we've read the first few lines and we haven't added any items
            // It's likely we're in the wrong file
            if (currentLine > 5 && subscriptionItems.isEmpty()) {
                break;
            }

            // First comma
            final int i1 = line.indexOf(",");
            if (i1 == -1) {
                continue;
            }

            // Second comma
            final int i2 = line.indexOf(",", i1 + 1);
            if (i2 == -1) {
                continue;
            }

            // Third comma or line length
            // NOTE(review): quoted fields are not handled, so a title containing a comma is
            // cut at that comma
            int i3 = line.indexOf(",", i2 + 1);
            if (i3 == -1) {
                i3 = line.length();
            }

            // Channel URL from second entry
            final String channelUrl = line
                    .substring(i1 + 1, i2)
                    .replace("http://", "https://");
            if (!channelUrl.startsWith(BASE_CHANNEL_URL)) {
                continue;
            }

            // Channel title from third entry
            final String channelTitle = line.substring(i2 + 1, i3);

            final SubscriptionItem newItem = new SubscriptionItem(service.getServiceId(), channelUrl, channelTitle);
            subscriptionItems.add(newItem);
        }

        return subscriptionItems;
    } catch (final IOException e) {
        // Shorten the offending line so the message stays readable
        if (line == null) {
            line = "<null>";
        } else if (line.length() > 10) {
            line = line.substring(0, 10) + "...";
        }
        // Keep the IOException as the cause so the underlying failure is not lost
        throw new InvalidSourceException("Error reading CSV file, line = '" + line + "', line number = " + currentLine, e);
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -77,4 +77,15 @@ public List<SubscriptionItem> fromInputStream(@Nonnull final InputStream content
throw new UnsupportedOperationException("Service " + service.getServiceInfo().getName()
+ " doesn't support extracting from an InputStream");
}

/**
 * Reads and parses a list of {@link SubscriptionItem} from the given InputStream, whose format
 * is described by {@code contentType}.
 *
 * <p>This implementation does not read the stream; it always throws
 * {@link UnsupportedOperationException} naming the service.
 *
 * @throws InvalidSourceException when the content read from the InputStream is invalid and can not be parsed
 */
public List<SubscriptionItem> fromInputStream(@Nonnull final InputStream contentInputStream, String contentType)
        throws ExtractionException {
    final String serviceName = service.getServiceInfo().getName();
    throw new UnsupportedOperationException(
            "Service " + serviceName + " doesn't support extracting from an InputStream");
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import org.schabi.newpipe.extractor.subscription.SubscriptionItem;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.util.Arrays;
import java.util.List;
Expand Down Expand Up @@ -110,4 +111,48 @@ public void testInvalidSourceException() {
}
}
}

/**
 * Asserts that the list is not empty and that every item has a non-null name, a non-null URL
 * accepted by {@code urlHandler}, and the YouTube service id.
 */
private static void assertSubscriptionItems(final List<SubscriptionItem> subscriptionItems)
        throws Exception {
    assertTrue(!subscriptionItems.isEmpty());

    for (int index = 0; index < subscriptionItems.size(); index++) {
        final SubscriptionItem subscription = subscriptionItems.get(index);

        assertNotNull(subscription.getName());

        final String subscriptionUrl = subscription.getUrl();
        assertNotNull(subscriptionUrl);
        assertTrue(urlHandler.acceptUrl(subscriptionUrl));

        assertEquals(ServiceList.YouTube.getServiceId(), subscription.getServiceId());
    }
}

/**
 * Imports each listed takeout zip archive and checks the extracted subscriptions.
 */
@Test
public void fromZipInputStream() throws Exception {
    final List<String> zipPaths = Arrays.asList(
            "youtube_takeout_import_test_1.zip",
            "youtube_takeout_import_test_2.zip"
    );

    for (final String path : zipPaths) {
        final File file = resolveTestResource(path);
        // try-with-resources releases the file handle even when extraction or an assertion
        // throws
        try (FileInputStream fileInputStream = new FileInputStream(file)) {
            final List<SubscriptionItem> subscriptionItems = subscriptionExtractor.fromZipInputStream(fileInputStream);
            assertSubscriptionItems(subscriptionItems);
        }
    }
}

/**
 * Imports each listed takeout CSV file and checks the extracted subscriptions.
 */
@Test
public void fromCsvInputStream() throws Exception {
    final List<String> csvPaths = Arrays.asList(
            "youtube_takeout_import_test_1.csv",
            "youtube_takeout_import_test_2.csv"
    );

    for (final String path : csvPaths) {
        final File file = resolveTestResource(path);
        // try-with-resources releases the file handle even when extraction or an assertion
        // throws
        try (FileInputStream fileInputStream = new FileInputStream(file)) {
            final List<SubscriptionItem> subscriptionItems = subscriptionExtractor.fromCsvInputStream(fileInputStream);
            assertSubscriptionItems(subscriptionItems);
        }
    }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
Channel Id,Channel Url,Channel Title
UC1JTQBa5QxZCpXrFSkMxmPw,http://www.youtube.com/channel/UC1JTQBa5QxZCpXrFSkMxmPw,Raycevick
UC3ltptWa0xfrDweghW94Acg,http://www.youtube.com/channel/UC3ltptWa0xfrDweghW94Acg,Karl Jobst
UC9PBzalIcEQCsiIkq36PyUA,http://www.youtube.com/channel/UC9PBzalIcEQCsiIkq36PyUA,Digital Foundry
UCU64AfivgQUOPuIJ8N5YaCA,http://www.youtube.com/channel/UCU64AfivgQUOPuIJ8N5YaCA,EventStatus
UCsvn_Po0SmunchJYOWpOxMg,http://www.youtube.com/channel/UCsvn_Po0SmunchJYOWpOxMg,videogamedunkey

Binary file not shown.
Loading