NewPipeExtractor/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSubscriptionExtracto...

207 lines
8.0 KiB
Java

package org.schabi.newpipe.extractor.services.youtube.extractors;
import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonObject;
import com.grack.nanojson.JsonParser;
import com.grack.nanojson.JsonParserException;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.services.youtube.YoutubeService;
import org.schabi.newpipe.extractor.subscription.SubscriptionExtractor;
import org.schabi.newpipe.extractor.subscription.SubscriptionItem;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import javax.annotation.Nonnull;
import static org.schabi.newpipe.extractor.subscription.SubscriptionExtractor.ContentSource.INPUT_STREAM;
/**
 * Extract subscriptions from a Google takeout export.
 *
 * <p>Three input shapes are handled: a JSON array of subscription objects, a CSV file with one
 * channel per line, and a zip archive that is searched for a usable CSV file.</p>
 */
public class YoutubeSubscriptionExtractor extends SubscriptionExtractor {
    private static final String BASE_CHANNEL_URL = "https://www.youtube.com/channel/";

    public YoutubeSubscriptionExtractor(final YoutubeService youtubeService) {
        super(youtubeService, Collections.singletonList(INPUT_STREAM));
    }

    /**
     * @return the URL of the Google takeout page from which the export can be requested
     */
    @Override
    public String getRelatedUrl() {
        return "https://takeout.google.com/takeout/custom/youtube";
    }

    /**
     * Reads subscriptions from a stream without a known content type; the content is treated
     * as JSON.
     *
     * @param contentInputStream the stream to read the export from
     * @return the subscriptions found in the stream
     * @throws ExtractionException if the content could not be parsed
     */
    @Override
    public List<SubscriptionItem> fromInputStream(@Nonnull final InputStream contentInputStream)
            throws ExtractionException {
        return fromJsonInputStream(contentInputStream);
    }

    /**
     * Reads subscriptions from a stream, choosing the parser from the given content type.
     *
     * @param contentInputStream the stream to read the export from
     * @param contentType        a file extension ({@code json}, {@code csv}, {@code zip}) or
     *                           one of the corresponding MIME types
     * @return the subscriptions found in the stream
     * @throws ExtractionException if the content type is unsupported or the content could not
     *                             be parsed
     */
    @Override
    public List<SubscriptionItem> fromInputStream(@Nonnull final InputStream contentInputStream,
                                                  @Nonnull final String contentType)
            throws ExtractionException {
        switch (contentType) {
            case "json":
            case "application/json":
                return fromJsonInputStream(contentInputStream);
            case "csv":
            case "text/csv":
            case "text/comma-separated-values":
                return fromCsvInputStream(contentInputStream);
            case "zip":
            case "application/zip":
                return fromZipInputStream(contentInputStream);
            default:
                throw new InvalidSourceException("Unsupported content type: " + contentType);
        }
    }

    /**
     * Parses a JSON array of subscription objects.
     *
     * <p>Entries that are not JSON objects, or whose channel id is not 24 characters long, are
     * skipped. An exception is only raised for them when no valid entry was found at all.</p>
     *
     * @param contentInputStream the stream containing the JSON array
     * @return the valid subscriptions found in the array
     * @throws ExtractionException if the JSON is malformed, or if invalid entries were found
     *                             and no valid one
     */
    public List<SubscriptionItem> fromJsonInputStream(@Nonnull final InputStream contentInputStream)
            throws ExtractionException {
        final JsonArray subscriptions;
        try {
            subscriptions = JsonParser.array().from(contentInputStream);
        } catch (final JsonParserException e) {
            throw new InvalidSourceException("Invalid json input stream", e);
        }

        boolean foundInvalidSubscription = false;
        final List<SubscriptionItem> subscriptionItems = new ArrayList<>();
        for (final Object subscriptionObject : subscriptions) {
            if (!(subscriptionObject instanceof JsonObject)) {
                foundInvalidSubscription = true;
                continue;
            }

            final JsonObject subscription = ((JsonObject) subscriptionObject).getObject("snippet");
            final String id = subscription.getObject("resourceId").getString("channelId", "");
            if (id.length() != 24) { // e.g. UCsXVk37bltHxD1rDPwtNM8Q
                foundInvalidSubscription = true;
                continue;
            }

            subscriptionItems.add(new SubscriptionItem(service.getServiceId(),
                    BASE_CHANNEL_URL + id, subscription.getString("title", "")));
        }

        if (foundInvalidSubscription && subscriptionItems.isEmpty()) {
            throw new InvalidSourceException("Found only invalid channel ids");
        }
        return subscriptionItems;
    }

    /**
     * Searches a zip archive for a CSV file containing subscriptions.
     *
     * <p>Every entry whose name ends in {@code .csv} (case-insensitive) is tried in archive
     * order; the first one that yields at least one subscription wins.</p>
     *
     * @param contentInputStream the stream containing the zip archive; it is closed on return
     * @return the subscriptions of the first CSV entry that contains any
     * @throws ExtractionException if the archive cannot be read or no CSV entry yields
     *                             subscriptions
     */
    public List<SubscriptionItem> fromZipInputStream(@Nonnull final InputStream contentInputStream)
            throws ExtractionException {
        try (ZipInputStream zipInputStream = new ZipInputStream(contentInputStream)) {
            ZipEntry zipEntry;
            while ((zipEntry = zipInputStream.getNextEntry()) != null) {
                // Locale.ROOT keeps the extension check independent of the default locale
                if (!zipEntry.getName().toLowerCase(Locale.ROOT).endsWith(".csv")) {
                    continue;
                }

                try {
                    // Use the non-closing parser: closing a reader wrapped around the zip
                    // stream would close the whole archive and make getNextEntry() fail,
                    // so no further entry could ever be tried.
                    final List<SubscriptionItem> csvItems = parseCsv(zipInputStream);

                    // Return it only if it has items (it exits early if it's the wrong file
                    // format), otherwise try the next file
                    if (!csvItems.isEmpty()) {
                        return csvItems;
                    }
                } catch (final ExtractionException ignored) {
                    // Best effort: this entry is unusable, go to the next file
                }
            }
        } catch (final IOException e) {
            throw new InvalidSourceException("Error reading contents of zip file", e);
        }

        throw new InvalidSourceException("Unable to find a valid subscriptions.csv file"
                + " (try extracting and selecting the csv file)");
    }

    /**
     * Parses a CSV file of subscriptions and closes the given stream afterwards.
     *
     * @param contentInputStream the stream containing the CSV data; it is closed on return
     * @return the subscriptions found, possibly empty
     * @throws ExtractionException if reading from or closing the stream fails
     */
    public List<SubscriptionItem> fromCsvInputStream(@Nonnull final InputStream contentInputStream)
            throws ExtractionException {
        try (InputStream closingStream = contentInputStream) {
            return parseCsv(closingStream);
        } catch (final IOException e) {
            // Read errors are already wrapped by parseCsv, so only close() can end up here
            throw new InvalidSourceException("Error closing CSV input stream", e);
        }
    }

    /**
     * Parses CSV subscription data without closing the given stream, so that callers which
     * own the stream (e.g. a zip archive being iterated) can keep using it.
     *
     * @param contentInputStream the stream containing the CSV data; left open
     * @return the subscriptions found, possibly empty
     * @throws ExtractionException if reading from the stream fails
     */
    private List<SubscriptionItem> parseCsv(@Nonnull final InputStream contentInputStream)
            throws ExtractionException {
        // Expected format of CSV file:
        //      Channel Id,Channel Url,Channel Title
        //      UC1JTQBa5QxZCpXrFSkMxmPw,http://www.youtube.com/channel/UC1JTQBa5QxZCpXrFSkMxmPw,Raycevick
        //      UCFl7yKfcRcFmIUbKeCA-SJQ,http://www.youtube.com/channel/UCFl7yKfcRcFmIUbKeCA-SJQ,Joji
        //
        // Notes:
        //      It's always 3 columns
        //      The first line is always a header
        //      Header names are different based on the locale
        //      Fortunately the data is always the same order no matter what locale

        int currentLine = 0;
        String line = "";

        try {
            // Intentionally not in a try-with-resources: closing the reader would also close
            // the caller's stream. The charset is explicit so that decoding of channel titles
            // does not depend on the platform default.
            final BufferedReader br = new BufferedReader(
                    new InputStreamReader(contentInputStream, StandardCharsets.UTF_8));
            final List<SubscriptionItem> subscriptionItems = new ArrayList<>();

            // ignore header and skip first line
            currentLine = 1;
            line = br.readLine();

            while ((line = br.readLine()) != null) {
                currentLine++;

                // Exit early if we've read the first few lines and we haven't added any items
                // It's likely we're in the wrong file
                if (currentLine > 5 && subscriptionItems.isEmpty()) {
                    break;
                }

                // First comma
                final int i1 = line.indexOf(",");
                if (i1 == -1) {
                    continue;
                }

                // Second comma
                final int i2 = line.indexOf(",", i1 + 1);
                if (i2 == -1) {
                    continue;
                }

                // Third comma or line length
                int i3 = line.indexOf(",", i2 + 1);
                if (i3 == -1) {
                    i3 = line.length();
                }

                // Channel URL from second entry
                final String channelUrl = line
                        .substring(i1 + 1, i2)
                        .replace("http://", "https://");
                if (!channelUrl.startsWith(BASE_CHANNEL_URL)) {
                    continue;
                }

                // Channel title from third entry
                final String channelTitle = line.substring(i2 + 1, i3);

                final SubscriptionItem newItem
                        = new SubscriptionItem(service.getServiceId(), channelUrl, channelTitle);
                subscriptionItems.add(newItem);
            }

            return subscriptionItems;
        } catch (final IOException e) {
            // Only a short prefix of the offending line is put into the message
            if (line == null) {
                line = "<null>";
            } else if (line.length() > 10) {
                line = line.substring(0, 10) + "...";
            }
            throw new InvalidSourceException("Error reading CSV file on line = \"" + line
                    + "\", line number = " + currentLine, e);
        }
    }
}