This commit is contained in:
Christian Schabesberger 2018-07-25 14:05:04 +00:00 committed by GitHub
commit aeb18bb1d8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 36 additions and 1 deletions

View File

@ -25,6 +25,7 @@ import org.schabi.newpipe.extractor.utils.Utils;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.*;
/*
@ -152,12 +153,46 @@ public class YoutubeStreamExtractor extends StreamExtractor {
/**
 * Returns the video description as an HTML string.
 * <p>
 * Reads the inner HTML of the first element matching
 * {@code p[id="eow-description"]} in the fetched page and passes it through
 * {@link #fixDescriptionLinks(String)} before returning it.
 *
 * @return the description HTML after link fixing
 * @throws ParsingException if the description cannot be read or processed;
 *                          the underlying failure is attached as the cause
 */
public String getDescription() throws ParsingException {
    assertPageFetched();
    try {
        // Single return: the raw, unfixed HTML must not be returned ahead of the
        // fixed one, otherwise the link fixing below is never reached.
        return fixDescriptionLinks(doc.select("p[id=\"eow-description\"]").first().html());
    } catch (Exception e) {//todo: add fallback method <-- there is no ... as long as i know
        // NOTE(review): presumably a missing description element surfaces here as a
        // NullPointerException from first(); it is wrapped like any other failure.
        throw new ParsingException("Could not get the description", e);
    }
}
/**
 * Rewrites the links of a description so that redirect-style links point
 * directly at their target.
 * <p>
 * Every {@code <a href="...">} value is inspected in a single left-to-right pass.
 * If the value contains a {@code q=} parameter, the value is replaced by the
 * URL-decoded content of that parameter (everything after the first {@code q=}
 * up to the next {@code &amp}). All other links, such as timestamps and links to
 * other videos, are copied unchanged, as is all text outside of {@code href} values.
 *
 * @param description the description HTML to process
 * @return the description with redirect links replaced by their decoded targets
 * @throws ParsingException declared for signature compatibility; not thrown by
 *                          this implementation
 * @throws UnsupportedEncodingException if the UTF-8 charset is not available
 */
private String fixDescriptionLinks(String description) throws ParsingException, UnsupportedEncodingException {
    // The description is deliberately not URL-decoded as a whole: it is HTML, and a
    // literal '%' in the text would make URLDecoder throw IllegalArgumentException.
    final String anchorStart = "<a href=\"";
    final StringBuilder fixed = new StringBuilder(description.length());
    int cursor = 0;

    while (true) {
        final int anchorIndex = description.indexOf(anchorStart, cursor);
        if (anchorIndex < 0) {
            break; // no more links
        }
        final int urlStart = anchorIndex + anchorStart.length();
        final int urlEnd = description.indexOf('"', urlStart);
        if (urlEnd < 0) {
            break; // unterminated href attribute: copy the remainder untouched
        }

        // Copy everything up to and including the opening quote, then the fixed URL.
        fixed.append(description, cursor, urlStart);
        fixed.append(resolveRedirectTarget(description.substring(urlStart, urlEnd)));

        // Resume at the closing quote, so scanning always moves forward and an
        // already processed link is never matched again.
        cursor = urlEnd;
    }

    fixed.append(description, cursor, description.length());
    return fixed.toString();
}

/**
 * Returns the decoded value of the {@code q=} parameter of {@code href}, or
 * {@code href} itself when there is no such parameter, its value is empty, or
 * its value is not valid percent-encoding.
 */
private static String resolveRedirectTarget(String href) throws UnsupportedEncodingException {
    final String parameter = "q=";
    // NOTE(review): this matches the first "q=" anywhere in the link, including
    // inside longer parameter names; confirm that is the intended match.
    final int parameterIndex = href.indexOf(parameter);
    if (parameterIndex < 0) {
        return href;
    }

    String encodedTarget = href.substring(parameterIndex + parameter.length());
    final int nextParameter = encodedTarget.indexOf("&amp");
    if (nextParameter >= 0) {
        encodedTarget = encodedTarget.substring(0, nextParameter);
    }
    if (encodedTarget.isEmpty()) {
        return href;
    }

    try {
        // A decoded double quote would terminate the href attribute early, so it is
        // put back in its percent-encoded form.
        return java.net.URLDecoder.decode(encodedTarget, "UTF-8").replace("\"", "%22");
    } catch (IllegalArgumentException malformedEscape) {
        // Best effort: keep the original link rather than failing the whole description.
        return href;
    }
}
@Override
public int getAgeLimit() throws ParsingException {
assertPageFetched();