Merge 337ee21569
into dcca11fbeb
This commit is contained in:
commit
aeb18bb1d8
|
@ -25,6 +25,7 @@ import org.schabi.newpipe.extractor.utils.Utils;
|
|||
import javax.annotation.Nonnull;
|
||||
import javax.annotation.Nullable;
|
||||
import java.io.IOException;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.util.*;
|
||||
|
||||
/*
|
||||
|
@ -152,12 +153,46 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||
public String getDescription() throws ParsingException {
|
||||
assertPageFetched();
|
||||
try {
|
||||
return doc.select("p[id=\"eow-description\"]").first().html();
|
||||
return fixDescriptionLinks(doc.select("p[id=\"eow-description\"]").first().html());
|
||||
} catch (Exception e) {//todo: add fallback method <-- there is no ... as long as i know
|
||||
throw new ParsingException("Could not get the description", e);
|
||||
}
|
||||
}
|
||||
|
||||
private String fixDescriptionLinks(String description) throws ParsingException, UnsupportedEncodingException {
|
||||
boolean continueToNextLink = true;
|
||||
String descriptionE = java.net.URLDecoder.decode(description, "UTF-8");
|
||||
Parser.getLinksFromString(descriptionE);
|
||||
int exceptionsThrown = 0;
|
||||
|
||||
while(continueToNextLink) {
|
||||
try {
|
||||
String[] firstCut = description.split("<a href=\"",2);
|
||||
if(firstCut.length==1) {
|
||||
continueToNextLink = false;
|
||||
}
|
||||
|
||||
String beginning = firstCut[0];
|
||||
String[] secondCut = firstCut[1].split("\"",2);
|
||||
String end = secondCut[1];
|
||||
if(secondCut[0].contains("q=")) {
|
||||
String linkToBeFixed = secondCut[0].split("q=")[1].split("&")[0];
|
||||
String link = java.net.URLDecoder.decode(linkToBeFixed, "UTF-8");
|
||||
|
||||
description = beginning + "<a href=\"" + link + "\"" + end; //I'm inserting a double space between "<a" and "href" here so the next cut doesn't cut here.
|
||||
} else { //Timestamps and other links to youtube videos are processed here
|
||||
description = beginning + "<a href=\"" + secondCut[0] + "\"" + end;
|
||||
}
|
||||
} catch (ArrayIndexOutOfBoundsException | UnsupportedEncodingException end) {
|
||||
//this means we have run out of Links because there are no more <a href=" to split the text in so the Array has only one Element or that something else went wrong.
|
||||
|
||||
exceptionsThrown++;
|
||||
if (exceptionsThrown > 20) continueToNextLink = false;
|
||||
}
|
||||
}
|
||||
return description;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getAgeLimit() throws ParsingException {
|
||||
assertPageFetched();
|
||||
|
|
Loading…
Reference in New Issue