2017-06-29 20:12:55 +02:00
|
|
|
package org.schabi.newpipe.extractor;
|
|
|
|
|
2019-04-28 22:03:16 +02:00
|
|
|
import org.schabi.newpipe.extractor.downloader.Downloader;
|
2017-08-06 22:20:15 +02:00
|
|
|
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
|
|
|
|
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
2018-07-13 18:02:40 +02:00
|
|
|
import org.schabi.newpipe.extractor.linkhandler.LinkHandler;
|
2019-04-28 22:03:16 +02:00
|
|
|
import org.schabi.newpipe.extractor.localization.ContentCountry;
|
|
|
|
import org.schabi.newpipe.extractor.localization.Localization;
|
|
|
|
import org.schabi.newpipe.extractor.localization.TimeAgoParser;
|
2017-06-29 20:12:55 +02:00
|
|
|
|
2020-02-08 23:58:46 +01:00
|
|
|
import javax.annotation.Nonnull;
|
|
|
|
import javax.annotation.Nullable;
|
2022-03-17 16:14:58 +01:00
|
|
|
|
2020-02-08 23:58:46 +01:00
|
|
|
import java.io.IOException;
|
2020-10-19 14:13:34 +02:00
|
|
|
import java.util.Objects;
|
2020-02-08 23:58:46 +01:00
|
|
|
|
|
|
|
public abstract class Extractor {
|
2017-08-06 22:20:15 +02:00
|
|
|
/**
|
2018-03-18 20:17:22 +01:00
|
|
|
* {@link StreamingService} currently related to this extractor.<br>
|
2022-03-17 16:14:58 +01:00
|
|
|
* Useful for getting other things from a service (like the url handlers for
|
|
|
|
* cleaning/accepting/get id from urls).
|
2017-08-06 22:20:15 +02:00
|
|
|
*/
|
|
|
|
private final StreamingService service;
|
2018-09-15 20:12:52 +02:00
|
|
|
private final LinkHandler linkHandler;
|
2017-08-06 22:20:15 +02:00
|
|
|
|
2020-02-08 23:58:46 +01:00
|
|
|
@Nullable
|
|
|
|
private Localization forcedLocalization = null;
|
|
|
|
@Nullable
|
|
|
|
private ContentCountry forcedContentCountry = null;
|
2019-04-28 22:03:16 +02:00
|
|
|
|
2017-11-28 13:37:01 +01:00
|
|
|
private boolean pageFetched = false;
|
2022-03-17 16:14:58 +01:00
|
|
|
// called like this to prevent checkstyle errors about "hiding a field"
|
2022-03-26 18:54:21 +01:00
|
|
|
private final Downloader downloader;
|
2017-08-06 22:20:15 +02:00
|
|
|
|
2022-03-26 18:54:21 +01:00
|
|
|
protected Extractor(final StreamingService service, final LinkHandler linkHandler) {
|
2020-10-19 14:13:34 +02:00
|
|
|
this.service = Objects.requireNonNull(service, "service is null");
|
|
|
|
this.linkHandler = Objects.requireNonNull(linkHandler, "LinkHandler is null");
|
2022-03-26 18:54:21 +01:00
|
|
|
this.downloader = Objects.requireNonNull(NewPipe.getDownloader(), "downloader is null");
|
2017-06-29 20:12:55 +02:00
|
|
|
}
|
|
|
|
|
2017-08-06 22:20:15 +02:00
|
|
|
/**
|
2022-03-17 16:14:58 +01:00
|
|
|
* @return The {@link LinkHandler} of the current extractor object (e.g. a ChannelExtractor
|
|
|
|
* should return a channel url handler).
|
2017-08-06 22:20:15 +02:00
|
|
|
*/
|
2017-11-25 01:10:04 +01:00
|
|
|
@Nonnull
|
2018-09-15 20:12:52 +02:00
|
|
|
public LinkHandler getLinkHandler() {
|
|
|
|
return linkHandler;
|
2018-05-06 14:08:50 +02:00
|
|
|
}
|
2017-08-06 22:20:15 +02:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Fetch the current page.
|
2020-02-08 23:58:46 +01:00
|
|
|
*
|
|
|
|
* @throws IOException if the page can not be loaded
|
2017-11-28 13:37:01 +01:00
|
|
|
* @throws ExtractionException if the pages content is not understood
|
2017-08-06 22:20:15 +02:00
|
|
|
*/
|
2017-11-28 13:37:01 +01:00
|
|
|
public void fetchPage() throws IOException, ExtractionException {
|
2022-03-17 16:14:58 +01:00
|
|
|
if (pageFetched) {
|
|
|
|
return;
|
|
|
|
}
|
2022-03-26 18:54:21 +01:00
|
|
|
onFetchPage(downloader);
|
2017-11-28 13:37:01 +01:00
|
|
|
pageFetched = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
protected void assertPageFetched() {
|
2022-03-17 16:14:58 +01:00
|
|
|
if (!pageFetched) {
|
|
|
|
throw new IllegalStateException("Page is not fetched. Make sure you call fetchPage()");
|
|
|
|
}
|
2017-11-28 13:37:01 +01:00
|
|
|
}
|
|
|
|
|
2018-01-13 20:09:31 +01:00
|
|
|
protected boolean isPageFetched() {
|
|
|
|
return pageFetched;
|
|
|
|
}
|
|
|
|
|
2017-11-28 13:37:01 +01:00
|
|
|
/**
|
|
|
|
* Fetch the current page.
|
2020-02-08 23:58:46 +01:00
|
|
|
*
|
2022-03-17 16:14:58 +01:00
|
|
|
* @param downloader the downloader to use
|
2020-02-08 23:58:46 +01:00
|
|
|
* @throws IOException if the page can not be loaded
|
2017-11-28 13:37:01 +01:00
|
|
|
* @throws ExtractionException if the pages content is not understood
|
|
|
|
*/
|
2022-03-26 18:54:21 +01:00
|
|
|
@SuppressWarnings("HiddenField")
|
2022-03-17 16:14:58 +01:00
|
|
|
public abstract void onFetchPage(@Nonnull Downloader downloader)
|
|
|
|
throws IOException, ExtractionException;
|
2017-08-06 22:20:15 +02:00
|
|
|
|
2017-11-25 01:10:04 +01:00
|
|
|
@Nonnull
|
2018-05-06 14:08:50 +02:00
|
|
|
public String getId() throws ParsingException {
|
2018-09-15 20:12:52 +02:00
|
|
|
return linkHandler.getId();
|
2018-05-06 14:08:50 +02:00
|
|
|
}
|
2017-11-25 01:10:04 +01:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Get the name
|
2020-02-08 23:58:46 +01:00
|
|
|
*
|
2017-11-25 01:10:04 +01:00
|
|
|
* @return the name
|
|
|
|
* @throws ParsingException if the name cannot be extracted
|
|
|
|
*/
|
|
|
|
@Nonnull
|
2017-08-11 03:23:09 +02:00
|
|
|
public abstract String getName() throws ParsingException;
|
|
|
|
|
2017-11-25 01:10:04 +01:00
|
|
|
@Nonnull
|
2018-05-06 14:08:50 +02:00
|
|
|
public String getOriginalUrl() throws ParsingException {
|
2018-09-15 20:12:52 +02:00
|
|
|
return linkHandler.getOriginalUrl();
|
2017-06-29 20:12:55 +02:00
|
|
|
}
|
|
|
|
|
2017-11-25 01:10:04 +01:00
|
|
|
@Nonnull
|
2018-05-06 14:08:50 +02:00
|
|
|
public String getUrl() throws ParsingException {
|
2018-09-15 20:12:52 +02:00
|
|
|
return linkHandler.getUrl();
|
2017-06-29 20:12:55 +02:00
|
|
|
}
|
2020-02-08 23:58:46 +01:00
|
|
|
|
2019-11-22 19:35:49 +01:00
|
|
|
@Nonnull
|
|
|
|
public String getBaseUrl() throws ParsingException {
|
2020-02-08 23:58:46 +01:00
|
|
|
return linkHandler.getBaseUrl();
|
2019-11-22 19:35:49 +01:00
|
|
|
}
|
2017-06-29 20:12:55 +02:00
|
|
|
|
2017-11-25 01:10:04 +01:00
|
|
|
@Nonnull
|
2017-08-06 22:20:15 +02:00
|
|
|
public StreamingService getService() {
|
|
|
|
return service;
|
2017-06-29 20:12:55 +02:00
|
|
|
}
|
|
|
|
|
2017-08-06 22:20:15 +02:00
|
|
|
public int getServiceId() {
|
|
|
|
return service.getServiceId();
|
2017-06-29 20:12:55 +02:00
|
|
|
}
|
2018-05-26 19:15:45 +02:00
|
|
|
|
|
|
|
public Downloader getDownloader() {
|
2022-03-26 18:54:21 +01:00
|
|
|
return downloader;
|
2018-05-26 19:15:45 +02:00
|
|
|
}
|
2018-09-15 21:47:53 +02:00
|
|
|
|
2019-04-28 22:03:16 +02:00
|
|
|
/*//////////////////////////////////////////////////////////////////////////
|
|
|
|
// Localization
|
|
|
|
//////////////////////////////////////////////////////////////////////////*/
|
|
|
|
|
2022-03-17 16:14:58 +01:00
|
|
|
public void forceLocalization(final Localization localization) {
|
2019-04-28 22:03:16 +02:00
|
|
|
this.forcedLocalization = localization;
|
|
|
|
}
|
|
|
|
|
2022-03-17 16:14:58 +01:00
|
|
|
public void forceContentCountry(final ContentCountry contentCountry) {
|
2019-04-28 22:03:16 +02:00
|
|
|
this.forcedContentCountry = contentCountry;
|
|
|
|
}
|
|
|
|
|
|
|
|
@Nonnull
|
|
|
|
public Localization getExtractorLocalization() {
|
|
|
|
return forcedLocalization == null ? getService().getLocalization() : forcedLocalization;
|
|
|
|
}
|
|
|
|
|
|
|
|
@Nonnull
|
|
|
|
public ContentCountry getExtractorContentCountry() {
|
2022-03-17 16:14:58 +01:00
|
|
|
return forcedContentCountry == null ? getService().getContentCountry()
|
|
|
|
: forcedContentCountry;
|
2019-04-28 22:03:16 +02:00
|
|
|
}
|
|
|
|
|
2018-09-15 21:47:53 +02:00
|
|
|
@Nonnull
|
2019-04-28 22:03:16 +02:00
|
|
|
public TimeAgoParser getTimeAgoParser() {
|
|
|
|
return getService().getTimeAgoParser(getExtractorLocalization());
|
2018-09-15 21:47:53 +02:00
|
|
|
}
|
2017-06-29 20:12:55 +02:00
|
|
|
}
|