NewPipeExtractor/extractor/src/main/java/org/schabi/newpipe/extractor/stream/StreamExtractor.java

336 lines
14 KiB
Java

package org.schabi.newpipe.extractor.stream;
/*
* Created by Christian Schabesberger on 10.08.18.
*
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
* StreamExtractor.java is part of NewPipe.
*
* NewPipe is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* NewPipe is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
*/
import org.schabi.newpipe.extractor.Extractor;
import org.schabi.newpipe.extractor.MediaFormat;
import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.linkhandler.LinkHandler;
import org.schabi.newpipe.extractor.utils.Localization;
import org.schabi.newpipe.extractor.utils.Parser;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
/**
* Scrapes information from a video/audio streaming service (eg, YouTube).
*/
public abstract class StreamExtractor extends Extractor {
public static final int NO_AGE_LIMIT = 0;
public StreamExtractor(StreamingService service, LinkHandler linkHandler, Localization localization) {
super(service, linkHandler, localization);
}
/**
* The day on which the stream got uploaded/created. The return information should be in the format
* dd.mm.yyyy, however it NewPipe will not crash if its not.
* @return The day on which the stream got uploaded.
* @throws ParsingException
*/
@Nonnull
public abstract String getUploadDate() throws ParsingException;
/**
* This will return the url to the thumbnail of the stream. Try to return the medium resolution here.
* @return The url of the thumbnail.
* @throws ParsingException
*/
@Nonnull
public abstract String getThumbnailUrl() throws ParsingException;
/**
* This is the stream description. On YouTube this is the video description. You can return simple HTML here.
* @return The description of the stream/video.
* @throws ParsingException
*/
@Nonnull
public abstract String getDescription() throws ParsingException;
/**
* Get the age limit.
* @return The age which limits the content or {@value NO_AGE_LIMIT} if there is no limit
* @throws ParsingException if an error occurs while parsing
*/
public abstract int getAgeLimit() throws ParsingException;
/**
* This should return the length of a video in seconds.
* @return The length of the stream in seconds.
* @throws ParsingException
*/
public abstract long getLength() throws ParsingException;
/**
* If the url you are currently handling contains a time stamp/seek, you can return the
* position it represents here.
* If the url has no time stamp simply return zero.
* @return the timestamp in seconds
* @throws ParsingException
*/
public abstract long getTimeStamp() throws ParsingException;
/**
* The count of how many people have watched the video/listened to the audio stream.
* If the current stream has no view count or its not available simply return -1
* @return amount of views.
* @throws ParsingException
*/
public abstract long getViewCount() throws ParsingException;
/**
* The Amount of likes a video/audio stream got.
* If the current stream has no likes or its not available simply return -1
* @return the amount of likes the stream got
* @throws ParsingException
*/
public abstract long getLikeCount() throws ParsingException;
/**
* The Amount of dislikes a video/audio stream got.
* If the current stream has no dislikes or its not available simply return -1
* @return the amount of likes the stream got
* @throws ParsingException
*/
public abstract long getDislikeCount() throws ParsingException;
/**
* The Url to the page of the creator/uploader of the stream. This must not be a homepage,
* but the page offered by the service the extractor handles. This url will be handled by the
* <a href="https://teamnewpipe.github.io/documentation/03_Implement_a_service/#channel">ChannelExtractor</a>,
* so be sure to implement that one before you return a value here, otherwise NewPipe will crash if one selects
* this url.
* @return the url to the page of the creator/uploader of the stream or an empty String
* @throws ParsingException
*/
@Nonnull
public abstract String getUploaderUrl() throws ParsingException;
/**
* The name of the creator/uploader of the stream.
* If the name is not available you can simply return an empty string.
* @return the name of the creator/uploader of the stream or an empty String
* @throws ParsingException
*/
@Nonnull
public abstract String getUploaderName() throws ParsingException;
/**
* The url to the image file/profile picture/avatar of the creator/uploader of the stream.
* If the url is not available you can return an empty String.
* @return The url of the image file of the uploader or an empty String
* @throws ParsingException
*/
@Nonnull
public abstract String getUploaderAvatarUrl() throws ParsingException;
/**
* Get the dash mpd url. If you don't know what a dash MPD is you can read about it
* <a href="https://www.brendanlong.com/the-structure-of-an-mpeg-dash-mpd.html">here</a>.
* @return the url as a string or an empty string
* @throws ParsingException if an error occurs while reading
*/
@Nonnull public abstract String getDashMpdUrl() throws ParsingException;
/**
* I am not sure if this is in use, and how this is used. However the frontend is missing support
* for HLS streams. Prove me if I am wrong. Please open an
* <a href="https://github.com/teamnewpipe/newpipe/issues">issue</a>,
* or fix this description if you know whats up with this.
* @return The Url to the hls stream.
* @throws ParsingException
*/
@Nonnull public abstract String getHlsUrl() throws ParsingException;
/**
* This should return a list of available
* <a href="https://teamnewpipe.github.io/NewPipeExtractor/javadoc/org/schabi/newpipe/extractor/stream/AudioStream.html">AudioStream</a>s
* You can also return null or an empty list, however be aware that if you don't return anything
* in getVideoStreams(), getVideoOnlyStreams() and getDashMpdUrl() either the Collector will handle this as
* a failed extraction procedure.
* @return a list of audio only streams in the format of AudioStream
* @throws IOException
* @throws ExtractionException
*/
public abstract List<AudioStream> getAudioStreams() throws IOException, ExtractionException;
/**
* This should return a list of available
* <a href="https://teamnewpipe.github.io/NewPipeExtractor/javadoc/org/schabi/newpipe/extractor/stream/VideoStream.html">VideoStream</a>s
* Be aware this is the list of video streams which do contain an audio stream.
* You can also return null or an empty list, however be aware that if you don't return anything
* in getAudioStreams(), getVideoOnlyStreams() and getDashMpdUrl() either the Collector will handle this as
* a failed extraction procedure.
* @return a list of combined video and streams in the format of AudioStream
* @throws IOException
* @throws ExtractionException
*/
public abstract List<VideoStream> getVideoStreams() throws IOException, ExtractionException;
/**
* This should return a list of available
* <a href="https://teamnewpipe.github.io/NewPipeExtractor/javadoc/org/schabi/newpipe/extractor/stream/VideoStream.html">VideoStream</a>s.
* Be aware this is the list of video streams which do NOT contain an audio stream.
* You can also return null or an empty list, however be aware that if you don't return anything
* in getAudioStreams(), getVideoStreams() and getDashMpdUrl() either the Collector will handle this as
* a failed extraction procedure.
* @return a list of video and streams in the format of AudioStream
* @throws IOException
* @throws ExtractionException
*/
public abstract List<VideoStream> getVideoOnlyStreams() throws IOException, ExtractionException;
/**
* This will return a list of available
* <a href="https://teamnewpipe.github.io/NewPipeExtractor/javadoc/org/schabi/newpipe/extractor/stream/Subtitles.html">Subtitles</a>s.
* If no subtitles are available an empty list can returned.
* @return a list of available subtitles or an empty list
* @throws IOException
* @throws ExtractionException
*/
@Nonnull
public abstract List<SubtitlesStream> getSubtitlesDefault() throws IOException, ExtractionException;
/**
* This will return a list of available
* <a href="https://teamnewpipe.github.io/NewPipeExtractor/javadoc/org/schabi/newpipe/extractor/stream/Subtitles.html">Subtitles</a>s.
* given by a specific type.
* If no subtitles in that specific format are available an empty list can returned.
* @param format the media format by which the subtitles should be filtered
* @return a list of available subtitles or an empty list
* @throws IOException
* @throws ExtractionException
*/
@Nonnull
public abstract List<SubtitlesStream> getSubtitles(MediaFormat format) throws IOException, ExtractionException;
/**
* Get the <a href="https://teamnewpipe.github.io/NewPipeExtractor/javadoc/">StreamType</a>.
* @return the type of the stream
* @throws ParsingException
*/
public abstract StreamType getStreamType() throws ParsingException;
/**
* should return the url of the next stream. NewPipe will automatically play
* the next stream if the user wants that.
* If the next stream is is not available simply return null
* @return the InfoItem of the next stream
* @throws IOException
* @throws ExtractionException
*/
public abstract StreamInfoItem getNextStream() throws IOException, ExtractionException;
/**
* Should return a list of streams related to the current handled. Many services show suggested
* streams. If you don't like suggested streams you should implement them anyway since they can
* be disabled by the user later in the frontend.
* This list MUST NOT contain the next available video as this should be return through getNextStream()
* If is is not available simply return null
* @return a list of InfoItems showing the related videos/streams
* @throws IOException
* @throws ExtractionException
*/
public abstract StreamInfoItemsCollector getRelatedStreams() throws IOException, ExtractionException;
/**
* Should return a list of Frameset object that contains preview of stream frames
* @return list of preview frames or empty list if frames preview is not supported or not found for specified stream
* @throws IOException
* @throws ExtractionException
*/
@Nonnull
public List<Frameset> getFrames() throws IOException, ExtractionException {
return Collections.emptyList();
}
/**
* Should analyse the webpage's document and extracts any error message there might be. (e.g. GEMA block)
*
* @return Error message; null if there is no error message.
*/
public abstract String getErrorMessage();
//////////////////////////////////////////////////////////////////
/// Helper
//////////////////////////////////////////////////////////////////
/**
* Override this function if the format of time stamp in the url is not the same format as that form youtube.
* Honestly I don't even know the time stamp fromat of youtube.
* @param regexPattern
* @return the sime stamp/seek for the video in seconds
* @throws ParsingException
*/
protected long getTimestampSeconds(String regexPattern) throws ParsingException {
String timeStamp;
try {
timeStamp = Parser.matchGroup1(regexPattern, getOriginalUrl());
} catch (Parser.RegexException e) {
// catch this instantly since an url does not necessarily have to have a time stamp
// -2 because well the testing system will then know its the regex that failed :/
// not good i know
return -2;
}
if (!timeStamp.isEmpty()) {
try {
String secondsString = "";
String minutesString = "";
String hoursString = "";
try {
secondsString = Parser.matchGroup1("(\\d{1,3})s", timeStamp);
minutesString = Parser.matchGroup1("(\\d{1,3})m", timeStamp);
hoursString = Parser.matchGroup1("(\\d{1,3})h", timeStamp);
} catch (Exception e) {
//it could be that time is given in another method
if (secondsString.isEmpty() //if nothing was got,
&& minutesString.isEmpty()//treat as unlabelled seconds
&& hoursString.isEmpty()) {
secondsString = Parser.matchGroup1("t=(\\d+)", timeStamp);
}
}
int seconds = secondsString.isEmpty() ? 0 : Integer.parseInt(secondsString);
int minutes = minutesString.isEmpty() ? 0 : Integer.parseInt(minutesString);
int hours = hoursString.isEmpty() ? 0 : Integer.parseInt(hoursString);
//don't trust BODMAS!
return seconds + (60 * minutes) + (3600 * hours);
//Log.d(TAG, "derived timestamp value:"+ret);
//the ordering varies internationally
} catch (ParsingException e) {
throw new ParsingException("Could not get timestamp.", e);
}
} else {
return 0;
}
}
}