NewPipeExtractor/extractor/src/main/java/org/schabi/newpipe/extractor/stream/StreamExtractor.java

package org.schabi.newpipe.extractor.stream;

/*
 * Created by Christian Schabesberger on 10.08.18.
 *
 * Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
 * StreamExtractor.java is part of NewPipe.
 *
 * NewPipe is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * NewPipe is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with NewPipe.  If not, see <http://www.gnu.org/licenses/>.
 */

import org.schabi.newpipe.extractor.InfoItem;
import org.schabi.newpipe.extractor.InfoItemsCollector;
import org.schabi.newpipe.extractor.InfoItemExtractor;
import org.schabi.newpipe.extractor.Extractor;
import org.schabi.newpipe.extractor.MediaFormat;
import org.schabi.newpipe.extractor.MetaInfo;
import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.channel.ChannelExtractor;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.linkhandler.LinkHandler;
import org.schabi.newpipe.extractor.localization.DateWrapper;
import org.schabi.newpipe.extractor.utils.Parser;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;

import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.Locale;

/**
 * Scrapes information from a video/audio streaming service (eg, YouTube).
 *
 * <p>Only a handful of methods are abstract; every other getter ships with a neutral default
 * (empty string, empty list, {@code null}, {@code -1}, ...) that concrete extractors override
 * when the service provides the corresponding piece of information.</p>
 */
public abstract class StreamExtractor extends Extractor {

    /**
     * Value returned by {@link #getAgeLimit()} when the content is not age-restricted.
     */
    public static final int NO_AGE_LIMIT = 0;

    /**
     * Value returned by {@link #getUploaderSubscriberCount()} when the count is not available.
     */
    public static final long UNKNOWN_SUBSCRIBER_COUNT = -1;

    public StreamExtractor(final StreamingService service, final LinkHandler linkHandler) {
        super(service, linkHandler);
    }

    /**
     * The original textual date provided by the service. Should be used as a fallback if
     * {@link #getUploadDate()} isn't provided by the service, or it fails for some reason.
     *
     * <p>If the stream is a live stream, {@code null} should be returned.</p>
     *
     * @return The original textual date provided by the service, or {@code null}.
     * @throws ParsingException if there is an error in the extraction
     * @see #getUploadDate()
     */
    @Nullable
    public String getTextualUploadDate() throws ParsingException {
        return null;
    }

    /**
     * A {@link DateWrapper} set to the date provided by the service.<br>
     * Implementations usually will just parse the date returned from the {@link
     * #getTextualUploadDate()}.
     *
     * <p>If the stream is a live stream, {@code null} should be returned.</p>
     *
     * @return The date this item was uploaded, or {@code null}.
     * @throws ParsingException if there is an error in the extraction
     *                          or the extracted date couldn't be parsed.
     * @see #getTextualUploadDate()
     */
    @Nullable
    public DateWrapper getUploadDate() throws ParsingException {
        return null;
    }

    /**
     * This will return the url to the thumbnail of the stream. Try to return the medium resolution
     * here.
     *
     * @return The url of the thumbnail.
     */
    @Nonnull
    public abstract String getThumbnailUrl() throws ParsingException;

    /**
     * This is the stream description.
     *
     * @return The description of the stream/video or {@link Description#EMPTY_DESCRIPTION} if the
     * description is empty.
     */
    @Nonnull
    public Description getDescription() throws ParsingException {
        return Description.EMPTY_DESCRIPTION;
    }

    /**
     * Get the age limit.
     *
     * @return The age which limits the content or {@value #NO_AGE_LIMIT} if there is no limit
     * @throws ParsingException if an error occurs while parsing
     */
    public int getAgeLimit() throws ParsingException {
        return NO_AGE_LIMIT;
    }

    /**
     * This should return the length of a video in seconds.
     *
     * @return The length of the stream in seconds or 0 when it has no length (e.g. a livestream).
     */
    public long getLength() throws ParsingException {
        return 0;
    }

    /**
     * If the url you are currently handling contains a time stamp/seek, you can return the
     * position it represents here.
     * If the url has no time stamp simply return zero.
     *
     * @return the timestamp in seconds or 0 when there is no timestamp
     */
    public long getTimeStamp() throws ParsingException {
        return 0;
    }

    /**
     * The count of how many people have watched the video/listened to the audio stream.
     * If the current stream has no view count or it's not available simply return -1
     *
     * @return amount of views or -1 if not available.
     */
    public long getViewCount() throws ParsingException {
        return -1;
    }

    /**
     * The amount of likes a video/audio stream got.
     * If the current stream has no likes or it's not available simply return -1
     *
     * @return the amount of likes the stream got or -1 if not available.
     */
    public long getLikeCount() throws ParsingException {
        return -1;
    }

    /**
     * The amount of dislikes a video/audio stream got.
     * If the current stream has no dislikes or it's not available simply return -1
     *
     * @return the amount of dislikes the stream got or -1 if not available.
     */
    public long getDislikeCount() throws ParsingException {
        return -1;
    }

    /**
     * The Url to the page of the creator/uploader of the stream. This must not be a homepage,
     * but the page offered by the service the extractor handles. This url will be handled by the
     * {@link ChannelExtractor}, so be sure to implement that one before you return a value here,
     * otherwise NewPipe will crash if one selects this url.
     *
     * @return the url to the page of the creator/uploader of the stream or an empty string
     */
    @Nonnull
    public abstract String getUploaderUrl() throws ParsingException;

    /**
     * The name of the creator/uploader of the stream.
     * If the name is not available you can simply return an empty string.
     *
     * @return the name of the creator/uploader of the stream or an empty string
     */
    @Nonnull
    public abstract String getUploaderName() throws ParsingException;

    /**
     * Whether the uploader has been verified by the service's provider.
     * If there is no verification implemented, return <code>false</code>.
     *
     * @return whether the uploader has been verified by the service's provider
     */
    public boolean isUploaderVerified() throws ParsingException {
        return false;
    }

    /**
     * The subscriber count of the uploader.
     * If the subscriber count is not implemented, or is unavailable, return <code>-1</code>.
     *
     * @return the subscriber count of the uploader or {@value #UNKNOWN_SUBSCRIBER_COUNT} if not
     * available
     */
    public long getUploaderSubscriberCount() throws ParsingException {
        return UNKNOWN_SUBSCRIBER_COUNT;
    }

    /**
     * The url to the image file/profile picture/avatar of the creator/uploader of the stream.
     * If the url is not available you can return an empty String.
     *
     * @return The url of the image file of the uploader or an empty String
     */
    @Nonnull
    public String getUploaderAvatarUrl() throws ParsingException {
        return "";
    }

    /**
     * The Url to the page of the sub-channel of the stream. This must not be a homepage,
     * but the page offered by the service the extractor handles. This url will be handled by the
     * {@link ChannelExtractor}, so be sure to implement that one before you return a value here,
     * otherwise NewPipe will crash if one selects this url.
     *
     * @return the url to the page of the sub-channel of the stream or an empty String
     */
    @Nonnull
    public String getSubChannelUrl() throws ParsingException {
        return "";
    }

    /**
     * The name of the sub-channel of the stream.
     * If the name is not available you can simply return an empty string.
     *
     * @return the name of the sub-channel of the stream or an empty String
     */
    @Nonnull
    public String getSubChannelName() throws ParsingException {
        return "";
    }

    /**
     * The url to the image file/profile picture/avatar of the sub-channel of the stream.
     * If the url is not available you can return an empty String.
     *
     * @return The url of the image file of the sub-channel or an empty String
     */
    @Nonnull
    public String getSubChannelAvatarUrl() throws ParsingException {
        return "";
    }

    /**
     * Get the dash mpd url. If you don't know what a dash MPD is you can read about it
     * <a href="https://www.brendanlong.com/the-structure-of-an-mpeg-dash-mpd.html">here</a>.
     *
     * @return the url as a string or an empty string if not available
     * @throws ParsingException if an error occurs while reading
     */
    @Nonnull
    public String getDashMpdUrl() throws ParsingException {
        return "";
    }

    /**
     * The url of the HLS stream.
     *
     * <p>NOTE(review): the original author was not sure whether, and how, this value is used,
     * and stated that the frontend is missing support for HLS streams. If you know better, please
     * open an <a href="https://github.com/teamnewpipe/newpipe/issues">issue</a> or fix this
     * description.</p>
     *
     * @return The Url to the hls stream or an empty string if not available.
     */
    @Nonnull
    public String getHlsUrl() throws ParsingException {
        return "";
    }

    /**
     * This should return a list of available {@link AudioStream}s.
     * You can also return null or an empty list, however be aware that if you don't return anything
     * in getVideoStreams(), getVideoOnlyStreams() and getDashMpdUrl() either the Collector will
     * handle this as a failed extraction procedure.
     *
     * @return a list of audio only streams in the format of AudioStream
     */
    public abstract List<AudioStream> getAudioStreams() throws IOException, ExtractionException;

    /**
     * This should return a list of available {@link VideoStream}s.
     * Be aware this is the list of video streams which do contain an audio stream.
     * You can also return null or an empty list, however be aware that if you don't return anything
     * in getAudioStreams(), getVideoOnlyStreams() and getDashMpdUrl() either the Collector will
     * handle this as a failed extraction procedure.
     *
     * @return a list of combined video and audio streams in the format of VideoStream
     */
    public abstract List<VideoStream> getVideoStreams() throws IOException, ExtractionException;

    /**
     * This should return a list of available {@link VideoStream}s.
     * Be aware this is the list of video streams which do NOT contain an audio stream.
     * You can also return null or an empty list, however be aware that if you don't return anything
     * in getAudioStreams(), getVideoStreams() and getDashMpdUrl() either the Collector will handle
     * this as a failed extraction procedure.
     *
     * @return a list of video-only streams in the format of VideoStream
     */
    public abstract List<VideoStream> getVideoOnlyStreams() throws IOException, ExtractionException;

    /**
     * This will return a list of available {@link SubtitlesStream}s.
     * If no subtitles are available an empty list can be returned.
     *
     * @return a list of available subtitles or an empty list
     */
    @Nonnull
    public List<SubtitlesStream> getSubtitlesDefault() throws IOException, ExtractionException {
        return Collections.emptyList();
    }

    /**
     * This will return a list of available {@link SubtitlesStream}s given by a specific type.
     * If no subtitles in that specific format are available an empty list can be returned.
     *
     * <p>The default implementation ignores {@code format} and always returns an empty list.</p>
     *
     * @param format the media format by which the subtitles should be filtered
     * @return a list of available subtitles or an empty list
     */
    @Nonnull
    public List<SubtitlesStream> getSubtitles(final MediaFormat format)
            throws IOException, ExtractionException {
        return Collections.emptyList();
    }

    /**
     * Get the {@link StreamType}.
     *
     * @return the type of the stream
     */
    public abstract StreamType getStreamType() throws ParsingException;

    /**
     * Should return a list of streams related to the current handled. Many services show suggested
     * streams. If you don't like suggested streams you should implement them anyway since they can
     * be disabled by the user later in the frontend. The first related stream might be what was
     * previously known as a next stream.
     * If related streams aren't available simply return {@code null}.
     *
     * @return a list of InfoItems showing the related videos/streams
     */
    @Nullable
    public InfoItemsCollector<? extends InfoItem, ? extends InfoItemExtractor>
    getRelatedItems() throws IOException, ExtractionException {
        return null;
    }

    /**
     * @return The result of {@link #getRelatedItems()} if it is a
     * {@link StreamInfoItemsCollector}, <code>null</code> otherwise
     * @deprecated Use {@link #getRelatedItems()}. May be removed in a future version.
     */
    @Deprecated
    @Nullable
    public StreamInfoItemsCollector getRelatedStreams() throws IOException, ExtractionException {
        final InfoItemsCollector<?, ?> collector = getRelatedItems();
        if (collector instanceof StreamInfoItemsCollector) {
            return (StreamInfoItemsCollector) collector;
        } else {
            return null;
        }
    }

    /**
     * Should return a list of Frameset object that contains preview of stream frames
     *
     * @return list of preview frames or empty list if frames preview is not supported or not found
     *         for specified stream
     */
    @Nonnull
    public List<Frameset> getFrames() throws ExtractionException {
        return Collections.emptyList();
    }

    /**
     * Should analyse the webpage's document and extracts any error message there might be.
     *
     * @return Error message; <code>null</code> if there is no error message.
     */
    public String getErrorMessage() {
        return null;
    }

    //////////////////////////////////////////////////////////////////
    ///  Helper
    //////////////////////////////////////////////////////////////////

    /**
     * Extracts the seek position encoded in the original url.
     *
     * <p>The first group of {@code regexPattern}, matched against {@link #getOriginalUrl()}, is
     * taken as the raw timestamp. Inside it, the numbers directly followed by {@code h},
     * {@code m} and {@code s} are read as hours, minutes and seconds; each unit is optional and
     * looked up independently of the others. When none of the three units is present, the number
     * following {@code t=} is interpreted as plain seconds.</p>
     *
     * <p>Override this function if the format of timestamp in the url is not the same format as
     * that from youtube.</p>
     *
     * @param regexPattern the pattern whose first group captures the timestamp part of the url
     * @return the time stamp/seek for the video in seconds, {@code 0} if the captured timestamp is
     * empty, or {@code -2} if {@code regexPattern} does not match the original url
     * @throws ParsingException if a timestamp was captured but no number could be read from it
     */
    protected long getTimestampSeconds(final String regexPattern) throws ParsingException {
        final String timestamp;
        try {
            timestamp = Parser.matchGroup1(regexPattern, getOriginalUrl());
        } catch (final Parser.RegexException e) {
            // catch this instantly since a url does not necessarily have a timestamp

            // -2 because the testing system will consequently know that the regex failed
            // not good, I know
            return -2;
        }

        if (timestamp.isEmpty()) {
            return 0;
        }

        // Each unit is looked up on its own, so that a missing unit (e.g. no minutes in
        // "1h30s") does not prevent the remaining ones from being read.
        String secondsString = matchGroup1OrEmpty("(\\d+)s", timestamp);
        final String minutesString = matchGroup1OrEmpty("(\\d+)m", timestamp);
        final String hoursString = matchGroup1OrEmpty("(\\d+)h", timestamp);

        try {
            if (secondsString.isEmpty() && minutesString.isEmpty() && hoursString.isEmpty()) {
                // if nothing was obtained, treat as unlabelled seconds
                secondsString = Parser.matchGroup1("t=(\\d+)", timestamp);
            }

            final int seconds = secondsString.isEmpty() ? 0 : Integer.parseInt(secondsString);
            final int minutes = minutesString.isEmpty() ? 0 : Integer.parseInt(minutesString);
            final int hours = hoursString.isEmpty() ? 0 : Integer.parseInt(hoursString);

            return seconds + (60L * minutes) + (3600L * hours);
        } catch (final ParsingException | NumberFormatException e) {
            // NumberFormatException: a digit run too long to fit into an int
            throw new ParsingException("Could not get timestamp.", e);
        }
    }

    /**
     * Returns the first group of {@code pattern} in {@code input}, or an empty string when the
     * pattern does not match.
     */
    private static String matchGroup1OrEmpty(final String pattern, final String input) {
        try {
            return Parser.matchGroup1(pattern, input);
        } catch (final Parser.RegexException ignored) {
            // the unit is simply not part of the timestamp
            return "";
        }
    }

    /**
     * The host of the stream (Eg. peertube.cpy.re).
     * If the host is not available, or if the service doesn't use
     * a federated system, but a centralised system,
     * you can simply return an empty string.
     *
     * @return the host of the stream or an empty string.
     */
    @Nonnull
    public String getHost() throws ParsingException {
        return "";
    }

    /**
     * The privacy of the stream (Eg. Public, Private, Unlisted…).
     *
     * @return the privacy of the stream; {@link Privacy#PUBLIC} by default.
     */
    public Privacy getPrivacy() throws ParsingException {
        return Privacy.PUBLIC;
    }

    /**
     * The name of the category of the stream.
     * If the category is not available you can simply return an empty string.
     *
     * @return the category of the stream or an empty string.
     */
    @Nonnull
    public String getCategory() throws ParsingException {
        return "";
    }

    /**
     * The name of the licence of the stream.
     * If the licence is not available you can simply return an empty string.
     *
     * @return the licence of the stream or an empty String.
     */
    @Nonnull
    public String getLicence() throws ParsingException {
        return "";
    }

    /**
     * The locale language of the stream.
     * If the language is not available you can simply return null.
     * If the language is provided by a language code, you can return
     * new Locale(language_code);
     *
     * @return the locale language of the stream or <code>null</code>.
     */
    @Nullable
    public Locale getLanguageInfo() throws ParsingException {
        return null;
    }

    /**
     * The list of tags of the stream.
     * If the tag list is not available you can simply return an empty list.
     *
     * @return the list of tags of the stream or Collections.emptyList().
     */
    @Nonnull
    public List<String> getTags() throws ParsingException {
        return Collections.emptyList();
    }

    /**
     * The support information of the stream.
     * see: https://framatube.org/videos/watch/ee408ec8-07cd-4e35-b884-fb681a4b9d37
     * (support button).
     * If the support information is not available,
     * you can simply return an empty String.
     *
     * @return the support information of the stream or an empty string.
     */
    @Nonnull
    public String getSupportInfo() throws ParsingException {
        return "";
    }

    /**
     * The list of stream segments by timestamps for the stream.
     * If the segment list is not available you can simply return an empty list.
     *
     * @return The list of segments of the stream or an empty list.
     */
    @Nonnull
    public List<StreamSegment> getStreamSegments() throws ParsingException {
        return Collections.emptyList();
    }

    /**
     * Meta information about the stream.
     * <p>
     * This can be information about the stream creator (e.g. if the creator is a public
     * broadcaster) or further information on the topic (e.g. hints that the video might contain
     * conspiracy theories or contains information about a current health situation like the
     * Covid-19 pandemic).
     * </p>
     * The meta information often contains links to external sources like Wikipedia or the WHO.
     *
     * @return The meta info of the stream or an empty list if not provided.
     */
    @Nonnull
    public List<MetaInfo> getMetaInfo() throws ParsingException {
        return Collections.emptyList();
    }

    /**
     * Whether the stream is a short-form content.
     *
     * <p>
     * Short-form contents are contents in the style of TikTok, YouTube Shorts, or Instagram Reels
     * videos.
     * </p>
     *
     * @return whether the stream is a short-form content
     */
    public boolean isShortFormContent() throws ParsingException {
        return false;
    }

    /**
     * The visibility levels a stream can have, as reported by {@link #getPrivacy()}.
     */
    public enum Privacy {
        PUBLIC,
        UNLISTED,
        PRIVATE,
        INTERNAL,
        OTHER
    }
}