Merge branch 'master' into misc-utils

This commit is contained in:
Christian Schabesberger 2018-11-13 13:02:47 +01:00 committed by GitHub
commit f91ce42b42
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 251 additions and 92 deletions

View File

@ -1,6 +1,6 @@
# NewPipe Extractor
[![Build Status](https://travis-ci.org/TeamNewPipe/NewPipeExtractor.svg?branch=master)](https://travis-ci.org/TeamNewPipe/NewPipeExtractor) [![JIT Pack Badge](https://jitpack.io/v/TeamNewPipe/NewPipeExtractor.svg)](https://jitpack.io/#TeamNewPipe/NewPipeExtractor) [Documentation](https://teamnewpipe.github.io/NewPipeExtractor/javadoc/)
[![Build Status](https://travis-ci.org/TeamNewPipe/NewPipeExtractor.svg?branch=master)](https://travis-ci.org/TeamNewPipe/NewPipeExtractor) [![JIT Pack Badge](https://jitpack.io/v/TeamNewPipe/NewPipeExtractor.svg)](https://jitpack.io/#TeamNewPipe/NewPipeExtractor) [Documentation](https://teamnewpipe.github.io/documentation/)
NewPipe Extractor is a library for extracting things from streaming sites. It is a core component of [NewPipe](https://github.com/TeamNewPipe/NewPipe), but could be used independently.

View File

@ -36,7 +36,14 @@ public enum MediaFormat {
M4A (0x3, "m4a", "m4a", "audio/mp4"),
WEBMA (0x4, "WebM", "webm", "audio/webm"),
MP3 (0x5, "MP3", "mp3", "audio/mpeg"),
OPUS (0x6, "opus", "opus", "audio/opus");
OPUS (0x6, "opus", "opus", "audio/opus"),
// subtitles formats
VTT (0x7, "WebVTT", "vtt", "text/vtt"),
TTML (0x8, "Timed Text Markup Language", "ttml", "application/ttml+xml"),
TRANSCRIPT1 (0x9, "TranScript v1", "srv1", "text/xml"),
TRANSCRIPT2 (0xA, "TranScript v2", "srv2", "text/xml"),
TRANSCRIPT3 (0xB, "TranScript v3", "srv3", "text/xml"),
SRT (0xC, "SubRip file format", "srt", "text/srt");
public final int id;
public final String name;

View File

@ -8,6 +8,24 @@ import java.util.List;
import static java.util.Arrays.asList;
import static java.util.Collections.unmodifiableList;
/*
* Copyright (C) Christian Schabesberger 2018 <chris.schabesberger@mailbox.org>
* ServiceList.java is part of NewPipe.
*
* NewPipe is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* NewPipe is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* A list of supported services.
*/
@ -19,6 +37,10 @@ public final class ServiceList {
public static final YoutubeService YouTube;
public static final SoundcloudService SoundCloud;
/**
* When creating a new service, put this service at the end of this list,
* and give it the next free id.
*/
private static final List<StreamingService> SERVICES = unmodifiableList(
asList(
YouTube = new YoutubeService(0),

View File

@ -14,11 +14,38 @@ import org.schabi.newpipe.extractor.utils.Localization;
import java.util.Collections;
import java.util.List;
/*
* Copyright (C) Christian Schabesberger 2018 <chris.schabesberger@mailbox.org>
* StreamingService.java is part of NewPipe.
*
* NewPipe is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* NewPipe is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
*/
public abstract class StreamingService {
/**
* This class holds meta information about the service implementation.
*/
public static class ServiceInfo {
private final String name;
private final List<MediaCapability> mediaCapabilities;
/**
* Creates a new instance of a ServiceInfo
* @param name the name of the service
* @param mediaCapabilities the type of media this service can handle
*/
public ServiceInfo(String name, List<MediaCapability> mediaCapabilities) {
this.name = name;
this.mediaCapabilities = Collections.unmodifiableList(mediaCapabilities);
@ -37,6 +64,10 @@ public abstract class StreamingService {
}
}
/**
* LinkType will be used to determine which type of URL you are handling, and therefore which part
* of NewPipe should handle a certain URL.
*/
public enum LinkType {
NONE,
STREAM,
@ -47,6 +78,16 @@ public abstract class StreamingService {
private final int serviceId;
private final ServiceInfo serviceInfo;
/**
* Creates a new Streaming service.
* If you implement one, do not set the id within your implementation of this extractor; instead
* set the id when you put the extractor into
* <a href="https://teamnewpipe.github.io/NewPipeExtractor/javadoc/org/schabi/newpipe/extractor/ServiceList.html">ServiceList</a>.
* All other parameters can be set directly from the overriding constructor.
* @param id the number of the service to identify it within the NewPipe frontend
* @param name the name of the service
* @param capabilities the type of media this service can handle
*/
public StreamingService(int id, String name, List<ServiceInfo.MediaCapability> capabilities) {
this.serviceId = id;
this.serviceInfo = new ServiceInfo(name, capabilities);
@ -68,24 +109,93 @@ public abstract class StreamingService {
////////////////////////////////////////////
// Url Id handler
////////////////////////////////////////////
/**
* Must return a new instance of an implementation of LinkHandlerFactory for streams.
* @return an instance of a LinkHandlerFactory for streams
*/
public abstract LinkHandlerFactory getStreamLHFactory();
/**
* Must return a new instance of an implementation of ListLinkHandlerFactory for channels.
* If support for channels is not given null must be returned.
* @return an instance of a ListLinkHandlerFactory for channels or null
*/
public abstract ListLinkHandlerFactory getChannelLHFactory();
/**
* Must return a new instance of an implementation of ListLinkHandlerFactory for playlists.
* If support for playlists is not given null must be returned.
* @return an instance of a ListLinkHandlerFactory for playlists or null
*/
public abstract ListLinkHandlerFactory getPlaylistLHFactory();
/**
* Must return an instance of an implementation of SearchQueryHandlerFactory.
* @return an instance of a SearchQueryHandlerFactory
*/
public abstract SearchQueryHandlerFactory getSearchQHFactory();
////////////////////////////////////////////
// Extractor
////////////////////////////////////////////
/**
* Must create a new instance of a SearchExtractor implementation.
* @param queryHandler specifies the keyword to look for, and the filters which should be applied.
* @param localization specifies the language/country for the extractor.
* @return a new SearchExtractor instance
*/
public abstract SearchExtractor getSearchExtractor(SearchQueryHandler queryHandler, Localization localization);
/**
* Must create a new instance of a SuggestionExtractor implementation.
* @param localization specifies the language/country for the extractor.
* @return a new SuggestionExtractor instance
*/
public abstract SuggestionExtractor getSuggestionExtractor(Localization localization);
/**
* Outdated or obsolete. null can be returned.
* @return just null
*/
public abstract SubscriptionExtractor getSubscriptionExtractor();
/**
* Must create a new instance of a KioskList implementation.
* @return a new KioskList instance
* @throws ExtractionException
*/
public abstract KioskList getKioskList() throws ExtractionException;
/**
* Must create a new instance of a ChannelExtractor implementation.
* @param linkHandler is pointing to the channel which should be handled by this new instance.
* @param localization specifies the language used for the request.
* @return a new ChannelExtractor
* @throws ExtractionException
*/
public abstract ChannelExtractor getChannelExtractor(ListLinkHandler linkHandler,
Localization localization) throws ExtractionException;
/**
* Must create a new instance of a PlaylistExtractor implementation.
* @param linkHandler is pointing to the playlist which should be handled by this new instance.
* @param localization specifies the language used for the request.
* @return a new PlaylistExtractor
* @throws ExtractionException
*/
public abstract PlaylistExtractor getPlaylistExtractor(ListLinkHandler linkHandler,
Localization localization) throws ExtractionException;
/**
* Must create a new instance of a StreamExtractor implementation.
* @param linkHandler is pointing to the stream which should be handled by this new instance.
* @param localization specifies the language used for the request.
* @return a new StreamExtractor
* @throws ExtractionException
*/
public abstract StreamExtractor getStreamExtractor(LinkHandler linkHandler,
Localization localization) throws ExtractionException;
////////////////////////////////////////////
@ -165,9 +275,11 @@ public abstract class StreamingService {
return getStreamExtractor(getStreamLHFactory().fromUrl(url), NewPipe.getPreferredLocalization());
}
/**
* figure out where the link is pointing to (a channel, video, playlist, etc.)
* Figures out where the link is pointing to (a channel, a video, a playlist, etc.)
* @param url the url on which it should be decided of which link type it is
* @return the link type of url
* @throws ParsingException
*/
public final LinkType getLinkTypeByUrl(String url) throws ParsingException {
LinkHandlerFactory sH = getStreamLHFactory();

View File

@ -172,13 +172,13 @@ public class SoundcloudStreamExtractor extends StreamExtractor {
@Override
@Nonnull
public List<Subtitles> getSubtitlesDefault() throws IOException, ExtractionException {
public List<SubtitlesStream> getSubtitlesDefault() throws IOException, ExtractionException {
return Collections.emptyList();
}
@Override
@Nonnull
public List<Subtitles> getSubtitles(SubtitlesFormat format) throws IOException, ExtractionException {
public List<SubtitlesStream> getSubtitles(MediaFormat format) throws IOException, ExtractionException {
return Collections.emptyList();
}

View File

@ -7,6 +7,7 @@ import com.grack.nanojson.JsonParserException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.mozilla.javascript.Context;
import org.mozilla.javascript.Function;
import org.mozilla.javascript.ScriptableObject;
@ -460,15 +461,15 @@ public class YoutubeStreamExtractor extends StreamExtractor {
@Override
@Nonnull
public List<Subtitles> getSubtitlesDefault() throws IOException, ExtractionException {
return getSubtitles(SubtitlesFormat.TTML);
public List<SubtitlesStream> getSubtitlesDefault() throws IOException, ExtractionException {
return getSubtitles(MediaFormat.TTML);
}
@Override
@Nonnull
public List<Subtitles> getSubtitles(final SubtitlesFormat format) throws IOException, ExtractionException {
public List<SubtitlesStream> getSubtitles(final MediaFormat format) throws IOException, ExtractionException {
assertPageFetched();
List<Subtitles> subtitles = new ArrayList<>();
List<SubtitlesStream> subtitles = new ArrayList<>();
for (final SubtitlesInfo subtitlesInfo : subtitlesInfos) {
subtitles.add(subtitlesInfo.getSubtitle(format));
}
@ -494,9 +495,13 @@ public class YoutubeStreamExtractor extends StreamExtractor {
assertPageFetched();
try {
StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
collector.commit(extractVideoPreviewInfo(doc.select("div[class=\"watch-sidebar-section\"]")
.first().select("li").first()));
Elements watch = doc.select("div[class=\"watch-sidebar-section\"]");
if (watch.size() < 1) {
return null;// prevent the snackbar notification "report error" on age-restricted videos
}
collector.commit(extractVideoPreviewInfo(watch.first().select("li").first()));
return collector.getItems().get(0);
} catch (Exception e) {
throw new ParsingException("Could not get next video", e);
@ -815,21 +820,16 @@ public class YoutubeStreamExtractor extends StreamExtractor {
final String languageCode;
final boolean isGenerated;
final Locale locale;
public SubtitlesInfo(final String baseUrl, final String languageCode, final boolean isGenerated) {
this.cleanUrl = baseUrl
.replaceAll("&fmt=[^&]*", "") // Remove preexisting format if exists
.replaceAll("&tlang=[^&]*", ""); // Remove translation language
this.languageCode = languageCode;
this.isGenerated = isGenerated;
final String[] splits = languageCode.split("-");
this.locale = splits.length == 2 ? new Locale(splits[0], splits[1]) : new Locale(languageCode);
}
public Subtitles getSubtitle(final SubtitlesFormat format) {
return new Subtitles(format, locale, cleanUrl + "&fmt=" + format.getExtension(), isGenerated);
public SubtitlesStream getSubtitle(final MediaFormat format) {
return new SubtitlesStream(format, languageCode, cleanUrl + "&fmt=" + format.getSuffix(), isGenerated);
}
}

View File

@ -1,7 +1,7 @@
package org.schabi.newpipe.extractor.stream;
/*
* Created by Christian Schabesberger on 10.08.15.
* Created by Christian Schabesberger on 10.08.18.
*
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
* StreamExtractor.java is part of NewPipe.
@ -21,6 +21,7 @@ package org.schabi.newpipe.extractor.stream;
*/
import org.schabi.newpipe.extractor.Extractor;
import org.schabi.newpipe.extractor.MediaFormat;
import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
@ -210,19 +211,20 @@ public abstract class StreamExtractor extends Extractor {
* @throws ExtractionException
*/
@Nonnull
public abstract List<Subtitles> getSubtitlesDefault() throws IOException, ExtractionException;
public abstract List<SubtitlesStream> getSubtitlesDefault() throws IOException, ExtractionException;
/**
* This will return a list of available
* <a href="https://teamnewpipe.github.io/NewPipeExtractor/javadoc/org/schabi/newpipe/extractor/stream/SubtitlesStream.html">SubtitlesStream</a>s
* in a specific format.
* If no subtitles in that specific format are available, an empty list can be returned.
* @param format the media format by which the subtitles should be filtered
* @return a list of available subtitles or an empty list
* @throws IOException
* @throws ExtractionException
*/
@Nonnull
public abstract List<Subtitles> getSubtitles(SubtitlesFormat format) throws IOException, ExtractionException;
public abstract List<SubtitlesStream> getSubtitles(MediaFormat format) throws IOException, ExtractionException;
/**
* Get the <a href="https://teamnewpipe.github.io/NewPipeExtractor/javadoc/">StreamType</a>.
@ -314,5 +316,6 @@ public abstract class StreamExtractor extends Extractor {
}
} else {
return 0;
}};
}
}
}

View File

@ -283,7 +283,7 @@ public class StreamInfo extends Info {
private List<InfoItem> relatedStreams;
private long startPosition = 0;
private List<Subtitles> subtitles;
private List<SubtitlesStream> subtitles;
/**
* Get the stream type
@ -494,11 +494,11 @@ public class StreamInfo extends Info {
this.startPosition = startPosition;
}
public List<Subtitles> getSubtitles() {
public List<SubtitlesStream> getSubtitles() {
return subtitles;
}
public void setSubtitles(List<Subtitles> subtitles) {
public void setSubtitles(List<SubtitlesStream> subtitles) {
this.subtitles = subtitles;
}

View File

@ -1,34 +0,0 @@
package org.schabi.newpipe.extractor.stream;
import org.schabi.newpipe.extractor.stream.SubtitlesFormat;
import java.io.Serializable;
import java.util.Locale;
/**
 * Immutable description of one subtitle track: its format, its locale,
 * the URL it can be fetched from and whether it was generated automatically.
 */
public class Subtitles implements Serializable {
    private final SubtitlesFormat format;
    private final Locale locale;
    private final String URL;
    private final boolean autoGenerated;

    /**
     * Creates a new subtitle track description.
     *
     * @param format        the format of the subtitle file
     * @param locale        the locale of the subtitle track
     * @param URL           the URL of the subtitle file
     * @param autoGenerated whether the track is flagged as auto-generated
     */
    public Subtitles(SubtitlesFormat format, Locale locale, String URL, boolean autoGenerated) {
        this.autoGenerated = autoGenerated;
        this.URL = URL;
        this.locale = locale;
        this.format = format;
    }

    /**
     * @return the format passed to the constructor
     */
    public SubtitlesFormat getFileType() {
        return this.format;
    }

    /**
     * @return the locale passed to the constructor
     */
    public Locale getLocale() {
        return this.locale;
    }

    /**
     * @return the URL passed to the constructor
     */
    public String getURL() {
        return this.URL;
    }

    /**
     * @return the auto-generated flag passed to the constructor
     */
    public boolean isAutoGenerated() {
        return this.autoGenerated;
    }
}

View File

@ -1,25 +0,0 @@
package org.schabi.newpipe.extractor.stream;
/**
 * Subtitle formats offered by YouTube.
 *
 * TRANSCRIPT(3) is the default YouTube format and is based on TTML, but unlike
 * VTT or TTML it is NOT a W3 standard. TRANSCRIPT subtitles are NOT supported
 * by ExoPlayer; only VTT and TTML are.
 */
public enum SubtitlesFormat {
    VTT(0x0, "vtt"),
    TTML(0x1, "ttml"),
    TRANSCRIPT1(0x2, "srv1"),
    TRANSCRIPT2(0x3, "srv2"),
    TRANSCRIPT3(0x4, "srv3");

    /** Numeric identifier of the format; stored but not exposed by this enum. */
    private final int id;

    /** Extension string associated with the format, e.g. {@code "vtt"}. */
    private final String extension;

    /**
     * @param formatId        the numeric identifier of the format
     * @param formatExtension the extension string of the format
     */
    SubtitlesFormat(int formatId, String formatExtension) {
        this.extension = formatExtension;
        this.id = formatId;
    }

    /**
     * @return the extension string of this format
     */
    public String getExtension() {
        return this.extension;
    }
}

View File

@ -0,0 +1,73 @@
package org.schabi.newpipe.extractor.stream;
import org.schabi.newpipe.extractor.MediaFormat;
import java.io.Serializable;
import java.util.Locale;
/**
 * A stream that carries subtitles. In addition to what is handed to the
 * {@code Stream} super constructor (url and format) it keeps the language
 * code it was created with, a {@link Locale} derived from that code and a
 * flag telling whether the subtitles were generated automatically.
 */
public class SubtitlesStream extends Stream implements Serializable {
    private final MediaFormat format;
    private final Locale locale;
    private final String url;
    private final boolean autoGenerated;
    private final String code;

    /**
     * Creates a new subtitles stream.
     *
     * @param format        the media format of the subtitles
     * @param languageCode  the language code, with its parts separated by {@code '-'}
     * @param url           the URL of the subtitles
     * @param autoGenerated whether the subtitles are flagged as auto-generated
     */
    public SubtitlesStream(MediaFormat format, String languageCode, String url, boolean autoGenerated) {
        super(url, format);

        /*
         * Locale.forLanguageTag and Locale.Builder are only available for API >= 21,
         * so the language code is split by hand and fed to the Locale constructors.
         */
        final String[] parts = languageCode.split("-");
        if (parts.length == 2) {
            // language + country
            this.locale = new Locale(parts[0], parts[1]);
        } else if (parts.length == 3) {
            // language + country + variant; complex variants do not work!
            this.locale = new Locale(parts[0], parts[1], parts[2]);
        } else {
            // any other number of parts: only the first part is used
            this.locale = new Locale(parts[0]);
        }

        this.autoGenerated = autoGenerated;
        this.url = url;
        this.format = format;
        this.code = languageCode;
    }

    /**
     * @return the suffix of the media format of this stream
     */
    public String getExtension() {
        return this.format.suffix;
    }

    /**
     * @return the URL this stream was created with
     */
    public String getURL() {
        return this.url;
    }

    /**
     * @return the auto-generated flag this stream was created with
     */
    public boolean isAutoGenerated() {
        return this.autoGenerated;
    }

    /**
     * Compares this stream with another one. They are considered equal when the
     * super class check passes, the other stream is a SubtitlesStream too, and
     * both the language code and the auto-generated flag match.
     *
     * @param cmp the stream to compare with
     * @return true if all of the checks above pass
     */
    @Override
    public boolean equalStats(Stream cmp) {
        if (!super.equalStats(cmp) || !(cmp instanceof SubtitlesStream)) {
            return false;
        }
        final SubtitlesStream other = (SubtitlesStream) cmp;
        return this.code.equals(other.code) && this.autoGenerated == other.autoGenerated;
    }

    /**
     * @return the display name of the locale, expressed in that same locale
     */
    public String getDisplayLanguageName() {
        return this.locale.getDisplayName(this.locale);
    }

    /**
     * @return the unmodified language code this stream was created with
     */
    public String getLanguageTag() {
        return this.code;
    }

    /**
     * @return the locale derived from the language code
     */
    public Locale getLocale() {
        return this.locale;
    }
}

View File

@ -4,13 +4,13 @@ import org.junit.BeforeClass;
import org.junit.Ignore;
import org.junit.Test;
import org.schabi.newpipe.Downloader;
import org.schabi.newpipe.extractor.MediaFormat;
import org.schabi.newpipe.extractor.NewPipe;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeStreamExtractor;
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeStreamLinkHandlerFactory;
import org.schabi.newpipe.extractor.stream.StreamExtractor;
import org.schabi.newpipe.extractor.stream.SubtitlesFormat;
import org.schabi.newpipe.extractor.stream.VideoStream;
import org.schabi.newpipe.extractor.utils.Localization;
@ -131,6 +131,6 @@ public class YoutubeStreamExtractorAgeRestrictedTest {
@Test
public void testGetSubtitlesList() throws IOException, ExtractionException {
// Video (/view?v=YQHsXMglC9A) set in the setUp() method has no captions => empty list
assertTrue(extractor.getSubtitles(SubtitlesFormat.TTML).isEmpty());
assertTrue(extractor.getSubtitles(MediaFormat.TTML).isEmpty());
}
}

View File

@ -4,13 +4,13 @@ import org.junit.BeforeClass;
import org.junit.Ignore;
import org.junit.Test;
import org.schabi.newpipe.Downloader;
import org.schabi.newpipe.extractor.MediaFormat;
import org.schabi.newpipe.extractor.NewPipe;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeStreamExtractor;
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeStreamLinkHandlerFactory;
import org.schabi.newpipe.extractor.stream.StreamExtractor;
import org.schabi.newpipe.extractor.stream.SubtitlesFormat;
import org.schabi.newpipe.extractor.stream.VideoStream;
import org.schabi.newpipe.extractor.utils.Localization;
@ -124,6 +124,6 @@ public class YoutubeStreamExtractorControversialTest {
@Test
public void testGetSubtitlesList() throws IOException, ExtractionException {
// Video set in the setUp() method is expected to have captions => the list must not be empty
assertTrue(!extractor.getSubtitles(SubtitlesFormat.TTML).isEmpty());
assertTrue(!extractor.getSubtitles(MediaFormat.TTML).isEmpty());
}
}

View File

@ -3,6 +3,7 @@ package org.schabi.newpipe.extractor.services.youtube;
import org.junit.BeforeClass;
import org.junit.Test;
import org.schabi.newpipe.Downloader;
import org.schabi.newpipe.extractor.MediaFormat;
import org.schabi.newpipe.extractor.NewPipe;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
@ -164,7 +165,7 @@ public class YoutubeStreamExtractorDefaultTest {
@Test
public void testGetSubtitlesList() throws IOException, ExtractionException {
// Video (/view?v=YQHsXMglC9A) set in the setUp() method has no captions => empty list
assertTrue(extractor.getSubtitles(SubtitlesFormat.TTML).isEmpty());
assertTrue(extractor.getSubtitles(MediaFormat.TTML).isEmpty());
}
}

View File

@ -62,9 +62,9 @@ public class YoutubeSearchExtractorDefaultTest extends YoutubeSearchExtractorBas
@Test
public void testResultList_FirstElement() {
InfoItem firstInfoItem = itemsPage.getItems().get(0);
InfoItem firstInfoItem = itemsPage.getItems().get(1);
// THe channel should be the first item
// The channel should be the first item
assertTrue(firstInfoItem instanceof ChannelInfoItem);
assertEquals("name", "PewDiePie", firstInfoItem.getName());
assertEquals("url","https://www.youtube.com/user/PewDiePie", firstInfoItem.getUrl());