2018-05-08 21:19:03 +02:00
|
|
|
package org.schabi.newpipe.extractor.services.youtube.extractors;
|
2017-03-01 18:47:52 +01:00
|
|
|
|
2020-02-23 18:27:28 +01:00
|
|
|
import com.grack.nanojson.JsonObject;
|
|
|
|
|
2020-03-17 14:04:46 +01:00
|
|
|
import org.schabi.newpipe.extractor.ListExtractor;
|
2017-08-11 03:23:09 +02:00
|
|
|
import org.schabi.newpipe.extractor.channel.ChannelInfoItemExtractor;
|
2017-03-01 18:47:52 +01:00
|
|
|
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
2021-01-22 01:44:58 +01:00
|
|
|
import org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper;
|
2020-02-23 18:27:28 +01:00
|
|
|
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeChannelLinkHandlerFactory;
|
2017-07-11 05:08:03 +02:00
|
|
|
import org.schabi.newpipe.extractor.utils.Utils;
|
2017-03-01 18:47:52 +01:00
|
|
|
|
2020-04-10 10:51:05 +02:00
|
|
|
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.fixThumbnailUrl;
|
|
|
|
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject;
|
2020-02-24 12:48:16 +01:00
|
|
|
|
2017-06-29 20:12:55 +02:00
|
|
|
/*
|
2017-03-01 18:47:52 +01:00
|
|
|
* Created by Christian Schabesberger on 12.02.17.
|
|
|
|
*
|
|
|
|
* Copyright (C) Christian Schabesberger 2017 <chris.schabesberger@mailbox.org>
|
2017-08-11 03:23:09 +02:00
|
|
|
* YoutubeChannelInfoItemExtractor.java is part of NewPipe.
|
2017-03-01 18:47:52 +01:00
|
|
|
*
|
|
|
|
* NewPipe is free software: you can redistribute it and/or modify
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
* the Free Software Foundation, either version 3 of the License, or
|
|
|
|
* (at your option) any later version.
|
|
|
|
*
|
|
|
|
* NewPipe is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
*/
|
|
|
|
|
2017-08-11 03:23:09 +02:00
|
|
|
public class YoutubeChannelInfoItemExtractor implements ChannelInfoItemExtractor {
|
2022-03-18 15:09:06 +01:00
|
|
|
private final JsonObject channelInfoItem;
|
2022-11-22 02:17:10 +01:00
|
|
|
/**
|
|
|
|
* New layout:
|
|
|
|
* "subscriberCountText": Channel handle
|
|
|
|
* "videoCountText": Subscriber count
|
|
|
|
*/
|
|
|
|
private final boolean withHandle;
|
2017-03-01 18:47:52 +01:00
|
|
|
|
2022-03-18 15:09:06 +01:00
|
|
|
public YoutubeChannelInfoItemExtractor(final JsonObject channelInfoItem) {
|
2020-02-23 18:27:28 +01:00
|
|
|
this.channelInfoItem = channelInfoItem;
|
2022-11-22 02:17:10 +01:00
|
|
|
|
|
|
|
boolean wHandle = false;
|
[YouTube] Fix hashtags links extraction and escape text in attribute descriptions + HTML links
webCommandMetadata object is contained inside a commandMetadata one, so it is
not accessible from the root of the navigationEndpoint object.
The corresponding statement has been moved at the bottom of the specific
endpoints parsing, as the webCommandMetadata object is present almost
everywhere, otherwise URLs of some endpoints would have be changed, such as
uploader URLs (from channel IDs to handles).
As no ParsingException is now thrown by getUrlFromNavigationEndpoint, and so by
getTextFromObject, getUrlFromObject and getTextAtKey, the methods which were
catching ParsingExceptions thrown by these methods had to be updated.
URLs got in the HTML version of getTextFromObject are now escaped properly to
provide valid HTML to clients. This has been also done for attribute
descriptions, with the description text for this type of descriptions.
As YouTube descriptions are in HTML format (except for the fallback on the JSON
player response, which is plain text and only happens when there is no visual
metadata or a breaking change), all URLs returned are escaped, so tests which
are testing presence of URLs with escaped characters had to be updated (it was
only the case for YoutubeStreamExtractorDefaultTest.DescriptionTestUnboxing).
2023-02-20 13:21:55 +01:00
|
|
|
final String subscriberCountText = getTextFromObject(
|
|
|
|
channelInfoItem.getObject("subscriberCountText"));
|
|
|
|
if (subscriberCountText != null) {
|
|
|
|
wHandle = subscriberCountText.startsWith("@");
|
2022-11-22 02:17:10 +01:00
|
|
|
}
|
|
|
|
this.withHandle = wHandle;
|
2017-03-01 18:47:52 +01:00
|
|
|
}
|
|
|
|
|
2017-06-29 20:12:55 +02:00
|
|
|
@Override
|
2017-03-01 18:47:52 +01:00
|
|
|
public String getThumbnailUrl() throws ParsingException {
|
2020-02-23 18:27:28 +01:00
|
|
|
try {
|
2022-03-18 15:09:06 +01:00
|
|
|
final String url = channelInfoItem.getObject("thumbnail").getArray("thumbnails")
|
|
|
|
.getObject(0).getString("url");
|
2020-02-27 19:08:46 +01:00
|
|
|
|
2020-02-28 09:36:33 +01:00
|
|
|
return fixThumbnailUrl(url);
|
2022-03-18 15:09:06 +01:00
|
|
|
} catch (final Exception e) {
|
2020-02-23 18:27:28 +01:00
|
|
|
throw new ParsingException("Could not get thumbnail url", e);
|
2017-03-01 18:47:52 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-06-29 20:12:55 +02:00
|
|
|
@Override
|
2017-08-11 20:21:49 +02:00
|
|
|
public String getName() throws ParsingException {
|
2020-02-23 18:27:28 +01:00
|
|
|
try {
|
2020-02-27 17:39:23 +01:00
|
|
|
return getTextFromObject(channelInfoItem.getObject("title"));
|
2022-03-18 15:09:06 +01:00
|
|
|
} catch (final Exception e) {
|
2020-02-23 18:27:28 +01:00
|
|
|
throw new ParsingException("Could not get name", e);
|
|
|
|
}
|
2017-03-01 18:47:52 +01:00
|
|
|
}
|
|
|
|
|
2017-06-29 20:12:55 +02:00
|
|
|
@Override
|
2017-08-11 20:21:49 +02:00
|
|
|
public String getUrl() throws ParsingException {
|
2019-08-27 12:01:00 +02:00
|
|
|
try {
|
2022-03-18 15:09:06 +01:00
|
|
|
final String id = "channel/" + channelInfoItem.getString("channelId");
|
2020-02-23 18:27:28 +01:00
|
|
|
return YoutubeChannelLinkHandlerFactory.getInstance().getUrl(id);
|
2022-03-18 15:09:06 +01:00
|
|
|
} catch (final Exception e) {
|
2020-02-23 18:27:28 +01:00
|
|
|
throw new ParsingException("Could not get url", e);
|
2019-08-12 11:57:29 +02:00
|
|
|
}
|
2017-03-01 18:47:52 +01:00
|
|
|
}
|
|
|
|
|
2017-06-29 20:12:55 +02:00
|
|
|
@Override
|
2020-02-24 10:39:52 +01:00
|
|
|
public long getSubscriberCount() throws ParsingException {
|
|
|
|
try {
|
2020-04-16 16:08:14 +02:00
|
|
|
if (!channelInfoItem.has("subscriberCountText")) {
|
2020-03-21 07:15:23 +01:00
|
|
|
// Subscription count is not available for this channel item.
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2022-11-22 02:17:10 +01:00
|
|
|
if (withHandle) {
|
2023-01-24 23:03:29 +01:00
|
|
|
if (channelInfoItem.has("videoCountText")) {
|
|
|
|
return Utils.mixedNumberWordToLong(getTextFromObject(
|
|
|
|
channelInfoItem.getObject("videoCountText")));
|
|
|
|
} else {
|
|
|
|
return -1;
|
|
|
|
}
|
2022-11-22 02:17:10 +01:00
|
|
|
}
|
|
|
|
|
2022-03-18 15:09:06 +01:00
|
|
|
return Utils.mixedNumberWordToLong(getTextFromObject(
|
|
|
|
channelInfoItem.getObject("subscriberCountText")));
|
|
|
|
} catch (final Exception e) {
|
2020-02-24 10:39:52 +01:00
|
|
|
throw new ParsingException("Could not get subscriber count", e);
|
|
|
|
}
|
2017-03-01 18:47:52 +01:00
|
|
|
}
|
|
|
|
|
2017-06-29 20:12:55 +02:00
|
|
|
@Override
|
2017-08-06 22:20:15 +02:00
|
|
|
public long getStreamCount() throws ParsingException {
|
2020-02-23 18:27:28 +01:00
|
|
|
try {
|
2022-11-22 02:17:10 +01:00
|
|
|
if (withHandle || !channelInfoItem.has("videoCountText")) {
|
2022-11-29 19:06:03 +01:00
|
|
|
// Video count is not available, either the channel has no public uploads
|
|
|
|
// or YouTube displays the channel handle instead.
|
2020-03-17 14:04:46 +01:00
|
|
|
return ListExtractor.ITEM_COUNT_UNKNOWN;
|
2020-03-21 07:16:33 +01:00
|
|
|
}
|
|
|
|
|
2020-04-16 16:08:14 +02:00
|
|
|
return Long.parseLong(Utils.removeNonDigitCharacters(getTextFromObject(
|
|
|
|
channelInfoItem.getObject("videoCountText"))));
|
2022-03-18 15:09:06 +01:00
|
|
|
} catch (final Exception e) {
|
2020-02-23 19:45:45 +01:00
|
|
|
throw new ParsingException("Could not get stream count", e);
|
2017-03-01 18:47:52 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-01-22 01:44:58 +01:00
|
|
|
@Override
|
|
|
|
public boolean isVerified() throws ParsingException {
|
|
|
|
return YoutubeParsingHelper.isVerified(channelInfoItem.getArray("ownerBadges"));
|
|
|
|
}
|
|
|
|
|
2017-06-29 20:12:55 +02:00
|
|
|
@Override
|
2017-03-01 18:47:52 +01:00
|
|
|
public String getDescription() throws ParsingException {
|
2020-02-23 18:27:28 +01:00
|
|
|
try {
|
2020-04-16 16:08:14 +02:00
|
|
|
if (!channelInfoItem.has("descriptionSnippet")) {
|
2020-03-21 07:15:51 +01:00
|
|
|
// Channel have no description.
|
|
|
|
return null;
|
|
|
|
}
|
|
|
|
|
2020-04-16 16:08:14 +02:00
|
|
|
return getTextFromObject(channelInfoItem.getObject("descriptionSnippet"));
|
2022-03-18 15:09:06 +01:00
|
|
|
} catch (final Exception e) {
|
2020-02-23 19:45:45 +01:00
|
|
|
throw new ParsingException("Could not get description", e);
|
2017-03-01 18:47:52 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|