Add Utils methods for URL encoding/decoding using UTF-8.

This commit is contained in:
Isira Seneviratne 2022-08-09 07:33:29 +05:30
parent 366f5c1632
commit ddbce3b83d
16 changed files with 88 additions and 98 deletions

View File

@ -13,9 +13,9 @@ import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.downloader.Downloader;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.suggestion.SuggestionExtractor;
import org.schabi.newpipe.extractor.utils.Utils;
import java.io.IOException;
import java.net.URLEncoder;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
@ -33,7 +33,7 @@ public class BandcampSuggestionExtractor extends SuggestionExtractor {
try {
final JsonObject fuzzyResults = JsonParser.object().from(downloader
.get(AUTOCOMPLETE_URL + URLEncoder.encode(query, "UTF-8")).responseBody());
.get(AUTOCOMPLETE_URL + Utils.encodeUrlUtf8(query)).responseBody());
return fuzzyResults.getObject("auto").getArray("results").stream()
.filter(JsonObject.class::isInstance)
@ -44,6 +44,5 @@ public class BandcampSuggestionExtractor extends SuggestionExtractor {
} catch (final JsonParserException e) {
return Collections.emptyList();
}
}
}

View File

@ -2,24 +2,22 @@
package org.schabi.newpipe.extractor.services.bandcamp.linkHandler;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandlerFactory;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.List;
import static org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampExtractorHelper.BASE_URL;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandlerFactory;
import org.schabi.newpipe.extractor.utils.Utils;
import java.io.UnsupportedEncodingException;
import java.util.List;
public class BandcampSearchQueryHandlerFactory extends SearchQueryHandlerFactory {
@Override
public String getUrl(final String query,
final List<String> contentFilter,
final String sortFilter) throws ParsingException {
try {
return BASE_URL + "/search?q=" + URLEncoder.encode(query, "UTF-8") + "&page=1";
return BASE_URL + "/search?q=" + Utils.encodeUrlUtf8(query) + "&page=1";
} catch (final UnsupportedEncodingException e) {
throw new ParsingException("query \"" + query + "\" could not be encoded", e);
}

View File

@ -2,10 +2,9 @@ package org.schabi.newpipe.extractor.services.media_ccc.linkHandler;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandlerFactory;
import org.schabi.newpipe.extractor.utils.Utils;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.List;
public class MediaCCCSearchQueryHandlerFactory extends SearchQueryHandlerFactory {
@ -31,8 +30,7 @@ public class MediaCCCSearchQueryHandlerFactory extends SearchQueryHandlerFactory
public String getUrl(final String query, final List<String> contentFilter,
final String sortFilter) throws ParsingException {
try {
return "https://media.ccc.de/public/events/search?q="
+ URLEncoder.encode(query, StandardCharsets.UTF_8.name());
return "https://media.ccc.de/public/events/search?q=" + Utils.encodeUrlUtf8(query);
} catch (final UnsupportedEncodingException e) {
throw new ParsingException("Could not create search string with query: " + query, e);
}

View File

@ -35,8 +35,6 @@ import org.schabi.newpipe.extractor.utils.Utils;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
@ -327,8 +325,7 @@ public class PeertubeStreamExtractor extends StreamExtractor {
final StringBuilder params = new StringBuilder();
params.append("start=0&count=8&sort=-createdAt");
for (final String tag : tags) {
params.append("&tagsOneOf=");
params.append(URLEncoder.encode(tag, StandardCharsets.UTF_8.name()));
params.append("&tagsOneOf=").append(Utils.encodeUrlUtf8(tag));
}
return url + "?" + params;
}

View File

@ -3,10 +3,9 @@ package org.schabi.newpipe.extractor.services.peertube.linkHandler;
import org.schabi.newpipe.extractor.ServiceList;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandlerFactory;
import org.schabi.newpipe.extractor.utils.Utils;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.List;
public final class PeertubeSearchQueryHandlerFactory extends SearchQueryHandlerFactory {
@ -42,8 +41,7 @@ public final class PeertubeSearchQueryHandlerFactory extends SearchQueryHandlerF
final String sortFilter,
final String baseUrl) throws ParsingException {
try {
return baseUrl + SEARCH_ENDPOINT + "?search=" + URLEncoder.encode(searchString,
StandardCharsets.UTF_8.name());
return baseUrl + SEARCH_ENDPOINT + "?search=" + Utils.encodeUrlUtf8(searchString);
} catch (final UnsupportedEncodingException e) {
throw new ParsingException("Could not encode query", e);
}

View File

@ -31,8 +31,6 @@ import org.schabi.newpipe.extractor.utils.Utils;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.time.OffsetDateTime;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeParseException;
@ -109,7 +107,7 @@ public final class SoundcloudParsingHelper {
public static JsonObject resolveFor(@Nonnull final Downloader downloader, final String url)
throws IOException, ExtractionException {
final String apiUrl = SOUNDCLOUD_API_V2_URL + "resolve"
+ "?url=" + URLEncoder.encode(url, StandardCharsets.UTF_8.name())
+ "?url=" + Utils.encodeUrlUtf8(url)
+ "&client_id=" + clientId();
try {
@ -131,8 +129,7 @@ public final class SoundcloudParsingHelper {
ReCaptchaException {
final String response = NewPipe.getDownloader().get("https://w.soundcloud.com/player/?url="
+ URLEncoder.encode(apiUrl, StandardCharsets.UTF_8.name()),
SoundCloud.getLocalization()).responseBody();
+ Utils.encodeUrlUtf8(apiUrl), SoundCloud.getLocalization()).responseBody();
return Jsoup.parse(response).select("link[rel=\"canonical\"]").first()
.attr("abs:href");
@ -163,7 +160,7 @@ public final class SoundcloudParsingHelper {
try {
final String widgetUrl = "https://api-widget.soundcloud.com/resolve?url="
+ URLEncoder.encode(url.toString(), StandardCharsets.UTF_8.name())
+ Utils.encodeUrlUtf8(url.toString())
+ "&format=json&client_id=" + SoundcloudParsingHelper.clientId();
final String response = NewPipe.getDownloader().get(widgetUrl,
SoundCloud.getLocalization()).responseBody();

View File

@ -31,11 +31,10 @@ import org.schabi.newpipe.extractor.stream.StreamExtractor;
import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
import org.schabi.newpipe.extractor.stream.StreamType;
import org.schabi.newpipe.extractor.stream.VideoStream;
import org.schabi.newpipe.extractor.utils.Utils;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
@ -320,7 +319,7 @@ public class SoundcloudStreamExtractor extends StreamExtractor {
private static String urlEncode(final String value) {
try {
return URLEncoder.encode(value, StandardCharsets.UTF_8.name());
return Utils.encodeUrlUtf8(value);
} catch (final UnsupportedEncodingException e) {
throw new IllegalStateException(e);
}

View File

@ -14,10 +14,9 @@ import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.services.soundcloud.SoundcloudParsingHelper;
import org.schabi.newpipe.extractor.suggestion.SuggestionExtractor;
import org.schabi.newpipe.extractor.utils.Utils;
import java.io.IOException;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
@ -32,9 +31,9 @@ public class SoundcloudSuggestionExtractor extends SuggestionExtractor {
ExtractionException {
final List<String> suggestions = new ArrayList<>();
final Downloader dl = NewPipe.getDownloader();
final String url = SOUNDCLOUD_API_V2_URL + "search/queries" + "?q="
+ URLEncoder.encode(query, StandardCharsets.UTF_8.name()) + "&client_id="
+ SoundcloudParsingHelper.clientId() + "&limit=10";
final String url = SOUNDCLOUD_API_V2_URL + "search/queries?q="
+ Utils.encodeUrlUtf8(query) + "&client_id=" + SoundcloudParsingHelper.clientId()
+ "&limit=10";
final String response = dl.get(url, getExtractorLocalization()).responseBody();
try {

View File

@ -7,11 +7,10 @@ import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandlerFactory;
import org.schabi.newpipe.extractor.services.soundcloud.SoundcloudParsingHelper;
import org.schabi.newpipe.extractor.utils.Utils;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.List;
public class SoundcloudSearchQueryHandlerFactory extends SearchQueryHandlerFactory {
@ -48,7 +47,7 @@ public class SoundcloudSearchQueryHandlerFactory extends SearchQueryHandlerFacto
}
}
return url + "?q=" + URLEncoder.encode(id, StandardCharsets.UTF_8.name())
return url + "?q=" + Utils.encodeUrlUtf8(id)
+ "&client_id=" + SoundcloudParsingHelper.clientId()
+ "&limit=" + ITEMS_PER_PAGE + "&offset=0";

View File

@ -54,7 +54,6 @@ import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLDecoder;
import java.nio.charset.StandardCharsets;
import java.security.SecureRandom;
import java.time.LocalDate;
@ -891,8 +890,7 @@ public final class YoutubeParsingHelper {
for (final String param : params) {
if (param.split("=")[0].equals("q")) {
try {
return URLDecoder.decode(param.split("=")[1],
StandardCharsets.UTF_8.name());
return Utils.decodeUrlUtf8(param.split("=")[1]);
} catch (final UnsupportedEncodingException e) {
return null;
}

View File

@ -12,10 +12,9 @@ import org.schabi.newpipe.extractor.downloader.Downloader;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.suggestion.SuggestionExtractor;
import org.schabi.newpipe.extractor.utils.Utils;
import java.io.IOException;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
@ -54,9 +53,8 @@ public class YoutubeSuggestionExtractor extends SuggestionExtractor {
+ "?client=" + "youtube" //"firefox" for JSON, 'toolbar' for xml
+ "&jsonp=" + "JP"
+ "&ds=" + "yt"
+ "&gl=" + URLEncoder.encode(getExtractorContentCountry().getCountryCode(),
StandardCharsets.UTF_8.name())
+ "&q=" + URLEncoder.encode(query, StandardCharsets.UTF_8.name());
+ "&gl=" + Utils.encodeUrlUtf8(getExtractorContentCountry().getCountryCode())
+ "&q=" + Utils.encodeUrlUtf8(query);
String response = dl.get(url, getCookieHeader(), getExtractorLocalization()).responseBody();
// trim JSONP part "JP(...)"

View File

@ -1,22 +1,4 @@
/*
* Created by Christian Schabesberger on 12.08.17.
*
* Copyright (C) Christian Schabesberger 2018 <chris.schabesberger@mailbox.org>
* YoutubeTrendingExtractor.java is part of NewPipe Extractor.
*
* NewPipe Extractor is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* NewPipe Extractor is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with NewPipe Extractor. If not, see <https://www.gnu.org/licenses/>.
*/
package org.schabi.newpipe.extractor.services.youtube.extractors;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonPostResponse;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextAtKey;
@ -42,6 +24,26 @@ import java.nio.charset.StandardCharsets;
import javax.annotation.Nonnull;
/*
* Created by Christian Schabesberger on 12.08.17.
*
* Copyright (C) Christian Schabesberger 2018 <chris.schabesberger@mailbox.org>
* YoutubeTrendingExtractor.java is part of NewPipe Extractor.
*
* NewPipe Extractor is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* NewPipe Extractor is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with NewPipe Extractor. If not, see <https://www.gnu.org/licenses/>.
*/
public class YoutubeTrendingExtractor extends KioskExtractor<StreamInfoItem> {
private JsonObject initialData;

View File

@ -1,13 +1,12 @@
package org.schabi.newpipe.extractor.services.youtube.linkHandler;
import static org.schabi.newpipe.extractor.utils.Utils.encodeUrlUtf8;
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandlerFactory;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.List;
import javax.annotation.Nonnull;
@ -45,28 +44,21 @@ public final class YoutubeSearchQueryHandlerFactory extends SearchQueryHandlerFa
default:
break;
case VIDEOS:
return SEARCH_URL + URLEncoder.encode(searchString,
StandardCharsets.UTF_8.name())
+ "&sp=EgIQAQ%253D%253D";
return SEARCH_URL + encodeUrlUtf8(searchString) + "&sp=EgIQAQ%253D%253D";
case CHANNELS:
return SEARCH_URL + URLEncoder.encode(searchString,
StandardCharsets.UTF_8.name())
+ "&sp=EgIQAg%253D%253D";
return SEARCH_URL + encodeUrlUtf8(searchString) + "&sp=EgIQAg%253D%253D";
case PLAYLISTS:
return SEARCH_URL + URLEncoder.encode(searchString,
StandardCharsets.UTF_8.name())
+ "&sp=EgIQAw%253D%253D";
return SEARCH_URL + encodeUrlUtf8(searchString) + "&sp=EgIQAw%253D%253D";
case MUSIC_SONGS:
case MUSIC_VIDEOS:
case MUSIC_ALBUMS:
case MUSIC_PLAYLISTS:
case MUSIC_ARTISTS:
return MUSIC_SEARCH_URL + URLEncoder.encode(searchString,
StandardCharsets.UTF_8.name());
return MUSIC_SEARCH_URL + encodeUrlUtf8(searchString);
}
}
return SEARCH_URL + URLEncoder.encode(searchString, StandardCharsets.UTF_8.name());
return SEARCH_URL + encodeUrlUtf8(searchString);
} catch (final UnsupportedEncodingException e) {
throw new ParsingException("Could not encode query", e);
}

View File

@ -20,15 +20,9 @@
package org.schabi.newpipe.extractor.utils;
import static org.schabi.newpipe.extractor.utils.Utils.UTF_8;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
@ -102,7 +96,7 @@ public final class Parser {
for (final String arg : input.split("&")) {
final String[] splitArg = arg.split("=");
if (splitArg.length > 1) {
map.put(splitArg[0], URLDecoder.decode(splitArg[1], StandardCharsets.UTF_8.name()));
map.put(splitArg[0], Utils.decodeUrlUtf8(splitArg[1]));
} else {
map.put(splitArg[0], "");
}

View File

@ -6,6 +6,7 @@ import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Collection;
@ -27,6 +28,29 @@ public final class Utils {
// no instance
}
/**
 * URL-encodes a string, always using UTF-8 as the character set.
 * <p>
 * This is a thin convenience wrapper around
 * {@link URLEncoder#encode(String, String)} so that callers do not have to
 * spell out the charset name themselves.
 * </p>
 *
 * @param string the text to encode
 * @return the URL-encoded form of {@code string}
 * @throws UnsupportedEncodingException declared because the wrapped JDK method declares it;
 *                                      not expected in practice, since UTF-8 should be supported
 */
public static String encodeUrlUtf8(final String string) throws UnsupportedEncodingException {
    // TODO: Once Java 10 is the baseline, use URLEncoder.encode(String, Charset) instead.
    final String utf8Name = StandardCharsets.UTF_8.name();
    return URLEncoder.encode(string, utf8Name);
}
/**
 * URL-decodes a string, always using UTF-8 as the character set.
 * <p>
 * This is a thin convenience wrapper around
 * {@link URLDecoder#decode(String, String)} so that callers do not have to
 * spell out the charset name themselves.
 * </p>
 *
 * @param url the URL-encoded text to decode
 * @return the decoded form of {@code url}
 * @throws UnsupportedEncodingException declared because the wrapped JDK method declares it;
 *                                      not expected in practice, since UTF-8 should be supported
 */
public static String decodeUrlUtf8(final String url) throws UnsupportedEncodingException {
    // TODO: Once Java 10 is the baseline, use URLDecoder.decode(String, Charset) instead.
    final String utf8Name = StandardCharsets.UTF_8.name();
    return URLDecoder.decode(url, utf8Name);
}
/**
* Remove all non-digit characters from a string.
*
@ -134,7 +158,7 @@ public final class Utils {
String query;
try {
query = URLDecoder.decode(params[0], StandardCharsets.UTF_8.name());
query = decodeUrlUtf8(params[0]);
} catch (final UnsupportedEncodingException e) {
// Cannot decode string with UTF-8, using the string without decoding
query = params[0];
@ -142,7 +166,7 @@ public final class Utils {
if (query.equals(parameterName)) {
try {
return URLDecoder.decode(params[1], StandardCharsets.UTF_8.name());
return decodeUrlUtf8(params[1]);
} catch (final UnsupportedEncodingException e) {
// Cannot decode string with UTF-8, using the string without decoding
return params[1];
@ -239,10 +263,9 @@ public final class Utils {
public static String followGoogleRedirectIfNeeded(final String url) {
// If the url is a redirect from a Google search, extract the actual URL
try {
final URL decoded = Utils.stringToURL(url);
final URL decoded = stringToURL(url);
if (decoded.getHost().contains("google") && decoded.getPath().equals("/url")) {
return URLDecoder.decode(Parser.matchGroup1("&url=([^&]+)(?:&|$)", url),
StandardCharsets.UTF_8.name());
return decodeUrlUtf8(Parser.matchGroup1("&url=([^&]+)(?:&|$)", url));
}
} catch (final Exception ignored) {
}

View File

@ -19,11 +19,10 @@ import org.schabi.newpipe.extractor.channel.ChannelInfoItem;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.search.SearchExtractor;
import org.schabi.newpipe.extractor.services.DefaultSearchExtractorTest;
import org.schabi.newpipe.extractor.utils.Utils;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.List;
import javax.annotation.Nullable;
@ -141,7 +140,7 @@ public class SoundcloudSearchExtractorTest {
private static String urlEncode(String value) {
try {
return URLEncoder.encode(value, StandardCharsets.UTF_8.name());
return Utils.encodeUrlUtf8(value);
} catch (UnsupportedEncodingException e) {
throw new RuntimeException(e);
}