finished implementing timestamp, along with refactoring services

* Added a VideoInfo(AbstractVideoInfo) constructor, so that information already scraped into a VideoPreviewInfo can later be reused when building the full VideoInfo
* Made the Extractor class behave as a per-video object;
    - most method return values are video-specific, so it makes sense (to me) to have Extractor be stateful.
    - The only stateless methods are getVideoUrl(), getVideoId() and loadDecryptionCode(String)
* Implemented a constructor for YoutubeExtractor, which performs all initialisation work
This commit is contained in:
Adam Howard 2015-11-17 22:51:27 +00:00
parent 7f01e9a4d9
commit 91f98c125e
11 changed files with 441 additions and 234 deletions

View File

@ -7,9 +7,10 @@ public abstract class AbstractVideoInfo {
public String id = "";
public String title = "";
public String uploader = "";
//public int duration = -1;
public String thumbnail_url = "";
public Bitmap thumbnail = null;
public String webpage_url = "";
public String upload_date = "";
public long view_count = 0;
public long view_count = -1;
}

View File

@ -24,44 +24,76 @@ import android.graphics.Bitmap;
/**Info object for opened videos, ie the video ready to play.*/
public class VideoInfo extends AbstractVideoInfo {
private static final String TAG = VideoInfo.class.toString();
public String uploader_thumbnail_url = "";
public Bitmap uploader_thumbnail = null;
public String description = "";
public int duration = -1;
public int age_limit = 0;
public int like_count = 0;
public int dislike_count = 0;
public String average_rating = "";
public VideoStream[] videoStreams = null;
public AudioStream[] audioStreams = null;
public int videoAvailableStatus = VIDEO_AVAILABLE;
public int duration = -1;
/*YouTube-specific fields
todo: move these to a subclass*/
public int age_limit = 0;
public int like_count = -1;
public int dislike_count = -1;
public String average_rating = "";
public VideoPreviewInfo nextVideo = null;
public VideoPreviewInfo[] relatedVideos = null;
public int videoAvailableStatus = VIDEO_AVAILABLE;
//public int startPosition = 0;//in seconds. some metadata is not passed using a VideoInfo object!
private static final String TAG = VideoInfo.class.toString();
public int startPosition = -1;//in seconds. some metadata is not passed using a VideoInfo object!
public static final int VIDEO_AVAILABLE = 0x00;
public static final int VIDEO_UNAVAILABLE = 0x01;
public static final int VIDEO_UNAVAILABLE_GEMA = 0x02;//German DRM organisation
public static class VideoStream {
public VideoStream(String url, int format, String res) {
this.url = url; this.format = format; resolution = res;
public VideoInfo() {}
/**Creates a new VideoInfo object from an existing AbstractVideoInfo.
 * All the shared properties are copied to the new VideoInfo.
 * If {@code avi} is a VideoPreviewInfo, its textual duration (eg "4:05" or "1:02:03")
 * is converted to seconds; an empty or malformed duration yields -1.
 * @param avi the info object whose shared fields are copied; must not be null*/
public VideoInfo(AbstractVideoInfo avi) {
    this.id = avi.id;
    this.title = avi.title;
    this.uploader = avi.uploader;
    this.thumbnail_url = avi.thumbnail_url;
    this.thumbnail = avi.thumbnail;
    this.webpage_url = avi.webpage_url;
    this.upload_date = avi.upload_date;
    this.view_count = avi.view_count;

    if (avi instanceof VideoPreviewInfo) {
        this.duration = parseDurationSeconds(((VideoPreviewInfo) avi).duration);
    }
}

/**Converts a colon-separated duration ("ss", "m:ss" or "h:mm:ss") into seconds.
 * @param text the duration as displayed on the page; may be null or empty
 * @return the duration in seconds, or -1 if the text is missing or not numeric*/
private static int parseDurationSeconds(String text) {
    if (text == null || text.trim().isEmpty()) {
        return -1;
    }
    int total = 0;
    // each further component is worth 60 times the previous ones (h -> m -> s)
    for (String part : text.trim().split(":")) {
        int value;
        try {
            value = Integer.parseInt(part.trim());
        } catch (NumberFormatException e) {
            return -1;
        }
        if (value < 0) {
            return -1;
        }
        total = (total * 60) + value;
    }
    return total;
}
/**A single playable video stream of a video.*/
public static class VideoStream {
    /**url of the stream*/
    public String url = "";
    /**format identifier of the stream; -1 until set*/
    public int format = -1;
    /**resolution label of the stream*/
    public String resolution = "";

    /**Creates a stream description from its url, format identifier and resolution label.*/
    public VideoStream(String url, int format, String res) {
        this.url = url;
        this.format = format;
        this.resolution = res;
    }
}
public static class AudioStream {
public AudioStream(String url, int format, int bandwidth, int samplingRate) {
this.url = url; this.format = format;
this.bandwidth = bandwidth; this.samplingRate = samplingRate;
}
public String url = "";
public int format = -1;
public int bandwidth = -1;
public int samplingRate = -1;
public AudioStream(String url, int format, int bandwidth, int samplingRate) {
this.url = url; this.format = format;
this.bandwidth = bandwidth; this.samplingRate = samplingRate;
}
}
}

View File

@ -57,7 +57,7 @@ public class VideoInfoItemViewCreator {
}
holder.itemVideoTitleView.setText(info.title);
holder.itemUploaderView.setText(info.uploader);
holder.itemDurationView.setText(info.duration);
holder.itemDurationView.setText(""+info.duration);
if(!info.upload_date.isEmpty()) {
holder.itemUploadDateView.setText(info.upload_date);
} else {

View File

@ -64,27 +64,25 @@ public class VideoItemDetailActivity extends AppCompatActivity {
// this means the video was called though another app
if (getIntent().getData() != null) {
videoUrl = getIntent().getData().toString();
Log.i(TAG, "video URL passed:\"" + videoUrl + "\"");
//Log.i(TAG, "video URL passed:\"" + videoUrl + "\"");
StreamingService[] serviceList = ServiceList.getServices();
Extractor extractor = null;
for (int i = 0; i < serviceList.length; i++) {
if (serviceList[i].acceptUrl(videoUrl)) {
arguments.putInt(VideoItemDetailFragment.STREAMING_SERVICE, i);
try {
currentStreamingService = i;
extractor = ServiceList.getService(i).getExtractorInstance();
} catch (Exception e) {
e.printStackTrace();
}
currentStreamingService = i;
//extractor = ServiceList.getService(i).getExtractorInstance();
break;
}
}
if(extractor == null) {
if(currentStreamingService == -1) {
Toast.makeText(this, R.string.urlNotSupportedText, Toast.LENGTH_LONG)
.show();
}
arguments.putString(VideoItemDetailFragment.VIDEO_URL,
extractor.getVideoUrl(extractor.getVideoId(videoUrl)));//cleans URL
//arguments.putString(VideoItemDetailFragment.VIDEO_URL,
// extractor.getVideoUrl(extractor.getVideoId(videoUrl)));//cleans URL
arguments.putString(VideoItemDetailFragment.VIDEO_URL, videoUrl);
arguments.putBoolean(VideoItemDetailFragment.AUTO_PLAY,
PreferenceManager.getDefaultSharedPreferences(this)
.getBoolean(getString(R.string.autoPlayThroughIntent), false));

View File

@ -90,16 +90,18 @@ public class VideoItemDetailFragment extends Fragment {
private class ExtractorRunnable implements Runnable {
private Handler h = new Handler();
private Extractor extractor;
private StreamingService service;
private String videoUrl;
public ExtractorRunnable(String videoUrl, Extractor extractor, VideoItemDetailFragment f) {
this.extractor = extractor;
public ExtractorRunnable(String videoUrl, StreamingService service, VideoItemDetailFragment f) {
this.service = service;
this.videoUrl = videoUrl;
}
@Override
public void run() {
try {
VideoInfo videoInfo = extractor.getVideoInfo(videoUrl);
this.extractor = service.getExtractorInstance(videoUrl);
VideoInfo videoInfo = extractor.getVideoInfo();
h.post(new VideoResultReturnedRunnable(videoInfo));
if (videoInfo.videoAvailableStatus == VideoInfo.VIDEO_AVAILABLE) {
h.post(new SetThumbnailRunnable(
@ -239,7 +241,7 @@ public class VideoItemDetailFragment extends Fragment {
//this is horribly convoluted
//TODO: find a better way to convert YYYY-MM-DD to a locale-specific date
//suggestions welcome
//suggestions are welcome
int year = Integer.parseInt(info.upload_date.substring(0, 4));
int month = Integer.parseInt(info.upload_date.substring(5, 7));
int date = Integer.parseInt(info.upload_date.substring(8, 10));
@ -255,6 +257,7 @@ public class VideoItemDetailFragment extends Fragment {
descriptionView.setMovementMethod(LinkMovementMethod.getInstance());
actionBarHandler.setVideoInfo(info.webpage_url, info.title);
actionBarHandler.setStartPosition(info.startPosition);
// parse streams
Vector<VideoInfo.VideoStream> streamsToUse = new Vector<>();
@ -357,7 +360,7 @@ public class VideoItemDetailFragment extends Fragment {
StreamingService streamingService = ServiceList.getService(
getArguments().getInt(STREAMING_SERVICE));
extractorThread = new Thread(new ExtractorRunnable(
getArguments().getString(VIDEO_URL), streamingService.getExtractorInstance(), this));
getArguments().getString(VIDEO_URL), streamingService, this));
autoPlayEnabled = getArguments().getBoolean(AUTO_PLAY);
extractorThread.start();

View File

@ -26,10 +26,7 @@ import android.os.Parcelable;
/**Info object for previews of unopened videos, eg search results, related videos*/
public class VideoPreviewInfo extends AbstractVideoInfo implements Parcelable {
public String duration = "";
protected VideoPreviewInfo(Parcel in) {
id = in.readString();
title = in.readString();

View File

@ -22,9 +22,94 @@ package org.schabi.newpipe.services;
import org.schabi.newpipe.VideoInfo;
/**Scrapes information from a video streaming service (eg, YouTube). To implement*/
public interface Extractor {
VideoInfo getVideoInfo(String siteUrl);
String getVideoUrl(String videoId);
String getVideoId(String videoUrl);
/**Scrapes information from a video streaming service (eg, YouTube).
 * An instance is bound to a single page URL (see {@link #pageUrl}) and collects what it
 * scrapes into {@link #videoInfo}. Subclasses supply the service-specific getters declared
 * at the bottom of this class.*/
public abstract class Extractor {
/**The URL this extractor was created for.*/
public String pageUrl;
/**The info object filled by {@link #getVideoInfo()}; not created by the constructor,
 * so it is null until getVideoInfo() runs or a subclass assigns it.*/
public VideoInfo videoInfo;
/**Stores the page URL. No scraping happens here.
 * @param url the URL of the video page this extractor works on*/
public Extractor(String url) {
this.pageUrl = url;
}
/**Fills out the video info fields which are common to all services.
 * Creates {@link #videoInfo} if it does not exist yet, then populates every field that still
 * holds its "unset" value (empty string, null array, or a negative/zero number, as checked
 * below) by calling the matching abstract getter. Fields that already hold a value are kept.
 * Probably needs to be overridden by subclasses
 * @return the (possibly newly created) {@link #videoInfo} instance*/
public VideoInfo getVideoInfo()
{
if(videoInfo == null) {
videoInfo = new VideoInfo();
}
if(videoInfo.webpage_url.isEmpty()) {
videoInfo.webpage_url = pageUrl;
}
if(videoInfo.title.isEmpty()) {
videoInfo.title = getTitle();
}
// any duration below 1 is treated as "not known yet"
if(videoInfo.duration < 1) {
videoInfo.duration = getLength();
}
if(videoInfo.uploader.isEmpty()) {
videoInfo.uploader = getUploader();
}
if(videoInfo.description.isEmpty()) {
videoInfo.description = getDescription();
}
// -1 is the "unset" marker for the view count
if(videoInfo.view_count == -1) {
videoInfo.view_count = getViews();
}
if(videoInfo.upload_date.isEmpty()) {
videoInfo.upload_date = getUploadDate();
}
if(videoInfo.thumbnail_url.isEmpty()) {
videoInfo.thumbnail_url = getThumbnailUrl();
}
if(videoInfo.id.isEmpty()) {
videoInfo.id = getVideoId(pageUrl);
}
// Load and extract audio
if(videoInfo.audioStreams == null) {
videoInfo.audioStreams = getAudioStreams();
}
// Extract video stream url
if(videoInfo.videoStreams == null) {
videoInfo.videoStreams = getVideoStreams();
}
if(videoInfo.uploader_thumbnail_url.isEmpty()) {
videoInfo.uploader_thumbnail_url = getUploaderThumbnailUrl();
}
// a negative start position means no timestamp has been determined yet
if(videoInfo.startPosition < 0) {
videoInfo.startPosition = getTimeStamp();
}
// fields of VideoInfo that are not filled in here:
//Bitmap thumbnail = null;
//Bitmap uploader_thumbnail = null;
//int videoAvailableStatus = VIDEO_AVAILABLE;
return videoInfo;
}
/**@return the page URL for the given video id*/
public abstract String getVideoUrl(String videoId);
/**@return the video id contained in the given URL; stored in videoInfo.id*/
public abstract String getVideoId(String siteUrl);
/**@return the position to start playback from; stored in videoInfo.startPosition*/
public abstract int getTimeStamp();
/**@return the video title; stored in videoInfo.title*/
public abstract String getTitle();
/**@return the video description; stored in videoInfo.description*/
public abstract String getDescription();
/**@return the uploader's name; stored in videoInfo.uploader*/
public abstract String getUploader();
/**@return the video length; stored in videoInfo.duration
 * (presumably in seconds - confirm against the implementing service)*/
public abstract int getLength();
/**@return the number of views; stored in videoInfo.view_count*/
public abstract int getViews();
/**@return the upload date; stored in videoInfo.upload_date*/
public abstract String getUploadDate();
/**@return the URL of the video thumbnail; stored in videoInfo.thumbnail_url*/
public abstract String getThumbnailUrl();
/**@return the URL of the uploader's thumbnail; stored in videoInfo.uploader_thumbnail_url*/
public abstract String getUploaderThumbnailUrl();
/**@return the audio streams of the video; stored in videoInfo.audioStreams*/
public abstract VideoInfo.AudioStream[] getAudioStreams();
/**@return the video streams of the video; stored in videoInfo.videoStreams*/
public abstract VideoInfo.VideoStream[] getVideoStreams();
}

View File

@ -25,7 +25,7 @@ public interface StreamingService {
public String name = "";
}
ServiceInfo getServiceInfo();
Extractor getExtractorInstance();
Extractor getExtractorInstance(String url);
SearchEngine getSearchEngineInstance();
/**When a VIEW_ACTION is caught this function will test if the url delivered within the calling

View File

@ -3,6 +3,7 @@ package org.schabi.newpipe.services.youtube;
import android.util.Log;
import android.util.Xml;
import org.json.JSONException;
import org.json.JSONObject;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
@ -46,14 +47,225 @@ import java.util.regex.Pattern;
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
*/
public class YoutubeExtractor implements Extractor {
public class YoutubeExtractor extends Extractor {
private static final String TAG = YoutubeExtractor.class.toString();
private String pageContents;
private Document doc;
private JSONObject jsonObj;
private JSONObject playerArgs;
// These lists only contain itag formats that are supported by the common Android Video player.
// How ever if you are heading for a list showing all itag formats look at
// https://github.com/rg3/youtube-dl/issues/1687
// static values
private static final String DECRYPTION_FUNC_NAME="decrypt";
// cached values
private static volatile String decryptionCode = "";
/**Downloads and parses the watch page for the given URL and prepares everything the
 * getters need: the parsed HTML document, the player JSON config with its "args" object,
 * and (once per process) the signature decryption code.
 * If the player JSON cannot be loaded, the video is flagged as unavailable and
 * {@code playerArgs} stays null.
 * @param pageUrl the URL of the Youtube video page*/
public YoutubeExtractor(String pageUrl) {
    super(pageUrl);//only stores the url; the common videoInfo fields are filled in by getVideoInfo()
    // The superclass constructor does not create videoInfo. Create it here so the
    // availability status can be recorded below instead of throwing a NullPointerException.
    if (videoInfo == null) {
        videoInfo = new VideoInfo();
    }
    pageContents = Downloader.download(cleanUrl(pageUrl));
    doc = Jsoup.parse(pageContents, pageUrl);

    //attempt to load the youtube js player JSON arguments
    try {
        String jsonString = matchGroup1("ytplayer.config\\s*=\\s*(\\{.*?\\});", pageContents);
        jsonObj = new JSONObject(jsonString);
        playerArgs = jsonObj.getJSONObject("args");
    } catch (Exception e) {//if this fails, the video is most likely not available.
        // Determining why is done later.
        videoInfo.videoAvailableStatus = VideoInfo.VIDEO_UNAVAILABLE;
        Log.d(TAG, "Could not load JSON data for Youtube video \""+pageUrl+"\". This most likely means the video is unavailable");
    }

    //----------------------------------
    // load and parse decryption code, if it isn't already initialised
    //----------------------------------
    if (decryptionCode.isEmpty()) {
        try {
            // The Youtube service needs to be initialized by downloading the
            // js-Youtube-player. This is done in order to get the algorithm
            // for decrypting cryptic signatures inside certain stream urls.
            // (jsonObj is null if the config could not be parsed; that ends up in the catch below)
            JSONObject ytAssets = jsonObj.getJSONObject("assets");
            String playerUrl = ytAssets.getString("js");
            if (playerUrl.startsWith("//")) {
                playerUrl = "https:" + playerUrl;
            }
            decryptionCode = loadDecryptionCode(playerUrl);
        } catch (Exception e){
            Log.d(TAG, "Could not load decryption code for the Youtube service.");
            e.printStackTrace();
        }
    }
}
/**Returns the video title, preferring the player JSON args and falling back to the
 * page's {@code <meta name=title>} tag.
 * @return the title, or an empty string if neither source yields one*/
@Override
public String getTitle() {
    // playerArgs is null when the player config could not be parsed (eg unavailable video);
    // in that case go straight to the HTML fallback instead of throwing.
    if (playerArgs != null) {
        try {//json player args method
            return playerArgs.getString("title");
        } catch (JSONException je) {
            je.printStackTrace();
        }
    }
    Log.w(TAG, "failed to load title from JSON args; trying to extract it from HTML");
    try {//html <meta> method
        return doc.select("meta[name=title]").attr("content");
    } catch (Exception e) {
        Log.e(TAG, "failed permanently to load title.");
        e.printStackTrace();
        return "";
    }
}
/**Returns the HTML of the description paragraph ({@code p#eow-description}) of the page.
 * @return the description markup, or an empty string if it cannot be found*/
@Override
public String getDescription() {
    String description = "";
    try {
        Element paragraph = doc.select("p[id=\"eow-description\"]").first();
        description = paragraph.html();
    } catch (Exception e) {//todo: add fallback method
        Log.e(TAG, "failed to load description.");
        e.printStackTrace();
    }
    return description;
}
/**Returns the uploader's name, preferring the player JSON args ("author") and falling
 * back to the {@code div.yt-user-info} element of the page.
 * @return the uploader name, or an empty string if neither source yields one*/
@Override
public String getUploader() {
    // playerArgs is null when the player config could not be parsed;
    // skip to the HTML fallback instead of throwing a NullPointerException.
    if (playerArgs != null) {
        try {//json player args method
            return playerArgs.getString("author");
        } catch (JSONException je) {
            je.printStackTrace();
        }
    }
    Log.w(TAG, "failed to load uploader name from JSON args; trying to extract it from HTML");
    try {//fallback HTML method
        return doc.select("div.yt-user-info").first().text();
    } catch (Exception e) {
        e.printStackTrace();
        Log.e(TAG, "failed permanently to load uploader name.");
        return "";
    }
}
/**Returns the video length in seconds, read from the "length_seconds" player arg.
 * @return the length in seconds, or -1 if the player args are missing or lack the value*/
@Override
public int getLength() {
    // playerArgs is null when the player config could not be parsed
    if (playerArgs == null) {
        Log.e(TAG, "failed to load video duration from JSON args");
        return -1;
    }
    try {
        return playerArgs.getInt("length_seconds");
    } catch (JSONException je) {//todo: find fallback method
        Log.e(TAG, "failed to load video duration from JSON args");
        je.printStackTrace();
        return -1;
    }
}
/**Returns the view count, read from the page's {@code meta[itemprop=interactionCount]} tag.
 * Note: the value is parsed as an int, so a count that is missing, non-numeric or
 * larger than Integer.MAX_VALUE is reported as -1.
 * @return the number of views, or -1 if it cannot be determined*/
@Override
public int getViews() {
    try {
        String viewCountString = doc.select("meta[itemprop=interactionCount]").attr("content");
        return Integer.parseInt(viewCountString);
    } catch (Exception e) {//todo: find fallback method
        Log.e(TAG, "failed to get number of views");
        e.printStackTrace();
        return -1;
    }
}
/**Returns the upload date as found in the page's {@code meta[itemprop=datePublished]} tag.
 * @return the content of that tag, or an empty string if reading it fails*/
@Override
public String getUploadDate() {
    String published;
    try {
        published = doc.select("meta[itemprop=datePublished]").attr("content");
    } catch (Exception e) {//todo: add fallback method
        Log.e(TAG, "failed to get upload date.");
        e.printStackTrace();
        published = "";
    }
    return published;
}
/**Returns the URL of the video thumbnail.
 * The high resolution thumbnail linked in the HTML ({@code link[itemprop=thumbnailUrl]}) is
 * tried first; if it is missing, the low resolution "thumbnail_url" player arg is used.
 * @return the thumbnail URL, or an empty string if neither source yields one*/
@Override
public String getThumbnailUrl() {
    // Try to get the high resolution thumbnail from the HTML
    try {
        String highRes = doc.select("link[itemprop=\"thumbnailUrl\"]").first().attr("abs:href");
        if (!highRes.isEmpty()) {
            return highRes;
        }
    } catch (Exception e) {
        //fall through to fallback
    }
    Log.w(TAG, "Could not find high res Thumbnail. Using low res instead");

    // playerArgs is null when the player config could not be parsed; nothing left to try then
    if (playerArgs != null) {
        try {
            return playerArgs.getString("thumbnail_url");
        } catch (JSONException je) {
            je.printStackTrace();
        }
    }
    Log.w(TAG, "failed to extract thumbnail URL from JSON args");
    return "";
}
/**Returns the absolute URL of the uploader's avatar, taken from the {@code data-thumb}
 * attribute of the first image inside the first {@code yt-user-photo} link of the page.
 * @return the avatar URL, or an empty string if the elements cannot be found*/
@Override
public String getUploaderThumbnailUrl() {
    String avatarUrl = "";
    try {
        Element photoLink = doc.select("a[class*=\"yt-user-photo\"]").first();
        Element image = photoLink.select("img").first();
        avatarUrl = image.attr("abs:data-thumb");
    } catch (Exception e) {//todo: add fallback method
        Log.e(TAG, "failed to get uploader thumbnail URL.");
        e.printStackTrace();
    }
    return avatarUrl;
}
/**Returns the audio streams described by the DASH manifest named in the "dashmpd" player arg.
 * @return the parsed audio streams, or an empty array if the manifest is unavailable
 * or parsing fails*/
@Override
public VideoInfo.AudioStream[] getAudioStreams() {
    VideoInfo.AudioStream[] streams = new VideoInfo.AudioStream[0];
    try {
        streams = parseDashManifest(playerArgs.getString("dashmpd"), decryptionCode);
    } catch (NullPointerException npe) {
        // reached when playerArgs is null (no player config was loaded)
        Log.e(TAG, "Could not find \"dashmpd\" upon the player args (maybe no dash manifest available).");
    } catch (Exception other) {
        other.printStackTrace();
    }
    return streams;
}
/**Returns the video streams listed in the "url_encoded_fmt_stream_map" player arg.
 * Each comma-separated entry is a '&'-separated list of key=value tags; entries with a
 * supported itag (see resolveFormat) become a VideoStream. Encrypted signatures ("s" tag)
 * are decrypted and appended to the stream url.
 * Entries without an "itag" or "url" tag are skipped.
 * @return the usable video streams, or an empty array if extraction fails*/
@Override
public VideoInfo.VideoStream[] getVideoStreams() {
    try {
        //------------------------------------
        // extract video stream url
        //------------------------------------
        String encoded_url_map = playerArgs.getString("url_encoded_fmt_stream_map");
        Vector<VideoInfo.VideoStream> videoStreams = new Vector<>();
        for (String url_data_str : encoded_url_map.split(",")) {
            Map<String, String> tags = new HashMap<>();
            for (String raw_tag : Parser.unescapeEntities(url_data_str, true).split("&")) {
                // limit 2: keep any '=' inside the value, and get an empty value for "key="
                // instead of a one-element array
                String[] split_tag = raw_tag.split("=", 2);
                if (split_tag.length == 2) {
                    tags.put(split_tag[0], split_tag[1]);
                }
            }

            String itagString = tags.get("itag");
            String encodedUrl = tags.get("url");
            if (itagString == null || encodedUrl == null) {
                // one malformed entry must not discard the remaining streams
                Log.w(TAG, "skipping stream entry without itag or url");
                continue;
            }

            int itag = Integer.parseInt(itagString);
            String streamUrl = URLDecoder.decode(encodedUrl, "UTF-8");

            // if video has a signature: decrypt it and add it to the url
            if (tags.get("s") != null) {
                streamUrl = streamUrl + "&signature=" + decryptSignature(tags.get("s"), decryptionCode);
            }

            int format = resolveFormat(itag);
            if (format != -1) {
                videoStreams.add(new VideoInfo.VideoStream(
                        streamUrl,
                        format,
                        resolveResolutionString(itag)));
            }
        }
        return videoStreams.toArray(new VideoInfo.VideoStream[videoStreams.size()]);
    } catch (Exception e) {
        // also reached when playerArgs is null (no player config was loaded)
        Log.e(TAG, "Failed to get video stream");
        e.printStackTrace();
        return new VideoInfo.VideoStream[0];
    }
}
/**These lists only contain itag formats that are supported by the common Android Video player.
However if you are looking for a list showing all itag formats, look at
https://github.com/rg3/youtube-dl/issues/1687 */
public static int resolveFormat(int itag) {
switch(itag) {
// video
@ -91,68 +303,28 @@ public class YoutubeExtractor implements Extractor {
}
}
// static values
private static final String DECRYPTION_FUNC_NAME="decrypt";
// cached values
private static volatile String decryptionCode = "";
public void initService(String site) {
// The Youtube service needs to be initialized by downloading the
// js-Youtube-player. This is done in order to get the algorithm
// for decrypting cryptic signatures inside certain stream urls.
// Star Wars Kid is used as a dummy video, in order to download the youtube player.
//String site = Downloader.download("https://www.youtube.com/watch?v=HPPj6viIBmU");
//-------------------------------------
// extracting form player args
//-------------------------------------
try {
String jsonString = matchGroup1("ytplayer.config\\s*=\\s*(\\{.*?\\});", site);
JSONObject jsonObj = new JSONObject(jsonString);
//----------------------------------
// load and parse description code
//----------------------------------
if (decryptionCode.isEmpty()) {
JSONObject ytAssets = jsonObj.getJSONObject("assets");
String playerUrl = ytAssets.getString("js");
if (playerUrl.startsWith("//")) {
playerUrl = "https:" + playerUrl;
}
decryptionCode = loadDecryptionCode(playerUrl);
}
} catch (Exception e){
Log.d(TAG, "Could not initialize the extractor of the Youtube service.");
e.printStackTrace();
}
}
@Override
public String getVideoId(String videoUrl) {
String id = "";
Pattern pat;
public String getVideoId(String url) {
String id;
String pat;
if(videoUrl.contains("youtube")) {
pat = Pattern.compile("youtube\\.com/watch\\?v=([\\-a-zA-Z0-9_]{11})");
if(url.contains("youtube")) {
pat = "youtube\\.com/watch\\?v=([\\-a-zA-Z0-9_]{11})";
}
else if(videoUrl.contains("youtu.be")) {
pat = Pattern.compile("youtu\\.be/([a-zA-Z0-9_-]{11})");
else if(url.contains("youtu.be")) {
pat = "youtu\\.be/([a-zA-Z0-9_-]{11})";
}
else {
Log.e(TAG, "Error could not parse url: " + videoUrl);
Log.e(TAG, "Error could not parse url: " + url);
return "";
}
Matcher mat = pat.matcher(videoUrl);
boolean foundMatch = mat.find();
if(foundMatch){
id = mat.group(1);
Log.i(TAG, "string \""+videoUrl+"\" matches!");
id = matchGroup1(pat, url);
if(!id.isEmpty()){
Log.i(TAG, "string \""+url+"\" matches!");
return id;
}
Log.i(TAG, "string \""+videoUrl+"\" does not match.");
return id;
Log.i(TAG, "string \""+url+"\" does not match.");
return "";
}
@Override
@ -160,118 +332,47 @@ public class YoutubeExtractor implements Extractor {
return "https://www.youtube.com/watch?v=" + videoId;
}
public int getStartPosition(String siteUrl){
String timeStamp = matchGroup1("((#|&)t=\\d{0,3}h?\\d{0,3}m?\\d{1,3}s?)", siteUrl);
Log.i(TAG, "time stamp:"+timeStamp);
//videoInfo.startPosition
/**Attempts to parse (and return) the offset to start playing the video from.
* @return the offset (in seconds), or 0 if no timestamp is found.*/
@Override
public int getTimeStamp(){
String timeStamp = matchGroup1("((#|&)t=\\d{0,3}h?\\d{0,3}m?\\d{1,3}s?)", pageUrl);
//TODO: test this!
if(timeStamp.length() > 0) {
//TODO: test this
if(!timeStamp.isEmpty()) {
String secondsString = matchGroup1("(\\d{1,3})s", timeStamp);
if(secondsString.length() == 0)//try again with unspecified units as seconds
secondsString = matchGroup1("t=(\\d{1,3})", timeStamp);
String minutesString = matchGroup1("(\\d{1,3})m", timeStamp);
String hoursString = matchGroup1("(\\d{1,3})h", timeStamp);
int seconds = (secondsString.length() > 0 ? Integer.parseInt(secondsString) : 0);
int minutes = (minutesString.length() > 0 ? Integer.parseInt(minutesString) : 0);
int hours = (hoursString.length() > 0 ? Integer.parseInt(hoursString) : 0);
if(secondsString.isEmpty()//if nothing was got,
&& minutesString.isEmpty()//treat as unlabelled seconds
&& hoursString.isEmpty())
secondsString = matchGroup1("t=(\\d{1,3})", timeStamp);
return seconds + (60*minutes) + (3600*hours);//don't trust BODMAS!
int seconds = (secondsString.isEmpty() ? 0 : Integer.parseInt(secondsString));
int minutes = (minutesString.isEmpty() ? 0 : Integer.parseInt(minutesString));
int hours = (hoursString.isEmpty() ? 0 : Integer.parseInt(hoursString));
int ret = seconds + (60*minutes) + (3600*hours);//don't trust BODMAS!
Log.d(TAG, "derived timestamp value:"+ret);
return ret;
//the ordering varies internationally
}//else, return default 0
return 0;
}
@Override
public VideoInfo getVideoInfo(String siteUrl) {
String site = Downloader.download(siteUrl);
VideoInfo videoInfo = new VideoInfo();
Document doc = Jsoup.parse(site, siteUrl);
videoInfo.id = matchGroup1("v=([0-9a-zA-Z_-]{11})", siteUrl);
public VideoInfo getVideoInfo() {
videoInfo = super.getVideoInfo();
//todo: replace this with a call to getVideoId, if possible
videoInfo.id = matchGroup1("v=([0-9a-zA-Z_-]{11})", pageUrl);
videoInfo.age_limit = 0;
videoInfo.webpage_url = siteUrl;
initService(site);
//-------------------------------------
// extracting form player args
//-------------------------------------
JSONObject playerArgs = null;
{
try {
String jsonString = matchGroup1("ytplayer.config\\s*=\\s*(\\{.*?\\});", site);
JSONObject jsonObj = new JSONObject(jsonString);
playerArgs = jsonObj.getJSONObject("args");
}
catch (Exception e) {
e.printStackTrace();
// If we fail in this part the video is most likely not available.
// Determining why is done later.
videoInfo.videoAvailableStatus = VideoInfo.VIDEO_UNAVAILABLE;
}
}
//-----------------------
// load and extract audio
//-----------------------
//average rating
try {
String dashManifest = playerArgs.getString("dashmpd");
videoInfo.audioStreams = parseDashManifest(dashManifest, decryptionCode);
} catch (NullPointerException e) {
Log.e(TAG, "Could not find \"dashmpd\" upon the player args (maybe no dash manifest available).");
} catch (Exception e) {
e.printStackTrace();
}
try {
//--------------------------------------------
// extract general information about the video
//--------------------------------------------
videoInfo.uploader = playerArgs.getString("author");
videoInfo.title = playerArgs.getString("title");
//first attempt getting a small image version
//in the html extracting part we try to get a thumbnail with a higher resolution
videoInfo.thumbnail_url = playerArgs.getString("thumbnail_url");
videoInfo.duration = playerArgs.getInt("length_seconds");
videoInfo.average_rating = playerArgs.getString("avg_rating");
//------------------------------------
// extract video stream url
//------------------------------------
String encoded_url_map = playerArgs.getString("url_encoded_fmt_stream_map");
Vector<VideoInfo.VideoStream> videoStreams = new Vector<>();
for(String url_data_str : encoded_url_map.split(",")) {
Map<String, String> tags = new HashMap<>();
for(String raw_tag : Parser.unescapeEntities(url_data_str, true).split("&")) {
String[] split_tag = raw_tag.split("=");
tags.put(split_tag[0], split_tag[1]);
}
int itag = Integer.parseInt(tags.get("itag"));
String streamUrl = URLDecoder.decode(tags.get("url"), "UTF-8");
// if video has a signature: decrypt it and add it to the url
if(tags.get("s") != null) {
streamUrl = streamUrl + "&signature=" + decryptSignature(tags.get("s"), decryptionCode);
}
if(resolveFormat(itag) != -1) {
videoStreams.add(new VideoInfo.VideoStream(
streamUrl,
resolveFormat(itag),
resolveResolutionString(itag)));
}
}
videoInfo.videoStreams =
videoStreams.toArray(new VideoInfo.VideoStream[videoStreams.size()]);
} catch (Exception e) {
} catch (JSONException e) {
e.printStackTrace();
}
@ -279,7 +380,6 @@ public class YoutubeExtractor implements Extractor {
// extracting information from html page
//---------------------------------------
// Determine what went wrong when the Video is not available
if(videoInfo.videoAvailableStatus == VideoInfo.VIDEO_UNAVAILABLE) {
if(doc.select("h1[id=\"unavailable-message\"]").first().text().contains("GEMA")) {
@ -287,22 +387,6 @@ public class YoutubeExtractor implements Extractor {
}
}
// Try to get high resolution thumbnail if it fails use low res from the player instead
try {
videoInfo.thumbnail_url = doc.select("link[itemprop=\"thumbnailUrl\"]").first()
.attr("abs:href");
} catch(Exception e) {
Log.i(TAG, "Could not find high res Thumbnail. Using low res instead");
}
// upload date
videoInfo.upload_date = doc.select("meta[itemprop=datePublished]").attr("content");
//TODO: Format date locale-specifically
// description
videoInfo.description = doc.select("p[id=\"eow-description\"]").first().html();
String likesString = "";
String dislikesString = "";
try {
@ -325,17 +409,8 @@ public class YoutubeExtractor implements Extractor {
videoInfo.dislike_count = 0;
}
// uploader thumbnail
videoInfo.uploader_thumbnail_url = doc.select("a[class*=\"yt-user-photo\"]").first()
.select("img").first()
.attr("abs:data-thumb");
// view count TODO: locale-specific formatting
String viewCountString = doc.select("meta[itemprop=interactionCount]").attr("content");
videoInfo.view_count = Integer.parseInt(viewCountString);
// next video
videoInfo.nextVideo = extractVideoInfoItem(doc.select("div[class=\"watch-sidebar-section\"]").first()
videoInfo.nextVideo = extractVideoPreviewInfo(doc.select("div[class=\"watch-sidebar-section\"]").first()
.select("li").first());
// related videos
@ -343,13 +418,14 @@ public class YoutubeExtractor implements Extractor {
for(Element li : doc.select("ul[id=\"watch-related\"]").first().children()) {
// first check if we have a playlist. If so leave them out
if(li.select("a[class*=\"content-link\"]").first() != null) {
relatedVideos.add(extractVideoInfoItem(li));
relatedVideos.add(extractVideoPreviewInfo(li));
}
}
videoInfo.relatedVideos = relatedVideos.toArray(new VideoPreviewInfo[relatedVideos.size()]);
return videoInfo;
}
private VideoInfo.AudioStream[] parseDashManifest(String dashManifest, String decryptoinCode) {
if(!dashManifest.contains("/signature/")) {
String encryptedSig = matchGroup1("/s/([a-fA-F0-9\\.]+)", dashManifest);
@ -413,8 +489,10 @@ public class YoutubeExtractor implements Extractor {
}
return audioStreams.toArray(new VideoInfo.AudioStream[audioStreams.size()]);
}
private VideoPreviewInfo extractVideoInfoItem(Element li) {
/**Provides information about links to other videos on the video page, such as related videos.
* This is encapsulated in a VideoPreviewInfo object,
* which is a subset of the fields in a full VideoInfo.*/
private VideoPreviewInfo extractVideoPreviewInfo(Element li) {
VideoPreviewInfo info = new VideoPreviewInfo();
info.webpage_url = li.select("a[class*=\"content-link\"]").first()
.attr("abs:href");
@ -426,8 +504,10 @@ public class YoutubeExtractor implements Extractor {
//todo: check NullPointerException causing
info.title = li.select("span[class=\"title\"]").first().text();
info.view_count = Long.parseLong(li.select("span[class*=\"view-count\"]").first().text());
info.view_count = Long.parseLong(li.select("span[class*=\"view-count\"]")
.first().text().replaceAll("[^\\d]", ""));
info.uploader = li.select("span[class=\"g-hovercard\"]").first().text();
info.duration = li.select("span[class=\"video-time\"]").first().text();
Element img = li.select("img").first();
@ -491,15 +571,19 @@ public class YoutubeExtractor implements Extractor {
return result.toString();
}
/**Reduces a video URL to its canonical form by extracting the video id with
 * getVideoId and rebuilding the URL with getVideoUrl.*/
private String cleanUrl(String complexUrl) {
    String videoId = getVideoId(complexUrl);
    return getVideoUrl(videoId);
}
private String matchGroup1(String pattern, String input) {
Pattern pat = Pattern.compile(pattern);
Matcher mat = pat.matcher(input);
boolean foundMatch = mat.find();
if(foundMatch){
if (foundMatch) {
return mat.group(1);
}
else {
Log.e(TAG, "failed to find pattern \""+pattern+"\" inside of \""+input+"\"");
Log.w(TAG, "failed to find pattern \""+pattern+"\" inside of \""+input+"\"");
new Exception("failed to find pattern \""+pattern+"\"").printStackTrace();
return "";
}

View File

@ -62,7 +62,7 @@ public class YoutubeSearchEngine implements SearchEngine {
String site;
String url = builder.build().toString();
//if we've been passed a valid language code, append it to the URL
if(languageCode.length() > 0) {
if(!languageCode.isEmpty()) {
//assert Pattern.matches("[a-z]{2}(-([A-Z]{2}|[0-9]{1,3}))?", languageCode);
site = Downloader.download(url, languageCode);
}
@ -101,6 +101,7 @@ public class YoutubeSearchEngine implements SearchEngine {
// video item type
} else if(!((el = item.select("div[class*=\"yt-lockup-video\"").first()) == null)) {
//todo: de-duplicate this with YoutubeExtractor.getVideoPreviewInfo()
VideoPreviewInfo resultItem = new VideoPreviewInfo();
Element dl = el.select("h3").first().select("a").first();
resultItem.webpage_url = dl.attr("abs:href");
@ -113,8 +114,9 @@ public class YoutubeSearchEngine implements SearchEngine {
e.printStackTrace();
}
resultItem.title = dl.text();
resultItem.duration = item.select("span[class=\"video-time\"]").first()
.text();
resultItem.duration = item.select("span[class=\"video-time\"]").first().text();
resultItem.uploader = item.select("div[class=\"yt-lockup-byline\"]").first()
.select("a").first()
.text();
@ -132,7 +134,7 @@ public class YoutubeSearchEngine implements SearchEngine {
}
result.resultList.add(resultItem);
} else {
Log.e(TAG, "GREAT FUCKING ERROR");
Log.e(TAG, "unexpected element found:\""+el+"\"");
}
}
return result;

View File

@ -33,8 +33,13 @@ public class YoutubeService implements StreamingService {
return serviceInfo;
}
@Override
public Extractor getExtractorInstance() {
return new YoutubeExtractor();
public Extractor getExtractorInstance(String url) {
if(acceptUrl(url)) {
return new YoutubeExtractor(url);
}
else {
throw new IllegalArgumentException("supplied String is not a valid Youtube URL");
}
}
@Override
public SearchEngine getSearchEngineInstance() {