Arquivos
pipe-viewer/lib/WWW/PipeViewer/InitialData.pm
T

500 linhas
13 KiB
Perl

package WWW::PipeViewer::InitialData;
use utf8;
use 5.014;
use warnings;
=head1 NAME
WWW::PipeViewer::InitialData - Extract search, channel and playlist results from the "initial-data" JSON embedded in YouTube's mobile pages.
=head1 SYNOPSIS
use WWW::PipeViewer;
my $obj = WWW::PipeViewer->new(%opts);
my $results = $obj->yt_search(q => $keywords);
my $playlists = $obj->yt_channel_playlists($channel_ID);
=head1 SUBROUTINES/METHODS
=cut
# Convert a duration string into a number of seconds.
#
# Recognised shapes (the first one found anywhere in the string wins, tried
# in this order): "H:M:S", "M:S", or a bare run of digits taken as seconds.
# Returns 0 when the string contains no digits.
sub _time_to_seconds {
    my ($time) = @_;

    if (my ($h, $m, $s) = $time =~ /(\d+):(\d+):(\d+)/) {
        return $h * 3600 + $m * 60 + $s;
    }

    if (my ($m, $s) = $time =~ /(\d+):(\d+)/) {
        return $m * 60 + $s;
    }

    if (my ($s) = $time =~ /(\d+)/) {
        return 0 + $s;    # numify, so "007" comes back as 7
    }

    return 0;
}
# Convert a human-readable count into an integer.
#
# The first run of digits/separators found in $text is used:
#   "1,234,567 views" -> 1234567   (grouping separators are stripped)
#   "1.5K views"      -> 1500      (K/M/B suffix scales by 1e3/1e6/1e9)
# A suffix is only honoured when it directly follows the number (optional
# whitespace in between) and is not the first letter of a longer word, so
# "1.2 Billion" is still treated as a plain number.
# Returns 0 when $text contains no digits or separators at all.
sub _human_number_to_int {
    my ($text) = @_;

    if ($text =~ /([\d,.]+)(?:\s*([KMB])(?![A-Za-z]))?/) {
        my ($number, $suffix) = ($1, $2);

        # Abbreviated form: the separator is a decimal mark, not grouping.
        if (defined($suffix) and $number =~ /\d/) {
            my %multiplier_for = (K => 1e3, M => 1e6, B => 1e9);

            $number =~ tr/,/./;                # "1,5K" -> "1.5K"
            $number =~ s/\.(?=.*\.)//g;        # keep only the last separator as decimal mark

            # Round instead of truncating to avoid floating-point off-by-one.
            return sprintf('%.0f', $number * $multiplier_for{$suffix});
        }

        # Plain form: every separator is a thousands grouping mark.
        $number =~ tr/.,//d;
        return $number;
    }

    return 0;
}
# Map thumbnail dimensions to a quality label.
#
# Known sizes yield 'maxres', 'sddefault', 'high', 'medium' or 'default';
# anything else (including a missing width or height) yields 'medium'.
sub _thumbnail_quality {
    my ($width, $height) = @_;

    return 'medium' unless defined $width;
    return 'medium' unless defined $height;

    # [width, height, label], checked in order with numeric comparison.
    my @known_sizes = (
        [1280, 720, "maxres"],
        [640,  480, "sddefault"],
        [480,  360, 'high'],
        [320,  180, 'medium'],
        [120,  90,  'default'],
    );

    foreach my $size (@known_sizes) {
        my ($w, $h, $label) = @$size;
        return $label if ($width == $w and $height == $h);
    }

    return 'medium';
}
# Normalise a URL so that it uses the https scheme.
#
#   undef                 -> undef
#   "https://..."         -> unchanged
#   "http://host/x"       -> "https://host/x"   (any leading scheme is replaced)
#   "//host/x"            -> "https://host/x"   (protocol-relative)
#   "host.tld/x"          -> "https://host.tld/x"
#   anything else         -> unchanged
#
# The scheme match is anchored at the start of the string so that a "//"
# occurring later in the path of a scheme-less URL is left alone instead of
# having everything before it (including the host) stripped.
sub _fix_url_protocol {
    my ($url) = @_;

    $url // return undef;

    if ($url =~ m{\Ahttps://}) {    # ok
        return $url;
    }

    # Optional leading whitespace, optional "scheme:", then "//".
    if ($url =~ s{\A\s*(?:[A-Za-z][A-Za-z0-9+.\-]*:)?//}{}) {
        return "https://" . $url;
    }

    # Bare "host.tld/..." without any scheme.
    if ($url =~ /\A\w+\./) {
        return "https://" . $url;
    }

    return $url;
}
# Build a playlist-style result hash from a "universal watch card" entry.
#
# $data is the watch-card renderer hash. Returns a hash reference with the
# keys type, title, playlistId, playlistThumbnail, author and authorId, or
# nothing when the hero video renderer or its playlist ID is missing.
sub _extract_youtube_mix {
    my ($self, $data) = @_;

    my $hero = eval { $data->{callToAction}{watchCardHeroVideoRenderer} };
    $hero or return;

    my $rich_header = eval { $data->{header}{watchCardRichHeaderRenderer} };

    # Title: header text, then accessibility label, then button label.
    my $title = eval { $rich_header->{title}{runs}[0]{text} };
    $title //= eval { $hero->{accessibility}{accessibilityData}{label} };
    $title //= eval { $hero->{callToActionButton}{callToActionButtonRenderer}{label}{runs}[0]{text} };
    $title //= 'Youtube Mix';

    my $playlist_id = eval { $hero->{navigationEndpoint}{watchEndpoint}{playlistId} };
    $playlist_id or return;

    # Thumbnail: header avatar first, otherwise the left collage image.
    my $thumbnail = eval { _fix_url_protocol($rich_header->{avatar}{thumbnails}[0]{url}) };
    $thumbnail //= eval { _fix_url_protocol($hero->{heroImage}{collageHeroImageRenderer}{leftThumbnail}{thumbnails}[0]{url}) };

    my $author    = eval { $rich_header->{title}{runs}[0]{text} } // "YouTube";
    my $author_id = eval { $rich_header->{titleNavigationEndpoint}{browseEndpoint}{browseId} } // "youtube";

    return {
            type              => 'playlist',
            title             => $title,
            playlistId        => $playlist_id,
            playlistThumbnail => $thumbnail,
            author            => $author,
            authorId          => $author_id,
           };
}
# Return the author name of an entry: the first run of its long byline,
# falling back to the first run of its short byline. Undefined when neither
# is present.
sub _extract_author_name {
    my $info = shift;

    my $long_byline = eval { $info->{longBylineText}{runs}[0]{text} };
    return $long_byline if defined $long_byline;

    return eval { $info->{shortBylineText}{runs}[0]{text} };
}
# Return the video ID of an entry: its own `videoId` when that is a true
# value, otherwise the one stored in its watch navigation endpoint.
# Returns undef when neither yields a true value.
sub _extract_video_id {
    my $info = shift;

    my $id = eval { $info->{videoId} };
    return $id if $id;

    $id = eval { $info->{navigationEndpoint}{watchEndpoint}{videoId} };
    return $id if $id;

    return undef;
}
# Return the length of a video in seconds.
#
# Sources, tried in order until one yields a true value:
#   1. the `lengthSeconds` field,
#   2. the text of the first thumbnail overlay (time status renderer),
#   3. the `lengthText` field.
# The textual sources are converted with _time_to_seconds().
sub _extract_length_seconds {
    my $info = shift;

    my $seconds = eval { $info->{lengthSeconds} };
    return $seconds if $seconds;

    my $overlay_text = eval { $info->{thumbnailOverlays}[0]{thumbnailOverlayTimeStatusRenderer}{text}{runs}[0]{text} };
    $seconds = _time_to_seconds($overlay_text // 0);
    return $seconds if $seconds;

    my $length_text = eval { $info->{lengthText}{runs}[0]{text} // 0 };
    return _time_to_seconds($length_text);
}
# Return the first run of an entry's `publishedTimeText`, if there is one.
sub _extract_published_text {
    my $info = shift;
    return eval { $info->{publishedTimeText}{runs}[0]{text} };
}
# Return the channel ID of an entry: its own `channelId` when defined,
# otherwise the browse ID attached to the first run of its short byline.
sub _extract_channel_id {
    my $info = shift;

    my $direct_id = eval { $info->{channelId} };
    return $direct_id if defined $direct_id;

    return eval { $info->{shortBylineText}{runs}[0]{navigationEndpoint}{browseEndpoint}{browseId} };
}
# Return the first run of an entry's `shortViewCountText`, if there is one.
sub _extract_view_count_text {
    my $info = shift;
    return eval { $info->{shortViewCountText}{runs}[0]{text} };
}
# Return an array reference of thumbnail hashes for an entry.
#
# Each thumbnail from `$info->{thumbnail}{thumbnails}` is shallow-copied,
# given a `quality` label derived from its width/height, and has its `url`
# passed through _fix_url_protocol(). If anything in the extraction fails,
# the eval yields no array reference.
sub _extract_video_thumbnails {
    my ($info) = @_;

    # Copy one raw thumbnail and decorate the copy.
    my $decorate = sub {
        my ($raw) = @_;
        return {
                %$raw,
                quality => scalar(_thumbnail_quality($raw->{width}, $raw->{height})),
                url     => scalar(_fix_url_protocol($raw->{url})),
               };
    };

    return eval { [map { $decorate->($_) } @{$info->{thumbnail}{thumbnails}}] };
}
# Return the title of an entry: the first run of `title`, falling back to
# the title's accessibility label.
sub _extract_title {
    my $info = shift;

    my $title = eval { $info->{title}{runs}[0]{text} };
    return $title if defined $title;

    return eval { $info->{title}{accessibility}{accessibilityData}{label} };
}
# Return a stand-in description for an entry.
#
# FIXME: this is not the video description; it is the accessibility label
# of the title.
sub _extract_description {
    my $info = shift;
    return eval { $info->{title}{accessibility}{accessibilityData}{label} };
}
# Return the view count of an entry as an integer, parsed from the first
# run of `viewCountText` (0 is parsed when that text is missing).
sub _extract_view_count {
    my $info = shift;

    my $count_text = eval { $info->{viewCountText}{runs}[0]{text} };
    return _human_number_to_int($count_text // 0);
}
# Return the number of videos of an entry as an integer, parsed from
# `videoCountShortText` or, failing that, `videoCountText` (0 is parsed when
# both are missing).
sub _extract_video_count {
    my $info = shift;

    my $count_text = eval { $info->{videoCountShortText}{runs}[0]{text} };
    $count_text //= eval { $info->{videoCountText}{runs}[0]{text} };

    return _human_number_to_int($count_text // 0);
}
# Return the `playlistId` field of an entry, if there is one.
sub _extract_playlist_id {
    my $info = shift;
    return eval { $info->{playlistId} };
}
# Return the thumbnail URL of a playlist entry, passed through
# _fix_url_protocol(): the playlist video thumbnail renderer is preferred,
# the plain `thumbnail` list is the fallback.
sub _extract_playlist_thumbnail {
    my $info = shift;

    my $url = eval { _fix_url_protocol($info->{thumbnailRenderer}{playlistVideoThumbnailRenderer}{thumbnail}{thumbnails}[0]{url}) };
    return $url if defined $url;

    return eval { _fix_url_protocol($info->{thumbnail}{thumbnails}[0]{url}) };
}
# Convert one item-section entry into a result hash.
#
# Parameters:
#   $data - the raw entry (hash reference keyed by renderer name)
#   %args - options; `type` ('all', 'video', 'playlist', ...) is optional
#
# Returns a hash reference describing a video or a playlist, or nothing when
# the entry is unsupported, filtered out by `type`, not playable, or lacks a
# required field (video ID, title, length; playlist title, playlist ID).
sub _extract_itemSection_entry {
    my ($self, $data, %args) = @_;

    # `type` is not always supplied (yt_search() forwards the caller's
    # arguments unchanged), so default it to avoid comparing undef with eq/ne.
    my $type = $args{type} // '';

    # Album
    if ($type eq 'all' and exists $data->{horizontalCardListRenderer}) {    # TODO
        return;
    }

    # Video
    if (exists($data->{compactVideoRenderer}) or exists($data->{playlistVideoRenderer})) {

        my %video;
        my $info = eval { $data->{compactVideoRenderer} } // eval { $data->{playlistVideoRenderer} };

        $video{type} = 'video';

        # Deleted video: `isPlayable` is present but false.
        if (defined(eval { $info->{isPlayable} }) and not $info->{isPlayable}) {
            return;
        }

        # Entries without an ID, a title or a non-zero length are dropped.
        $video{videoId}       = _extract_video_id($info)       // return;
        $video{title}         = _extract_title($info)          // return;
        $video{lengthSeconds} = _extract_length_seconds($info) || return;

        $video{author}          = _extract_author_name($info);
        $video{authorId}        = _extract_channel_id($info);
        $video{publishedText}   = _extract_published_text($info);
        $video{viewCountText}   = _extract_view_count_text($info);
        $video{videoThumbnails} = _extract_video_thumbnails($info);
        $video{description}     = _extract_description($info);
        $video{viewCount}       = _extract_view_count($info);

        return \%video;
    }

    # Playlist (skipped when only videos were requested)
    if ($type ne 'video' and exists $data->{compactPlaylistRenderer}) {

        my %playlist;
        my $info = eval { $data->{compactPlaylistRenderer} };

        $playlist{type} = 'playlist';

        $playlist{title}             = _extract_title($info)       // return;
        $playlist{playlistId}        = _extract_playlist_id($info) // return;
        $playlist{videoCount}        = _extract_video_count($info);
        $playlist{playlistThumbnail} = _extract_playlist_thumbnail($info);

        return \%playlist;
    }

    return;
}
# Extract every supported result from one item section.
#
# $entry must be a hash reference whose `contents` is an array reference;
# otherwise nothing is returned. Each element is handed to
# _extract_itemSection_entry() (with %args forwarded) and only the hash
# references it produces are returned, in order.
sub _parse_itemSection {
    my ($self, $entry, %args) = @_;

    my $has_contents = eval { ref($entry->{contents}) eq 'ARRAY' };
    return unless $has_contents;

    return grep { defined($_) and ref($_) eq 'HASH' }
      map { scalar $self->_extract_itemSection_entry($_, %args) } @{$entry->{contents}};
}
# Walk a section list and collect every result it contains.
#
# Parameters:
#   $data - section list hash; its `contents` must be an array reference,
#           otherwise nothing is returned
#   %args - options forwarded to the item parsers; `type` is optional
#
# Returns the list of results (hash references) in page order.
#
# NOTE(review): the multi-level lookups inside the eval probes below
# autovivify intermediate keys on $entry, so the later
# `exists $entry->{itemSectionRenderer}` test is presumably true for every
# hash entry -- confirm before relying on that `exists`.
sub _extract_sectionList_results {
    my ($self, $data, %args) = @_;

    eval { ref($data->{contents}) eq 'ARRAY' } or return;

    # `type` may be absent (e.g. a plain search); default it so the string
    # comparison below never sees undef.
    my $type = $args{type} // '';

    my @results;

    foreach my $entry (@{$data->{contents}}) {

        # Playlists
        if (eval { ref($entry->{shelfRenderer}{content}{verticalListRenderer}{items}) eq 'ARRAY' }) {
            push @results,
              $self->_parse_itemSection({contents => $entry->{shelfRenderer}{content}{verticalListRenderer}{items}}, %args);
        }

        # Playlist videos
        if (eval { ref($entry->{itemSectionRenderer}{contents}[0]{playlistVideoListRenderer}{contents}) eq 'ARRAY' }) {
            push @results,
              $self->_parse_itemSection($entry->{itemSectionRenderer}{contents}[0]{playlistVideoListRenderer}, %args);

            # Already handled; skip the generic item-section branch below.
            next;
        }

        # YouTube Mix
        if ($type eq 'all' and exists $entry->{universalWatchCardRenderer}) {
            my $mix = $self->_extract_youtube_mix($entry->{universalWatchCardRenderer});

            if (defined($mix)) {
                push(@results, $mix);
            }
        }

        # Video results
        if (exists $entry->{itemSectionRenderer}) {
            push @results, $self->_parse_itemSection($entry->{itemSectionRenderer}, %args);
        }

        # Continuation page
        if (exists $entry->{continuationItemRenderer}) {    # TODO
            ## ...
        }
    }

    return @results;
}
# Stamp channel information onto a list of results, in place.
#
# The channel ID and name are read from the page's
# `header.c4TabbedHeaderRenderer` (`channelId` and `title`). Every hash
# reference in @$results gets `author` and/or `authorId` set to whichever of
# those values is defined; non-hash elements are left alone.
# Always returns 1.
sub _add_author_to_results {
    my ($self, $data, $results, %args) = @_;

    my $channel_header = eval { $data->{header}{c4TabbedHeaderRenderer} };
    my $author_id      = eval { $channel_header->{channelId} };
    my $author_name    = eval { $channel_header->{title} };

    # Only the fields we actually know about get written.
    my %known_field;
    $known_field{author}   = $author_name if defined($author_name);
    $known_field{authorId} = $author_id   if defined($author_id);

    foreach my $result (@$results) {
        next unless ref($result) eq 'HASH';
        @{$result}{keys %known_field} = values %known_field;
    }

    return 1;
}
# Extract the uploads of a channel from its initial data.
#
# The section list is read from the second tab (index 1) of the
# single-column browse results, parsed with _extract_sectionList_results(),
# and every result is then tagged with the channel's author information.
# Returns the list of results.
sub _extract_channel_uploads {
    my ($self, $data, %args) = @_;

    # Evaluate the lookup in scalar context: a failing eval in list context
    # returns an empty list, which would shift %args into the $data slot of
    # the call below.
    my $section_list = eval {
        $data->{contents}{singleColumnBrowseResultsRenderer}{tabs}[1]{tabRenderer}{content}{sectionListRenderer};
    };

    my @results = $self->_extract_sectionList_results($section_list, %args);
    $self->_add_author_to_results($data, \@results, %args);
    return @results;
}
# Extract the playlists of a channel from its initial data.
#
# The section list is read from the third tab (index 2) of the
# single-column browse results, parsed with _extract_sectionList_results(),
# and every result is then tagged with the channel's author information.
# Returns the list of results.
sub _extract_channel_playlists {
    my ($self, $data, %args) = @_;

    # Evaluate the lookup in scalar context: a failing eval in list context
    # returns an empty list, which would shift %args into the $data slot of
    # the call below.
    my $section_list = eval {
        $data->{contents}{singleColumnBrowseResultsRenderer}{tabs}[2]{tabRenderer}{content}{sectionListRenderer};
    };

    my @results = $self->_extract_sectionList_results($section_list, %args);
    $self->_add_author_to_results($data, \@results, %args);
    return @results;
}
# Extract the videos of a playlist from its initial data.
#
# The section list is read from the first tab (index 0) of the
# single-column browse results, parsed with _extract_sectionList_results(),
# and every result is then passed through _add_author_to_results().
# Returns the list of results.
sub _extract_playlist_videos {
    my ($self, $data, %args) = @_;

    # Evaluate the lookup in scalar context: a failing eval in list context
    # returns an empty list, which would shift %args into the $data slot of
    # the call below.
    my $section_list = eval {
        $data->{contents}{singleColumnBrowseResultsRenderer}{tabs}[0]{tabRenderer}{content}{sectionListRenderer};
    };

    my @results = $self->_extract_sectionList_results($section_list, %args);
    $self->_add_author_to_results($data, \@results, %args);
    return @results;
}
# Fetch a page and decode the JSON stored in its "initial-data" element.
#
# The page is retrieved with $self->lwp_get($url). The JSON is expected
# inside an HTML comment within `<div id="initial-data">` and is decoded with
# $self->parse_utf8_json_string(). Returns the decoded value, or nothing
# when the page could not be fetched or holds no such element.
sub _get_initial_data {
    my ($self, $url) = @_;

    # Bail out early when nothing was fetched, instead of pattern-matching
    # against undef.
    my $content = $self->lwp_get($url) // return;

    if ($content =~ m{<div id="initial-data"><!--(.*?)--></div>}is) {
        my $json = $1;    # copy: $1 may be clobbered by later matches
        my $hash = $self->parse_utf8_json_string($json);
        return $hash;
    }

    return;
}
# Fetch the initial data of one channel page.
#
# $channel is either a channel ID (as recognised by the utils object's
# is_channelID) or a custom channel name; $args{type} is the page path
# segment appended after it (callers in this file pass 'videos' or
# 'playlists'). Returns whatever _get_initial_data() returns for that URL.
sub _channel_data {
    my ($self, $channel, %args) = @_;

    state $yv_utils = WWW::PipeViewer::Utils->new();

    my $url = $self->get_m_youtube_url;

    # Channel IDs live under /channel/, custom names under /c/.
    my $prefix = $yv_utils->is_channelID($channel) ? '/channel/' : '/c/';
    $url .= $prefix . $channel . '/' . $args{type};

    return $self->_get_initial_data($url);
}
=head2 yt_search(q => $keyword, %args)
Search YouTube for the given keyword, which must already be URI-escaped.
=cut
sub yt_search {
    my ($self, %args) = @_;

    # $args{q} is inserted verbatim, so it must already be URI-escaped.
    my $url = $self->get_m_youtube_url . "/results?search_query=$args{q}";

    # TODO: add support for various search parameters

    my $hash = $self->_get_initial_data($url) // return;

    # Evaluate the lookup in scalar context: a failing eval in list context
    # returns an empty list, which would shift %args into the data slot of
    # the call below.
    my $section_list = eval { $hash->{contents}{sectionListRenderer} };

    return $self->_extract_sectionList_results($section_list, %args);
}
=head2 yt_channel_uploads($channel, %args)
Latest uploads for a given channel ID or username.
=cut
sub yt_channel_uploads {
    my ($self, $channel, %args) = @_;

    # Fetch the channel's "videos" page; give up when nothing comes back.
    my $initial_data = $self->_channel_data($channel, type => 'videos');
    return unless defined $initial_data;

    # Only video entries are wanted, whatever `type` the caller passed.
    return $self->_extract_channel_uploads($initial_data, %args, type => 'video');
}
=head2 yt_channel_playlists($channel, %args)
Playlists for a given channel ID or username.
=cut
sub yt_channel_playlists {
    my ($self, $channel, %args) = @_;

    # Fetch the channel's "playlists" page; give up when nothing comes back.
    my $initial_data = $self->_channel_data($channel, type => 'playlists');
    return unless defined $initial_data;

    # Only playlist entries are wanted, whatever `type` the caller passed.
    return $self->_extract_channel_playlists($initial_data, %args, type => 'playlist');
}
=head2 yt_playlist_videos($playlist_id, %args)
Videos from a given playlist ID.
=cut
sub yt_playlist_videos {
    my ($self, $playlist_id, %args) = @_;

    # $playlist_id is inserted verbatim into the query string.
    my $url = $self->get_m_youtube_url . "/playlist?list=$playlist_id";
    my $hash = $self->_get_initial_data($url) // return;

    # Evaluate the lookup in scalar context: a failing eval in list context
    # returns an empty list, which would shift %args into the data slot of
    # the call below.
    my $section_list = eval {
        $hash->{contents}{singleColumnBrowseResultsRenderer}{tabs}[0]{tabRenderer}{content}{sectionListRenderer};
    };

    # Only video entries are wanted, whatever `type` the caller passed.
    return $self->_extract_sectionList_results($section_list, %args, type => 'video');
}
=head1 AUTHOR
Trizen, C<< <echo dHJpemVuQHByb3Rvbm1haWwuY29tCg== | base64 -d> >>
=head1 SUPPORT
You can find documentation for this module with the perldoc command.
perldoc WWW::PipeViewer::InitialData
=head1 LICENSE AND COPYRIGHT
Copyright 2013-2015 Trizen.
This program is free software; you can redistribute it and/or modify it
under the terms of either: the GNU General Public License as published
by the Free Software Foundation; or the Artistic License.
See L<http://dev.perl.org/licenses/> for more information.
=cut
1; # End of WWW::PipeViewer::InitialData