pipe-viewer/lib/WWW/PipeViewer/InitialData.pm

package WWW::PipeViewer::InitialData;

use utf8;
use 5.014;
use warnings;

=head1 NAME

WWW::PipeViewer::InitialData - Extract initial data.

=head1 SYNOPSIS

    use WWW::PipeViewer;
    my $obj = WWW::PipeViewer->new(%opts);

    my $results   = $obj->yt_search(q => $keywords);
    my $playlists = $obj->yt_channel_playlists($channel_ID);

=head1 SUBROUTINES/METHODS

=cut

sub _time_to_seconds {
    my ($time) = @_;

    my ($hours, $minutes, $seconds) = (0, 0, 0);

    if ($time =~ /(\d+):(\d+):(\d+)/) {
        ($hours, $minutes, $seconds) = ($1, $2, $3);
    }
    elsif ($time =~ /(\d+):(\d+)/) {
        ($minutes, $seconds) = ($1, $2);
    }
    elsif ($time =~ /(\d+)/) {
        $seconds = $1;
    }

    $hours * 3600 + $minutes * 60 + $seconds;
}

sub _human_number_to_int {
    my ($text) = @_;

    if ($text =~ /([\d,.]+)/) {
        my $v = $1;
        $v =~ tr/.,//d;
        return $v;
    }

    return 0;
}

sub _thumbnail_quality {
    my ($width, $height) = @_;

    $width  // return 'medium';
    $height // return 'medium';

    if ($width == 1280 and $height == 720) {
        return "maxres";
    }

    if ($width == 640 and $height == 480) {
        return "sddefault";
    }

    if ($width == 480 and $height == 360) {
        return 'high';
    }

    if ($width == 320 and $height == 180) {
        return 'medium';
    }

    if ($width == 120 and $height == 90) {
        return 'default';
    }

    return 'medium';
}

sub _fix_url_protocol {
    my ($url) = @_;

    $url // return undef;

    if ($url =~ m{^https://}) {    # ok
        return $url;
    }
    if ($url =~ s{^.*?//}{}) {
        return "https://" . $url;
    }
    if ($url =~ /^\w+\./) {
        return "https://" . $url;
    }

    return $url;
}

sub _extract_youtube_mix {
    my ($self, $data) = @_;

    my $info   = eval { $data->{callToAction}{watchCardHeroVideoRenderer} } || return;
    my $header = eval { $data->{header}{watchCardRichHeaderRenderer} };

    my %mix;

    $mix{type} = 'playlist';

    $mix{title} =
      eval    { $header->{title}{runs}[0]{text} }
      // eval { $info->{accessibility}{accessibilityData}{label} }
      // eval { $info->{callToActionButton}{callToActionButtonRenderer}{label}{runs}[0]{text} } // 'Youtube Mix';

    $mix{playlistId} = eval { $info->{navigationEndpoint}{watchEndpoint}{playlistId} } || return;

    $mix{playlistThumbnail} = eval { _fix_url_protocol($header->{avatar}{thumbnails}[0]{url}) }
      // eval { _fix_url_protocol($info->{heroImage}{collageHeroImageRenderer}{leftThumbnail}{thumbnails}[0]{url}) };

    $mix{author}   = eval { $header->{title}{runs}[0]{text} }                              // "YouTube";
    $mix{authorId} = eval { $header->{titleNavigationEndpoint}{browseEndpoint}{browseId} } // "youtube";

    return \%mix;
}

sub _extract_author_name {
    my ($info) = @_;
    eval { $info->{longBylineText}{runs}[0]{text} } // eval { $info->{shortBylineText}{runs}[0]{text} };
}

sub _extract_video_id {
    my ($info) = @_;
    eval { $info->{videoId} } || eval { $info->{navigationEndpoint}{watchEndpoint}{videoId} } || undef;
}

sub _extract_length_seconds {
    my ($info) = @_;
    eval { $info->{lengthSeconds} }
      || _time_to_seconds(eval { $info->{thumbnailOverlays}[0]{thumbnailOverlayTimeStatusRenderer}{text}{runs}[0]{text} } // 0)
      || _time_to_seconds(eval { $info->{lengthText}{runs}[0]{text} // 0 });
}

sub _extract_published_text {
    my ($info) = @_;
    eval { $info->{publishedTimeText}{runs}[0]{text} };
}

sub _extract_channel_id {
    my ($info) = @_;
    eval { $info->{channelId} } // eval { $info->{shortBylineText}{runs}[0]{navigationEndpoint}{browseEndpoint}{browseId} };
}

sub _extract_view_count_text {
    my ($info) = @_;
    eval { $info->{shortViewCountText}{runs}[0]{text} };
}

sub _extract_video_thumbnails {
    my ($info) = @_;
    eval {
        [
         map {
             my %thumb = %$_;
             $thumb{quality} = _thumbnail_quality($thumb{width}, $thumb{height});
             $thumb{url}     = _fix_url_protocol($thumb{url});
             \%thumb;
         } @{$info->{thumbnail}{thumbnails}}
        ]
    };
}

sub _extract_title {
    my ($info) = @_;
    eval { $info->{title}{runs}[0]{text} } // eval { $info->{title}{accessibility}{accessibilityData}{label} };
}

sub _extract_description {
    my ($info) = @_;

    # FIXME: this is not the video description
    eval { $info->{title}{accessibility}{accessibilityData}{label} };
}

sub _extract_view_count {
    my ($info) = @_;
    _human_number_to_int(eval { $info->{viewCountText}{runs}[0]{text} } // 0);
}

sub _extract_video_count {
    my ($info) = @_;
    _human_number_to_int(eval { $info->{videoCountShortText}{runs}[0]{text} }
                         // eval { $info->{videoCountText}{runs}[0]{text} } // 0);
}

sub _extract_playlist_id {
    my ($info) = @_;
    eval { $info->{playlistId} };
}

sub _extract_playlist_thumbnail {
    my ($info) = @_;
    eval { _fix_url_protocol($info->{thumbnailRenderer}{playlistVideoThumbnailRenderer}{thumbnail}{thumbnails}[0]{url}) }
      // eval { _fix_url_protocol($info->{thumbnail}{thumbnails}[0]{url}) };
}

sub _extract_itemSection_entry {
    my ($self, $data, %args) = @_;

    # Album
    if ($args{type} eq 'all' and exists $data->{horizontalCardListRenderer}) {    # TODO
        return;
    }

    # Video
    if (exists($data->{compactVideoRenderer}) or exists($data->{playlistVideoRenderer})) {

        my %video;
        my $info = eval { $data->{compactVideoRenderer} } // eval { $data->{playlistVideoRenderer} };

        $video{type} = 'video';

        # Deleted video
        if (defined(eval { $info->{isPlayable} }) and not $info->{isPlayable}) {
            return;
        }

        $video{videoId}         = _extract_video_id($info) // return;
        $video{title}           = _extract_title($info)    // return;
        $video{lengthSeconds}   = _extract_length_seconds($info) || return;
        $video{author}          = _extract_author_name($info);
        $video{authorId}        = _extract_channel_id($info);
        $video{publishedText}   = _extract_published_text($info);
        $video{viewCountText}   = _extract_view_count_text($info);
        $video{videoThumbnails} = _extract_video_thumbnails($info);
        $video{description}     = _extract_description($info);
        $video{viewCount}       = _extract_view_count($info);

        return \%video;
    }

    # Playlist
    if ($args{type} ne 'video' and exists $data->{compactPlaylistRenderer}) {

        my %playlist;
        my $info = eval { $data->{compactPlaylistRenderer} };

        $playlist{type} = 'playlist';

        $playlist{title}             = _extract_title($info)       // return;
        $playlist{playlistId}        = _extract_playlist_id($info) // return;
        $playlist{videoCount}        = _extract_video_count($info);
        $playlist{playlistThumbnail} = _extract_playlist_thumbnail($info);

        return \%playlist;
    }

    return;
}

sub _parse_itemSection {
    my ($self, $entry, %args) = @_;

    eval { ref($entry->{contents}) eq 'ARRAY' } || return;

    my @results;

    foreach my $entry (@{$entry->{contents}}) {

        my $item = $self->_extract_itemSection_entry($entry, %args);

        if (defined($item) and ref($item) eq 'HASH') {
            push @results, $item;
        }
    }

    return @results;
}

sub _extract_sectionList_results {
    my ($self, $data, %args) = @_;

    eval { ref($data->{contents}) eq 'ARRAY' } or return;

    my @results;

    foreach my $entry (@{$data->{contents}}) {

        # Playlists
        if (eval { ref($entry->{shelfRenderer}{content}{verticalListRenderer}{items}) eq 'ARRAY' }) {
            push @results,
              $self->_parse_itemSection({contents => $entry->{shelfRenderer}{content}{verticalListRenderer}{items}}, %args);
        }

        # Playlist videos
        if (eval { ref($entry->{itemSectionRenderer}{contents}[0]{playlistVideoListRenderer}{contents}) eq 'ARRAY' }) {
            push @results,
              $self->_parse_itemSection($entry->{itemSectionRenderer}{contents}[0]{playlistVideoListRenderer}, %args);
            next;
        }

        # YouTube Mix
        if ($args{type} eq 'all' and exists $entry->{universalWatchCardRenderer}) {

            my $mix = $self->_extract_youtube_mix($entry->{universalWatchCardRenderer});

            if (defined($mix)) {
                push(@results, $mix);
            }
        }

        # Video results
        if (exists $entry->{itemSectionRenderer}) {
            push @results, $self->_parse_itemSection($entry->{itemSectionRenderer}, %args);
        }

        # Continuation page
        if (exists $entry->{continuationItemRenderer}) {    # TODO
            ## ...
        }
    }

    return @results;
}

sub _add_author_to_results {
    my ($self, $data, $results, %args) = @_;

    my $header = eval { $data->{header}{c4TabbedHeaderRenderer} };

    my $channel_id   = eval { $header->{channelId} };
    my $channel_name = eval { $header->{title} };

    foreach my $result (@$results) {
        if (ref($result) eq 'HASH') {
            $result->{author}   = $channel_name if defined($channel_name);
            $result->{authorId} = $channel_id   if defined($channel_id);
        }
    }

    return 1;
}

sub _extract_channel_uploads {
    my ($self, $data, %args) = @_;

    my @results = $self->_extract_sectionList_results(
        eval {
            $data->{contents}{singleColumnBrowseResultsRenderer}{tabs}[1]{tabRenderer}{content}{sectionListRenderer};
        },
        %args
                                                     );
    $self->_add_author_to_results($data, \@results, %args);
    return @results;
}

sub _extract_channel_playlists {
    my ($self, $data, %args) = @_;

    my @results = $self->_extract_sectionList_results(
        eval {
            $data->{contents}{singleColumnBrowseResultsRenderer}{tabs}[2]{tabRenderer}{content}{sectionListRenderer};
        },
        %args
                                                     );
    $self->_add_author_to_results($data, \@results, %args);
    return @results;
}

sub _extract_playlist_videos {
    my ($self, $data, %args) = @_;

    my @results = $self->_extract_sectionList_results(
        eval {
            $data->{contents}{singleColumnBrowseResultsRenderer}{tabs}[0]{tabRenderer}{content}{sectionListRenderer};
        },
        %args
                                                     );
    $self->_add_author_to_results($data, \@results, %args);
    return @results;
}

sub _get_initial_data {
    my ($self, $url) = @_;

    my $content = $self->lwp_get($url);

    if ($content =~ m{<div id="initial-data"><!--(.*?)--></div>}is) {
        my $json = $1;
        my $hash = $self->parse_utf8_json_string($json);
        return $hash;
    }

    return;
}

sub _channel_data {
    my ($self, $channel, %args) = @_;

    state $yv_utils = WWW::PipeViewer::Utils->new();

    my $url = $self->get_m_youtube_url;

    if ($yv_utils->is_channelID($channel)) {
        $url .= "/channel/$channel/$args{type}";
    }
    else {
        $url .= "/c/$channel/$args{type}";
    }

    $self->_get_initial_data($url);
}

=head2 yt_search(q => $keyword, %args)

Search for videos given a keyword (uri-escaped).

=cut

sub yt_search {
    my ($self, %args) = @_;

    my $url = $self->get_m_youtube_url . "/results?search_query=$args{q}";

    # TODO: add support for various search parameters

    my $hash = $self->_get_initial_data($url) // return;
    $self->_extract_sectionList_results(eval { $hash->{contents}{sectionListRenderer} }, %args);
}

=head2 yt_channel_uploads($channel, %args)

Latest uploads for a given channel ID or username.

=cut

sub yt_channel_uploads {
    my ($self, $channel, %args) = @_;
    my $hash = $self->_channel_data($channel, type => 'videos') // return;
    $self->_extract_channel_uploads($hash, %args, type => 'video');
}

=head2 yt_channel_playlists($channel, %args)

Playlists for a given channel ID or username.

=cut

sub yt_channel_playlists {
    my ($self, $channel, %args) = @_;
    my $hash = $self->_channel_data($channel, type => 'playlists') // return;
    $self->_extract_channel_playlists($hash, %args, type => 'playlist');
}

=head2 yt_playlist_videos($playlist_id, %args)

Videos from a given playlist ID.

=cut

sub yt_playlist_videos {
    my ($self, $playlist_id, %args) = @_;

    my $url  = $self->get_m_youtube_url . "/playlist?list=$playlist_id";
    my $hash = $self->_get_initial_data($url) // return;

    $self->_extract_sectionList_results(
        eval {
            $hash->{contents}{singleColumnBrowseResultsRenderer}{tabs}[0]{tabRenderer}{content}{sectionListRenderer};
        },
        %args,
        type => 'video'
                                       );
}

=head1 AUTHOR

Trizen, C<< <echo dHJpemVuQHByb3Rvbm1haWwuY29tCg== | base64 -d> >>


=head1 SUPPORT

You can find documentation for this module with the perldoc command.

    perldoc WWW::PipeViewer::InitialData


=head1 LICENSE AND COPYRIGHT

Copyright 2013-2015 Trizen.

This program is free software; you can redistribute it and/or modify it
under the terms of either: the GNU General Public License as published
by the Free Software Foundation; or the Artistic License.

See L<http://dev.perl.org/licenses/> for more information.

=cut

1;    # End of WWW::PipeViewer::InitialData