blob: bd42f4f8af072bc4dc90431200135368dcc10d47 [file] [log] [blame]
# Part of get-flash-videos. See get_flash_videos for copyright.
package FlashVideo::URLFinder;
use strict;
use Module::Find;
use FlashVideo::Mechanize;
use FlashVideo::Generic;
use FlashVideo::Site;
use FlashVideo::Utils;
use URI;
# The main issue is getting a URL for the actual video, so we handle this
# here - a different package for each site, as well as a generic fallback.
# Each package has a find_video method, which should return a URL, and a
# suggested filename.
# In some cases there isn't an obvious URL to find, so the following will be loaded and their 'can_handle'
# method called.
my @extra_can_handle = findsubmod FlashVideo::Site;
sub find_package {
my($class, $url, $browser) = @_;
my $package = _find_package_url($url, $browser);
if(!defined $package) {
# Fairly lame heuristic, look for the first URL outside the <object>
# element (avoids grabbing things like codebase attribute).
# Also look at embedded scripts for sites which embed their content that way.
# TODO: extract all SWF URLs from the page and check to see if we've
# got a package for those.
for my $possible_url($browser->content =~
m!(?:<object[^>]+>.*?|<(?:script|embed|iframe|param) [^>]*(?:src=["']?|name=["']src["']\ value=["']))(http://[^"'> ]+)!gixs) {
$package = _find_package_url($possible_url, $browser);
return _found($package, $possible_url) if defined $package;
}
}
# Handle redirection such as short urls.
if (!defined $package) {
$browser->get($url);
if ($browser->response->is_redirect) {
my $possible_url = $browser->response->header('Location');
$package = _find_package_url($possible_url, $browser);
return _found($package, $possible_url) if (defined $package);
}
}
if(!defined $package) {
for(@extra_can_handle) {
s/FlashVideo::Site:://;
my $possible_package = _load($_);
next unless $possible_package->can("can_handle");
$browser->get($url);
my $r = $possible_package->can_handle($browser, $url);
if($r) {
$package = $possible_package;
last;
}
}
}
if(!defined $package) {
$package = "FlashVideo::Generic";
}
return _found($package, $url);
}
# Split the URLs into parts and see if we have a package with this name.
sub _find_package_url {
my($url, $browser) = @_;
my $package;
foreach my $host_part (split /\./, URI->new($url)->host) {
$host_part = lc $host_part;
$host_part =~ s/[^a-z0-9]//i;
my $possible_package = _load($host_part);
if($possible_package->can("find_video")) {
if($possible_package->can("can_handle")) {
next unless $possible_package->can_handle($browser, $url);
}
$package = $possible_package;
last;
}
}
return $package;
}
sub _found {
my($package, $url) = @_;
my $pv = eval "\$".$package."::VERSION";
$pv = ' plugin version ' . $pv if $pv;
info "Using method '" . lc((split /::/, $package)[-1]) . "'$pv for $url";
return $package, $url;
}
sub _load {
my($site) = @_;
my $package = "FlashVideo::Site::" . ucfirst lc $site;
if(eval "require $package") {
no strict 'refs';
push @{$package . "::ISA"}, "FlashVideo::Site";
}
return $package;
}
1;