Go to:
Gentoo Home
Documentation
Forums
Lists
Bugs
Planet
Store
Wiki
Get Gentoo!
Gentoo's Bugzilla – Attachment 67767 Details for
Bug 100926
xmltv fails to download TV information (tv_grab_de_tvtoday)
Home
|
New
–
[Ex]
|
Browse
|
Search
|
Privacy Policy
|
[?]
|
Reports
|
Requests
|
Help
|
New Account
|
Log In
[x]
|
Forgot Password
Login:
[x]
[patch]
tv_grab_de_tvtoday.patch
tv_grab_de_tvtoday.patch (text/plain), 8.99 KB, created by
Christoph Vogtländer
on 2005-09-06 14:01:41 UTC
(
hide
)
Description:
tv_grab_de_tvtoday.patch
Filename:
MIME Type:
Creator:
Christoph Vogtländer
Created:
2005-09-06 14:01:41 UTC
Size:
8.99 KB
patch
obsolete
>*** xmltv-0.5.39/grab/de_tvtoday/tv_grab_de_tvtoday.in Thu Jan 13 17:54:07 2005 >--- xmltv-0.5.39-patched/grab/de_tvtoday/tv_grab_de_tvtoday.in Mon Sep 5 14:52:59 2005 >*************** >*** 91,97 **** > use warnings; > use strict; > use Date::Manip; >! use XMLTV::Version '$Id: tv_grab_de_tvtoday.in,v 1.24 2005/01/13 16:54:07 stesie Exp $ '; > use Getopt::Long; > use HTML::TreeBuilder; > use HTML::Entities; >--- 91,97 ---- > use warnings; > use strict; > use Date::Manip; >! use XMLTV::Version '$Id: tv_grab_de_tvtoday.in,v 1.26 2005/07/27 18:39:13 stesie Exp $ '; > use Getopt::Long; > use HTML::TreeBuilder; > use HTML::Entities; >*************** >*** 151,158 **** > > #-- attributes of xmltv root element > my $head = { >! 'source-data-url' => 'http://www.tvtoday.de/tv/programm/programm.php', >! 'source-info-url' => 'http://www.tvtoday.de/', > 'generator-info-name' => 'XMLTV', > 'generator-info-url' => 'http://membled.com/work/apps/xmltv/', > }; >--- 151,158 ---- > > #-- attributes of xmltv root element > my $head = { >! 'source-data-url' => 'http://programm.tvtoday.de/tv/programm/programm.php', >! 'source-info-url' => 'http://www.tvtoday.de/', > 'generator-info-name' => 'XMLTV', > 'generator-info-url' => 'http://membled.com/work/apps/xmltv/', > }; >*************** >*** 430,436 **** > > my $grab = { > 'channel' => channel_id($ch), >! 'url' => "http://www.tvtoday.de/tv/programm/programm.php?ztag=$offset&sparte=alle&uhrzeit=Ax00&sender=$ch", > 'lasttime' => 0, > 'lastday' => $lday, > }; >--- 430,436 ---- > > my $grab = { > 'channel' => channel_id($ch), >! 'url' => "http://programm.tvtoday.de/tv/programm/programm.php?ztag=$offset&sparte=alle&uhrzeit=Ax00&sender=$ch", > 'lasttime' => 0, > 'lastday' => $lday, > }; >*************** >*** 449,508 **** > } > > sub parse_page($$) { >! my $page = shift(@_)->look_down('_tag' => 'td', 'valign' => 'top', 'width' => 566); > my $grab = shift @_; > my $over_headline_table = 0; > my $pos; > my $day; > >! #-- delete the navigation form >! my $form = $page->look_down('_tag' => 'form', 'action' => '/tv/programm/programm.php', 'method' => 'get') or die("navigation form not found in requested page"); >! $form->delete(); >! > #-- extract date of grabbed data from retrieved webpage ... >! $_ = $page->look_down('_tag' => 'span', 'class' => 'text-weiss'); > die("cannot find date on requested page") > unless($_->as_text() =~ m/([1-3]?[0-9])\.(1?[0-9])\.(20[0-9]{2})/); > $day = ParseDate("$3-$2-$1 00:00:00"); > >! #-- okay, that's okay as well, yippie! >! foreach ($page->content_list()) { > my (%show, $begintime, $stoptime, $popup); >- next unless(ref($_) eq "HTML::Element"); > >! if ($_->tag eq "table" and $_->attr("cellpadding") eq "2") { > last unless($_->as_text() =~ m/weitere Sendungen/); >! last if($grab->{lasttime} >= 86400); #-- don't request another page, if day's over > >! #-- okay, we have even more shows available, scan that page as well ... > my $link = $_->extract_links('a'); >! $grab->{url} = "http://www.tvtoday.de" . $link->[scalar(@$link)-1]->[0]; > return; > } > >! if($_->tag eq "center" && >! $_->as_text() =~ m/Es tut uns leid,.*aber wir konnten keine Sendungen/) { >! warn "tvtoday.de has no information available for ", $grab->{channel}; >! last; >! } > >- #-- ignore everything but table's, since these hold our information >- next unless($_->tag eq "table"); >- > #-- okay, parse this table now (each table is one show) > my @el = $_->content_list(); > die unless(ref($el[0]) eq "HTML::Element" and $el[0]->tag eq "tr"); > >- #-- if it's the headline table, ignore it ... >- unless($over_headline_table) { >- my $headline = $el[0]->look_down('_tag' => 'span', 'class' => 'headline-balken') >- or next; >- >- next unless($headline->as_text() =~ m/P R O G R A M M/); >- >- $over_headline_table ++, next; >- } >- > @el = $el[0]->content_list(); > > $_ = shift @el; #-- in this column there's the logo of the tv station >--- 449,506 ---- > } > > sub parse_page($$) { >! my $page = shift(@_); >! #$page->dump(); exit 0; > my $grab = shift @_; > my $over_headline_table = 0; > my $pos; > my $day; > >! if(($_ = $page->look_down('_tag' => 'span', 'class' => 'text'))) { >! if($_->as_text() =~ m/aber wir konnten keine Sendungen mit dieser/) { >! warn "no information available for channel ", $grab->{channel}; >! undef($grab->{url}); >! return; >! } >! } >! > #-- extract date of grabbed data from retrieved webpage ... >! $_ = $page->look_down('_tag' => 'td', 'class' => 'navigator-hhead-large'); > die("cannot find date on requested page") > unless($_->as_text() =~ m/([1-3]?[0-9])\.(1?[0-9])\.(20[0-9]{2})/); > $day = ParseDate("$3-$2-$1 00:00:00"); > >! #-- well, now let's scan the table for programme data >! foreach ($page->look_down('_tag' => 'table')) { > my (%show, $begintime, $stoptime, $popup); > >! next if(not defined($_->attr('cellpadding'))); >! >! if ($_->attr("cellpadding") eq "2") { > last unless($_->as_text() =~ m/weitere Sendungen/); >! #-- don't request another page, if day's over >! last if($grab->{lasttime} >= 86400); > >! #-- we have even more shows available, scan that page as well ... > my $link = $_->extract_links('a'); >! $grab->{url} = "http://programm.tvtoday.de" . >! $link->[scalar(@$link)-1]->[0]; > return; > } > >! ### skip table, not containing data for us ... >! #print STDERR "cellpadding: ", $_->attr('cellpadding'), "\n"; >! #$_->dump(); >! next if($_->attr('cellpadding') ne "4"); >! #print STDERR "width: ", $_->attr('width'), "\n"; >! next if(not defined($_->attr('width')) >! or $_->attr('width') ne "585"); >! #print STDERR "got through ...\n"; > > #-- okay, parse this table now (each table is one show) > my @el = $_->content_list(); > die unless(ref($el[0]) eq "HTML::Element" and $el[0]->tag eq "tr"); > > @el = $el[0]->content_list(); > > $_ = shift @el; #-- in this column there's the logo of the tv station >*************** >*** 515,520 **** >--- 513,519 ---- > $begintime = $1 * 3600 + $2 * 60; > $begintime += 86400 if($grab->{'lasttime'} >= 86400); > >+ #print STDERR "start: $begintime\n"; > my $start = parse_local_date(DateCalc($day, "+ $begintime seconds"), $TZ); > my ($start_base, $start_tz) = @{date_to_local($start, $TZ)}; > $show{"start"} = UnixDate($start_base, '%q') . " $start_tz"; >*************** >*** 538,544 **** > $show{title} = [[ $span, $lang ]]; > } > elsif (ref($span) eq "HTML::Element" and $span->tag eq "a") { >! $popup = "http://www.tvtoday.de/" . $span->attr('href'); > > my $tag = (($span->content_list())[0]->content_list())[0]; > die unless(ref($tag) eq "HTML::Element" and $tag->tag eq "u"); >--- 537,543 ---- > $show{title} = [[ $span, $lang ]]; > } > elsif (ref($span) eq "HTML::Element" and $span->tag eq "a") { >! $popup = "http://programm.tvtoday.de/" . $span->attr('href'); > > my $tag = (($span->content_list())[0]->content_list())[0]; > die unless(ref($tag) eq "HTML::Element" and $tag->tag eq "u"); >*************** >*** 1079,1085 **** > #-- get channel logos > sub get_icons() { > my %icons; >! my $url="http://www.tvtoday.de/tv/programm/programm.php?ztag=0&sparte=alle&uhrzeit=Ax00&sender="; > my $chan; > my $tag; > my $addr; >--- 1078,1084 ---- > #-- get channel logos > sub get_icons() { > my %icons; >! my $url="http://programm.tvtoday.de/tv/programm/programm.php?ztag=0&sparte=alle&uhrzeit=Ax00&sender="; > my $chan; > my $tag; > my $addr; >*************** >*** 1118,1124 **** > #-- get channel listing > sub get_channels() { > my %channels; >! my $url="http://www.tvtoday.de/tv/programm/programm.php?ztag=0&sparte=alle&uhrzeit=Ax00&sender=alle"; > > my $tb=new HTML::TreeBuilder(); > my $htmldata = get_page($url); >--- 1117,1123 ---- > #-- get channel listing > sub get_channels() { > my %channels; >! my $url="http://programm.tvtoday.de/tv/programm/programm.php?ztag=0&sparte=alle&uhrzeit=Ax00&sender=alle"; > > my $tb=new HTML::TreeBuilder(); > my $htmldata = get_page($url); >*************** >*** 1238,1248 **** > my $retry = 0; > > local $SIG{__DIE__} = sub { die "\n$url: $_[0]" }; > >! while($retry < 4) { >! my $got = get_nice($url . ($retry ? "&retry=$retry" : "")); > $retry ++; > > die "retrieved webpage doesn't look like a tvtoday.de page, maybe a proxy error?" > unless(index($got, "<title>TV TODAY</title>")); > >--- 1237,1250 ---- > my $retry = 0; > > local $SIG{__DIE__} = sub { die "\n$url: $_[0]" }; >+ #print STDERR "get_page: $url\n"; > >! while($retry < 2) { >! my $got = eval { get_nice($url . ($retry ? "&retry=$retry" : "")); }; > $retry ++; > >+ next if($@); # unable to download, doesn't look too good for us. >+ > die "retrieved webpage doesn't look like a tvtoday.de page, maybe a proxy error?" > unless(index($got, "<title>TV TODAY</title>")); >
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Diff
View Attachment As Raw
Actions:
View
|
Diff
Attachments on
bug 100926
:
67766
| 67767 |
68316
|
68318