--- xmltv-0.5.40/grab/de_tvtoday/tv_grab_de_tvtoday.in.orig 2005-03-14 21:35:15.000000000 +0100 +++ xmltv-0.5.40/grab/de_tvtoday/tv_grab_de_tvtoday.in 2005-09-13 02:34:20.000000000 +0200 @@ -151,8 +151,8 @@ #-- attributes of xmltv root element my $head = { - 'source-data-url' => 'http://www.tvtoday.de/tv/programm/programm.php', - 'source-info-url' => 'http://www.tvtoday.de/', + 'source-data-url' => 'http://programm.tvtoday.de/tv/programm/programm.php', + 'source-info-url' => 'http://www.tvtoday.de/', 'generator-info-name' => 'XMLTV', 'generator-info-url' => 'http://membled.com/work/apps/xmltv/', }; @@ -430,7 +430,7 @@ my $grab = { 'channel' => channel_id($ch), - 'url' => "http://www.tvtoday.de/tv/programm/programm.php?ztag=$offset&sparte=alle&uhrzeit=Ax00&sender=$ch", + 'url' => "http://programm.tvtoday.de/tv/programm/programm.php?ztag=$offset&sparte=alle&uhrzeit=Ax00&sender=$ch", 'lasttime' => 0, 'lastday' => $lday, }; @@ -449,60 +449,58 @@ } sub parse_page($$) { - my $page = shift(@_)->look_down('_tag' => 'td', 'valign' => 'top', 'width' => 566); + my $page = shift(@_); + #$page->dump(); exit 0; my $grab = shift @_; my $over_headline_table = 0; my $pos; my $day; - #-- delete the navigation form - my $form = $page->look_down('_tag' => 'form', 'action' => '/tv/programm/programm.php', 'method' => 'get') or die("navigation form not found in requested page"); - $form->delete(); - + if(($_ = $page->look_down('_tag' => 'span', 'class' => 'text'))) { + if($_->as_text() =~ m/aber wir konnten keine Sendungen mit dieser/) { + warn "no information available for channel ", $grab->{channel}; + undef($grab->{url}); + return; + } + } + #-- extract date of grabbed data from retrieved webpage ... - $_ = $page->look_down('_tag' => 'span', 'class' => 'text-weiss'); + $_ = $page->look_down('_tag' => 'td', 'class' => 'navigator-hhead-large'); die("cannot find date on requested page") unless($_->as_text() =~ m/([1-3]?[0-9])\.(1?[0-9])\.(20[0-9]{2})/); $day = ParseDate("$3-$2-$1 00:00:00"); - #-- okay, that's okay as well, yippie! - foreach ($page->content_list()) { + #-- well, now let's scan the table for programme data + foreach ($page->look_down('_tag' => 'table')) { my (%show, $begintime, $stoptime, $popup); - next unless(ref($_) eq "HTML::Element"); - if ($_->tag eq "table" and $_->attr("cellpadding") eq "2") { + next if(not defined($_->attr('cellpadding'))); + + if ($_->attr("cellpadding") eq "2") { last unless($_->as_text() =~ m/weitere Sendungen/); - last if($grab->{lasttime} >= 86400); #-- don't request another page, if day's over + #-- don't request another page, if day's over + last if($grab->{lasttime} >= 86400); - #-- okay, we have even more shows available, scan that page as well ... + #-- we have even more shows available, scan that page as well ... my $link = $_->extract_links('a'); - $grab->{url} = "http://www.tvtoday.de" . $link->[scalar(@$link)-1]->[0]; + $grab->{url} = "http://programm.tvtoday.de" . + $link->[scalar(@$link)-1]->[0]; return; } - if($_->tag eq "center" && - $_->as_text() =~ m/Es tut uns leid,.*aber wir konnten keine Sendungen/) { - warn "tvtoday.de has no information available for ", $grab->{channel}; - last; - } + ### skip table, not containing data for us ... + #print STDERR "cellpadding: ", $_->attr('cellpadding'), "\n"; + #$_->dump(); + next if($_->attr('cellpadding') ne "4"); + #print STDERR "width: ", $_->attr('width'), "\n"; + next if(not defined($_->attr('width')) + or $_->attr('width') ne "585"); + #print STDERR "got through ...\n"; - #-- ignore everything but table's, since these hold our information - next unless($_->tag eq "table"); - #-- okay, parse this table now (each table is one show) my @el = $_->content_list(); die unless(ref($el[0]) eq "HTML::Element" and $el[0]->tag eq "tr"); - #-- if it's the headline table, ignore it ... - unless($over_headline_table) { - my $headline = $el[0]->look_down('_tag' => 'span', 'class' => 'headline-balken') - or next; - - next unless($headline->as_text() =~ m/P R O G R A M M/); - - $over_headline_table ++, next; - } - @el = $el[0]->content_list(); $_ = shift @el; #-- in this column there's the logo of the tv station @@ -515,6 +513,7 @@ $begintime = $1 * 3600 + $2 * 60; $begintime += 86400 if($grab->{'lasttime'} >= 86400); + #print STDERR "start: $begintime\n"; my $start = parse_local_date(DateCalc($day, "+ $begintime seconds"), $TZ); my ($start_base, $start_tz) = @{date_to_local($start, $TZ)}; $show{"start"} = UnixDate($start_base, '%q') . " $start_tz"; @@ -538,7 +537,7 @@ $show{title} = [[ $span, $lang ]]; } elsif (ref($span) eq "HTML::Element" and $span->tag eq "a") { - $popup = "http://www.tvtoday.de/" . $span->attr('href'); + $popup = "http://programm.tvtoday.de/" . $span->attr('href'); my $tag = (($span->content_list())[0]->content_list())[0]; die unless(ref($tag) eq "HTML::Element" and $tag->tag eq "u"); @@ -1079,7 +1078,7 @@ #-- get channel logos sub get_icons() { my %icons; - my $url="http://www.tvtoday.de/tv/programm/programm.php?ztag=0&sparte=alle&uhrzeit=Ax00&sender="; + my $url="http://programm.tvtoday.de/tv/programm/programm.php?ztag=0&sparte=alle&uhrzeit=Ax00&sender="; my $chan; my $tag; my $addr; @@ -1118,7 +1117,7 @@ #-- get channel listing sub get_channels() { my %channels; - my $url="http://www.tvtoday.de/tv/programm/programm.php?ztag=0&sparte=alle&uhrzeit=Ax00&sender=alle"; + my $url="http://programm.tvtoday.de/tv/programm/programm.php?ztag=0&sparte=alle&uhrzeit=Ax00&sender=alle"; my $tb=new HTML::TreeBuilder(); my $htmldata = get_page($url);