Lines 151-158
Link Here
|
151 |
|
151 |
|
152 |
#-- attributes of xmltv root element |
152 |
#-- attributes of xmltv root element |
153 |
my $head = { |
153 |
my $head = { |
154 |
'source-data-url' => 'http://www.tvtoday.de/tv/programm/programm.php', |
154 |
'source-data-url' => 'http://programm.tvtoday.de/tv/programm/programm.php', |
155 |
'source-info-url' => 'http://www.tvtoday.de/', |
155 |
'source-info-url' => 'http://www.tvtoday.de/', |
156 |
'generator-info-name' => 'XMLTV', |
156 |
'generator-info-name' => 'XMLTV', |
157 |
'generator-info-url' => 'http://membled.com/work/apps/xmltv/', |
157 |
'generator-info-url' => 'http://membled.com/work/apps/xmltv/', |
158 |
}; |
158 |
}; |
Lines 430-436
Link Here
|
430 |
|
430 |
|
431 |
my $grab = { |
431 |
my $grab = { |
432 |
'channel' => channel_id($ch), |
432 |
'channel' => channel_id($ch), |
433 |
'url' => "http://www.tvtoday.de/tv/programm/programm.php?ztag=$offset&sparte=alle&uhrzeit=Ax00&sender=$ch", |
433 |
'url' => "http://programm.tvtoday.de/tv/programm/programm.php?ztag=$offset&sparte=alle&uhrzeit=Ax00&sender=$ch", |
434 |
'lasttime' => 0, |
434 |
'lasttime' => 0, |
435 |
'lastday' => $lday, |
435 |
'lastday' => $lday, |
436 |
}; |
436 |
}; |
Lines 449-508
Link Here
|
449 |
} |
449 |
} |
450 |
|
450 |
|
451 |
sub parse_page($$) { |
451 |
sub parse_page($$) { |
452 |
my $page = shift(@_)->look_down('_tag' => 'td', 'valign' => 'top', 'width' => 566); |
452 |
my $page = shift(@_); |
|
|
453 |
#$page->dump(); exit 0; |
453 |
my $grab = shift @_; |
454 |
my $grab = shift @_; |
454 |
my $over_headline_table = 0; |
455 |
my $over_headline_table = 0; |
455 |
my $pos; |
456 |
my $pos; |
456 |
my $day; |
457 |
my $day; |
457 |
|
458 |
|
458 |
#-- delete the navigation form |
459 |
if(($_ = $page->look_down('_tag' => 'span', 'class' => 'text'))) { |
459 |
my $form = $page->look_down('_tag' => 'form', 'action' => '/tv/programm/programm.php', 'method' => 'get') or die("navigation form not found in requested page"); |
460 |
if($_->as_text() =~ m/aber wir konnten keine Sendungen mit dieser/) { |
460 |
$form->delete(); |
461 |
warn "no information available for channel ", $grab->{channel}; |
461 |
|
462 |
undef($grab->{url}); |
|
|
463 |
return; |
464 |
} |
465 |
} |
466 |
|
462 |
#-- extract date of grabbed data from retrieved webpage ... |
467 |
#-- extract date of grabbed data from retrieved webpage ... |
463 |
$_ = $page->look_down('_tag' => 'span', 'class' => 'text-weiss'); |
468 |
$_ = $page->look_down('_tag' => 'td', 'class' => 'navigator-hhead-large'); |
464 |
die("cannot find date on requested page") |
469 |
die("cannot find date on requested page") |
465 |
unless($_->as_text() =~ m/([1-3]?[0-9])\.(1?[0-9])\.(20[0-9]{2})/); |
470 |
unless($_->as_text() =~ m/([1-3]?[0-9])\.(1?[0-9])\.(20[0-9]{2})/); |
466 |
$day = ParseDate("$3-$2-$1 00:00:00"); |
471 |
$day = ParseDate("$3-$2-$1 00:00:00"); |
467 |
|
472 |
|
468 |
#-- okay, that's okay as well, yippie! |
473 |
#-- well, now let's scan the table for programme data |
469 |
foreach ($page->content_list()) { |
474 |
foreach ($page->look_down('_tag' => 'table')) { |
470 |
my (%show, $begintime, $stoptime, $popup); |
475 |
my (%show, $begintime, $stoptime, $popup); |
471 |
next unless(ref($_) eq "HTML::Element"); |
|
|
472 |
|
476 |
|
473 |
if ($_->tag eq "table" and $_->attr("cellpadding") eq "2") { |
477 |
next if(not defined($_->attr('cellpadding'))); |
|
|
478 |
|
479 |
if ($_->attr("cellpadding") eq "2") { |
474 |
last unless($_->as_text() =~ m/weitere Sendungen/); |
480 |
last unless($_->as_text() =~ m/weitere Sendungen/); |
475 |
last if($grab->{lasttime} >= 86400); #-- don't request another page, if day's over |
481 |
#-- don't request another page, if day's over |
|
|
482 |
last if($grab->{lasttime} >= 86400); |
476 |
|
483 |
|
477 |
#-- okay, we have even more shows available, scan that page as well ... |
484 |
#-- we have even more shows available, scan that page as well ... |
478 |
my $link = $_->extract_links('a'); |
485 |
my $link = $_->extract_links('a'); |
479 |
$grab->{url} = "http://www.tvtoday.de" . $link->[scalar(@$link)-1]->[0]; |
486 |
$grab->{url} = "http://programm.tvtoday.de" . |
|
|
487 |
$link->[scalar(@$link)-1]->[0]; |
480 |
return; |
488 |
return; |
481 |
} |
489 |
} |
482 |
|
490 |
|
483 |
if($_->tag eq "center" && |
491 |
### skip table, not containing data for us ... |
484 |
$_->as_text() =~ m/Es tut uns leid,.*aber wir konnten keine Sendungen/) { |
492 |
#print STDERR "cellpadding: ", $_->attr('cellpadding'), "\n"; |
485 |
warn "tvtoday.de has no information available for ", $grab->{channel}; |
493 |
#$_->dump(); |
486 |
last; |
494 |
next if($_->attr('cellpadding') ne "4"); |
487 |
} |
495 |
#print STDERR "width: ", $_->attr('width'), "\n"; |
|
|
496 |
next if(not defined($_->attr('width')) |
497 |
or $_->attr('width') ne "585"); |
498 |
#print STDERR "got through ...\n"; |
488 |
|
499 |
|
489 |
#-- ignore everything but table's, since these hold our information |
|
|
490 |
next unless($_->tag eq "table"); |
491 |
|
492 |
#-- okay, parse this table now (each table is one show) |
500 |
#-- okay, parse this table now (each table is one show) |
493 |
my @el = $_->content_list(); |
501 |
my @el = $_->content_list(); |
494 |
die unless(ref($el[0]) eq "HTML::Element" and $el[0]->tag eq "tr"); |
502 |
die unless(ref($el[0]) eq "HTML::Element" and $el[0]->tag eq "tr"); |
495 |
|
503 |
|
496 |
#-- if it's the headline table, ignore it ... |
|
|
497 |
unless($over_headline_table) { |
498 |
my $headline = $el[0]->look_down('_tag' => 'span', 'class' => 'headline-balken') |
499 |
or next; |
500 |
|
501 |
next unless($headline->as_text() =~ m/P R O G R A M M/); |
502 |
|
503 |
$over_headline_table ++, next; |
504 |
} |
505 |
|
506 |
@el = $el[0]->content_list(); |
504 |
@el = $el[0]->content_list(); |
507 |
|
505 |
|
508 |
$_ = shift @el; #-- in this column there's the logo of the tv station |
506 |
$_ = shift @el; #-- in this column there's the logo of the tv station |
Lines 515-520
Link Here
|
515 |
$begintime = $1 * 3600 + $2 * 60; |
513 |
$begintime = $1 * 3600 + $2 * 60; |
516 |
$begintime += 86400 if($grab->{'lasttime'} >= 86400); |
514 |
$begintime += 86400 if($grab->{'lasttime'} >= 86400); |
517 |
|
515 |
|
|
|
516 |
#print STDERR "start: $begintime\n"; |
518 |
my $start = parse_local_date(DateCalc($day, "+ $begintime seconds"), $TZ); |
517 |
my $start = parse_local_date(DateCalc($day, "+ $begintime seconds"), $TZ); |
519 |
my ($start_base, $start_tz) = @{date_to_local($start, $TZ)}; |
518 |
my ($start_base, $start_tz) = @{date_to_local($start, $TZ)}; |
520 |
$show{"start"} = UnixDate($start_base, '%q') . " $start_tz"; |
519 |
$show{"start"} = UnixDate($start_base, '%q') . " $start_tz"; |
Lines 538-544
Link Here
|
538 |
$show{title} = [[ $span, $lang ]]; |
537 |
$show{title} = [[ $span, $lang ]]; |
539 |
} |
538 |
} |
540 |
elsif (ref($span) eq "HTML::Element" and $span->tag eq "a") { |
539 |
elsif (ref($span) eq "HTML::Element" and $span->tag eq "a") { |
541 |
$popup = "http://www.tvtoday.de/" . $span->attr('href'); |
540 |
$popup = "http://programm.tvtoday.de/" . $span->attr('href'); |
542 |
|
541 |
|
543 |
my $tag = (($span->content_list())[0]->content_list())[0]; |
542 |
my $tag = (($span->content_list())[0]->content_list())[0]; |
544 |
die unless(ref($tag) eq "HTML::Element" and $tag->tag eq "u"); |
543 |
die unless(ref($tag) eq "HTML::Element" and $tag->tag eq "u"); |
Lines 1079-1085
Link Here
|
1079 |
#-- get channel logos |
1078 |
#-- get channel logos |
1080 |
sub get_icons() { |
1079 |
sub get_icons() { |
1081 |
my %icons; |
1080 |
my %icons; |
1082 |
my $url="http://www.tvtoday.de/tv/programm/programm.php?ztag=0&sparte=alle&uhrzeit=Ax00&sender="; |
1081 |
my $url="http://programm.tvtoday.de/tv/programm/programm.php?ztag=0&sparte=alle&uhrzeit=Ax00&sender="; |
1083 |
my $chan; |
1082 |
my $chan; |
1084 |
my $tag; |
1083 |
my $tag; |
1085 |
my $addr; |
1084 |
my $addr; |
Lines 1118-1124
Link Here
|
1118 |
#-- get channel listing |
1117 |
#-- get channel listing |
1119 |
sub get_channels() { |
1118 |
sub get_channels() { |
1120 |
my %channels; |
1119 |
my %channels; |
1121 |
my $url="http://www.tvtoday.de/tv/programm/programm.php?ztag=0&sparte=alle&uhrzeit=Ax00&sender=alle"; |
1120 |
my $url="http://programm.tvtoday.de/tv/programm/programm.php?ztag=0&sparte=alle&uhrzeit=Ax00&sender=alle"; |
1122 |
|
1121 |
|
1123 |
my $tb=new HTML::TreeBuilder(); |
1122 |
my $tb=new HTML::TreeBuilder(); |
1124 |
my $htmldata = get_page($url); |
1123 |
my $htmldata = get_page($url); |