|
Lines 620-645
Link Here
|
| 620 |
continue |
620 |
continue |
| 621 |
try: |
621 |
try: |
| 622 |
infostr = infoh5.next |
622 |
infostr = infoh5.next |
| 623 |
key = infostr.string.strip(':').lower() |
623 |
key = infostr.string.strip(':').lower().replace(' ', '_') |
| 624 |
nextsibling = nextsibling = infoh5.nextSibling.strip() |
624 |
nextsibling = nextsibling = infoh5.nextSibling.strip() |
| 625 |
sections = info.findAll('a', { 'href' : re.compile('/Sections') }) |
625 |
sections = info.findAll('a', { 'href' : re.compile('/Sections') }) |
| 626 |
lists = info.findAll('a', { 'href' : re.compile('/List') }) |
626 |
lists = info.findAll('a', { 'href' : re.compile('/List') }) |
| 627 |
if len(nextsibling) > 0: |
627 |
if len(nextsibling) > 0: |
| 628 |
self.info[key] = nextsibling |
628 |
s = nextsibling.replace('&amp;','&') |
|
|
629 |
s = s.replace('&lt;','<')
| 630 |
s = s.replace('&gt;','>') |
| 631 |
s = s.replace('\'','"') |
| 632 |
self.info[key] = s |
| 629 |
elif len(sections) > 0: |
633 |
elif len(sections) > 0: |
| 630 |
items = [] |
634 |
items = [] |
| 631 |
for item in sections: |
635 |
for item in sections: |
| 632 |
items.append(item.string) |
636 |
s = item.string.replace('&amp;','&') |
| 633 |
self.info[key] = items |
637 |
s = s.replace('&lt;','<') |
|
|
638 |
s = s.replace('&gt;','>') |
| 639 |
s = s.replace('\'','"') |
| 640 |
items.append(s) |
| 641 |
self.info[key] = ' / '.join(items) |
| 634 |
elif len(lists) > 0: |
642 |
elif len(lists) > 0: |
| 635 |
items = [] |
643 |
items = [] |
| 636 |
for item in lists: |
644 |
for item in lists: |
| 637 |
items.append(item.string) |
645 |
s = item.string.replace('&amp;','&') |
| 638 |
self.info[key] = items |
646 |
s = s.replace('&lt;','<') |
|
|
647 |
s = s.replace('&gt;','>') |
| 648 |
s = s.replace('\'','"') |
| 649 |
items.append(s) |
| 650 |
self.info[key] = ' / '.join(items) |
| 639 |
except: |
651 |
except: |
| 640 |
pass |
652 |
pass |
| 641 |
|
653 |
|
| 642 |
print self.info |
654 |
print self.info |
|
|
655 |
|
| 643 |
# Find Plot Outline/Summary: |
656 |
# Find Plot Outline/Summary: |
| 644 |
# Normally the tag is named "Plot Outline:" - however sometimes |
657 |
# Normally the tag is named "Plot Outline:" - however sometimes |
| 645 |
# the tag is "Plot Summary:". Search for both strings. |
658 |
# the tag is "Plot Summary:". Search for both strings. |
|
Lines 659-673
Link Here
|
| 659 |
else: |
672 |
else: |
| 660 |
self.info['tagline'] = u'' |
673 |
self.info['tagline'] = u'' |
| 661 |
|
674 |
|
| 662 |
self.info['genre'] = '' |
|
|
| 663 |
genre=soup.find(text='Genre:').parent |
| 664 |
genres = [] |
| 665 |
while genre.findNextSibling('a').string != 'more': |
| 666 |
genres.append(genre.findNextSibling('a').string.strip()) |
| 667 |
genre=genre.findNextSibling('a') |
| 668 |
self.info['genre'] = genres[0] |
| 669 |
for i in genres[1:]: |
| 670 |
self.info['genre'] += ' / ' + i |
| 671 |
rating = soup.find(text='User Rating:').findNext(text=re.compile('/10')) |
675 |
rating = soup.find(text='User Rating:').findNext(text=re.compile('/10')) |
| 672 |
if rating: |
676 |
if rating: |
| 673 |
votes = rating.findNext('a') |
677 |
votes = rating.findNext('a') |