|
Lines 876-878
Link Here
|
| 876 |
except (LookupError, UnicodeError, ValueError, HeaderParseError): |
876 |
except (LookupError, UnicodeError, ValueError, HeaderParseError): |
| 877 |
# possibly charset problem. return with undecoded string in one line. |
877 |
# possibly charset problem. return with undecoded string in one line. |
| 878 |
return EMPTYSTRING.join(s.splitlines()) |
878 |
return EMPTYSTRING.join(s.splitlines()) |
|
|
879 |
|
| 880 |
|
| 881 |
# Patterns and functions to flag possible XSS attacks in HTML. |
| 882 |
# This list is compiled from information at http://ha.ckers.org/xss.html, |
| 883 |
# http://www.quirksmode.org/js/events_compinfo.html, |
| 884 |
# http://www.htmlref.com/reference/appa/events1.htm, |
| 885 |
# http://lxr.mozilla.org/mozilla/source/content/events/src/nsDOMEvent.cpp#59, |
| 886 |
# http://www.w3.org/TR/DOM-Level-2-Events/events.html and |
| 887 |
# http://www.xulplanet.com/references/elemref/ref_EventHandlers.html |
| 888 |
# Many thanks are due to Moritz Naumann for his assistance with this. |
| 889 |
# Literal tag openers are matched anywhere in the text.
_badwords = [
    '<i?frame',
    '<link',
    '<meta',
    '<script',
]
# Script-language names and event-handler names only count when they stand
# alone, so each fragment below is wrapped in the same guard: preceded by
# start-of-string or a non-word character, and followed by a non-word
# character or end-of-string.  The order of the fragments is preserved
# because the list is later joined into one alternation.
_badwords.extend(
    r'(?:^|\W)' + fragment + r'(?:\W|$)'
    for fragment in (
        'j(?:ava)?script',
        'vbs(?:cript)?',
        'domactivate',
        'domattrmodified',
        'domcharacterdatamodified',
        'domfocus(?:in|out)',
        'dommenuitem(?:in)?active',
        'dommousescroll',
        'domnodeinserted(?:intodocument)?',
        'domnoderemoved(?:fromdocument)?',
        'domsubtreemodified',
        'fscommand',
        'onabort',
        'on(?:de)?activate',
        'on(?:after|before)print',
        'on(?:after|before)update',
        'onbefore(?:(?:de)?activate|copy|cut|editfocus|paste)',
        'onbeforeunload',
        'onbegin',
        'onblur',
        'onbounce',
        'onbroadcast',
        'on(?:cell)?change',
        'oncheckboxstatechange',
        'on(?:dbl)?click',
        'onclose',
        'oncommand(?:update)?',
        'oncomposition(?:end|start)',
        'oncontextmenu',
        'oncontrolselect',
        'oncopy',
        'oncut',
        'ondataavailable',
        'ondataset(?:changed|complete)',
        'ondrag(?:drop|end|enter|exit|gesture|leave|over)?',
        'ondragstart',
        'ondrop',
        'onend',
        'onerror(?:update)?',
        'onfilterchange',
        'onfinish',
        'onfocus(?:in|out)?',
        'onhelp',
        'oninput',
        'onkey(?:up|down|press)',
        'onlayoutcomplete',
        'on(?:un)?load',
        'onlosecapture',
        'onmedia(?:complete|error)',
        'onmouse(?:down|enter|leave|move|out|over|up|wheel)',
        'onmove(?:end|start)?',
        'on(?:off|on)line',
        'onoutofsync',
        'onoverflow(?:changed)?',
        'onpage(?:hide|show)',
        'onpaint',
        'onpaste',
        'onpause',
        'onpopup(?:hidden|hiding|showing|shown)',
        'onprogress',
        'onpropertychange',
        'onradiostatechange',
        'onreadystatechange',
        'onrepeat',
        'onreset',
        'onresize(?:end|start)?',
        'onresume',
        'onreverse',
        'onrow(?:delete|enter|exit|inserted)',
        'onrows(?:delete|enter|inserted)',
        'onscroll',
        'onseek',
        'onselect(?:start)?',
        'onselectionchange',
        'onstart',
        'onstop',
        'onsubmit',
        'onsync(?:from|to)preference',
        'onsyncrestored',
        'ontext',
        'ontimeerror',
        'ontrackchange',
        'onunderflow',
        'onurlflip',
        'seeksegmenttime',
        'svgabort',
        'svgerror',
        'svgload',
        'svgresize',
        'svgscroll',
        'svgunload',
        'svgzoom',
    )
)
| 987 |
|
| 988 |
|
| 989 |
# One case-insensitive alternation over every entry of _badwords, so a
# single search reports whether any suspicious construct is present.
_badhtml = re.compile('|'.join(_badwords), re.I)
# Control characters that can be slipped into a word to break it up and
# dodge _badhtml: \000-\037 except LF (\012) and CR (\015), plus \177-\237.
_filterchars = re.compile('[\000-\011\013\014\016-\037\177-\237]')
# Recognizes the encoded forms handed to _translate for decoding: decimal
# '&#NN', hexadecimal '&#xNN' (trailing ';' optional on both) and '%xx'.
_encodedchars = re.compile(
    '(&#[0-9]+;?)|(&#x[0-9a-f]+;?)|(%[0-9a-f]{2})', re.I)
| 998 |
|
| 999 |
|
| 1000 |
def _translate(mo):
    """Return the character encoded by a matched '&#...' or '%xx' sequence.

    mo is a regular expression match object whose full match is the
    encoded sequence.  The result is a one-character string, or the empty
    string when the number cannot be parsed or is 256 or greater.
    """
    # Lower-case and peel off the '&#' prefix and any trailing ';', leaving
    # 'NN' (decimal), 'xNN' (hex entity) or '%NN' (URL escape).
    token = mo.group().lower().strip('&#;')
    is_hex = token[:1] in ('x', '%')
    try:
        code = int(token[1:], 16) if is_hex else int(token, 10)
    except ValueError:
        return ''
    if code >= 256:
        # Out of the single-byte range: drop the sequence entirely.
        return ''
    return chr(code)
| 1014 |
|
| 1015 |
|
| 1016 |
def suspiciousHTML(html):
    """Report whether an HTML string contains anything usable for XSS.

    The check is a plain pattern search for certain tags, scripting
    language names and 'onxxx' event handler names.  No context is
    analyzed, so harmless text can be flagged as well.

    Returns True if a suspicious pattern is found, False otherwise.
    """
    # Decode '&#...' and '%xx' sequences first so encoded keywords are seen.
    decoded = _encodedchars.sub(_translate, html)
    # Then drop control characters that could be used to split a keyword.
    cleaned = _filterchars.sub('', decoded)
    return _badhtml.search(cleaned) is not None