Lines 876-878
Link Here
|
876 |
except (LookupError, UnicodeError, ValueError, HeaderParseError): |
876 |
except (LookupError, UnicodeError, ValueError, HeaderParseError): |
877 |
# possibly charset problem. return with undecoded string in one line. |
877 |
# possibly charset problem. return with undecoded string in one line. |
878 |
return EMPTYSTRING.join(s.splitlines()) |
878 |
return EMPTYSTRING.join(s.splitlines()) |
|
|
879 |
|
880 |
|
881 |
# Patterns and functions to flag possible XSS attacks in HTML. |
882 |
# This list is compiled from information at http://ha.ckers.org/xss.html, |
883 |
# http://www.quirksmode.org/js/events_compinfo.html, |
884 |
# http://www.htmlref.com/reference/appa/events1.htm, |
885 |
# http://lxr.mozilla.org/mozilla/source/content/events/src/nsDOMEvent.cpp#59, |
886 |
# http://www.w3.org/TR/DOM-Level-2-Events/events.html and |
887 |
# http://www.xulplanet.com/references/elemref/ref_EventHandlers.html |
888 |
# Many thanks are due to Moritz Naumann for his assistance with this. |
889 |
_badwords = [ |
890 |
'<i?frame', |
891 |
'<link', |
892 |
'<meta', |
893 |
'<script', |
894 |
r'(?:^|\W)j(?:ava)?script(?:\W|$)', |
895 |
r'(?:^|\W)vbs(?:cript)?(?:\W|$)', |
896 |
r'(?:^|\W)domactivate(?:\W|$)', |
897 |
r'(?:^|\W)domattrmodified(?:\W|$)', |
898 |
r'(?:^|\W)domcharacterdatamodified(?:\W|$)', |
899 |
r'(?:^|\W)domfocus(?:in|out)(?:\W|$)', |
900 |
r'(?:^|\W)dommenuitem(?:in)?active(?:\W|$)', |
901 |
r'(?:^|\W)dommousescroll(?:\W|$)', |
902 |
r'(?:^|\W)domnodeinserted(?:intodocument)?(?:\W|$)', |
903 |
r'(?:^|\W)domnoderemoved(?:fromdocument)?(?:\W|$)', |
904 |
r'(?:^|\W)domsubtreemodified(?:\W|$)', |
905 |
r'(?:^|\W)fscommand(?:\W|$)', |
906 |
r'(?:^|\W)onabort(?:\W|$)', |
907 |
r'(?:^|\W)on(?:de)?activate(?:\W|$)', |
908 |
r'(?:^|\W)on(?:after|before)print(?:\W|$)', |
909 |
r'(?:^|\W)on(?:after|before)update(?:\W|$)', |
910 |
r'(?:^|\W)onbefore(?:(?:de)?activate|copy|cut|editfocus|paste)(?:\W|$)', |
911 |
r'(?:^|\W)onbeforeunload(?:\W|$)', |
912 |
r'(?:^|\W)onbegin(?:\W|$)', |
913 |
r'(?:^|\W)onblur(?:\W|$)', |
914 |
r'(?:^|\W)onbounce(?:\W|$)', |
915 |
r'(?:^|\W)onbroadcast(?:\W|$)', |
916 |
r'(?:^|\W)on(?:cell)?change(?:\W|$)', |
917 |
r'(?:^|\W)oncheckboxstatechange(?:\W|$)', |
918 |
r'(?:^|\W)on(?:dbl)?click(?:\W|$)', |
919 |
r'(?:^|\W)onclose(?:\W|$)', |
920 |
r'(?:^|\W)oncommand(?:update)?(?:\W|$)', |
921 |
r'(?:^|\W)oncomposition(?:end|start)(?:\W|$)', |
922 |
r'(?:^|\W)oncontextmenu(?:\W|$)', |
923 |
r'(?:^|\W)oncontrolselect(?:\W|$)', |
924 |
r'(?:^|\W)oncopy(?:\W|$)', |
925 |
r'(?:^|\W)oncut(?:\W|$)', |
926 |
r'(?:^|\W)ondataavailable(?:\W|$)', |
927 |
r'(?:^|\W)ondataset(?:changed|complete)(?:\W|$)', |
928 |
r'(?:^|\W)ondrag(?:drop|end|enter|exit|gesture|leave|over)?(?:\W|$)', |
929 |
r'(?:^|\W)ondragstart(?:\W|$)', |
930 |
r'(?:^|\W)ondrop(?:\W|$)', |
931 |
r'(?:^|\W)onend(?:\W|$)', |
932 |
r'(?:^|\W)onerror(?:update)?(?:\W|$)', |
933 |
r'(?:^|\W)onfilterchange(?:\W|$)', |
934 |
r'(?:^|\W)onfinish(?:\W|$)', |
935 |
r'(?:^|\W)onfocus(?:in|out)?(?:\W|$)', |
936 |
r'(?:^|\W)onhelp(?:\W|$)', |
937 |
r'(?:^|\W)oninput(?:\W|$)', |
938 |
r'(?:^|\W)onkey(?:up|down|press)(?:\W|$)', |
939 |
r'(?:^|\W)onlayoutcomplete(?:\W|$)', |
940 |
r'(?:^|\W)on(?:un)?load(?:\W|$)', |
941 |
r'(?:^|\W)onlosecapture(?:\W|$)', |
942 |
r'(?:^|\W)onmedia(?:complete|error)(?:\W|$)', |
943 |
r'(?:^|\W)onmouse(?:down|enter|leave|move|out|over|up|wheel)(?:\W|$)', |
944 |
r'(?:^|\W)onmove(?:end|start)?(?:\W|$)', |
945 |
r'(?:^|\W)on(?:off|on)line(?:\W|$)', |
946 |
r'(?:^|\W)onoutofsync(?:\W|$)', |
947 |
r'(?:^|\W)onoverflow(?:changed)?(?:\W|$)', |
948 |
r'(?:^|\W)onpage(?:hide|show)(?:\W|$)', |
949 |
r'(?:^|\W)onpaint(?:\W|$)', |
950 |
r'(?:^|\W)onpaste(?:\W|$)', |
951 |
r'(?:^|\W)onpause(?:\W|$)', |
952 |
r'(?:^|\W)onpopup(?:hidden|hiding|showing|shown)(?:\W|$)', |
953 |
r'(?:^|\W)onprogress(?:\W|$)', |
954 |
r'(?:^|\W)onpropertychange(?:\W|$)', |
955 |
r'(?:^|\W)onradiostatechange(?:\W|$)', |
956 |
r'(?:^|\W)onreadystatechange(?:\W|$)', |
957 |
r'(?:^|\W)onrepeat(?:\W|$)', |
958 |
r'(?:^|\W)onreset(?:\W|$)', |
959 |
r'(?:^|\W)onresize(?:end|start)?(?:\W|$)', |
960 |
r'(?:^|\W)onresume(?:\W|$)', |
961 |
r'(?:^|\W)onreverse(?:\W|$)', |
962 |
r'(?:^|\W)onrow(?:delete|enter|exit|inserted)(?:\W|$)', |
963 |
r'(?:^|\W)onrows(?:delete|enter|inserted)(?:\W|$)', |
964 |
r'(?:^|\W)onscroll(?:\W|$)', |
965 |
r'(?:^|\W)onseek(?:\W|$)', |
966 |
r'(?:^|\W)onselect(?:start)?(?:\W|$)', |
967 |
r'(?:^|\W)onselectionchange(?:\W|$)', |
968 |
r'(?:^|\W)onstart(?:\W|$)', |
969 |
r'(?:^|\W)onstop(?:\W|$)', |
970 |
r'(?:^|\W)onsubmit(?:\W|$)', |
971 |
r'(?:^|\W)onsync(?:from|to)preference(?:\W|$)', |
972 |
r'(?:^|\W)onsyncrestored(?:\W|$)', |
973 |
r'(?:^|\W)ontext(?:\W|$)', |
974 |
r'(?:^|\W)ontimeerror(?:\W|$)', |
975 |
r'(?:^|\W)ontrackchange(?:\W|$)', |
976 |
r'(?:^|\W)onunderflow(?:\W|$)', |
977 |
r'(?:^|\W)onurlflip(?:\W|$)', |
978 |
r'(?:^|\W)seeksegmenttime(?:\W|$)', |
979 |
r'(?:^|\W)svgabort(?:\W|$)', |
980 |
r'(?:^|\W)svgerror(?:\W|$)', |
981 |
r'(?:^|\W)svgload(?:\W|$)', |
982 |
r'(?:^|\W)svgresize(?:\W|$)', |
983 |
r'(?:^|\W)svgscroll(?:\W|$)', |
984 |
r'(?:^|\W)svgunload(?:\W|$)', |
985 |
r'(?:^|\W)svgzoom(?:\W|$)', |
986 |
] |
# This is the actual re to look for the above patterns.  All the fragments
# in _badwords are joined as alternatives and matched case-insensitively.
_badhtml = re.compile('|'.join(_badwords), re.IGNORECASE)
# This is used to filter non-printable characters, some of which can be used
# to break words to avoid recognition.  The class covers \000-\037 except
# \012 and \015, plus \177-\237 (which extends beyond the us-ascii range).
_filterchars = re.compile('[\000-\011\013\014\016-\037\177-\237]')
# This is used to recognize '&#' and '%xx' strings for _translate which
# translates them to characters.  The three alternatives are a decimal
# reference (&#NNN), a hexadecimal reference (&#xHH) - both with an optional
# trailing ';' - and a two hex digit %xx escape.
_encodedchars = re.compile('(&#[0-9]+;?)|(&#x[0-9a-f]+;?)|(%[0-9a-f]{2})',
                           re.IGNORECASE)
998 |
|
999 |
|
1000 |
def _translate(mo): |
1001 |
"""Translate &#... and %xx encodings into the encoded character.""" |
1002 |
match = mo.group().lower().strip('&#;') |
1003 |
try: |
1004 |
if match.startswith('x') or match.startswith('%'): |
1005 |
val = int(match[1:], 16) |
1006 |
else: |
1007 |
val = int(match, 10) |
1008 |
except ValueError: |
1009 |
return '' |
1010 |
if val < 256: |
1011 |
return chr(val) |
1012 |
else: |
1013 |
return '' |
def suspiciousHTML(html):
    """Check HTML string for various tags, script language names and
    'onxxx' actions that can be used in XSS attacks.
    Currently, this is a very simple minded test.  It just looks for
    patterns without analyzing context.  Thus, it potentially flags lots
    of benign stuff.
    Returns True if anything suspicious found, False otherwise.
    """
    # Decode '&#...' and '%xx' sequences first so that encoded spellings of
    # a bad word are seen in their plain form.
    decoded = _encodedchars.sub(_translate, html)
    # Then drop the characters matched by _filterchars, which could otherwise
    # be used to split a bad word.
    cleaned = _filterchars.sub('', decoded)
    return _badhtml.search(cleaned) is not None