Attachment 22911 Details for Bug 36886 – patch-roaring-pengiun

[patch] patch-roaring-pengiun

patch-roaring-pengiun (text/plain), 11.84 KB, created by Brett Simpson on 2003-12-31 06:37:45 UTC

(hide)

Description:

Filename:

MIME Type:

Creator: Brett Simpson

Created: 2003-12-31 06:37:45 UTC

Size: 11.84 KB

patch

obsolete

>diff --unified --recursive --new-file MIME-tools-5.411/lib/MIME/Field/ParamVal.pm MIME-tools-5.411a-RP-Patched-02/lib/MIME/Field/ParamVal.pm
>--- MIME-tools-5.411/lib/MIME/Field/ParamVal.pm	2000-11-04 14:54:49.000000000 -0500
>+++ MIME-tools-5.411a-RP-Patched-02/lib/MIME/Field/ParamVal.pm	2002-08-08 18:25:31.000000000 -0400
>@@ -9,42 +9,42 @@
> =head1 SYNOPSIS
> 
>     # Create an object for a content-type field:
>-    $field = new Mail::Field 'Content-type'; 
>-     
>+    $field = new Mail::Field 'Content-type';
>+
>     # Set some attributes:
>     $field->param('_'        => 'text/html');
>     $field->param('charset'  => 'us-ascii');
>     $field->param('boundary' => '---ABC---');
>-     
>+
>     # Same:
>     $field->set('_'        => 'text/html',
> 		'charset'  => 'us-ascii',
> 		'boundary' => '---ABC---');
>-      
>+
>     # Get an attribute, or undefined if not present:
>     print "no id!"  if defined($field->param('id'));
>-     
>+
>     # Same, but use empty string for missing values:
>     print "no id!"  if ($field->paramstr('id') eq '');
>-                    
>+
>     # Output as string:
>     print $field->stringify, "\n";
> 
> 
> =head1 DESCRIPTION
> 
>-This is an abstract superclass of most MIME fields.  It handles 
>+This is an abstract superclass of most MIME fields.  It handles
> fields with a general syntax like this:
> 
>     Content-Type: Message/Partial;
>-        number=2; total=3;
>-        id="oc=jpbe0M2Yt4s@thumper.bellcore.com"
>+	number=2; total=3;
>+	id="oc=jpbe0M2Yt4s@thumper.bellcore.com"
> 
> Comments are supported I<between> items, like this:
> 
>     Content-Type: Message/Partial; (a comment)
>-        number=2  (another comment) ; (yet another comment) total=3;
>-        id="oc=jpbe0M2Yt4s@thumper.bellcore.com"
>+	number=2  (another comment) ; (yet another comment) total=3;
>+	id="oc=jpbe0M2Yt4s@thumper.bellcore.com"
> 
> 
> =head1 PUBLIC INTERFACE
>@@ -100,6 +100,9 @@
> #      token      =  1*<any  (ASCII) CHAR except SPACE, CTLs, or tspecials>
> #
> my $TSPECIAL = '()<>@,;:\</[]?="';
>+
>+#" Fix emacs highlighting...
>+
> my $TOKEN    = '[^ \x00-\x1f\x80-\xff' . "\Q$TSPECIAL\E" . ']+';
> 
> # Encoded token:
>@@ -108,6 +111,9 @@
> # Pattern to match spaces or comments:
> my $SPCZ     = '(?:\s|$[^$]*\))*';
> 
>+# Pattern to match non-semicolon as fallback for broken MIME
>+# produced by some viruses
>+my $BADTOKEN = '[^;]+';
> 
> #------------------------------
> #
>@@ -133,7 +139,7 @@
> 		  'total'   => 3,
> 		  'id'      => "ocj=pbe0M2");
> 
>-Note that a single argument is taken to be a I<reference> to 
>+Note that a single argument is taken to be a I<reference> to
> a paramhash, while multiple args are taken to be the elements
> of the paramhash themselves.
> 
>@@ -160,16 +166,16 @@
> it as a hash reference.  For example, here is a field with parameters:
> 
>     Content-Type: Message/Partial;
>-        number=2; total=3;
>-        id="oc=jpbe0M2Yt4s@thumper.bellcore.com"
>+	number=2; total=3;
>+	id="oc=jpbe0M2Yt4s@thumper.bellcore.com"
> 
> Here is how you'd extract them:
> 
>     $params = $class->parse_params('content-type');
>     if ($$params{'_'} eq 'message/partial') {
>-        $number = $$params{'number'};
>-        $total  = $$params{'total'};
>-        $id     = $$params{'id'};
>+	$number = $$params{'number'};
>+	$total  = $$params{'total'};
>+	$id     = $$params{'id'};
>     }
> 
> Like field names, parameter names are coerced to lowercase.
>@@ -181,10 +187,40 @@
> 
> =cut
> 
>+sub rfc2231decode {
>+    my($val) = @_;
>+    my($enc, $lang, $rest);
>+
>+    if ($val =~ m/^([^\']*)\'([^\']*)\'(.*)$/) {
>+	# SHOULD REALLY DO SOMETHING MORE INTELLIGENT WITH ENCODING!!!
>+	$enc = $1;
>+	$lang = $2;
>+	$rest = $3;
>+	$rest = rfc2231percent($rest);
>+    } elsif ($val =~ m/^([^\']*)\'([^\']*)$/) {
>+	$enc = $1;
>+	$rest = $2;
>+	$rest = rfc2231percent($rest);
>+    } else {
>+	$rest = rfc2231percent($val);
>+    }
>+    return $rest;
>+}
>+
>+sub rfc2231percent {
>+    # Do percent-substitution
>+    my($str) = @_;
>+    $str =~ s/%([0-9a-fA-F]{2})/pack("c", hex($1))/ge;
>+    return $str;
>+}
>+
> sub parse_params {
>     my ($self, $raw) = @_;
>     my %params = ();
>+    my %rfc2231params = ();
>     my $param;
>+    my $val;
>+    my $part;
> 
>     # Get raw field, and unfold it:
>     defined($raw) or $raw = '';
>@@ -200,9 +236,47 @@
> 	$raw =~ m/\G$SPCZ\;$SPCZ/og or last;             # skip leading separator
> 	$raw =~ m/\G($PARAMNAME)\s*=\s*/og or last;      # give up if not a param
> 	$param = lc($1);
>-	$raw =~ m/\G(\"([^\"]+)\")|\G($TOKEN)|\G($ENCTOKEN)/g or last;   # give up if no value
>-	my ($qstr, $str, $token, $enctoken) = ($1, $2, $3, $4);
>-	$params{$param} = defined($qstr) ? $str : (defined($token) ? $token : $enctoken);
>+	$raw =~ m/\G(\"([^\"]+)\")|\G($ENCTOKEN)|\G($BADTOKEN)|\G($TOKEN)/g or last;   # give up if no value"
>+	my ($qstr, $str, $enctoken, $badtoken, $token) = ($1, $2, $3, $4, $5);
>+	if (defined($badtoken)) {
>+	    # Strip leading/trailing whitespace from badtoken
>+	    $badtoken =~ s/^\s*//;
>+	    $badtoken =~ s/\s*$//;
>+	}
>+	$val = defined($qstr) ? $str :
>+	    (defined($enctoken) ? $enctoken :
>+	     (defined($badtoken) ? $badtoken : $token));
>+
>+	# Do RFC 2231 processing
>+	if ($param =~ /\*/) {
>+	    my($name, $num);
>+	    # Pick out the parts of the parameter
>+	    if ($param =~ m/^([^*]+)\*([^*]+)\*?$/) {
>+		# We have param*number* or param*number
>+		$name = $1;
>+		$num = $2;
>+	    } else {
>+		# Fake a part of zero... not sure how to handle this properly
>+		$param =~ s/\*//g;
>+		$name = $param;
>+		$num = 0;
>+	    }
>+	    # Decode the value unless it was a quoted string
>+	    if (!defined($qstr)) {
>+		$val = rfc2231decode($val);
>+	    }
>+	    $rfc2231params{$name}{$num} .= $val;
>+	} else {
>+	    # Make a fake "part zero" for non-RFC2231 params
>+	    $rfc2231params{$param}{"0"} = $val;
>+	}
>+    }
>+
>+    # Extract reconstructed parameters
>+    foreach $param (keys %rfc2231params) {
>+	foreach $part (sort { $a <=> $b } keys %{$rfc2231params{$param}}) {
>+	    $params{$param} .= $rfc2231params{$param}{$part};
>+	}
> 	debug "   field param <$param> = <$params{$param}>";
>     }
> 
>@@ -227,7 +301,7 @@
> 
>     # Allow use as constructor, for MIME::Head:
>     ref($self) or $self = bless({}, $self);
>-    
>+
>     # Get params, and stuff them into the self object:
>     $self->set($self->parse_params($string));
> }
>diff --unified --recursive --new-file MIME-tools-5.411/lib/MIME/Parser.pm MIME-tools-5.411a-RP-Patched-02/lib/MIME/Parser.pm
>--- MIME-tools-5.411/lib/MIME/Parser.pm	2000-11-12 00:55:11.000000000 -0500
>+++ MIME-tools-5.411a-RP-Patched-02/lib/MIME/Parser.pm	2002-10-07 11:29:59.000000000 -0400
>@@ -250,6 +250,7 @@
>     $self->{MP5_IgnoreErrors}    = 1;
>     $self->{MP5_UseInnerFiles}   = 0;
>     $self->{MP5_UUDecode}        = 0;
>+    $self->{MP5_MaxParts}        = -1;
> 
>     $self->interface(ENTITY_CLASS => 'MIME::Entity');
>     $self->interface(HEAD_CLASS   => 'MIME::Head');
>@@ -277,6 +278,7 @@
>     $self->{MP5_Filer}->results($self->{MP5_Results});
>     $self->{MP5_Filer}->init_parse();
>     $self->{MP5_Filer}->purgeable([]);   ### just to be safe
>+    $self->{MP5_NumParts} = 0;
>     1;
> }
> 
>@@ -378,16 +380,17 @@
> =item extract_nested_messages OPTION
> 
> I<Instance method.>
>-Some MIME messages will contain a part of type C<message/rfc822>:
>+Some MIME messages will contain a part of type C<message/rfc822>
>+or C<message/partial>:
> literally, the text of an embedded mail/news/whatever message.  
> This option controls whether (and how) we parse that embedded message.
> 
> If the OPTION is false, we treat such a message just as if it were a 
> C<text/plain> document, without attempting to decode its contents.  
> 
>-If the OPTION is true (the default), the body of the C<message/rfc822> 
>-part is parsed by this parser, creating an entity object.  
>-What happens then is determined by the actual OPTION:
>+If the OPTION is true (the default), the body of the C<message/rfc822>
>+or C<message/partial> part is parsed by this parser, creating an
>+entity object.  What happens then is determined by the actual OPTION:
> 
> =over 4
> 
>@@ -702,9 +705,11 @@
>     while ($more_parts) {
> 	++$partno;
> 	$self->debug("parsing part $partno...");
>-	
>+
> 	### Parse the next part, and add it to the entity...
> 	my $part = $self->process_part($in, $part_rdr, Retype=>$retype);
>+	return undef unless defined($part);
>+
> 	$ent->add_part($part);
> 
> 	### ...and look at how we finished up:
>@@ -715,7 +720,7 @@
> 						    "before epilogue\n");
> 				       return 1; }
>     }
>-    
>+
>     ### Parse epilogue... 
>     ###    (note that we use the *parent's* reader here, which does not
>     ###     know about the boundaries in this multipart!)
>@@ -944,6 +949,7 @@
> 
>     ### Parse the message:
>     my $msg = $self->process_part($in, $rdr);
>+    return undef unless defined($msg);
> 
>     ### How to handle nested messages?
>     if ($self->extract_nested_messages eq 'REPLACE') {
>@@ -969,11 +975,19 @@
> #    Retype => retype this part to the given content-type
> #
> # Return the entity.
>-# Fatal exception on failure.
>+# Fatal exception on failure.  Returns undef if message too complex
> #
> sub process_part {
>     my ($self, $in, $rdr, %p) = @_;
> 
>+    if ($self->{MP5_MaxParts} > 0) {
>+	$self->{MP5_NumParts}++;
>+	if ($self->{MP5_NumParts} > $self->{MP5_MaxParts}) {
>+	    # Return UNDEF if msg too complex
>+	    return undef;
>+	}
>+    }
>+
>     $rdr ||= MIME::Parser::Reader->new;
>     #debug "process_part";
>     $self->results->level(+1);
>@@ -995,12 +1009,13 @@
> 
>     ### Handle, according to the MIME type:
>     if ($type eq 'multipart') {
>-	$self->process_multipart($in, $rdr, $ent);
>+	return undef unless defined($self->process_multipart($in, $rdr, $ent));
>     }
>-    elsif (("$type/$subtype" eq "message/rfc822") && 
>-	   $self->extract_nested_messages) {
>+    elsif (("$type/$subtype" eq "message/rfc822" ||
>+	    ("$type/$subtype" eq "message/partial" && $head->mime_attr("content-type.number") == 1)) && 
>+	    $self->extract_nested_messages) {
> 	$self->debug("attempting to process a nested message");
>-	$self->process_message($in, $rdr, $ent);
>+	return undef unless defined($self->process_message($in, $rdr, $ent));
>     }
>     else {                     
> 	$self->process_singlepart($in, $rdr, $ent);
>@@ -1047,7 +1062,6 @@
> =back
> 
> Returns the parsed MIME::Entity on success.  
>-Throws exception on failure.
> 
> =cut
> 
>@@ -1086,7 +1100,8 @@
> (which minimally implements getline() and read()).
> 
> Returns the parsed MIME::Entity on success.  
>-Throws exception on failure.
>+Throws exception on failure.  If the message contained too many
>+parts (as set by I<max_parts>), returns undef.
> 
> =cut
> 
>@@ -1098,7 +1113,7 @@
> 
>     my $bm = benchmark {
> 	$self->init_parse;
>-	($entity) = $self->process_part($in, undef);  ### parse!
>+	$entity = $self->process_part($in, undef);  ### parse!
>     };
>     $self->debug("t parse: $bm");
> 
>@@ -1346,6 +1361,32 @@
> 
> #------------------------------
> 
>+=item max_parts NUM
>+
>+I<Instance method.>
>+Limits the number of MIME parts we will parse.
>+
>+Normally, instances of this class parse a message to the bitter end.
>+Messages with many MIME parts can cause excessive memory consumption.
>+If you invoke this method, parsing will stop and the parse method will
>+return undef if a message contains more than NUM parts.
>+
>+If NUM is set to -1 (the default), then no maximum limit is enforced.
>+
>+With no argument, returns the current setting as an integer.
>+
>+=cut
>+
>+sub max_parts {
>+    my($self, $num) = @_;
>+    if (@_ > 1) {
>+	$self->{MP5_MaxParts} = $num;
>+    }
>+    return $self->{MP5_MaxParts};
>+}
>+
>+#------------------------------
>+
> =item output_to_core YESNO
> 
> I<Instance method.>
>diff --unified --recursive --new-file MIME-tools-5.411/lib/MIME/Words.pm MIME-tools-5.411a-RP-Patched-02/lib/MIME/Words.pm
>--- MIME-tools-5.411/lib/MIME/Words.pm	2000-11-10 11:45:12.000000000 -0500
>+++ MIME-tools-5.411a-RP-Patched-02/lib/MIME/Words.pm	2002-08-08 18:25:31.000000000 -0400
>@@ -186,7 +186,7 @@
>     $@ = '';           ### error-return
> 
>     ### Collapse boundaries between adjacent encoded words:
>-    $encstr =~ s{(\?\=)\r?\n[ \t](\=\?)}{$1$2}gs;
>+    $encstr =~ s{(\?\=)\s*(\=\?)}{$1$2}gs;
>     pos($encstr) = 0;
>     ### print STDOUT "ENC = [", $encstr, "]\n";
>

Actions: View | Diff

Attachments on bug 36886: 22911 | 23406