Attachment #22911 for bug #36886




=head1 SYNOPSIS

    # Create an object for a content-type field:
    $field = new Mail::Field 'Content-type';

    # Set some attributes:
    $field->param('_'        => 'text/html');
    $field->param('charset'  => 'us-ascii');
    $field->param('boundary' => '---ABC---');

    # Same:
    $field->set('_'        => 'text/html',
		'charset'  => 'us-ascii',
		'boundary' => '---ABC---');

    # Get an attribute, or undefined if not present:
    print "no id!"  if defined($field->param('id'));

    # Same, but use empty string for missing values:
    print "no id!"  if ($field->paramstr('id') eq '');

    # Output as string:
    print $field->stringify, "\n";


=head1 DESCRIPTION

This is an abstract superclass of most MIME fields.  It handles
fields with a general syntax like this:

    Content-Type: Message/Partial;
	number=2; total=3;
	id="oc=jpbe0M2Yt4s@thumper.bellcore.com"

Comments are supported I<between> items, like this:

    Content-Type: Message/Partial; (a comment)
	number=2  (another comment) ; (yet another comment) total=3;
	id="oc=jpbe0M2Yt4s@thumper.bellcore.com"


=head1 PUBLIC INTERFACE

#      token      =  1*<any  (ASCII) CHAR except SPACE, CTLs, or tspecials>
#
my $TSPECIAL = '()<>@,;:\</[]?="';

#" Fix emacs highlighting...

my $TOKEN    = '[^ \x00-\x1f\x80-\xff' . "\Q$TSPECIAL\E" . ']+';

# Encoded token:

# Pattern to match spaces or comments:
my $SPCZ     = '(?:\s|\([^\)]*\))*';

# Pattern to match non-semicolon as fallback for broken MIME
# produced by some viruses
my $BADTOKEN = '[^;]+';

#------------------------------
#

		  'total'   => 3,
		  'id'      => "ocj=pbe0M2");

Note that a single argument is taken to be a I<reference> to
a paramhash, while multiple args are taken to be the elements
of the paramhash themselves.


it as a hash reference.  For example, here is a field with parameters:

    Content-Type: Message/Partial;
	number=2; total=3;
	id="oc=jpbe0M2Yt4s@thumper.bellcore.com"

Here is how you'd extract them:

    $params = $class->parse_params('content-type');
    if ($$params{'_'} eq 'message/partial') {
	$number = $$params{'number'};
	$total  = $$params{'total'};
	$id     = $$params{'id'};
    }

Like field names, parameter names are coerced to lowercase.


=cut

sub rfc2231decode {
    my($val) = @_;
    my($enc, $lang, $rest);

    if ($val =~ m/^([^\']*)\'([^\']*)\'(.*)$/) {
	# SHOULD REALLY DO SOMETHING MORE INTELLIGENT WITH ENCODING!!!
	$enc = $1;
	$lang = $2;
	$rest = $3;
	$rest = rfc2231percent($rest);
    } elsif ($val =~ m/^([^\']*)\'([^\']*)$/) {
	$enc = $1;
	$rest = $2;
	$rest = rfc2231percent($rest);
    } else {
	$rest = rfc2231percent($val);
    }
    return $rest;
}

sub rfc2231percent {
    # Do percent-subsitution
    my($str) = @_;
    $str =~ s/%([0-9a-fA-F]{2})/pack("c", hex($1))/ge;
    return $str;
}

sub parse_params {
    my ($self, $raw) = @_;
    my %params = ();
    my %rfc2231params = ();
    my $param;
    my $val;
    my $part;

    # Get raw field, and unfold it:
    defined($raw) or $raw = '';

	$raw =~ m/\G$SPCZ\;$SPCZ/og or last;             # skip leading separator
	$raw =~ m/\G($PARAMNAME)\s*=\s*/og or last;      # give up if not a param
	$param = lc($1);
	$raw =~ m/\G(\"([^\"]+)\")|\G($ENCTOKEN)|\G($BADTOKEN)|\G($TOKEN)/g or last;   # give up if no value"
	my ($qstr, $str, $enctoken, $badtoken, $token) = ($1, $2, $3, $4, $5);
	if (defined($badtoken)) {
	    # Strip leading/trailing whitespace from badtoken
	    $badtoken =~ s/^\s*//;
	    $badtoken =~ s/\s*$//;
	}
	$val = defined($qstr) ? $str :
	    (defined($enctoken) ? $enctoken :
	     (defined($badtoken) ? $badtoken : $token));

	# Do RFC 2231 processing
	if ($param =~ /\*/) {
	    my($name, $num);
	    # Pick out the parts of the parameter
	    if ($param =~ m/^([^*]+)\*([^*]+)\*?$/) {
		# We have param*number* or param*number
		$name = $1;
		$num = $2;
	    } else {
		# Fake a part of zero... not sure how to handle this properly
		$param =~ s/\*//g;
		$name = $param;
		$num = 0;
	    }
	    # Decode the value unless it was a quoted string
	    if (!defined($qstr)) {
		$val = rfc2231decode($val);
	    }
	    $rfc2231params{$name}{$num} .= $val;
	} else {
	    # Make a fake "part zero" for non-RFC2231 params
	    $rfc2231params{$param}{"0"} = $val;
	}
    }

    # Extract reconstructed parameters
    foreach $param (keys %rfc2231params) {
	foreach $part (sort { $a <=> $b } keys %{$rfc2231params{$param}}) {
	    $params{$param} .= $rfc2231params{$param}{$part};
	}
	debug "   field param <$param> = <$params{$param}>";
    }



    # Allow use as constructor, for MIME::Head:
    ref($self) or $self = bless({}, $self);

    # Get params, and stuff them into the self object:
    $self->set($self->parse_params($string));
}




    $self->{MP5_IgnoreErrors}    = 1;
    $self->{MP5_UseInnerFiles}   = 0;
    $self->{MP5_UUDecode}        = 0;
    $self->{MP5_MaxParts}        = -1;

    $self->interface(ENTITY_CLASS => 'MIME::Entity');
    $self->interface(HEAD_CLASS   => 'MIME::Head');

    $self->{MP5_Filer}->results($self->{MP5_Results});
    $self->{MP5_Filer}->init_parse();
    $self->{MP5_Filer}->purgeable([]);   ### just to be safe
    $self->{MP5_NumParts} = 0;
    1;
}


=item extract_nested_messages OPTION

I<Instance method.>
Some MIME messages will contain a part of type C<message/rfc822>
or C<message/partial>:
literally, the text of an embedded mail/news/whatever message.  
This option controls whether (and how) we parse that embedded message.

If the OPTION is false, we treat such a message just as if it were a 
C<text/plain> document, without attempting to decode its contents.  

If the OPTION is true (the default), the body of the C<message/rfc822>
or C<message/partial> part is parsed by this parser, creating an
entity object.  What happens then is determined by the actual OPTION:

=over 4


    while ($more_parts) {
	++$partno;
	$self->debug("parsing part $partno...");

	### Parse the next part, and add it to the entity...
	my $part = $self->process_part($in, $part_rdr, Retype=>$retype);
	return undef unless defined($part);

	$ent->add_part($part);

	### ...and look at how we finished up:

						    "before epilogue\n");
				       return 1; }
    }

    ### Parse epilogue... 
    ###    (note that we use the *parent's* reader here, which does not
    ###     know about the boundaries in this multipart!)


    ### Parse the message:
    my $msg = $self->process_part($in, $rdr);
    return undef unless defined($msg);

    ### How to handle nested messages?
    if ($self->extract_nested_messages eq 'REPLACE') {

#    Retype => retype this part to the given content-type
#
# Return the entity.
# Fatal exception on failure.  Returns undef if message to complex
#
sub process_part {
    my ($self, $in, $rdr, %p) = @_;

    if ($self->{MP5_MaxParts} > 0) {
	$self->{MP5_NumParts}++;
	if ($self->{MP5_NumParts} > $self->{MP5_MaxParts}) {
	    # Return UNDEF if msg too complex
	    return undef;
	}
    }

    $rdr ||= MIME::Parser::Reader->new;
    #debug "process_part";
    $self->results->level(+1);


    ### Handle, according to the MIME type:
    if ($type eq 'multipart') {
	return undef unless defined($self->process_multipart($in, $rdr, $ent));
    }
    elsif (("$type/$subtype" eq "message/rfc822" ||
	    ("$type/$subtype" eq "message/partial" && $head->mime_attr("content-type.number") == 1)) && 
	    $self->extract_nested_messages) {
	$self->debug("attempting to process a nested message");
	return undef unless defined($self->process_message($in, $rdr, $ent));
    }
    else {                     
	$self->process_singlepart($in, $rdr, $ent);

=back

Returns the parsed MIME::Entity on success.  
Throws exception on failure.

=cut


(which minimally implements getline() and read()).

Returns the parsed MIME::Entity on success.  
Throws exception on failure.  If the message contained too many
parts (as set by I<max_parts>), returns undef.

=cut



    my $bm = benchmark {
	$self->init_parse;
	$entity = $self->process_part($in, undef);  ### parse!
    };
    $self->debug("t parse: $bm");



#------------------------------

=item max_parts NUM

I<Instance method.>
Limits the number of MIME parts we will parse.

Normally, instances of this class parse a message to the bitter end.
Messages with many MIME parts can cause excessive memory consumption.
If you invoke this method, parsing will abort with a die() if a message
contains more than NUM parts.

If NUM is set to -1 (the default), then no maximum limit is enforced.

With no argument, returns the current setting as an integer

=cut

sub max_parts {
    my($self, $num) = @_;
    if (@_ > 1) {
	$self->{MP5_MaxParts} = $num;
    }
    return $self->{MP5_MaxParts};
}

#------------------------------

=item output_to_core YESNO

I<Instance method.>

Lines 186-192 Link Here

(-)MIME-tools-5.411/lib/MIME/Words.pm (-1 / +1 lines)
186	$@ = ''; ### error-return	186	$@ = ''; ### error-return
187		187
188	### Collapse boundaries between adjacent encoded words:	188	### Collapse boundaries between adjacent encoded words:
189	$encstr =~ s{(\?\=)\r?\n[ \t](\=\?)}{$1$2}gs;	189	$encstr =~ s{(\?\=)\s*(\=\?)}{$1$2}gs;
190	pos($encstr) = 0;	190	pos($encstr) = 0;
191	### print STDOUT "ENC = [", $encstr, "]\n";	191	### print STDOUT "ENC = [", $encstr, "]\n";
192		192

Return to bug 36886

Lines 9-50 Link Here

(-)MIME-tools-5.411/lib/MIME/Field/ParamVal.pm (-21 / +95 lines)
9	=head1 SYNOPSIS	9	=head1 SYNOPSIS
10		10
11	# Create an object for a content-type field:	11	# Create an object for a content-type field:
12	$field = new Mail::Field 'Content-type';	12	$field = new Mail::Field 'Content-type';
13		13
14	# Set some attributes:	14	# Set some attributes:
15	$field->param('_' => 'text/html');	15	$field->param('_' => 'text/html');
16	$field->param('charset' => 'us-ascii');	16	$field->param('charset' => 'us-ascii');
17	$field->param('boundary' => '---ABC---');	17	$field->param('boundary' => '---ABC---');
18		18
19	# Same:	19	# Same:
20	$field->set('_' => 'text/html',	20	$field->set('_' => 'text/html',
21	'charset' => 'us-ascii',	21	'charset' => 'us-ascii',
22	'boundary' => '---ABC---');	22	'boundary' => '---ABC---');
23		23
24	# Get an attribute, or undefined if not present:	24	# Get an attribute, or undefined if not present:
25	print "no id!" if defined($field->param('id'));	25	print "no id!" if defined($field->param('id'));
26		26
27	# Same, but use empty string for missing values:	27	# Same, but use empty string for missing values:
28	print "no id!" if ($field->paramstr('id') eq '');	28	print "no id!" if ($field->paramstr('id') eq '');
29		29
30	# Output as string:	30	# Output as string:
31	print $field->stringify, "\n";	31	print $field->stringify, "\n";
32		32
33		33
34	=head1 DESCRIPTION	34	=head1 DESCRIPTION
35		35
36	This is an abstract superclass of most MIME fields. It handles	36	This is an abstract superclass of most MIME fields. It handles
37	fields with a general syntax like this:	37	fields with a general syntax like this:
38		38
39	Content-Type: Message/Partial;	39	Content-Type: Message/Partial;
40	number=2; total=3;	40	number=2; total=3;
41	id="oc=jpbe0M2Yt4s@thumper.bellcore.com"	41	id="oc=jpbe0M2Yt4s@thumper.bellcore.com"
42		42
43	Comments are supported I<between> items, like this:	43	Comments are supported I<between> items, like this:
44		44
45	Content-Type: Message/Partial; (a comment)	45	Content-Type: Message/Partial; (a comment)
46	number=2 (another comment) ; (yet another comment) total=3;	46	number=2 (another comment) ; (yet another comment) total=3;
47	id="oc=jpbe0M2Yt4s@thumper.bellcore.com"	47	id="oc=jpbe0M2Yt4s@thumper.bellcore.com"
48		48
49		49
50	=head1 PUBLIC INTERFACE	50	=head1 PUBLIC INTERFACE
Lines 100-105 Link Here
100	# token = 1*<any (ASCII) CHAR except SPACE, CTLs, or tspecials>	100	# token = 1*<any (ASCII) CHAR except SPACE, CTLs, or tspecials>
101	#	101	#
102	my $TSPECIAL = '()<>@,;:\</[]?="';	102	my $TSPECIAL = '()<>@,;:\</[]?="';
		103
		104	#" Fix emacs highlighting...
		105
103	my $TOKEN = '[^ \x00-\x1f\x80-\xff' . "\Q$TSPECIAL\E" . ']+';	106	my $TOKEN = '[^ \x00-\x1f\x80-\xff' . "\Q$TSPECIAL\E" . ']+';
104		107
105	# Encoded token:	108	# Encoded token:
Lines 108-113 Link Here
108	# Pattern to match spaces or comments:	111	# Pattern to match spaces or comments:
109	my $SPCZ = '(?:\s\|\([^\)]\))';	112	my $SPCZ = '(?:\s\|\([^\)]\))';
110		113
		114	# Pattern to match non-semicolon as fallback for broken MIME
		115	# produced by some viruses
		116	my $BADTOKEN = '[^;]+';
111		117
112	#------------------------------	118	#------------------------------
113	#	119	#
Lines 133-139 Link Here
133	'total' => 3,	139	'total' => 3,
134	'id' => "ocj=pbe0M2");	140	'id' => "ocj=pbe0M2");
135		141
136	Note that a single argument is taken to be a I<reference> to	142	Note that a single argument is taken to be a I<reference> to
137	a paramhash, while multiple args are taken to be the elements	143	a paramhash, while multiple args are taken to be the elements
138	of the paramhash themselves.	144	of the paramhash themselves.
139		145
Lines 160-175 Link Here
160	it as a hash reference. For example, here is a field with parameters:	166	it as a hash reference. For example, here is a field with parameters:
161		167
162	Content-Type: Message/Partial;	168	Content-Type: Message/Partial;
163	number=2; total=3;	169	number=2; total=3;
164	id="oc=jpbe0M2Yt4s@thumper.bellcore.com"	170	id="oc=jpbe0M2Yt4s@thumper.bellcore.com"
165		171
166	Here is how you'd extract them:	172	Here is how you'd extract them:
167		173
168	$params = $class->parse_params('content-type');	174	$params = $class->parse_params('content-type');
169	if ($$params{'_'} eq 'message/partial') {	175	if ($$params{'_'} eq 'message/partial') {
170	$number = $$params{'number'};	176	$number = $$params{'number'};
171	$total = $$params{'total'};	177	$total = $$params{'total'};
172	$id = $$params{'id'};	178	$id = $$params{'id'};
173	}	179	}
174		180
175	Like field names, parameter names are coerced to lowercase.	181	Like field names, parameter names are coerced to lowercase.
Lines 181-190 Link Here
181		187
182	=cut	188	=cut
183		189
		190	sub rfc2231decode {
		191	my($val) = @_;
		192	my($enc, $lang, $rest);
		193
		194	if ($val =~ m/^([^\'])\'([^\'])\'(.*)$/) {
		195	# SHOULD REALLY DO SOMETHING MORE INTELLIGENT WITH ENCODING!!!
		196	$enc = $1;
		197	$lang = $2;
		198	$rest = $3;
		199	$rest = rfc2231percent($rest);
		200	} elsif ($val =~ m/^([^\'])\'([^\'])$/) {
		201	$enc = $1;
		202	$rest = $2;
		203	$rest = rfc2231percent($rest);
		204	} else {
		205	$rest = rfc2231percent($val);
		206	}
		207	return $rest;
		208	}
		209
		210	sub rfc2231percent {
		211	# Do percent-subsitution
		212	my($str) = @_;
		213	$str =~ s/%([0-9a-fA-F]{2})/pack("c", hex($1))/ge;
		214	return $str;
		215	}
		216
184	sub parse_params {	217	sub parse_params {
185	my ($self, $raw) = @_;	218	my ($self, $raw) = @_;
186	my %params = ();	219	my %params = ();
		220	my %rfc2231params = ();
187	my $param;	221	my $param;
		222	my $val;
		223	my $part;
188		224
189	# Get raw field, and unfold it:	225	# Get raw field, and unfold it:
190	defined($raw) or $raw = '';	226	defined($raw) or $raw = '';
Lines 200-208 Link Here
200	$raw =~ m/\G$SPCZ\;$SPCZ/og or last; # skip leading separator	236	$raw =~ m/\G$SPCZ\;$SPCZ/og or last; # skip leading separator
201	$raw =~ m/\G($PARAMNAME)\s=\s/og or last; # give up if not a param	237	$raw =~ m/\G($PARAMNAME)\s=\s/og or last; # give up if not a param
202	$param = lc($1);	238	$param = lc($1);
203	$raw =~ m/\G(\"([^\"]+)\")\|\G($TOKEN)\|\G($ENCTOKEN)/g or last; # give up if no value	239	$raw =~ m/\G(\"([^\"]+)\")\|\G($ENCTOKEN)\|\G($BADTOKEN)\|\G($TOKEN)/g or last; # give up if no value"
204	my ($qstr, $str, $token, $enctoken) = ($1, $2, $3, $4);	240	my ($qstr, $str, $enctoken, $badtoken, $token) = ($1, $2, $3, $4, $5);
205	$params{$param} = defined($qstr) ? $str : (defined($token) ? $token : $enctoken);	241	if (defined($badtoken)) {
		242	# Strip leading/trailing whitespace from badtoken
		243	$badtoken =~ s/^\s*//;
		244	$badtoken =~ s/\s*$//;
		245	}
		246	$val = defined($qstr) ? $str :
		247	(defined($enctoken) ? $enctoken :
		248	(defined($badtoken) ? $badtoken : $token));
		249
		250	# Do RFC 2231 processing
		251	if ($param =~ /\*/) {
		252	my($name, $num);
		253	# Pick out the parts of the parameter
		254	if ($param =~ m/^([^]+)\([^]+)\?$/) {
		255	# We have paramnumber or param*number
		256	$name = $1;
		257	$num = $2;
		258	} else {
		259	# Fake a part of zero... not sure how to handle this properly
		260	$param =~ s/\*//g;
		261	$name = $param;
		262	$num = 0;
		263	}
		264	# Decode the value unless it was a quoted string
		265	if (!defined($qstr)) {
		266	$val = rfc2231decode($val);
		267	}
		268	$rfc2231params{$name}{$num} .= $val;
		269	} else {
		270	# Make a fake "part zero" for non-RFC2231 params
		271	$rfc2231params{$param}{"0"} = $val;
		272	}
		273	}
		274
		275	# Extract reconstructed parameters
		276	foreach $param (keys %rfc2231params) {
		277	foreach $part (sort { $a <=> $b } keys %{$rfc2231params{$param}}) {
		278	$params{$param} .= $rfc2231params{$param}{$part};
		279	}
206	debug " field param <$param> = <$params{$param}>";	280	debug " field param <$param> = <$params{$param}>";
207	}	281	}
208		282
Lines 227-233 Link Here
227		301
228	# Allow use as constructor, for MIME::Head:	302	# Allow use as constructor, for MIME::Head:
229	ref($self) or $self = bless({}, $self);	303	ref($self) or $self = bless({}, $self);
230		304
231	# Get params, and stuff them into the self object:	305	# Get params, and stuff them into the self object:
232	$self->set($self->parse_params($string));	306	$self->set($self->parse_params($string));
233	}	307	}

Lines 250-255 Link Here

(-)MIME-tools-5.411/lib/MIME/Parser.pm (-14 / +55 lines)
250	$self->{MP5_IgnoreErrors} = 1;	250	$self->{MP5_IgnoreErrors} = 1;
251	$self->{MP5_UseInnerFiles} = 0;	251	$self->{MP5_UseInnerFiles} = 0;
252	$self->{MP5_UUDecode} = 0;	252	$self->{MP5_UUDecode} = 0;
		253	$self->{MP5_MaxParts} = -1;
253		254
254	$self->interface(ENTITY_CLASS => 'MIME::Entity');	255	$self->interface(ENTITY_CLASS => 'MIME::Entity');
255	$self->interface(HEAD_CLASS => 'MIME::Head');	256	$self->interface(HEAD_CLASS => 'MIME::Head');
Lines 277-282 Link Here
277	$self->{MP5_Filer}->results($self->{MP5_Results});	278	$self->{MP5_Filer}->results($self->{MP5_Results});
278	$self->{MP5_Filer}->init_parse();	279	$self->{MP5_Filer}->init_parse();
279	$self->{MP5_Filer}->purgeable([]); ### just to be safe	280	$self->{MP5_Filer}->purgeable([]); ### just to be safe
		281	$self->{MP5_NumParts} = 0;
280	1;	282	1;
281	}	283	}
282		284
Lines 378-393 Link Here
378	=item extract_nested_messages OPTION	380	=item extract_nested_messages OPTION
379		381
380	I<Instance method.>	382	I<Instance method.>
381	Some MIME messages will contain a part of type C<message/rfc822>:	383	Some MIME messages will contain a part of type C<message/rfc822>
		384	or C<message/partial>:
382	literally, the text of an embedded mail/news/whatever message.	385	literally, the text of an embedded mail/news/whatever message.
383	This option controls whether (and how) we parse that embedded message.	386	This option controls whether (and how) we parse that embedded message.
384		387
385	If the OPTION is false, we treat such a message just as if it were a	388	If the OPTION is false, we treat such a message just as if it were a
386	C<text/plain> document, without attempting to decode its contents.	389	C<text/plain> document, without attempting to decode its contents.
387		390
388	If the OPTION is true (the default), the body of the C<message/rfc822>	391	If the OPTION is true (the default), the body of the C<message/rfc822>
389	part is parsed by this parser, creating an entity object.	392	or C<message/partial> part is parsed by this parser, creating an
390	What happens then is determined by the actual OPTION:	393	entity object. What happens then is determined by the actual OPTION:
391		394
392	=over 4	395	=over 4
393		396
Lines 702-710 Link Here
702	while ($more_parts) {	705	while ($more_parts) {
703	++$partno;	706	++$partno;
704	$self->debug("parsing part $partno...");	707	$self->debug("parsing part $partno...");
705		708
706	### Parse the next part, and add it to the entity...	709	### Parse the next part, and add it to the entity...
707	my $part = $self->process_part($in, $part_rdr, Retype=>$retype);	710	my $part = $self->process_part($in, $part_rdr, Retype=>$retype);
		711	return undef unless defined($part);
		712
708	$ent->add_part($part);	713	$ent->add_part($part);
709		714
710	### ...and look at how we finished up:	715	### ...and look at how we finished up:
Lines 715-721 Link Here
715	"before epilogue\n");	720	"before epilogue\n");
716	return 1; }	721	return 1; }
717	}	722	}
718		723
719	### Parse epilogue...	724	### Parse epilogue...
720	### (note that we use the parent's reader here, which does not	725	### (note that we use the parent's reader here, which does not
721	### know about the boundaries in this multipart!)	726	### know about the boundaries in this multipart!)
Lines 944-949 Link Here
944		949
945	### Parse the message:	950	### Parse the message:
946	my $msg = $self->process_part($in, $rdr);	951	my $msg = $self->process_part($in, $rdr);
		952	return undef unless defined($msg);
947		953
948	### How to handle nested messages?	954	### How to handle nested messages?
949	if ($self->extract_nested_messages eq 'REPLACE') {	955	if ($self->extract_nested_messages eq 'REPLACE') {
Lines 969-979 Link Here
969	# Retype => retype this part to the given content-type	975	# Retype => retype this part to the given content-type
970	#	976	#
971	# Return the entity.	977	# Return the entity.
972	# Fatal exception on failure.	978	# Fatal exception on failure. Returns undef if message to complex
973	#	979	#
974	sub process_part {	980	sub process_part {
975	my ($self, $in, $rdr, %p) = @_;	981	my ($self, $in, $rdr, %p) = @_;
976		982
		983	if ($self->{MP5_MaxParts} > 0) {
		984	$self->{MP5_NumParts}++;
		985	if ($self->{MP5_NumParts} > $self->{MP5_MaxParts}) {
		986	# Return UNDEF if msg too complex
		987	return undef;
		988	}
		989	}
		990
977	$rdr \|\|= MIME::Parser::Reader->new;	991	$rdr \|\|= MIME::Parser::Reader->new;
978	#debug "process_part";	992	#debug "process_part";
979	$self->results->level(+1);	993	$self->results->level(+1);
Lines 995-1006 Link Here
995		1009
996	### Handle, according to the MIME type:	1010	### Handle, according to the MIME type:
997	if ($type eq 'multipart') {	1011	if ($type eq 'multipart') {
998	$self->process_multipart($in, $rdr, $ent);	1012	return undef unless defined($self->process_multipart($in, $rdr, $ent));
999	}	1013	}
1000	elsif (("$type/$subtype" eq "message/rfc822") &&	1014	elsif (("$type/$subtype" eq "message/rfc822" \|\|
1001	$self->extract_nested_messages) {	1015	("$type/$subtype" eq "message/partial" && $head->mime_attr("content-type.number") == 1)) &&
		1016	$self->extract_nested_messages) {
1002	$self->debug("attempting to process a nested message");	1017	$self->debug("attempting to process a nested message");
1003	$self->process_message($in, $rdr, $ent);	1018	return undef unless defined($self->process_message($in, $rdr, $ent));
1004	}	1019	}
1005	else {	1020	else {
1006	$self->process_singlepart($in, $rdr, $ent);	1021	$self->process_singlepart($in, $rdr, $ent);
Lines 1047-1053 Link Here
1047	=back	1062	=back
1048		1063
1049	Returns the parsed MIME::Entity on success.	1064	Returns the parsed MIME::Entity on success.
1050	Throws exception on failure.
1051		1065
1052	=cut	1066	=cut
1053		1067
Lines 1086-1092 Link Here
1086	(which minimally implements getline() and read()).	1100	(which minimally implements getline() and read()).
1087		1101
1088	Returns the parsed MIME::Entity on success.	1102	Returns the parsed MIME::Entity on success.
1089	Throws exception on failure.	1103	Throws exception on failure. If the message contained too many
		1104	parts (as set by I<max_parts>), returns undef.
1090		1105
1091	=cut	1106	=cut
1092		1107
Lines 1098-1104 Link Here
1098		1113
1099	my $bm = benchmark {	1114	my $bm = benchmark {
1100	$self->init_parse;	1115	$self->init_parse;
1101	($entity) = $self->process_part($in, undef); ### parse!	1116	$entity = $self->process_part($in, undef); ### parse!
1102	};	1117	};
1103	$self->debug("t parse: $bm");	1118	$self->debug("t parse: $bm");
1104		1119
Lines 1346-1351 Link Here
1346		1361
1347	#------------------------------	1362	#------------------------------
1348		1363
		1364	=item max_parts NUM
		1365
		1366	I<Instance method.>
		1367	Limits the number of MIME parts we will parse.
		1368
		1369	Normally, instances of this class parse a message to the bitter end.
		1370	Messages with many MIME parts can cause excessive memory consumption.
		1371	If you invoke this method, parsing will abort with a die() if a message
		1372	contains more than NUM parts.
		1373
		1374	If NUM is set to -1 (the default), then no maximum limit is enforced.
		1375
		1376	With no argument, returns the current setting as an integer
		1377
		1378	=cut
		1379
		1380	sub max_parts {
		1381	my($self, $num) = @_;
		1382	if (@_ > 1) {
		1383	$self->{MP5_MaxParts} = $num;
		1384	}
		1385	return $self->{MP5_MaxParts};
		1386	}
		1387
		1388	#------------------------------
		1389
1349	=item output_to_core YESNO	1390	=item output_to_core YESNO
1350		1391
1351	I<Instance method.>	1392	I<Instance method.>