Gentoo Websites Logo
Go to: Gentoo Home Documentation Forums Lists Bugs Planet Store Wiki Get Gentoo!
View | Details | Raw Unified | Return to bug 36886 | Differences between
and this patch

Collapse All | Expand All

(-)MIME-tools-5.411/lib/MIME/Field/ParamVal.pm (-21 / +95 lines)
Lines 9-50 Link Here
9
=head1 SYNOPSIS
9
=head1 SYNOPSIS
10
10
11
    # Create an object for a content-type field:
11
    # Create an object for a content-type field:
12
    $field = new Mail::Field 'Content-type'; 
12
    $field = new Mail::Field 'Content-type';
13
     
13
14
    # Set some attributes:
14
    # Set some attributes:
15
    $field->param('_'        => 'text/html');
15
    $field->param('_'        => 'text/html');
16
    $field->param('charset'  => 'us-ascii');
16
    $field->param('charset'  => 'us-ascii');
17
    $field->param('boundary' => '---ABC---');
17
    $field->param('boundary' => '---ABC---');
18
     
18
19
    # Same:
19
    # Same:
20
    $field->set('_'        => 'text/html',
20
    $field->set('_'        => 'text/html',
21
		'charset'  => 'us-ascii',
21
		'charset'  => 'us-ascii',
22
		'boundary' => '---ABC---');
22
		'boundary' => '---ABC---');
23
      
23
24
    # Get an attribute, or undefined if not present:
24
    # Get an attribute, or undefined if not present:
25
    print "no id!"  if defined($field->param('id'));
25
    print "no id!"  if defined($field->param('id'));
26
     
26
27
    # Same, but use empty string for missing values:
27
    # Same, but use empty string for missing values:
28
    print "no id!"  if ($field->paramstr('id') eq '');
28
    print "no id!"  if ($field->paramstr('id') eq '');
29
                    
29
30
    # Output as string:
30
    # Output as string:
31
    print $field->stringify, "\n";
31
    print $field->stringify, "\n";
32
32
33
33
34
=head1 DESCRIPTION
34
=head1 DESCRIPTION
35
35
36
This is an abstract superclass of most MIME fields.  It handles 
36
This is an abstract superclass of most MIME fields.  It handles
37
fields with a general syntax like this:
37
fields with a general syntax like this:
38
38
39
    Content-Type: Message/Partial;
39
    Content-Type: Message/Partial;
40
        number=2; total=3;
40
	number=2; total=3;
41
        id="oc=jpbe0M2Yt4s@thumper.bellcore.com"
41
	id="oc=jpbe0M2Yt4s@thumper.bellcore.com"
42
42
43
Comments are supported I<between> items, like this:
43
Comments are supported I<between> items, like this:
44
44
45
    Content-Type: Message/Partial; (a comment)
45
    Content-Type: Message/Partial; (a comment)
46
        number=2  (another comment) ; (yet another comment) total=3;
46
	number=2  (another comment) ; (yet another comment) total=3;
47
        id="oc=jpbe0M2Yt4s@thumper.bellcore.com"
47
	id="oc=jpbe0M2Yt4s@thumper.bellcore.com"
48
48
49
49
50
=head1 PUBLIC INTERFACE
50
=head1 PUBLIC INTERFACE
Lines 100-105 Link Here
100
#      token      =  1*<any  (ASCII) CHAR except SPACE, CTLs, or tspecials>
100
#      token      =  1*<any  (ASCII) CHAR except SPACE, CTLs, or tspecials>
101
#
101
#
102
my $TSPECIAL = '()<>@,;:\</[]?="';
102
my $TSPECIAL = '()<>@,;:\</[]?="';
103
104
#" Fix emacs highlighting...
105
103
my $TOKEN    = '[^ \x00-\x1f\x80-\xff' . "\Q$TSPECIAL\E" . ']+';
106
my $TOKEN    = '[^ \x00-\x1f\x80-\xff' . "\Q$TSPECIAL\E" . ']+';
104
107
105
# Encoded token:
108
# Encoded token:
Lines 108-113 Link Here
108
# Pattern to match spaces or comments:
111
# Pattern to match spaces or comments:
109
my $SPCZ     = '(?:\s|\([^\)]*\))*';
112
my $SPCZ     = '(?:\s|\([^\)]*\))*';
110
113
114
# Pattern to match non-semicolon as fallback for broken MIME
115
# produced by some viruses
116
my $BADTOKEN = '[^;]+';
111
117
112
#------------------------------
118
#------------------------------
113
#
119
#
Lines 133-139 Link Here
133
		  'total'   => 3,
139
		  'total'   => 3,
134
		  'id'      => "ocj=pbe0M2");
140
		  'id'      => "ocj=pbe0M2");
135
141
136
Note that a single argument is taken to be a I<reference> to 
142
Note that a single argument is taken to be a I<reference> to
137
a paramhash, while multiple args are taken to be the elements
143
a paramhash, while multiple args are taken to be the elements
138
of the paramhash themselves.
144
of the paramhash themselves.
139
145
Lines 160-175 Link Here
160
it as a hash reference.  For example, here is a field with parameters:
166
it as a hash reference.  For example, here is a field with parameters:
161
167
162
    Content-Type: Message/Partial;
168
    Content-Type: Message/Partial;
163
        number=2; total=3;
169
	number=2; total=3;
164
        id="oc=jpbe0M2Yt4s@thumper.bellcore.com"
170
	id="oc=jpbe0M2Yt4s@thumper.bellcore.com"
165
171
166
Here is how you'd extract them:
172
Here is how you'd extract them:
167
173
168
    $params = $class->parse_params('content-type');
174
    $params = $class->parse_params('content-type');
169
    if ($$params{'_'} eq 'message/partial') {
175
    if ($$params{'_'} eq 'message/partial') {
170
        $number = $$params{'number'};
176
	$number = $$params{'number'};
171
        $total  = $$params{'total'};
177
	$total  = $$params{'total'};
172
        $id     = $$params{'id'};
178
	$id     = $$params{'id'};
173
    }
179
    }
174
180
175
Like field names, parameter names are coerced to lowercase.
181
Like field names, parameter names are coerced to lowercase.
Lines 181-190 Link Here
181
187
182
=cut
188
=cut
183
189
190
sub rfc2231decode {
191
    my($val) = @_;
192
    my($enc, $lang, $rest);
193
194
    if ($val =~ m/^([^\']*)\'([^\']*)\'(.*)$/) {
195
	# SHOULD REALLY DO SOMETHING MORE INTELLIGENT WITH ENCODING!!!
196
	$enc = $1;
197
	$lang = $2;
198
	$rest = $3;
199
	$rest = rfc2231percent($rest);
200
    } elsif ($val =~ m/^([^\']*)\'([^\']*)$/) {
201
	$enc = $1;
202
	$rest = $2;
203
	$rest = rfc2231percent($rest);
204
    } else {
205
	$rest = rfc2231percent($val);
206
    }
207
    return $rest;
208
}
209
210
sub rfc2231percent {
211
    # Do percent-subsitution
212
    my($str) = @_;
213
    $str =~ s/%([0-9a-fA-F]{2})/pack("c", hex($1))/ge;
214
    return $str;
215
}
216
184
sub parse_params {
217
sub parse_params {
185
    my ($self, $raw) = @_;
218
    my ($self, $raw) = @_;
186
    my %params = ();
219
    my %params = ();
220
    my %rfc2231params = ();
187
    my $param;
221
    my $param;
222
    my $val;
223
    my $part;
188
224
189
    # Get raw field, and unfold it:
225
    # Get raw field, and unfold it:
190
    defined($raw) or $raw = '';
226
    defined($raw) or $raw = '';
Lines 200-208 Link Here
200
	$raw =~ m/\G$SPCZ\;$SPCZ/og or last;             # skip leading separator
236
	$raw =~ m/\G$SPCZ\;$SPCZ/og or last;             # skip leading separator
201
	$raw =~ m/\G($PARAMNAME)\s*=\s*/og or last;      # give up if not a param
237
	$raw =~ m/\G($PARAMNAME)\s*=\s*/og or last;      # give up if not a param
202
	$param = lc($1);
238
	$param = lc($1);
203
	$raw =~ m/\G(\"([^\"]+)\")|\G($TOKEN)|\G($ENCTOKEN)/g or last;   # give up if no value
239
	$raw =~ m/\G(\"([^\"]+)\")|\G($ENCTOKEN)|\G($BADTOKEN)|\G($TOKEN)/g or last;   # give up if no value"
204
	my ($qstr, $str, $token, $enctoken) = ($1, $2, $3, $4);
240
	my ($qstr, $str, $enctoken, $badtoken, $token) = ($1, $2, $3, $4, $5);
205
	$params{$param} = defined($qstr) ? $str : (defined($token) ? $token : $enctoken);
241
	if (defined($badtoken)) {
242
	    # Strip leading/trailing whitespace from badtoken
243
	    $badtoken =~ s/^\s*//;
244
	    $badtoken =~ s/\s*$//;
245
	}
246
	$val = defined($qstr) ? $str :
247
	    (defined($enctoken) ? $enctoken :
248
	     (defined($badtoken) ? $badtoken : $token));
249
250
	# Do RFC 2231 processing
251
	if ($param =~ /\*/) {
252
	    my($name, $num);
253
	    # Pick out the parts of the parameter
254
	    if ($param =~ m/^([^*]+)\*([^*]+)\*?$/) {
255
		# We have param*number* or param*number
256
		$name = $1;
257
		$num = $2;
258
	    } else {
259
		# Fake a part of zero... not sure how to handle this properly
260
		$param =~ s/\*//g;
261
		$name = $param;
262
		$num = 0;
263
	    }
264
	    # Decode the value unless it was a quoted string
265
	    if (!defined($qstr)) {
266
		$val = rfc2231decode($val);
267
	    }
268
	    $rfc2231params{$name}{$num} .= $val;
269
	} else {
270
	    # Make a fake "part zero" for non-RFC2231 params
271
	    $rfc2231params{$param}{"0"} = $val;
272
	}
273
    }
274
275
    # Extract reconstructed parameters
276
    foreach $param (keys %rfc2231params) {
277
	foreach $part (sort { $a <=> $b } keys %{$rfc2231params{$param}}) {
278
	    $params{$param} .= $rfc2231params{$param}{$part};
279
	}
206
	debug "   field param <$param> = <$params{$param}>";
280
	debug "   field param <$param> = <$params{$param}>";
207
    }
281
    }
208
282
Lines 227-233 Link Here
227
301
228
    # Allow use as constructor, for MIME::Head:
302
    # Allow use as constructor, for MIME::Head:
229
    ref($self) or $self = bless({}, $self);
303
    ref($self) or $self = bless({}, $self);
230
    
304
231
    # Get params, and stuff them into the self object:
305
    # Get params, and stuff them into the self object:
232
    $self->set($self->parse_params($string));
306
    $self->set($self->parse_params($string));
233
}
307
}
(-)MIME-tools-5.411/lib/MIME/Parser.pm (-14 / +55 lines)
Lines 250-255 Link Here
250
    $self->{MP5_IgnoreErrors}    = 1;
250
    $self->{MP5_IgnoreErrors}    = 1;
251
    $self->{MP5_UseInnerFiles}   = 0;
251
    $self->{MP5_UseInnerFiles}   = 0;
252
    $self->{MP5_UUDecode}        = 0;
252
    $self->{MP5_UUDecode}        = 0;
253
    $self->{MP5_MaxParts}        = -1;
253
254
254
    $self->interface(ENTITY_CLASS => 'MIME::Entity');
255
    $self->interface(ENTITY_CLASS => 'MIME::Entity');
255
    $self->interface(HEAD_CLASS   => 'MIME::Head');
256
    $self->interface(HEAD_CLASS   => 'MIME::Head');
Lines 277-282 Link Here
277
    $self->{MP5_Filer}->results($self->{MP5_Results});
278
    $self->{MP5_Filer}->results($self->{MP5_Results});
278
    $self->{MP5_Filer}->init_parse();
279
    $self->{MP5_Filer}->init_parse();
279
    $self->{MP5_Filer}->purgeable([]);   ### just to be safe
280
    $self->{MP5_Filer}->purgeable([]);   ### just to be safe
281
    $self->{MP5_NumParts} = 0;
280
    1;
282
    1;
281
}
283
}
282
284
Lines 378-393 Link Here
378
=item extract_nested_messages OPTION
380
=item extract_nested_messages OPTION
379
381
380
I<Instance method.>
382
I<Instance method.>
381
Some MIME messages will contain a part of type C<message/rfc822>:
383
Some MIME messages will contain a part of type C<message/rfc822>
384
or C<message/partial>:
382
literally, the text of an embedded mail/news/whatever message.  
385
literally, the text of an embedded mail/news/whatever message.  
383
This option controls whether (and how) we parse that embedded message.
386
This option controls whether (and how) we parse that embedded message.
384
387
385
If the OPTION is false, we treat such a message just as if it were a 
388
If the OPTION is false, we treat such a message just as if it were a 
386
C<text/plain> document, without attempting to decode its contents.  
389
C<text/plain> document, without attempting to decode its contents.  
387
390
388
If the OPTION is true (the default), the body of the C<message/rfc822> 
391
If the OPTION is true (the default), the body of the C<message/rfc822>
389
part is parsed by this parser, creating an entity object.  
392
or C<message/partial> part is parsed by this parser, creating an
390
What happens then is determined by the actual OPTION:
393
entity object.  What happens then is determined by the actual OPTION:
391
394
392
=over 4
395
=over 4
393
396
Lines 702-710 Link Here
702
    while ($more_parts) {
705
    while ($more_parts) {
703
	++$partno;
706
	++$partno;
704
	$self->debug("parsing part $partno...");
707
	$self->debug("parsing part $partno...");
705
	
708
706
	### Parse the next part, and add it to the entity...
709
	### Parse the next part, and add it to the entity...
707
	my $part = $self->process_part($in, $part_rdr, Retype=>$retype);
710
	my $part = $self->process_part($in, $part_rdr, Retype=>$retype);
711
	return undef unless defined($part);
712
708
	$ent->add_part($part);
713
	$ent->add_part($part);
709
714
710
	### ...and look at how we finished up:
715
	### ...and look at how we finished up:
Lines 715-721 Link Here
715
						    "before epilogue\n");
720
						    "before epilogue\n");
716
				       return 1; }
721
				       return 1; }
717
    }
722
    }
718
    
723
719
    ### Parse epilogue... 
724
    ### Parse epilogue... 
720
    ###    (note that we use the *parent's* reader here, which does not
725
    ###    (note that we use the *parent's* reader here, which does not
721
    ###     know about the boundaries in this multipart!)
726
    ###     know about the boundaries in this multipart!)
Lines 944-949 Link Here
944
949
945
    ### Parse the message:
950
    ### Parse the message:
946
    my $msg = $self->process_part($in, $rdr);
951
    my $msg = $self->process_part($in, $rdr);
952
    return undef unless defined($msg);
947
953
948
    ### How to handle nested messages?
954
    ### How to handle nested messages?
949
    if ($self->extract_nested_messages eq 'REPLACE') {
955
    if ($self->extract_nested_messages eq 'REPLACE') {
Lines 969-979 Link Here
969
#    Retype => retype this part to the given content-type
975
#    Retype => retype this part to the given content-type
970
#
976
#
971
# Return the entity.
977
# Return the entity.
972
# Fatal exception on failure.
978
# Fatal exception on failure.  Returns undef if message to complex
973
#
979
#
974
sub process_part {
980
sub process_part {
975
    my ($self, $in, $rdr, %p) = @_;
981
    my ($self, $in, $rdr, %p) = @_;
976
982
983
    if ($self->{MP5_MaxParts} > 0) {
984
	$self->{MP5_NumParts}++;
985
	if ($self->{MP5_NumParts} > $self->{MP5_MaxParts}) {
986
	    # Return UNDEF if msg too complex
987
	    return undef;
988
	}
989
    }
990
977
    $rdr ||= MIME::Parser::Reader->new;
991
    $rdr ||= MIME::Parser::Reader->new;
978
    #debug "process_part";
992
    #debug "process_part";
979
    $self->results->level(+1);
993
    $self->results->level(+1);
Lines 995-1006 Link Here
995
1009
996
    ### Handle, according to the MIME type:
1010
    ### Handle, according to the MIME type:
997
    if ($type eq 'multipart') {
1011
    if ($type eq 'multipart') {
998
	$self->process_multipart($in, $rdr, $ent);
1012
	return undef unless defined($self->process_multipart($in, $rdr, $ent));
999
    }
1013
    }
1000
    elsif (("$type/$subtype" eq "message/rfc822") && 
1014
    elsif (("$type/$subtype" eq "message/rfc822" ||
1001
	   $self->extract_nested_messages) {
1015
	    ("$type/$subtype" eq "message/partial" && $head->mime_attr("content-type.number") == 1)) && 
1016
	    $self->extract_nested_messages) {
1002
	$self->debug("attempting to process a nested message");
1017
	$self->debug("attempting to process a nested message");
1003
	$self->process_message($in, $rdr, $ent);
1018
	return undef unless defined($self->process_message($in, $rdr, $ent));
1004
    }
1019
    }
1005
    else {                     
1020
    else {                     
1006
	$self->process_singlepart($in, $rdr, $ent);
1021
	$self->process_singlepart($in, $rdr, $ent);
Lines 1047-1053 Link Here
1047
=back
1062
=back
1048
1063
1049
Returns the parsed MIME::Entity on success.  
1064
Returns the parsed MIME::Entity on success.  
1050
Throws exception on failure.
1051
1065
1052
=cut
1066
=cut
1053
1067
Lines 1086-1092 Link Here
1086
(which minimally implements getline() and read()).
1100
(which minimally implements getline() and read()).
1087
1101
1088
Returns the parsed MIME::Entity on success.  
1102
Returns the parsed MIME::Entity on success.  
1089
Throws exception on failure.
1103
Throws exception on failure.  If the message contained too many
1104
parts (as set by I<max_parts>), returns undef.
1090
1105
1091
=cut
1106
=cut
1092
1107
Lines 1098-1104 Link Here
1098
1113
1099
    my $bm = benchmark {
1114
    my $bm = benchmark {
1100
	$self->init_parse;
1115
	$self->init_parse;
1101
	($entity) = $self->process_part($in, undef);  ### parse!
1116
	$entity = $self->process_part($in, undef);  ### parse!
1102
    };
1117
    };
1103
    $self->debug("t parse: $bm");
1118
    $self->debug("t parse: $bm");
1104
1119
Lines 1346-1351 Link Here
1346
1361
1347
#------------------------------
1362
#------------------------------
1348
1363
1364
=item max_parts NUM
1365
1366
I<Instance method.>
1367
Limits the number of MIME parts we will parse.
1368
1369
Normally, instances of this class parse a message to the bitter end.
1370
Messages with many MIME parts can cause excessive memory consumption.
1371
If you invoke this method, parsing will abort with a die() if a message
1372
contains more than NUM parts.
1373
1374
If NUM is set to -1 (the default), then no maximum limit is enforced.
1375
1376
With no argument, returns the current setting as an integer
1377
1378
=cut
1379
1380
sub max_parts {
1381
    my($self, $num) = @_;
1382
    if (@_ > 1) {
1383
	$self->{MP5_MaxParts} = $num;
1384
    }
1385
    return $self->{MP5_MaxParts};
1386
}
1387
1388
#------------------------------
1389
1349
=item output_to_core YESNO
1390
=item output_to_core YESNO
1350
1391
1351
I<Instance method.>
1392
I<Instance method.>
(-)MIME-tools-5.411/lib/MIME/Words.pm (-1 / +1 lines)
Lines 186-192 Link Here
186
    $@ = '';           ### error-return
186
    $@ = '';           ### error-return
187
187
188
    ### Collapse boundaries between adjacent encoded words:
188
    ### Collapse boundaries between adjacent encoded words:
189
    $encstr =~ s{(\?\=)\r?\n[ \t](\=\?)}{$1$2}gs;
189
    $encstr =~ s{(\?\=)\s*(\=\?)}{$1$2}gs;
190
    pos($encstr) = 0;
190
    pos($encstr) = 0;
191
    ### print STDOUT "ENC = [", $encstr, "]\n";
191
    ### print STDOUT "ENC = [", $encstr, "]\n";
192
192

Return to bug 36886