Add Content-Type based accept/reject filtering to GNU Wget 1.10.2.

New command-line options (and their wgetrc commands):

  --content-type=LIST          (contentaccepts)  patterns of content types
                                                 to accept
  --content-type-exclude=LIST  (contentrejects)  patterns of content types
                                                 to reject

gethttp() passes the response's Content-Type to the new helper
acceptable_content_type() before the download proceeds and returns the
new status code CTYPEREJECTED when the type is not acceptable.  A
response that carries no Content-Type is never rejected.  http_loop(),
main() and retrieve_tree() log the rejection.

NOTE: indentation inside context lines is written with spaces and may
differ from the pristine tree's whitespace.  Apply from the top of the
wget-1.10.2 tree with whitespace-insensitive matching:

    patch -p1 -l < this-file

diff -ru wget-1.10.2/src/http.c wget-1.10.2-mine/src/http.c
--- wget-1.10.2/src/http.c	2005-08-09 00:54:16.000000000 +0200
+++ wget-1.10.2-mine/src/http.c	2007-08-24 10:54:01.000000000 +0200
@@ -1874,9 +1874,20 @@
           else
             logputs (LOG_VERBOSE, "\n");
         }
     }
+  /* Apply --content-type / --content-type-exclude while TYPE is still
+     available.  A response without a Content-Type header is never
+     rejected.  */
+  if (type && !acceptable_content_type (type))
+    {
+      xfree_null (type);
+      /* The response body has not been read, so the connection must
+         not be kept around for reuse.  */
+      CLOSE_INVALIDATE (sock);
+      return CTYPEREJECTED;
+    }
   xfree_null (type);
   type = NULL; /* We don't need it any more. */
 
   /* Return if we have no intention of further downloading. */
   if (!(*dt & RETROKF) || head_only)
@@ -2241,6 +2252,19 @@
           free_hstat (&hstat);
           xfree_null (dummy);
           return err;
+        case CTYPEREJECTED:
+          {
+            /* Free the string returned by url_string() once the
+               message has been logged.  */
+            char *rejected_url = url_string (u, 1);
+            logprintf (LOG_VERBOSE, _("Not downloading `%s', "
+                                      "content type rejected. -- not retrieving.\n"),
+                       rejected_url);
+            xfree_null (rejected_url);
+          }
+          free_hstat (&hstat);
+          xfree_null (dummy);
+          return err;
         case FWRITEERR: case FOPENERR:
           /* Another fatal error. */
           logputs (LOG_VERBOSE, "\n");
diff -ru wget-1.10.2/src/init.c wget-1.10.2-mine/src/init.c
--- wget-1.10.2/src/init.c	2005-08-09 00:54:16.000000000 +0200
+++ wget-1.10.2-mine/src/init.c	2007-08-24 10:52:24.000000000 +0200
@@ -132,6 +132,8 @@
   { "checkcertificate", &opt.check_cert, cmd_boolean },
 #endif
   { "connecttimeout", &opt.connect_timeout, cmd_time },
+  { "contentaccepts", &opt.content_accepts, cmd_vector },
+  { "contentrejects", &opt.content_rejects, cmd_vector },
   { "continue", &opt.always_rest, cmd_boolean },
   { "convertlinks", &opt.convert_links, cmd_boolean },
   { "cookies", &opt.cookies, cmd_boolean },
diff -ru wget-1.10.2/src/main.c wget-1.10.2-mine/src/main.c
--- wget-1.10.2/src/main.c	2005-07-01 03:20:30.000000000 +0200
+++ wget-1.10.2-mine/src/main.c	2007-08-24 10:52:24.000000000 +0200
@@ -169,6 +169,8 @@
     { "connect-timeout", 0, OPT_VALUE, "connecttimeout", -1 },
     { "continue", 'c', OPT_BOOLEAN, "continue", -1 },
     { "convert-links", 'k', OPT_BOOLEAN, "convertlinks", -1 },
+    { "content-type", 0, OPT_VALUE, "contentaccepts", -1 },
+    { "content-type-exclude", 0, OPT_VALUE, "contentrejects", -1 },
     { "cookies", 0, OPT_BOOLEAN, "cookies", -1 },
     { "cut-dirs", 0, OPT_VALUE, "cutdirs", -1 },
     { IF_DEBUG ("debug"), 'd', OPT_BOOLEAN, "debug", -1 },
@@ -541,6 +543,10 @@
        --post-data=STRING      use the POST method; send STRING as the data.\n"),
     N_("\
        --post-file=FILE        use the POST method; send contents of FILE.\n"),
+    N_("\
+       --content-type=LIST     comma-separated list of accepted content-types.\n"),
+    N_("\
+       --content-type-exclude=LIST  comma-separated list of rejected content-types.\n"),
     "\n",
 
 #ifdef HAVE_SSL
@@ -942,6 +948,9 @@
       else
         status = retrieve_url (*t, &filename, &redirected_URL, NULL, &dt);
 
+      if (status == CTYPEREJECTED)
+        logprintf (LOG_VERBOSE, _("Not downloading, content type rejected.\n"));
+
       if (opt.delete_after && file_exists_p(filename))
         {
           DEBUGP (("Removing file due to --delete-after in main():\n"));
Only in wget-1.10.2-mine/src: Makefile
diff -ru wget-1.10.2/src/options.h wget-1.10.2-mine/src/options.h
--- wget-1.10.2/src/options.h	2005-08-09 00:54:16.000000000 +0200
+++ wget-1.10.2-mine/src/options.h	2007-08-24 10:52:24.000000000 +0200
@@ -57,6 +57,9 @@
 
   int spider; /* Is Wget in spider mode? */
 
+  char **content_accepts; /* List of content-type patterns to accept. */
+  char **content_rejects; /* List of content-type patterns to reject. */
+
   char **accepts; /* List of patterns to accept. */
   char **rejects; /* List of patterns to reject. */
   char **excludes; /* List of excluded FTP directories. */
diff -ru wget-1.10.2/src/recur.c wget-1.10.2-mine/src/recur.c
--- wget-1.10.2/src/recur.c	2005-06-25 16:47:52.000000000 +0200
+++ wget-1.10.2-mine/src/recur.c	2007-08-24 10:52:24.000000000 +0200
@@ -317,6 +317,9 @@
             }
         }
 
+      if (status == CTYPEREJECTED)
+        logprintf (LOG_VERBOSE, _("Not downloading, content type rejected.\n"));
+
       /* If the downloaded document was HTML, parse it and enqueue the
          links it contains. */
 
diff -ru wget-1.10.2/src/utils.c wget-1.10.2-mine/src/utils.c
--- wget-1.10.2/src/utils.c	2005-06-27 16:12:20.000000000 +0200
+++ wget-1.10.2-mine/src/utils.c	2007-08-24 10:52:24.000000000 +0200
@@ -668,6 +668,23 @@
   return 1;
 }
 
+/* Return 1 if the content type TYPE may be downloaded according to
+   --content-type (opt.content_accepts) and --content-type-exclude
+   (opt.content_rejects), 0 otherwise.  When an accept list is given,
+   TYPE must match it; when a reject list is given, TYPE must not
+   match it.  With neither list, every type is acceptable.  */
+int
+acceptable_content_type (const char *type)
+{
+  if (opt.content_accepts
+      && !in_acclist ((const char *const *)opt.content_accepts, type, 1))
+    return 0;
+  if (opt.content_rejects
+      && in_acclist ((const char *const *)opt.content_rejects, type, 1))
+    return 0;
+  return 1;
+}
+
 /* Compare S1 and S2 frontally; S2 must begin with S1. E.g. if S1 is
    `/something', frontcmp() will return 1 only if S2 begins with
    `/something'. Otherwise, 0 is returned. */
diff -ru wget-1.10.2/src/utils.h wget-1.10.2-mine/src/utils.h
--- wget-1.10.2/src/utils.h	2005-06-25 16:47:52.000000000 +0200
+++ wget-1.10.2-mine/src/utils.h	2007-08-24 10:52:24.000000000 +0200
@@ -79,6 +79,7 @@
 char *file_merge PARAMS ((const char *, const char *));
 
 int acceptable PARAMS ((const char *));
+int acceptable_content_type PARAMS ((const char *));
 int accdir PARAMS ((const char *s, enum accd));
 char *suffix PARAMS ((const char *s));
 int match_tail PARAMS ((const char *, const char *, int));
diff -ru wget-1.10.2/src/wget.h wget-1.10.2-mine/src/wget.h
--- wget-1.10.2/src/wget.h	2005-08-11 23:35:27.000000000 +0200
+++ wget-1.10.2-mine/src/wget.h	2007-08-24 10:52:24.000000000 +0200
@@ -294,7 +294,7 @@
   CONTNOTSUPPORTED, RETRUNNEEDED, RETRFINISHED, READERR, TRYLIMEXC,
   URLBADPATTERN, FILEBADFILE, RANGEERR, RETRBADPATTERN,
   RETNOTSUP, ROBOTSOK, NOROBOTS, PROXERR, AUTHFAILED,
-  QUOTEXC, WRITEFAILED, SSLINITFAILED
+  QUOTEXC, WRITEFAILED, SSLINITFAILED, CTYPEREJECTED
 } uerr_t;
 
 #endif /* WGET_H */