diff -u urlgrabber-3.1.0/urlgrabber/grabber.py urlgrabber-3.9.1/urlgrabber/grabber.py --- urlgrabber-3.1.0/urlgrabber/grabber.py 2006-09-21 20:58:05.000000000 -0400 +++ urlgrabber-3.9.1/urlgrabber/grabber.py 2010-06-26 13:12:59.000000000 -0400 @@ -16,6 +16,7 @@ # This file is part of urlgrabber, a high-level cross-protocol url-grabber # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko +# Copyright 2009 Red Hat inc, pycurl code written by Seth Vidal """A high-level cross-protocol url-grabber. @@ -55,8 +56,9 @@ text = None - specifies an alternativ text item in the beginning of the progress - bar line. If not given, the basename of the file is used. + specifies alternative text to be passed to the progress meter + object. If not given, the default progress meter will use the + basename of the file. throttle = 1.0 @@ -157,16 +159,11 @@ partial file or directory name. opener = None - - Overrides the default urllib2.OpenerDirector provided to urllib2 - when making requests. This option exists so that the urllib2 - handler chain may be customized. Note that the range, reget, - proxy, and keepalive features require that custom handlers be - provided to urllib2 in order to function properly. If an opener - option is provided, no attempt is made by urlgrabber to ensure - chain integrity. You are responsible for ensuring that any - extension handlers are present if said features are required. - + No-op when using the curl backend (default) + + cache_openers = True + No-op when using the curl backend (default) + data = None Only relevant for the HTTP family (and ignored for other @@ -179,6 +176,78 @@ badly and if you do not use the proper case (shown here), your values will be overridden with the defaults. + urlparser = URLParser() + + The URLParser class handles pre-processing of URLs, including + auth-handling for user/pass encoded in http urls, file handing + (that is, filenames not sent as a URL), and URL quoting. If you + want to override any of this behavior, you can pass in a + replacement instance. See also the 'quote' option. + + quote = None + + Whether or not to quote the path portion of a url. + quote = 1 -> quote the URLs (they're not quoted yet) + quote = 0 -> do not quote them (they're already quoted) + quote = None -> guess what to do + + This option only affects proper urls like 'file:///etc/passwd'; it + does not affect 'raw' filenames like '/etc/passwd'. The latter + will always be quoted as they are converted to URLs. Also, only + the path part of a url is quoted. If you need more fine-grained + control, you should probably subclass URLParser and pass it in via + the 'urlparser' option. + + ssl_ca_cert = None + + this option can be used if M2Crypto is available and will be + ignored otherwise. If provided, it will be used to create an SSL + context. If both ssl_ca_cert and ssl_context are provided, then + ssl_context will be ignored and a new context will be created from + ssl_ca_cert. 
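For illustration only, a minimal sketch of how options like the ones documented here are supplied as keyword arguments (the URLGrabber class and its urlgrab() method appear later in this patch; the URL and CA-bundle path below are placeholders):

    from urlgrabber.grabber import URLGrabber

    g = URLGrabber()
    # per-call keyword options are merged into the grabber's defaults
    # (see URLGrabberOptions.derive(**kwargs) further down in this patch)
    path = g.urlgrab('https://example.org/some/file.txt', 'file.txt',
                     ssl_ca_cert='/etc/pki/tls/certs/ca-bundle.crt',  # handed to curl as CAINFO/CAPATH
                     quote=None)  # let the URLParser guess whether the path is already quoted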
+ + ssl_context = None + + No-op when using the curl backend (default) + + + self.ssl_verify_peer = True + + Check the server's certificate to make sure it is valid with what our CA validates + + self.ssl_verify_host = True + + Check the server's hostname to make sure it matches the certificate DN + + self.ssl_key = None + + Path to the key the client should use to connect/authenticate with + + self.ssl_key_type = 'PEM' + + PEM or DER - format of key + + self.ssl_cert = None + + Path to the ssl certificate the client should use to to authenticate with + + self.ssl_cert_type = 'PEM' + + PEM or DER - format of certificate + + self.ssl_key_pass = None + + password to access the ssl_key + + self.size = None + + size (in bytes) or Maximum size of the thing being downloaded. + This is mostly to keep us from exploding with an endless datastream + + self.max_header_size = 2097152 + + Maximum size (in bytes) of the headers. + RETRY RELATED ARGUMENTS @@ -283,28 +352,6 @@ passed the same arguments, so you could use the same function for both. - urlparser = URLParser() - - The URLParser class handles pre-processing of URLs, including - auth-handling for user/pass encoded in http urls, file handing - (that is, filenames not sent as a URL), and URL quoting. If you - want to override any of this behavior, you can pass in a - replacement instance. See also the 'quote' option. - - quote = None - - Whether or not to quote the path portion of a url. - quote = 1 -> quote the URLs (they're not quoted yet) - quote = 0 -> do not quote them (they're already quoted) - quote = None -> guess what to do - - This option only affects proper urls like 'file:///etc/passwd'; it - does not affect 'raw' filenames like '/etc/passwd'. The latter - will always be quoted as they are converted to URLs. Also, only - the path part of a url is quoted. If you need more fine-grained - control, you should probably subclass URLParser and pass it in via - the 'urlparser' option. - BANDWIDTH THROTTLING urlgrabber supports throttling via two values: throttle and @@ -364,18 +411,26 @@ """ -# $Id: grabber.py,v 1.48 2006/09/22 00:58:05 mstenner Exp $ + import os import os.path import sys import urlparse -import rfc822 import time import string import urllib import urllib2 -from stat import * # S_* and ST_* +import mimetools +import thread +import types +import stat +import pycurl +from ftplib import parse150 +from StringIO import StringIO +from httplib import HTTPException +import socket +from byterange import range_tuple_normalize, range_tuple_to_header, RangeError ######################################################################## # MODULE INITIALIZATION @@ -385,55 +440,6 @@ except: __version__ = '???' -import sslfactory - -auth_handler = urllib2.HTTPBasicAuthHandler( \ - urllib2.HTTPPasswordMgrWithDefaultRealm()) - -try: - from i18n import _ -except ImportError, msg: - def _(st): return st - -try: - from httplib import HTTPException -except ImportError, msg: - HTTPException = None - -try: - # This is a convenient way to make keepalive optional. - # Just rename the module so it can't be imported. 
- import keepalive - from keepalive import HTTPHandler, HTTPSHandler - have_keepalive = True -except ImportError, msg: - have_keepalive = False - -try: - # add in range support conditionally too - import byterange - from byterange import HTTPRangeHandler, HTTPSRangeHandler, \ - FileRangeHandler, FTPRangeHandler, range_tuple_normalize, \ - range_tuple_to_header, RangeError -except ImportError, msg: - range_handlers = () - RangeError = None - have_range = 0 -else: - range_handlers = (HTTPRangeHandler(), HTTPSRangeHandler(), - FileRangeHandler(), FTPRangeHandler()) - have_range = 1 - - -# check whether socket timeout support is available (Python >= 2.3) -import socket -try: - TimeoutError = socket.timeout - have_socket_timeout = True -except AttributeError: - TimeoutError = None - have_socket_timeout = False - ######################################################################## # functions for debugging output. These functions are here because they # are also part of the module initialization. @@ -456,14 +462,8 @@ global DEBUG DEBUG = DBOBJ - if have_keepalive and keepalive.DEBUG is None: - keepalive.DEBUG = DBOBJ - if have_range and byterange.DEBUG is None: - byterange.DEBUG = DBOBJ - if sslfactory.DEBUG is None: - sslfactory.DEBUG = DBOBJ -def _init_default_logger(): +def _init_default_logger(logspec=None): '''Examines the environment variable URLGRABBER_DEBUG and creates a logging object (logging.logger) based on the contents. It takes the form @@ -489,9 +489,12 @@ collect the code into a nice block.''' try: - dbinfo = os.environ['URLGRABBER_DEBUG'].split(',') + if logspec is None: + logspec = os.environ['URLGRABBER_DEBUG'] + dbinfo = logspec.split(',') import logging - level = logging._levelNames.get(dbinfo[0], int(dbinfo[0])) + level = logging._levelNames.get(dbinfo[0], None) + if level is None: level = int(dbinfo[0]) if level < 1: raise ValueError() formatter = logging.Formatter('%(asctime)s %(message)s') @@ -508,7 +511,19 @@ DBOBJ = None set_logger(DBOBJ) +def _log_package_state(): + if not DEBUG: return + DEBUG.info('urlgrabber version = %s' % __version__) + DEBUG.info('trans function "_" = %s' % _) + _init_default_logger() +_log_package_state() + + +# normally this would be from i18n or something like it ... +def _(st): + return st + ######################################################################## # END MODULE INITIALIZATION ######################################################################## @@ -536,6 +551,7 @@ 13 - malformed proxy url 14 - HTTPError (includes .code and .exception attributes) 15 - user abort + 16 - error writing to local file MirrorGroup error codes (256 -- 511) 256 - No more mirrors left to try @@ -567,7 +583,9 @@ # or simply print e #### print '[Errno %i] %s' % (e.errno, e.strerror) """ - pass + def __init__(self, *args): + IOError.__init__(self, *args) + self.url = "No url specified" class CallbackObject: """Container for returned callback data. 
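The error codes listed above and the new err.url attribute are the main consumer-visible changes in this part of the patch; below is a small illustrative sketch of using them. URLGrabError subclasses IOError, so errno and strerror behave as in the docstring example. The URL, filename, and debug spec are placeholders, and the '<level>[,<output>]' form is inferred from how _init_default_logger() splits the URLGRABBER_DEBUG value:

    import os
    # must be set before urlgrabber.grabber is imported, since the default
    # logger is created at import time from this environment variable
    os.environ['URLGRABBER_DEBUG'] = 'INFO'

    from urlgrabber.grabber import URLGrabber, URLGrabError

    try:
        URLGrabber().urlgrab('http://example.org/missing.txt', '/tmp/missing.txt')
    except URLGrabError, e:
        print '[Errno %i] %s (url: %s)' % (e.errno, e.strerror, e.url)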
@@ -661,7 +679,7 @@ quote = 0 # pathname2url quotes, so we won't do it again if scheme in ['http', 'https']: - parts = self.process_http(parts) + parts = self.process_http(parts, url) if quote is None: quote = self.guess_should_quote(parts) @@ -678,19 +696,9 @@ url = prefix + '/' + url return url - def process_http(self, parts): + def process_http(self, parts, url): (scheme, host, path, parm, query, frag) = parts - - if '@' in host and auth_handler: - try: - user_pass, host = host.split('@', 1) - if ':' in user_pass: - user, password = user_pass.split(':', 1) - except ValueError, e: - raise URLGrabError(1, _('Bad URL: %s') % url) - if DEBUG: DEBUG.info('adding HTTP auth: %s, %s', user, password) - auth_handler.add_password(None, host, user, password) - + # TODO: auth-parsing here, maybe? pycurl doesn't really need it return (scheme, host, path, parm, query, frag) def quote(self, parts): @@ -771,7 +779,7 @@ def _set_attributes(self, **kwargs): """Update object attributes with those provided in kwargs.""" self.__dict__.update(kwargs) - if have_range and kwargs.has_key('range'): + if kwargs.has_key('range'): # normalize the supplied range value self.range = range_tuple_normalize(self.range) if not self.reget in [None, 'simple', 'check_timestamp']: @@ -808,8 +816,36 @@ self.data = None self.urlparser = URLParser() self.quote = None - self.ssl_ca_cert = None - self.ssl_context = None + self.ssl_ca_cert = None # sets SSL_CAINFO - path to certdb + self.ssl_context = None # no-op in pycurl + self.ssl_verify_peer = True # check peer's cert for authenticityb + self.ssl_verify_host = True # make sure who they are and who the cert is for matches + self.ssl_key = None # client key + self.ssl_key_type = 'PEM' #(or DER) + self.ssl_cert = None # client cert + self.ssl_cert_type = 'PEM' # (or DER) + self.ssl_key_pass = None # password to access the key + self.size = None # if we know how big the thing we're getting is going + # to be. this is ultimately a MAXIMUM size for the file + self.max_header_size = 2097152 #2mb seems reasonable for maximum header size + + def __repr__(self): + return self.format() + + def format(self, indent=' '): + keys = self.__dict__.keys() + if self.delegate is not None: + keys.remove('delegate') + keys.sort() + s = '{\n' + for k in keys: + s = s + indent + '%-15s: %s,\n' % \ + (repr(k), repr(self.__dict__[k])) + if self.delegate: + df = self.delegate.format(indent + ' ') + s = s + indent + '%-15s: %s\n' % ("'delegate'", df) + s = s + indent + '}' + return s class URLGrabber: """Provides easy opening of URLs with a variety of options. @@ -878,9 +914,10 @@ like any other file object. """ opts = self.opts.derive(**kwargs) + if DEBUG: DEBUG.debug('combined options: %s' % repr(opts)) (url,parts) = opts.urlparser.parse(url, opts) def retryfunc(opts, url): - return URLGrabberFileObject(url, filename=None, opts=opts) + return PyCurlFileObject(url, filename=None, opts=opts) return self._retry(opts, retryfunc, url) def urlgrab(self, url, filename=None, **kwargs): @@ -890,6 +927,7 @@ different from the passed-in filename if copy_local == 0. 
""" opts = self.opts.derive(**kwargs) + if DEBUG: DEBUG.debug('combined options: %s' % repr(opts)) (url,parts) = opts.urlparser.parse(url, opts) (scheme, host, path, parm, query, frag) = parts if filename is None: @@ -901,16 +939,28 @@ if host: path = os.path.normpath('//' + host + path) if not os.path.exists(path): - raise URLGrabError(2, + err = URLGrabError(2, _('Local file does not exist: %s') % (path, )) + err.url = url + raise err elif not os.path.isfile(path): - raise URLGrabError(3, - _('Not a normal file: %s') % (path, )) + err = URLGrabError(3, + _('Not a normal file: %s') % (path, )) + err.url = url + raise err + elif not opts.range: + if not opts.checkfunc is None: + cb_func, cb_args, cb_kwargs = \ + self._make_callback(opts.checkfunc) + obj = CallbackObject() + obj.filename = path + obj.url = url + apply(cb_func, (obj, )+cb_args, cb_kwargs) return path def retryfunc(opts, url, filename): - fo = URLGrabberFileObject(url, filename, opts) + fo = PyCurlFileObject(url, filename, opts) try: fo._do_grab() if not opts.checkfunc is None: @@ -934,12 +984,13 @@ into memory, but don't use too much' """ opts = self.opts.derive(**kwargs) + if DEBUG: DEBUG.debug('combined options: %s' % repr(opts)) (url,parts) = opts.urlparser.parse(url, opts) if limit is not None: limit = limit + 1 def retryfunc(opts, url, limit): - fo = URLGrabberFileObject(url, filename=None, opts=opts) + fo = PyCurlFileObject(url, filename=None, opts=opts) s = '' try: # this is an unfortunate thing. Some file-like objects @@ -962,8 +1013,11 @@ s = self._retry(opts, retryfunc, url, limit) if limit and len(s) > limit: - raise URLGrabError(8, - _('Exceeded limit (%i): %s') % (limit, url)) + err = URLGrabError(8, + _('Exceeded limit (%i): %s') % (limit, url)) + err.url = url + raise err + return s def _make_callback(self, callback_obj): @@ -976,192 +1030,328 @@ # NOTE: actual defaults are set in URLGrabberOptions default_grabber = URLGrabber() -class URLGrabberFileObject: - """This is a file-object wrapper that supports progress objects - and throttling. - - This exists to solve the following problem: lets say you want to - drop-in replace a normal open with urlopen. You want to use a - progress meter and/or throttling, but how do you do that without - rewriting your code? Answer: urlopen will return a wrapped file - object that does the progress meter and-or throttling internally. - """ +class PyCurlFileObject(): def __init__(self, url, filename, opts): + self.fo = None + self._hdr_dump = '' + self._parsed_hdr = None self.url = url + self.scheme = urlparse.urlsplit(self.url)[0] self.filename = filename + self.append = False + self.reget_time = None self.opts = opts - self.fo = None + if self.opts.reget == 'check_timestamp': + raise NotImplementedError, "check_timestamp regets are not implemented in this ver of urlgrabber. Please report this." + self._complete = False self._rbuf = '' self._rbufsize = 1024*8 self._ttime = time.time() self._tsize = 0 self._amount_read = 0 - self._opener = None + self._reget_length = 0 + self._prog_running = False + self._error = (None, None) + self.size = None self._do_open() + def __getattr__(self, name): """This effectively allows us to wrap at the instance level. Any attribute not found in _this_ object will be searched for in self.fo. 
This includes methods.""" + if hasattr(self.fo, name): return getattr(self.fo, name) raise AttributeError, name - - def _get_opener(self): - """Build a urllib2 OpenerDirector based on request options.""" - if self.opts.opener: - return self.opts.opener - elif self._opener is None: - handlers = [] - need_keepalive_handler = (have_keepalive and self.opts.keepalive) - need_range_handler = (range_handlers and \ - (self.opts.range or self.opts.reget)) - # if you specify a ProxyHandler when creating the opener - # it _must_ come before all other handlers in the list or urllib2 - # chokes. - if self.opts.proxies: - handlers.append( CachedProxyHandler(self.opts.proxies) ) - - # ------------------------------------------------------- - # OK, these next few lines are a serious kludge to get - # around what I think is a bug in python 2.2's - # urllib2. The basic idea is that default handlers - # get applied first. If you override one (like a - # proxy handler), then the default gets pulled, but - # the replacement goes on the end. In the case of - # proxies, this means the normal handler picks it up - # first and the proxy isn't used. Now, this probably - # only happened with ftp or non-keepalive http, so not - # many folks saw it. The simple approach to fixing it - # is just to make sure you override the other - # conflicting defaults as well. I would LOVE to see - # these go way or be dealt with more elegantly. The - # problem isn't there after 2.2. -MDS 2005/02/24 - if not need_keepalive_handler: - handlers.append( urllib2.HTTPHandler() ) - if not need_range_handler: - handlers.append( urllib2.FTPHandler() ) - # ------------------------------------------------------- - - ssl_factory = sslfactory.get_factory(self.opts.ssl_ca_cert, - self.opts.ssl_context) - - if need_keepalive_handler: - handlers.append(HTTPHandler()) - handlers.append(HTTPSHandler(ssl_factory)) - if need_range_handler: - handlers.extend( range_handlers ) - handlers.append( auth_handler ) - if self.opts.cache_openers: - self._opener = CachedOpenerDirector(ssl_factory, *handlers) - else: - self._opener = ssl_factory.create_opener(*handlers) - # OK, I don't like to do this, but otherwise, we end up with - # TWO user-agent headers. 
- self._opener.addheaders = [] - return self._opener + + def _retrieve(self, buf): + try: + if not self._prog_running: + if self.opts.progress_obj: + size = self.size + self._reget_length + self.opts.progress_obj.start(self._prog_reportname, + urllib.unquote(self.url), + self._prog_basename, + size=size, + text=self.opts.text) + self._prog_running = True + self.opts.progress_obj.update(self._amount_read) + + self._amount_read += len(buf) + self.fo.write(buf) + return len(buf) + except KeyboardInterrupt: + return -1 + + def _hdr_retrieve(self, buf): + if self._over_max_size(cur=len(self._hdr_dump), + max_size=self.opts.max_header_size): + return -1 + try: + self._hdr_dump += buf + # we have to get the size before we do the progress obj start + # but we can't do that w/o making it do 2 connects, which sucks + # so we cheat and stuff it in here in the hdr_retrieve + if self.scheme in ['http','https'] and buf.lower().find('content-length') != -1: + length = buf.split(':')[1] + self.size = int(length) + elif self.scheme in ['ftp']: + s = None + if buf.startswith('213 '): + s = buf[3:].strip() + elif buf.startswith('150 '): + s = parse150(buf) + if s: + self.size = int(s) + + return len(buf) + except KeyboardInterrupt: + return pycurl.READFUNC_ABORT + + def _return_hdr_obj(self): + if self._parsed_hdr: + return self._parsed_hdr + statusend = self._hdr_dump.find('\n') + hdrfp = StringIO() + hdrfp.write(self._hdr_dump[statusend:]) + self._parsed_hdr = mimetools.Message(hdrfp) + return self._parsed_hdr + + hdr = property(_return_hdr_obj) + http_code = property(fget= + lambda self: self.curl_obj.getinfo(pycurl.RESPONSE_CODE)) + + def _set_opts(self, opts={}): + # XXX + if not opts: + opts = self.opts + + + # defaults we're always going to set + self.curl_obj.setopt(pycurl.NOPROGRESS, False) + self.curl_obj.setopt(pycurl.NOSIGNAL, True) + self.curl_obj.setopt(pycurl.WRITEFUNCTION, self._retrieve) + self.curl_obj.setopt(pycurl.HEADERFUNCTION, self._hdr_retrieve) + self.curl_obj.setopt(pycurl.PROGRESSFUNCTION, self._progress_update) + self.curl_obj.setopt(pycurl.FAILONERROR, True) + self.curl_obj.setopt(pycurl.OPT_FILETIME, True) - def _do_open(self): - opener = self._get_opener() + if DEBUG: + self.curl_obj.setopt(pycurl.VERBOSE, True) + if opts.user_agent: + self.curl_obj.setopt(pycurl.USERAGENT, opts.user_agent) + + # maybe to be options later + self.curl_obj.setopt(pycurl.FOLLOWLOCATION, True) + self.curl_obj.setopt(pycurl.MAXREDIRS, 5) + + # timeouts + timeout = 300 + if opts.timeout: + timeout = int(opts.timeout) + self.curl_obj.setopt(pycurl.CONNECTTIMEOUT, timeout) + + # ssl options + if self.scheme == 'https': + if opts.ssl_ca_cert: # this may do ZERO with nss according to curl docs + self.curl_obj.setopt(pycurl.CAPATH, opts.ssl_ca_cert) + self.curl_obj.setopt(pycurl.CAINFO, opts.ssl_ca_cert) + self.curl_obj.setopt(pycurl.SSL_VERIFYPEER, opts.ssl_verify_peer) + self.curl_obj.setopt(pycurl.SSL_VERIFYHOST, opts.ssl_verify_host) + if opts.ssl_key: + self.curl_obj.setopt(pycurl.SSLKEY, opts.ssl_key) + if opts.ssl_key_type: + self.curl_obj.setopt(pycurl.SSLKEYTYPE, opts.ssl_key_type) + if opts.ssl_cert: + self.curl_obj.setopt(pycurl.SSLCERT, opts.ssl_cert) + if opts.ssl_cert_type: + self.curl_obj.setopt(pycurl.SSLCERTTYPE, opts.ssl_cert_type) + if opts.ssl_key_pass: + self.curl_obj.setopt(pycurl.SSLKEYPASSWD, opts.ssl_key_pass) + + #headers: + if opts.http_headers and self.scheme in ('http', 'https'): + headers = [] + for (tag, content) in opts.http_headers: + headers.append('%s:%s' % (tag, 
content)) + self.curl_obj.setopt(pycurl.HTTPHEADER, headers) + + # ranges: + if opts.range or opts.reget: + range_str = self._build_range() + if range_str: + self.curl_obj.setopt(pycurl.RANGE, range_str) + + # throttle/bandwidth + if hasattr(opts, 'raw_throttle') and opts.raw_throttle(): + self.curl_obj.setopt(pycurl.MAX_RECV_SPEED_LARGE, int(opts.raw_throttle())) + + # proxy settings + if opts.proxies: + for (scheme, proxy) in opts.proxies.items(): + if self.scheme in ('ftp'): # only set the ftp proxy for ftp items + if scheme not in ('ftp'): + continue + else: + if proxy == '_none_': proxy = "" + self.curl_obj.setopt(pycurl.PROXY, proxy) + elif self.scheme in ('http', 'https'): + if scheme not in ('http', 'https'): + continue + else: + if proxy == '_none_': proxy = "" + self.curl_obj.setopt(pycurl.PROXY, proxy) + + # FIXME username/password/auth settings - req = urllib2.Request(self.url, self.opts.data) # build request object - self._add_headers(req) # add misc headers that we need - self._build_range(req) # take care of reget and byterange stuff - - fo, hdr = self._make_request(req, opener) - if self.reget_time and self.opts.reget == 'check_timestamp': - # do this if we have a local file with known timestamp AND - # we're in check_timestamp reget mode. - fetch_again = 0 - try: - modified_tuple = hdr.getdate_tz('last-modified') - modified_stamp = rfc822.mktime_tz(modified_tuple) - if modified_stamp > self.reget_time: fetch_again = 1 - except (TypeError,): - fetch_again = 1 + #posts - simple - expects the fields as they are + if opts.data: + self.curl_obj.setopt(pycurl.POST, True) + self.curl_obj.setopt(pycurl.POSTFIELDS, self._to_utf8(opts.data)) - if fetch_again: - # the server version is newer than the (incomplete) local - # version, so we should abandon the version we're getting - # and fetch the whole thing again. - fo.close() - self.opts.reget = None - del req.headers['Range'] - self._build_range(req) - fo, hdr = self._make_request(req, opener) - - (scheme, host, path, parm, query, frag) = urlparse.urlparse(self.url) - path = urllib.unquote(path) - if not (self.opts.progress_obj or self.opts.raw_throttle() \ - or self.opts.timeout): - # if we're not using the progress_obj, throttling, or timeout - # we can get a performance boost by going directly to - # the underlying fileobject for reads. 
- self.read = fo.read - if hasattr(fo, 'readline'): - self.readline = fo.readline - elif self.opts.progress_obj: - try: - length = int(hdr['Content-Length']) - length = length + self._amount_read # Account for regets - except (KeyError, ValueError, TypeError): - length = None - - self.opts.progress_obj.start(str(self.filename), - urllib.unquote(self.url), - os.path.basename(path), - length, text=self.opts.text) - self.opts.progress_obj.update(0) - (self.fo, self.hdr) = (fo, hdr) - - def _add_headers(self, req): - if self.opts.user_agent: - req.add_header('User-agent', self.opts.user_agent) - try: req_type = req.get_type() - except ValueError: req_type = None - if self.opts.http_headers and req_type in ('http', 'https'): - for h, v in self.opts.http_headers: - req.add_header(h, v) - if self.opts.ftp_headers and req_type == 'ftp': - for h, v in self.opts.ftp_headers: - req.add_header(h, v) + # our url + self.curl_obj.setopt(pycurl.URL, self.url) + + + def _do_perform(self): + if self._complete: + return + + try: + self.curl_obj.perform() + except pycurl.error, e: + # XXX - break some of these out a bit more clearly + # to other URLGrabErrors from + # http://curl.haxx.se/libcurl/c/libcurl-errors.html + # this covers e.args[0] == 22 pretty well - which will be common + + code = self.http_code + errcode = e.args[0] + if self._error[0]: + errcode = self._error[0] + + if errcode == 23 and code >= 200 and code < 299: + err = URLGrabError(15, _('User (or something) called abort %s: %s') % (self.url, e)) + err.url = self.url + + # this is probably wrong but ultimately this is what happens + # we have a legit http code and a pycurl 'writer failed' code + # which almost always means something aborted it from outside + # since we cannot know what it is -I'm banking on it being + # a ctrl-c. XXXX - if there's a way of going back two raises to + # figure out what aborted the pycurl process FIXME + raise KeyboardInterrupt + + elif errcode == 28: + err = URLGrabError(12, _('Timeout on %s: %s') % (self.url, e)) + err.url = self.url + raise err + elif errcode == 35: + msg = _("problem making ssl connection") + err = URLGrabError(14, msg) + err.url = self.url + raise err + elif errcode == 37: + msg = _("Could not open/read %s") % (self.url) + err = URLGrabError(14, msg) + err.url = self.url + raise err + + elif errcode == 42: + err = URLGrabError(15, _('User (or something) called abort %s: %s') % (self.url, e)) + err.url = self.url + # this is probably wrong but ultimately this is what happens + # we have a legit http code and a pycurl 'writer failed' code + # which almost always means something aborted it from outside + # since we cannot know what it is -I'm banking on it being + # a ctrl-c. 
XXXX - if there's a way of going back two raises to + # figure out what aborted the pycurl process FIXME + raise KeyboardInterrupt + + elif errcode == 58: + msg = _("problem with the local client certificate") + err = URLGrabError(14, msg) + err.url = self.url + raise err + + elif errcode == 60: + msg = _("client cert cannot be verified or client cert incorrect") + err = URLGrabError(14, msg) + err.url = self.url + raise err + + elif errcode == 63: + if self._error[1]: + msg = self._error[1] + else: + msg = _("Max download size exceeded on %s") % (self.url) + err = URLGrabError(14, msg) + err.url = self.url + raise err + + elif str(e.args[1]) == '' and self.http_code != 0: # fake it until you make it + msg = 'HTTP Error %s : %s ' % (self.http_code, self.url) + else: + msg = 'PYCURL ERROR %s - "%s"' % (errcode, str(e.args[1])) + code = errcode + err = URLGrabError(14, msg) + err.code = code + err.exception = e + raise err - def _build_range(self, req): - self.reget_time = None - self.append = 0 + def _do_open(self): + self.curl_obj = _curl_cache + self.curl_obj.reset() # reset all old settings away, just in case + # setup any ranges + self._set_opts() + self._do_grab() + return self.fo + + def _add_headers(self): + pass + + def _build_range(self): reget_length = 0 rt = None - if have_range and self.opts.reget and type(self.filename) == type(''): + if self.opts.reget and type(self.filename) in types.StringTypes: # we have reget turned on and we're dumping to a file try: s = os.stat(self.filename) except OSError: pass else: - self.reget_time = s[ST_MTIME] - reget_length = s[ST_SIZE] + self.reget_time = s[stat.ST_MTIME] + reget_length = s[stat.ST_SIZE] # Set initial length when regetting self._amount_read = reget_length + self._reget_length = reget_length # set where we started from, too rt = reget_length, '' self.append = 1 if self.opts.range: - if not have_range: - raise URLGrabError(10, _('Byte range requested but range '\ - 'support unavailable')) rt = self.opts.range if rt[0]: rt = (rt[0] + reget_length, rt[1]) if rt: header = range_tuple_to_header(rt) - if header: req.add_header('Range', header) + if header: + return header.split('=')[1] + + def _make_request(self, req, opener): + #XXXX + # This doesn't do anything really, but we could use this + # instead of do_open() to catch a lot of crap errors as + # mstenner did before here + return (self.fo, self.hdr) + try: - if have_socket_timeout and self.opts.timeout: + if self.opts.timeout: old_to = socket.getdefaulttimeout() socket.setdefaulttimeout(self.opts.timeout) try: @@ -1172,50 +1362,99 @@ fo = opener.open(req) hdr = fo.info() except ValueError, e: - raise URLGrabError(1, _('Bad URL: %s') % (e, )) + err = URLGrabError(1, _('Bad URL: %s : %s') % (self.url, e, )) + err.url = self.url + raise err + except RangeError, e: - raise URLGrabError(9, str(e)) + err = URLGrabError(9, _('%s on %s') % (e, self.url)) + err.url = self.url + raise err except urllib2.HTTPError, e: - new_e = URLGrabError(14, str(e)) + new_e = URLGrabError(14, _('%s on %s') % (e, self.url)) new_e.code = e.code new_e.exception = e + new_e.url = self.url raise new_e except IOError, e: - if hasattr(e, 'reason') and have_socket_timeout and \ - isinstance(e.reason, TimeoutError): - raise URLGrabError(12, _('Timeout: %s') % (e, )) + if hasattr(e, 'reason') and isinstance(e.reason, socket.timeout): + err = URLGrabError(12, _('Timeout on %s: %s') % (self.url, e)) + err.url = self.url + raise err else: - raise URLGrabError(4, _('IOError: %s') % (e, )) + err = URLGrabError(4, 
_('IOError on %s: %s') % (self.url, e)) + err.url = self.url + raise err + except OSError, e: - raise URLGrabError(5, _('OSError: %s') % (e, )) + err = URLGrabError(5, _('%s on %s') % (e, self.url)) + err.url = self.url + raise err + except HTTPException, e: - raise URLGrabError(7, _('HTTP Exception (%s): %s') % \ - (e.__class__.__name__, e)) + err = URLGrabError(7, _('HTTP Exception (%s) on %s: %s') % \ + (e.__class__.__name__, self.url, e)) + err.url = self.url + raise err + else: return (fo, hdr) def _do_grab(self): - """dump the file to self.filename.""" - if self.append: new_fo = open(self.filename, 'ab') - else: new_fo = open(self.filename, 'wb') - bs = 1024*8 - size = 0 - - block = self.read(bs) - size = size + len(block) - while block: - new_fo.write(block) - block = self.read(bs) - size = size + len(block) + """dump the file to a filename or StringIO buffer""" + + if self._complete: + return + _was_filename = False + if type(self.filename) in types.StringTypes and self.filename: + _was_filename = True + self._prog_reportname = str(self.filename) + self._prog_basename = os.path.basename(self.filename) + + if self.append: mode = 'ab' + else: mode = 'wb' + + if DEBUG: DEBUG.info('opening local file "%s" with mode %s' % \ + (self.filename, mode)) + try: + self.fo = open(self.filename, mode) + except IOError, e: + err = URLGrabError(16, _(\ + 'error opening local file from %s, IOError: %s') % (self.url, e)) + err.url = self.url + raise err + + else: + self._prog_reportname = 'MEMORY' + self._prog_basename = 'MEMORY' + + + self.fo = StringIO() + # if this is to be a tempfile instead.... + # it just makes crap in the tempdir + #fh, self._temp_name = mkstemp() + #self.fo = open(self._temp_name, 'wb') + + + self._do_perform() + - new_fo.close() - try: - modified_tuple = self.hdr.getdate_tz('last-modified') - modified_stamp = rfc822.mktime_tz(modified_tuple) - os.utime(self.filename, (modified_stamp, modified_stamp)) - except (TypeError,), e: pass - return size + if _was_filename: + # close it up + self.fo.flush() + self.fo.close() + # set the time + mod_time = self.curl_obj.getinfo(pycurl.INFO_FILETIME) + if mod_time != -1: + os.utime(self.filename, (mod_time, mod_time)) + # re open it + self.fo = open(self.filename, 'r') + else: + #self.fo = open(self._temp_name, 'r') + self.fo.seek(0) + + self._complete = True def _fill_buffer(self, amt=None): """fill the buffer to contain at least 'amt' bytes by reading @@ -1233,7 +1472,9 @@ # if we've made it here, then we don't have enough in the buffer # and we need to read more. 
- + + if not self._complete: self._do_grab() #XXX cheater - change on ranges + buf = [self._rbuf] bufsize = len(self._rbuf) while amt is None or amt: @@ -1250,11 +1491,20 @@ try: new = self.fo.read(readamount) except socket.error, e: - raise URLGrabError(4, _('Socket Error: %s') % (e, )) - except TimeoutError, e: - raise URLGrabError(12, _('Timeout: %s') % (e, )) + err = URLGrabError(4, _('Socket Error on %s: %s') % (self.url, e)) + err.url = self.url + raise err + + except socket.timeout, e: + raise URLGrabError(12, _('Timeout on %s: %s') % (self.url, e)) + err.url = self.url + raise err + except IOError, e: - raise URLGrabError(4, _('IOError: %s') %(e,)) + raise URLGrabError(4, _('IOError on %s: %s') %(self.url, e)) + err.url = self.url + raise err + newsize = len(new) if not newsize: break # no more to read @@ -1263,12 +1513,45 @@ bufsize = bufsize + newsize self._tsize = newsize self._amount_read = self._amount_read + newsize - if self.opts.progress_obj: - self.opts.progress_obj.update(self._amount_read) + #if self.opts.progress_obj: + # self.opts.progress_obj.update(self._amount_read) self._rbuf = string.join(buf, '') return + def _progress_update(self, download_total, downloaded, upload_total, uploaded): + if self._over_max_size(cur=self._amount_read-self._reget_length): + return -1 + + try: + if self._prog_running: + downloaded += self._reget_length + self.opts.progress_obj.update(downloaded) + except KeyboardInterrupt: + return -1 + + def _over_max_size(self, cur, max_size=None): + + if not max_size: + max_size = self.size + if self.opts.size: # if we set an opts size use that, no matter what + max_size = self.opts.size + if not max_size: return False # if we have None for all of the Max then this is dumb + if cur > max_size + max_size*.10: + + msg = _("Downloaded more than max size for %s: %s > %s") \ + % (self.url, cur, max_size) + self._error = (pycurl.E_FILESIZE_EXCEEDED, msg) + return True + return False + + def _to_utf8(self, obj, errors='replace'): + '''convert 'unicode' to an encoded utf-8 byte string ''' + # stolen from yum.i18n + if isinstance(obj, unicode): + obj = obj.encode('utf-8', errors) + return obj + def read(self, amt=None): self._fill_buffer(amt) if amt is None: @@ -1278,6 +1561,9 @@ return s def readline(self, limit=-1): + if not self._complete: self._do_grab() + return self.fo.readline() + i = string.find(self._rbuf, '\n') while i < 0 and not (0 < limit <= len(self._rbuf)): L = len(self._rbuf) @@ -1293,43 +1579,13 @@ return s def close(self): - if self.opts.progress_obj: + if self._prog_running: self.opts.progress_obj.end(self._amount_read) self.fo.close() - if self.opts.close_connection: - try: self.fo.close_connection() - except: pass - -_handler_cache = [] -def CachedOpenerDirector(ssl_factory = None, *handlers): - for (cached_handlers, opener) in _handler_cache: - if cached_handlers == handlers: - for handler in opener.handlers: - handler.add_parent(opener) - return opener - if not ssl_factory: - ssl_factory = sslfactory.get_factory() - opener = ssl_factory.create_opener(*handlers) - _handler_cache.append( (handlers, opener) ) - return opener - -_proxy_cache = [] -def CachedProxyHandler(proxies): - for (pdict, handler) in _proxy_cache: - if pdict == proxies: - if DEBUG: DEBUG.debug('re-using proxy settings: %s', proxies) - break - else: - for k, v in proxies.items(): - utype, url = urllib.splittype(v) - host, other = urllib.splithost(url) - if (utype is None) or (host is None): - raise URLGrabError(13, _('Bad proxy URL: %s') % v) - - if DEBUG: 
DEBUG.info('creating new proxy handler: %s', proxies) - handler = urllib2.ProxyHandler(proxies) - _proxy_cache.append( (proxies, handler) ) - return handler + + +_curl_cache = pycurl.Curl() # make one and reuse it over and over and over + ##################################################################### # DEPRECATED FUNCTIONS @@ -1368,7 +1624,6 @@ ##################################################################### # TESTING def _main_test(): - import sys try: url, filename = sys.argv[1:3] except ValueError: print 'usage:', sys.argv[0], \ @@ -1395,7 +1650,6 @@ def _retry_test(): - import sys try: url, filename = sys.argv[1:3] except ValueError: print 'usage:', sys.argv[0], \ @@ -1430,7 +1684,7 @@ else: print 'LOCAL FILE:', name def _file_object_test(filename=None): - import random, cStringIO, sys + import cStringIO if filename is None: filename = __file__ print 'using file "%s" for comparisons' % filename @@ -1444,7 +1698,7 @@ _test_file_object_readlines]: fo_input = cStringIO.StringIO(s_input) fo_output = cStringIO.StringIO() - wrapper = URLGrabberFileObject(fo_input, None, 0) + wrapper = PyCurlFileObject(fo_input, None, 0) print 'testing %-30s ' % testfunc.__name__, testfunc(wrapper, fo_output) s_output = fo_output.getvalue()
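The core of this patch is replacing the urllib2 opener chain with a single, reused pycurl.Curl handle driven by callbacks (PyCurlFileObject._retrieve, _hdr_retrieve and _progress_update above). As a rough, self-contained sketch of that pattern, assuming only stock pycurl and a placeholder URL, and not reproducing the module's actual code:

    import pycurl
    from StringIO import StringIO

    body = StringIO()
    hdrs = StringIO()

    def progress(download_total, downloaded, upload_total, uploaded):
        # returning a non-zero value aborts the transfer; PyCurlFileObject
        # uses this to enforce its maximum-size option (_over_max_size)
        return 0

    c = pycurl.Curl()
    c.setopt(pycurl.URL, 'http://example.org/file.txt')
    c.setopt(pycurl.WRITEFUNCTION, body.write)    # body chunks, cf. _retrieve()
    c.setopt(pycurl.HEADERFUNCTION, hdrs.write)   # header lines, cf. _hdr_retrieve()
    c.setopt(pycurl.NOPROGRESS, False)
    c.setopt(pycurl.PROGRESSFUNCTION, progress)   # cf. _progress_update()
    c.setopt(pycurl.FOLLOWLOCATION, True)
    c.setopt(pycurl.FAILONERROR, True)            # HTTP errors surface as pycurl.error
    try:
        c.perform()
    except pycurl.error, e:
        errcode, errmsg = e.args[0], e.args[1]    # mapped onto URLGrabError codes in _do_perform()
        print 'pycurl error %s: %s' % (errcode, errmsg)
    else:
        print c.getinfo(pycurl.RESPONSE_CODE), len(body.getvalue())

Reusing one Curl object (as _curl_cache does) avoids repeated handle setup, at the cost of calling reset() before each request, which is what _do_open() does.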