123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182 |
- '''A high-level interface to the pycurl extension'''
- # ** mfx NOTE: the CGI class uses "black magic" using COOKIEFILE in
- # combination with a nonexistent file name. See the libcurl docs
- # for more info.
- import sys, pycurl
- py3 = sys.version_info[0] == 3
- # python 2/3 compatibility
- if py3:
- import urllib.parse as urllib_parse
- from urllib.parse import urljoin
- from io import BytesIO
- else:
- import urllib as urllib_parse
- from urlparse import urljoin
- try:
- from cStringIO import StringIO as BytesIO
- except ImportError:
- from StringIO import StringIO as BytesIO
# Ignore SIGPIPE where the platform defines it.  The names are imported
# directly so that a platform lacking SIGPIPE raises ImportError and the
# handler installation is skipped.
try:
    import signal
    from signal import SIGPIPE, SIG_IGN
except ImportError:
    pass
else:
    signal.signal(SIGPIPE, SIG_IGN)
class Curl:
    """High-level interface to pycurl functions.

    Wraps one ``pycurl.Curl`` handle.  The body of the most recent response
    is available from ``body()`` and its headers from ``header()``.
    """

    def __init__(self, base_url="", fakeheaders=None):
        """Create the underlying handle and apply the default options.

        base_url -- URL that relative request URLs are joined against.
        fakeheaders -- optional list of header strings; when non-empty it
            is passed as HTTPHEADER on every request.
        """
        self.handle = pycurl.Curl()
        # These members might be set.
        self.set_url(base_url)
        self.verbosity = 0
        # Use a fresh list per instance; a mutable default argument would
        # be shared by every Curl object created without fakeheaders.
        self.fakeheaders = [] if fakeheaders is None else fakeheaders
        # Nothing past here should be modified by the caller.
        self.payload = None
        self.payload_io = BytesIO()
        self.hdr = ""
        # Verify that we've got the right site; harmless on a non-SSL connect.
        self.set_option(pycurl.SSL_VERIFYHOST, 2)
        # Follow redirects in case it wants to take us to a CGI...
        self.set_option(pycurl.FOLLOWLOCATION, 1)
        self.set_option(pycurl.MAXREDIRS, 5)
        self.set_option(pycurl.NOSIGNAL, 1)
        # Setting this option with even a nonexistent file makes libcurl
        # handle cookie capture and playback automatically.
        self.set_option(pycurl.COOKIEFILE, "/dev/null")
        # Set timeouts to avoid hanging too long
        self.set_timeout(30)
        # Use password identification from .netrc automatically
        self.set_option(pycurl.NETRC, 1)
        self.set_option(pycurl.WRITEFUNCTION, self.payload_io.write)

        def header_callback(x):
            # iso-8859-1 maps every byte to a character, so a header
            # containing a non-ASCII byte cannot raise inside the callback.
            self.hdr += x.decode('iso-8859-1')

        self.set_option(pycurl.HEADERFUNCTION, header_callback)

    def set_timeout(self, timeout):
        """Set the timeout for retrieving an object."""
        self.set_option(pycurl.TIMEOUT, timeout)

    def set_url(self, url):
        """Set the base URL to be retrieved."""
        self.base_url = url
        self.set_option(pycurl.URL, self.base_url)

    def set_option(self, *args):
        """Set an option on the retrieval (forwarded to the handle's setopt)."""
        self.handle.setopt(*args)

    def set_verbosity(self, level):
        """Set verbosity to 1 to see transactions."""
        # Keep the public attribute in step with the handle option.
        self.verbosity = level
        self.set_option(pycurl.VERBOSE, level)

    def __request(self, relative_url=None):
        """Perform the pending request and return the response body.

        relative_url -- when non-empty, joined against ``base_url`` and set
            as the request URL; otherwise the handle's current URL is used.
        """
        if self.fakeheaders:
            self.set_option(pycurl.HTTPHEADER, self.fakeheaders)
        if relative_url:
            self.set_option(pycurl.URL, urljoin(self.base_url, relative_url))
        self.payload = None
        # Discard the previous response; without this each body would be
        # appended to the ones before it.
        self.payload_io.seek(0)
        self.payload_io.truncate()
        self.hdr = ""
        self.handle.perform()
        self.payload = self.payload_io.getvalue()
        return self.payload

    def get(self, url="", params=None):
        """Ship a GET request for a specified URL, capture the response.

        params -- optional mapping or sequence of pairs, URL-encoded and
            appended to ``url`` as a query string.
        """
        if params:
            # Extend an existing query string rather than adding a second "?".
            separator = "&" if "?" in url else "?"
            url += separator + urllib_parse.urlencode(params)
        self.set_option(pycurl.HTTPGET, 1)
        return self.__request(url)

    def post(self, cgi, params):
        """Ship a POST request to a specified CGI, capture the response."""
        self.set_option(pycurl.POST, 1)
        self.set_option(pycurl.POSTFIELDS, urllib_parse.urlencode(params))
        return self.__request(cgi)

    def body(self):
        """Return the body from the last response."""
        return self.payload

    def header(self):
        """Return the header from the last response."""
        return self.hdr

    def get_info(self, *args):
        """Get information about retrieval (forwarded to the handle's getinfo)."""
        return self.handle.getinfo(*args)

    def info(self):
        """Return a dictionary with all info on the last response."""
        # Each key is listed once; every entry costs one getinfo call.
        fields = (
            ('effective-url', pycurl.EFFECTIVE_URL),
            ('http-code', pycurl.HTTP_CODE),
            ('total-time', pycurl.TOTAL_TIME),
            ('namelookup-time', pycurl.NAMELOOKUP_TIME),
            ('connect-time', pycurl.CONNECT_TIME),
            ('pretransfer-time', pycurl.PRETRANSFER_TIME),
            ('redirect-time', pycurl.REDIRECT_TIME),
            ('redirect-count', pycurl.REDIRECT_COUNT),
            ('size-upload', pycurl.SIZE_UPLOAD),
            ('size-download', pycurl.SIZE_DOWNLOAD),
            ('speed-upload', pycurl.SPEED_UPLOAD),
            ('header-size', pycurl.HEADER_SIZE),
            ('request-size', pycurl.REQUEST_SIZE),
            ('content-length-download', pycurl.CONTENT_LENGTH_DOWNLOAD),
            ('content-length-upload', pycurl.CONTENT_LENGTH_UPLOAD),
            ('content-type', pycurl.CONTENT_TYPE),
            ('response-code', pycurl.RESPONSE_CODE),
            ('speed-download', pycurl.SPEED_DOWNLOAD),
            ('ssl-verifyresult', pycurl.SSL_VERIFYRESULT),
            ('filetime', pycurl.INFO_FILETIME),
            ('starttransfer-time', pycurl.STARTTRANSFER_TIME),
            ('http-connectcode', pycurl.HTTP_CONNECTCODE),
            ('httpauth-avail', pycurl.HTTPAUTH_AVAIL),
            ('proxyauth-avail', pycurl.PROXYAUTH_AVAIL),
            ('os-errno', pycurl.OS_ERRNO),
            ('num-connects', pycurl.NUM_CONNECTS),
            ('ssl-engines', pycurl.SSL_ENGINES),
            ('cookielist', pycurl.INFO_COOKIELIST),
            ('lastsocket', pycurl.LASTSOCKET),
            ('ftp-entry-path', pycurl.FTP_ENTRY_PATH),
        )
        return dict((key, self.handle.getinfo(opt)) for key, opt in fields)

    def answered(self, check):
        """Did a given check string occur in the last payload?

        Returns False when there is no payload yet.
        """
        if self.payload is None:
            return False
        return self.payload.find(check) >= 0

    def close(self):
        """Close a session, freeing resources."""
        # __del__ also lands here, possibly on an instance whose __init__
        # failed before ``handle`` was assigned, so look it up defensively.
        handle = getattr(self, "handle", None)
        if handle is not None:
            handle.close()
        self.handle = None
        self.hdr = ""
        self.payload = ""

    def __del__(self):
        self.close()
if __name__ == "__main__":
    # Demo driver: fetch the URL named on the command line (or a default
    # site), print the body, then dump the transfer details.
    import pprint

    target = sys.argv[1] if len(sys.argv) >= 2 else 'http://curl.haxx.se'
    session = Curl()
    session.get(target)
    print(session.body())
    print('='*74 + '\n')
    pprint.pprint(session.info())
    # The same value fetched two ways: directly and through info().
    print(session.get_info(pycurl.OS_ERRNO))
    print(session.info()['os-errno'])
    session.close()
|