__init__.py 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182
  1. '''A high-level interface to the pycurl extension'''
  2. # ** mfx NOTE: the CGI class uses "black magic" using COOKIEFILE in
  3. # combination with a non-existent file name. See the libcurl docs
  4. # for more info.
  5. import sys, pycurl
  6. py3 = sys.version_info[0] == 3
  7. # python 2/3 compatibility
  8. if py3:
  9. import urllib.parse as urllib_parse
  10. from urllib.parse import urljoin
  11. from io import BytesIO
  12. else:
  13. import urllib as urllib_parse
  14. from urlparse import urljoin
  15. try:
  16. from cStringIO import StringIO as BytesIO
  17. except ImportError:
  18. from StringIO import StringIO as BytesIO
  19. try:
  20. import signal
  21. from signal import SIGPIPE, SIG_IGN
  22. signal.signal(signal.SIGPIPE, signal.SIG_IGN)
  23. except ImportError:
  24. pass
  25. class Curl:
  26. "High-level interface to pycurl functions."
  27. def __init__(self, base_url="", fakeheaders=[]):
  28. self.handle = pycurl.Curl()
  29. # These members might be set.
  30. self.set_url(base_url)
  31. self.verbosity = 0
  32. self.fakeheaders = fakeheaders
  33. # Nothing past here should be modified by the caller.
  34. self.payload = None
  35. self.payload_io = BytesIO()
  36. self.hrd = ""
  37. # Verify that we've got the right site; harmless on a non-SSL connect.
  38. self.set_option(pycurl.SSL_VERIFYHOST, 2)
  39. # Follow redirects in case it wants to take us to a CGI...
  40. self.set_option(pycurl.FOLLOWLOCATION, 1)
  41. self.set_option(pycurl.MAXREDIRS, 5)
  42. self.set_option(pycurl.NOSIGNAL, 1)
  43. # Setting this option with even a nonexistent file makes libcurl
  44. # handle cookie capture and playback automatically.
  45. self.set_option(pycurl.COOKIEFILE, "/dev/null")
  46. # Set timeouts to avoid hanging too long
  47. self.set_timeout(30)
  48. # Use password identification from .netrc automatically
  49. self.set_option(pycurl.NETRC, 1)
  50. self.set_option(pycurl.WRITEFUNCTION, self.payload_io.write)
  51. def header_callback(x):
  52. self.hdr += x.decode('ascii')
  53. self.set_option(pycurl.HEADERFUNCTION, header_callback)
  54. def set_timeout(self, timeout):
  55. "Set timeout for a retrieving an object"
  56. self.set_option(pycurl.TIMEOUT, timeout)
  57. def set_url(self, url):
  58. "Set the base URL to be retrieved."
  59. self.base_url = url
  60. self.set_option(pycurl.URL, self.base_url)
  61. def set_option(self, *args):
  62. "Set an option on the retrieval."
  63. self.handle.setopt(*args)
  64. def set_verbosity(self, level):
  65. "Set verbosity to 1 to see transactions."
  66. self.set_option(pycurl.VERBOSE, level)
  67. def __request(self, relative_url=None):
  68. "Perform the pending request."
  69. if self.fakeheaders:
  70. self.set_option(pycurl.HTTPHEADER, self.fakeheaders)
  71. if relative_url:
  72. self.set_option(pycurl.URL, urljoin(self.base_url, relative_url))
  73. self.payload = None
  74. self.hdr = ""
  75. self.handle.perform()
  76. self.payload = self.payload_io.getvalue()
  77. return self.payload
  78. def get(self, url="", params=None):
  79. "Ship a GET request for a specified URL, capture the response."
  80. if params:
  81. url += "?" + urllib_parse.urlencode(params)
  82. self.set_option(pycurl.HTTPGET, 1)
  83. return self.__request(url)
  84. def post(self, cgi, params):
  85. "Ship a POST request to a specified CGI, capture the response."
  86. self.set_option(pycurl.POST, 1)
  87. self.set_option(pycurl.POSTFIELDS, urllib_parse.urlencode(params))
  88. return self.__request(cgi)
  89. def body(self):
  90. "Return the body from the last response."
  91. return self.payload
  92. def header(self):
  93. "Return the header from the last response."
  94. return self.hdr
  95. def get_info(self, *args):
  96. "Get information about retrieval."
  97. return self.handle.getinfo(*args)
  98. def info(self):
  99. "Return a dictionary with all info on the last response."
  100. m = {}
  101. m['effective-url'] = self.handle.getinfo(pycurl.EFFECTIVE_URL)
  102. m['http-code'] = self.handle.getinfo(pycurl.HTTP_CODE)
  103. m['total-time'] = self.handle.getinfo(pycurl.TOTAL_TIME)
  104. m['namelookup-time'] = self.handle.getinfo(pycurl.NAMELOOKUP_TIME)
  105. m['connect-time'] = self.handle.getinfo(pycurl.CONNECT_TIME)
  106. m['pretransfer-time'] = self.handle.getinfo(pycurl.PRETRANSFER_TIME)
  107. m['redirect-time'] = self.handle.getinfo(pycurl.REDIRECT_TIME)
  108. m['redirect-count'] = self.handle.getinfo(pycurl.REDIRECT_COUNT)
  109. m['size-upload'] = self.handle.getinfo(pycurl.SIZE_UPLOAD)
  110. m['size-download'] = self.handle.getinfo(pycurl.SIZE_DOWNLOAD)
  111. m['speed-upload'] = self.handle.getinfo(pycurl.SPEED_UPLOAD)
  112. m['header-size'] = self.handle.getinfo(pycurl.HEADER_SIZE)
  113. m['request-size'] = self.handle.getinfo(pycurl.REQUEST_SIZE)
  114. m['content-length-download'] = self.handle.getinfo(pycurl.CONTENT_LENGTH_DOWNLOAD)
  115. m['content-length-upload'] = self.handle.getinfo(pycurl.CONTENT_LENGTH_UPLOAD)
  116. m['content-type'] = self.handle.getinfo(pycurl.CONTENT_TYPE)
  117. m['response-code'] = self.handle.getinfo(pycurl.RESPONSE_CODE)
  118. m['speed-download'] = self.handle.getinfo(pycurl.SPEED_DOWNLOAD)
  119. m['ssl-verifyresult'] = self.handle.getinfo(pycurl.SSL_VERIFYRESULT)
  120. m['filetime'] = self.handle.getinfo(pycurl.INFO_FILETIME)
  121. m['starttransfer-time'] = self.handle.getinfo(pycurl.STARTTRANSFER_TIME)
  122. m['redirect-time'] = self.handle.getinfo(pycurl.REDIRECT_TIME)
  123. m['redirect-count'] = self.handle.getinfo(pycurl.REDIRECT_COUNT)
  124. m['http-connectcode'] = self.handle.getinfo(pycurl.HTTP_CONNECTCODE)
  125. m['httpauth-avail'] = self.handle.getinfo(pycurl.HTTPAUTH_AVAIL)
  126. m['proxyauth-avail'] = self.handle.getinfo(pycurl.PROXYAUTH_AVAIL)
  127. m['os-errno'] = self.handle.getinfo(pycurl.OS_ERRNO)
  128. m['num-connects'] = self.handle.getinfo(pycurl.NUM_CONNECTS)
  129. m['ssl-engines'] = self.handle.getinfo(pycurl.SSL_ENGINES)
  130. m['cookielist'] = self.handle.getinfo(pycurl.INFO_COOKIELIST)
  131. m['lastsocket'] = self.handle.getinfo(pycurl.LASTSOCKET)
  132. m['ftp-entry-path'] = self.handle.getinfo(pycurl.FTP_ENTRY_PATH)
  133. return m
  134. def answered(self, check):
  135. "Did a given check string occur in the last payload?"
  136. return self.payload.find(check) >= 0
  137. def close(self):
  138. "Close a session, freeing resources."
  139. if self.handle:
  140. self.handle.close()
  141. self.handle = None
  142. self.hdr = ""
  143. self.payload = ""
  144. def __del__(self):
  145. self.close()
  146. if __name__ == "__main__":
  147. if len(sys.argv) < 2:
  148. url = 'http://curl.haxx.se'
  149. else:
  150. url = sys.argv[1]
  151. c = Curl()
  152. c.get(url)
  153. print(c.body())
  154. print('='*74 + '\n')
  155. import pprint
  156. pprint.pprint(c.info())
  157. print(c.get_info(pycurl.OS_ERRNO))
  158. print(c.info()['os-errno'])
  159. c.close()