""" This module implements the FormRequest class which is a more convenient class (than Request) to generate Requests based on form data. See documentation in docs/topics/request-response.rst """ import six from six.moves.urllib.parse import urljoin, urlencode import lxml.html from parsel.selector import create_root_node from w3lib.html import strip_html5_whitespace from scrapy.http.request import Request from scrapy.utils.python import to_bytes, is_listlike from scrapy.utils.response import get_base_url class FormRequest(Request): valid_form_methods = ['GET', 'POST'] def __init__(self, *args, **kwargs): formdata = kwargs.pop('formdata', None) if formdata and kwargs.get('method') is None: kwargs['method'] = 'POST' super(FormRequest, self).__init__(*args, **kwargs) if formdata: items = formdata.items() if isinstance(formdata, dict) else formdata querystr = _urlencode(items, self.encoding) if self.method == 'POST': self.headers.setdefault(b'Content-Type', b'application/x-www-form-urlencoded') self._set_body(querystr) else: self._set_url(self.url + ('&' if '?' in self.url else '?') + querystr) @classmethod def from_response(cls, response, formname=None, formid=None, formnumber=0, formdata=None, clickdata=None, dont_click=False, formxpath=None, formcss=None, **kwargs): kwargs.setdefault('encoding', response.encoding) if formcss is not None: from parsel.csstranslator import HTMLTranslator formxpath = HTMLTranslator().css_to_xpath(formcss) form = _get_form(response, formname, formid, formnumber, formxpath) formdata = _get_inputs(form, formdata, dont_click, clickdata, response) url = _get_form_url(form, kwargs.pop('url', None)) method = kwargs.pop('method', form.method) if method is not None: method = method.upper() if method not in cls.valid_form_methods: method = 'GET' return cls(url=url, method=method, formdata=formdata, **kwargs) def _get_form_url(form, url): if url is None: action = form.get('action') if action is None: return form.base_url return urljoin(form.base_url, strip_html5_whitespace(action)) return urljoin(form.base_url, url) def _urlencode(seq, enc): values = [(to_bytes(k, enc), to_bytes(v, enc)) for k, vs in seq for v in (vs if is_listlike(vs) else [vs])] return urlencode(values, doseq=1) def _get_form(response, formname, formid, formnumber, formxpath): """Find the form element """ root = create_root_node(response.text, lxml.html.HTMLParser, base_url=get_base_url(response)) forms = root.xpath('//form') if not forms: raise ValueError("No