Override some of the twisted.names methods to disable logging.
[ooni-probe.git] / ooni / templates / httpt.py
1 import copy
2 import random
3 import struct
4
5 from twisted.plugin import IPlugin
6 from twisted.internet import protocol, defer
7 from twisted.internet.ssl import ClientContextFactory
8
9 from twisted.internet import reactor
10 from twisted.internet.error import ConnectionRefusedError, DNSLookupError, TCPTimedOutError
11 from twisted.internet.endpoints import TCP4ClientEndpoint
12 from twisted.web._newclient import Request, Response, ResponseNeverReceived
13 from twisted.web.client import Agent
14 from ooni.utils.trueheaders import TrueHeadersAgent, TrueHeadersSOCKS5Agent
15
16 from ooni.nettest import NetTestCase
17 from ooni.utils import log
18 from ooni.settings import config
19
20 from ooni.utils.net import BodyReceiver, StringProducer, userAgents
21
22 from ooni.utils.trueheaders import TrueHeaders
23 from ooni.errors import handleAllFailures
24
25
class InvalidSocksProxyOption(Exception):
    """
    Raised by HTTPTest._setUp when the 'socksproxy' option cannot be split
    into a host and a port (expected form: ip:port).
    """
28
class HTTPTest(NetTestCase):
    """
    A utility class for dealing with HTTP based testing. It provides methods to
    be overridden for dealing with HTTP based testing.
    The main functions to look at are processResponseBody and
    processResponseHeaders, which are invoked once the response headers have
    been received and once the response body has been received.

    To perform a request over Tor pass use_tor=True to doRequest: the request
    is then sent through self.control_agent (a SOCKS5 agent pointed at
    127.0.0.1:config.tor.socks_port) instead of self.agent.

    XXX all of this requires some refactoring.
    """
    name = "HTTP Test"
    version = "0.1.1"

    # When True, doRequest sets a random User-Agent on every request.
    randomizeUA = False
    # When True, both agents are wrapped in twisted's RedirectAgent.
    followRedirects = False

    baseParameters = [['socksproxy', 's', None,
        'Specify a socks proxy to use for requests (ip:port)']]

    def _setUp(self):
        """
        Build the agents used by doRequest and record the setup in the report.

        self.control_agent always goes through the SOCKS port configured in
        config.tor.socks_port on 127.0.0.1. self.agent goes through the
        'socksproxy' option when it is set, otherwise it is a plain
        TrueHeadersAgent.

        Raises:
            InvalidSocksProxyOption: if the 'socksproxy' option is set but is
                not of the form host:port with an integer port.
        """
        super(HTTPTest, self)._setUp()

        # Only a probe: warn early if https cannot work. Kept best-effort, so
        # any import-time failure results in a warning rather than a crash.
        try:
            import OpenSSL
        except Exception:
            log.err("Warning! pyOpenSSL is not installed. https websites will "
                    "not work")

        self.control_agent = TrueHeadersSOCKS5Agent(reactor,
            proxyEndpoint=TCP4ClientEndpoint(reactor, '127.0.0.1',
                config.tor.socks_port))

        self.report['socksproxy'] = None
        socksproxy = self.localOptions['socksproxy']
        if socksproxy:
            # Both a wrong number of ':' separated fields and a non numeric
            # port raise ValueError; report them as the same option error.
            try:
                sockshost, socksport = socksproxy.split(':')
                socksport = int(socksport)
            except ValueError:
                raise InvalidSocksProxyOption(
                    "expected ip:port, got %r" % (socksproxy,))
            self.report['socksproxy'] = socksproxy
            self.agent = TrueHeadersSOCKS5Agent(reactor,
                proxyEndpoint=TCP4ClientEndpoint(reactor, sockshost,
                    socksport))
        else:
            self.agent = TrueHeadersAgent(reactor)

        self.report['agent'] = 'agent'

        if self.followRedirects:
            # Only the import is expected to fail (old twisted); keep the
            # wrapping itself outside of the try so real errors surface.
            try:
                from twisted.web.client import RedirectAgent
            except ImportError:
                log.err("Warning! You are running an old version of twisted "
                        "(<= 10.1). I will not be able to follow redirects. "
                        "This may make the testing less precise.")
            else:
                self.control_agent = RedirectAgent(self.control_agent)
                self.agent = RedirectAgent(self.agent)
                self.report['agent'] = 'redirect'

        self.processInputs()
        log.debug("Finished test setup")

    def randomize_useragent(self, request):
        """
        Set the User-Agent header of the request to a random entry of
        userAgents.

        Args:
            request (dict): the request description; request['headers'] is
                modified in place.
        """
        user_agent = random.choice(userAgents)
        request['headers']['User-Agent'] = [user_agent]

    def processInputs(self):
        """
        Hook invoked at the end of _setUp. Does nothing by default.
        """
        pass

    def addToReport(self, request, response=None, response_body=None, failure_string=None):
        """
        Adds to the report the specified request and response.

        Args:
            request (dict): A dict describing the request that was made. The
                keys 'headers', 'body', 'url', 'method' and 'tor' are read.

            response (instance): An instance of
                :class:twisted.web.client.Response.
                Note: headers is our modified True Headers version.

            response_body (str): the body to store alongside the response;
                only used when response is given.

            failure_string (str): a description of the failure, stored under
                the 'failure' key when it is not empty.
        """
        log.debug("Adding %s to report" % request)
        request_headers = TrueHeaders(request['headers'])
        request_response = {
            'request': {
                'headers': list(request_headers.getAllRawHeaders()),
                'body': request['body'],
                'url': request['url'],
                'method': request['method'],
                'tor': request['tor']
            }
        }
        if response:
            request_response['response'] = {
                'headers': list(response.headers.getAllRawHeaders()),
                'body': response_body,
                'code': response.code
            }
        if failure_string:
            request_response['failure'] = failure_string

        # doRequest normally creates this list, but do not rely on it so that
        # addToReport can also be called on its own.
        if 'requests' not in self.report:
            self.report['requests'] = []
        self.report['requests'].append(request_response)

    def _processResponseBody(self, response_body, request, response, body_processor):
        """
        Callback fired with the response body: report the request/response
        pair, hand the body to the processor and return the response.

        Args:
            response_body (str): the body of the HTTP response

            request (dict): the dict describing the request

            response: the response object; its body attribute is set to
                response_body before it is returned.

            body_processor (func): if set it is called with the body instead
                of self.processResponseBody.

        Returns:
            the response that was passed in.
        """
        log.debug("Processing response body")
        HTTPTest.addToReport(self, request, response, response_body)
        if body_processor:
            body_processor(response_body)
        else:
            self.processResponseBody(response_body)
        response.body = response_body
        return response

    def processResponseBody(self, body):
        """
        Overwrite this method if you wish to interact with the response body of
        every request that is made.

        Args:

            body (str): The body of the HTTP response
        """
        pass

    def processResponseHeaders(self, headers):
        """
        This should take care of dealing with the returned HTTP headers.

        Args:

            headers (list): The returned header fields, as the list built
                from response.headers.getAllRawHeaders().
        """
        pass

    def processRedirect(self, location):
        """
        Handle a redirection via a 3XX HTTP status code.

        Here you may place logic that evaluates the destination that you are
        being redirected to. Matches against known censor redirects, etc.

        Note: if self.followRedirects is set to True, then this method will
        never be called.
        XXX perhaps we may want to hook _handleResponse in RedirectAgent to
        call processRedirect every time we get redirected.

        Args:

            location (str): the url that we are being redirected to.
        """
        pass

    def _cbResponse(self, response, request,
                    headers_processor, body_processor):
        """
        This callback is fired once we have gotten a response for our request.
        If we are using a RedirectAgent then this will fire once we have
        reached the end of the redirect chain.

        Args:

            response (:twisted.web.iweb.IResponse:): a provider for getting our response

            request (dict): the dict describing our request (XXX this should be dropped)

            headers_processor (func): a function to be called with as argument
                the response headers. This will lead
                self.processResponseHeaders to not be called.

            body_processor (func): a function to be called with as argument the
                body of the response. This will lead self.processResponseBody
                to not be called.

        Returns:
            None when there is no response, otherwise a Deferred to which
            _processResponseBody has been added as callback.
        """
        if not response:
            log.err("Got no response for request %s" % request)
            HTTPTest.addToReport(self, request, response)
            return

        log.debug("Got response %s" % response)

        if str(response.code).startswith('3'):
            # Not every 3XX response carries a Location header (304 for
            # example), in which case there is nothing to index into.
            location = response.headers.getRawHeaders('Location')
            if location:
                self.processRedirect(location[0])
            else:
                log.debug("Got a %s response without a Location header" %
                          response.code)

        # [!] We are passing to the headers_processor a list built from the
        # raw headers and not the Headers() object
        response_headers_dict = list(response.headers.getAllRawHeaders())
        if headers_processor:
            headers_processor(response_headers_dict)
        else:
            self.processResponseHeaders(response_headers_dict)

        # A missing, empty or non numeric content-length is not an error, we
        # simply do not know the length in advance.
        try:
            content_length = int(response.headers.getRawHeaders('content-length')[0])
        except (TypeError, ValueError, IndexError):
            content_length = None

        finished = defer.Deferred()
        response.deliverBody(BodyReceiver(finished, content_length))
        finished.addCallback(self._processResponseBody, request,
                             response, body_processor)
        return finished

    def doRequest(self, url, method="GET",
                  headers=None, body=None, headers_processor=None,
                  body_processor=None, use_tor=False):
        """
        Perform an HTTP request with the specified method and headers.

        Args:

            url (str): the full URL of the request. The scheme may be either
                http, https, or httpo for http over Tor Hidden Service.

        Kwargs:

            method (str): the HTTP method name to use for the request

            headers (dict): the request headers to send. The dict is copied,
                so it is never modified by this method. Defaults to no
                headers.

            body (str): the request body

            headers_processor : a function to be used for processing the HTTP
                header responses (defaults to self.processResponseHeaders).
                This function takes as argument the response headers.

            body_processor: a function to be used for processing the HTTP
                response body (defaults to self.processResponseBody). This
                function takes the response body as an argument.

            use_tor (bool): specify if the HTTP request should be done over Tor
                or not.

        Returns:
            the Deferred of the request, with the error reporting errback and
            self._cbResponse attached.
        """

        # Requests over Tor go through the control agent, everything else
        # through the agent configured in _setUp
        if use_tor:
            log.debug("Using Tor for the request to %s" % url)
            agent = self.control_agent
        else:
            agent = self.agent

        if self.localOptions['socksproxy']:
            log.debug("Using SOCKS proxy %s for request" % (self.localOptions['socksproxy']))

        # Work on a private copy: randomize_useragent writes into the headers
        # and must not leak the User-Agent into the caller's dict or into
        # later requests.
        if headers is None:
            headers = {}
        else:
            headers = dict(headers)

        log.debug("Performing request %s %s %s" % (url, method, headers))

        request = {}
        request['method'] = method
        request['url'] = url
        request['headers'] = headers
        request['body'] = body
        request['tor'] = False
        if use_tor:
            request['tor'] = True

        if self.randomizeUA:
            log.debug("Randomizing user agent")
            self.randomize_useragent(request)

        if 'requests' not in self.report:
            self.report['requests'] = []

        # If we have a request body payload, set the request body to such
        # content
        if body:
            body_producer = StringProducer(request['body'])
        else:
            body_producer = None

        true_headers = TrueHeaders(request['headers'])

        def errback(failure, request):
            # Report the failure and hand it back so that it keeps
            # propagating down the errback chain.
            if request['tor']:
                log.err("Error performing torified request: %s" % request['url'])
            else:
                log.err("Error performing request: %s" % request['url'])
            failure_string = handleAllFailures(failure)
            self.addToReport(request, failure_string=failure_string)
            return failure

        d = agent.request(request['method'], request['url'], true_headers,
                          body_producer)
        d.addErrback(errback, request)
        d.addCallback(self._cbResponse, request, headers_processor,
                      body_processor)
        return d