Response headers when downloading descriptors

Adding a reply_headers attribute to our Query class so callers can get the headers the DirPort responded with. This is needed for https://trac.torproject.org/projects/tor/ticket/25768, but generally speaking it is also good data to make available to our callers.

Response headers when downloading descriptors
187d3766 · Damian Johnson · a2300beb · 187d3766 · 187d3766 · 187d3766
Commit 187d3766 authored Apr 13, 2018 by Damian Johnson
--- a/docs/change_log.rst
+++ b/docs/change_log.rst
@@ -58,6 +58,7 @@ The following are only available within Stem's `git repository
  * Added zstd and lzma compression support (:spec:`1cb56af`)
  * Added server descriptor's new is_hidden_service_dir attribute
  * Don't retry downloading descriptors when we've timed out
+  * Added the reply_headers attribute to :class:`~stem.descriptor.remote.Query`
  * Supplying a User-Agent when downloading descriptors.
  * Reduced maximum descriptors fetched by the remote module to match tor's new limit (:trac:`24743`)
  * Consensus **shared_randomness_*_reveal_count** attributes undocumented, and unavailable if retrieved before their corresponding shared_randomness_*_value attribute (:trac:`25046`)

--- a/stem/descriptor/remote.py
+++ b/stem/descriptor/remote.py
@@ -326,6 +326,13 @@ class Query(object):
  .. versionchanged:: 1.7.0
     Added the compression argument.

+  .. versionchanged:: 1.7.0
+     Added the reply_headers attribute.
+
+     The class this provides changed between Python versions. In python2
+     this was called httplib.HTTPMessage, whereas in python3 the class was
+     renamed to http.client.HTTPMessage.
+
  :var str resource: resource being fetched, such as '/tor/server/all'
  :var str descriptor_type: type of descriptors being fetched (for options see
    :func:`~stem.descriptor.__init__.parse_file`), this is guessed from the
@@ -356,6 +363,8 @@ class Query(object):
    **True**, skips these checks otherwise
  :var stem.descriptor.__init__.DocumentHandler document_handler: method in
    which to parse a :class:`~stem.descriptor.networkstatus.NetworkStatusDocument`
+  :var http.client.HTTPMessage reply_headers: headers provided in the response,
+    **None** if we haven't yet made our request
  :var dict kwargs: additional arguments for the descriptor constructor

  :param bool start: start making the request when constructed (default is **True**)
@@ -409,6 +418,7 @@ class Query(object):

    self.validate = validate
    self.document_handler = document_handler
+    self.reply_headers = None
    self.kwargs = kwargs

    self._downloader_thread = None
@@ -538,7 +548,7 @@ class Query(object):
      )

      data = response.read()
-      encoding = response.info().get('Content-Encoding')
+      encoding = response.headers.get('Content-Encoding')

      # Tor doesn't include compression headers. As such when using gzip we
      # need to include '32' for automatic header detection...
@@ -560,6 +570,7 @@ class Query(object):
        data = lzma.decompress(data)

      self.content = data.strip()
+      self.reply_headers = response.headers
      self.runtime = time.time() - self.start_time
      log.trace("Descriptors retrieved from '%s' in %0.2fs" % (self.download_url, self.runtime))
    except:

--- a/test/unit/descriptor/remote.py
+++ b/test/unit/descriptor/remote.py
@@ -2,6 +2,7 @@
 Unit tests for stem.descriptor.remote.
 """

+import io
 import socket
 import tempfile
 import time
@@ -14,6 +15,11 @@ import stem.util.conf
 from stem.descriptor.remote import Compression
 from test.unit.descriptor import read_resource

+try:
+  from http.client import HTTPMessage  # python3
+except ImportError:
+  from httplib import HTTPMessage  # python2
+
 try:
  # added in python 2.7
  from collections import OrderedDict
@@ -111,11 +117,30 @@ FALLBACK_ENTRY = b"""\
 /* extrainfo=1 */
 """

+HEADER = '\r\n'.join([
+  'Date: Fri, 13 Apr 2018 16:35:50 GMT',
+  'Content-Type: application/octet-stream',
+  'X-Your-Address-Is: 97.103.17.56',
+  'Pragma: no-cache',
+  'Content-Encoding: %s',
+])
+

 def _urlopen_mock(data, encoding = 'identity'):
  urlopen_mock = Mock()
  urlopen_mock().read.return_value = data
-  urlopen_mock().info().get.return_value = encoding
+
+  if stem.prereq.is_python_3():
+    headers = HTTPMessage()
+
+    for line in HEADER.splitlines():
+      key, value = line.split(': ', 1)
+      headers.add_header(key, encoding if key == 'Content-Encoding' else value)
+
+    urlopen_mock().headers = headers
+  else:
+    urlopen_mock().headers = HTTPMessage(io.BytesIO(HEADER % encoding))
+
  return urlopen_mock


@@ -219,6 +244,29 @@ class TestDescriptorDownloader(unittest.TestCase):
    self.assertEqual(1, len(descriptors))
    self.assertEqual('moria1', descriptors[0].nickname)

+  @patch(URL_OPEN, _urlopen_mock(TEST_DESCRIPTOR))
+  def test_reply_headers(self):
+    query = stem.descriptor.remote.get_server_descriptors('9695DFC35FFEB861329B9F1AB04C46397020CE31', start = False)
+    self.assertEqual(None, query.reply_headers)  # initially we don't have a reply
+    query.run()
+
+    self.assertEqual('Fri, 13 Apr 2018 16:35:50 GMT', query.reply_headers.get('date'))
+    self.assertEqual('application/octet-stream', query.reply_headers.get('content-type'))
+    self.assertEqual('97.103.17.56', query.reply_headers.get('x-your-address-is'))
+    self.assertEqual('no-cache', query.reply_headers.get('pragma'))
+    self.assertEqual('identity', query.reply_headers.get('content-encoding'))
+
+    # getting headers should be case insensitive
+    self.assertEqual('identity', query.reply_headers.get('CoNtEnT-ENCODING'))
+
+    # request a header that isn't present
+    self.assertEqual(None, query.reply_headers.get('no-such-header'))
+    self.assertEqual('default', query.reply_headers.get('no-such-header', 'default'))
+
+    descriptors = list(query)
+    self.assertEqual(1, len(descriptors))
+    self.assertEqual('moria1', descriptors[0].nickname)
+
  @patch(URL_OPEN, _urlopen_mock(TEST_DESCRIPTOR))
  def test_query_download(self):
    """