summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDamian Johnson <atagar@torproject.org>2018-03-31 21:39:00 -0700
committerDamian Johnson <atagar@torproject.org>2018-03-31 21:39:00 -0700
commit079b48126fdc97d83df84d96e7423f99870f2584 (patch)
treeba489d54b6d757ce41a82ca311ee76cfa7f0d425
parent4a8f8d0499aefaddd6d5d9386744e1b1125c6036 (diff)
parentf7949b644eaadf9a0e731547deafb40cf106e8a8 (diff)
Expanded descriptor compression support
We supported plaintext and gzip when downloading descriptors, but recently tor added lzma and zstd support as well... https://gitweb.torproject.org/torspec.git/commit/?id=1cb56af Lzma support was added in Python 3.3... https://docs.python.org/3/library/lzma.html ... and zstd is covered by the zstandard module... https://pypi.python.org/pypi/zstandard
-rw-r--r--docs/change_log.rst1
-rw-r--r--stem/descriptor/remote.py182
-rw-r--r--stem/version.py2
-rw-r--r--test/unit/descriptor/__init__.py9
-rw-r--r--test/unit/descriptor/data/compressed_gzipbin0 -> 1543 bytes
-rw-r--r--test/unit/descriptor/data/compressed_identity52
-rw-r--r--test/unit/descriptor/data/compressed_lzmabin0 -> 1652 bytes
-rw-r--r--test/unit/descriptor/data/compressed_zstdbin0 -> 1550 bytes
-rw-r--r--test/unit/descriptor/remote.py174
9 files changed, 341 insertions, 79 deletions
diff --git a/docs/change_log.rst b/docs/change_log.rst
index bb42c982..a67aae2d 100644
--- a/docs/change_log.rst
+++ b/docs/change_log.rst
@@ -55,6 +55,7 @@ The following are only available within Stem's `git repository
* **Descriptors**
* `Fallback directory v2 support <https://lists.torproject.org/pipermail/tor-dev/2017-December/012721.html>`_, which adds *nickname* and *extrainfo*
+ * Added zstd and lzma compression support (:spec:`1cb56af`)
* Reduced maximum descriptors fetched by the remote module to match tor's new limit (:trac:`24743`)
* Consensus **shared_randomness_*_reveal_count** attributes undocumented, and unavailable if retrieved before their corresponding shared_randomness_*_value attribute (:trac:`25046`)
* Allow 'proto' line to have blank values (:spec:`a8455f4`)
diff --git a/stem/descriptor/remote.py b/stem/descriptor/remote.py
index 9181dbcf..e1190fc7 100644
--- a/stem/descriptor/remote.py
+++ b/stem/descriptor/remote.py
@@ -80,6 +80,21 @@ content. For example...
Maximum number of microdescriptors that can requested at a time by their
hashes.
+
+.. data:: Compression (enum)
+
+ Compression when downloading descriptors.
+
+ .. versionadded:: 1.7.0
+
+ =============== ===========
+ Compression Description
+ =============== ===========
+ **PLAINTEXT** Uncompressed data.
+ **GZIP** `GZip compression <https://www.gnu.org/software/gzip/>`_.
+ **ZSTD** `Zstandard compression <https://www.zstd.net>`_, this requires the `zstandard module <https://pypi.python.org/pypi/zstandard>`_.
+ **LZMA**        `LZMA compression <https://en.wikipedia.org/wiki/LZMA>`_, this requires the `lzma module <https://docs.python.org/3/library/lzma.html>`_.
+ =============== ===========
"""
import io
@@ -91,6 +106,13 @@ import threading
import time
import zlib
+import stem.descriptor
+import stem.prereq
+import stem.util.enum
+
+from stem import Flag
+from stem.util import _hash_attr, connection, log, str_tools, tor_tools
+
try:
# added in python 2.7
from collections import OrderedDict
@@ -103,11 +125,38 @@ try:
except ImportError:
import urllib2 as urllib
-import stem.descriptor
-import stem.prereq
+try:
+ # added in python 3.3
+ import lzma
+ LZMA_SUPPORTED = True
+except ImportError:
+ LZMA_SUPPORTED = False
-from stem import Flag
-from stem.util import _hash_attr, connection, log, str_tools, tor_tools
+try:
+ # We use the suggested python zstd library...
+ #
+ # https://pypi.python.org/pypi/zstandard
+ #
+ # Unfortunately this installs as a zstd module which can be confused with...
+ #
+ # https://pypi.python.org/pypi/zstd
+ #
+ # As such checking for the specific decompression class we'll need.
+
+ import zstd
+ ZSTD_SUPPORTED = hasattr(zstd, 'ZstdDecompressor')
+except ImportError:
+ ZSTD_SUPPORTED = False
+
+Compression = stem.util.enum.Enum(
+ ('PLAINTEXT', 'identity'),
+ ('GZIP', 'gzip'), # can also be 'deflate'
+ ('ZSTD', 'x-zstd'),
+ ('LZMA', 'x-tor-lzma'),
+)
+
+ZSTD_UNAVAILABLE_MSG = 'ZSTD compression requires the zstandard module (https://pypi.python.org/pypi/zstandard)'
+LZMA_UNAVAILABLE_MSG = 'LZMA compression requires the lzma module (https://docs.python.org/3/library/lzma.html)'
# Tor has a limited number of descriptors we can fetch explicitly by their
# fingerprint or hashes due to a limit on the url length by squid proxies.
@@ -224,7 +273,7 @@ class Query(object):
from stem.descriptor.remote import Query
query = Query(
- '/tor/server/all.z',
+ '/tor/server/all',
block = True,
timeout = 30,
)
@@ -243,7 +292,7 @@ class Query(object):
print('Current relays:')
- for desc in Query('/tor/server/all.z', 'server-descriptor 1.0'):
+ for desc in Query('/tor/server/all', 'server-descriptor 1.0'):
print(desc.fingerprint)
In either case exceptions are available via our 'error' attribute.
@@ -256,28 +305,37 @@ class Query(object):
=============================================== ===========
Resource Description
=============================================== ===========
- /tor/server/all.z all present server descriptors
- /tor/server/fp/<fp1>+<fp2>+<fp3>.z server descriptors with the given fingerprints
- /tor/extra/all.z all present extrainfo descriptors
- /tor/extra/fp/<fp1>+<fp2>+<fp3>.z extrainfo descriptors with the given fingerprints
- /tor/micro/d/<hash1>-<hash2>.z microdescriptors with the given hashes
- /tor/status-vote/current/consensus.z present consensus
- /tor/status-vote/current/consensus-microdesc.z present microdescriptor consensus
- /tor/keys/all.z key certificates for the authorities
- /tor/keys/fp/<v3ident1>+<v3ident2>.z key certificates for specific authorities
+ /tor/server/all all present server descriptors
+ /tor/server/fp/<fp1>+<fp2>+<fp3> server descriptors with the given fingerprints
+ /tor/extra/all all present extrainfo descriptors
+ /tor/extra/fp/<fp1>+<fp2>+<fp3> extrainfo descriptors with the given fingerprints
+ /tor/micro/d/<hash1>-<hash2> microdescriptors with the given hashes
+ /tor/status-vote/current/consensus present consensus
+ /tor/status-vote/current/consensus-microdesc present microdescriptor consensus
+ /tor/keys/all key certificates for the authorities
+ /tor/keys/fp/<v3ident1>+<v3ident2> key certificates for specific authorities
=============================================== ===========
- The '.z' suffix can be excluded to get a plaintext rather than compressed
- response. Compression is handled transparently, so this shouldn't matter to
- the caller.
+ **ZSTD** compression requires `zstandard
+ <https://pypi.python.org/pypi/zstandard>`_, and **LZMA** requires the `lzma
+ module <https://docs.python.org/3/library/lzma.html>`_.
+
+ For legacy reasons if our resource has a '.z' suffix then our **compression**
+ argument is overwritten with Compression.GZIP.
+
+ .. versionchanged:: 1.7.0
+ Added the compression argument.
- :var str resource: resource being fetched, such as '/tor/server/all.z'
+ :var str resource: resource being fetched, such as '/tor/server/all'
:var str descriptor_type: type of descriptors being fetched (for options see
:func:`~stem.descriptor.__init__.parse_file`), this is guessed from the
resource if **None**
:var list endpoints: (address, dirport) tuples of the authority or mirror
we're querying, this uses authorities if undefined
+ :var list compression: list of :data:`stem.descriptor.remote.Compression`
+ we're willing to accept, when none are mutually supported downloads fall
+ back to Compression.PLAINTEXT
:var int retries: number of times to attempt the request if downloading it
fails
:var bool fall_back_to_authority: when retrying request issues the last
@@ -305,17 +363,37 @@ class Query(object):
the same as running **query.run(True)** (default is **False**)
"""
- def __init__(self, resource, descriptor_type = None, endpoints = None, retries = 2, fall_back_to_authority = False, timeout = None, start = True, block = False, validate = False, document_handler = stem.descriptor.DocumentHandler.ENTRIES, **kwargs):
+ def __init__(self, resource, descriptor_type = None, endpoints = None, compression = None, retries = 2, fall_back_to_authority = False, timeout = None, start = True, block = False, validate = False, document_handler = stem.descriptor.DocumentHandler.ENTRIES, **kwargs):
if not resource.startswith('/'):
raise ValueError("Resources should start with a '/': %s" % resource)
- self.resource = resource
+ if resource.endswith('.z'):
+ compression = [Compression.GZIP]
+ resource = resource[:-2]
+ elif compression is None:
+ compression = [Compression.PLAINTEXT]
+ else:
+ if isinstance(compression, str):
+ compression = [compression] # caller provided only a single option
+
+ if Compression.ZSTD in compression and not ZSTD_SUPPORTED:
+ log.log_once('stem.descriptor.remote.zstd_unavailable', log.INFO, ZSTD_UNAVAILABLE_MSG)
+ compression.remove(Compression.ZSTD)
+
+ if Compression.LZMA in compression and not LZMA_SUPPORTED:
+ log.log_once('stem.descriptor.remote.lzma_unavailable', log.INFO, LZMA_UNAVAILABLE_MSG)
+ compression.remove(Compression.LZMA)
+
+ if not compression:
+ compression = [Compression.PLAINTEXT]
if descriptor_type:
self.descriptor_type = descriptor_type
else:
self.descriptor_type = _guess_descriptor_type(resource)
+ self.resource = resource
+ self.compression = compression
self.endpoints = endpoints if endpoints else []
self.retries = retries
self.fall_back_to_authority = fall_back_to_authority
@@ -352,7 +430,7 @@ class Query(object):
self._downloader_thread = threading.Thread(
name = 'Descriptor Query',
target = self._download_descriptors,
- args = (self.retries,)
+ args = (self.compression, self.retries,)
)
self._downloader_thread.setDaemon(True)
@@ -435,26 +513,50 @@ class Query(object):
if use_authority or not self.endpoints:
directories = get_authorities().values()
- picked = random.choice(directories)
+ picked = random.choice(list(directories))
address, dirport = picked.address, picked.dir_port
else:
address, dirport = random.choice(self.endpoints)
return 'http://%s:%i/%s' % (address, dirport, self.resource.lstrip('/'))
- def _download_descriptors(self, retries):
+ def _download_descriptors(self, compression, retries):
try:
use_authority = retries == 0 and self.fall_back_to_authority
self.download_url = self._pick_url(use_authority)
-
self.start_time = time.time()
- response = urllib.urlopen(self.download_url, timeout = self.timeout).read()
- if self.download_url.endswith('.z'):
- response = zlib.decompress(response)
+ response = urllib.urlopen(
+ urllib.Request(
+ self.download_url,
+ headers = {'Accept-Encoding': ', '.join(compression)},
+ ),
+ timeout = self.timeout,
+ )
+
+ data = response.read()
+ encoding = response.info().getheader('Content-Encoding')
- self.content = response.strip()
+ # Tor doesn't include compression headers. As such when using gzip we
+ # need to include '32' for automatic header detection...
+ #
+ # https://stackoverflow.com/questions/3122145/zlib-error-error-3-while-decompressing-incorrect-header-check/22310760#22310760
+ #
+ # ... and with zstd we need to use the streaming API.
+ if encoding in (Compression.GZIP, 'deflate'):
+ data = zlib.decompress(data, zlib.MAX_WBITS | 32)
+ elif encoding == Compression.ZSTD and ZSTD_SUPPORTED:
+ output_buffer = io.BytesIO()
+
+ with zstd.ZstdDecompressor().write_to(output_buffer) as decompressor:
+ decompressor.write(data)
+
+ data = output_buffer.getvalue()
+ elif encoding == Compression.LZMA and LZMA_SUPPORTED:
+ data = lzma.decompress(data)
+
+ self.content = data.strip()
self.runtime = time.time() - self.start_time
log.trace("Descriptors retrieved from '%s' in %0.2fs" % (self.download_url, self.runtime))
except:
@@ -462,7 +564,7 @@ class Query(object):
if retries > 0:
log.debug("Unable to download descriptors from '%s' (%i retries remaining): %s" % (self.download_url, retries, exc))
- return self._download_descriptors(retries - 1)
+ return self._download_descriptors(compression, retries - 1)
else:
log.debug("Unable to download descriptors from '%s': %s" % (self.download_url, exc))
self.error = exc
@@ -539,7 +641,7 @@ class DescriptorDownloader(object):
fingerprints (this is due to a limit on the url length by squid proxies).
"""
- resource = '/tor/server/all.z'
+ resource = '/tor/server/all'
if isinstance(fingerprints, str):
fingerprints = [fingerprints]
@@ -548,7 +650,7 @@ class DescriptorDownloader(object):
if len(fingerprints) > MAX_FINGERPRINTS:
raise ValueError('Unable to request more than %i descriptors at a time by their fingerprints' % MAX_FINGERPRINTS)
- resource = '/tor/server/fp/%s.z' % '+'.join(fingerprints)
+ resource = '/tor/server/fp/%s' % '+'.join(fingerprints)
return self.query(resource, **query_args)
@@ -569,7 +671,7 @@ class DescriptorDownloader(object):
fingerprints (this is due to a limit on the url length by squid proxies).
"""
- resource = '/tor/extra/all.z'
+ resource = '/tor/extra/all'
if isinstance(fingerprints, str):
fingerprints = [fingerprints]
@@ -578,7 +680,7 @@ class DescriptorDownloader(object):
if len(fingerprints) > MAX_FINGERPRINTS:
raise ValueError('Unable to request more than %i descriptors at a time by their fingerprints' % MAX_FINGERPRINTS)
- resource = '/tor/extra/fp/%s.z' % '+'.join(fingerprints)
+ resource = '/tor/extra/fp/%s' % '+'.join(fingerprints)
return self.query(resource, **query_args)
@@ -613,7 +715,7 @@ class DescriptorDownloader(object):
if len(hashes) > MAX_MICRODESCRIPTOR_HASHES:
raise ValueError('Unable to request more than %i microdescriptors at a time by their hashes' % MAX_MICRODESCRIPTOR_HASHES)
- return self.query('/tor/micro/d/%s.z' % '-'.join(hashes), **query_args)
+ return self.query('/tor/micro/d/%s' % '-'.join(hashes), **query_args)
def get_consensus(self, authority_v3ident = None, microdescriptor = False, **query_args):
"""
@@ -643,7 +745,7 @@ class DescriptorDownloader(object):
if authority_v3ident:
resource += '/%s' % authority_v3ident
- consensus_query = self.query(resource + '.z', **query_args)
+ consensus_query = self.query(resource, **query_args)
# if we're performing validation then check that it's signed by the
# authority key certificates
@@ -672,7 +774,7 @@ class DescriptorDownloader(object):
if 'endpoint' not in query_args:
query_args['endpoints'] = [(authority.address, authority.dir_port)]
- return self.query(resource + '.z', **query_args)
+ return self.query(resource, **query_args)
def get_key_certificates(self, authority_v3idents = None, **query_args):
"""
@@ -694,7 +796,7 @@ class DescriptorDownloader(object):
squid proxies).
"""
- resource = '/tor/keys/all.z'
+ resource = '/tor/keys/all'
if isinstance(authority_v3idents, str):
authority_v3idents = [authority_v3idents]
@@ -703,7 +805,7 @@ class DescriptorDownloader(object):
if len(authority_v3idents) > MAX_FINGERPRINTS:
raise ValueError('Unable to request more than %i key certificates at a time by their identity fingerprints' % MAX_FINGERPRINTS)
- resource = '/tor/keys/fp/%s.z' % '+'.join(authority_v3idents)
+ resource = '/tor/keys/fp/%s' % '+'.join(authority_v3idents)
return self.query(resource, **query_args)
@@ -711,7 +813,7 @@ class DescriptorDownloader(object):
"""
Issues a request for the given resource.
- :param str resource: resource being fetched, such as '/tor/server/all.z'
+ :param str resource: resource being fetched, such as '/tor/server/all'
:param query_args: additional arguments for the
:class:`~stem.descriptor.remote.Query` constructor
diff --git a/stem/version.py b/stem/version.py
index 9de2f1a5..9036effb 100644
--- a/stem/version.py
+++ b/stem/version.py
@@ -35,6 +35,7 @@ easily parsed and compared, for instance...
Requirement Description
===================================== ===========
**AUTH_SAFECOOKIE** SAFECOOKIE authentication method
+ **DESCRIPTOR_COMPRESSION** `Expanded compression support for ZSTD and LZMA <https://gitweb.torproject.org/torspec.git/commit/?id=1cb56afdc1e55e303e3e6b69e90d983ee217d93f>`_
**DROPGUARDS** DROPGUARDS requests
**EVENT_AUTHDIR_NEWDESCS** AUTHDIR_NEWDESC events
**EVENT_BUILDTIMEOUT_SET** BUILDTIMEOUT_SET events
@@ -353,6 +354,7 @@ safecookie_req.greater_than(Version('0.2.3.13'))
Requirement = stem.util.enum.Enum(
('AUTH_SAFECOOKIE', safecookie_req),
+ ('DESCRIPTOR_COMPRESSION', Version('0.3.1.1-alpha')),
('DROPGUARDS', Version('0.2.5.1-alpha')),
('EVENT_AUTHDIR_NEWDESCS', Version('0.1.1.10-alpha')),
('EVENT_BUILDTIMEOUT_SET', Version('0.2.2.7-alpha')),
diff --git a/test/unit/descriptor/__init__.py b/test/unit/descriptor/__init__.py
index bf0e8619..bce8d3d0 100644
--- a/test/unit/descriptor/__init__.py
+++ b/test/unit/descriptor/__init__.py
@@ -25,6 +25,15 @@ def get_resource(filename):
return os.path.join(DESCRIPTOR_TEST_DATA, filename)
+def read_resource(filename):
+ """
+ Provides test data.
+ """
+
+ with open(get_resource(filename), 'rb') as resource_file:
+ return resource_file.read()
+
+
def base_expect_invalid_attr(cls, default_attr, default_value, test, desc_attrs, attr = None, expected_value = None):
return base_expect_invalid_attr_for_text(cls, default_attr, default_value, test, cls.content(desc_attrs), attr, expected_value)
diff --git a/test/unit/descriptor/data/compressed_gzip b/test/unit/descriptor/data/compressed_gzip
new file mode 100644
index 00000000..2b2dc642
--- /dev/null
+++ b/test/unit/descriptor/data/compressed_gzip
Binary files differ
diff --git a/test/unit/descriptor/data/compressed_identity b/test/unit/descriptor/data/compressed_identity
new file mode 100644
index 00000000..cd5b56b4
--- /dev/null
+++ b/test/unit/descriptor/data/compressed_identity
@@ -0,0 +1,52 @@
+router moria1 128.31.0.34 9101 0 9131
+identity-ed25519
+-----BEGIN ED25519 CERT-----
+AQQABnxNAQS9ja600v/ZodOUiu7NepTkbPIOrFPgEVQE+03rGBtPAQAgBADKnR/C
+2nhpr9UzJkkbPy83sqbfNh63VgFnCpkSTULAcq52z8xM7raRDCiTJTu/FK/BJGgE
+dJcFQ8MgZJOuYgFKcMVyQ6j2FGbhDI0zQTK1+TAPNRG4ixiF7h7wqDT9Ugw=
+-----END ED25519 CERT-----
+master-key-ed25519 yp0fwtp4aa/VMyZJGz8vN7Km3zYet1YBZwqZEk1CwHI
+platform Tor 0.3.4.0-alpha-dev on Linux
+proto Cons=1-2 Desc=1-2 DirCache=1-2 HSDir=1-2 HSIntro=3-4 HSRend=1-2 Link=1-5 LinkAuth=1,3 Microdesc=1-2 Relay=1-2
+published 2018-03-31 04:17:41
+fingerprint 9695 DFC3 5FFE B861 329B 9F1A B04C 4639 7020 CE31
+uptime 295196
+bandwidth 512000 62914560 3403447
+extra-info-digest 393861CB4D9A0480E5A58A3005A88DD7F09271E3 CCbwxMembtsEKAVkU2bqCiBhKaokRlJv6077uy5kI4Q
+caches-extra-info
+onion-key
+-----BEGIN RSA PUBLIC KEY-----
+MIGJAoGBAKsNybH/i/nMLDwSr6WYd77EV6IgiuhfD7UBbsifsDywTe+YrZU3Z4O+
+DY9jemMPRA7wYioJrmXyMVTsbktjWUtWFge1oUj2xsxO3cufCavLmmkmqpkHL3Wn
+xRJraupgfLK72/UN8wXB0duSAU/DH9hTg7WRhkFZdRJK7rostJvZAgMBAAE=
+-----END RSA PUBLIC KEY-----
+signing-key
+-----BEGIN RSA PUBLIC KEY-----
+MIGJAoGBALtJ9uD7cD7iHjqNA3AgsX9prES5QN+yFQyr2uOkxzhvunnaf6SNhzWW
+bkfylnMrRm/qCz/czcjZO6N6EKHcXmypehvP566B7gAQ9vDsb+l7VZVWgXvzNc2s
+tl3P7qpC08rgyJh1GqmtQTCesIDqkEyWxwToympCt09ZQRq+fIttAgMBAAE=
+-----END RSA PUBLIC KEY-----
+onion-key-crosscert
+-----BEGIN CROSSCERT-----
+A9x30r1LTl6rRUe4/irB6/cdNGmd95+bOuArjrmuwSI8Crerx3uNbbN8/iRlRSec
+fBqElxb8SvEGONvtyjGVxp+t5QQS381Bah7DRMz7MfRvOgD8UoZ4DEcpMBnJnA+O
+5OnAoBVOProdfUvFfEnBWJY+ELB3ShG4A3TL4c8QrwA=
+-----END CROSSCERT-----
+ntor-onion-key-crosscert 0
+-----BEGIN ED25519 CERT-----
+AQoABnadAcqdH8LaeGmv1TMmSRs/Lzeypt82HrdWAWcKmRJNQsByAEnIlLlEtELa
+EZA/y3Q5wXxA20upqmSKoCt1s8a6cXHqnsbha61MVbRLVCgWlWmKNjBSu+GiXeXC
+IJ9Tr+aYawM=
+-----END ED25519 CERT-----
+hidden-service-dir
+contact 1024D/28988BF5 arma mit edu
+ntor-onion-key a3r4k2huGP4YdHTQ8ldJhZkm5O+1tLaKE//o5DSibU0=
+reject *:*
+tunnelled-dir-server
+router-sig-ed25519 FkY6h3SMPWPt1fQlU8jsFllGWIy+lw7SGTjTV2FZVNlrHYHcS1F2L7bMGu38ljZ7J+VIBssRZnaiQVEXbI9uDQ
+router-signature
+-----BEGIN SIGNATURE-----
+UzHGCcQK1XGntL3MVvcgcj0kpCsEMf5UIEj4eWOtCjWyriOX7nqwayc5vWOubZLN
+pAZ27MjVmMbVGgtS0xFZ/UQrGEeznITaZwjLNd1IOliadKPdOp4w4OFcjbkCytk6
+MdAEyxwnZzxUwCex0Kj+3FWXQTr09VnXMg5/+vdbCUY=
+-----END SIGNATURE-----
diff --git a/test/unit/descriptor/data/compressed_lzma b/test/unit/descriptor/data/compressed_lzma
new file mode 100644
index 00000000..e3297504
--- /dev/null
+++ b/test/unit/descriptor/data/compressed_lzma
Binary files differ
diff --git a/test/unit/descriptor/data/compressed_zstd b/test/unit/descriptor/data/compressed_zstd
new file mode 100644
index 00000000..b3c3269b
--- /dev/null
+++ b/test/unit/descriptor/data/compressed_zstd
Binary files differ
diff --git a/test/unit/descriptor/remote.py b/test/unit/descriptor/remote.py
index ac150d5c..05cf1d57 100644
--- a/test/unit/descriptor/remote.py
+++ b/test/unit/descriptor/remote.py
@@ -2,7 +2,6 @@
Unit tests for stem.descriptor.remote.
"""
-import io
import socket
import tempfile
import unittest
@@ -11,6 +10,9 @@ import stem.descriptor.remote
import stem.prereq
import stem.util.conf
+from stem.descriptor.remote import Compression
+from test.unit.descriptor import read_resource
+
try:
# added in python 2.7
from collections import OrderedDict
@@ -19,9 +21,9 @@ except ImportError:
try:
# added in python 3.3
- from unittest.mock import patch
+ from unittest.mock import patch, Mock
except ImportError:
- from mock import patch
+ from mock import patch, Mock
# The urlopen() method is in a different location depending on if we're using
# python 2.x or 3.x. The 2to3 converter accounts for this in imports, but not
@@ -29,6 +31,8 @@ except ImportError:
URL_OPEN = 'urllib.request.urlopen' if stem.prereq.is_python_3() else 'urllib2.urlopen'
+TEST_RESOURCE = '/tor/server/fp/9695DFC35FFEB861329B9F1AB04C46397020CE31'
+
# Output from requesting moria1's descriptor from itself...
# % curl http://128.31.0.39:9131/tor/server/fp/9695DFC35FFEB861329B9F1AB04C46397020CE31
@@ -107,23 +111,128 @@ FALLBACK_ENTRY = b"""\
"""
+def _urlopen_mock(data, encoding = 'identity'):
+ urlopen_mock = Mock()
+ urlopen_mock().read.return_value = data
+ urlopen_mock().info().getheader.return_value = encoding
+ return urlopen_mock
+
+
class TestDescriptorDownloader(unittest.TestCase):
- @patch(URL_OPEN)
- def test_query_download(self, urlopen_mock):
+ def tearDown(self):
+ # prevent our mocks from impacting other tests
+ stem.descriptor.remote.SINGLETON_DOWNLOADER = None
+
+ def test_gzip_url_override(self):
+ query = stem.descriptor.remote.Query(TEST_RESOURCE, start = False)
+ self.assertEqual([Compression.PLAINTEXT], query.compression)
+ self.assertEqual(TEST_RESOURCE, query.resource)
+
+ query = stem.descriptor.remote.Query(TEST_RESOURCE + '.z', compression = Compression.PLAINTEXT, start = False)
+ self.assertEqual([Compression.GZIP], query.compression)
+ self.assertEqual(TEST_RESOURCE, query.resource)
+
+ def test_zstd_support_check(self):
+ with patch('stem.descriptor.remote.ZSTD_SUPPORTED', True):
+ query = stem.descriptor.remote.Query(TEST_RESOURCE, compression = Compression.ZSTD, start = False)
+ self.assertEqual([Compression.ZSTD], query.compression)
+
+ with patch('stem.descriptor.remote.ZSTD_SUPPORTED', False):
+ query = stem.descriptor.remote.Query(TEST_RESOURCE, compression = Compression.ZSTD, start = False)
+ self.assertEqual([Compression.PLAINTEXT], query.compression)
+
+ def test_lzma_support_check(self):
+ with patch('stem.descriptor.remote.LZMA_SUPPORTED', True):
+ query = stem.descriptor.remote.Query(TEST_RESOURCE, compression = Compression.LZMA, start = False)
+ self.assertEqual([Compression.LZMA], query.compression)
+
+ with patch('stem.descriptor.remote.LZMA_SUPPORTED', False):
+ query = stem.descriptor.remote.Query(TEST_RESOURCE, compression = Compression.LZMA, start = False)
+ self.assertEqual([Compression.PLAINTEXT], query.compression)
+
+ @patch(URL_OPEN, _urlopen_mock(read_resource('compressed_identity'), encoding = 'identity'))
+ def test_compression_plaintext(self):
"""
- Check Query functionality when we successfully download a descriptor.
+ Download a plaintext descriptor.
+ """
+
+ descriptors = list(stem.descriptor.remote.get_server_descriptors(
+ '9695DFC35FFEB861329B9F1AB04C46397020CE31',
+ compression = Compression.PLAINTEXT,
+ validate = True,
+ ))
+
+ self.assertEqual(1, len(descriptors))
+ self.assertEqual('moria1', descriptors[0].nickname)
+
+ @patch(URL_OPEN, _urlopen_mock(read_resource('compressed_gzip'), encoding = 'gzip'))
+ def test_compression_gzip(self):
"""
+ Download a gzip compressed descriptor.
+ """
+
+ descriptors = list(stem.descriptor.remote.get_server_descriptors(
+ '9695DFC35FFEB861329B9F1AB04C46397020CE31',
+ compression = Compression.GZIP,
+ validate = True,
+ ))
+
+ self.assertEqual(1, len(descriptors))
+ self.assertEqual('moria1', descriptors[0].nickname)
+
+ @patch(URL_OPEN, _urlopen_mock(read_resource('compressed_zstd'), encoding = 'x-zstd'))
+ def test_compression_zstd(self):
+ """
+ Download a zstd compressed descriptor.
+ """
+
+ if not stem.descriptor.remote.ZSTD_SUPPORTED:
+ self.skipTest('(requires zstd module)')
+ return
- urlopen_mock.return_value = io.BytesIO(TEST_DESCRIPTOR)
+ descriptors = list(stem.descriptor.remote.get_server_descriptors(
+ '9695DFC35FFEB861329B9F1AB04C46397020CE31',
+ compression = Compression.ZSTD,
+ validate = True,
+ ))
+
+ self.assertEqual(1, len(descriptors))
+ self.assertEqual('moria1', descriptors[0].nickname)
+
+ @patch(URL_OPEN, _urlopen_mock(read_resource('compressed_lzma'), encoding = 'x-tor-lzma'))
+ def test_compression_lzma(self):
+ """
+ Download a lzma compressed descriptor.
+ """
+
+ if not stem.descriptor.remote.LZMA_SUPPORTED:
+ self.skipTest('(requires lzma module)')
+ return
+
+ descriptors = list(stem.descriptor.remote.get_server_descriptors(
+ '9695DFC35FFEB861329B9F1AB04C46397020CE31',
+ compression = Compression.LZMA,
+ validate = True,
+ ))
+
+ self.assertEqual(1, len(descriptors))
+ self.assertEqual('moria1', descriptors[0].nickname)
+
+ @patch(URL_OPEN, _urlopen_mock(TEST_DESCRIPTOR))
+ def test_query_download(self):
+ """
+ Check Query functionality when we successfully download a descriptor.
+ """
query = stem.descriptor.remote.Query(
- '/tor/server/fp/9695DFC35FFEB861329B9F1AB04C46397020CE31',
+ TEST_RESOURCE,
'server-descriptor 1.0',
endpoints = [('128.31.0.39', 9131)],
+ compression = Compression.PLAINTEXT,
validate = True,
)
- expeced_url = 'http://128.31.0.39:9131/tor/server/fp/9695DFC35FFEB861329B9F1AB04C46397020CE31'
+ expeced_url = 'http://128.31.0.39:9131' + TEST_RESOURCE
self.assertEqual(expeced_url, query._pick_url())
descriptors = list(query)
@@ -135,21 +244,17 @@ class TestDescriptorDownloader(unittest.TestCase):
self.assertEqual('9695DFC35FFEB861329B9F1AB04C46397020CE31', desc.fingerprint)
self.assertEqual(TEST_DESCRIPTOR.strip(), desc.get_bytes())
- urlopen_mock.assert_called_once_with(expeced_url, timeout = None)
-
- @patch(URL_OPEN)
- def test_query_with_malformed_content(self, urlopen_mock):
+ @patch(URL_OPEN, _urlopen_mock(b'some malformed stuff'))
+ def test_query_with_malformed_content(self):
"""
Query with malformed descriptor content.
"""
- descriptor_content = b'some malformed stuff'
- urlopen_mock.return_value = io.BytesIO(descriptor_content)
-
query = stem.descriptor.remote.Query(
- '/tor/server/fp/9695DFC35FFEB861329B9F1AB04C46397020CE31',
+ TEST_RESOURCE,
'server-descriptor 1.0',
endpoints = [('128.31.0.39', 9131)],
+ compression = Compression.PLAINTEXT,
validate = True,
)
@@ -171,7 +276,7 @@ class TestDescriptorDownloader(unittest.TestCase):
urlopen_mock.side_effect = socket.timeout('connection timed out')
query = stem.descriptor.remote.Query(
- '/tor/server/fp/9695DFC35FFEB861329B9F1AB04C46397020CE31',
+ TEST_RESOURCE,
'server-descriptor 1.0',
endpoints = [('128.31.0.39', 9131)],
fall_back_to_authority = False,
@@ -180,20 +285,15 @@ class TestDescriptorDownloader(unittest.TestCase):
)
self.assertRaises(socket.timeout, query.run)
- urlopen_mock.assert_called_with(
- 'http://128.31.0.39:9131/tor/server/fp/9695DFC35FFEB861329B9F1AB04C46397020CE31',
- timeout = 5,
- )
self.assertEqual(3, urlopen_mock.call_count)
- @patch(URL_OPEN)
- def test_can_iterate_multiple_times(self, urlopen_mock):
- urlopen_mock.return_value = io.BytesIO(TEST_DESCRIPTOR)
-
+ @patch(URL_OPEN, _urlopen_mock(TEST_DESCRIPTOR))
+ def test_can_iterate_multiple_times(self):
query = stem.descriptor.remote.Query(
- '/tor/server/fp/9695DFC35FFEB861329B9F1AB04C46397020CE31',
+ TEST_RESOURCE,
'server-descriptor 1.0',
endpoints = [('128.31.0.39', 9131)],
+ compression = Compression.PLAINTEXT,
validate = True,
)
@@ -213,9 +313,8 @@ class TestDescriptorDownloader(unittest.TestCase):
self.assertTrue(len(fallback_directories) > 10)
self.assertEqual('5.39.92.199', fallback_directories['0BEA4A88D069753218EAAAD6D22EA87B9A1319D6'].address)
- @patch(URL_OPEN)
- def test_fallback_directories_from_remote(self, urlopen_mock):
- urlopen_mock.return_value = io.BytesIO(FALLBACK_DIR_CONTENT)
+ @patch(URL_OPEN, _urlopen_mock(FALLBACK_DIR_CONTENT))
+ def test_fallback_directories_from_remote(self):
fallback_directories = stem.descriptor.remote.FallbackDirectory.from_remote()
header = OrderedDict((('type', 'fallback'), ('version', '2.0.0'), ('timestamp', '20170526090242')))
@@ -298,19 +397,16 @@ class TestDescriptorDownloader(unittest.TestCase):
self.assertEqual(expected, stem.descriptor.remote.FallbackDirectory.from_cache(tmp.name))
- @patch(URL_OPEN)
- def test_fallback_directories_from_remote_empty(self, urlopen_mock):
- urlopen_mock.return_value = io.BytesIO(b'')
+ @patch(URL_OPEN, _urlopen_mock(b''))
+ def test_fallback_directories_from_remote_empty(self):
self.assertRaisesRegexp(IOError, 'did not have any content', stem.descriptor.remote.FallbackDirectory.from_remote)
- @patch(URL_OPEN)
- def test_fallback_directories_from_remote_no_header(self, urlopen_mock):
- urlopen_mock.return_value = io.BytesIO(b'\n'.join(FALLBACK_DIR_CONTENT.splitlines()[1:]))
+ @patch(URL_OPEN, _urlopen_mock(b'\n'.join(FALLBACK_DIR_CONTENT.splitlines()[1:])))
+ def test_fallback_directories_from_remote_no_header(self):
self.assertRaisesRegexp(IOError, 'does not have a type field indicating it is fallback directory metadata', stem.descriptor.remote.FallbackDirectory.from_remote)
- @patch(URL_OPEN)
- def test_fallback_directories_from_remote_malformed_header(self, urlopen_mock):
- urlopen_mock.return_value = io.BytesIO(FALLBACK_DIR_CONTENT.replace(b'version=2.0.0', b'version'))
+ @patch(URL_OPEN, _urlopen_mock(FALLBACK_DIR_CONTENT.replace(b'version=2.0.0', b'version')))
+ def test_fallback_directories_from_remote_malformed_header(self):
self.assertRaisesRegexp(IOError, 'Malformed fallback directory header line: /\* version \*/', stem.descriptor.remote.FallbackDirectory.from_remote)
def test_fallback_directories_from_str(self):