diff options
| author | Damian Johnson <atagar@torproject.org> | 2018-03-31 21:39:00 -0700 |
|---|---|---|
| committer | Damian Johnson <atagar@torproject.org> | 2018-03-31 21:39:00 -0700 |
| commit | 079b48126fdc97d83df84d96e7423f99870f2584 (patch) | |
| tree | ba489d54b6d757ce41a82ca311ee76cfa7f0d425 | |
| parent | 4a8f8d0499aefaddd6d5d9386744e1b1125c6036 (diff) | |
| parent | f7949b644eaadf9a0e731547deafb40cf106e8a8 (diff) | |
Expanded descriptor compression support
Until now we only supported plaintext and gzip when downloading descriptors,
but tor recently added lzma and zstd support as well...
https://gitweb.torproject.org/torspec.git/commit/?id=1cb56af
The lzma module was added to Python's standard library in 3.3...
https://docs.python.org/3/library/lzma.html
... and zstd is covered by the zstandard module...
https://pypi.python.org/pypi/zstandard
| -rw-r--r-- | docs/change_log.rst | 1 | ||||
| -rw-r--r-- | stem/descriptor/remote.py | 182 | ||||
| -rw-r--r-- | stem/version.py | 2 | ||||
| -rw-r--r-- | test/unit/descriptor/__init__.py | 9 | ||||
| -rw-r--r-- | test/unit/descriptor/data/compressed_gzip | bin | 0 -> 1543 bytes | |||
| -rw-r--r-- | test/unit/descriptor/data/compressed_identity | 52 | ||||
| -rw-r--r-- | test/unit/descriptor/data/compressed_lzma | bin | 0 -> 1652 bytes | |||
| -rw-r--r-- | test/unit/descriptor/data/compressed_zstd | bin | 0 -> 1550 bytes | |||
| -rw-r--r-- | test/unit/descriptor/remote.py | 174 |
9 files changed, 341 insertions, 79 deletions
diff --git a/docs/change_log.rst b/docs/change_log.rst index bb42c982..a67aae2d 100644 --- a/docs/change_log.rst +++ b/docs/change_log.rst @@ -55,6 +55,7 @@ The following are only available within Stem's `git repository * **Descriptors** * `Fallback directory v2 support <https://lists.torproject.org/pipermail/tor-dev/2017-December/012721.html>`_, which adds *nickname* and *extrainfo* + * Added zstd and lzma compression support (:spec:`1cb56af`) * Reduced maximum descriptors fetched by the remote module to match tor's new limit (:trac:`24743`) * Consensus **shared_randomness_*_reveal_count** attributes undocumented, and unavailable if retrieved before their corresponding shared_randomness_*_value attribute (:trac:`25046`) * Allow 'proto' line to have blank values (:spec:`a8455f4`) diff --git a/stem/descriptor/remote.py b/stem/descriptor/remote.py index 9181dbcf..e1190fc7 100644 --- a/stem/descriptor/remote.py +++ b/stem/descriptor/remote.py @@ -80,6 +80,21 @@ content. For example... Maximum number of microdescriptors that can requested at a time by their hashes. + +.. data:: Compression (enum) + + Compression when downloading descriptors. + + .. versionadded:: 1.7.0 + + =============== =========== + Compression Description + =============== =========== + **PLAINTEXT** Uncompressed data. + **GZIP** `GZip compression <https://www.gnu.org/software/gzip/>`_. + **ZSTD** `Zstandard compression <https://www.zstd.net>`_, this requires the `zstandard module <https://pypi.python.org/pypi/zstandard>`_. + **LZMA** `LZMA compression <https://en.wikipedia.org/wiki/LZMA>`_, this requires the 'lzma module <https://docs.python.org/3/library/lzma.html>`_. 
+ =============== =========== """ import io @@ -91,6 +106,13 @@ import threading import time import zlib +import stem.descriptor +import stem.prereq +import stem.util.enum + +from stem import Flag +from stem.util import _hash_attr, connection, log, str_tools, tor_tools + try: # added in python 2.7 from collections import OrderedDict @@ -103,11 +125,38 @@ try: except ImportError: import urllib2 as urllib -import stem.descriptor -import stem.prereq +try: + # added in python 3.3 + import lzma + LZMA_SUPPORTED = True +except ImportError: + LZMA_SUPPORTED = False -from stem import Flag -from stem.util import _hash_attr, connection, log, str_tools, tor_tools +try: + # We use the suggested python zstd library... + # + # https://pypi.python.org/pypi/zstandard + # + # Unfortunately this installs as a zstd module which can be confused with... + # + # https://pypi.python.org/pypi/zstd + # + # As such checking for the specific decompression class we'll need. + + import zstd + ZSTD_SUPPORTED = hasattr(zstd, 'ZstdDecompressor') +except ImportError: + ZSTD_SUPPORTED = False + +Compression = stem.util.enum.Enum( + ('PLAINTEXT', 'identity'), + ('GZIP', 'gzip'), # can also be 'deflate' + ('ZSTD', 'x-zstd'), + ('LZMA', 'x-tor-lzma'), +) + +ZSTD_UNAVAILABLE_MSG = 'ZSTD compression requires the zstandard module (https://pypi.python.org/pypi/zstandard)' +LZMA_UNAVAILABLE_MSG = 'LZMA compression requires the lzma module (https://docs.python.org/3/library/lzma.html)' # Tor has a limited number of descriptors we can fetch explicitly by their # fingerprint or hashes due to a limit on the url length by squid proxies. 
@@ -224,7 +273,7 @@ class Query(object): from stem.descriptor.remote import Query query = Query( - '/tor/server/all.z', + '/tor/server/all', block = True, timeout = 30, ) @@ -243,7 +292,7 @@ class Query(object): print('Current relays:') - for desc in Query('/tor/server/all.z', 'server-descriptor 1.0'): + for desc in Query('/tor/server/all', 'server-descriptor 1.0'): print(desc.fingerprint) In either case exceptions are available via our 'error' attribute. @@ -256,28 +305,37 @@ class Query(object): =============================================== =========== Resource Description =============================================== =========== - /tor/server/all.z all present server descriptors - /tor/server/fp/<fp1>+<fp2>+<fp3>.z server descriptors with the given fingerprints - /tor/extra/all.z all present extrainfo descriptors - /tor/extra/fp/<fp1>+<fp2>+<fp3>.z extrainfo descriptors with the given fingerprints - /tor/micro/d/<hash1>-<hash2>.z microdescriptors with the given hashes - /tor/status-vote/current/consensus.z present consensus - /tor/status-vote/current/consensus-microdesc.z present microdescriptor consensus - /tor/keys/all.z key certificates for the authorities - /tor/keys/fp/<v3ident1>+<v3ident2>.z key certificates for specific authorities + /tor/server/all all present server descriptors + /tor/server/fp/<fp1>+<fp2>+<fp3> server descriptors with the given fingerprints + /tor/extra/all all present extrainfo descriptors + /tor/extra/fp/<fp1>+<fp2>+<fp3> extrainfo descriptors with the given fingerprints + /tor/micro/d/<hash1>-<hash2> microdescriptors with the given hashes + /tor/status-vote/current/consensus present consensus + /tor/status-vote/current/consensus-microdesc present microdescriptor consensus + /tor/keys/all key certificates for the authorities + /tor/keys/fp/<v3ident1>+<v3ident2> key certificates for specific authorities =============================================== =========== - The '.z' suffix can be excluded to get a plaintext rather than 
compressed - response. Compression is handled transparently, so this shouldn't matter to - the caller. + **ZSTD** compression requires `zstandard + <https://pypi.python.org/pypi/zstandard>`_, and **LZMA** requires the `lzma + module <https://docs.python.org/3/library/lzma.html>`_. + + For legacy reasons if our resource has a '.z' suffix then our **compression** + argument is overwritten with Compression.GZIP. + + .. versionchanged:: 1.7.0 + Added the compression argument. - :var str resource: resource being fetched, such as '/tor/server/all.z' + :var str resource: resource being fetched, such as '/tor/server/all' :var str descriptor_type: type of descriptors being fetched (for options see :func:`~stem.descriptor.__init__.parse_file`), this is guessed from the resource if **None** :var list endpoints: (address, dirport) tuples of the authority or mirror we're querying, this uses authorities if undefined + :var list compression: list of :data:`stem.descriptor.remote.Compression` + we're willing to accept, when none are mutually supported downloads fall + back to Compression.PLAINTEXT :var int retries: number of times to attempt the request if downloading it fails :var bool fall_back_to_authority: when retrying request issues the last @@ -305,17 +363,37 @@ class Query(object): the same as running **query.run(True)** (default is **False**) """ - def __init__(self, resource, descriptor_type = None, endpoints = None, retries = 2, fall_back_to_authority = False, timeout = None, start = True, block = False, validate = False, document_handler = stem.descriptor.DocumentHandler.ENTRIES, **kwargs): + def __init__(self, resource, descriptor_type = None, endpoints = None, compression = None, retries = 2, fall_back_to_authority = False, timeout = None, start = True, block = False, validate = False, document_handler = stem.descriptor.DocumentHandler.ENTRIES, **kwargs): if not resource.startswith('/'): raise ValueError("Resources should start with a '/': %s" % resource) - 
self.resource = resource + if resource.endswith('.z'): + compression = [Compression.GZIP] + resource = resource[:-2] + elif compression is None: + compression = [Compression.PLAINTEXT] + else: + if isinstance(compression, str): + compression = [compression] # caller provided only a single option + + if Compression.ZSTD in compression and not ZSTD_SUPPORTED: + log.log_once('stem.descriptor.remote.zstd_unavailable', log.INFO, ZSTD_UNAVAILABLE_MSG) + compression.remove(Compression.ZSTD) + + if Compression.LZMA in compression and not LZMA_SUPPORTED: + log.log_once('stem.descriptor.remote.lzma_unavailable', log.INFO, LZMA_UNAVAILABLE_MSG) + compression.remove(Compression.LZMA) + + if not compression: + compression = [Compression.PLAINTEXT] if descriptor_type: self.descriptor_type = descriptor_type else: self.descriptor_type = _guess_descriptor_type(resource) + self.resource = resource + self.compression = compression self.endpoints = endpoints if endpoints else [] self.retries = retries self.fall_back_to_authority = fall_back_to_authority @@ -352,7 +430,7 @@ class Query(object): self._downloader_thread = threading.Thread( name = 'Descriptor Query', target = self._download_descriptors, - args = (self.retries,) + args = (self.compression, self.retries,) ) self._downloader_thread.setDaemon(True) @@ -435,26 +513,50 @@ class Query(object): if use_authority or not self.endpoints: directories = get_authorities().values() - picked = random.choice(directories) + picked = random.choice(list(directories)) address, dirport = picked.address, picked.dir_port else: address, dirport = random.choice(self.endpoints) return 'http://%s:%i/%s' % (address, dirport, self.resource.lstrip('/')) - def _download_descriptors(self, retries): + def _download_descriptors(self, compression, retries): try: use_authority = retries == 0 and self.fall_back_to_authority self.download_url = self._pick_url(use_authority) - self.start_time = time.time() - response = urllib.urlopen(self.download_url, timeout = 
self.timeout).read() - if self.download_url.endswith('.z'): - response = zlib.decompress(response) + response = urllib.urlopen( + urllib.Request( + self.download_url, + headers = {'Accept-Encoding': ', '.join(compression)}, + ), + timeout = self.timeout, + ) + + data = response.read() + encoding = response.info().getheader('Content-Encoding') - self.content = response.strip() + # Tor doesn't include compression headers. As such when using gzip we + # need to include '32' for automatic header detection... + # + # https://stackoverflow.com/questions/3122145/zlib-error-error-3-while-decompressing-incorrect-header-check/22310760#22310760 + # + # ... and with zstd we need to use the streaming API. + if encoding in (Compression.GZIP, 'deflate'): + data = zlib.decompress(data, zlib.MAX_WBITS | 32) + elif encoding == Compression.ZSTD and ZSTD_SUPPORTED: + output_buffer = io.BytesIO() + + with zstd.ZstdDecompressor().write_to(output_buffer) as decompressor: + decompressor.write(data) + + data = output_buffer.getvalue() + elif encoding == Compression.LZMA and LZMA_SUPPORTED: + data = lzma.decompress(data) + + self.content = data.strip() self.runtime = time.time() - self.start_time log.trace("Descriptors retrieved from '%s' in %0.2fs" % (self.download_url, self.runtime)) except: @@ -462,7 +564,7 @@ class Query(object): if retries > 0: log.debug("Unable to download descriptors from '%s' (%i retries remaining): %s" % (self.download_url, retries, exc)) - return self._download_descriptors(retries - 1) + return self._download_descriptors(compression, retries - 1) else: log.debug("Unable to download descriptors from '%s': %s" % (self.download_url, exc)) self.error = exc @@ -539,7 +641,7 @@ class DescriptorDownloader(object): fingerprints (this is due to a limit on the url length by squid proxies). 
""" - resource = '/tor/server/all.z' + resource = '/tor/server/all' if isinstance(fingerprints, str): fingerprints = [fingerprints] @@ -548,7 +650,7 @@ class DescriptorDownloader(object): if len(fingerprints) > MAX_FINGERPRINTS: raise ValueError('Unable to request more than %i descriptors at a time by their fingerprints' % MAX_FINGERPRINTS) - resource = '/tor/server/fp/%s.z' % '+'.join(fingerprints) + resource = '/tor/server/fp/%s' % '+'.join(fingerprints) return self.query(resource, **query_args) @@ -569,7 +671,7 @@ class DescriptorDownloader(object): fingerprints (this is due to a limit on the url length by squid proxies). """ - resource = '/tor/extra/all.z' + resource = '/tor/extra/all' if isinstance(fingerprints, str): fingerprints = [fingerprints] @@ -578,7 +680,7 @@ class DescriptorDownloader(object): if len(fingerprints) > MAX_FINGERPRINTS: raise ValueError('Unable to request more than %i descriptors at a time by their fingerprints' % MAX_FINGERPRINTS) - resource = '/tor/extra/fp/%s.z' % '+'.join(fingerprints) + resource = '/tor/extra/fp/%s' % '+'.join(fingerprints) return self.query(resource, **query_args) @@ -613,7 +715,7 @@ class DescriptorDownloader(object): if len(hashes) > MAX_MICRODESCRIPTOR_HASHES: raise ValueError('Unable to request more than %i microdescriptors at a time by their hashes' % MAX_MICRODESCRIPTOR_HASHES) - return self.query('/tor/micro/d/%s.z' % '-'.join(hashes), **query_args) + return self.query('/tor/micro/d/%s' % '-'.join(hashes), **query_args) def get_consensus(self, authority_v3ident = None, microdescriptor = False, **query_args): """ @@ -643,7 +745,7 @@ class DescriptorDownloader(object): if authority_v3ident: resource += '/%s' % authority_v3ident - consensus_query = self.query(resource + '.z', **query_args) + consensus_query = self.query(resource, **query_args) # if we're performing validation then check that it's signed by the # authority key certificates @@ -672,7 +774,7 @@ class DescriptorDownloader(object): if 'endpoint' not 
in query_args: query_args['endpoints'] = [(authority.address, authority.dir_port)] - return self.query(resource + '.z', **query_args) + return self.query(resource, **query_args) def get_key_certificates(self, authority_v3idents = None, **query_args): """ @@ -694,7 +796,7 @@ class DescriptorDownloader(object): squid proxies). """ - resource = '/tor/keys/all.z' + resource = '/tor/keys/all' if isinstance(authority_v3idents, str): authority_v3idents = [authority_v3idents] @@ -703,7 +805,7 @@ class DescriptorDownloader(object): if len(authority_v3idents) > MAX_FINGERPRINTS: raise ValueError('Unable to request more than %i key certificates at a time by their identity fingerprints' % MAX_FINGERPRINTS) - resource = '/tor/keys/fp/%s.z' % '+'.join(authority_v3idents) + resource = '/tor/keys/fp/%s' % '+'.join(authority_v3idents) return self.query(resource, **query_args) @@ -711,7 +813,7 @@ class DescriptorDownloader(object): """ Issues a request for the given resource. - :param str resource: resource being fetched, such as '/tor/server/all.z' + :param str resource: resource being fetched, such as '/tor/server/all' :param query_args: additional arguments for the :class:`~stem.descriptor.remote.Query` constructor diff --git a/stem/version.py b/stem/version.py index 9de2f1a5..9036effb 100644 --- a/stem/version.py +++ b/stem/version.py @@ -35,6 +35,7 @@ easily parsed and compared, for instance... 
Requirement Description ===================================== =========== **AUTH_SAFECOOKIE** SAFECOOKIE authentication method + **DESCRIPTOR_COMPRESSION** `Expanded compression support for ZSTD and LZMA <https://gitweb.torproject.org/torspec.git/commit/?id=1cb56afdc1e55e303e3e6b69e90d983ee217d93f>`_ **DROPGUARDS** DROPGUARDS requests **EVENT_AUTHDIR_NEWDESCS** AUTHDIR_NEWDESC events **EVENT_BUILDTIMEOUT_SET** BUILDTIMEOUT_SET events @@ -353,6 +354,7 @@ safecookie_req.greater_than(Version('0.2.3.13')) Requirement = stem.util.enum.Enum( ('AUTH_SAFECOOKIE', safecookie_req), + ('DESCRIPTOR_COMPRESSION', Version('0.3.1.1-alpha')), ('DROPGUARDS', Version('0.2.5.1-alpha')), ('EVENT_AUTHDIR_NEWDESCS', Version('0.1.1.10-alpha')), ('EVENT_BUILDTIMEOUT_SET', Version('0.2.2.7-alpha')), diff --git a/test/unit/descriptor/__init__.py b/test/unit/descriptor/__init__.py index bf0e8619..bce8d3d0 100644 --- a/test/unit/descriptor/__init__.py +++ b/test/unit/descriptor/__init__.py @@ -25,6 +25,15 @@ def get_resource(filename): return os.path.join(DESCRIPTOR_TEST_DATA, filename) +def read_resource(filename): + """ + Provides test data. 
+ """ + + with open(get_resource(filename), 'rb') as resource_file: + return resource_file.read() + + def base_expect_invalid_attr(cls, default_attr, default_value, test, desc_attrs, attr = None, expected_value = None): return base_expect_invalid_attr_for_text(cls, default_attr, default_value, test, cls.content(desc_attrs), attr, expected_value) diff --git a/test/unit/descriptor/data/compressed_gzip b/test/unit/descriptor/data/compressed_gzip Binary files differnew file mode 100644 index 00000000..2b2dc642 --- /dev/null +++ b/test/unit/descriptor/data/compressed_gzip diff --git a/test/unit/descriptor/data/compressed_identity b/test/unit/descriptor/data/compressed_identity new file mode 100644 index 00000000..cd5b56b4 --- /dev/null +++ b/test/unit/descriptor/data/compressed_identity @@ -0,0 +1,52 @@ +router moria1 128.31.0.34 9101 0 9131 +identity-ed25519 +-----BEGIN ED25519 CERT----- +AQQABnxNAQS9ja600v/ZodOUiu7NepTkbPIOrFPgEVQE+03rGBtPAQAgBADKnR/C +2nhpr9UzJkkbPy83sqbfNh63VgFnCpkSTULAcq52z8xM7raRDCiTJTu/FK/BJGgE +dJcFQ8MgZJOuYgFKcMVyQ6j2FGbhDI0zQTK1+TAPNRG4ixiF7h7wqDT9Ugw= +-----END ED25519 CERT----- +master-key-ed25519 yp0fwtp4aa/VMyZJGz8vN7Km3zYet1YBZwqZEk1CwHI +platform Tor 0.3.4.0-alpha-dev on Linux +proto Cons=1-2 Desc=1-2 DirCache=1-2 HSDir=1-2 HSIntro=3-4 HSRend=1-2 Link=1-5 LinkAuth=1,3 Microdesc=1-2 Relay=1-2 +published 2018-03-31 04:17:41 +fingerprint 9695 DFC3 5FFE B861 329B 9F1A B04C 4639 7020 CE31 +uptime 295196 +bandwidth 512000 62914560 3403447 +extra-info-digest 393861CB4D9A0480E5A58A3005A88DD7F09271E3 CCbwxMembtsEKAVkU2bqCiBhKaokRlJv6077uy5kI4Q +caches-extra-info +onion-key +-----BEGIN RSA PUBLIC KEY----- +MIGJAoGBAKsNybH/i/nMLDwSr6WYd77EV6IgiuhfD7UBbsifsDywTe+YrZU3Z4O+ +DY9jemMPRA7wYioJrmXyMVTsbktjWUtWFge1oUj2xsxO3cufCavLmmkmqpkHL3Wn +xRJraupgfLK72/UN8wXB0duSAU/DH9hTg7WRhkFZdRJK7rostJvZAgMBAAE= +-----END RSA PUBLIC KEY----- +signing-key +-----BEGIN RSA PUBLIC KEY----- +MIGJAoGBALtJ9uD7cD7iHjqNA3AgsX9prES5QN+yFQyr2uOkxzhvunnaf6SNhzWW 
+bkfylnMrRm/qCz/czcjZO6N6EKHcXmypehvP566B7gAQ9vDsb+l7VZVWgXvzNc2s +tl3P7qpC08rgyJh1GqmtQTCesIDqkEyWxwToympCt09ZQRq+fIttAgMBAAE= +-----END RSA PUBLIC KEY----- +onion-key-crosscert +-----BEGIN CROSSCERT----- +A9x30r1LTl6rRUe4/irB6/cdNGmd95+bOuArjrmuwSI8Crerx3uNbbN8/iRlRSec +fBqElxb8SvEGONvtyjGVxp+t5QQS381Bah7DRMz7MfRvOgD8UoZ4DEcpMBnJnA+O +5OnAoBVOProdfUvFfEnBWJY+ELB3ShG4A3TL4c8QrwA= +-----END CROSSCERT----- +ntor-onion-key-crosscert 0 +-----BEGIN ED25519 CERT----- +AQoABnadAcqdH8LaeGmv1TMmSRs/Lzeypt82HrdWAWcKmRJNQsByAEnIlLlEtELa +EZA/y3Q5wXxA20upqmSKoCt1s8a6cXHqnsbha61MVbRLVCgWlWmKNjBSu+GiXeXC +IJ9Tr+aYawM= +-----END ED25519 CERT----- +hidden-service-dir +contact 1024D/28988BF5 arma mit edu +ntor-onion-key a3r4k2huGP4YdHTQ8ldJhZkm5O+1tLaKE//o5DSibU0= +reject *:* +tunnelled-dir-server +router-sig-ed25519 FkY6h3SMPWPt1fQlU8jsFllGWIy+lw7SGTjTV2FZVNlrHYHcS1F2L7bMGu38ljZ7J+VIBssRZnaiQVEXbI9uDQ +router-signature +-----BEGIN SIGNATURE----- +UzHGCcQK1XGntL3MVvcgcj0kpCsEMf5UIEj4eWOtCjWyriOX7nqwayc5vWOubZLN +pAZ27MjVmMbVGgtS0xFZ/UQrGEeznITaZwjLNd1IOliadKPdOp4w4OFcjbkCytk6 +MdAEyxwnZzxUwCex0Kj+3FWXQTr09VnXMg5/+vdbCUY= +-----END SIGNATURE----- diff --git a/test/unit/descriptor/data/compressed_lzma b/test/unit/descriptor/data/compressed_lzma Binary files differnew file mode 100644 index 00000000..e3297504 --- /dev/null +++ b/test/unit/descriptor/data/compressed_lzma diff --git a/test/unit/descriptor/data/compressed_zstd b/test/unit/descriptor/data/compressed_zstd Binary files differnew file mode 100644 index 00000000..b3c3269b --- /dev/null +++ b/test/unit/descriptor/data/compressed_zstd diff --git a/test/unit/descriptor/remote.py b/test/unit/descriptor/remote.py index ac150d5c..05cf1d57 100644 --- a/test/unit/descriptor/remote.py +++ b/test/unit/descriptor/remote.py @@ -2,7 +2,6 @@ Unit tests for stem.descriptor.remote. 
""" -import io import socket import tempfile import unittest @@ -11,6 +10,9 @@ import stem.descriptor.remote import stem.prereq import stem.util.conf +from stem.descriptor.remote import Compression +from test.unit.descriptor import read_resource + try: # added in python 2.7 from collections import OrderedDict @@ -19,9 +21,9 @@ except ImportError: try: # added in python 3.3 - from unittest.mock import patch + from unittest.mock import patch, Mock except ImportError: - from mock import patch + from mock import patch, Mock # The urlopen() method is in a different location depending on if we're using # python 2.x or 3.x. The 2to3 converter accounts for this in imports, but not @@ -29,6 +31,8 @@ except ImportError: URL_OPEN = 'urllib.request.urlopen' if stem.prereq.is_python_3() else 'urllib2.urlopen' +TEST_RESOURCE = '/tor/server/fp/9695DFC35FFEB861329B9F1AB04C46397020CE31' + # Output from requesting moria1's descriptor from itself... # % curl http://128.31.0.39:9131/tor/server/fp/9695DFC35FFEB861329B9F1AB04C46397020CE31 @@ -107,23 +111,128 @@ FALLBACK_ENTRY = b"""\ """ +def _urlopen_mock(data, encoding = 'identity'): + urlopen_mock = Mock() + urlopen_mock().read.return_value = data + urlopen_mock().info().getheader.return_value = encoding + return urlopen_mock + + class TestDescriptorDownloader(unittest.TestCase): - @patch(URL_OPEN) - def test_query_download(self, urlopen_mock): + def tearDown(self): + # prevent our mocks from impacting other tests + stem.descriptor.remote.SINGLETON_DOWNLOADER = None + + def test_gzip_url_override(self): + query = stem.descriptor.remote.Query(TEST_RESOURCE, start = False) + self.assertEqual([Compression.PLAINTEXT], query.compression) + self.assertEqual(TEST_RESOURCE, query.resource) + + query = stem.descriptor.remote.Query(TEST_RESOURCE + '.z', compression = Compression.PLAINTEXT, start = False) + self.assertEqual([Compression.GZIP], query.compression) + self.assertEqual(TEST_RESOURCE, query.resource) + + def 
test_zstd_support_check(self): + with patch('stem.descriptor.remote.ZSTD_SUPPORTED', True): + query = stem.descriptor.remote.Query(TEST_RESOURCE, compression = Compression.ZSTD, start = False) + self.assertEqual([Compression.ZSTD], query.compression) + + with patch('stem.descriptor.remote.ZSTD_SUPPORTED', False): + query = stem.descriptor.remote.Query(TEST_RESOURCE, compression = Compression.ZSTD, start = False) + self.assertEqual([Compression.PLAINTEXT], query.compression) + + def test_lzma_support_check(self): + with patch('stem.descriptor.remote.LZMA_SUPPORTED', True): + query = stem.descriptor.remote.Query(TEST_RESOURCE, compression = Compression.LZMA, start = False) + self.assertEqual([Compression.LZMA], query.compression) + + with patch('stem.descriptor.remote.LZMA_SUPPORTED', False): + query = stem.descriptor.remote.Query(TEST_RESOURCE, compression = Compression.LZMA, start = False) + self.assertEqual([Compression.PLAINTEXT], query.compression) + + @patch(URL_OPEN, _urlopen_mock(read_resource('compressed_identity'), encoding = 'identity')) + def test_compression_plaintext(self): """ - Check Query functionality when we successfully download a descriptor. + Download a plaintext descriptor. + """ + + descriptors = list(stem.descriptor.remote.get_server_descriptors( + '9695DFC35FFEB861329B9F1AB04C46397020CE31', + compression = Compression.PLAINTEXT, + validate = True, + )) + + self.assertEqual(1, len(descriptors)) + self.assertEqual('moria1', descriptors[0].nickname) + + @patch(URL_OPEN, _urlopen_mock(read_resource('compressed_gzip'), encoding = 'gzip')) + def test_compression_gzip(self): """ + Download a gip compressed descriptor. 
+ """ + + descriptors = list(stem.descriptor.remote.get_server_descriptors( + '9695DFC35FFEB861329B9F1AB04C46397020CE31', + compression = Compression.GZIP, + validate = True, + )) + + self.assertEqual(1, len(descriptors)) + self.assertEqual('moria1', descriptors[0].nickname) + + @patch(URL_OPEN, _urlopen_mock(read_resource('compressed_zstd'), encoding = 'x-zstd')) + def test_compression_zstd(self): + """ + Download a zstd compressed descriptor. + """ + + if not stem.descriptor.remote.ZSTD_SUPPORTED: + self.skipTest('(requires zstd module)') + return - urlopen_mock.return_value = io.BytesIO(TEST_DESCRIPTOR) + descriptors = list(stem.descriptor.remote.get_server_descriptors( + '9695DFC35FFEB861329B9F1AB04C46397020CE31', + compression = Compression.ZSTD, + validate = True, + )) + + self.assertEqual(1, len(descriptors)) + self.assertEqual('moria1', descriptors[0].nickname) + + @patch(URL_OPEN, _urlopen_mock(read_resource('compressed_lzma'), encoding = 'x-tor-lzma')) + def test_compression_lzma(self): + """ + Download a lzma compressed descriptor. + """ + + if not stem.descriptor.remote.LZMA_SUPPORTED: + self.skipTest('(requires lzma module)') + return + + descriptors = list(stem.descriptor.remote.get_server_descriptors( + '9695DFC35FFEB861329B9F1AB04C46397020CE31', + compression = Compression.LZMA, + validate = True, + )) + + self.assertEqual(1, len(descriptors)) + self.assertEqual('moria1', descriptors[0].nickname) + + @patch(URL_OPEN, _urlopen_mock(TEST_DESCRIPTOR)) + def test_query_download(self): + """ + Check Query functionality when we successfully download a descriptor. 
+ """ query = stem.descriptor.remote.Query( - '/tor/server/fp/9695DFC35FFEB861329B9F1AB04C46397020CE31', + TEST_RESOURCE, 'server-descriptor 1.0', endpoints = [('128.31.0.39', 9131)], + compression = Compression.PLAINTEXT, validate = True, ) - expeced_url = 'http://128.31.0.39:9131/tor/server/fp/9695DFC35FFEB861329B9F1AB04C46397020CE31' + expeced_url = 'http://128.31.0.39:9131' + TEST_RESOURCE self.assertEqual(expeced_url, query._pick_url()) descriptors = list(query) @@ -135,21 +244,17 @@ class TestDescriptorDownloader(unittest.TestCase): self.assertEqual('9695DFC35FFEB861329B9F1AB04C46397020CE31', desc.fingerprint) self.assertEqual(TEST_DESCRIPTOR.strip(), desc.get_bytes()) - urlopen_mock.assert_called_once_with(expeced_url, timeout = None) - - @patch(URL_OPEN) - def test_query_with_malformed_content(self, urlopen_mock): + @patch(URL_OPEN, _urlopen_mock(b'some malformed stuff')) + def test_query_with_malformed_content(self): """ Query with malformed descriptor content. """ - descriptor_content = b'some malformed stuff' - urlopen_mock.return_value = io.BytesIO(descriptor_content) - query = stem.descriptor.remote.Query( - '/tor/server/fp/9695DFC35FFEB861329B9F1AB04C46397020CE31', + TEST_RESOURCE, 'server-descriptor 1.0', endpoints = [('128.31.0.39', 9131)], + compression = Compression.PLAINTEXT, validate = True, ) @@ -171,7 +276,7 @@ class TestDescriptorDownloader(unittest.TestCase): urlopen_mock.side_effect = socket.timeout('connection timed out') query = stem.descriptor.remote.Query( - '/tor/server/fp/9695DFC35FFEB861329B9F1AB04C46397020CE31', + TEST_RESOURCE, 'server-descriptor 1.0', endpoints = [('128.31.0.39', 9131)], fall_back_to_authority = False, @@ -180,20 +285,15 @@ class TestDescriptorDownloader(unittest.TestCase): ) self.assertRaises(socket.timeout, query.run) - urlopen_mock.assert_called_with( - 'http://128.31.0.39:9131/tor/server/fp/9695DFC35FFEB861329B9F1AB04C46397020CE31', - timeout = 5, - ) self.assertEqual(3, urlopen_mock.call_count) - 
@patch(URL_OPEN) - def test_can_iterate_multiple_times(self, urlopen_mock): - urlopen_mock.return_value = io.BytesIO(TEST_DESCRIPTOR) - + @patch(URL_OPEN, _urlopen_mock(TEST_DESCRIPTOR)) + def test_can_iterate_multiple_times(self): query = stem.descriptor.remote.Query( - '/tor/server/fp/9695DFC35FFEB861329B9F1AB04C46397020CE31', + TEST_RESOURCE, 'server-descriptor 1.0', endpoints = [('128.31.0.39', 9131)], + compression = Compression.PLAINTEXT, validate = True, ) @@ -213,9 +313,8 @@ class TestDescriptorDownloader(unittest.TestCase): self.assertTrue(len(fallback_directories) > 10) self.assertEqual('5.39.92.199', fallback_directories['0BEA4A88D069753218EAAAD6D22EA87B9A1319D6'].address) - @patch(URL_OPEN) - def test_fallback_directories_from_remote(self, urlopen_mock): - urlopen_mock.return_value = io.BytesIO(FALLBACK_DIR_CONTENT) + @patch(URL_OPEN, _urlopen_mock(FALLBACK_DIR_CONTENT)) + def test_fallback_directories_from_remote(self): fallback_directories = stem.descriptor.remote.FallbackDirectory.from_remote() header = OrderedDict((('type', 'fallback'), ('version', '2.0.0'), ('timestamp', '20170526090242'))) @@ -298,19 +397,16 @@ class TestDescriptorDownloader(unittest.TestCase): self.assertEqual(expected, stem.descriptor.remote.FallbackDirectory.from_cache(tmp.name)) - @patch(URL_OPEN) - def test_fallback_directories_from_remote_empty(self, urlopen_mock): - urlopen_mock.return_value = io.BytesIO(b'') + @patch(URL_OPEN, _urlopen_mock(b'')) + def test_fallback_directories_from_remote_empty(self): self.assertRaisesRegexp(IOError, 'did not have any content', stem.descriptor.remote.FallbackDirectory.from_remote) - @patch(URL_OPEN) - def test_fallback_directories_from_remote_no_header(self, urlopen_mock): - urlopen_mock.return_value = io.BytesIO(b'\n'.join(FALLBACK_DIR_CONTENT.splitlines()[1:])) + @patch(URL_OPEN, _urlopen_mock(b'\n'.join(FALLBACK_DIR_CONTENT.splitlines()[1:]))) + def test_fallback_directories_from_remote_no_header(self): self.assertRaisesRegexp(IOError, 
'does not have a type field indicating it is fallback directory metadata', stem.descriptor.remote.FallbackDirectory.from_remote) - @patch(URL_OPEN) - def test_fallback_directories_from_remote_malformed_header(self, urlopen_mock): - urlopen_mock.return_value = io.BytesIO(FALLBACK_DIR_CONTENT.replace(b'version=2.0.0', b'version')) + @patch(URL_OPEN, _urlopen_mock(FALLBACK_DIR_CONTENT.replace(b'version=2.0.0', b'version'))) + def test_fallback_directories_from_remote_malformed_header(self): self.assertRaisesRegexp(IOError, 'Malformed fallback directory header line: /\* version \*/', stem.descriptor.remote.FallbackDirectory.from_remote) def test_fallback_directories_from_str(self): |
