diff options
| author | Damian Johnson <atagar@torproject.org> | 2018-03-28 11:57:10 -0700 |
|---|---|---|
| committer | Damian Johnson <atagar@torproject.org> | 2018-03-28 11:57:10 -0700 |
| commit | d6efeafe32c03ae514a95bd05a09fe8c7564b0b3 (patch) | |
| tree | 9930b13af47b2beb3f4748fd0ae6574ecb91419b | |
| parent | 4a8f8d0499aefaddd6d5d9386744e1b1125c6036 (diff) | |
Expanded descriptor compression support (branch: compression)
We supported plaintext and gzip when downloading descriptors, but recently tor
added lzma and zstd support as well...
https://gitweb.torproject.org/torspec.git/commit/?id=1cb56af
In practice these don't seem to work, so filing an issue. :/
| -rw-r--r-- | docs/change_log.rst | 1 | ||||
| -rw-r--r-- | stem/descriptor/remote.py | 154 | ||||
| -rw-r--r-- | stem/version.py | 2 | ||||
| -rw-r--r-- | test/integ/descriptor/remote.py | 20 | ||||
| -rw-r--r-- | test/unit/descriptor/remote.py | 43 |
5 files changed, 169 insertions, 51 deletions
diff --git a/docs/change_log.rst b/docs/change_log.rst index bb42c982..a67aae2d 100644 --- a/docs/change_log.rst +++ b/docs/change_log.rst @@ -55,6 +55,7 @@ The following are only available within Stem's `git repository * **Descriptors** * `Fallback directory v2 support <https://lists.torproject.org/pipermail/tor-dev/2017-December/012721.html>`_, which adds *nickname* and *extrainfo* + * Added zstd and lzma compression support (:spec:`1cb56af`) * Reduced maximum descriptors fetched by the remote module to match tor's new limit (:trac:`24743`) * Consensus **shared_randomness_*_reveal_count** attributes undocumented, and unavailable if retrieved before their corresponding shared_randomness_*_value attribute (:trac:`25046`) * Allow 'proto' line to have blank values (:spec:`a8455f4`) diff --git a/stem/descriptor/remote.py b/stem/descriptor/remote.py index 9181dbcf..768c49e1 100644 --- a/stem/descriptor/remote.py +++ b/stem/descriptor/remote.py @@ -80,6 +80,21 @@ content. For example... Maximum number of microdescriptors that can requested at a time by their hashes. + +.. data:: Compression (enum) + + Compression when downloading descriptors. + + .. versionadded:: 1.7.0 + + =============== =========== + Compression Description + =============== =========== + **PLAINTEXT** Uncompressed data. + **GZIP** `GZip compression <https://www.gnu.org/software/gzip/>`_. + **ZSTD** `Zstandard compression <https://www.zstd.net>`_ + **LZMA** `LZMA compression <https://en.wikipedia.org/wiki/LZMA>`_. 
+ =============== =========== """ import io @@ -91,6 +106,13 @@ import threading import time import zlib +import stem.descriptor +import stem.prereq +import stem.util.enum + +from stem import Flag +from stem.util import _hash_attr, connection, log, str_tools, tor_tools + try: # added in python 2.7 from collections import OrderedDict @@ -103,11 +125,26 @@ try: except ImportError: import urllib2 as urllib -import stem.descriptor -import stem.prereq - -from stem import Flag -from stem.util import _hash_attr, connection, log, str_tools, tor_tools +try: + # added in python 3.3 + import lzma + LZMA_SUPPORTED = True +except ImportError: + LZMA_SUPPORTED = False + +Compression = stem.util.enum.UppercaseEnum( + 'PLAINTEXT', + 'GZIP', + 'ZSTD', + 'LZMA', +) + +COMPRESSION_HEADER = { + Compression.PLAINTEXT: 'identity', + Compression.GZIP: 'gzip', # can also be 'deflate' + Compression.ZSTD: 'x-zstd', + Compression.LZMA: 'x-tor-lzma', +} # Tor has a limited number of descriptors we can fetch explicitly by their # fingerprint or hashes due to a limit on the url length by squid proxies. @@ -224,7 +261,7 @@ class Query(object): from stem.descriptor.remote import Query query = Query( - '/tor/server/all.z', + '/tor/server/all', block = True, timeout = 30, ) @@ -243,7 +280,7 @@ class Query(object): print('Current relays:') - for desc in Query('/tor/server/all.z', 'server-descriptor 1.0'): + for desc in Query('/tor/server/all', 'server-descriptor 1.0'): print(desc.fingerprint) In either case exceptions are available via our 'error' attribute. 
@@ -256,28 +293,37 @@ class Query(object): =============================================== =========== Resource Description =============================================== =========== - /tor/server/all.z all present server descriptors - /tor/server/fp/<fp1>+<fp2>+<fp3>.z server descriptors with the given fingerprints - /tor/extra/all.z all present extrainfo descriptors - /tor/extra/fp/<fp1>+<fp2>+<fp3>.z extrainfo descriptors with the given fingerprints - /tor/micro/d/<hash1>-<hash2>.z microdescriptors with the given hashes - /tor/status-vote/current/consensus.z present consensus - /tor/status-vote/current/consensus-microdesc.z present microdescriptor consensus - /tor/keys/all.z key certificates for the authorities - /tor/keys/fp/<v3ident1>+<v3ident2>.z key certificates for specific authorities + /tor/server/all all present server descriptors + /tor/server/fp/<fp1>+<fp2>+<fp3> server descriptors with the given fingerprints + /tor/extra/all all present extrainfo descriptors + /tor/extra/fp/<fp1>+<fp2>+<fp3> extrainfo descriptors with the given fingerprints + /tor/micro/d/<hash1>-<hash2> microdescriptors with the given hashes + /tor/status-vote/current/consensus present consensus + /tor/status-vote/current/consensus-microdesc present microdescriptor consensus + /tor/keys/all key certificates for the authorities + /tor/keys/fp/<v3ident1>+<v3ident2> key certificates for specific authorities =============================================== =========== - The '.z' suffix can be excluded to get a plaintext rather than compressed - response. Compression is handled transparently, so this shouldn't matter to - the caller. + **LZMA** compression requires the `lzma module + <https://docs.python.org/3/library/lzma.html>`_ which was added in Python + 3.3. + + For legacy reasons if our resource has a '.z' suffix then our **compression** + argument is overwritten with Compression.GZIP. - :var str resource: resource being fetched, such as '/tor/server/all.z' + .. 
versionchanged:: 1.7.0 + Added the compression argument, and gzip compression is used now by + default. + + :var str resource: resource being fetched, such as '/tor/server/all' :var str descriptor_type: type of descriptors being fetched (for options see :func:`~stem.descriptor.__init__.parse_file`), this is guessed from the resource if **None** :var list endpoints: (address, dirport) tuples of the authority or mirror we're querying, this uses authorities if undefined + :var int stem.descriptor.remote.Compression: compression used when + downloading :var int retries: number of times to attempt the request if downloading it fails :var bool fall_back_to_authority: when retrying request issues the last @@ -305,11 +351,23 @@ class Query(object): the same as running **query.run(True)** (default is **False**) """ - def __init__(self, resource, descriptor_type = None, endpoints = None, retries = 2, fall_back_to_authority = False, timeout = None, start = True, block = False, validate = False, document_handler = stem.descriptor.DocumentHandler.ENTRIES, **kwargs): + def __init__(self, resource, descriptor_type = None, endpoints = None, compression = Compression.GZIP, retries = 2, fall_back_to_authority = False, timeout = None, start = True, block = False, validate = False, document_handler = stem.descriptor.DocumentHandler.ENTRIES, **kwargs): if not resource.startswith('/'): raise ValueError("Resources should start with a '/': %s" % resource) - self.resource = resource + if resource.endswith('.z'): + self.compression = Compression.GZIP + self.resource = resource[:-2] + else: + self.compression = compression + self.resource = resource + + if self.compression not in Compression: + raise ValueError("Compression was '%s', but valid options are: %s" % (self.compression, ', '.join(Compression))) + elif self.compression == Compression.ZSTD: + raise ImportError('ZSTD is not yet supported') # TODO: implement + elif self.compression == Compression.LZMA and not LZMA_SUPPORTED: + raise 
ImportError('LZMA compression requires the lzma module, which was added in python 3.3') if descriptor_type: self.descriptor_type = descriptor_type @@ -352,7 +410,7 @@ class Query(object): self._downloader_thread = threading.Thread( name = 'Descriptor Query', target = self._download_descriptors, - args = (self.retries,) + args = (self.compression, self.retries,) ) self._downloader_thread.setDaemon(True) @@ -435,26 +493,40 @@ class Query(object): if use_authority or not self.endpoints: directories = get_authorities().values() - picked = random.choice(directories) + picked = random.choice(list(directories)) address, dirport = picked.address, picked.dir_port else: address, dirport = random.choice(self.endpoints) return 'http://%s:%i/%s' % (address, dirport, self.resource.lstrip('/')) - def _download_descriptors(self, retries): + def _download_descriptors(self, compression, retries): try: use_authority = retries == 0 and self.fall_back_to_authority self.download_url = self._pick_url(use_authority) - self.start_time = time.time() - response = urllib.urlopen(self.download_url, timeout = self.timeout).read() - if self.download_url.endswith('.z'): - response = zlib.decompress(response) + print(COMPRESSION_HEADER[compression]) - self.content = response.strip() + response = urllib.urlopen( + urllib.Request( + self.download_url, + headers = {'Accept-Encoding': COMPRESSION_HEADER[compression]}, + ), + timeout = self.timeout, + ).read() + + if compression == Compression.GZIP: + # The '32' is for automatic header detection... 
+ # https://stackoverflow.com/questions/3122145/zlib-error-error-3-while-decompressing-incorrect-header-check/22310760#22310760 + response = zlib.decompress(response, zlib.MAX_WBITS | 32) + elif compression == Compression.ZSTD: + pass # TODO: implement + elif compression == Compression.LZMA and LZMA_SUPPORTED: + response = lzma.decompress(response) + + self.content = response.strip() self.runtime = time.time() - self.start_time log.trace("Descriptors retrieved from '%s' in %0.2fs" % (self.download_url, self.runtime)) except: @@ -462,7 +534,7 @@ class Query(object): if retries > 0: log.debug("Unable to download descriptors from '%s' (%i retries remaining): %s" % (self.download_url, retries, exc)) - return self._download_descriptors(retries - 1) + return self._download_descriptors(compression, retries - 1) else: log.debug("Unable to download descriptors from '%s': %s" % (self.download_url, exc)) self.error = exc @@ -539,7 +611,7 @@ class DescriptorDownloader(object): fingerprints (this is due to a limit on the url length by squid proxies). """ - resource = '/tor/server/all.z' + resource = '/tor/server/all' if isinstance(fingerprints, str): fingerprints = [fingerprints] @@ -548,7 +620,7 @@ class DescriptorDownloader(object): if len(fingerprints) > MAX_FINGERPRINTS: raise ValueError('Unable to request more than %i descriptors at a time by their fingerprints' % MAX_FINGERPRINTS) - resource = '/tor/server/fp/%s.z' % '+'.join(fingerprints) + resource = '/tor/server/fp/%s' % '+'.join(fingerprints) return self.query(resource, **query_args) @@ -569,7 +641,7 @@ class DescriptorDownloader(object): fingerprints (this is due to a limit on the url length by squid proxies). 
""" - resource = '/tor/extra/all.z' + resource = '/tor/extra/all' if isinstance(fingerprints, str): fingerprints = [fingerprints] @@ -578,7 +650,7 @@ class DescriptorDownloader(object): if len(fingerprints) > MAX_FINGERPRINTS: raise ValueError('Unable to request more than %i descriptors at a time by their fingerprints' % MAX_FINGERPRINTS) - resource = '/tor/extra/fp/%s.z' % '+'.join(fingerprints) + resource = '/tor/extra/fp/%s' % '+'.join(fingerprints) return self.query(resource, **query_args) @@ -613,7 +685,7 @@ class DescriptorDownloader(object): if len(hashes) > MAX_MICRODESCRIPTOR_HASHES: raise ValueError('Unable to request more than %i microdescriptors at a time by their hashes' % MAX_MICRODESCRIPTOR_HASHES) - return self.query('/tor/micro/d/%s.z' % '-'.join(hashes), **query_args) + return self.query('/tor/micro/d/%s' % '-'.join(hashes), **query_args) def get_consensus(self, authority_v3ident = None, microdescriptor = False, **query_args): """ @@ -643,7 +715,7 @@ class DescriptorDownloader(object): if authority_v3ident: resource += '/%s' % authority_v3ident - consensus_query = self.query(resource + '.z', **query_args) + consensus_query = self.query(resource, **query_args) # if we're performing validation then check that it's signed by the # authority key certificates @@ -672,7 +744,7 @@ class DescriptorDownloader(object): if 'endpoint' not in query_args: query_args['endpoints'] = [(authority.address, authority.dir_port)] - return self.query(resource + '.z', **query_args) + return self.query(resource, **query_args) def get_key_certificates(self, authority_v3idents = None, **query_args): """ @@ -694,7 +766,7 @@ class DescriptorDownloader(object): squid proxies). 
""" - resource = '/tor/keys/all.z' + resource = '/tor/keys/all' if isinstance(authority_v3idents, str): authority_v3idents = [authority_v3idents] @@ -703,7 +775,7 @@ class DescriptorDownloader(object): if len(authority_v3idents) > MAX_FINGERPRINTS: raise ValueError('Unable to request more than %i key certificates at a time by their identity fingerprints' % MAX_FINGERPRINTS) - resource = '/tor/keys/fp/%s.z' % '+'.join(authority_v3idents) + resource = '/tor/keys/fp/%s' % '+'.join(authority_v3idents) return self.query(resource, **query_args) @@ -711,7 +783,7 @@ class DescriptorDownloader(object): """ Issues a request for the given resource. - :param str resource: resource being fetched, such as '/tor/server/all.z' + :param str resource: resource being fetched, such as '/tor/server/all' :param query_args: additional arguments for the :class:`~stem.descriptor.remote.Query` constructor diff --git a/stem/version.py b/stem/version.py index 9de2f1a5..9036effb 100644 --- a/stem/version.py +++ b/stem/version.py @@ -35,6 +35,7 @@ easily parsed and compared, for instance... 
Requirement Description ===================================== =========== **AUTH_SAFECOOKIE** SAFECOOKIE authentication method + **DESCRIPTOR_COMPRESSION** `Expanded compression support for ZSTD and LZMA <https://gitweb.torproject.org/torspec.git/commit/?id=1cb56afdc1e55e303e3e6b69e90d983ee217d93f>`_ **DROPGUARDS** DROPGUARDS requests **EVENT_AUTHDIR_NEWDESCS** AUTHDIR_NEWDESC events **EVENT_BUILDTIMEOUT_SET** BUILDTIMEOUT_SET events @@ -353,6 +354,7 @@ safecookie_req.greater_than(Version('0.2.3.13')) Requirement = stem.util.enum.Enum( ('AUTH_SAFECOOKIE', safecookie_req), + ('DESCRIPTOR_COMPRESSION', Version('0.3.1.1-alpha')), ('DROPGUARDS', Version('0.2.5.1-alpha')), ('EVENT_AUTHDIR_NEWDESCS', Version('0.1.1.10-alpha')), ('EVENT_BUILDTIMEOUT_SET', Version('0.2.2.7-alpha')), diff --git a/test/integ/descriptor/remote.py b/test/integ/descriptor/remote.py index 3123ef06..4e10eac2 100644 --- a/test/integ/descriptor/remote.py +++ b/test/integ/descriptor/remote.py @@ -16,6 +16,26 @@ import test.require class TestDescriptorDownloader(unittest.TestCase): @test.require.only_run_once @test.require.online + def test_compression(self): + """ + Issue a request for a plaintext descriptor. 
+ """ + + moria1 = stem.descriptor.remote.get_authorities()['moria1'] + + descriptors = list(stem.descriptor.remote.Query( + '/tor/server/fp/%s' % moria1.fingerprint, + 'server-descriptor 1.0', + endpoints = [(moria1.address, moria1.dir_port)], + timeout = 30, + validate = True, + ).run()) + + self.assertEqual(1, len(descriptors)) + self.assertEqual('moria1', descriptors[0].nickname) + + @test.require.only_run_once + @test.require.online def test_shorthand_aliases(self): """ Quick sanity test that we can call our shorthand aliases for getting diff --git a/test/unit/descriptor/remote.py b/test/unit/descriptor/remote.py index ac150d5c..f017b294 100644 --- a/test/unit/descriptor/remote.py +++ b/test/unit/descriptor/remote.py @@ -11,6 +11,8 @@ import stem.descriptor.remote import stem.prereq import stem.util.conf +from stem.descriptor.remote import Compression + try: # added in python 2.7 from collections import OrderedDict @@ -29,6 +31,8 @@ except ImportError: URL_OPEN = 'urllib.request.urlopen' if stem.prereq.is_python_3() else 'urllib2.urlopen' +TEST_RESOURCE = '/tor/server/fp/9695DFC35FFEB861329B9F1AB04C46397020CE31' + # Output from requesting moria1's descriptor from itself... 
# % curl http://128.31.0.39:9131/tor/server/fp/9695DFC35FFEB861329B9F1AB04C46397020CE31 @@ -108,6 +112,26 @@ FALLBACK_ENTRY = b"""\ class TestDescriptorDownloader(unittest.TestCase): + def test_gzip_url_override(self): + query = stem.descriptor.remote.Query(TEST_RESOURCE, compression = Compression.PLAINTEXT, start = False) + self.assertEqual(Compression.PLAINTEXT, query.compression) + self.assertEqual(TEST_RESOURCE, query.resource) + + query = stem.descriptor.remote.Query(TEST_RESOURCE + '.z', compression = Compression.PLAINTEXT, start = False) + self.assertEqual(Compression.GZIP, query.compression) + self.assertEqual(TEST_RESOURCE, query.resource) + + def test_zstd_support_check(self): + self.assertRaises(ImportError, stem.descriptor.remote.Query, TEST_RESOURCE, compression = Compression.ZSTD, start = False) + + def test_lzma_support_check(self): + with patch('stem.descriptor.remote.LZMA_SUPPORTED', True): + query = stem.descriptor.remote.Query(TEST_RESOURCE, compression = Compression.LZMA, start = False) + self.assertEqual(Compression.LZMA, query.compression) + + with patch('stem.descriptor.remote.LZMA_SUPPORTED', False): + self.assertRaises(ImportError, stem.descriptor.remote.Query, TEST_RESOURCE, compression = Compression.LZMA, start = False) + @patch(URL_OPEN) def test_query_download(self, urlopen_mock): """ @@ -117,13 +141,14 @@ class TestDescriptorDownloader(unittest.TestCase): urlopen_mock.return_value = io.BytesIO(TEST_DESCRIPTOR) query = stem.descriptor.remote.Query( - '/tor/server/fp/9695DFC35FFEB861329B9F1AB04C46397020CE31', + TEST_RESOURCE, 'server-descriptor 1.0', endpoints = [('128.31.0.39', 9131)], + compression = Compression.PLAINTEXT, validate = True, ) - expeced_url = 'http://128.31.0.39:9131/tor/server/fp/9695DFC35FFEB861329B9F1AB04C46397020CE31' + expeced_url = 'http://128.31.0.39:9131' + TEST_RESOURCE self.assertEqual(expeced_url, query._pick_url()) descriptors = list(query) @@ -135,7 +160,7 @@ class 
TestDescriptorDownloader(unittest.TestCase): self.assertEqual('9695DFC35FFEB861329B9F1AB04C46397020CE31', desc.fingerprint) self.assertEqual(TEST_DESCRIPTOR.strip(), desc.get_bytes()) - urlopen_mock.assert_called_once_with(expeced_url, timeout = None) + self.assertEqual(1, urlopen_mock.call_count) @patch(URL_OPEN) def test_query_with_malformed_content(self, urlopen_mock): @@ -147,9 +172,10 @@ class TestDescriptorDownloader(unittest.TestCase): urlopen_mock.return_value = io.BytesIO(descriptor_content) query = stem.descriptor.remote.Query( - '/tor/server/fp/9695DFC35FFEB861329B9F1AB04C46397020CE31', + TEST_RESOURCE, 'server-descriptor 1.0', endpoints = [('128.31.0.39', 9131)], + compression = Compression.PLAINTEXT, validate = True, ) @@ -171,7 +197,7 @@ class TestDescriptorDownloader(unittest.TestCase): urlopen_mock.side_effect = socket.timeout('connection timed out') query = stem.descriptor.remote.Query( - '/tor/server/fp/9695DFC35FFEB861329B9F1AB04C46397020CE31', + TEST_RESOURCE, 'server-descriptor 1.0', endpoints = [('128.31.0.39', 9131)], fall_back_to_authority = False, @@ -180,10 +206,6 @@ class TestDescriptorDownloader(unittest.TestCase): ) self.assertRaises(socket.timeout, query.run) - urlopen_mock.assert_called_with( - 'http://128.31.0.39:9131/tor/server/fp/9695DFC35FFEB861329B9F1AB04C46397020CE31', - timeout = 5, - ) self.assertEqual(3, urlopen_mock.call_count) @patch(URL_OPEN) @@ -191,9 +213,10 @@ class TestDescriptorDownloader(unittest.TestCase): urlopen_mock.return_value = io.BytesIO(TEST_DESCRIPTOR) query = stem.descriptor.remote.Query( - '/tor/server/fp/9695DFC35FFEB861329B9F1AB04C46397020CE31', + TEST_RESOURCE, 'server-descriptor 1.0', endpoints = [('128.31.0.39', 9131)], + compression = Compression.PLAINTEXT, validate = True, ) |
