summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDamian Johnson <atagar@torproject.org>2015-01-25 13:57:03 -0800
committerDamian Johnson <atagar@torproject.org>2015-01-25 13:57:09 -0800
commit3dac7c51300062d78298b370b1286965652600e4 (patch)
treedfc0b9880e64bd9f245ce2393a6f91fd05a1f6cb
parent92dd46485385fec75699250d2c67d961bf188c2c (diff)
parent6484250c4000673074a261da95e1db3ed9d69db8 (diff)
Tor descriptor lazy loading
I've been wanting to do this for years. When reading a descriptor we parsed every field in it. This is necessary if we're validating it, but usually users don't care about validation and only want an attribute or two. When parsing without validation we now lazy load the document, meaning we parse fields on-demand rather than everything upfront. This naturally greatly improves our performance for reading descriptors... Server descriptors: 27% faster Extrainfo descriptors: 71% faster Microdescriptors: 43% faster Consensus: 37% faster It comes at a small cost to our performance for when we read with validation, but not big enough for it to be a concern. As an added benefit this actually makes our code a lot more maintainable too! https://trac.torproject.org/projects/tor/ticket/14011 -------------------------------------------------------------------------------- Benchmarking script -------------------------------------------------------------------------------- import time from stem.descriptor import parse_file start_time, fingerprints = time.time(), [] for desc in parse_file('/home/atagar/.tor/cached-descriptors', validate = True): fingerprints.append(desc.fingerprint) count, runtime = len(fingerprints), time.time() - start_time print 'read %i descriptors with validation, took %0.2f seconds (%0.5f seconds per descriptor)' % (count, runtime, runtime / count) start_time, fingerprints = time.time(), [] for desc in parse_file('/home/atagar/.tor/cached-descriptors', validate = False): fingerprints.append(desc.fingerprint) count, runtime = len(fingerprints), time.time() - start_time print 'read %i descriptors without validation, took %0.2f seconds (%0.5f seconds per descriptor)' % (count, runtime, runtime / count) -------------------------------------------------------------------------------- Results -------------------------------------------------------------------------------- Please keep in mind these are just the results on my system. 
These are, of course, influenced by your system and background load... Server descriptors: before: read 6679 descriptors with validation, took 10.71 seconds (0.00160 seconds per descriptor) before: read 6679 descriptors without validation, took 4.46 seconds (0.00067 seconds per descriptor) after: read 6679 descriptors with validation, took 11.48 seconds (0.00172 seconds per descriptor) after: read 6679 descriptors without validation, took 3.25 seconds (0.00049 seconds per descriptor) Extrainfo descriptors: before: read 6677 descriptors with validation, took 7.91 seconds (0.00119 seconds per descriptor) before: read 6677 descriptors without validation, took 7.64 seconds (0.00114 seconds per descriptor) after: read 6677 descriptors with validation, took 8.91 seconds (0.00133 seconds per descriptor) after: read 6677 descriptors without validation, took 2.22 seconds (0.00033 seconds per descriptor) Microdescriptors: before: read 10526 descriptors with validation, took 2.41 seconds (0.00023 seconds per descriptor) before: read 10526 descriptors without validation, took 2.34 seconds (0.00022 seconds per descriptor) after: read 10526 descriptors with validation, took 2.74 seconds (0.00026 seconds per descriptor) after: read 10526 descriptors without validation, took 1.34 seconds (0.00013 seconds per descriptor) Consensus: before: read 6688 descriptors with validation, took 2.11 seconds (0.00032 seconds per descriptor) before: read 6688 descriptors without validation, took 2.04 seconds (0.00030 seconds per descriptor) after: read 6688 descriptors with validation, took 2.47 seconds (0.00037 seconds per descriptor) after: read 6688 descriptors without validation, took 1.28 seconds (0.00019 seconds per descriptor)
-rw-r--r--stem/descriptor/__init__.py172
-rw-r--r--stem/descriptor/extrainfo_descriptor.py974
-rw-r--r--stem/descriptor/microdescriptor.py122
-rw-r--r--stem/descriptor/networkstatus.py1279
-rw-r--r--stem/descriptor/router_status_entry.py737
-rw-r--r--stem/descriptor/server_descriptor.py683
-rw-r--r--test/unit/descriptor/extrainfo_descriptor.py28
-rw-r--r--test/unit/descriptor/networkstatus/directory_authority.py9
-rw-r--r--test/unit/descriptor/networkstatus/document_v3.py36
-rw-r--r--test/unit/descriptor/networkstatus/key_certificate.py24
-rw-r--r--test/unit/descriptor/router_status_entry.py13
-rw-r--r--test/unit/descriptor/server_descriptor.py10
12 files changed, 1915 insertions, 2172 deletions
diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py
index 22218075..0d2295dd 100644
--- a/stem/descriptor/__init__.py
+++ b/stem/descriptor/__init__.py
@@ -50,6 +50,7 @@ __all__ = [
'Descriptor',
]
+import copy
import os
import re
import tarfile
@@ -310,15 +311,93 @@ def _parse_metrics_file(descriptor_type, major_version, minor_version, descripto
raise TypeError("Unrecognized metrics descriptor format. type: '%s', version: '%i.%i'" % (descriptor_type, major_version, minor_version))
+def _value(line, entries):
+ return entries[line][0][0]
+
+
+def _values(line, entries):
+ return [entry[0] for entry in entries[line]]
+
+
+def _parse_simple_line(keyword, attribute):
+ def _parse(descriptor, entries):
+ setattr(descriptor, attribute, _value(keyword, entries))
+
+ return _parse
+
+
+def _parse_bytes_line(keyword, attribute):
+ def _parse(descriptor, entries):
+ line_match = re.search(stem.util.str_tools._to_bytes('^(opt )?%s(?:[%s]+(.*))?$' % (keyword, WHITESPACE)), descriptor.get_bytes(), re.MULTILINE)
+ result = None
+
+ if line_match:
+ value = line_match.groups()[1]
+ result = b'' if value is None else value
+
+ setattr(descriptor, attribute, result)
+
+ return _parse
+
+
+def _parse_timestamp_line(keyword, attribute):
+ # "<keyword>" YYYY-MM-DD HH:MM:SS
+
+ def _parse(descriptor, entries):
+ value = _value(keyword, entries)
+
+ try:
+ setattr(descriptor, attribute, stem.util.str_tools._parse_timestamp(value))
+ except ValueError:
+ raise ValueError("Timestamp on %s line wasn't parsable: %s %s" % (keyword, keyword, value))
+
+ return _parse
+
+
+def _parse_forty_character_hex(keyword, attribute):
+ # format of fingerprints, sha1 digests, etc
+
+ def _parse(descriptor, entries):
+ value = _value(keyword, entries)
+
+ if not stem.util.tor_tools.is_hex_digits(value, 40):
+ raise ValueError('%s line had an invalid value (should be 40 hex characters): %s %s' % (keyword, keyword, value))
+
+ setattr(descriptor, attribute, value)
+
+ return _parse
+
+
+def _parse_key_block(keyword, attribute, expected_block_type, value_attribute = None):
+ def _parse(descriptor, entries):
+ value, block_type, block_contents = entries[keyword][0]
+
+ if not block_contents or block_type != expected_block_type:
+ raise ValueError("'%s' should be followed by a %s block, but was a %s" % (keyword, expected_block_type, block_type))
+
+ setattr(descriptor, attribute, block_contents)
+
+ if value_attribute:
+ setattr(descriptor, value_attribute, value)
+
+ return _parse
+
+
class Descriptor(object):
"""
Common parent for all types of descriptors.
"""
- def __init__(self, contents):
+ ATTRIBUTES = {} # mapping of 'attribute' => (default_value, parsing_function)
+ PARSER_FOR_LINE = {} # line keyword to its associated parsing function
+
+ def __init__(self, contents, lazy_load = False):
self._path = None
self._archive_path = None
self._raw_contents = contents
+ self._lazy_loading = lazy_load
+ self._entries = {}
+ self._unrecognized_lines = []
def get_path(self):
"""
@@ -361,7 +440,49 @@ class Descriptor(object):
:returns: **list** of lines of unrecognized content
"""
- raise NotImplementedError
+ if self._lazy_loading:
+ # we need to go ahead and parse the whole document to figure this out
+ self._parse(self._entries, False)
+ self._lazy_loading = False
+
+ return list(self._unrecognized_lines)
+
+ def _parse(self, entries, validate, parser_for_line = None):
+ """
+ Parses a series of 'keyword => [(value, block_type, block_contents)...]'
+ mappings and applies them as attributes.
+
+ :param dict entries: descriptor contents to be applied
+ :param bool validate: checks the validity of descriptor content if True
+ :param dict parser_for_line: mapping of line keywords to the function for parsing them
+
+ :raises: **ValueError** if an error occurs in validation
+ """
+
+ if parser_for_line is None:
+ parser_for_line = self.PARSER_FOR_LINE
+
+ # set defaults
+
+ for attr in self.ATTRIBUTES:
+ if not hasattr(self, attr):
+ setattr(self, attr, copy.copy(self.ATTRIBUTES[attr][0]))
+
+ for keyword, values in list(entries.items()):
+ try:
+ if keyword in parser_for_line:
+ parser_for_line[keyword](self, entries)
+ else:
+ for value, block_type, block_contents in values:
+ line = '%s %s' % (keyword, value)
+
+ if block_contents:
+ line += '\n%s' % block_contents
+
+ self._unrecognized_lines.append(line)
+ except ValueError as exc:
+ if validate:
+ raise exc
def _set_path(self, path):
self._path = path
@@ -372,6 +493,23 @@ class Descriptor(object):
def _name(self, is_plural = False):
return str(type(self))
+ def __getattr__(self, name):
+ # If attribute isn't already present we might be lazy loading it...
+
+ if self._lazy_loading and name in self.ATTRIBUTES:
+ default, parsing_function = self.ATTRIBUTES[name]
+
+ try:
+ parsing_function(self, self._entries)
+ except (ValueError, KeyError):
+ try:
+ # despite having a validation failure check to see if we set something
+ return super(Descriptor, self).__getattribute__(name)
+ except AttributeError:
+ setattr(self, name, copy.copy(default))
+
+ return super(Descriptor, self).__getattribute__(name)
+
def __str__(self):
if stem.prereq.is_python_3():
return stem.util.str_tools._to_unicode(self._raw_contents)
@@ -379,33 +517,6 @@ class Descriptor(object):
return self._raw_contents
-def _get_bytes_field(keyword, content):
- """
- Provides the value corresponding to the given keyword. This is handy to fetch
- values specifically allowed to be arbitrary bytes prior to converting to
- unicode.
-
- :param str keyword: line to look up
- :param bytes content: content to look through
-
- :returns: **bytes** value on the given line, **None** if the line doesn't
- exist
-
- :raises: **ValueError** if the content isn't bytes
- """
-
- if not isinstance(content, bytes):
- raise ValueError('Content must be bytes, got a %s' % type(content))
-
- line_match = re.search(stem.util.str_tools._to_bytes('^(opt )?%s(?:[%s]+(.*))?$' % (keyword, WHITESPACE)), content, re.MULTILINE)
-
- if line_match:
- value = line_match.groups()[1]
- return b'' if value is None else value
- else:
- return None
-
-
def _read_until_keywords(keywords, descriptor_file, inclusive = False, ignore_first = False, skip = False, end_position = None, include_ending_keyword = False):
"""
Reads from the descriptor file until we get to one of the given keywords or reach the
@@ -538,6 +649,9 @@ def _get_descriptor_components(raw_contents, validate, extra_keywords = ()):
value tuple, the second being a list of those entries.
"""
+ if isinstance(raw_contents, bytes):
+ raw_contents = stem.util.str_tools._to_unicode(raw_contents)
+
entries = OrderedDict()
extra_entries = [] # entries with a keyword in extra_keywords
remaining_lines = raw_contents.split('\n')
diff --git a/stem/descriptor/extrainfo_descriptor.py b/stem/descriptor/extrainfo_descriptor.py
index d6e6102d..5b0339cd 100644
--- a/stem/descriptor/extrainfo_descriptor.py
+++ b/stem/descriptor/extrainfo_descriptor.py
@@ -32,8 +32,7 @@ Extra-info descriptors are available from a few sources...
|- RelayExtraInfoDescriptor - Extra-info descriptor for a relay.
|- BridgeExtraInfoDescriptor - Extra-info descriptor for a bridge.
|
- |- digest - calculates the upper-case hex digest value for our content
- +- get_unrecognized_lines - lines with unrecognized content
+ +- digest - calculates the upper-case hex digest value for our content
.. data:: DirResponse (enum)
@@ -69,6 +68,7 @@ Extra-info descriptors are available from a few sources...
===================== ===========
"""
+import functools
import hashlib
import re
@@ -81,6 +81,11 @@ from stem.descriptor import (
Descriptor,
_read_until_keywords,
_get_descriptor_components,
+ _value,
+ _values,
+ _parse_timestamp_line,
+ _parse_forty_character_hex,
+ _parse_key_block,
)
try:
@@ -225,6 +230,316 @@ def _parse_timestamp_and_interval(keyword, content):
raise ValueError("%s line's timestamp wasn't parsable: %s" % (keyword, line))
+def _parse_extra_info_line(descriptor, entries):
+ # "extra-info" Nickname Fingerprint
+
+ value = _value('extra-info', entries)
+ extra_info_comp = value.split()
+
+ if len(extra_info_comp) < 2:
+ raise ValueError('Extra-info line must have two values: extra-info %s' % value)
+ elif not stem.util.tor_tools.is_valid_nickname(extra_info_comp[0]):
+ raise ValueError("Extra-info line entry isn't a valid nickname: %s" % extra_info_comp[0])
+ elif not stem.util.tor_tools.is_valid_fingerprint(extra_info_comp[1]):
+ raise ValueError('Tor relay fingerprints consist of forty hex digits: %s' % extra_info_comp[1])
+
+ descriptor.nickname = extra_info_comp[0]
+ descriptor.fingerprint = extra_info_comp[1]
+
+
+def _parse_transport_line(descriptor, entries):
+ # "transport" transportname address:port [arglist]
+ # Everything after the transportname is scrubbed in published bridge
+ # descriptors, so we'll never see it in practice.
+ #
+ # These entries really only make sense for bridges, but have been seen
+ # on non-bridges in the wild when the relay operator configured it this
+ # way.
+
+ transports = {}
+
+ for value in _values('transport', entries):
+ name, address, port, args = None, None, None, None
+
+ if ' ' not in value:
+ # scrubbed
+ name = value
+ else:
+ # not scrubbed
+ value_comp = value.split()
+
+ if len(value_comp) < 1:
+ raise ValueError('Transport line is missing its transport name: transport %s' % value)
+ elif len(value_comp) < 2:
+ raise ValueError('Transport line is missing its address:port value: transport %s' % value)
+ elif ':' not in value_comp[1]:
+ raise ValueError("Transport line's address:port entry is missing a colon: transport %s" % value)
+
+ name = value_comp[0]
+ address, port_str = value_comp[1].split(':', 1)
+
+ if not stem.util.connection.is_valid_ipv4_address(address) or \
+ stem.util.connection.is_valid_ipv6_address(address):
+ raise ValueError('Transport line has a malformed address: transport %s' % value)
+ elif not stem.util.connection.is_valid_port(port_str):
+ raise ValueError('Transport line has a malformed port: transport %s' % value)
+
+ port = int(port_str)
+ args = value_comp[2:] if len(value_comp) >= 3 else []
+
+ transports[name] = (address, port, args)
+
+ descriptor.transport = transports
+
+
+def _parse_cell_circuits_per_decline_line(descriptor, entries):
+ # "cell-circuits-per-decile" num
+
+ value = _value('cell-circuits-per-decile', entries)
+
+ if not value.isdigit():
+ raise ValueError('Non-numeric cell-circuits-per-decile value: %s' % value)
+ elif int(value) < 0:
+ raise ValueError('Negative cell-circuits-per-decile value: %s' % value)
+
+ descriptor.cell_circuits_per_decile = int(value)
+
+
+def _parse_dirreq_line(keyword, recognized_counts_attr, unrecognized_counts_attr, descriptor, entries):
+ value = _value(keyword, entries)
+
+ recognized_counts = {}
+ unrecognized_counts = {}
+
+ is_response_stats = keyword in ('dirreq-v2-resp', 'dirreq-v3-resp')
+ key_set = DirResponse if is_response_stats else DirStat
+
+ key_type = 'STATUS' if is_response_stats else 'STAT'
+ error_msg = '%s lines should contain %s=COUNT mappings: %s %s' % (keyword, key_type, keyword, value)
+
+ if value:
+ for entry in value.split(','):
+ if '=' not in entry:
+ raise ValueError(error_msg)
+
+ status, count = entry.split('=', 1)
+
+ if count.isdigit():
+ if status in key_set:
+ recognized_counts[status] = int(count)
+ else:
+ unrecognized_counts[status] = int(count)
+ else:
+ raise ValueError(error_msg)
+
+ setattr(descriptor, recognized_counts_attr, recognized_counts)
+ setattr(descriptor, unrecognized_counts_attr, unrecognized_counts)
+
+
+def _parse_dirreq_share_line(keyword, attribute, descriptor, entries):
+ value = _value(keyword, entries)
+
+ if not value.endswith('%'):
+ raise ValueError('%s lines should be a percentage: %s %s' % (keyword, keyword, value))
+ elif float(value[:-1]) < 0:
+ raise ValueError('Negative percentage value: %s %s' % (keyword, value))
+
+ # bug means it might be above 100%: https://lists.torproject.org/pipermail/tor-dev/2012-June/003679.html
+
+ setattr(descriptor, attribute, float(value[:-1]) / 100)
+
+
+def _parse_cell_line(keyword, attribute, descriptor, entries):
+ # "<keyword>" num,...,num
+
+ value = _value(keyword, entries)
+ entries, exc = [], None
+
+ if value:
+ for entry in value.split(','):
+ try:
+ # Values should be positive but as discussed in ticket #5849
+ # there was a bug around this. It was fixed in tor 0.2.2.1.
+
+ entries.append(float(entry))
+ except ValueError:
+ exc = ValueError('Non-numeric entry in %s listing: %s %s' % (keyword, keyword, value))
+
+ setattr(descriptor, attribute, entries)
+
+ if exc:
+ raise exc
+
+
+def _parse_timestamp_and_interval_line(keyword, end_attribute, interval_attribute, descriptor, entries):
+ # "<keyword>" YYYY-MM-DD HH:MM:SS (NSEC s)
+
+ timestamp, interval, _ = _parse_timestamp_and_interval(keyword, _value(keyword, entries))
+ setattr(descriptor, end_attribute, timestamp)
+ setattr(descriptor, interval_attribute, interval)
+
+
+def _parse_conn_bi_direct_line(descriptor, entries):
+ # "conn-bi-direct" YYYY-MM-DD HH:MM:SS (NSEC s) BELOW,READ,WRITE,BOTH
+
+ value = _value('conn-bi-direct', entries)
+ timestamp, interval, remainder = _parse_timestamp_and_interval('conn-bi-direct', value)
+ stats = remainder.split(',')
+
+ if len(stats) != 4 or not (stats[0].isdigit() and stats[1].isdigit() and stats[2].isdigit() and stats[3].isdigit()):
+ raise ValueError('conn-bi-direct line should end with four numeric values: conn-bi-direct %s' % value)
+
+ descriptor.conn_bi_direct_end = timestamp
+ descriptor.conn_bi_direct_interval = interval
+ descriptor.conn_bi_direct_below = int(stats[0])
+ descriptor.conn_bi_direct_read = int(stats[1])
+ descriptor.conn_bi_direct_write = int(stats[2])
+ descriptor.conn_bi_direct_both = int(stats[3])
+
+
+def _parse_history_line(keyword, end_attribute, interval_attribute, values_attribute, descriptor, entries):
+ # "<keyword>" YYYY-MM-DD HH:MM:SS (NSEC s) NUM,NUM,NUM,NUM,NUM...
+
+ value = _value(keyword, entries)
+ timestamp, interval, remainder = _parse_timestamp_and_interval(keyword, value)
+ history_values = []
+
+ if remainder:
+ try:
+ history_values = [int(entry) for entry in remainder.split(',')]
+ except ValueError:
+ raise ValueError('%s line has non-numeric values: %s %s' % (keyword, keyword, value))
+
+ setattr(descriptor, end_attribute, timestamp)
+ setattr(descriptor, interval_attribute, interval)
+ setattr(descriptor, values_attribute, history_values)
+
+
+def _parse_port_count_line(keyword, attribute, descriptor, entries):
+ # "<keyword>" port=N,port=N,...
+
+ value, port_mappings = _value(keyword, entries), {}
+ error_msg = 'Entries in %s line should only be PORT=N entries: %s %s' % (keyword, keyword, value)
+
+ if value:
+ for entry in value.split(','):
+ if '=' not in entry:
+ raise ValueError(error_msg)
+
+ port, stat = entry.split('=', 1)
+
+ if (port == 'other' or stem.util.connection.is_valid_port(port)) and stat.isdigit():
+ if port != 'other':
+ port = int(port)
+
+ port_mappings[port] = int(stat)
+ else:
+ raise ValueError(error_msg)
+
+ setattr(descriptor, attribute, port_mappings)
+
+
+def _parse_geoip_to_count_line(keyword, attribute, descriptor, entries):
+ # "<keyword>" CC=N,CC=N,...
+ #
+ # The maxmind geoip (https://www.maxmind.com/app/iso3166) has numeric
+ # locale codes for some special values, for instance...
+ # A1,"Anonymous Proxy"
+ # A2,"Satellite Provider"
+ # ??,"Unknown"
+
+ value, locale_usage = _value(keyword, entries), {}
+ error_msg = 'Entries in %s line should only be CC=N entries: %s %s' % (keyword, keyword, value)
+
+ if value:
+ for entry in value.split(','):
+ if '=' not in entry:
+ raise ValueError(error_msg)
+
+ locale, count = entry.split('=', 1)
+
+ if _locale_re.match(locale) and count.isdigit():
+ locale_usage[locale] = int(count)
+ else:
+ raise ValueError(error_msg)
+
+ setattr(descriptor, attribute, locale_usage)
+
+
+def _parse_bridge_ip_versions_line(descriptor, entries):
+ value, ip_versions = _value('bridge-ip-versions', entries), {}
+
+ if value:
+ for entry in value.split(','):
+ if '=' not in entry:
+ raise stem.ProtocolError("The bridge-ip-versions should be a comma separated listing of '<protocol>=<count>' mappings: bridge-ip-versions %s" % value)
+
+ protocol, count = entry.split('=', 1)
+
+ if not count.isdigit():
+ raise stem.ProtocolError('IP protocol count was non-numeric (%s): bridge-ip-versions %s' % (count, value))
+
+ ip_versions[protocol] = int(count)
+
+ descriptor.ip_versions = ip_versions
+
+
+def _parse_bridge_ip_transports_line(descriptor, entries):
+ value, ip_transports = _value('bridge-ip-transports', entries), {}
+
+ if value:
+ for entry in value.split(','):
+ if '=' not in entry:
+ raise stem.ProtocolError("The bridge-ip-transports should be a comma separated listing of '<protocol>=<count>' mappings: bridge-ip-transports %s" % value)
+
+ protocol, count = entry.split('=', 1)
+
+ if not count.isdigit():
+ raise stem.ProtocolError('Transport count was non-numeric (%s): bridge-ip-transports %s' % (count, value))
+
+ ip_transports[protocol] = int(count)
+
+ descriptor.ip_transports = ip_transports
+
+
+_parse_geoip_db_digest_line = _parse_forty_character_hex('geoip-db-digest', 'geoip_db_digest')
+_parse_geoip6_db_digest_line = _parse_forty_character_hex('geoip6-db-digest', 'geoip6_db_digest')
+_parse_dirreq_v2_resp_line = functools.partial(_parse_dirreq_line, 'dirreq-v2-resp', 'dir_v2_responses', 'dir_v2_responses_unknown')
+_parse_dirreq_v3_resp_line = functools.partial(_parse_dirreq_line, 'dirreq-v3-resp', 'dir_v3_responses', 'dir_v3_responses_unknown')
+_parse_dirreq_v2_direct_dl_line = functools.partial(_parse_dirreq_line, 'dirreq-v2-direct-dl', 'dir_v2_direct_dl', 'dir_v2_direct_dl_unknown')
+_parse_dirreq_v3_direct_dl_line = functools.partial(_parse_dirreq_line, 'dirreq-v3-direct-dl', 'dir_v3_direct_dl', 'dir_v3_direct_dl_unknown')
+_parse_dirreq_v2_tunneled_dl_line = functools.partial(_parse_dirreq_line, 'dirreq-v2-tunneled-dl', 'dir_v2_tunneled_dl', 'dir_v2_tunneled_dl_unknown')
+_parse_dirreq_v3_tunneled_dl_line = functools.partial(_parse_dirreq_line, 'dirreq-v3-tunneled-dl', 'dir_v3_tunneled_dl', 'dir_v3_tunneled_dl_unknown')
+_parse_dirreq_v2_share_line = functools.partial(_parse_dirreq_share_line, 'dirreq-v2-share', 'dir_v2_share')
+_parse_dirreq_v3_share_line = functools.partial(_parse_dirreq_share_line, 'dirreq-v3-share', 'dir_v3_share')
+_parse_cell_processed_cells_line = functools.partial(_parse_cell_line, 'cell-processed-cells', 'cell_processed_cells')
+_parse_cell_queued_cells_line = functools.partial(_parse_cell_line, 'cell-queued-cells', 'cell_queued_cells')
+_parse_cell_time_in_queue_line = functools.partial(_parse_cell_line, 'cell-time-in-queue', 'cell_time_in_queue')
+_parse_published_line = _parse_timestamp_line('published', 'published')
+_parse_geoip_start_time_line = _parse_timestamp_line('geoip-start-time', 'geoip_start_time')
+_parse_cell_stats_end_line = functools.partial(_parse_timestamp_and_interval_line, 'cell-stats-end', 'cell_stats_end', 'cell_stats_interval')
+_parse_entry_stats_end_line = functools.partial(_parse_timestamp_and_interval_line, 'entry-stats-end', 'entry_stats_end', 'entry_stats_interval')
+_parse_exit_stats_end_line = functools.partial(_parse_timestamp_and_interval_line, 'exit-stats-end', 'exit_stats_end', 'exit_stats_interval')
+_parse_bridge_stats_end_line = functools.partial(_parse_timestamp_and_interval_line, 'bridge-stats-end', 'bridge_stats_end', 'bridge_stats_interval')
+_parse_dirreq_stats_end_line = functools.partial(_parse_timestamp_and_interval_line, 'dirreq-stats-end', 'dir_stats_end', 'dir_stats_interval')
+_parse_read_history_line = functools.partial(_parse_history_line, 'read-history', 'read_history_end', 'read_history_interval', 'read_history_values')
+_parse_write_history_line = functools.partial(_parse_history_line, 'write-history', 'write_history_end', 'write_history_interval', 'write_history_values')
+_parse_dirreq_read_history_line = functools.partial(_parse_history_line, 'dirreq-read-history', 'dir_read_history_end', 'dir_read_history_interval', 'dir_read_history_values')
+_parse_dirreq_write_history_line = functools.partial(_parse_history_line, 'dirreq-write-history', 'dir_write_history_end', 'dir_write_history_interval', 'dir_write_history_values')
+_parse_exit_kibibytes_written_line = functools.partial(_parse_port_count_line, 'exit-kibibytes-written', 'exit_kibibytes_written')
+_parse_exit_kibibytes_read_line = functools.partial(_parse_port_count_line, 'exit-kibibytes-read', 'exit_kibibytes_read')
+_parse_exit_streams_opened_line = functools.partial(_parse_port_count_line, 'exit-streams-opened', 'exit_streams_opened')
+_parse_dirreq_v2_ips_line = functools.partial(_parse_geoip_to_count_line, 'dirreq-v2-ips', 'dir_v2_ips')
+_parse_dirreq_v3_ips_line = functools.partial(_parse_geoip_to_count_line, 'dirreq-v3-ips', 'dir_v3_ips')
+_parse_dirreq_v2_reqs_line = functools.partial(_parse_geoip_to_count_line, 'dirreq-v2-reqs', 'dir_v2_requests')
+_parse_dirreq_v3_reqs_line = functools.partial(_parse_geoip_to_count_line, 'dirreq-v3-reqs', 'dir_v3_requests')
+_parse_geoip_client_origins_line = functools.partial(_parse_geoip_to_count_line, 'geoip-client-origins', 'geoip_client_origins')
+_parse_entry_ips_line = functools.partial(_parse_geoip_to_count_line, 'entry-ips', 'entry_ips')
+_parse_bridge_ips_line = functools.partial(_parse_geoip_to_count_line, 'bridge-ips', 'bridge_ips')
+_parse_router_digest_line = _parse_forty_character_hex('router-digest', '_digest')
+_parse_router_signature_line = _parse_key_block('router-signature', 'signature', 'SIGNATURE')
+
+
class ExtraInfoDescriptor(Descriptor):
"""
Extra-info descriptor document.
@@ -330,6 +645,128 @@ class ExtraInfoDescriptor(Descriptor):
a default value, others are left as **None** if undefined
"""
+ ATTRIBUTES = {
+ 'nickname': (None, _parse_extra_info_line),
+ 'fingerprint': (None, _parse_extra_info_line),
+ 'published': (None, _parse_published_line),
+ 'geoip_db_digest': (None, _parse_geoip_db_digest_line),
+ 'geoip6_db_digest': (None, _parse_geoip6_db_digest_line),
+ 'transport': ({}, _parse_transport_line),
+
+ 'conn_bi_direct_end': (None, _parse_conn_bi_direct_line),
+ 'conn_bi_direct_interval': (None, _parse_conn_bi_direct_line),
+ 'conn_bi_direct_below': (None, _parse_conn_bi_direct_line),
+ 'conn_bi_direct_read': (None, _parse_conn_bi_direct_line),
+ 'conn_bi_direct_write': (None, _parse_conn_bi_direct_line),
+ 'conn_bi_direct_both': (None, _parse_conn_bi_direct_line),
+
+ 'read_history_end': (None, _parse_read_history_line),
+ 'read_history_interval': (None, _parse_read_history_line),
+ 'read_history_values': (None, _parse_read_history_line),
+
+ 'write_history_end': (None, _parse_write_history_line),
+ 'write_history_interval': (None, _parse_write_history_line),
+ 'write_history_values': (None, _parse_write_history_line),
+
+ 'cell_stats_end': (None, _parse_cell_stats_end_line),
+ 'cell_stats_interval': (None, _parse_cell_stats_end_line),
+ 'cell_processed_cells': (None, _parse_cell_processed_cells_line),
+ 'cell_queued_cells': (None, _parse_cell_queued_cells_line),
+ 'cell_time_in_queue': (None, _parse_cell_time_in_queue_line),
+ 'cell_circuits_per_decile': (None, _parse_cell_circuits_per_decline_line),
+
+ 'dir_stats_end': (None, _parse_dirreq_stats_end_line),
+ 'dir_stats_interval': (None, _parse_dirreq_stats_end_line),
+ 'dir_v2_ips': (None, _parse_dirreq_v2_ips_line),
+ 'dir_v3_ips': (None, _parse_dirreq_v3_ips_line),
+ 'dir_v2_share': (None, _parse_dirreq_v2_share_line),
+ 'dir_v3_share': (None, _parse_dirreq_v3_share_line),
+ 'dir_v2_requests': (None, _parse_dirreq_v2_reqs_line),
+ 'dir_v3_requests': (None, _parse_dirreq_v3_reqs_line),
+ 'dir_v2_responses': (None, _parse_dirreq_v2_resp_line),
+ 'dir_v3_responses': (None, _parse_dirreq_v3_resp_line),
+ 'dir_v2_responses_unknown': (None, _parse_dirreq_v2_resp_line),
+ 'dir_v3_responses_unknown': (None, _parse_dirreq_v3_resp_line),
+ 'dir_v2_direct_dl': (None, _parse_dirreq_v2_direct_dl_line),
+ 'dir_v3_direct_dl': (None, _parse_dirreq_v3_direct_dl_line),
+ 'dir_v2_direct_dl_unknown': (None, _parse_dirreq_v2_direct_dl_line),
+ 'dir_v3_direct_dl_unknown': (None, _parse_dirreq_v3_direct_dl_line),
+ 'dir_v2_tunneled_dl': (None, _parse_dirreq_v2_tunneled_dl_line),
+ 'dir_v3_tunneled_dl': (None, _parse_dirreq_v3_tunneled_dl_line),
+ 'dir_v2_tunneled_dl_unknown': (None, _parse_dirreq_v2_tunneled_dl_line),
+ 'dir_v3_tunneled_dl_unknown': (None, _parse_dirreq_v3_tunneled_dl_line),
+
+ 'dir_read_history_end': (None, _parse_dirreq_read_history_line),
+ 'dir_read_history_interval': (None, _parse_dirreq_read_history_line),
+ 'dir_read_history_values': (None, _parse_dirreq_read_history_line),
+
+ 'dir_write_history_end': (None, _parse_dirreq_write_history_line),
+ 'dir_write_history_interval': (None, _parse_dirreq_write_history_line),
+ 'dir_write_history_values': (None, _parse_dirreq_write_history_line),
+
+ 'entry_stats_end': (None, _parse_entry_stats_end_line),
+ 'entry_stats_interval': (None, _parse_entry_stats_end_line),
+ 'entry_ips': (None, _parse_entry_ips_line),
+
+ 'exit_stats_end': (None, _parse_exit_stats_end_line),
+ 'exit_stats_interval': (None, _parse_exit_stats_end_line),
+ 'exit_kibibytes_written': (None, _parse_exit_kibibytes_written_line),
+ 'exit_kibibytes_read': (None, _parse_exit_kibibytes_read_line),
+ 'exit_streams_opened': (None, _parse_exit_streams_opened_line),
+
+ 'bridge_stats_end': (None, _parse_bridge_stats_end_line),
+ 'bridge_stats_interval': (None, _parse_bridge_stats_end_line),
+ 'bridge_ips': (None, _parse_bridge_ips_line),
+ 'geoip_start_time': (None, _parse_geoip_start_time_line),
+ 'geoip_client_origins': (None, _parse_geoip_client_origins_line),
+
+ 'ip_versions': (None, _parse_bridge_ip_versions_line),
+ 'ip_transports': (None, _parse_bridge_ip_transports_line),
+ }
+
+ PARSER_FOR_LINE = {
+ 'extra-info': _parse_extra_info_line,
+ 'geoip-db-digest': _parse_geoip_db_digest_line,
+ 'geoip6-db-digest': _parse_geoip6_db_digest_line,
+ 'transport': _parse_transport_line,
+ 'cell-circuits-per-decile': _parse_cell_circuits_per_decline_line,
+ 'dirreq-v2-resp': _parse_dirreq_v2_resp_line,
+ 'dirreq-v3-resp': _parse_dirreq_v3_resp_line,
+ 'dirreq-v2-direct-dl': _parse_dirreq_v2_direct_dl_line,
+ 'dirreq-v3-direct-dl': _parse_dirreq_v3_direct_dl_line,
+ 'dirreq-v2-tunneled-dl': _parse_dirreq_v2_tunneled_dl_line,
+ 'dirreq-v3-tunneled-dl': _parse_dirreq_v3_tunneled_dl_line,
+ 'dirreq-v2-share': _parse_dirreq_v2_share_line,
+ 'dirreq-v3-share': _parse_dirreq_v3_share_line,
+ 'cell-processed-cells': _parse_cell_processed_cells_line,
+ 'cell-queued-cells': _parse_cell_queued_cells_line,
+ 'cell-time-in-queue': _parse_cell_time_in_queue_line,
+ 'published': _parse_published_line,
+ 'geoip-start-time': _parse_geoip_start_time_line,
+ 'cell-stats-end': _parse_cell_stats_end_line,
+ 'entry-stats-end': _parse_entry_stats_end_line,
+ 'exit-stats-end': _parse_exit_stats_end_line,
+ 'bridge-stats-end': _parse_bridge_stats_end_line,
+ 'dirreq-stats-end': _parse_dirreq_stats_end_line,
+ 'conn-bi-direct': _parse_conn_bi_direct_line,
+ 'read-history': _parse_read_history_line,
+ 'write-history': _parse_write_history_line,
+ 'dirreq-read-history': _parse_dirreq_read_history_line,
+ 'dirreq-write-history': _parse_dirreq_write_history_line,
+ 'exit-kibibytes-written': _parse_exit_kibibytes_written_line,
+ 'exit-kibibytes-read': _parse_exit_kibibytes_read_line,
+ 'exit-streams-opened': _parse_exit_streams_opened_line,
+ 'dirreq-v2-ips': _parse_dirreq_v2_ips_line,
+ 'dirreq-v3-ips': _parse_dirreq_v3_ips_line,
+ 'dirreq-v2-reqs': _parse_dirreq_v2_reqs_line,
+ 'dirreq-v3-reqs': _parse_dirreq_v3_reqs_line,
+ 'geoip-client-origins': _parse_geoip_client_origins_line,
+ 'entry-ips': _parse_entry_ips_line,
+ 'bridge-ips': _parse_bridge_ips_line,
+ 'bridge-ip-versions': _parse_bridge_ip_versions_line,
+ 'bridge-ip-transports': _parse_bridge_ip_transports_line,
+ }
+
def __init__(self, raw_contents, validate = True):
"""
Extra-info descriptor constructor. By default this validates the
@@ -343,88 +780,7 @@ class ExtraInfoDescriptor(Descriptor):
:raises: **ValueError** if the contents is malformed and validate is True
"""
- super(ExtraInfoDescriptor, self).__init__(raw_contents)
- raw_contents = stem.util.str_tools._to_unicode(raw_contents)
-
- self.nickname = None
- self.fingerprint = None
- self.published = None
- self.geoip_db_digest = None
- self.geoip6_db_digest = None
- self.transport = {}
-
- self.conn_bi_direct_end = None
- self.conn_bi_direct_interval = None
- self.conn_bi_direct_below = None
- self.conn_bi_direct_read = None
- self.conn_bi_direct_write = None
- self.conn_bi_direct_both = None
-
- self.read_history_end = None
- self.read_history_interval = None
- self.read_history_values = None
-
- self.write_history_end = None
- self.write_history_interval = None
- self.write_history_values = None
-
- self.cell_stats_end = None
- self.cell_stats_interval = None
- self.cell_processed_cells = None
- self.cell_queued_cells = None
- self.cell_time_in_queue = None
- self.cell_circuits_per_decile = None
-
- self.dir_stats_end = None
- self.dir_stats_interval = None
- self.dir_v2_ips = None
- self.dir_v3_ips = None
- self.dir_v2_share = None
- self.dir_v3_share = None
- self.dir_v2_requests = None
- self.dir_v3_requests = None
- self.dir_v2_responses = None
- self.dir_v3_responses = None
- self.dir_v2_responses_unknown = None
- self.dir_v3_responses_unknown = None
- self.dir_v2_direct_dl = None
- self.dir_v3_direct_dl = None
- self.dir_v2_direct_dl_unknown = None
- self.dir_v3_direct_dl_unknown = None
- self.dir_v2_tunneled_dl = None
- self.dir_v3_tunneled_dl = None
- self.dir_v2_tunneled_dl_unknown = None
- self.dir_v3_tunneled_dl_unknown = None
-
- self.dir_read_history_end = None
- self.dir_read_history_interval = None
- self.dir_read_history_values = None
-
- self.dir_write_history_end = None
- self.dir_write_history_interval = None
- self.dir_write_history_values = None
-
- self.entry_stats_end = None
- self.entry_stats_interval = None
- self.entry_ips = None
-
- self.exit_stats_end = None
- self.exit_stats_interval = None
- self.exit_kibibytes_written = None
- self.exit_kibibytes_read = None
- self.exit_streams_opened = None
-
- self.bridge_stats_end = None
- self.bridge_stats_interval = None
- self.bridge_ips = None
- self.geoip_start_time = None
- self.geoip_client_origins = None
-
- self.ip_versions = None
- self.ip_transports = None
-
- self._unrecognized_lines = []
-
+ super(ExtraInfoDescriptor, self).__init__(raw_contents, lazy_load = not validate)
entries = _get_descriptor_components(raw_contents, validate)
if validate:
@@ -444,397 +800,9 @@ class ExtraInfoDescriptor(Descriptor):
if expected_last_keyword and expected_last_keyword != list(entries.keys())[-1]:
raise ValueError("Descriptor must end with a '%s' entry" % expected_last_keyword)
- self._parse(entries, validate)
-
- def get_unrecognized_lines(self):
- return list(self._unrecognized_lines)
-
- def _parse(self, entries, validate):
- """
- Parses a series of 'keyword => (value, pgp block)' mappings and applies
- them as attributes.
-
- :param dict entries: descriptor contents to be applied
- :param bool validate: checks the validity of descriptor content if True
-
- :raises: **ValueError** if an error occurs in validation
- """
-
- for keyword, values in list(entries.items()):
- # most just work with the first (and only) value
- value, _, _ = values[0]
- line = '%s %s' % (keyword, value) # original line
-
- if keyword == 'extra-info':
- # "extra-info" Nickname Fingerprint
- extra_info_comp = value.split()
-
- if len(extra_info_comp) < 2:
- if not validate:
- continue
-
- raise ValueError('Extra-info line must have two values: %s' % line)
-
- if validate:
- if not stem.util.tor_tools.is_valid_nickname(extra_info_comp[0]):
- raise ValueError("Extra-info line entry isn't a valid nickname: %s" % extra_info_comp[0])
- elif not stem.util.tor_tools.is_valid_fingerprint(extra_info_comp[1]):
- raise ValueError('Tor relay fingerprints consist of forty hex digits: %s' % extra_info_comp[1])
-
- self.nickname = extra_info_comp[0]
- self.fingerprint = extra_info_comp[1]
- elif keyword == 'geoip-db-digest':
- # "geoip-db-digest" Digest
-
- if validate and not stem.util.tor_tools.is_hex_digits(value, 40):
- raise ValueError('Geoip digest line had an invalid sha1 digest: %s' % line)
-
- self.geoip_db_digest = value
- elif keyword == 'geoip6-db-digest':
- # "geoip6-db-digest" Digest
-
- if validate and not stem.util.tor_tools.is_hex_digits(value, 40):
- raise ValueError('Geoip v6 digest line had an invalid sha1 digest: %s' % line)
-
- self.geoip6_db_digest = value
- elif keyword == 'transport':
- # "transport" transportname address:port [arglist]
- # Everything after the transportname is scrubbed in published bridge
- # descriptors, so we'll never see it in practice.
- #
- # These entries really only make sense for bridges, but have been seen
- # on non-bridges in the wild when the relay operator configured it this
- # way.
-
- for transport_value, _, _ in values:
- name, address, port, args = None, None, None, None
-
- if ' ' not in transport_value:
- # scrubbed
- name = transport_value
- else:
- # not scrubbed
- value_comp = transport_value.split()
-
- if len(value_comp) < 1:
- raise ValueError('Transport line is missing its transport name: %s' % line)
- else:
- name = value_comp[0]
-
- if len(value_comp) < 2:
- raise ValueError('Transport line is missing its address:port value: %s' % line)
- elif ':' not in value_comp[1]:
- raise ValueError("Transport line's address:port entry is missing a colon: %s" % line)
- else:
- address, port_str = value_comp[1].split(':', 1)
-
- if not stem.util.connection.is_valid_ipv4_address(address) or \
- stem.util.connection.is_valid_ipv6_address(address):
- raise ValueError('Transport line has a malformed address: %s' % line)
- elif not stem.util.connection.is_valid_port(port_str):
- raise ValueError('Transport line has a malformed port: %s' % line)
-
- port = int(port_str)
-
- if len(value_comp) >= 3:
- args = value_comp[2:]
- else:
- args = []
-
- self.transport[name] = (address, port, args)
- elif keyword == 'cell-circuits-per-decile':
- # "cell-circuits-per-decile" num
-
- if not value.isdigit():
- if validate:
- raise ValueError('Non-numeric cell-circuits-per-decile value: %s' % line)
- else:
- continue
-
- stat = int(value)
-
- if validate and stat < 0:
- raise ValueError('Negative cell-circuits-per-decile value: %s' % line)
-
- self.cell_circuits_per_decile = stat
- elif keyword in ('dirreq-v2-resp', 'dirreq-v3-resp', 'dirreq-v2-direct-dl', 'dirreq-v3-direct-dl', 'dirreq-v2-tunneled-dl', 'dirreq-v3-tunneled-dl'):
- recognized_counts = {}
- unrecognized_counts = {}
-
- is_response_stats = keyword in ('dirreq-v2-resp', 'dirreq-v3-resp')
- key_set = DirResponse if is_response_stats else DirStat
-
- key_type = 'STATUS' if is_response_stats else 'STAT'
- error_msg = '%s lines should contain %s=COUNT mappings: %s' % (keyword, key_type, line)
-
- if value:
- for entry in value.split(','):
- if '=' not in entry:
- if validate:
- raise ValueError(error_msg)
- else:
- continue
-
- status, count = entry.split('=', 1)
-
- if count.isdigit():
- if status in key_set:
- recognized_counts[status] = int(count)
- else:
- unrecognized_counts[status] = int(count)
- elif validate:
- raise ValueError(error_msg)
-
- if keyword == 'dirreq-v2-resp':
- self.dir_v2_responses = recognized_counts
- self.dir_v2_responses_unknown = unrecognized_counts
- elif keyword == 'dirreq-v3-resp':
- self.dir_v3_responses = recognized_counts
- self.dir_v3_responses_unknown = unrecognized_counts
- elif keyword == 'dirreq-v2-direct-dl':
- self.dir_v2_direct_dl = recognized_counts
- self.dir_v2_direct_dl_unknown = unrecognized_counts
- elif keyword == 'dirreq-v3-direct-dl':
- self.dir_v3_direct_dl = recognized_counts
- self.dir_v3_direct_dl_unknown = unrecognized_counts
- elif keyword == 'dirreq-v2-tunneled-dl':
- self.dir_v2_tunneled_dl = recognized_counts
- self.dir_v2_tunneled_dl_unknown = unrecognized_counts
- elif keyword == 'dirreq-v3-tunneled-dl':
- self.dir_v3_tunneled_dl = recognized_counts
- self.dir_v3_tunneled_dl_unknown = unrecognized_counts
- elif keyword in ('dirreq-v2-share', 'dirreq-v3-share'):
- # "<keyword>" num%
-
- try:
- if not value.endswith('%'):
- raise ValueError()
-
- percentage = float(value[:-1]) / 100
-
- # Bug lets these be above 100%, however they're soon going away...
- # https://lists.torproject.org/pipermail/tor-dev/2012-June/003679.html
-
- if validate and percentage < 0:
- raise ValueError('Negative percentage value: %s' % line)
-
- if keyword == 'dirreq-v2-share':
- self.dir_v2_share = percentage
- elif keyword == 'dirreq-v3-share':
- self.dir_v3_share = percentage
- except ValueError as exc:
- if validate:
- raise ValueError("Value can't be parsed as a percentage: %s" % line)
- elif keyword in ('cell-processed-cells', 'cell-queued-cells', 'cell-time-in-queue'):
- # "<keyword>" num,...,num
-
- entries = []
-
- if value:
- for entry in value.split(','):
- try:
- # Values should be positive but as discussed in ticket #5849
- # there was a bug around this. It was fixed in tor 0.2.2.1.
-
- entries.append(float(entry))
- except ValueError:
- if validate:
- raise ValueError('Non-numeric entry in %s listing: %s' % (keyword, line))
-
- if keyword == 'cell-processed-cells':
- self.cell_processed_cells = entries
- elif keyword == 'cell-queued-cells':
- self.cell_queued_cells = entries
- elif keyword == 'cell-time-in-queue':
- self.cell_time_in_queue = entries
- elif keyword in ('published', 'geoip-start-time'):
- # "<keyword>" YYYY-MM-DD HH:MM:SS
-
- try:
- timestamp = stem.util.str_tools._parse_timestamp(value)
-
- if keyword == 'published':
- self.published = timestamp
- elif keyword == 'geoip-start-time':
- self.geoip_start_time = timestamp
- except ValueError:
- if validate:
- raise ValueError("Timestamp on %s line wasn't parsable: %s" % (keyword, line))
- elif keyword in ('cell-stats-end', 'entry-stats-end', 'exit-stats-end', 'bridge-stats-end', 'dirreq-stats-end'):
- # "<keyword>" YYYY-MM-DD HH:MM:SS (NSEC s)
-
- try:
- timestamp, interval, _ = _parse_timestamp_and_interval(keyword, value)
-
- if keyword == 'cell-stats-end':
- self.cell_stats_end = timestamp
- self.cell_stats_interval = interval
- elif keyword == 'entry-stats-end':
- self.entry_stats_end = timestamp
- self.entry_stats_interval = interval
- elif keyword == 'exit-stats-end':
- self.exit_stats_end = timestamp
- self.exit_stats_interval = interval
- elif keyword == 'bridge-stats-end':
- self.bridge_stats_end = timestamp
- self.bridge_stats_interval = interval
- elif keyword == 'dirreq-stats-end':
- self.dir_stats_end = timestamp
- self.dir_stats_interval = interval
- except ValueError as exc:
- if validate:
- raise exc
- elif keyword == 'conn-bi-direct':
- # "conn-bi-direct" YYYY-MM-DD HH:MM:SS (NSEC s) BELOW,READ,WRITE,BOTH
-
- try:
- timestamp, interval, remainder = _parse_timestamp_and_interval(keyword, value)
- stats = remainder.split(',')
-
- if len(stats) != 4 or not \
- (stats[0].isdigit() and stats[1].isdigit() and stats[2].isdigit() and stats[3].isdigit()):
- raise ValueError('conn-bi-direct line should end with four numeric values: %s' % line)
-
- self.conn_bi_direct_end = timestamp
- self.conn_bi_direct_interval = interval
- self.conn_bi_direct_below = int(stats[0])
- self.conn_bi_direct_read = int(stats[1])
- self.conn_bi_direct_write = int(stats[2])
- self.conn_bi_direct_both = int(stats[3])
- except ValueError as exc:
- if validate:
- raise exc
- elif keyword in ('read-history', 'write-history', 'dirreq-read-history', 'dirreq-write-history'):
- # "<keyword>" YYYY-MM-DD HH:MM:SS (NSEC s) NUM,NUM,NUM,NUM,NUM...
- try:
- timestamp, interval, remainder = _parse_timestamp_and_interval(keyword, value)
- history_values = []
-
- if remainder:
- try:
- history_values = [int(entry) for entry in remainder.split(",")]
- except ValueError:
- raise ValueError('%s line has non-numeric values: %s' % (keyword, line))
-
- if keyword == 'read-history':
- self.read_history_end = timestamp
- self.read_history_interval = interval
- self.read_history_values = history_values
- elif keyword == 'write-history':
- self.write_history_end = timestamp
- self.write_history_interval = interval
- self.write_history_values = history_values
- elif keyword == 'dirreq-read-history':
- self.dir_read_history_end = timestamp
- self.dir_read_history_interval = interval
- self.dir_read_history_values = history_values
- elif keyword == 'dirreq-write-history':
- self.dir_write_history_end = timestamp
- self.dir_write_history_interval = interval
- self.dir_write_history_values = history_values
- except ValueError as exc:
- if validate:
- raise exc
- elif keyword in ('exit-kibibytes-written', 'exit-kibibytes-read', 'exit-streams-opened'):
- # "<keyword>" port=N,port=N,...
-
- port_mappings = {}
- error_msg = 'Entries in %s line should only be PORT=N entries: %s' % (keyword, line)
-
- if value:
- for entry in value.split(','):
- if '=' not in entry:
- if validate:
- raise ValueError(error_msg)
- else:
- continue
-
- port, stat = entry.split('=', 1)
-
- if (port == 'other' or stem.util.connection.is_valid_port(port)) and stat.isdigit():
- if port != 'other':
- port = int(port)
- port_mappings[port] = int(stat)
- elif validate:
- raise ValueError(error_msg)
-
- if keyword == 'exit-kibibytes-written':
- self.exit_kibibytes_written = port_mappings
- elif keyword == 'exit-kibibytes-read':
- self.exit_kibibytes_read = port_mappings
- elif keyword == 'exit-streams-opened':
- self.exit_streams_opened = port_mappings
- elif keyword in ('dirreq-v2-ips', 'dirreq-v3-ips', 'dirreq-v2-reqs', 'dirreq-v3-reqs', 'geoip-client-origins', 'entry-ips', 'bridge-ips'):
- # "<keyword>" CC=N,CC=N,...
- #
- # The maxmind geoip (https://www.maxmind.com/app/iso3166) has numeric
- # locale codes for some special values, for instance...
- # A1,"Anonymous Proxy"
- # A2,"Satellite Provider"
- # ??,"Unknown"
-
- locale_usage = {}
- error_msg = 'Entries in %s line should only be CC=N entries: %s' % (keyword, line)
-
- if value:
- for entry in value.split(','):
- if '=' not in entry:
- if validate:
- raise ValueError(error_msg)
- else:
- continue
-
- locale, count = entry.split('=', 1)
-
- if _locale_re.match(locale) and count.isdigit():
- locale_usage[locale] = int(count)
- elif validate:
- raise ValueError(error_msg)
-
- if keyword == 'dirreq-v2-ips':
- self.dir_v2_ips = locale_usage
- elif keyword == 'dirreq-v3-ips':
- self.dir_v3_ips = locale_usage
- elif keyword == 'dirreq-v2-reqs':
- self.dir_v2_requests = locale_usage
- elif keyword == 'dirreq-v3-reqs':
- self.dir_v3_requests = locale_usage
- elif keyword == 'geoip-client-origins':
- self.geoip_client_origins = locale_usage
- elif keyword == 'entry-ips':
- self.entry_ips = locale_usage
- elif keyword == 'bridge-ips':
- self.bridge_ips = locale_usage
- elif keyword == 'bridge-ip-versions':
- self.ip_versions = {}
-
- if value:
- for entry in value.split(','):
- if '=' not in entry:
- raise stem.ProtocolError("The bridge-ip-versions should be a comma separated listing of '<protocol>=<count>' mappings: %s" % line)
-
- protocol, count = entry.split('=', 1)
-
- if not count.isdigit():
- raise stem.ProtocolError('IP protocol count was non-numeric (%s): %s' % (count, line))
-
- self.ip_versions[protocol] = int(count)
- elif keyword == 'bridge-ip-transports':
- self.ip_transports = {}
-
- if value:
- for entry in value.split(','):
- if '=' not in entry:
- raise stem.ProtocolError("The bridge-ip-transports should be a comma separated listing of '<protocol>=<count>' mappings: %s" % line)
-
- protocol, count = entry.split('=', 1)
-
- if not count.isdigit():
- raise stem.ProtocolError('Transport count was non-numeric (%s): %s' % (count, line))
-
- self.ip_transports[protocol] = int(count)
- else:
- self._unrecognized_lines.append(line)
+ self._parse(entries, validate)
+ else:
+ self._entries = entries
def digest(self):
"""
@@ -868,10 +836,13 @@ class RelayExtraInfoDescriptor(ExtraInfoDescriptor):
**\*** attribute is required when we're parsed with validation
"""
- def __init__(self, raw_contents, validate = True):
- self.signature = None
+ ATTRIBUTES = dict(ExtraInfoDescriptor.ATTRIBUTES, **{
+ 'signature': (None, _parse_router_signature_line),
+ })
- super(RelayExtraInfoDescriptor, self).__init__(raw_contents, validate)
+ PARSER_FOR_LINE = dict(ExtraInfoDescriptor.PARSER_FOR_LINE, **{
+ 'router-signature': _parse_router_signature_line,
+ })
@lru_cache()
def digest(self):
@@ -880,27 +851,6 @@ class RelayExtraInfoDescriptor(ExtraInfoDescriptor):
raw_content = raw_content[:raw_content.find(ending) + len(ending)]
return hashlib.sha1(stem.util.str_tools._to_bytes(raw_content)).hexdigest().upper()
- def _parse(self, entries, validate):
- entries = dict(entries) # shallow copy since we're destructive
-
- # handles fields only in server descriptors
- for keyword, values in list(entries.items()):
- value, block_type, block_contents = values[0]
-
- line = '%s %s' % (keyword, value) # original line
-
- if block_contents:
- line += '\n%s' % block_contents
-
- if keyword == 'router-signature':
- if validate and (not block_contents or block_type != 'SIGNATURE'):
- raise ValueError("'router-signature' should be followed by a SIGNATURE block: %s" % line)
-
- self.signature = block_contents
- del entries['router-signature']
-
- ExtraInfoDescriptor._parse(self, entries, validate)
-
class BridgeExtraInfoDescriptor(ExtraInfoDescriptor):
"""
@@ -908,31 +858,17 @@ class BridgeExtraInfoDescriptor(ExtraInfoDescriptor):
<https://collector.torproject.org/formats.html#bridge-descriptors>`_)
"""
- def __init__(self, raw_contents, validate = True):
- self._digest = None
+ ATTRIBUTES = dict(ExtraInfoDescriptor.ATTRIBUTES, **{
+ '_digest': (None, _parse_router_digest_line),
+ })
- super(BridgeExtraInfoDescriptor, self).__init__(raw_contents, validate)
+ PARSER_FOR_LINE = dict(ExtraInfoDescriptor.PARSER_FOR_LINE, **{
+ 'router-digest': _parse_router_digest_line,
+ })
def digest(self):
return self._digest
- def _parse(self, entries, validate):
- entries = dict(entries) # shallow copy since we're destructive
-
- # handles fields only in server descriptors
- for keyword, values in list(entries.items()):
- value, _, _ = values[0]
- line = '%s %s' % (keyword, value) # original line
-
- if keyword == 'router-digest':
- if validate and not stem.util.tor_tools.is_hex_digits(value, 40):
- raise ValueError('Router digest line had an invalid sha1 digest: %s' % line)
-
- self._digest = value
- del entries['router-digest']
-
- ExtraInfoDescriptor._parse(self, entries, validate)
-
def _required_fields(self):
excluded_fields = [
'router-signature',
diff --git a/stem/descriptor/microdescriptor.py b/stem/descriptor/microdescriptor.py
index 093e6f0f..c9389e4d 100644
--- a/stem/descriptor/microdescriptor.py
+++ b/stem/descriptor/microdescriptor.py
@@ -66,13 +66,20 @@ Doing the same is trivial with server descriptors...
import hashlib
-import stem.descriptor.router_status_entry
import stem.exit_policy
from stem.descriptor import (
Descriptor,
_get_descriptor_components,
_read_until_keywords,
+ _value,
+ _parse_simple_line,
+ _parse_key_block,
+)
+
+from stem.descriptor.router_status_entry import (
+ _parse_a_line,
+ _parse_p_line,
)
try:
@@ -151,6 +158,24 @@ def _parse_file(descriptor_file, validate = True, **kwargs):
break # done parsing descriptors
+def _parse_id_line(descriptor, entries):
+ value = _value('id', entries)
+ value_comp = value.split()
+
+ if len(value_comp) >= 2:
+ descriptor.identifier_type = value_comp[0]
+ descriptor.identifier = value_comp[1]
+ else:
+ raise ValueError("'id' lines should contain both the key type and digest: id %s" % value)
+
+
+_parse_digest = lambda descriptor, entries: setattr(descriptor, 'digest', hashlib.sha256(descriptor.get_bytes()).hexdigest().upper())
+_parse_onion_key_line = _parse_key_block('onion-key', 'onion_key', 'RSA PUBLIC KEY')
+_parse_ntor_onion_key_line = _parse_simple_line('ntor-onion-key', 'ntor_onion_key')
+_parse_family_line = lambda descriptor, entries: setattr(descriptor, 'family', _value('family', entries).split(' '))
+_parse_p6_line = lambda descriptor, entries: setattr(descriptor, 'exit_policy_v6', stem.exit_policy.MicroExitPolicy(_value('p6', entries)))
+
+
class Microdescriptor(Descriptor):
"""
Microdescriptor (`descriptor specification
@@ -173,33 +198,39 @@ class Microdescriptor(Descriptor):
**\*** attribute is required when we're parsed with validation
"""
- def __init__(self, raw_contents, validate = True, annotations = None):
- super(Microdescriptor, self).__init__(raw_contents)
- raw_contents = stem.util.str_tools._to_unicode(raw_contents)
-
- self.digest = hashlib.sha256(self.get_bytes()).hexdigest().upper()
-
- self.onion_key = None
- self.ntor_onion_key = None
- self.or_addresses = []
- self.family = []
- self.exit_policy = stem.exit_policy.MicroExitPolicy('reject 1-65535')
- self.exit_policy_v6 = None
- self.identifier_type = None
- self.identifier = None
-
- self._unrecognized_lines = []
+ ATTRIBUTES = {
+ 'onion_key': (None, _parse_onion_key_line),
+ 'ntor_onion_key': (None, _parse_ntor_onion_key_line),
+ 'or_addresses': ([], _parse_a_line),
+ 'family': ([], _parse_family_line),
+ 'exit_policy': (stem.exit_policy.MicroExitPolicy('reject 1-65535'), _parse_p_line),
+ 'exit_policy_v6': (None, _parse_p6_line),
+ 'identifier_type': (None, _parse_id_line),
+ 'identifier': (None, _parse_id_line),
+ 'digest': (None, _parse_digest),
+ }
+
+ PARSER_FOR_LINE = {
+ 'onion-key': _parse_onion_key_line,
+ 'ntor-onion-key': _parse_ntor_onion_key_line,
+ 'a': _parse_a_line,
+ 'family': _parse_family_line,
+ 'p': _parse_p_line,
+ 'p6': _parse_p6_line,
+ 'id': _parse_id_line,
+ }
+ def __init__(self, raw_contents, validate = True, annotations = None):
+ super(Microdescriptor, self).__init__(raw_contents, lazy_load = not validate)
self._annotation_lines = annotations if annotations else []
-
entries = _get_descriptor_components(raw_contents, validate)
- self._parse(entries, validate)
if validate:
+ self.digest = hashlib.sha256(self.get_bytes()).hexdigest().upper()
+ self._parse(entries, validate)
self._check_constraints(entries)
-
- def get_unrecognized_lines(self):
- return list(self._unrecognized_lines)
+ else:
+ self._entries = entries
@lru_cache()
def get_annotations(self):
@@ -237,53 +268,6 @@ class Microdescriptor(Descriptor):
return self._annotation_lines
- def _parse(self, entries, validate):
- """
- Parses a series of 'keyword => (value, pgp block)' mappings and applies
- them as attributes.
-
- :param dict entries: descriptor contents to be applied
- :param bool validate: checks the validity of descriptor content if **True**
-
- :raises: **ValueError** if an error occurs in validation
- """
-
- for keyword, values in list(entries.items()):
- # most just work with the first (and only) value
- value, block_type, block_contents = values[0]
-
- line = '%s %s' % (keyword, value) # original line
-
- if block_contents:
- line += '\n%s' % block_contents
-
- if keyword == 'onion-key':
- if validate and (not block_contents or block_type != 'RSA PUBLIC KEY'):
- raise ValueError("'onion-key' should be followed by a RSA PUBLIC KEY block: %s" % line)
-
- self.onion_key = block_contents
- elif keyword == 'ntor-onion-key':
- self.ntor_onion_key = value
- elif keyword == 'a':
- for entry, _, _ in values:
- stem.descriptor.router_status_entry._parse_a_line(self, entry, validate)
- elif keyword == 'family':
- self.family = value.split(' ')
- elif keyword == 'p':
- stem.descriptor.router_status_entry._parse_p_line(self, value, validate)
- elif keyword == 'p6':
- self.exit_policy_v6 = stem.exit_policy.MicroExitPolicy(value)
- elif keyword == 'id':
- value_comp = value.split()
-
- if len(value_comp) >= 2:
- self.identifier_type = value_comp[0]
- self.identifier = value_comp[1]
- elif validate:
- raise ValueError("'id' lines should contain both the key type and digest: %s" % line)
- else:
- self._unrecognized_lines.append(line)
-
def _check_constraints(self, entries):
"""
Does a basic check that the entries conform to this descriptor type's
diff --git a/stem/descriptor/networkstatus.py b/stem/descriptor/networkstatus.py
index 33beb4aa..feb8b37b 100644
--- a/stem/descriptor/networkstatus.py
+++ b/stem/descriptor/networkstatus.py
@@ -62,6 +62,17 @@ from stem.descriptor import (
DocumentHandler,
_get_descriptor_components,
_read_until_keywords,
+ _value,
+ _parse_simple_line,
+ _parse_timestamp_line,
+ _parse_forty_character_hex,
+ _parse_key_block,
+)
+
+from stem.descriptor.router_status_entry import (
+ RouterStatusEntryV2,
+ RouterStatusEntryV3,
+ RouterStatusEntryMicroV3,
)
# Version 2 network status document fields, tuples of the form...
@@ -148,6 +159,37 @@ KEY_CERTIFICATE_PARAMS = (
('dir-key-certification', True),
)
+# all parameters are constrained to int32 range
+MIN_PARAM, MAX_PARAM = -2147483648, 2147483647
+
+PARAM_RANGE = {
+ 'circwindow': (100, 1000),
+ 'CircuitPriorityHalflifeMsec': (-1, MAX_PARAM),
+ 'perconnbwrate': (-1, MAX_PARAM),
+ 'perconnbwburst': (-1, MAX_PARAM),
+ 'refuseunknownexits': (0, 1),
+ 'bwweightscale': (1, MAX_PARAM),
+ 'cbtdisabled': (0, 1),
+ 'cbtnummodes': (1, 20),
+ 'cbtrecentcount': (3, 1000),
+ 'cbtmaxtimeouts': (3, 10000),
+ 'cbtmincircs': (1, 10000),
+ 'cbtquantile': (10, 99),
+ 'cbtclosequantile': (MIN_PARAM, 99),
+ 'cbttestfreq': (1, MAX_PARAM),
+ 'cbtmintimeout': (500, MAX_PARAM),
+ 'UseOptimisticData': (0, 1),
+ 'Support022HiddenServices': (0, 1),
+ 'usecreatefast': (0, 1),
+ 'UseNTorHandshake': (0, 1),
+ 'FastFlagMinThreshold': (4, MAX_PARAM),
+ 'NumDirectoryGuards': (0, 10),
+ 'NumEntryGuards': (1, 10),
+ 'GuardLifetime': (2592000, 157766400), # min: 30 days, max: 1826 days
+ 'NumNTorsPerTAP': (1, 100000),
+ 'AllowNonearlyExtend': (0, 1),
+}
+
def _parse_file(document_file, document_type = None, validate = True, is_microdescriptor = False, document_handler = DocumentHandler.ENTRIES, **kwargs):
"""
@@ -179,16 +221,11 @@ def _parse_file(document_file, document_type = None, validate = True, is_microde
document_type = NetworkStatusDocumentV3
if document_type == NetworkStatusDocumentV2:
- document_type = NetworkStatusDocumentV2
- router_type = stem.descriptor.router_status_entry.RouterStatusEntryV2
+ document_type, router_type = NetworkStatusDocumentV2, RouterStatusEntryV2
elif document_type == NetworkStatusDocumentV3:
- if not is_microdescriptor:
- router_type = stem.descriptor.router_status_entry.RouterStatusEntryV3
- else:
- router_type = stem.descriptor.router_status_entry.RouterStatusEntryMicroV3
+ router_type = RouterStatusEntryMicroV3 if is_microdescriptor else RouterStatusEntryV3
elif document_type == BridgeNetworkStatusDocument:
- document_type = BridgeNetworkStatusDocument
- router_type = stem.descriptor.router_status_entry.RouterStatusEntryV2
+ document_type, router_type = BridgeNetworkStatusDocument, RouterStatusEntryV2
else:
raise ValueError("Document type %i isn't recognized (only able to parse v2, v3, and bridge)" % document_type)
@@ -265,12 +302,51 @@ class NetworkStatusDocument(Descriptor):
Common parent for network status documents.
"""
- def __init__(self, raw_content):
- super(NetworkStatusDocument, self).__init__(raw_content)
- self._unrecognized_lines = []
- def get_unrecognized_lines(self):
- return list(self._unrecognized_lines)
+def _parse_version_line(keyword, attribute, expected_version):
+ def _parse(descriptor, entries):
+ value = _value(keyword, entries)
+
+ if not value.isdigit():
+ raise ValueError('Document has a non-numeric version: %s %s' % (keyword, value))
+
+ setattr(descriptor, attribute, int(value))
+
+ if int(value) != expected_version:
+ raise ValueError("Expected a version %i document, but got version '%s' instead" % (expected_version, value))
+
+ return _parse
+
+
+def _parse_dir_source_line(descriptor, entries):
+ value = _value('dir-source', entries)
+ dir_source_comp = value.split()
+
+ if len(dir_source_comp) < 3:
+ raise ValueError("The 'dir-source' line of a v2 network status document must have three values: dir-source %s" % value)
+
+ if not dir_source_comp[0]:
+ # https://trac.torproject.org/7055
+ raise ValueError("Authority's hostname can't be blank: dir-source %s" % value)
+ elif not stem.util.connection.is_valid_ipv4_address(dir_source_comp[1]):
+ raise ValueError("Authority's address isn't a valid IPv4 address: %s" % dir_source_comp[1])
+ elif not stem.util.connection.is_valid_port(dir_source_comp[2], allow_zero = True):
+ raise ValueError("Authority's DirPort is invalid: %s" % dir_source_comp[2])
+
+ descriptor.hostname = dir_source_comp[0]
+ descriptor.address = dir_source_comp[1]
+ descriptor.dir_port = None if dir_source_comp[2] == '0' else int(dir_source_comp[2])
+
+
+_parse_network_status_version_line = _parse_version_line('network-status-version', 'version', 2)
+_parse_fingerprint_line = _parse_forty_character_hex('fingerprint', 'fingerprint')
+_parse_contact_line = _parse_simple_line('contact', 'contact')
+_parse_dir_signing_key_line = _parse_key_block('dir-signing-key', 'signing_key', 'RSA PUBLIC KEY')
+_parse_client_versions_line = lambda descriptor, entries: setattr(descriptor, 'client_versions', _value('client-versions', entries).split(','))
+_parse_server_versions_line = lambda descriptor, entries: setattr(descriptor, 'server_versions', _value('server-versions', entries).split(','))
+_parse_published_line = _parse_timestamp_line('published', 'published')
+_parse_dir_options_line = lambda descriptor, entries: setattr(descriptor, 'options', _value('dir-options', entries).split())
+_parse_directory_signature_line = _parse_key_block('directory-signature', 'signature', 'SIGNATURE', value_attribute = 'signing_authority')
class NetworkStatusDocumentV2(NetworkStatusDocument):
@@ -302,24 +378,39 @@ class NetworkStatusDocumentV2(NetworkStatusDocument):
a default value, others are left as **None** if undefined
"""
- def __init__(self, raw_content, validate = True):
- super(NetworkStatusDocumentV2, self).__init__(raw_content)
-
- self.version = None
- self.hostname = None
- self.address = None
- self.dir_port = None
- self.fingerprint = None
- self.contact = None
- self.signing_key = None
-
- self.client_versions = []
- self.server_versions = []
- self.published = None
- self.options = []
+ ATTRIBUTES = {
+ 'version': (None, _parse_network_status_version_line),
+ 'hostname': (None, _parse_dir_source_line),
+ 'address': (None, _parse_dir_source_line),
+ 'dir_port': (None, _parse_dir_source_line),
+ 'fingerprint': (None, _parse_fingerprint_line),
+ 'contact': (None, _parse_contact_line),
+ 'signing_key': (None, _parse_dir_signing_key_line),
+
+ 'client_versions': ([], _parse_client_versions_line),
+ 'server_versions': ([], _parse_server_versions_line),
+ 'published': (None, _parse_published_line),
+ 'options': ([], _parse_dir_options_line),
+
+ 'signing_authority': (None, _parse_directory_signature_line),
+ 'signatures': (None, _parse_directory_signature_line),
+ }
+
+ PARSER_FOR_LINE = {
+ 'network-status-version': _parse_network_status_version_line,
+ 'dir-source': _parse_dir_source_line,
+ 'fingerprint': _parse_fingerprint_line,
+ 'contact': _parse_contact_line,
+ 'dir-signing-key': _parse_dir_signing_key_line,
+ 'client-versions': _parse_client_versions_line,
+ 'server-versions': _parse_server_versions_line,
+ 'published': _parse_published_line,
+ 'dir-options': _parse_dir_options_line,
+ 'directory-signature': _parse_directory_signature_line,
+ }
- self.signing_authority = None
- self.signatures = None
+ def __init__(self, raw_content, validate = True):
+ super(NetworkStatusDocumentV2, self).__init__(raw_content, lazy_load = not validate)
# Splitting the document from the routers. Unlike v3 documents we're not
# bending over backwards on the validation by checking the field order or
@@ -332,7 +423,7 @@ class NetworkStatusDocumentV2(NetworkStatusDocument):
router_iter = stem.descriptor.router_status_entry._parse_file(
document_file,
validate,
- entry_class = stem.descriptor.router_status_entry.RouterStatusEntryV2,
+ entry_class = RouterStatusEntryV2,
entry_keyword = ROUTERS_START,
section_end_keywords = (V2_FOOTER_START,),
extra_args = (self,),
@@ -340,103 +431,19 @@ class NetworkStatusDocumentV2(NetworkStatusDocument):
self.routers = dict((desc.fingerprint, desc) for desc in router_iter)
- document_content += b'\n' + document_file.read()
- document_content = stem.util.str_tools._to_unicode(document_content)
-
- entries = _get_descriptor_components(document_content, validate)
+ entries = _get_descriptor_components(document_content + b'\n' + document_file.read(), validate)
if validate:
self._check_constraints(entries)
+ self._parse(entries, validate)
- self._parse(entries, validate)
-
- def _parse(self, entries, validate):
- for keyword, values in list(entries.items()):
- value, block_type, block_contents = values[0]
-
- line = '%s %s' % (keyword, value) # original line
-
- if block_contents:
- line += '\n%s' % block_contents
+ # 'client-versions' and 'server-versions' are only required if 'Versions'
+ # is among the options
- if keyword == 'network-status-version':
- if not value.isdigit():
- if not validate:
- continue
-
- raise ValueError('Network status document has a non-numeric version: %s' % line)
-
- self.version = int(value)
-
- if validate and self.version != 2:
- raise ValueError("Expected a version 2 network status document, got version '%s' instead" % self.version)
- elif keyword == 'dir-source':
- dir_source_comp = value.split()
-
- if len(dir_source_comp) < 3:
- if not validate:
- continue
-
- raise ValueError("The 'dir-source' line of a v2 network status document must have three values: %s" % line)
-
- if validate:
- if not dir_source_comp[0]:
- # https://trac.torproject.org/7055
- raise ValueError("Authority's hostname can't be blank: %s" % line)
- elif not stem.util.connection.is_valid_ipv4_address(dir_source_comp[1]):
- raise ValueError("Authority's address isn't a valid IPv4 address: %s" % dir_source_comp[1])
- elif not stem.util.connection.is_valid_port(dir_source_comp[2], allow_zero = True):
- raise ValueError("Authority's DirPort is invalid: %s" % dir_source_comp[2])
- elif not dir_source_comp[2].isdigit():
- continue
-
- self.hostname = dir_source_comp[0]
- self.address = dir_source_comp[1]
- self.dir_port = None if dir_source_comp[2] == '0' else int(dir_source_comp[2])
- elif keyword == 'fingerprint':
- if validate and not stem.util.tor_tools.is_valid_fingerprint(value):
- raise ValueError("Authority's fingerprint in a v2 network status document is malformed: %s" % line)
-
- self.fingerprint = value
- elif keyword == 'contact':
- self.contact = value
- elif keyword == 'dir-signing-key':
- if validate and (not block_contents or block_type != 'RSA PUBLIC KEY'):
- raise ValueError("'dir-signing-key' should be followed by a RSA PUBLIC KEY block: %s" % line)
-
- self.signing_key = block_contents
- elif keyword in ('client-versions', 'server-versions'):
- # v2 documents existed while there were tor versions using the 'old'
- # style, hence we aren't attempting to parse them
-
- for version_str in value.split(','):
- if keyword == 'client-versions':
- self.client_versions.append(version_str)
- elif keyword == 'server-versions':
- self.server_versions.append(version_str)
- elif keyword == 'published':
- try:
- self.published = stem.util.str_tools._parse_timestamp(value)
- except ValueError:
- if validate:
- raise ValueError("Version 2 network status document's 'published' time wasn't parsable: %s" % value)
- elif keyword == 'dir-options':
- self.options = value.split()
- elif keyword == 'directory-signature':
- if validate and (not block_contents or block_type != 'SIGNATURE'):
- raise ValueError("'directory-signature' should be followed by a SIGNATURE block: %s" % line)
-
- self.signing_authority = value
- self.signature = block_contents
- else:
- self._unrecognized_lines.append(line)
-
- # 'client-versions' and 'server-versions' are only required if 'Versions'
- # is among the options
-
- if validate and 'Versions' in self.options:
- if not ('client-versions' in entries and 'server-versions' in entries):
+ if 'Versions' in self.options and not ('client-versions' in entries and 'server-versions' in entries):
raise ValueError("Version 2 network status documents must have a 'client-versions' and 'server-versions' when 'Versions' is listed among its dir-options:\n%s" % str(self))
+ else:
+ self._entries = entries
def _check_constraints(self, entries):
required_fields = [field for (field, is_mandatory) in NETWORK_STATUS_V2_FIELDS if is_mandatory]
@@ -454,6 +461,192 @@ class NetworkStatusDocumentV2(NetworkStatusDocument):
raise ValueError("Network status document (v2) are expected to start with a 'network-status-version' line:\n%s" % str(self))
+def _parse_header_network_status_version_line(descriptor, entries):
+ # "network-status-version" version
+
+ value = _value('network-status-version', entries)
+
+ if ' ' in value:
+ version, flavor = value.split(' ', 1)
+ else:
+ version, flavor = value, None
+
+ if not version.isdigit():
+ raise ValueError('Network status document has a non-numeric version: network-status-version %s' % value)
+
+ descriptor.version = int(version)
+ descriptor.version_flavor = flavor
+ descriptor.is_microdescriptor = flavor == 'microdesc'
+
+ if descriptor.version != 3:
+ raise ValueError("Expected a version 3 network status document, got version '%s' instead" % descriptor.version)
+
+
+def _parse_header_vote_status_line(descriptor, entries):
+ # "vote-status" type
+ #
+  # The consensus-method and consensus-methods fields are optional since
+  # they weren't included in version 1. Their defaults are applied after
+  # parsing, once we know if we're a vote or a consensus.
+
+ value = _value('vote-status', entries)
+
+ if value == 'consensus':
+ descriptor.is_consensus, descriptor.is_vote = True, False
+ elif value == 'vote':
+ descriptor.is_consensus, descriptor.is_vote = False, True
+ else:
+ raise ValueError("A network status document's vote-status line can only be 'consensus' or 'vote', got '%s' instead" % value)
+
+
+def _parse_header_consensus_methods_line(descriptor, entries):
+ # "consensus-methods" IntegerList
+
+ if descriptor._lazy_loading and descriptor.is_vote:
+ descriptor.consensus_methods = [1]
+
+ value, consensus_methods = _value('consensus-methods', entries), []
+
+ for entry in value.split(' '):
+ if not entry.isdigit():
+ raise ValueError("A network status document's consensus-methods must be a list of integer values, but was '%s'" % value)
+
+ consensus_methods.append(int(entry))
+
+ descriptor.consensus_methods = consensus_methods
+
+
+def _parse_header_consensus_method_line(descriptor, entries):
+ # "consensus-method" Integer
+
+ if descriptor._lazy_loading and descriptor.is_consensus:
+ descriptor.consensus_method = 1
+
+ value = _value('consensus-method', entries)
+
+ if not value.isdigit():
+ raise ValueError("A network status document's consensus-method must be an integer, but was '%s'" % value)
+
+ descriptor.consensus_method = int(value)
+
+
+def _parse_header_voting_delay_line(descriptor, entries):
+ # "voting-delay" VoteSeconds DistSeconds
+
+ value = _value('voting-delay', entries)
+ value_comp = value.split(' ')
+
+ if len(value_comp) == 2 and value_comp[0].isdigit() and value_comp[1].isdigit():
+ descriptor.vote_delay = int(value_comp[0])
+ descriptor.dist_delay = int(value_comp[1])
+ else:
+ raise ValueError("A network status document's 'voting-delay' line must be a pair of integer values, but was '%s'" % value)
+
+
+def _parse_versions_line(keyword, attribute):
+  def _parse(descriptor, entries):
+    value, versions = _value(keyword, entries), []
+
+    for entry in value.split(','):
+      try:
+        versions.append(stem.version._get_version(entry))
+      except ValueError:
+        raise ValueError("Network status document's '%s' line had '%s', which isn't a parsable tor version: %s %s" % (keyword, entry, keyword, value))
+
+    setattr(descriptor, attribute, versions)
+
+ return _parse
+
+
+def _parse_header_flag_thresholds_line(descriptor, entries):
+ # "flag-thresholds" SP THRESHOLDS
+
+ value, thresholds = _value('flag-thresholds', entries).strip(), {}
+
+ if value:
+ for entry in value.split(' '):
+ if '=' not in entry:
+ raise ValueError("Network status document's 'flag-thresholds' line is expected to be space separated key=value mappings, got: flag-thresholds %s" % value)
+
+ entry_key, entry_value = entry.split('=', 1)
+
+ try:
+ if entry_value.endswith('%'):
+ # opting for string manipulation rather than just
+ # 'float(entry_value) / 100' because floating point arithmetic
+ # will lose precision
+
+ thresholds[entry_key] = float('0.' + entry_value[:-1].replace('.', '', 1))
+ elif '.' in entry_value:
+ thresholds[entry_key] = float(entry_value)
+ else:
+ thresholds[entry_key] = int(entry_value)
+ except ValueError:
+ raise ValueError("Network status document's 'flag-thresholds' line is expected to have float values, got: flag-thresholds %s" % value)
+
+ descriptor.flag_thresholds = thresholds
+
+
+def _parse_header_parameters_line(descriptor, entries):
+ # "params" [Parameters]
+ # Parameter ::= Keyword '=' Int32
+ # Int32 ::= A decimal integer between -2147483648 and 2147483647.
+ # Parameters ::= Parameter | Parameters SP Parameter
+
+ if descriptor._lazy_loading and descriptor._default_params:
+ descriptor.params = dict(DEFAULT_PARAMS)
+
+ value = _value('params', entries)
+
+ # should only appear in consensus-method 7 or later
+
+ if not descriptor.meets_consensus_method(7):
+ raise ValueError("A network status document's 'params' line should only appear in consensus-method 7 or later")
+
+  if value != '':
+    # Merge with the defaults rather than replacing them so parameters not
+    # listed on the 'params' line keep their DEFAULT_PARAMS values, matching
+    # the previous behavior of self.params.update(...).
+    params = dict(DEFAULT_PARAMS) if descriptor._default_params else {}
+    params.update(_parse_int_mappings('params', value, True))
+    descriptor.params = params
+    descriptor._check_params_constraints()
+
+
+def _parse_directory_footer_line(descriptor, entries):
+ # nothing to parse, simply checking that we don't have a value
+
+ value = _value('directory-footer', entries)
+
+ if value:
+ raise ValueError("A network status document's 'directory-footer' line shouldn't have any content, got 'directory-footer %s'" % value)
+
+
+def _parse_footer_directory_signature_line(descriptor, entries):
+ signatures = []
+
+ for sig_value, block_type, block_contents in entries['directory-signature']:
+ if sig_value.count(' ') not in (1, 2):
+ raise ValueError("Authority signatures in a network status document are expected to be of the form 'directory-signature [METHOD] FINGERPRINT KEY_DIGEST', received: %s" % sig_value)
+
+ if not block_contents or block_type != 'SIGNATURE':
+ raise ValueError("'directory-signature' should be followed by a SIGNATURE block, but was a %s" % block_type)
+
+ if sig_value.count(' ') == 1:
+ method = 'sha1' # default if none was provided
+ fingerprint, key_digest = sig_value.split(' ', 1)
+ else:
+ method, fingerprint, key_digest = sig_value.split(' ', 2)
+
+ signatures.append(DocumentSignature(method, fingerprint, key_digest, block_contents, True))
+
+ descriptor.signatures = signatures
+
+
+_parse_header_valid_after_line = _parse_timestamp_line('valid-after', 'valid_after')
+_parse_header_fresh_until_line = _parse_timestamp_line('fresh-until', 'fresh_until')
+_parse_header_valid_until_line = _parse_timestamp_line('valid-until', 'valid_until')
+_parse_header_client_versions_line = _parse_versions_line('client-versions', 'client_versions')
+_parse_header_server_versions_line = _parse_versions_line('server-versions', 'server_versions')
+_parse_header_known_flags_line = lambda descriptor, entries: setattr(descriptor, 'known_flags', [entry for entry in _value('known-flags', entries).split(' ') if entry])
+_parse_footer_bandwidth_weights_line = lambda descriptor, entries: setattr(descriptor, 'bandwidth_weights', _parse_int_mappings('bandwidth-weights', _value('bandwidth-weights', entries), True))
+
+
class NetworkStatusDocumentV3(NetworkStatusDocument):
"""
Version 3 network status document. This could be either a vote or consensus.
@@ -498,6 +691,53 @@ class NetworkStatusDocumentV3(NetworkStatusDocument):
a default value, others are left as None if undefined
"""
+ ATTRIBUTES = {
+ 'version': (None, _parse_header_network_status_version_line),
+ 'version_flavor': (None, _parse_header_network_status_version_line),
+ 'is_consensus': (True, _parse_header_vote_status_line),
+ 'is_vote': (False, _parse_header_vote_status_line),
+ 'is_microdescriptor': (False, _parse_header_network_status_version_line),
+ 'consensus_methods': ([], _parse_header_consensus_methods_line),
+ 'published': (None, _parse_published_line),
+ 'consensus_method': (None, _parse_header_consensus_method_line),
+ 'valid_after': (None, _parse_header_valid_after_line),
+ 'fresh_until': (None, _parse_header_fresh_until_line),
+ 'valid_until': (None, _parse_header_valid_until_line),
+ 'vote_delay': (None, _parse_header_voting_delay_line),
+ 'dist_delay': (None, _parse_header_voting_delay_line),
+ 'client_versions': ([], _parse_header_client_versions_line),
+ 'server_versions': ([], _parse_header_server_versions_line),
+ 'known_flags': ([], _parse_header_known_flags_line),
+ 'flag_thresholds': ({}, _parse_header_flag_thresholds_line),
+ 'params': ({}, _parse_header_parameters_line),
+
+ 'signatures': ([], _parse_footer_directory_signature_line),
+ 'bandwidth_weights': ({}, _parse_footer_bandwidth_weights_line),
+ }
+
+ HEADER_PARSER_FOR_LINE = {
+ 'network-status-version': _parse_header_network_status_version_line,
+ 'vote-status': _parse_header_vote_status_line,
+ 'consensus-methods': _parse_header_consensus_methods_line,
+ 'consensus-method': _parse_header_consensus_method_line,
+ 'published': _parse_published_line,
+ 'valid-after': _parse_header_valid_after_line,
+ 'fresh-until': _parse_header_fresh_until_line,
+ 'valid-until': _parse_header_valid_until_line,
+ 'voting-delay': _parse_header_voting_delay_line,
+ 'client-versions': _parse_header_client_versions_line,
+ 'server-versions': _parse_header_server_versions_line,
+ 'known-flags': _parse_header_known_flags_line,
+ 'flag-thresholds': _parse_header_flag_thresholds_line,
+ 'params': _parse_header_parameters_line,
+ }
+
+ FOOTER_PARSER_FOR_LINE = {
+ 'directory-footer': _parse_directory_footer_line,
+ 'bandwidth-weights': _parse_footer_bandwidth_weights_line,
+ 'directory-signature': _parse_footer_directory_signature_line,
+ }
+
def __init__(self, raw_content, validate = True, default_params = True):
"""
Parse a v3 network status document.
@@ -510,17 +750,11 @@ class NetworkStatusDocumentV3(NetworkStatusDocument):
:raises: **ValueError** if the document is invalid
"""
- super(NetworkStatusDocumentV3, self).__init__(raw_content)
+ super(NetworkStatusDocumentV3, self).__init__(raw_content, lazy_load = not validate)
document_file = io.BytesIO(raw_content)
- self._header = _DocumentHeader(document_file, validate, default_params)
-
- # merge header attributes into us
- for attr, value in vars(self._header).items():
- if attr != '_unrecognized_lines':
- setattr(self, attr, value)
- else:
- self._unrecognized_lines += value
+ self._default_params = default_params
+ self._header(document_file, validate)
self.directory_authorities = tuple(stem.descriptor.router_status_entry._parse_file(
document_file,
@@ -528,36 +762,31 @@ class NetworkStatusDocumentV3(NetworkStatusDocument):
entry_class = DirectoryAuthority,
entry_keyword = AUTH_START,
section_end_keywords = (ROUTERS_START, FOOTER_START, V2_FOOTER_START),
- extra_args = (self._header.is_vote,),
+ extra_args = (self.is_vote,),
))
- if validate and self._header.is_vote and len(self.directory_authorities) != 1:
+ if validate and self.is_vote and len(self.directory_authorities) != 1:
raise ValueError('Votes should only have an authority entry for the one that issued it, got %i: %s' % (len(self.directory_authorities), self.directory_authorities))
- if not self._header.is_microdescriptor:
- router_type = stem.descriptor.router_status_entry.RouterStatusEntryV3
- else:
- router_type = stem.descriptor.router_status_entry.RouterStatusEntryMicroV3
-
router_iter = stem.descriptor.router_status_entry._parse_file(
document_file,
validate,
- entry_class = router_type,
+ entry_class = RouterStatusEntryMicroV3 if self.is_microdescriptor else RouterStatusEntryV3,
entry_keyword = ROUTERS_START,
section_end_keywords = (FOOTER_START, V2_FOOTER_START),
extra_args = (self,),
)
self.routers = dict((desc.fingerprint, desc) for desc in router_iter)
+ self._footer(document_file, validate)
- self._footer = _DocumentFooter(document_file, validate, self._header)
+ def get_unrecognized_lines(self):
+ if self._lazy_loading:
+ self._parse(self._header_entries, False, parser_for_line = self.HEADER_PARSER_FOR_LINE)
+ self._parse(self._footer_entries, False, parser_for_line = self.FOOTER_PARSER_FOR_LINE)
+ self._lazy_loading = False
- # merge header attributes into us
- for attr, value in vars(self._footer).items():
- if attr != '_unrecognized_lines':
- setattr(self, attr, value)
- else:
- self._unrecognized_lines += value
+ return super(NetworkStatusDocumentV3, self).get_unrecognized_lines()
def meets_consensus_method(self, method):
"""
@@ -570,7 +799,12 @@ class NetworkStatusDocumentV3(NetworkStatusDocument):
:returns: **True** if we meet the given consensus-method, and **False** otherwise
"""
- return self._header.meets_consensus_method(method)
+ if self.consensus_method is not None:
+ return self.consensus_method >= method
+ elif self.consensus_methods is not None:
+ return bool([x for x in self.consensus_methods if x >= method])
+ else:
+ return False # malformed document
def _compare(self, other, method):
if not isinstance(other, NetworkStatusDocumentV3):
@@ -578,217 +812,66 @@ class NetworkStatusDocumentV3(NetworkStatusDocument):
return method(str(self).strip(), str(other).strip())
- def __hash__(self):
- return hash(str(self).strip())
-
- def __eq__(self, other):
- return self._compare(other, lambda s, o: s == o)
-
- def __lt__(self, other):
- return self._compare(other, lambda s, o: s < o)
-
- def __le__(self, other):
- return self._compare(other, lambda s, o: s <= o)
+ def _header(self, document_file, validate):
+ content = bytes.join(b'', _read_until_keywords((AUTH_START, ROUTERS_START, FOOTER_START), document_file))
+ entries = _get_descriptor_components(content, validate)
+ if validate:
+ # all known header fields can only appear once except
-class _DocumentHeader(object):
- def __init__(self, document_file, validate, default_params):
- self.version = None
- self.version_flavor = None
- self.is_consensus = True
- self.is_vote = False
- self.is_microdescriptor = False
- self.consensus_methods = []
- self.published = None
- self.consensus_method = None
- self.valid_after = None
- self.fresh_until = None
- self.valid_until = None
- self.vote_delay = None
- self.dist_delay = None
- self.client_versions = []
- self.server_versions = []
- self.known_flags = []
- self.flag_thresholds = {}
- self.params = dict(DEFAULT_PARAMS) if default_params else {}
-
- self._unrecognized_lines = []
+ for keyword, values in list(entries.items()):
+ if len(values) > 1 and keyword in HEADER_FIELDS:
+ raise ValueError("Network status documents can only have a single '%s' line, got %i" % (keyword, len(values)))
- content = bytes.join(b'', _read_until_keywords((AUTH_START, ROUTERS_START, FOOTER_START), document_file))
- content = stem.util.str_tools._to_unicode(content)
- entries = _get_descriptor_components(content, validate)
- self._parse(entries, validate)
+ if self._default_params:
+ self.params = dict(DEFAULT_PARAMS)
- # doing this validation afterward so we know our 'is_consensus' and
- # 'is_vote' attributes
+ self._parse(entries, validate, parser_for_line = self.HEADER_PARSER_FOR_LINE)
- if validate:
_check_for_missing_and_disallowed_fields(self, entries, HEADER_STATUS_DOCUMENT_FIELDS)
_check_for_misordered_fields(entries, HEADER_FIELDS)
- def meets_consensus_method(self, method):
- if self.consensus_method is not None:
- return self.consensus_method >= method
- elif self.consensus_methods is not None:
- return bool([x for x in self.consensus_methods if x >= method])
+ # default consensus_method and consensus_methods based on if we're a consensus or vote
+
+ if self.is_consensus and not self.consensus_method:
+ self.consensus_method = 1
+ elif self.is_vote and not self.consensus_methods:
+ self.consensus_methods = [1]
else:
- return False # malformed document
+ self._header_entries = entries
+ self._entries.update(entries)
- def _parse(self, entries, validate):
- for keyword, values in list(entries.items()):
- value, _, _ = values[0]
- line = '%s %s' % (keyword, value)
+ def _footer(self, document_file, validate):
+ entries = _get_descriptor_components(document_file.read(), validate)
- # all known header fields can only appear once except
- if validate and len(values) > 1 and keyword in HEADER_FIELDS:
- raise ValueError("Network status documents can only have a single '%s' line, got %i" % (keyword, len(values)))
+ if validate:
+ for keyword, values in list(entries.items()):
+ # all known footer fields can only appear once except...
+ # * 'directory-signature' in a consensus
- if keyword == 'network-status-version':
- # "network-status-version" version
+ if len(values) > 1 and keyword in FOOTER_FIELDS:
+ if not (keyword == 'directory-signature' and self.is_consensus):
+ raise ValueError("Network status documents can only have a single '%s' line, got %i" % (keyword, len(values)))
+
+ self._parse(entries, validate, parser_for_line = self.FOOTER_PARSER_FOR_LINE)
+
+ # Check that the footer has the right initial line. Prior to consensus
+ # method 9 it's a 'directory-signature' and after that footers start with
+ # 'directory-footer'.
- if ' ' in value:
- version, flavor = value.split(' ', 1)
+ if entries:
+ if self.meets_consensus_method(9):
+ if list(entries.keys())[0] != 'directory-footer':
+ raise ValueError("Network status document's footer should start with a 'directory-footer' line in consensus-method 9 or later")
else:
- version, flavor = value, None
-
- if not version.isdigit():
- if not validate:
- continue
-
- raise ValueError('Network status document has a non-numeric version: %s' % line)
-
- self.version = int(version)
- self.version_flavor = flavor
- self.is_microdescriptor = flavor == 'microdesc'
-
- if validate and self.version != 3:
- raise ValueError("Expected a version 3 network status document, got version '%s' instead" % self.version)
- elif keyword == 'vote-status':
- # "vote-status" type
- #
- # The consensus-method and consensus-methods fields are optional since
- # they weren't included in version 1. Setting a default now that we
- # know if we're a vote or not.
-
- if value == 'consensus':
- self.is_consensus, self.is_vote = True, False
- self.consensus_method = 1
- elif value == 'vote':
- self.is_consensus, self.is_vote = False, True
- self.consensus_methods = [1]
- elif validate:
- raise ValueError("A network status document's vote-status line can only be 'consensus' or 'vote', got '%s' instead" % value)
- elif keyword == 'consensus-methods':
- # "consensus-methods" IntegerList
-
- consensus_methods = []
- for entry in value.split(' '):
- if entry.isdigit():
- consensus_methods.append(int(entry))
- elif validate:
- raise ValueError("A network status document's consensus-methods must be a list of integer values, but was '%s'" % value)
-
- self.consensus_methods = consensus_methods
- elif keyword == 'consensus-method':
- # "consensus-method" Integer
-
- if value.isdigit():
- self.consensus_method = int(value)
- elif validate:
- raise ValueError("A network status document's consensus-method must be an integer, but was '%s'" % value)
- elif keyword in ('published', 'valid-after', 'fresh-until', 'valid-until'):
- try:
- date_value = stem.util.str_tools._parse_timestamp(value)
-
- if keyword == 'published':
- self.published = date_value
- elif keyword == 'valid-after':
- self.valid_after = date_value
- elif keyword == 'fresh-until':
- self.fresh_until = date_value
- elif keyword == 'valid-until':
- self.valid_until = date_value
- except ValueError:
- if validate:
- raise ValueError("Network status document's '%s' time wasn't parsable: %s" % (keyword, value))
- elif keyword == 'voting-delay':
- # "voting-delay" VoteSeconds DistSeconds
-
- value_comp = value.split(' ')
-
- if len(value_comp) == 2 and value_comp[0].isdigit() and value_comp[1].isdigit():
- self.vote_delay = int(value_comp[0])
- self.dist_delay = int(value_comp[1])
- elif validate:
- raise ValueError("A network status document's 'voting-delay' line must be a pair of integer values, but was '%s'" % value)
- elif keyword in ('client-versions', 'server-versions'):
- for entry in value.split(','):
- try:
- version_value = stem.version._get_version(entry)
-
- if keyword == 'client-versions':
- self.client_versions.append(version_value)
- elif keyword == 'server-versions':
- self.server_versions.append(version_value)
- except ValueError:
- if validate:
- raise ValueError("Network status document's '%s' line had '%s', which isn't a parsable tor version: %s" % (keyword, entry, line))
- elif keyword == 'known-flags':
- # "known-flags" FlagList
-
- # simply fetches the entries, excluding empty strings
- self.known_flags = [entry for entry in value.split(' ') if entry]
- elif keyword == 'flag-thresholds':
- # "flag-thresholds" SP THRESHOLDS
-
- value = value.strip()
-
- if value:
- for entry in value.split(' '):
- if '=' not in entry:
- if not validate:
- continue
-
- raise ValueError("Network status document's '%s' line is expected to be space separated key=value mappings, got: %s" % (keyword, line))
-
- entry_key, entry_value = entry.split('=', 1)
-
- try:
- if entry_value.endswith('%'):
- # opting for string manipulation rather than just
- # 'float(entry_value) / 100' because floating point arithmetic
- # will lose precision
-
- self.flag_thresholds[entry_key] = float('0.' + entry_value[:-1].replace('.', '', 1))
- elif '.' in entry_value:
- self.flag_thresholds[entry_key] = float(entry_value)
- else:
- self.flag_thresholds[entry_key] = int(entry_value)
- except ValueError:
- if validate:
- raise ValueError("Network status document's '%s' line is expected to have float values, got: %s" % (keyword, line))
- elif keyword == 'params':
- # "params" [Parameters]
- # Parameter ::= Keyword '=' Int32
- # Int32 ::= A decimal integer between -2147483648 and 2147483647.
- # Parameters ::= Parameter | Parameters SP Parameter
-
- # should only appear in consensus-method 7 or later
-
- if validate and not self.meets_consensus_method(7):
- raise ValueError("A network status document's 'params' line should only appear in consensus-method 7 or later")
-
- # skip if this is a blank line
-
- if value == '':
- continue
-
- self.params.update(_parse_int_mappings(keyword, value, validate))
+ if list(entries.keys())[0] != 'directory-signature':
+ raise ValueError("Network status document's footer should start with a 'directory-signature' line prior to consensus-method 9")
- if validate:
- self._check_params_constraints()
- else:
- self._unrecognized_lines.append(line)
+ _check_for_missing_and_disallowed_fields(self, entries, FOOTER_STATUS_DOCUMENT_FIELDS)
+ _check_for_misordered_fields(entries, FOOTER_FIELDS)
+ else:
+ self._footer_entries = entries
+ self._entries.update(entries)
def _check_params_constraints(self):
"""
@@ -796,139 +879,38 @@ class _DocumentHeader(object):
"""
for key, value in self.params.items():
- # all parameters are constrained to int32 range
- minimum, maximum = -2147483648, 2147483647
-
- if key == 'circwindow':
- minimum, maximum = 100, 1000
- elif key == 'CircuitPriorityHalflifeMsec':
- minimum = -1
- elif key in ('perconnbwrate', 'perconnbwburst'):
- minimum = 1
- elif key == 'refuseunknownexits':
- minimum, maximum = 0, 1
- elif key == 'bwweightscale':
- minimum = 1
- elif key == 'cbtdisabled':
- minimum, maximum = 0, 1
- elif key == 'cbtnummodes':
- minimum, maximum = 1, 20
- elif key == 'cbtrecentcount':
- minimum, maximum = 3, 1000
- elif key == 'cbtmaxtimeouts':
- minimum, maximum = 3, 10000
- elif key == 'cbtmincircs':
- minimum, maximum = 1, 10000
- elif key == 'cbtquantile':
- minimum, maximum = 10, 99
- elif key == 'cbtclosequantile':
- minimum, maximum = self.params.get('cbtquantile', minimum), 99
- elif key == 'cbttestfreq':
- minimum = 1
- elif key == 'cbtmintimeout':
- minimum = 500
+ minimum, maximum = PARAM_RANGE.get(key, (MIN_PARAM, MAX_PARAM))
+
+ # there's a few dynamic parameter ranges
+
+ if key == 'cbtclosequantile':
+ minimum = self.params.get('cbtquantile', minimum)
elif key == 'cbtinitialtimeout':
minimum = self.params.get('cbtmintimeout', minimum)
- elif key == 'UseOptimisticData':
- minimum, maximum = 0, 1
- elif key == 'Support022HiddenServices':
- minimum, maximum = 0, 1
- elif key == 'usecreatefast':
- minimum, maximum = 0, 1
- elif key == 'UseNTorHandshake':
- minimum, maximum = 0, 1
- elif key == 'FastFlagMinThreshold':
- minimum = 4
- elif key == 'NumDirectoryGuards':
- minimum, maximum = 0, 10
- elif key == 'NumEntryGuards':
- minimum, maximum = 1, 10
- elif key == 'GuardLifetime':
- minimum, maximum = 2592000, 157766400 # min: 30 days, max: 1826 days
- elif key == 'NumNTorsPerTAP':
- minimum, maximum = 1, 100000
- elif key == 'AllowNonearlyExtend':
- minimum, maximum = 0, 1
if value < minimum or value > maximum:
raise ValueError("'%s' value on the params line must be in the range of %i - %i, was %i" % (key, minimum, maximum, value))
+ def __hash__(self):
+ return hash(str(self).strip())
-class _DocumentFooter(object):
- def __init__(self, document_file, validate, header):
- self.signatures = []
- self.bandwidth_weights = {}
- self._unrecognized_lines = []
-
- content = stem.util.str_tools._to_unicode(document_file.read())
-
- if not content:
- return # footer is optional and there's nothing to parse
-
- entries = _get_descriptor_components(content, validate)
- self._parse(entries, validate, header)
-
- if validate:
- # Check that the footer has the right initial line. Prior to consensus
- # method 9 it's a 'directory-signature' and after that footers start with
- # 'directory-footer'.
-
- if header.meets_consensus_method(9):
- if list(entries.keys())[0] != 'directory-footer':
- raise ValueError("Network status document's footer should start with a 'directory-footer' line in consensus-method 9 or later")
- else:
- if list(entries.keys())[0] != 'directory-signature':
- raise ValueError("Network status document's footer should start with a 'directory-signature' line prior to consensus-method 9")
-
- _check_for_missing_and_disallowed_fields(header, entries, FOOTER_STATUS_DOCUMENT_FIELDS)
- _check_for_misordered_fields(entries, FOOTER_FIELDS)
-
- def _parse(self, entries, validate, header):
- for keyword, values in list(entries.items()):
- value, block_type, block_contents = values[0]
- line = '%s %s' % (keyword, value)
-
- # all known footer fields can only appear once except...
- # * 'directory-signature' in a consensus
-
- if validate and len(values) > 1 and keyword in FOOTER_FIELDS:
- if not (keyword == 'directory-signature' and header.is_consensus):
- raise ValueError("Network status documents can only have a single '%s' line, got %i" % (keyword, len(values)))
-
- if keyword == 'directory-footer':
- # nothing to parse, simply checking that we don't have a value
-
- if validate and value:
- raise ValueError("A network status document's 'directory-footer' line shouldn't have any content, got '%s'" % line)
- elif keyword == 'bandwidth-weights':
- self.bandwidth_weights = _parse_int_mappings(keyword, value, validate)
- elif keyword == 'directory-signature':
- for sig_value, block_type, block_contents in values:
- if sig_value.count(' ') not in (1, 2):
- if not validate:
- continue
-
- raise ValueError("Authority signatures in a network status document are expected to be of the form 'directory-signature [METHOD] FINGERPRINT KEY_DIGEST', received: %s" % sig_value)
-
- if validate and (not block_contents or block_type != 'SIGNATURE'):
- raise ValueError("'directory-signature' should be followed by a SIGNATURE block: %s" % line)
+ def __eq__(self, other):
+ return self._compare(other, lambda s, o: s == o)
- if sig_value.count(' ') == 1:
- method = 'sha1' # default if none was provided
- fingerprint, key_digest = sig_value.split(' ', 1)
- else:
- method, fingerprint, key_digest = sig_value.split(' ', 2)
+ def __lt__(self, other):
+ return self._compare(other, lambda s, o: s < o)
- self.signatures.append(DocumentSignature(method, fingerprint, key_digest, block_contents, validate))
+ def __le__(self, other):
+ return self._compare(other, lambda s, o: s <= o)
-def _check_for_missing_and_disallowed_fields(header, entries, fields):
+def _check_for_missing_and_disallowed_fields(document, entries, fields):
"""
Checks that we have mandatory fields for our type, and that we don't have
any fields exclusive to the other (ie, no vote-only fields appear in a
consensus or vice versa).
- :param _DocumentHeader header: document header
+ :param NetworkStatusDocumentV3 document: network status document
:param dict entries: ordered keyword/value mappings of the header or footer
:param list fields: expected field attributes (either
**HEADER_STATUS_DOCUMENT_FIELDS** or **FOOTER_STATUS_DOCUMENT_FIELDS**)
@@ -939,11 +921,11 @@ def _check_for_missing_and_disallowed_fields(header, entries, fields):
missing_fields, disallowed_fields = [], []
for field, in_votes, in_consensus, mandatory in fields:
- if mandatory and ((header.is_consensus and in_consensus) or (header.is_vote and in_votes)):
+ if mandatory and ((document.is_consensus and in_consensus) or (document.is_vote and in_votes)):
# mandatory field, check that we have it
if field not in entries.keys():
missing_fields.append(field)
- elif (header.is_consensus and not in_consensus) or (header.is_vote and not in_votes):
+ elif (document.is_consensus and not in_consensus) or (document.is_vote and not in_votes):
# field we shouldn't have, check that we don't
if field in entries.keys():
disallowed_fields.append(field)
@@ -1024,6 +1006,42 @@ def _parse_int_mappings(keyword, value, validate):
return results
+def _parse_dir_source_line(descriptor, entries):
+ # "dir-source" nickname identity address IP dirport orport
+
+ value = _value('dir-source', entries)
+ dir_source_comp = value.split(' ')
+
+ if len(dir_source_comp) < 6:
+ raise ValueError("Authority entry's 'dir-source' line must have six values: dir-source %s" % value)
+
+ if not stem.util.tor_tools.is_valid_nickname(dir_source_comp[0].rstrip('-legacy')):
+ raise ValueError("Authority's nickname is invalid: %s" % dir_source_comp[0])
+ elif not stem.util.tor_tools.is_valid_fingerprint(dir_source_comp[1]):
+ raise ValueError("Authority's fingerprint is invalid: %s" % dir_source_comp[1])
+ elif not dir_source_comp[2]:
+ # https://trac.torproject.org/7055
+ raise ValueError("Authority's hostname can't be blank: dir-source %s" % value)
+ elif not stem.util.connection.is_valid_ipv4_address(dir_source_comp[3]):
+ raise ValueError("Authority's address isn't a valid IPv4 address: %s" % dir_source_comp[3])
+ elif not stem.util.connection.is_valid_port(dir_source_comp[4], allow_zero = True):
+ raise ValueError("Authority's DirPort is invalid: %s" % dir_source_comp[4])
+ elif not stem.util.connection.is_valid_port(dir_source_comp[5]):
+ raise ValueError("Authority's ORPort is invalid: %s" % dir_source_comp[5])
+
+ descriptor.nickname = dir_source_comp[0]
+ descriptor.fingerprint = dir_source_comp[1]
+ descriptor.hostname = dir_source_comp[2]
+ descriptor.address = dir_source_comp[3]
+ descriptor.dir_port = None if dir_source_comp[4] == '0' else int(dir_source_comp[4])
+ descriptor.or_port = int(dir_source_comp[5])
+ descriptor.is_legacy = descriptor.nickname.endswith('-legacy')
+
+
+_parse_legacy_dir_key_line = _parse_forty_character_hex('legacy-dir-key', 'legacy_dir_key')
+_parse_vote_digest_line = _parse_forty_character_hex('vote-digest', 'vote_digest')
+
+
class DirectoryAuthority(Descriptor):
"""
Directory authority information obtained from a v3 network status document.
@@ -1056,6 +1074,26 @@ class DirectoryAuthority(Descriptor):
**\*** mandatory attribute
"""
+ ATTRIBUTES = {
+ 'nickname': (None, _parse_dir_source_line),
+ 'fingerprint': (None, _parse_dir_source_line),
+ 'hostname': (None, _parse_dir_source_line),
+ 'address': (None, _parse_dir_source_line),
+ 'dir_port': (None, _parse_dir_source_line),
+ 'or_port': (None, _parse_dir_source_line),
+ 'is_legacy': (False, _parse_dir_source_line),
+ 'contact': (None, _parse_contact_line),
+ 'vote_digest': (None, _parse_vote_digest_line),
+ 'legacy_dir_key': (None, _parse_legacy_dir_key_line),
+ }
+
+ PARSER_FOR_LINE = {
+ 'dir-source': _parse_dir_source_line,
+ 'contact': _parse_contact_line,
+ 'legacy-dir-key': _parse_legacy_dir_key_line,
+ 'vote-digest': _parse_vote_digest_line,
+ }
+
def __init__(self, raw_content, validate = True, is_vote = False):
"""
Parse a directory authority entry in a v3 network status document.
@@ -1068,47 +1106,17 @@ class DirectoryAuthority(Descriptor):
:raises: ValueError if the descriptor data is invalid
"""
- super(DirectoryAuthority, self).__init__(raw_content)
- raw_content = stem.util.str_tools._to_unicode(raw_content)
-
- self.nickname = None
- self.fingerprint = None
- self.hostname = None
- self.address = None
- self.dir_port = None
- self.or_port = None
- self.is_legacy = False
- self.contact = None
-
- self.vote_digest = None
-
- self.legacy_dir_key = None
- self.key_certificate = None
-
- self._unrecognized_lines = []
-
- self._parse(raw_content, validate, is_vote)
-
- def _parse(self, content, validate, is_vote):
- """
- Parses the given content and applies the attributes.
-
- :param str content: descriptor content
- :param bool validate: checks validity if True
- :param bool is_vote: **True** if this is for a vote, **False** if it's for
- a consensus
-
- :raises: **ValueError** if a validity check fails
- """
+ super(DirectoryAuthority, self).__init__(raw_content, lazy_load = not validate)
+ content = stem.util.str_tools._to_unicode(raw_content)
# separate the directory authority entry from its key certificate
key_div = content.find('\ndir-key-certificate-version')
if key_div != -1:
- key_cert_content = content[key_div + 1:]
+ self.key_certificate = KeyCertificate(content[key_div + 1:], validate)
content = content[:key_div + 1]
else:
- key_cert_content = None
+ self.key_certificate = None
entries = _get_descriptor_components(content, validate)
@@ -1129,12 +1137,12 @@ class DirectoryAuthority(Descriptor):
required_fields += ['contact']
if is_vote:
- if not key_cert_content:
+ if not self.key_certificate:
raise ValueError('Authority votes must have a key certificate:\n%s' % content)
excluded_fields += ['vote-digest']
elif not is_vote:
- if key_cert_content:
+ if self.key_certificate:
raise ValueError("Authority consensus entries shouldn't have a key certificate:\n%s" % content)
if not is_legacy:
@@ -1151,82 +1159,14 @@ class DirectoryAuthority(Descriptor):
type_label = 'votes' if is_vote else 'consensus entries'
raise ValueError("Authority %s shouldn't have a '%s' line:\n%s" % (type_label, keyword, content))
- for keyword, values in list(entries.items()):
- value, _, _ = values[0]
- line = '%s %s' % (keyword, value)
-
# all known attributes can only appear at most once
- if validate and len(values) > 1 and keyword in ('dir-source', 'contact', 'legacy-dir-key', 'vote-digest'):
- raise ValueError("Authority entries can only have a single '%s' line, got %i:\n%s" % (keyword, len(values), content))
-
- if keyword == 'dir-source':
- # "dir-source" nickname identity address IP dirport orport
-
- dir_source_comp = value.split(' ')
-
- if len(dir_source_comp) < 6:
- if not validate:
- continue
+ for keyword, values in list(entries.items()):
+ if len(values) > 1 and keyword in ('dir-source', 'contact', 'legacy-dir-key', 'vote-digest'):
+ raise ValueError("Authority entries can only have a single '%s' line, got %i:\n%s" % (keyword, len(values), content))
- raise ValueError("Authority entry's 'dir-source' line must have six values: %s" % line)
-
- if validate:
- if not stem.util.tor_tools.is_valid_nickname(dir_source_comp[0].rstrip('-legacy')):
- raise ValueError("Authority's nickname is invalid: %s" % dir_source_comp[0])
- elif not stem.util.tor_tools.is_valid_fingerprint(dir_source_comp[1]):
- raise ValueError("Authority's fingerprint is invalid: %s" % dir_source_comp[1])
- elif not dir_source_comp[2]:
- # https://trac.torproject.org/7055
- raise ValueError("Authority's hostname can't be blank: %s" % line)
- elif not stem.util.connection.is_valid_ipv4_address(dir_source_comp[3]):
- raise ValueError("Authority's address isn't a valid IPv4 address: %s" % dir_source_comp[3])
- elif not stem.util.connection.is_valid_port(dir_source_comp[4], allow_zero = True):
- raise ValueError("Authority's DirPort is invalid: %s" % dir_source_comp[4])
- elif not stem.util.connection.is_valid_port(dir_source_comp[5]):
- raise ValueError("Authority's ORPort is invalid: %s" % dir_source_comp[5])
- elif not (dir_source_comp[4].isdigit() and dir_source_comp[5].isdigit()):
- continue
-
- self.nickname = dir_source_comp[0]
- self.fingerprint = dir_source_comp[1]
- self.hostname = dir_source_comp[2]
- self.address = dir_source_comp[3]
- self.dir_port = None if dir_source_comp[4] == '0' else int(dir_source_comp[4])
- self.or_port = int(dir_source_comp[5])
- self.is_legacy = self.nickname.endswith('-legacy')
- elif keyword == 'contact':
- # "contact" string
-
- self.contact = value
- elif keyword == 'legacy-dir-key':
- # "legacy-dir-key" FINGERPRINT
-
- if validate and not stem.util.tor_tools.is_valid_fingerprint(value):
- raise ValueError('Authority has a malformed legacy directory key: %s' % line)
-
- self.legacy_dir_key = value
- elif keyword == 'vote-digest':
- # "vote-digest" digest
-
- # technically not a fingerprint, but has the same characteristics
- if validate and not stem.util.tor_tools.is_valid_fingerprint(value):
- raise ValueError('Authority has a malformed vote digest: %s' % line)
-
- self.vote_digest = value
- else:
- self._unrecognized_lines.append(line)
-
- if key_cert_content:
- self.key_certificate = KeyCertificate(key_cert_content, validate)
-
- def get_unrecognized_lines(self):
- """
- Returns any unrecognized lines.
-
- :returns: a list of unrecognized lines
- """
-
- return self._unrecognized_lines
+ self._parse(entries, validate)
+ else:
+ self._entries = entries
def _compare(self, other, method):
if not isinstance(other, DirectoryAuthority):
@@ -1244,6 +1184,34 @@ class DirectoryAuthority(Descriptor):
return self._compare(other, lambda s, o: s <= o)
+def _parse_dir_address_line(descriptor, entries):
+ # "dir-address" IPPort
+
+ value = _value('dir-address', entries)
+
+ if ':' not in value:
+ raise ValueError("Key certificate's 'dir-address' is expected to be of the form ADDRESS:PORT: dir-address %s" % value)
+
+ address, dirport = value.split(':', 1)
+
+ if not stem.util.connection.is_valid_ipv4_address(address):
+ raise ValueError("Key certificate's address isn't a valid IPv4 address: dir-address %s" % value)
+ elif not stem.util.connection.is_valid_port(dirport):
+ raise ValueError("Key certificate's dirport is invalid: dir-address %s" % value)
+
+ descriptor.address = address
+ descriptor.dir_port = int(dirport)
+
+
+_parse_dir_key_certificate_version_line = _parse_version_line('dir-key-certificate-version', 'version', 3)
+_parse_dir_key_published_line = _parse_timestamp_line('dir-key-published', 'published')
+_parse_dir_key_expires_line = _parse_timestamp_line('dir-key-expires', 'expires')
+_parse_identity_key_line = _parse_key_block('dir-identity-key', 'identity_key', 'RSA PUBLIC KEY')
+_parse_signing_key_line = _parse_key_block('dir-signing-key', 'signing_key', 'RSA PUBLIC KEY')
+_parse_dir_key_crosscert_line = _parse_key_block('dir-key-crosscert', 'crosscert', 'ID SIGNATURE')
+_parse_dir_key_certification_line = _parse_key_block('dir-key-certification', 'certification', 'SIGNATURE')
+
+
class KeyCertificate(Descriptor):
"""
Directory key certificate for a v3 network status document.
@@ -1263,152 +1231,55 @@ class KeyCertificate(Descriptor):
**\*** mandatory attribute
"""
- def __init__(self, raw_content, validate = True):
- super(KeyCertificate, self).__init__(raw_content)
- raw_content = stem.util.str_tools._to_unicode(raw_content)
-
- self.version = None
- self.address = None
- self.dir_port = None
- self.fingerprint = None
- self.identity_key = None
- self.published = None
- self.expires = None
- self.signing_key = None
- self.crosscert = None
- self.certification = None
-
- self._unrecognized_lines = []
-
- self._parse(raw_content, validate)
-
- def _parse(self, content, validate):
- """
- Parses the given content and applies the attributes.
-
- :param str content: descriptor content
- :param bool validate: checks validity if **True**
-
- :raises: **ValueError** if a validity check fails
- """
+ ATTRIBUTES = {
+ 'version': (None, _parse_dir_key_certificate_version_line),
+ 'address': (None, _parse_dir_address_line),
+ 'dir_port': (None, _parse_dir_address_line),
+ 'fingerprint': (None, _parse_fingerprint_line),
+ 'identity_key': (None, _parse_identity_key_line),
+ 'published': (None, _parse_dir_key_published_line),
+ 'expires': (None, _parse_dir_key_expires_line),
+ 'signing_key': (None, _parse_signing_key_line),
+ 'crosscert': (None, _parse_dir_key_crosscert_line),
+ 'certification': (None, _parse_dir_key_certification_line),
+ }
+
+ PARSER_FOR_LINE = {
+ 'dir-key-certificate-version': _parse_dir_key_certificate_version_line,
+ 'dir-address': _parse_dir_address_line,
+ 'fingerprint': _parse_fingerprint_line,
+ 'dir-key-published': _parse_dir_key_published_line,
+ 'dir-key-expires': _parse_dir_key_expires_line,
+ 'dir-identity-key': _parse_identity_key_line,
+ 'dir-signing-key': _parse_signing_key_line,
+ 'dir-key-crosscert': _parse_dir_key_crosscert_line,
+ 'dir-key-certification': _parse_dir_key_certification_line,
+ }
- entries = _get_descriptor_components(content, validate)
+ def __init__(self, raw_content, validate = True):
+ super(KeyCertificate, self).__init__(raw_content, lazy_load = not validate)
+ entries = _get_descriptor_components(raw_content, validate)
if validate:
if 'dir-key-certificate-version' != list(entries.keys())[0]:
- raise ValueError("Key certificates must start with a 'dir-key-certificate-version' line:\n%s" % (content))
+ raise ValueError("Key certificates must start with a 'dir-key-certificate-version' line:\n%s" % (raw_content))
elif 'dir-key-certification' != list(entries.keys())[-1]:
- raise ValueError("Key certificates must end with a 'dir-key-certification' line:\n%s" % (content))
+ raise ValueError("Key certificates must end with a 'dir-key-certification' line:\n%s" % (raw_content))
# check that we have mandatory fields and that our known fields only
# appear once
for keyword, is_mandatory in KEY_CERTIFICATE_PARAMS:
if is_mandatory and keyword not in entries:
- raise ValueError("Key certificates must have a '%s' line:\n%s" % (keyword, content))
+ raise ValueError("Key certificates must have a '%s' line:\n%s" % (keyword, raw_content))
entry_count = len(entries.get(keyword, []))
if entry_count > 1:
- raise ValueError("Key certificates can only have a single '%s' line, got %i:\n%s" % (keyword, entry_count, content))
-
- for keyword, values in list(entries.items()):
- value, block_type, block_contents = values[0]
- line = '%s %s' % (keyword, value)
-
- if keyword == 'dir-key-certificate-version':
- # "dir-key-certificate-version" version
+ raise ValueError("Key certificates can only have a single '%s' line, got %i:\n%s" % (keyword, entry_count, raw_content))
- if not value.isdigit():
- if not validate:
- continue
-
- raise ValueError('Key certificate has a non-integer version: %s' % line)
-
- self.version = int(value)
-
- if validate and self.version != 3:
- raise ValueError("Expected a version 3 key certificate, got version '%i' instead" % self.version)
- elif keyword == 'dir-address':
- # "dir-address" IPPort
-
- if ':' not in value:
- if not validate:
- continue
-
- raise ValueError("Key certificate's 'dir-address' is expected to be of the form ADDRESS:PORT: %s" % line)
-
- address, dirport = value.split(':', 1)
-
- if validate:
- if not stem.util.connection.is_valid_ipv4_address(address):
- raise ValueError("Key certificate's address isn't a valid IPv4 address: %s" % line)
- elif not stem.util.connection.is_valid_port(dirport):
- raise ValueError("Key certificate's dirport is invalid: %s" % line)
- elif not dirport.isdigit():
- continue
-
- self.address = address
- self.dir_port = int(dirport)
- elif keyword == 'fingerprint':
- # "fingerprint" fingerprint
-
- if validate and not stem.util.tor_tools.is_valid_fingerprint(value):
- raise ValueError("Key certificate's fingerprint is malformed: %s" % line)
-
- self.fingerprint = value
- elif keyword in ('dir-key-published', 'dir-key-expires'):
- # "dir-key-published" YYYY-MM-DD HH:MM:SS
- # "dir-key-expires" YYYY-MM-DD HH:MM:SS
-
- try:
- date_value = stem.util.str_tools._parse_timestamp(value)
-
- if keyword == 'dir-key-published':
- self.published = date_value
- elif keyword == 'dir-key-expires':
- self.expires = date_value
- except ValueError:
- if validate:
- raise ValueError("Key certificate's '%s' time wasn't parsable: %s" % (keyword, value))
- elif keyword == 'dir-identity-key':
- # "dir-identity-key" NL a public key in PEM format
-
- if validate and (not block_contents or block_type != 'RSA PUBLIC KEY'):
- raise ValueError("'dir-identity-key' should be followed by a RSA PUBLIC KEY block: %s" % line)
-
- self.identity_key = block_contents
- elif keyword == 'dir-signing-key':
- # "dir-signing-key" NL a key in PEM format
-
- if validate and (not block_contents or block_type != 'RSA PUBLIC KEY'):
- raise ValueError("'dir-signing-key' should be followed by a RSA PUBLIC KEY block: %s" % line)
-
- self.signing_key = block_contents
- elif keyword == 'dir-key-crosscert':
- # "dir-key-crosscert" NL CrossSignature
-
- if validate and (not block_contents or block_type != 'ID SIGNATURE'):
- raise ValueError("'dir-key-crosscert' should be followed by a ID SIGNATURE block: %s" % line)
-
- self.crosscert = block_contents
- elif keyword == 'dir-key-certification':
- # "dir-key-certification" NL Signature
-
- if validate and (not block_contents or block_type != 'SIGNATURE'):
- raise ValueError("'dir-key-certification' should be followed by a SIGNATURE block: %s" % line)
-
- self.certification = block_contents
- else:
- self._unrecognized_lines.append(line)
-
- def get_unrecognized_lines(self):
- """
- Returns any unrecognized lines.
-
- :returns: **list** of unrecognized lines
- """
-
- return self._unrecognized_lines
+ self._parse(entries, validate)
+ else:
+ self._entries = entries
def _compare(self, other, method):
if not isinstance(other, KeyCertificate):
@@ -1507,7 +1378,7 @@ class BridgeNetworkStatusDocument(NetworkStatusDocument):
router_iter = stem.descriptor.router_status_entry._parse_file(
document_file,
validate,
- entry_class = stem.descriptor.router_status_entry.RouterStatusEntryV2,
+ entry_class = RouterStatusEntryV2,
extra_args = (self,),
)
diff --git a/stem/descriptor/router_status_entry.py b/stem/descriptor/router_status_entry.py
index ab93adab..e0421cd3 100644
--- a/stem/descriptor/router_status_entry.py
+++ b/stem/descriptor/router_status_entry.py
@@ -29,6 +29,8 @@ import stem.util.str_tools
from stem.descriptor import (
KEYWORD_LINE,
Descriptor,
+ _value,
+ _values,
_get_descriptor_components,
_read_until_keywords,
)
@@ -101,6 +103,248 @@ def _parse_file(document_file, validate, entry_class, entry_keyword = 'r', start
break
+def _parse_r_line(descriptor, entries):
+ # Parses a RouterStatusEntry's 'r' line. They're very nearly identical for
+ # all current entry types (v2, v3, and microdescriptor v3) with one little
+ # wrinkle: only the microdescriptor flavor excludes a 'digest' field.
+ #
+ # For v2 and v3 router status entries:
+ # "r" nickname identity digest publication IP ORPort DirPort
+ # example: r mauer BD7xbfsCFku3+tgybEZsg8Yjhvw itcuKQ6PuPLJ7m/Oi928WjO2j8g 2012-06-22 13:19:32 80.101.105.103 9001 0
+ #
+ # For v3 microdescriptor router status entries:
+ # "r" nickname identity publication IP ORPort DirPort
+ # example: r Konata ARIJF2zbqirB9IwsW0mQznccWww 2012-09-24 13:40:40 69.64.48.168 9001 9030
+
+ value = _value('r', entries)
+ include_digest = not isinstance(descriptor, RouterStatusEntryMicroV3)
+
+ r_comp = value.split(' ')
+
+ # inject a None for the digest to normalize the field positioning
+ if not include_digest:
+ r_comp.insert(2, None)
+
+ if len(r_comp) < 8:
+ expected_field_count = 'eight' if include_digest else 'seven'
+ raise ValueError("%s 'r' line must have %s values: r %s" % (descriptor._name(), expected_field_count, value))
+
+ if not stem.util.tor_tools.is_valid_nickname(r_comp[0]):
+ raise ValueError("%s nickname isn't valid: %s" % (descriptor._name(), r_comp[0]))
+ elif not stem.util.connection.is_valid_ipv4_address(r_comp[5]):
+ raise ValueError("%s address isn't a valid IPv4 address: %s" % (descriptor._name(), r_comp[5]))
+ elif not stem.util.connection.is_valid_port(r_comp[6]):
+ raise ValueError('%s ORPort is invalid: %s' % (descriptor._name(), r_comp[6]))
+ elif not stem.util.connection.is_valid_port(r_comp[7], allow_zero = True):
+ raise ValueError('%s DirPort is invalid: %s' % (descriptor._name(), r_comp[7]))
+
+ descriptor.nickname = r_comp[0]
+ descriptor.fingerprint = _base64_to_hex(r_comp[1])
+
+ if include_digest:
+ descriptor.digest = _base64_to_hex(r_comp[2])
+
+ descriptor.address = r_comp[5]
+ descriptor.or_port = int(r_comp[6])
+ descriptor.dir_port = None if r_comp[7] == '0' else int(r_comp[7])
+
+ try:
+ published = '%s %s' % (r_comp[3], r_comp[4])
+ descriptor.published = stem.util.str_tools._parse_timestamp(published)
+ except ValueError:
+    raise ValueError("Publication time wasn't parsable: r %s" % value)
+
+
+def _parse_a_line(descriptor, entries):
+ # "a" SP address ":" portlist
+ # example: a [2001:888:2133:0:82:94:251:204]:9001
+
+ or_addresses = []
+
+ for value in _values('a', entries):
+ if ':' not in value:
+ raise ValueError("%s 'a' line must be of the form '[address]:[ports]': a %s" % (descriptor._name(), value))
+
+ address, port = value.rsplit(':', 1)
+ is_ipv6 = address.startswith('[') and address.endswith(']')
+
+ if is_ipv6:
+ address = address[1:-1] # remove brackets
+
+ if not ((not is_ipv6 and stem.util.connection.is_valid_ipv4_address(address)) or
+ (is_ipv6 and stem.util.connection.is_valid_ipv6_address(address))):
+      raise ValueError("%s 'a' line must start with an IPv4 or bracketed IPv6 address: a %s" % (descriptor._name(), value))
+
+ if stem.util.connection.is_valid_port(port):
+ or_addresses.append((address, int(port), is_ipv6))
+ else:
+ raise ValueError("%s 'a' line had an invalid port (%s): a %s" % (descriptor._name(), port, value))
+
+ descriptor.or_addresses = or_addresses
+
+
+def _parse_s_line(descriptor, entries):
+ # "s" Flags
+ # example: s Named Running Stable Valid
+
+ value = _value('s', entries)
+ flags = [] if value == '' else value.split(' ')
+ descriptor.flags = flags
+
+ for flag in flags:
+ if flags.count(flag) > 1:
+ raise ValueError('%s had duplicate flags: s %s' % (descriptor._name(), value))
+ elif flag == "":
+ raise ValueError("%s had extra whitespace on its 's' line: s %s" % (descriptor._name(), value))
+
+
+def _parse_v_line(descriptor, entries):
+ # "v" version
+ # example: v Tor 0.2.2.35
+ #
+ # The spec says that if this starts with "Tor " then what follows is a
+ # tor version. If not then it has "upgraded to a more sophisticated
+ # protocol versioning system".
+
+ value = _value('v', entries)
+ descriptor.version_line = value
+
+ if value.startswith('Tor '):
+ try:
+ descriptor.version = stem.version._get_version(value[4:])
+ except ValueError as exc:
+ raise ValueError('%s has a malformed tor version (%s): v %s' % (descriptor._name(), exc, value))
+
+
+def _parse_w_line(descriptor, entries):
+ # "w" "Bandwidth=" INT ["Measured=" INT] ["Unmeasured=1"]
+ # example: w Bandwidth=7980
+
+ value = _value('w', entries)
+ w_comp = value.split(' ')
+
+ if len(w_comp) < 1:
+ raise ValueError("%s 'w' line is blank: w %s" % (descriptor._name(), value))
+ elif not w_comp[0].startswith('Bandwidth='):
+ raise ValueError("%s 'w' line needs to start with a 'Bandwidth=' entry: w %s" % (descriptor._name(), value))
+
+ for w_entry in w_comp:
+ if '=' in w_entry:
+ w_key, w_value = w_entry.split('=', 1)
+ else:
+ w_key, w_value = w_entry, None
+
+ if w_key == 'Bandwidth':
+ if not (w_value and w_value.isdigit()):
+ raise ValueError("%s 'Bandwidth=' entry needs to have a numeric value: w %s" % (descriptor._name(), value))
+
+ descriptor.bandwidth = int(w_value)
+ elif w_key == 'Measured':
+ if not (w_value and w_value.isdigit()):
+ raise ValueError("%s 'Measured=' entry needs to have a numeric value: w %s" % (descriptor._name(), value))
+
+ descriptor.measured = int(w_value)
+ elif w_key == 'Unmeasured':
+ if w_value != '1':
+ raise ValueError("%s 'Unmeasured=' should only have the value of '1': w %s" % (descriptor._name(), value))
+
+ descriptor.is_unmeasured = True
+ else:
+ descriptor.unrecognized_bandwidth_entries.append(w_entry)
+
+
+def _parse_p_line(descriptor, entries):
+ # "p" ("accept" / "reject") PortList
+ # p reject 1-65535
+ # example: p accept 80,110,143,443,993,995,6660-6669,6697,7000-7001
+
+ value = _value('p', entries)
+
+ try:
+ descriptor.exit_policy = stem.exit_policy.MicroExitPolicy(value)
+ except ValueError as exc:
+ raise ValueError('%s exit policy is malformed (%s): p %s' % (descriptor._name(), exc, value))
+
+
+def _parse_m_line(descriptor, entries):
+ # "m" methods 1*(algorithm "=" digest)
+ # example: m 8,9,10,11,12 sha256=g1vx9si329muxV3tquWIXXySNOIwRGMeAESKs/v4DWs
+
+ all_hashes = []
+
+ for value in _values('m', entries):
+ m_comp = value.split(' ')
+
+ if not (descriptor.document and descriptor.document.is_vote):
+ vote_status = 'vote' if descriptor.document else '<undefined document>'
+ raise ValueError("%s 'm' line should only appear in votes (appeared in a %s): m %s" % (descriptor._name(), vote_status, value))
+ elif len(m_comp) < 1:
+ raise ValueError("%s 'm' line needs to start with a series of methods: m %s" % (descriptor._name(), value))
+
+ try:
+ methods = [int(entry) for entry in m_comp[0].split(',')]
+ except ValueError:
+ raise ValueError('%s microdescriptor methods should be a series of comma separated integers: m %s' % (descriptor._name(), value))
+
+ hashes = {}
+
+ for entry in m_comp[1:]:
+ if '=' not in entry:
+ raise ValueError("%s can only have a series of 'algorithm=digest' mappings after the methods: m %s" % (descriptor._name(), value))
+
+ hash_name, digest = entry.split('=', 1)
+ hashes[hash_name] = digest
+
+ all_hashes.append((methods, hashes))
+
+ descriptor.microdescriptor_hashes = all_hashes
+
+
+def _parse_microdescriptor_m_line(descriptor, entries):
+ # "m" digest
+ # example: m aiUklwBrua82obG5AsTX+iEpkjQA2+AQHxZ7GwMfY70
+
+ descriptor.digest = _base64_to_hex(_value('m', entries), check_if_fingerprint = False)
+
+
+def _base64_to_hex(identity, check_if_fingerprint = True):
+ """
+ Decodes a base64 value to hex. For example...
+
+ ::
+
+ >>> _base64_to_hex('p1aag7VwarGxqctS7/fS0y5FU+s')
+ 'A7569A83B5706AB1B1A9CB52EFF7D2D32E4553EB'
+
+ :param str identity: encoded fingerprint from the consensus
+ :param bool check_if_fingerprint: asserts that the result is a fingerprint if **True**
+
+ :returns: **str** with the uppercase hex encoding of the relay's fingerprint
+
+ :raises: **ValueError** if the result isn't a valid fingerprint
+ """
+
+ # trailing equal signs were stripped from the identity
+ missing_padding = len(identity) % 4
+ identity += '=' * missing_padding
+
+ try:
+ identity_decoded = base64.b64decode(stem.util.str_tools._to_bytes(identity))
+ except (TypeError, binascii.Error):
+ raise ValueError("Unable to decode identity string '%s'" % identity)
+
+ fingerprint = binascii.b2a_hex(identity_decoded).upper()
+
+ if stem.prereq.is_python_3():
+ fingerprint = stem.util.str_tools._to_unicode(fingerprint)
+
+ if check_if_fingerprint:
+ if not stem.util.tor_tools.is_valid_fingerprint(fingerprint):
+ raise ValueError("Decoded '%s' to be '%s', which isn't a valid fingerprint" % (identity, fingerprint))
+
+ return fingerprint
+
+
class RouterStatusEntry(Descriptor):
"""
Information about an individual router stored within a network status
@@ -122,7 +366,27 @@ class RouterStatusEntry(Descriptor):
:var str version_line: versioning information reported by the relay
"""
- def __init__(self, content, validate, document):
+ ATTRIBUTES = {
+ 'nickname': (None, _parse_r_line),
+ 'fingerprint': (None, _parse_r_line),
+ 'published': (None, _parse_r_line),
+ 'address': (None, _parse_r_line),
+ 'or_port': (None, _parse_r_line),
+ 'dir_port': (None, _parse_r_line),
+
+ 'flags': (None, _parse_s_line),
+
+ 'version_line': (None, _parse_v_line),
+ 'version': (None, _parse_v_line),
+ }
+
+ PARSER_FOR_LINE = {
+ 'r': _parse_r_line,
+ 's': _parse_s_line,
+ 'v': _parse_v_line,
+ }
+
+ def __init__(self, content, validate = True, document = None):
"""
Parse a router descriptor in a network status document.
@@ -134,82 +398,32 @@ class RouterStatusEntry(Descriptor):
:raises: **ValueError** if the descriptor data is invalid
"""
- super(RouterStatusEntry, self).__init__(content)
- content = stem.util.str_tools._to_unicode(content)
-
+ super(RouterStatusEntry, self).__init__(content, lazy_load = not validate)
self.document = document
-
- self.nickname = None
- self.fingerprint = None
- self.published = None
- self.address = None
- self.or_port = None
- self.dir_port = None
-
- self.flags = None
-
- self.version_line = None
- self.version = None
-
- self._unrecognized_lines = []
-
entries = _get_descriptor_components(content, validate)
if validate:
- self._check_constraints(entries)
-
- self._parse(entries, validate)
-
- def _parse(self, entries, validate):
- """
- Parses the given content and applies the attributes.
-
- :param dict entries: keyword => (value, pgp key) entries
- :param bool validate: checks validity if **True**
-
- :raises: **ValueError** if a validity check fails
- """
-
- for keyword, values in list(entries.items()):
- value, _, _ = values[0]
-
- if keyword == 's':
- _parse_s_line(self, value, validate)
- elif keyword == 'v':
- _parse_v_line(self, value, validate)
- else:
- self._unrecognized_lines.append('%s %s' % (keyword, value))
-
- def _check_constraints(self, entries):
- """
- Does a basic check that the entries conform to this descriptor type's
- constraints.
-
- :param dict entries: keyword => (value, pgp key) entries
-
- :raises: **ValueError** if an issue arises in validation
- """
+ for keyword in self._required_fields():
+ if keyword not in entries:
+ raise ValueError("%s must have a '%s' line:\n%s" % (self._name(True), keyword, str(self)))
- for keyword in self._required_fields():
- if keyword not in entries:
- raise ValueError("%s must have a '%s' line:\n%s" % (self._name(True), keyword, str(self)))
+ for keyword in self._single_fields():
+ if keyword in entries and len(entries[keyword]) > 1:
+ raise ValueError("%s can only have a single '%s' line, got %i:\n%s" % (self._name(True), keyword, len(entries[keyword]), str(self)))
- for keyword in self._single_fields():
- if keyword in entries and len(entries[keyword]) > 1:
- raise ValueError("%s can only have a single '%s' line, got %i:\n%s" % (self._name(True), keyword, len(entries[keyword]), str(self)))
+ if 'r' != list(entries.keys())[0]:
+ raise ValueError("%s are expected to start with a 'r' line:\n%s" % (self._name(True), str(self)))
- if 'r' != list(entries.keys())[0]:
- raise ValueError("%s are expected to start with a 'r' line:\n%s" % (self._name(True), str(self)))
+ self._parse(entries, validate)
+ else:
+ self._entries = entries
def _name(self, is_plural = False):
"""
Name for this descriptor type.
"""
- if is_plural:
- return 'Router status entries'
- else:
- return 'Router status entry'
+ return 'Router status entries' if is_plural else 'Router status entry'
def _required_fields(self):
"""
@@ -225,15 +439,6 @@ class RouterStatusEntry(Descriptor):
return ()
- def get_unrecognized_lines(self):
- """
- Provides any unrecognized lines.
-
- :returns: list of unrecognized lines
- """
-
- return list(self._unrecognized_lines)
-
def _compare(self, other, method):
if not isinstance(other, RouterStatusEntry):
return False
@@ -261,25 +466,12 @@ class RouterStatusEntryV2(RouterStatusEntry):
a default value, others are left as **None** if undefined
"""
- def __init__(self, content, validate = True, document = None):
- self.digest = None
- super(RouterStatusEntryV2, self).__init__(content, validate, document)
-
- def _parse(self, entries, validate):
- for keyword, values in list(entries.items()):
- value, _, _ = values[0]
-
- if keyword == 'r':
- _parse_r_line(self, value, validate, True)
- del entries['r']
-
- RouterStatusEntry._parse(self, entries, validate)
+ ATTRIBUTES = dict(RouterStatusEntry.ATTRIBUTES, **{
+ 'digest': (None, _parse_r_line),
+ })
def _name(self, is_plural = False):
- if is_plural:
- return 'Router status entries (v2)'
- else:
- return 'Router status entry (v2)'
+ return 'Router status entries (v2)' if is_plural else 'Router status entry (v2)'
def _required_fields(self):
return ('r')
@@ -331,51 +523,28 @@ class RouterStatusEntryV3(RouterStatusEntry):
a default value, others are left as **None** if undefined
"""
- def __init__(self, content, validate = True, document = None):
- self.or_addresses = []
- self.digest = None
-
- self.bandwidth = None
- self.measured = None
- self.is_unmeasured = False
- self.unrecognized_bandwidth_entries = []
-
- self.exit_policy = None
- self.microdescriptor_hashes = []
-
- super(RouterStatusEntryV3, self).__init__(content, validate, document)
+ ATTRIBUTES = dict(RouterStatusEntry.ATTRIBUTES, **{
+ 'digest': (None, _parse_r_line),
+ 'or_addresses': ([], _parse_a_line),
- def _parse(self, entries, validate):
- for keyword, values in list(entries.items()):
- value, _, _ = values[0]
+ 'bandwidth': (None, _parse_w_line),
+ 'measured': (None, _parse_w_line),
+ 'is_unmeasured': (False, _parse_w_line),
+ 'unrecognized_bandwidth_entries': ([], _parse_w_line),
- if keyword == 'r':
- _parse_r_line(self, value, validate, True)
- del entries['r']
- elif keyword == 'a':
- for entry, _, _ in values:
- _parse_a_line(self, entry, validate)
+ 'exit_policy': (None, _parse_p_line),
+ 'microdescriptor_hashes': ([], _parse_m_line),
+ })
- del entries['a']
- elif keyword == 'w':
- _parse_w_line(self, value, validate)
- del entries['w']
- elif keyword == 'p':
- _parse_p_line(self, value, validate)
- del entries['p']
- elif keyword == 'm':
- for entry, _, _ in values:
- _parse_m_line(self, entry, validate)
-
- del entries['m']
-
- RouterStatusEntry._parse(self, entries, validate)
+ PARSER_FOR_LINE = dict(RouterStatusEntry.PARSER_FOR_LINE, **{
+ 'a': _parse_a_line,
+ 'w': _parse_w_line,
+ 'p': _parse_p_line,
+ 'm': _parse_m_line,
+ })
def _name(self, is_plural = False):
- if is_plural:
- return 'Router status entries (v3)'
- else:
- return 'Router status entry (v3)'
+ return 'Router status entries (v3)' if is_plural else 'Router status entry (v3)'
def _required_fields(self):
return ('r', 's')
@@ -417,40 +586,22 @@ class RouterStatusEntryMicroV3(RouterStatusEntry):
a default value, others are left as **None** if undefined
"""
- def __init__(self, content, validate = True, document = None):
- self.bandwidth = None
- self.measured = None
- self.is_unmeasured = False
- self.unrecognized_bandwidth_entries = []
-
- self.digest = None
-
- super(RouterStatusEntryMicroV3, self).__init__(content, validate, document)
+ ATTRIBUTES = dict(RouterStatusEntry.ATTRIBUTES, **{
+ 'bandwidth': (None, _parse_w_line),
+ 'measured': (None, _parse_w_line),
+ 'is_unmeasured': (False, _parse_w_line),
+ 'unrecognized_bandwidth_entries': ([], _parse_w_line),
- def _parse(self, entries, validate):
- for keyword, values in list(entries.items()):
- value, _, _ = values[0]
+ 'digest': (None, _parse_microdescriptor_m_line),
+ })
- if keyword == 'r':
- _parse_r_line(self, value, validate, False)
- del entries['r']
- elif keyword == 'w':
- _parse_w_line(self, value, validate)
- del entries['w']
- elif keyword == 'm':
- # "m" digest
- # example: m aiUklwBrua82obG5AsTX+iEpkjQA2+AQHxZ7GwMfY70
-
- self.digest = _base64_to_hex(value, validate, False)
- del entries['m']
-
- RouterStatusEntry._parse(self, entries, validate)
+ PARSER_FOR_LINE = dict(RouterStatusEntry.PARSER_FOR_LINE, **{
+ 'w': _parse_w_line,
+ 'm': _parse_microdescriptor_m_line,
+ })
def _name(self, is_plural = False):
- if is_plural:
- return 'Router status entries (micro v3)'
- else:
- return 'Router status entry (micro v3)'
+ return 'Router status entries (micro v3)' if is_plural else 'Router status entry (micro v3)'
def _required_fields(self):
return ('r', 's', 'm')
@@ -472,269 +623,3 @@ class RouterStatusEntryMicroV3(RouterStatusEntry):
def __le__(self, other):
return self._compare(other, lambda s, o: s <= o)
-
-
-def _parse_r_line(desc, value, validate, include_digest = True):
- # Parses a RouterStatusEntry's 'r' line. They're very nearly identical for
- # all current entry types (v2, v3, and microdescriptor v3) with one little
- # wrinkle: only the microdescriptor flavor excludes a 'digest' field.
- #
- # For v2 and v3 router status entries:
- # "r" nickname identity digest publication IP ORPort DirPort
- # example: r mauer BD7xbfsCFku3+tgybEZsg8Yjhvw itcuKQ6PuPLJ7m/Oi928WjO2j8g 2012-06-22 13:19:32 80.101.105.103 9001 0
- #
- # For v3 microdescriptor router status entries:
- # "r" nickname identity publication IP ORPort DirPort
- # example: r Konata ARIJF2zbqirB9IwsW0mQznccWww 2012-09-24 13:40:40 69.64.48.168 9001 9030
-
- r_comp = value.split(' ')
-
- # inject a None for the digest to normalize the field positioning
- if not include_digest:
- r_comp.insert(2, None)
-
- if len(r_comp) < 8:
- if not validate:
- return
-
- expected_field_count = 'eight' if include_digest else 'seven'
- raise ValueError("%s 'r' line must have %s values: r %s" % (desc._name(), expected_field_count, value))
-
- if validate:
- if not stem.util.tor_tools.is_valid_nickname(r_comp[0]):
- raise ValueError("%s nickname isn't valid: %s" % (desc._name(), r_comp[0]))
- elif not stem.util.connection.is_valid_ipv4_address(r_comp[5]):
- raise ValueError("%s address isn't a valid IPv4 address: %s" % (desc._name(), r_comp[5]))
- elif not stem.util.connection.is_valid_port(r_comp[6]):
- raise ValueError('%s ORPort is invalid: %s' % (desc._name(), r_comp[6]))
- elif not stem.util.connection.is_valid_port(r_comp[7], allow_zero = True):
- raise ValueError('%s DirPort is invalid: %s' % (desc._name(), r_comp[7]))
- elif not (r_comp[6].isdigit() and r_comp[7].isdigit()):
- return
-
- desc.nickname = r_comp[0]
- desc.fingerprint = _base64_to_hex(r_comp[1], validate)
-
- if include_digest:
- desc.digest = _base64_to_hex(r_comp[2], validate)
-
- desc.address = r_comp[5]
- desc.or_port = int(r_comp[6])
- desc.dir_port = None if r_comp[7] == '0' else int(r_comp[7])
-
- try:
- published = '%s %s' % (r_comp[3], r_comp[4])
- desc.published = stem.util.str_tools._parse_timestamp(published)
- except ValueError:
- if validate:
- raise ValueError("Publication time time wasn't parsable: r %s" % value)
-
-
-def _parse_a_line(desc, value, validate):
- # "a" SP address ":" portlist
- # example: a [2001:888:2133:0:82:94:251:204]:9001
-
- if ':' not in value:
- if not validate:
- return
-
- raise ValueError("%s 'a' line must be of the form '[address]:[ports]': a %s" % (desc._name(), value))
-
- address, port = value.rsplit(':', 1)
- is_ipv6 = address.startswith('[') and address.endswith(']')
-
- if is_ipv6:
- address = address[1:-1] # remove brackets
-
- if not ((not is_ipv6 and stem.util.connection.is_valid_ipv4_address(address)) or
- (is_ipv6 and stem.util.connection.is_valid_ipv6_address(address))):
- if not validate:
- return
- else:
- raise ValueError("%s 'a' line must start with an IPv6 address: a %s" % (desc._name(), value))
-
- if stem.util.connection.is_valid_port(port):
- desc.or_addresses.append((address, int(port), is_ipv6))
- elif validate:
- raise ValueError("%s 'a' line had an invalid port (%s): a %s" % (desc._name(), port, value))
-
-
-def _parse_s_line(desc, value, validate):
- # "s" Flags
- # example: s Named Running Stable Valid
-
- flags = [] if value == '' else value.split(' ')
- desc.flags = flags
-
- if validate:
- for flag in flags:
- if flags.count(flag) > 1:
- raise ValueError('%s had duplicate flags: s %s' % (desc._name(), value))
- elif flag == "":
- raise ValueError("%s had extra whitespace on its 's' line: s %s" % (desc._name(), value))
-
-
-def _parse_v_line(desc, value, validate):
- # "v" version
- # example: v Tor 0.2.2.35
- #
- # The spec says that if this starts with "Tor " then what follows is a
- # tor version. If not then it has "upgraded to a more sophisticated
- # protocol versioning system".
-
- desc.version_line = value
-
- if value.startswith('Tor '):
- try:
- desc.version = stem.version._get_version(value[4:])
- except ValueError as exc:
- if validate:
- raise ValueError('%s has a malformed tor version (%s): v %s' % (desc._name(), exc, value))
-
-
-def _parse_w_line(desc, value, validate):
- # "w" "Bandwidth=" INT ["Measured=" INT] ["Unmeasured=1"]
- # example: w Bandwidth=7980
-
- w_comp = value.split(' ')
-
- if len(w_comp) < 1:
- if not validate:
- return
-
- raise ValueError("%s 'w' line is blank: w %s" % (desc._name(), value))
- elif not w_comp[0].startswith('Bandwidth='):
- if not validate:
- return
-
- raise ValueError("%s 'w' line needs to start with a 'Bandwidth=' entry: w %s" % (desc._name(), value))
-
- for w_entry in w_comp:
- if '=' in w_entry:
- w_key, w_value = w_entry.split('=', 1)
- else:
- w_key, w_value = w_entry, None
-
- if w_key == 'Bandwidth':
- if not (w_value and w_value.isdigit()):
- if not validate:
- return
-
- raise ValueError("%s 'Bandwidth=' entry needs to have a numeric value: w %s" % (desc._name(), value))
-
- desc.bandwidth = int(w_value)
- elif w_key == 'Measured':
- if not (w_value and w_value.isdigit()):
- if not validate:
- return
-
- raise ValueError("%s 'Measured=' entry needs to have a numeric value: w %s" % (desc._name(), value))
-
- desc.measured = int(w_value)
- elif w_key == 'Unmeasured':
- if validate and w_value != '1':
- raise ValueError("%s 'Unmeasured=' should only have the value of '1': w %s" % (desc._name(), value))
-
- desc.is_unmeasured = True
- else:
- desc.unrecognized_bandwidth_entries.append(w_entry)
-
-
-def _parse_p_line(desc, value, validate):
- # "p" ("accept" / "reject") PortList
- # p reject 1-65535
- # example: p accept 80,110,143,443,993,995,6660-6669,6697,7000-7001
-
- try:
- desc.exit_policy = stem.exit_policy.MicroExitPolicy(value)
- except ValueError as exc:
- if not validate:
- return
-
- raise ValueError('%s exit policy is malformed (%s): p %s' % (desc._name(), exc, value))
-
-
-def _parse_m_line(desc, value, validate):
- # "m" methods 1*(algorithm "=" digest)
- # example: m 8,9,10,11,12 sha256=g1vx9si329muxV3tquWIXXySNOIwRGMeAESKs/v4DWs
-
- m_comp = value.split(' ')
-
- if not (desc.document and desc.document.is_vote):
- if not validate:
- return
-
- vote_status = 'vote' if desc.document else '<undefined document>'
- raise ValueError("%s 'm' line should only appear in votes (appeared in a %s): m %s" % (desc._name(), vote_status, value))
- elif len(m_comp) < 1:
- if not validate:
- return
-
- raise ValueError("%s 'm' line needs to start with a series of methods: m %s" % (desc._name(), value))
-
- try:
- methods = [int(entry) for entry in m_comp[0].split(',')]
- except ValueError:
- if not validate:
- return
-
- raise ValueError('%s microdescriptor methods should be a series of comma separated integers: m %s' % (desc._name(), value))
-
- hashes = {}
-
- for entry in m_comp[1:]:
- if '=' not in entry:
- if not validate:
- continue
-
- raise ValueError("%s can only have a series of 'algorithm=digest' mappings after the methods: m %s" % (desc._name(), value))
-
- hash_name, digest = entry.split('=', 1)
- hashes[hash_name] = digest
-
- desc.microdescriptor_hashes.append((methods, hashes))
-
-
-def _base64_to_hex(identity, validate, check_if_fingerprint = True):
- """
- Decodes a base64 value to hex. For example...
-
- ::
-
- >>> _base64_to_hex('p1aag7VwarGxqctS7/fS0y5FU+s', True)
- 'A7569A83B5706AB1B1A9CB52EFF7D2D32E4553EB'
-
- :param str identity: encoded fingerprint from the consensus
- :param bool validate: checks validity if **True**
- :param bool check_if_fingerprint: asserts that the result is a fingerprint if **True**
-
- :returns: **str** with the uppercase hex encoding of the relay's fingerprint
-
- :raises: **ValueError** if the result isn't a valid fingerprint
- """
-
- # trailing equal signs were stripped from the identity
- missing_padding = len(identity) % 4
- identity += '=' * missing_padding
-
- try:
- identity_decoded = base64.b64decode(stem.util.str_tools._to_bytes(identity))
- except (TypeError, binascii.Error):
- if not validate:
- return None
-
- raise ValueError("Unable to decode identity string '%s'" % identity)
-
- fingerprint = binascii.b2a_hex(identity_decoded).upper()
-
- if stem.prereq.is_python_3():
- fingerprint = stem.util.str_tools._to_unicode(fingerprint)
-
- if check_if_fingerprint:
- if not stem.util.tor_tools.is_valid_fingerprint(fingerprint):
- if not validate:
- return None
-
- raise ValueError("Decoded '%s' to be '%s', which isn't a valid fingerprint" % (identity, fingerprint))
-
- return fingerprint
diff --git a/stem/descriptor/server_descriptor.py b/stem/descriptor/server_descriptor.py
index 67260777..afcdac2f 100644
--- a/stem/descriptor/server_descriptor.py
+++ b/stem/descriptor/server_descriptor.py
@@ -27,13 +27,13 @@ etc). This information is provided from a few sources...
| +- get_scrubbing_issues - description of issues with our scrubbing
|
|- digest - calculates the upper-case hex digest value for our content
- |- get_unrecognized_lines - lines with unrecognized content
|- get_annotations - dictionary of content prior to the descriptor entry
+- get_annotation_lines - lines that provided the annotations
"""
import base64
import codecs
+import functools
import hashlib
import re
@@ -51,9 +51,15 @@ from stem.util import log
from stem.descriptor import (
PGP_BLOCK_END,
Descriptor,
- _get_bytes_field,
_get_descriptor_components,
_read_until_keywords,
+ _value,
+ _values,
+ _parse_simple_line,
+ _parse_bytes_line,
+ _parse_timestamp_line,
+ _parse_forty_character_hex,
+ _parse_key_block,
)
try:
@@ -167,6 +173,219 @@ def _parse_file(descriptor_file, is_bridge = False, validate = True, **kwargs):
break # done parsing descriptors
+def _parse_router_line(descriptor, entries):
+ # "router" nickname address ORPort SocksPort DirPort
+
+ value = _value('router', entries)
+ router_comp = value.split()
+
+ if len(router_comp) < 5:
+ raise ValueError('Router line must have five values: router %s' % value)
+ elif not stem.util.tor_tools.is_valid_nickname(router_comp[0]):
+ raise ValueError("Router line entry isn't a valid nickname: %s" % router_comp[0])
+ elif not stem.util.connection.is_valid_ipv4_address(router_comp[1]):
+ raise ValueError("Router line entry isn't a valid IPv4 address: %s" % router_comp[1])
+ elif not stem.util.connection.is_valid_port(router_comp[2], allow_zero = True):
+ raise ValueError("Router line's ORPort is invalid: %s" % router_comp[2])
+ elif not stem.util.connection.is_valid_port(router_comp[3], allow_zero = True):
+ raise ValueError("Router line's SocksPort is invalid: %s" % router_comp[3])
+ elif not stem.util.connection.is_valid_port(router_comp[4], allow_zero = True):
+ raise ValueError("Router line's DirPort is invalid: %s" % router_comp[4])
+
+ descriptor.nickname = router_comp[0]
+ descriptor.address = router_comp[1]
+ descriptor.or_port = int(router_comp[2])
+ descriptor.socks_port = None if router_comp[3] == '0' else int(router_comp[3])
+ descriptor.dir_port = None if router_comp[4] == '0' else int(router_comp[4])
+
+
+def _parse_bandwidth_line(descriptor, entries):
+ # "bandwidth" bandwidth-avg bandwidth-burst bandwidth-observed
+
+ value = _value('bandwidth', entries)
+ bandwidth_comp = value.split()
+
+ if len(bandwidth_comp) < 3:
+ raise ValueError('Bandwidth line must have three values: bandwidth %s' % value)
+ elif not bandwidth_comp[0].isdigit():
+ raise ValueError("Bandwidth line's average rate isn't numeric: %s" % bandwidth_comp[0])
+ elif not bandwidth_comp[1].isdigit():
+ raise ValueError("Bandwidth line's burst rate isn't numeric: %s" % bandwidth_comp[1])
+ elif not bandwidth_comp[2].isdigit():
+ raise ValueError("Bandwidth line's observed rate isn't numeric: %s" % bandwidth_comp[2])
+
+ descriptor.average_bandwidth = int(bandwidth_comp[0])
+ descriptor.burst_bandwidth = int(bandwidth_comp[1])
+ descriptor.observed_bandwidth = int(bandwidth_comp[2])
+
+
+def _parse_platform_line(descriptor, entries):
+ # "platform" string
+
+ _parse_bytes_line('platform', 'platform')(descriptor, entries)
+
+ # The platform attribute was set earlier. This line can contain any
+ # arbitrary data, but tor seems to report its version followed by the
+ # os like the following...
+ #
+ # platform Tor 0.2.2.35 (git-73ff13ab3cc9570d) on Linux x86_64
+ #
+  # There's no guarantee that we'll be able to pick out the
+  # version, but might as well try to save our caller the effort.
+
+ value = _value('platform', entries)
+ platform_match = re.match('^(?:node-)?Tor (\S*).* on (.*)$', value)
+
+ if platform_match:
+ version_str, descriptor.operating_system = platform_match.groups()
+
+ try:
+ descriptor.tor_version = stem.version._get_version(version_str)
+ except ValueError:
+ pass
+
+
+def _parse_fingerprint_line(descriptor, entries):
+ # This is forty hex digits split into space separated groups of four.
+ # Checking that we match this pattern.
+
+ value = _value('fingerprint', entries)
+ fingerprint = value.replace(' ', '')
+
+ for grouping in value.split(' '):
+ if len(grouping) != 4:
+ raise ValueError('Fingerprint line should have groupings of four hex digits: %s' % value)
+
+ if not stem.util.tor_tools.is_valid_fingerprint(fingerprint):
+ raise ValueError('Tor relay fingerprints consist of forty hex digits: %s' % value)
+
+ descriptor.fingerprint = fingerprint
+
+
+def _parse_hibernating_line(descriptor, entries):
+ # "hibernating" 0|1 (in practice only set if one)
+
+ value = _value('hibernating', entries)
+
+ if value not in ('0', '1'):
+ raise ValueError('Hibernating line had an invalid value, must be zero or one: %s' % value)
+
+ descriptor.hibernating = value == '1'
+
+
+def _parse_hidden_service_dir_line(descriptor, entries):
+ value = _value('hidden-service-dir', entries)
+
+ if value:
+ descriptor.hidden_service_dir = value.split(' ')
+ else:
+ descriptor.hidden_service_dir = ['2']
+
+
+def _parse_uptime_line(descriptor, entries):
+ # We need to be tolerant of negative uptimes to accommodate a past tor
+ # bug...
+ #
+ # Changes in version 0.1.2.7-alpha - 2007-02-06
+ # - If our system clock jumps back in time, don't publish a negative
+ # uptime in the descriptor. Also, don't let the global rate limiting
+ # buckets go absurdly negative.
+ #
+ # After parsing all of the attributes we'll double check that negative
+ # uptimes only occurred prior to this fix.
+
+ value = _value('uptime', entries)
+
+ try:
+ descriptor.uptime = int(value)
+ except ValueError:
+ raise ValueError('Uptime line must have an integer value: %s' % value)
+
+
+def _parse_protocols_line(descriptor, entries):
+ value = _value('protocols', entries)
+ protocols_match = re.match('^Link (.*) Circuit (.*)$', value)
+
+ if not protocols_match:
+ raise ValueError('Protocols line did not match the expected pattern: protocols %s' % value)
+
+ link_versions, circuit_versions = protocols_match.groups()
+ descriptor.link_protocols = link_versions.split(' ')
+ descriptor.circuit_protocols = circuit_versions.split(' ')
+
+
+def _parse_or_address_line(descriptor, entries):
+ all_values = _values('or-address', entries)
+ or_addresses = []
+
+ for entry in all_values:
+ line = 'or-address %s' % entry
+
+ if ':' not in entry:
+ raise ValueError('or-address line missing a colon: %s' % line)
+
+ address, port = entry.rsplit(':', 1)
+ is_ipv6 = address.startswith('[') and address.endswith(']')
+
+ if is_ipv6:
+ address = address[1:-1] # remove brackets
+
+ if not ((not is_ipv6 and stem.util.connection.is_valid_ipv4_address(address)) or
+ (is_ipv6 and stem.util.connection.is_valid_ipv6_address(address))):
+ raise ValueError('or-address line has a malformed address: %s' % line)
+
+ if not stem.util.connection.is_valid_port(port):
+ raise ValueError('or-address line has a malformed port: %s' % line)
+
+ or_addresses.append((address, int(port), is_ipv6))
+
+ descriptor.or_addresses = or_addresses
+
+
+def _parse_history_line(keyword, history_end_attribute, history_interval_attribute, history_values_attribute, descriptor, entries):
+ value = _value(keyword, entries)
+ timestamp, interval, remainder = stem.descriptor.extrainfo_descriptor._parse_timestamp_and_interval(keyword, value)
+
+ try:
+ if remainder:
+ history_values = [int(entry) for entry in remainder.split(',')]
+ else:
+ history_values = []
+ except ValueError:
+ raise ValueError('%s line has non-numeric values: %s %s' % (keyword, keyword, value))
+
+ setattr(descriptor, history_end_attribute, timestamp)
+ setattr(descriptor, history_interval_attribute, interval)
+ setattr(descriptor, history_values_attribute, history_values)
+
+
+def _parse_exit_policy(descriptor, entries):
+ if hasattr(descriptor, '_unparsed_exit_policy'):
+ if descriptor._unparsed_exit_policy == [str_type('reject *:*')]:
+ descriptor.exit_policy = REJECT_ALL_POLICY
+ else:
+ descriptor.exit_policy = stem.exit_policy.ExitPolicy(*descriptor._unparsed_exit_policy)
+
+ del descriptor._unparsed_exit_policy
+
+
+_parse_contact_line = _parse_bytes_line('contact', 'contact')
+_parse_published_line = _parse_timestamp_line('published', 'published')
+_parse_extrainfo_digest_line = _parse_forty_character_hex('extra-info-digest', 'extra_info_digest')
+_parse_read_history_line = functools.partial(_parse_history_line, 'read-history', 'read_history_end', 'read_history_interval', 'read_history_values')
+_parse_write_history_line = functools.partial(_parse_history_line, 'write-history', 'write_history_end', 'write_history_interval', 'write_history_values')
+_parse_ipv6_policy_line = lambda descriptor, entries: setattr(descriptor, 'exit_policy_v6', stem.exit_policy.MicroExitPolicy(_value('ipv6-policy', entries)))
+_parse_allow_single_hop_exits_line = lambda descriptor, entries: setattr(descriptor, 'allow_single_hop_exits', True)
+_parse_caches_extra_info_line = lambda descriptor, entries: setattr(descriptor, 'extra_info_cache', True)
+_parse_family_line = lambda descriptor, entries: setattr(descriptor, 'family', set(_value('family', entries).split(' ')))
+_parse_eventdns_line = lambda descriptor, entries: setattr(descriptor, 'eventdns', _value('eventdns', entries) == '1')
+_parse_onion_key_line = _parse_key_block('onion-key', 'onion_key', 'RSA PUBLIC KEY')
+_parse_signing_key_line = _parse_key_block('signing-key', 'signing_key', 'RSA PUBLIC KEY')
+_parse_router_signature_line = _parse_key_block('router-signature', 'signature', 'SIGNATURE')
+_parse_ntor_onion_key_line = _parse_simple_line('ntor-onion-key', 'ntor_onion_key')
+_parse_router_digest_line = _parse_forty_character_hex('router-digest', '_digest')
+
+
class ServerDescriptor(Descriptor):
"""
Common parent for server descriptors.
@@ -218,6 +437,70 @@ class ServerDescriptor(Descriptor):
a default value, others are left as **None** if undefined
"""
+ ATTRIBUTES = {
+ 'nickname': (None, _parse_router_line),
+ 'fingerprint': (None, _parse_fingerprint_line),
+ 'contact': (None, _parse_contact_line),
+ 'published': (None, _parse_published_line),
+ 'exit_policy': (None, _parse_exit_policy),
+
+ 'address': (None, _parse_router_line),
+ 'or_port': (None, _parse_router_line),
+ 'socks_port': (None, _parse_router_line),
+ 'dir_port': (None, _parse_router_line),
+
+ 'platform': (None, _parse_platform_line),
+ 'tor_version': (None, _parse_platform_line),
+ 'operating_system': (None, _parse_platform_line),
+ 'uptime': (None, _parse_uptime_line),
+ 'exit_policy_v6': (DEFAULT_IPV6_EXIT_POLICY, _parse_ipv6_policy_line),
+ 'family': (set(), _parse_family_line),
+
+ 'average_bandwidth': (None, _parse_bandwidth_line),
+ 'burst_bandwidth': (None, _parse_bandwidth_line),
+ 'observed_bandwidth': (None, _parse_bandwidth_line),
+
+ 'link_protocols': (None, _parse_protocols_line),
+ 'circuit_protocols': (None, _parse_protocols_line),
+ 'hibernating': (False, _parse_hibernating_line),
+ 'allow_single_hop_exits': (False, _parse_allow_single_hop_exits_line),
+ 'extra_info_cache': (False, _parse_caches_extra_info_line),
+ 'extra_info_digest': (None, _parse_extrainfo_digest_line),
+ 'hidden_service_dir': (None, _parse_hidden_service_dir_line),
+ 'eventdns': (None, _parse_eventdns_line),
+ 'or_addresses': ([], _parse_or_address_line),
+
+ 'read_history_end': (None, _parse_read_history_line),
+ 'read_history_interval': (None, _parse_read_history_line),
+ 'read_history_values': (None, _parse_read_history_line),
+
+ 'write_history_end': (None, _parse_write_history_line),
+ 'write_history_interval': (None, _parse_write_history_line),
+ 'write_history_values': (None, _parse_write_history_line),
+ }
+
+ PARSER_FOR_LINE = {
+ 'router': _parse_router_line,
+ 'bandwidth': _parse_bandwidth_line,
+ 'platform': _parse_platform_line,
+ 'published': _parse_published_line,
+ 'fingerprint': _parse_fingerprint_line,
+ 'contact': _parse_contact_line,
+ 'hibernating': _parse_hibernating_line,
+ 'extra-info-digest': _parse_extrainfo_digest_line,
+ 'hidden-service-dir': _parse_hidden_service_dir_line,
+ 'uptime': _parse_uptime_line,
+ 'protocols': _parse_protocols_line,
+ 'or-address': _parse_or_address_line,
+ 'read-history': _parse_read_history_line,
+ 'write-history': _parse_write_history_line,
+ 'ipv6-policy': _parse_ipv6_policy_line,
+ 'allow-single-hop-exits': _parse_allow_single_hop_exits_line,
+ 'caches-extra-info': _parse_caches_extra_info_line,
+ 'family': _parse_family_line,
+ 'eventdns': _parse_eventdns_line,
+ }
+
def __init__(self, raw_contents, validate = True, annotations = None):
"""
Server descriptor constructor, created from an individual relay's
@@ -236,56 +519,7 @@ class ServerDescriptor(Descriptor):
:raises: **ValueError** if the contents is malformed and validate is True
"""
- super(ServerDescriptor, self).__init__(raw_contents)
-
- # Only a few things can be arbitrary bytes according to the dir-spec, so
- # parsing them separately.
-
- self.platform = _get_bytes_field('platform', raw_contents)
- self.contact = _get_bytes_field('contact', raw_contents)
-
- raw_contents = stem.util.str_tools._to_unicode(raw_contents)
-
- self.nickname = None
- self.fingerprint = None
- self.published = None
-
- self.address = None
- self.or_port = None
- self.socks_port = None
- self.dir_port = None
-
- self.tor_version = None
- self.operating_system = None
- self.uptime = None
- self.exit_policy = None
- self.exit_policy_v6 = DEFAULT_IPV6_EXIT_POLICY
- self.family = set()
-
- self.average_bandwidth = None
- self.burst_bandwidth = None
- self.observed_bandwidth = None
-
- self.link_protocols = None
- self.circuit_protocols = None
- self.hibernating = False
- self.allow_single_hop_exits = False
- self.extra_info_cache = False
- self.extra_info_digest = None
- self.hidden_service_dir = None
- self.eventdns = None
- self.or_addresses = []
-
- self.read_history_end = None
- self.read_history_interval = None
- self.read_history_values = None
-
- self.write_history_end = None
- self.write_history_interval = None
- self.write_history_values = None
-
- self._unrecognized_lines = []
-
+ super(ServerDescriptor, self).__init__(raw_contents, lazy_load = not validate)
self._annotation_lines = annotations if annotations else []
# A descriptor contains a series of 'keyword lines' which are simply a
@@ -296,17 +530,23 @@ class ServerDescriptor(Descriptor):
# influences the resulting exit policy, but for everything else the order
# does not matter so breaking it into key / value pairs.
- entries, policy = _get_descriptor_components(raw_contents, validate, ('accept', 'reject'))
+ entries, self._unparsed_exit_policy = _get_descriptor_components(stem.util.str_tools._to_unicode(raw_contents), validate, ('accept', 'reject'))
- if policy == [str_type('reject *:*')]:
- self.exit_policy = REJECT_ALL_POLICY
- else:
- self.exit_policy = stem.exit_policy.ExitPolicy(*policy)
+ if validate:
+ self._parse(entries, validate)
- self._parse(entries, validate)
+ _parse_exit_policy(self, entries)
+
+ # if we have a negative uptime and a tor version that shouldn't exhibit
+ # this bug then fail validation
+
+ if validate and self.uptime and self.tor_version:
+ if self.uptime < 0 and self.tor_version >= stem.version.Version('0.1.2.7'):
+ raise ValueError("Descriptor for version '%s' had a negative uptime value: %i" % (self.tor_version, self.uptime))
- if validate:
self._check_constraints(entries)
+ else:
+ self._entries = entries
def digest(self):
"""
@@ -318,9 +558,6 @@ class ServerDescriptor(Descriptor):
raise NotImplementedError('Unsupported Operation: this should be implemented by the ServerDescriptor subclass')
- def get_unrecognized_lines(self):
- return list(self._unrecognized_lines)
-
@lru_cache()
def get_annotations(self):
"""
@@ -358,250 +595,6 @@ class ServerDescriptor(Descriptor):
return self._annotation_lines
- def _parse(self, entries, validate):
- """
- Parses a series of 'keyword => (value, pgp block)' mappings and applies
- them as attributes.
-
- :param dict entries: descriptor contents to be applied
- :param bool validate: checks the validity of descriptor content if **True**
-
- :raises: **ValueError** if an error occurs in validation
- """
-
- for keyword, values in list(entries.items()):
- # most just work with the first (and only) value
- value, block_type, block_contents = values[0]
-
- line = '%s %s' % (keyword, value) # original line
-
- if block_contents:
- line += '\n%s' % block_contents
-
- if keyword == 'router':
- # "router" nickname address ORPort SocksPort DirPort
- router_comp = value.split()
-
- if len(router_comp) < 5:
- if not validate:
- continue
-
- raise ValueError('Router line must have five values: %s' % line)
-
- if validate:
- if not stem.util.tor_tools.is_valid_nickname(router_comp[0]):
- raise ValueError("Router line entry isn't a valid nickname: %s" % router_comp[0])
- elif not stem.util.connection.is_valid_ipv4_address(router_comp[1]):
- raise ValueError("Router line entry isn't a valid IPv4 address: %s" % router_comp[1])
- elif not stem.util.connection.is_valid_port(router_comp[2], allow_zero = True):
- raise ValueError("Router line's ORPort is invalid: %s" % router_comp[2])
- elif not stem.util.connection.is_valid_port(router_comp[3], allow_zero = True):
- raise ValueError("Router line's SocksPort is invalid: %s" % router_comp[3])
- elif not stem.util.connection.is_valid_port(router_comp[4], allow_zero = True):
- raise ValueError("Router line's DirPort is invalid: %s" % router_comp[4])
- elif not (router_comp[2].isdigit() and router_comp[3].isdigit() and router_comp[4].isdigit()):
- continue
-
- self.nickname = router_comp[0]
- self.address = router_comp[1]
- self.or_port = int(router_comp[2])
- self.socks_port = None if router_comp[3] == '0' else int(router_comp[3])
- self.dir_port = None if router_comp[4] == '0' else int(router_comp[4])
- elif keyword == 'bandwidth':
- # "bandwidth" bandwidth-avg bandwidth-burst bandwidth-observed
- bandwidth_comp = value.split()
-
- if len(bandwidth_comp) < 3:
- if not validate:
- continue
-
- raise ValueError('Bandwidth line must have three values: %s' % line)
- elif not bandwidth_comp[0].isdigit():
- if not validate:
- continue
-
- raise ValueError("Bandwidth line's average rate isn't numeric: %s" % bandwidth_comp[0])
- elif not bandwidth_comp[1].isdigit():
- if not validate:
- continue
-
- raise ValueError("Bandwidth line's burst rate isn't numeric: %s" % bandwidth_comp[1])
- elif not bandwidth_comp[2].isdigit():
- if not validate:
- continue
-
- raise ValueError("Bandwidth line's observed rate isn't numeric: %s" % bandwidth_comp[2])
-
- self.average_bandwidth = int(bandwidth_comp[0])
- self.burst_bandwidth = int(bandwidth_comp[1])
- self.observed_bandwidth = int(bandwidth_comp[2])
- elif keyword == 'platform':
- # "platform" string
-
- # The platform attribute was set earlier. This line can contain any
- # arbitrary data, but tor seems to report its version followed by the
- # os like the following...
- #
- # platform Tor 0.2.2.35 (git-73ff13ab3cc9570d) on Linux x86_64
- #
- # There's no guarantee that we'll be able to pick these out the
- # version, but might as well try to save our caller the effort.
-
- platform_match = re.match('^(?:node-)?Tor (\S*).* on (.*)$', value)
-
- if platform_match:
- version_str, self.operating_system = platform_match.groups()
-
- try:
- self.tor_version = stem.version._get_version(version_str)
- except ValueError:
- pass
- elif keyword == 'published':
- # "published" YYYY-MM-DD HH:MM:SS
-
- try:
- self.published = stem.util.str_tools._parse_timestamp(value)
- except ValueError:
- if validate:
- raise ValueError("Published line's time wasn't parsable: %s" % line)
- elif keyword == 'fingerprint':
- # This is forty hex digits split into space separated groups of four.
- # Checking that we match this pattern.
-
- fingerprint = value.replace(' ', '')
-
- if validate:
- for grouping in value.split(' '):
- if len(grouping) != 4:
- raise ValueError('Fingerprint line should have groupings of four hex digits: %s' % value)
-
- if not stem.util.tor_tools.is_valid_fingerprint(fingerprint):
- raise ValueError('Tor relay fingerprints consist of forty hex digits: %s' % value)
-
- self.fingerprint = fingerprint
- elif keyword == 'hibernating':
- # "hibernating" 0|1 (in practice only set if one)
-
- if validate and value not in ('0', '1'):
- raise ValueError('Hibernating line had an invalid value, must be zero or one: %s' % value)
-
- self.hibernating = value == '1'
- elif keyword == 'allow-single-hop-exits':
- self.allow_single_hop_exits = True
- elif keyword == 'caches-extra-info':
- self.extra_info_cache = True
- elif keyword == 'extra-info-digest':
- # this is forty hex digits which just so happens to be the same a
- # fingerprint
-
- if validate and not stem.util.tor_tools.is_valid_fingerprint(value):
- raise ValueError('Extra-info digests should consist of forty hex digits: %s' % value)
-
- self.extra_info_digest = value
- elif keyword == 'hidden-service-dir':
- if value:
- self.hidden_service_dir = value.split(' ')
- else:
- self.hidden_service_dir = ['2']
- elif keyword == 'uptime':
- # We need to be tolerant of negative uptimes to accommodate a past tor
- # bug...
- #
- # Changes in version 0.1.2.7-alpha - 2007-02-06
- # - If our system clock jumps back in time, don't publish a negative
- # uptime in the descriptor. Also, don't let the global rate limiting
- # buckets go absurdly negative.
- #
- # After parsing all of the attributes we'll double check that negative
- # uptimes only occurred prior to this fix.
-
- try:
- self.uptime = int(value)
- except ValueError:
- if not validate:
- continue
-
- raise ValueError('Uptime line must have an integer value: %s' % value)
- elif keyword == 'contact':
- pass # parsed as a bytes field earlier
- elif keyword == 'protocols':
- protocols_match = re.match('^Link (.*) Circuit (.*)$', value)
-
- if protocols_match:
- link_versions, circuit_versions = protocols_match.groups()
- self.link_protocols = link_versions.split(' ')
- self.circuit_protocols = circuit_versions.split(' ')
- elif validate:
- raise ValueError('Protocols line did not match the expected pattern: %s' % line)
- elif keyword == 'family':
- self.family = set(value.split(' '))
- elif keyword == 'eventdns':
- self.eventdns = value == '1'
- elif keyword == 'ipv6-policy':
- self.exit_policy_v6 = stem.exit_policy.MicroExitPolicy(value)
- elif keyword == 'or-address':
- or_address_entries = [address_entry for (address_entry, _, _) in values]
-
- for entry in or_address_entries:
- line = '%s %s' % (keyword, entry)
-
- if ':' not in entry:
- if not validate:
- continue
- else:
- raise ValueError('or-address line missing a colon: %s' % line)
-
- address, port = entry.rsplit(':', 1)
- is_ipv6 = address.startswith('[') and address.endswith(']')
-
- if is_ipv6:
- address = address[1:-1] # remove brackets
-
- if not ((not is_ipv6 and stem.util.connection.is_valid_ipv4_address(address)) or
- (is_ipv6 and stem.util.connection.is_valid_ipv6_address(address))):
- if not validate:
- continue
- else:
- raise ValueError('or-address line has a malformed address: %s' % line)
-
- if stem.util.connection.is_valid_port(port):
- self.or_addresses.append((address, int(port), is_ipv6))
- elif validate:
- raise ValueError('or-address line has a malformed port: %s' % line)
- elif keyword in ('read-history', 'write-history'):
- try:
- timestamp, interval, remainder = \
- stem.descriptor.extrainfo_descriptor._parse_timestamp_and_interval(keyword, value)
-
- try:
- if remainder:
- history_values = [int(entry) for entry in remainder.split(',')]
- else:
- history_values = []
- except ValueError:
- raise ValueError('%s line has non-numeric values: %s' % (keyword, line))
-
- if keyword == 'read-history':
- self.read_history_end = timestamp
- self.read_history_interval = interval
- self.read_history_values = history_values
- else:
- self.write_history_end = timestamp
- self.write_history_interval = interval
- self.write_history_values = history_values
- except ValueError as exc:
- if validate:
- raise exc
- else:
- self._unrecognized_lines.append(line)
-
- # if we have a negative uptime and a tor version that shouldn't exhibit
- # this bug then fail validation
-
- if validate and self.uptime and self.tor_version:
- if self.uptime < 0 and self.tor_version >= stem.version.Version('0.1.2.7'):
- raise ValueError("Descriptor for version '%s' had a negative uptime value: %i" % (self.tor_version, self.uptime))
-
def _check_constraints(self, entries):
"""
Does a basic check that the entries conform to this descriptor type's
@@ -660,12 +653,21 @@ class RelayDescriptor(ServerDescriptor):
**\*** attribute is required when we're parsed with validation
"""
- def __init__(self, raw_contents, validate = True, annotations = None):
- self.onion_key = None
- self.ntor_onion_key = None
- self.signing_key = None
- self.signature = None
+ ATTRIBUTES = dict(ServerDescriptor.ATTRIBUTES, **{
+ 'onion_key': (None, _parse_onion_key_line),
+ 'ntor_onion_key': (None, _parse_ntor_onion_key_line),
+ 'signing_key': (None, _parse_signing_key_line),
+ 'signature': (None, _parse_router_signature_line),
+ })
+
+ PARSER_FOR_LINE = dict(ServerDescriptor.PARSER_FOR_LINE, **{
+ 'onion-key': _parse_onion_key_line,
+ 'ntor-onion-key': _parse_ntor_onion_key_line,
+ 'signing-key': _parse_signing_key_line,
+ 'router-signature': _parse_router_signature_line,
+ })
+ def __init__(self, raw_contents, validate = True, annotations = None):
super(RelayDescriptor, self).__init__(raw_contents, validate, annotations)
# validate the descriptor if required
@@ -789,39 +791,6 @@ class RelayDescriptor(ServerDescriptor):
if digest != local_digest:
raise ValueError('Decrypted digest does not match local digest (calculated: %s, local: %s)' % (digest, local_digest))
- def _parse(self, entries, validate):
- entries = dict(entries) # shallow copy since we're destructive
-
- # handles fields only in server descriptors
-
- for keyword, values in list(entries.items()):
- value, block_type, block_contents = values[0]
- line = '%s %s' % (keyword, value)
-
- if keyword == 'onion-key':
- if validate and (not block_contents or block_type != 'RSA PUBLIC KEY'):
- raise ValueError("'onion-key' should be followed by a RSA PUBLIC KEY block: %s" % line)
-
- self.onion_key = block_contents
- del entries['onion-key']
- elif keyword == 'ntor-onion-key':
- self.ntor_onion_key = value
- del entries['ntor-onion-key']
- elif keyword == 'signing-key':
- if validate and (not block_contents or block_type != 'RSA PUBLIC KEY'):
- raise ValueError("'signing-key' should be followed by a RSA PUBLIC KEY block: %s" % line)
-
- self.signing_key = block_contents
- del entries['signing-key']
- elif keyword == 'router-signature':
- if validate and (not block_contents or block_type != 'SIGNATURE'):
- raise ValueError("'router-signature' should be followed by a SIGNATURE block: %s" % line)
-
- self.signature = block_contents
- del entries['router-signature']
-
- ServerDescriptor._parse(self, entries, validate)
-
def _compare(self, other, method):
if not isinstance(other, RelayDescriptor):
return False
@@ -861,31 +830,17 @@ class BridgeDescriptor(ServerDescriptor):
<https://collector.torproject.org/formats.html#bridge-descriptors>`_)
"""
- def __init__(self, raw_contents, validate = True, annotations = None):
- self._digest = None
+ ATTRIBUTES = dict(ServerDescriptor.ATTRIBUTES, **{
+ '_digest': (None, _parse_router_digest_line),
+ })
- super(BridgeDescriptor, self).__init__(raw_contents, validate, annotations)
+ PARSER_FOR_LINE = dict(ServerDescriptor.PARSER_FOR_LINE, **{
+ 'router-digest': _parse_router_digest_line,
+ })
def digest(self):
return self._digest
- def _parse(self, entries, validate):
- entries = dict(entries)
-
- # handles fields only in bridge descriptors
- for keyword, values in list(entries.items()):
- value, block_type, block_contents = values[0]
- line = '%s %s' % (keyword, value)
-
- if keyword == 'router-digest':
- if validate and not stem.util.tor_tools.is_hex_digits(value, 40):
- raise ValueError('Router digest line had an invalid sha1 digest: %s' % line)
-
- self._digest = stem.util.str_tools._to_unicode(value)
- del entries['router-digest']
-
- ServerDescriptor._parse(self, entries, validate)
-
def is_scrubbed(self):
"""
Checks if we've been properly scrubbed in accordance with the `bridge
diff --git a/test/unit/descriptor/extrainfo_descriptor.py b/test/unit/descriptor/extrainfo_descriptor.py
index 7e670194..525ff06f 100644
--- a/test/unit/descriptor/extrainfo_descriptor.py
+++ b/test/unit/descriptor/extrainfo_descriptor.py
@@ -200,10 +200,10 @@ k0d2aofcVbHr4fPQOSST0LXDrhFl5Fqo5um296zpJGvRUeO6S44U/EfJAGShtqWw
for entry in test_entries:
desc_text = get_relay_extrainfo_descriptor({'geoip-db-digest': entry}, content = True)
- self._expect_invalid_attr(desc_text, 'geoip_db_digest', entry)
+ self._expect_invalid_attr(desc_text, 'geoip_db_digest')
desc_text = get_relay_extrainfo_descriptor({'geoip6-db-digest': entry}, content = True)
- self._expect_invalid_attr(desc_text, 'geoip6_db_digest', entry)
+ self._expect_invalid_attr(desc_text, 'geoip6_db_digest')
def test_cell_circuits_per_decile(self):
"""
@@ -257,8 +257,8 @@ k0d2aofcVbHr4fPQOSST0LXDrhFl5Fqo5um296zpJGvRUeO6S44U/EfJAGShtqWw
for entry in test_entries:
desc_text = get_relay_extrainfo_descriptor({keyword: entry}, content = True)
desc = self._expect_invalid_attr(desc_text)
- self.assertEqual({}, getattr(desc, attr))
- self.assertEqual({}, getattr(desc, unknown_attr))
+ self.assertEqual(None, getattr(desc, attr))
+ self.assertEqual(None, getattr(desc, unknown_attr))
def test_dir_stat_lines(self):
"""
@@ -299,8 +299,8 @@ k0d2aofcVbHr4fPQOSST0LXDrhFl5Fqo5um296zpJGvRUeO6S44U/EfJAGShtqWw
for entry in test_entries:
desc_text = get_relay_extrainfo_descriptor({keyword: entry}, content = True)
desc = self._expect_invalid_attr(desc_text)
- self.assertEqual({}, getattr(desc, attr))
- self.assertEqual({}, getattr(desc, unknown_attr))
+ self.assertEqual(None, getattr(desc, attr))
+ self.assertEqual(None, getattr(desc, unknown_attr))
def test_conn_bi_direct(self):
"""
@@ -360,15 +360,15 @@ k0d2aofcVbHr4fPQOSST0LXDrhFl5Fqo5um296zpJGvRUeO6S44U/EfJAGShtqWw
self.assertEqual(expected_value, getattr(desc, attr))
test_entries = (
- ('', None),
- (' ', None),
- ('100', None),
- ('-5%', -0.05),
+ (''),
+ (' '),
+ ('100'),
+ ('-5%'),
)
- for entry, expected in test_entries:
+ for entry in test_entries:
desc_text = get_relay_extrainfo_descriptor({keyword: entry}, content = True)
- self._expect_invalid_attr(desc_text, attr, expected)
+ self._expect_invalid_attr(desc_text, attr)
def test_number_list_lines(self):
"""
@@ -525,7 +525,7 @@ k0d2aofcVbHr4fPQOSST0LXDrhFl5Fqo5um296zpJGvRUeO6S44U/EfJAGShtqWw
for entry in test_entries:
desc_text = get_relay_extrainfo_descriptor({keyword: entry}, content = True)
- self._expect_invalid_attr(desc_text, attr, {})
+ self._expect_invalid_attr(desc_text, attr)
def test_locale_mapping_lines(self):
"""
@@ -554,7 +554,7 @@ k0d2aofcVbHr4fPQOSST0LXDrhFl5Fqo5um296zpJGvRUeO6S44U/EfJAGShtqWw
for entry in test_entries:
desc_text = get_relay_extrainfo_descriptor({keyword: entry}, content = True)
- self._expect_invalid_attr(desc_text, attr, {})
+ self._expect_invalid_attr(desc_text, attr)
def test_minimal_bridge_descriptor(self):
"""
diff --git a/test/unit/descriptor/networkstatus/directory_authority.py b/test/unit/descriptor/networkstatus/directory_authority.py
index 1114518f..a5bc647d 100644
--- a/test/unit/descriptor/networkstatus/directory_authority.py
+++ b/test/unit/descriptor/networkstatus/directory_authority.py
@@ -164,7 +164,7 @@ class TestDirectoryAuthority(unittest.TestCase):
self.assertRaises(ValueError, DirectoryAuthority, content)
authority = DirectoryAuthority(content, False)
- self.assertEqual(value, authority.fingerprint)
+ self.assertEqual(None, authority.fingerprint)
def test_malformed_address(self):
"""
@@ -186,7 +186,7 @@ class TestDirectoryAuthority(unittest.TestCase):
self.assertRaises(ValueError, DirectoryAuthority, content)
authority = DirectoryAuthority(content, False)
- self.assertEqual(value, authority.address)
+ self.assertEqual(None, authority.address)
def test_malformed_port(self):
"""
@@ -219,9 +219,8 @@ class TestDirectoryAuthority(unittest.TestCase):
authority = DirectoryAuthority(content, False)
- expected_value = 399482 if value == '399482' else None
actual_value = authority.or_port if include_or_port else authority.dir_port
- self.assertEqual(expected_value, actual_value)
+ self.assertEqual(None, actual_value)
def test_legacy_dir_key(self):
"""
@@ -247,7 +246,7 @@ class TestDirectoryAuthority(unittest.TestCase):
self.assertRaises(ValueError, DirectoryAuthority, content)
authority = DirectoryAuthority(content, False)
- self.assertEqual(value, authority.legacy_dir_key)
+ self.assertEqual(None, authority.legacy_dir_key)
def test_key_certificate(self):
"""
diff --git a/test/unit/descriptor/networkstatus/document_v3.py b/test/unit/descriptor/networkstatus/document_v3.py
index 4b8efc41..b0bb14a0 100644
--- a/test/unit/descriptor/networkstatus/document_v3.py
+++ b/test/unit/descriptor/networkstatus/document_v3.py
@@ -586,19 +586,21 @@ DnN5aFtYKiTc19qIC7Nmo+afPdDEf0MlJvEOP5EWl3w=
self.assertEqual(None, document.consensus_method)
test_values = (
- ('', []),
- (' ', []),
- ('1 2 3 a 5', [1, 2, 3, 5]),
- ('1 2 3 4.0 5', [1, 2, 3, 5]),
- ('2 3 4', [2, 3, 4]), # spec says version one must be included
+ (''),
+ (' '),
+ ('1 2 3 a 5'),
+ ('1 2 3 4.0 5'),
+ ('2 3 4'), # spec says version one must be included
)
- for test_value, expected_consensus_methods in test_values:
+ for test_value in test_values:
content = get_network_status_document_v3({'vote-status': 'vote', 'consensus-methods': test_value}, content = True)
self.assertRaises(ValueError, NetworkStatusDocumentV3, content)
+ expected_value = [2, 3, 4] if test_value == '2 3 4' else [1]
+
document = NetworkStatusDocumentV3(content, False)
- self.assertEqual(expected_consensus_methods, document.consensus_methods)
+ self.assertEqual(expected_value, document.consensus_methods)
def test_consensus_method(self):
"""
@@ -708,21 +710,21 @@ DnN5aFtYKiTc19qIC7Nmo+afPdDEf0MlJvEOP5EWl3w=
self.assertEqual(expected, document.server_versions)
test_values = (
- ('', []),
- (' ', []),
- ('1.2.3.4,', [stem.version.Version('1.2.3.4')]),
- ('1.2.3.4,1.2.3.a', [stem.version.Version('1.2.3.4')]),
+ (''),
+ (' '),
+ ('1.2.3.4,'),
+ ('1.2.3.4,1.2.3.a'),
)
for field in ('client-versions', 'server-versions'):
attr = field.replace('-', '_')
- for test_value, expected_value in test_values:
+ for test_value in test_values:
content = get_network_status_document_v3({field: test_value}, content = True)
self.assertRaises(ValueError, NetworkStatusDocumentV3, content)
document = NetworkStatusDocumentV3(content, False)
- self.assertEqual(expected_value, getattr(document, attr))
+ self.assertEqual([], getattr(document, attr))
def test_known_flags(self):
"""
@@ -872,7 +874,7 @@ DnN5aFtYKiTc19qIC7Nmo+afPdDEf0MlJvEOP5EWl3w=
self.assertRaises(ValueError, NetworkStatusDocumentV3, content)
document = NetworkStatusDocumentV3(content, False, default_params = False)
- self.assertEqual({'unrecognized': -122, 'bwauthpid': 1}, document.params)
+ self.assertEqual({}, document.params)
def test_footer_consensus_method_requirement(self):
"""
@@ -951,7 +953,6 @@ DnN5aFtYKiTc19qIC7Nmo+afPdDEf0MlJvEOP5EWl3w=
)
base_weight_entry = ' '.join(['%s=5' % e for e in BANDWIDTH_WEIGHT_ENTRIES])
- expected = dict([(e, 5) for e in BANDWIDTH_WEIGHT_ENTRIES if e != 'Wbe'])
for test_value in test_values:
weight_entry = base_weight_entry.replace('Wbe=5', test_value)
@@ -959,7 +960,7 @@ DnN5aFtYKiTc19qIC7Nmo+afPdDEf0MlJvEOP5EWl3w=
self.assertRaises(ValueError, NetworkStatusDocumentV3, content)
document = NetworkStatusDocumentV3(content, False)
- self.assertEqual(expected, document.bandwidth_weights)
+ self.assertEqual({}, document.bandwidth_weights)
def test_bandwidth_wights_misordered(self):
"""
@@ -967,13 +968,12 @@ DnN5aFtYKiTc19qIC7Nmo+afPdDEf0MlJvEOP5EWl3w=
"""
weight_entry = ' '.join(['%s=5' % e for e in reversed(BANDWIDTH_WEIGHT_ENTRIES)])
- expected = dict([(e, 5) for e in BANDWIDTH_WEIGHT_ENTRIES])
content = get_network_status_document_v3({'bandwidth-weights': weight_entry}, content = True)
self.assertRaises(ValueError, NetworkStatusDocumentV3, content)
document = NetworkStatusDocumentV3(content, False)
- self.assertEqual(expected, document.bandwidth_weights)
+ self.assertEqual({}, document.bandwidth_weights)
def test_bandwidth_wights_in_vote(self):
"""
diff --git a/test/unit/descriptor/networkstatus/key_certificate.py b/test/unit/descriptor/networkstatus/key_certificate.py
index 8939c7e4..12facaa6 100644
--- a/test/unit/descriptor/networkstatus/key_certificate.py
+++ b/test/unit/descriptor/networkstatus/key_certificate.py
@@ -112,23 +112,23 @@ class TestKeyCertificate(unittest.TestCase):
self.assertEqual(80, certificate.dir_port)
test_values = (
- ('', None, None),
- (' ', None, None),
- ('127.0.0.1', None, None),
- ('127.0.0.1:', None, None),
- ('80', None, None),
- (':80', '', 80),
- ('127.0.0.1a:80', '127.0.0.1a', 80),
- ('127.0.0.1:80a', None, None),
+ (''),
+ (' '),
+ ('127.0.0.1'),
+ ('127.0.0.1:'),
+ ('80'),
+ (':80'),
+ ('127.0.0.1a:80'),
+ ('127.0.0.1:80a'),
)
- for test_value, expected_address, expected_port in test_values:
+ for test_value in test_values:
content = get_key_certificate({'dir-address': test_value}, content = True)
self.assertRaises(ValueError, KeyCertificate, content)
certificate = KeyCertificate(content, False)
- self.assertEqual(expected_address, certificate.address)
- self.assertEqual(expected_port, certificate.dir_port)
+ self.assertEqual(None, certificate.address)
+ self.assertEqual(None, certificate.dir_port)
def test_fingerprint(self):
"""
@@ -147,7 +147,7 @@ class TestKeyCertificate(unittest.TestCase):
self.assertRaises(ValueError, KeyCertificate, content)
certificate = KeyCertificate(content, False)
- self.assertEqual(test_value.strip(), certificate.fingerprint)
+ self.assertEqual(None, certificate.fingerprint)
def test_time_fields(self):
"""
diff --git a/test/unit/descriptor/router_status_entry.py b/test/unit/descriptor/router_status_entry.py
index 50924def..1243a55b 100644
--- a/test/unit/descriptor/router_status_entry.py
+++ b/test/unit/descriptor/router_status_entry.py
@@ -39,8 +39,7 @@ class TestRouterStatusEntry(unittest.TestCase):
# checks with some malformed inputs
for arg in ('', '20wYcb', '20wYcb' * 30):
- self.assertRaises(ValueError, _base64_to_hex, arg, True)
- self.assertEqual(None, _base64_to_hex(arg, False))
+ self.assertRaises(ValueError, _base64_to_hex, arg)
def test_minimal_v2(self):
"""
@@ -138,7 +137,8 @@ class TestRouterStatusEntry(unittest.TestCase):
"""
content = b'z some stuff\n' + get_router_status_entry_v3(content = True)
- self._expect_invalid_attr(content, '_unrecognized_lines', ['z some stuff'])
+ self.assertRaises(ValueError, RouterStatusEntryV3, content)
+ self.assertEqual(['z some stuff'], RouterStatusEntryV3(content, False).get_unrecognized_lines())
def test_blank_lines(self):
"""
@@ -215,7 +215,7 @@ class TestRouterStatusEntry(unittest.TestCase):
if value == '':
value = None
- self._expect_invalid_attr(content, 'nickname', value)
+ self._expect_invalid_attr(content, 'nickname')
def test_malformed_fingerprint(self):
"""
@@ -275,7 +275,7 @@ class TestRouterStatusEntry(unittest.TestCase):
for value in test_values:
r_line = ROUTER_STATUS_ENTRY_V3_HEADER[0][1].replace('71.35.150.29', value)
content = get_router_status_entry_v3({'r': r_line}, content = True)
- self._expect_invalid_attr(content, 'address', value)
+ self._expect_invalid_attr(content, 'address')
def test_malformed_port(self):
"""
@@ -304,10 +304,9 @@ class TestRouterStatusEntry(unittest.TestCase):
r_line = r_line[:-1] + value
attr = 'or_port' if include_or_port else 'dir_port'
- expected = int(value) if value.isdigit() else None
content = get_router_status_entry_v3({'r': r_line}, content = True)
- self._expect_invalid_attr(content, attr, expected)
+ self._expect_invalid_attr(content, attr)
def test_ipv6_addresses(self):
"""
diff --git a/test/unit/descriptor/server_descriptor.py b/test/unit/descriptor/server_descriptor.py
index c57c476e..f2617989 100644
--- a/test/unit/descriptor/server_descriptor.py
+++ b/test/unit/descriptor/server_descriptor.py
@@ -378,7 +378,7 @@ Qlx9HNCqCY877ztFRC624ja2ql6A2hBcuoYMbkHjcQ4=
"""
desc_text = get_relay_server_descriptor({'router': 'saberrider2008ReallyLongNickname 71.35.133.197 9001 0 0'}, content = True)
- self._expect_invalid_attr(desc_text, 'nickname', 'saberrider2008ReallyLongNickname')
+ self._expect_invalid_attr(desc_text, 'nickname')
def test_nickname_invalid_char(self):
"""
@@ -386,7 +386,7 @@ Qlx9HNCqCY877ztFRC624ja2ql6A2hBcuoYMbkHjcQ4=
"""
desc_text = get_relay_server_descriptor({'router': '$aberrider2008 71.35.133.197 9001 0 0'}, content = True)
- self._expect_invalid_attr(desc_text, 'nickname', '$aberrider2008')
+ self._expect_invalid_attr(desc_text, 'nickname')
def test_address_malformed(self):
"""
@@ -394,7 +394,7 @@ Qlx9HNCqCY877ztFRC624ja2ql6A2hBcuoYMbkHjcQ4=
"""
desc_text = get_relay_server_descriptor({'router': 'caerSidi 371.35.133.197 9001 0 0'}, content = True)
- self._expect_invalid_attr(desc_text, 'address', '371.35.133.197')
+ self._expect_invalid_attr(desc_text, 'address')
def test_port_too_high(self):
"""
@@ -402,7 +402,7 @@ Qlx9HNCqCY877ztFRC624ja2ql6A2hBcuoYMbkHjcQ4=
"""
desc_text = get_relay_server_descriptor({'router': 'caerSidi 71.35.133.197 900001 0 0'}, content = True)
- self._expect_invalid_attr(desc_text, 'or_port', 900001)
+ self._expect_invalid_attr(desc_text, 'or_port')
def test_port_malformed(self):
"""
@@ -672,7 +672,7 @@ Qlx9HNCqCY877ztFRC624ja2ql6A2hBcuoYMbkHjcQ4=
self.assertRaises(ValueError, BridgeDescriptor, desc_text)
desc = BridgeDescriptor(desc_text, validate = False)
- self.assertEqual(value, desc.digest())
+ self.assertEqual(None, desc.digest())
def test_or_address_v4(self):
"""