diff options
| author | Karsten Loesing <karsten.loesing@gmx.net> | 2019-08-30 11:09:23 +0200 |
|---|---|---|
| committer | Karsten Loesing <karsten.loesing@gmx.net> | 2019-08-30 11:23:49 +0200 |
| commit | db2a6bdab1bfc12377c515577589aa67d34fa2ba (patch) | |
| tree | 9ea4cc5ab0f7c29ed9bfaed2e6c7b55d61619475 | |
| parent | 801008431115951fe3b0f967c7d4ca0469610ae8 (diff) | |
Archive bridge pool assignments again.
Implements #31558.
12 files changed, 305 insertions, 1 deletion
diff --git a/CHANGELOG.md b/CHANGELOG.md index 4fd0401..1da4b10 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ versions resolved by Ivy are the same as in Debian stretch with few exceptions. - Remove Cobertura from the build process. + - Archive bridge pool assignments again. # Changes in version 1.9.1 - 2019-05-29 diff --git a/src/main/java/org/torproject/metrics/collector/Main.java b/src/main/java/org/torproject/metrics/collector/Main.java index 46e93af..5539a43 100644 --- a/src/main/java/org/torproject/metrics/collector/Main.java +++ b/src/main/java/org/torproject/metrics/collector/Main.java @@ -4,6 +4,7 @@ package org.torproject.metrics.collector; import org.torproject.metrics.collector.bridgedescs.SanitizedBridgesWriter; +import org.torproject.metrics.collector.bridgepools.BridgePoolAssignmentsProcessor; import org.torproject.metrics.collector.conf.Configuration; import org.torproject.metrics.collector.conf.ConfigurationException; import org.torproject.metrics.collector.conf.Key; @@ -48,6 +49,8 @@ public class Main { static { // add a new main class here collecTorMains.put(Key.BridgedescsActivated, SanitizedBridgesWriter.class); + collecTorMains.put(Key.BridgePoolAssignmentsActivated, + BridgePoolAssignmentsProcessor.class); collecTorMains.put(Key.ExitlistsActivated, ExitListDownloader.class); collecTorMains.put(Key.UpdateindexActivated, CreateIndexJson.class); collecTorMains.put(Key.RelaydescsActivated, ArchiveWriter.class); diff --git a/src/main/java/org/torproject/metrics/collector/bridgepools/BridgePoolAssignmentsProcessor.java b/src/main/java/org/torproject/metrics/collector/bridgepools/BridgePoolAssignmentsProcessor.java new file mode 100644 index 0000000..c7188a0 --- /dev/null +++ b/src/main/java/org/torproject/metrics/collector/bridgepools/BridgePoolAssignmentsProcessor.java @@ -0,0 +1,231 @@ +/* Copyright 2011--2019 The Tor Project + * See LICENSE for licensing information */ + +package org.torproject.metrics.collector.bridgepools; + 
+import org.torproject.metrics.collector.conf.Configuration; +import org.torproject.metrics.collector.conf.ConfigurationException; +import org.torproject.metrics.collector.conf.Key; +import org.torproject.metrics.collector.cron.CollecTorMain; + +import org.apache.commons.codec.DecoderException; +import org.apache.commons.codec.binary.Hex; +import org.apache.commons.codec.digest.DigestUtils; +import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.IOException; +import java.io.InputStreamReader; +import java.nio.file.Paths; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.SortedSet; +import java.util.Stack; +import java.util.TimeZone; +import java.util.TreeSet; + +public class BridgePoolAssignmentsProcessor extends CollecTorMain { + + private static final Logger logger = LoggerFactory.getLogger( + BridgePoolAssignmentsProcessor.class); + private static final String BRIDGE_POOL_ASSIGNMENTS + = "bridge-pool-assignments"; + + /** Initialize configuration. 
*/ + public BridgePoolAssignmentsProcessor(Configuration config) { + super(config); + } + + private String outputPathName; + + private String recentPathName; + + @Override + public String module() { + return "BridgePoolAssignments"; + } + + @Override + protected String syncMarker() { + return "BridgePoolAssignments"; + } + + @Override + protected void startProcessing() throws ConfigurationException { + + logger.info("Starting bridge-pool-assignments module of CollecTor."); + + outputPathName = Paths.get(config.getPath(Key.OutputPath).toString(), + BRIDGE_POOL_ASSIGNMENTS).toString(); + recentPathName = Paths.get(config.getPath(Key.RecentPath).toString(), + BRIDGE_POOL_ASSIGNMENTS).toString(); + File assignmentsDirectory = + config.getPath(Key.BridgePoolAssignmentsLocalOrigins).toFile(); + + List<File> assignmentFiles = new ArrayList<>(); + Stack<File> files = new Stack<>(); + files.add(assignmentsDirectory); + while (!files.isEmpty()) { + File file = files.pop(); + if (file.isDirectory()) { + files.addAll(Arrays.asList(file.listFiles())); + } else if (file.getName().startsWith("assignments.log")) { + assignmentFiles.add(file); + } + } + + SimpleDateFormat assignmentFormat = + new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + assignmentFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + SimpleDateFormat filenameFormat = + new SimpleDateFormat("yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss"); + filenameFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + String duplicateFingerprint = null; + long maxBridgePoolAssignmentTime = 0L; + for (File assignmentFile : assignmentFiles) { + logger.info("Processing bridge pool assignment file '{}'...", + assignmentFile.getAbsolutePath()); + try { + BufferedReader br; + if (assignmentFile.getName().endsWith(".gz")) { + br = new BufferedReader(new InputStreamReader( + new GzipCompressorInputStream(new FileInputStream( + assignmentFile)))); + } else { + br = new BufferedReader(new FileReader(assignmentFile)); + } + String line; + String 
bridgePoolAssignmentLine = null; + SortedSet<String> sanitizedAssignments = new TreeSet<>(); + boolean wroteLastLine = false; + Set<String> hashedFingerprints = new HashSet<>(); + while ((line = br.readLine()) != null || !wroteLastLine) { + if (line == null + || line.startsWith("bridge-pool-assignment ")) { + if (bridgePoolAssignmentLine != null) { + try { + long bridgePoolAssignmentTime = assignmentFormat.parse( + bridgePoolAssignmentLine.substring( + "bridge-pool-assignment ".length())).getTime(); + maxBridgePoolAssignmentTime = Math.max( + maxBridgePoolAssignmentTime, + bridgePoolAssignmentTime); + File tarballFile = Paths.get(outputPathName, + filenameFormat.format(bridgePoolAssignmentTime)).toFile(); + File rsyncFile = new File(recentPathName, + tarballFile.getName()); + File[] outputFiles = new File[] { tarballFile, + rsyncFile }; + for (File outputFile : outputFiles) { + if (!outputFile.exists()) { + outputFile.getParentFile().mkdirs(); + BufferedWriter bw = new BufferedWriter(new FileWriter( + outputFile)); + bw.write("@type bridge-pool-assignment 1.0\n"); + bw.write(bridgePoolAssignmentLine + "\n"); + for (String assignmentLine : sanitizedAssignments) { + bw.write(assignmentLine + "\n"); + } + bw.close(); + } + } + } catch (IOException | ParseException e) { + logger.warn("Could not write sanitized bridge pool assignment " + + "file for line '{}' to disk. Skipping bridge pool " + + "assignment file '{}'.", bridgePoolAssignmentLine, + assignmentFile.getAbsolutePath(), e); + break; + } + sanitizedAssignments.clear(); + } + if (line == null) { + wroteLastLine = true; + } else { + bridgePoolAssignmentLine = line; + hashedFingerprints.clear(); + } + } else { + String[] parts = line.split(" "); + if (parts.length < 2 || parts[0].length() < 40) { + logger.warn("Unrecognized line '{}'. 
Aborting.", line); + break; + } + String hashedFingerprint; + try { + hashedFingerprint = Hex.encodeHexString(DigestUtils.sha1( + Hex.decodeHex(line.split(" ")[0].toCharArray()))) + .toLowerCase(); + } catch (DecoderException e) { + logger.warn("Unable to decode hex fingerprint in line '{}'. " + + "Aborting.", line); + break; + } + if (hashedFingerprints.contains(hashedFingerprint)) { + duplicateFingerprint = bridgePoolAssignmentLine; + } + hashedFingerprints.add(hashedFingerprint); + String assignmentDetails = line.substring(40); + sanitizedAssignments.add(hashedFingerprint + + assignmentDetails); + } + } + br.close(); + } catch (IOException e) { + logger.warn("Could not read bridge pool assignment file '{}'. " + + "Skipping.", assignmentFile.getAbsolutePath(), e); + } + } + + if (duplicateFingerprint != null) { + logger.warn("At least one bridge pool assignment list contained " + + "duplicate fingerprints. Last found in assignment list " + + "starting with '{}'.", duplicateFingerprint); + } + + if (maxBridgePoolAssignmentTime > 0L + && maxBridgePoolAssignmentTime + 330L * 60L * 1000L + < System.currentTimeMillis()) { + SimpleDateFormat dateTimeFormat = new SimpleDateFormat( + "yyyy-MM-dd HH:mm:ss"); + dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + logger.warn("The last known bridge pool assignment list was " + + "published at {}, which is more than 5:30 hours in the past.", + dateTimeFormat.format(maxBridgePoolAssignmentTime)); + } + + this.cleanUpRsyncDirectory(); + + logger.info("Finished processing bridge pool assignment file(s)."); + } + + /** Delete all files from the rsync directory that have not been modified + * in the last three days. 
*/ + public void cleanUpRsyncDirectory() { + long cutOffMillis = System.currentTimeMillis() + - 3L * 24L * 60L * 60L * 1000L; + Stack<File> allFiles = new Stack<>(); + allFiles.add(new File("recent/bridge-pool-assignments")); + while (!allFiles.isEmpty()) { + File file = allFiles.pop(); + if (file.isDirectory()) { + allFiles.addAll(Arrays.asList(file.listFiles())); + } else if (file.lastModified() < cutOffMillis) { + file.delete(); + } + } + } +} + + diff --git a/src/main/java/org/torproject/metrics/collector/conf/Annotation.java b/src/main/java/org/torproject/metrics/collector/conf/Annotation.java index 2e47df0..c05c38c 100644 --- a/src/main/java/org/torproject/metrics/collector/conf/Annotation.java +++ b/src/main/java/org/torproject/metrics/collector/conf/Annotation.java @@ -8,6 +8,7 @@ public enum Annotation { BandwidthFile("@type bandwidth-file 1.0\n"), BridgeExtraInfo("@type bridge-extra-info 1.3\n"), + BridgePoolAssignment("@type bridge-pool-assignment 1.0\n"), BridgeServer("@type bridge-server-descriptor 1.2\n"), Cert("@type dir-key-certificate-3 1.0\n"), Consensus("@type network-status-consensus-3 1.0\n"), diff --git a/src/main/java/org/torproject/metrics/collector/conf/Configuration.java b/src/main/java/org/torproject/metrics/collector/conf/Configuration.java index 69d3bcd..b751564 100644 --- a/src/main/java/org/torproject/metrics/collector/conf/Configuration.java +++ b/src/main/java/org/torproject/metrics/collector/conf/Configuration.java @@ -88,6 +88,7 @@ public class Configuration extends Observable implements Cloneable { private void anythingActivated() throws ConfigurationException { if (!(this.getBool(Key.RelaydescsActivated) || this.getBool(Key.BridgedescsActivated) + || this.getBool(Key.BridgePoolAssignmentsActivated) || this.getBool(Key.ExitlistsActivated) || this.getBool(Key.UpdateindexActivated) || this.getBool(Key.OnionPerfActivated) diff --git a/src/main/java/org/torproject/metrics/collector/conf/Key.java 
b/src/main/java/org/torproject/metrics/collector/conf/Key.java index ba4bcd9..b613795 100644 --- a/src/main/java/org/torproject/metrics/collector/conf/Key.java +++ b/src/main/java/org/torproject/metrics/collector/conf/Key.java @@ -27,6 +27,7 @@ public enum Key { SyncPath(Path.class), RelaySources(SourceType[].class), BridgeSources(SourceType[].class), + BridgePoolAssignmentsSources(SourceType[].class), ExitlistSources(SourceType[].class), OnionPerfSources(SourceType[].class), WebstatsSources(SourceType[].class), @@ -35,6 +36,8 @@ public enum Key { RelaySyncOrigins(URL[].class), BridgeSyncOrigins(URL[].class), BridgeLocalOrigins(Path.class), + BridgePoolAssignmentsLocalOrigins(Path.class), + BridgePoolAssignmentsSyncOrigins(URL[].class), ExitlistSyncOrigins(URL[].class), OnionPerfSyncOrigins(URL[].class), WebstatsSyncOrigins(URL[].class), @@ -42,6 +45,9 @@ public enum Key { BridgedescsActivated(Boolean.class), BridgedescsOffsetMinutes(Integer.class), BridgedescsPeriodMinutes(Integer.class), + BridgePoolAssignmentsActivated(Boolean.class), + BridgePoolAssignmentsOffsetMinutes(Integer.class), + BridgePoolAssignmentsPeriodMinutes(Integer.class), ExitlistsActivated(Boolean.class), ExitlistsOffsetMinutes(Integer.class), ExitlistsPeriodMinutes(Integer.class), diff --git a/src/main/java/org/torproject/metrics/collector/persist/BridgePoolAssignmentPersistence.java b/src/main/java/org/torproject/metrics/collector/persist/BridgePoolAssignmentPersistence.java new file mode 100644 index 0000000..5613060 --- /dev/null +++ b/src/main/java/org/torproject/metrics/collector/persist/BridgePoolAssignmentPersistence.java @@ -0,0 +1,34 @@ +/* Copyright 2016--2018 The Tor Project + * See LICENSE for licensing information */ + +package org.torproject.metrics.collector.persist; + +import org.torproject.descriptor.BridgePoolAssignment; +import org.torproject.metrics.collector.conf.Annotation; + +import java.nio.file.Paths; + +public class BridgePoolAssignmentPersistence + extends 
DescriptorPersistence<BridgePoolAssignment> { + + public BridgePoolAssignmentPersistence(BridgePoolAssignment desc) { + super(desc, Annotation.BridgePoolAssignment.bytes()); + calculatePaths(); + } + + private void calculatePaths() { + String file = PersistenceUtils.dateTime(desc.getPublishedMillis()); + String[] parts = file.split(DASH); + this.recentPath = Paths.get( + BRIDGEPOOLASSIGNMENTS, + file).toString(); + this.storagePath = Paths.get( + BRIDGEPOOLASSIGNMENTS, + parts[0], // year + parts[1], // month + parts[2], // day + file).toString(); + } + +} + diff --git a/src/main/java/org/torproject/metrics/collector/persist/DescriptorPersistence.java b/src/main/java/org/torproject/metrics/collector/persist/DescriptorPersistence.java index fed4839..3e7a06b 100644 --- a/src/main/java/org/torproject/metrics/collector/persist/DescriptorPersistence.java +++ b/src/main/java/org/torproject/metrics/collector/persist/DescriptorPersistence.java @@ -18,6 +18,8 @@ public abstract class DescriptorPersistence<T extends Descriptor> { DescriptorPersistence.class); protected static final String BRIDGEDESCS = "bridge-descriptors"; + protected static final String BRIDGEPOOLASSIGNMENTS + = "bridge-pool-assignments"; protected static final String DASH = "-"; protected static final String DOT = "."; protected static final String MICRODESC = "microdesc"; diff --git a/src/main/java/org/torproject/metrics/collector/sync/SyncPersistence.java b/src/main/java/org/torproject/metrics/collector/sync/SyncPersistence.java index 0d344bf..f8e7b27 100644 --- a/src/main/java/org/torproject/metrics/collector/sync/SyncPersistence.java +++ b/src/main/java/org/torproject/metrics/collector/sync/SyncPersistence.java @@ -6,6 +6,7 @@ package org.torproject.metrics.collector.sync; import org.torproject.descriptor.BandwidthFile; import org.torproject.descriptor.BridgeExtraInfoDescriptor; import org.torproject.descriptor.BridgeNetworkStatus; +import org.torproject.descriptor.BridgePoolAssignment; import 
org.torproject.descriptor.BridgeServerDescriptor; import org.torproject.descriptor.Descriptor; import org.torproject.descriptor.ExitList; @@ -20,6 +21,7 @@ import org.torproject.metrics.collector.conf.ConfigurationException; import org.torproject.metrics.collector.conf.Key; import org.torproject.metrics.collector.persist.BandwidthFilePersistence; import org.torproject.metrics.collector.persist.BridgeExtraInfoPersistence; +import org.torproject.metrics.collector.persist.BridgePoolAssignmentPersistence; import org.torproject.metrics.collector.persist.BridgeServerDescriptorPersistence; import org.torproject.metrics.collector.persist.ConsensusPersistence; import org.torproject.metrics.collector.persist.DescriptorPersistence; @@ -130,6 +132,10 @@ public class SyncPersistence { descPersist = new BridgeServerDescriptorPersistence( (BridgeServerDescriptor) desc, received); break; + case "BridgePoolAssignment": + descPersist = new BridgePoolAssignmentPersistence( + (BridgePoolAssignment) desc); + break; case "ExitList": // downloaded is part of desc, which to use? 
descPersist = new ExitlistPersistence((ExitList) desc, received); break; diff --git a/src/main/resources/collector.properties b/src/main/resources/collector.properties index 292e876..4cd339f 100644 --- a/src/main/resources/collector.properties +++ b/src/main/resources/collector.properties @@ -18,6 +18,12 @@ BridgedescsPeriodMinutes = 60 # offset in minutes since the epoch and BridgedescsOffsetMinutes = 9 ## the following defines, if this module is activated +BridgePoolAssignmentsActivated = false +# period in minutes +BridgePoolAssignmentsPeriodMinutes = 60 +# offset in minutes since the epoch and +BridgePoolAssignmentsOffsetMinutes = 9 +## the following defines, if this module is activated ExitlistsActivated = false # period in minutes ExitlistsPeriodMinutes = 60 @@ -139,6 +145,18 @@ ReplaceIpAddressesWithHashes = false BridgeDescriptorMappingsLimit = inf # # +######## Bridge pool assignments ######## +# +## Define descriptor sources +# possible values: Local, Sync +BridgePoolAssignmentsSources = Local +## Retrieve files from the following instances. +## List of URLs separated by comma. +BridgePoolAssignmentsSyncOrigins = https://collector.torproject.org +## Relative path to directory to read bridge pool assignment files from +BridgePoolAssignmentsLocalOrigins = in/bridge-pool-assignments/ +# +# ######## Exit lists ######## # ## Define descriptor sources diff --git a/src/test/java/org/torproject/metrics/collector/conf/ConfigurationTest.java b/src/test/java/org/torproject/metrics/collector/conf/ConfigurationTest.java index 4ac623e..3a69c0c 100644 --- a/src/test/java/org/torproject/metrics/collector/conf/ConfigurationTest.java +++ b/src/test/java/org/torproject/metrics/collector/conf/ConfigurationTest.java @@ -38,7 +38,7 @@ public class ConfigurationTest { public void testKeyCount() { assertEquals("The number of properties keys in enum Key changed."
+ "\n This test class should be adapted.", - 53, Key.values().length); + 59, Key.values().length); } @Test() diff --git a/src/test/java/org/torproject/metrics/collector/cron/CollecTorMainTest.java b/src/test/java/org/torproject/metrics/collector/cron/CollecTorMainTest.java index 78b6ac7..17737f0 100644 --- a/src/test/java/org/torproject/metrics/collector/cron/CollecTorMainTest.java +++ b/src/test/java/org/torproject/metrics/collector/cron/CollecTorMainTest.java @@ -69,6 +69,7 @@ public class CollecTorMainTest { switch (marker) { case "Relay": case "Bridge": + case "BridgePoolAssignments": case "Exitlist": case "OnionPerf": case "Webstats": |
