diff options
| author | Karsten Loesing <karsten.loesing@gmx.net> | 2019-09-18 15:57:52 +0200 |
|---|---|---|
| committer | Karsten Loesing <karsten.loesing@gmx.net> | 2019-09-18 17:38:23 +0200 |
| commit | 3866164a09962e6ed640d70b623c089b680aa4d6 (patch) | |
| tree | ccf2d200a8f0aea983496517db0e4e24e090a30a | |
| parent | 16894a4d0096ac76e0ccbba1cd7b593ab1ddab2c (diff) | |
Add new BridgedbStats module.
Implements part of #19332.
Temp commit: build.xml needs an update once metrics-lib 2.8.0 is
released.
13 files changed, 272 insertions, 3 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md index b38b124..c13dd60 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,10 @@ +# Changes in version 1.1?.? - 2019-??-?? + + * Medium changes + - Archive BridgeDB statistics. + - Update to metrics-lib 2.8.0. + + # Changes in version 1.10.0 - 2019-09-12 * Medium changes @@ -12,7 +12,7 @@ <property name="release.version" value="1.10.0-dev" /> <property name="project-main-class" value="org.torproject.metrics.collector.Main" /> <property name="name" value="collector"/> - <property name="metricslibversion" value="2.7.0" /> + <property name="metricslibversion" value="2.7.0-dev" /> <property name="jarincludes" value="collector.properties logback.xml" /> <patternset id="runtime" > diff --git a/src/main/java/org/torproject/metrics/collector/Main.java b/src/main/java/org/torproject/metrics/collector/Main.java index 6907e93..6f67111 100644 --- a/src/main/java/org/torproject/metrics/collector/Main.java +++ b/src/main/java/org/torproject/metrics/collector/Main.java @@ -3,6 +3,7 @@ package org.torproject.metrics.collector; +import org.torproject.metrics.collector.bridgedb.BridgedbStatsProcessor; import org.torproject.metrics.collector.bridgedescs.SanitizedBridgesWriter; import org.torproject.metrics.collector.conf.Configuration; import org.torproject.metrics.collector.conf.ConfigurationException; @@ -56,6 +57,8 @@ public class Main { collecTorMains.put(Key.WebstatsActivated, SanitizeWeblogs.class); collecTorMains.put(Key.SnowflakeStatsActivated, SnowflakeStatsDownloader.class); + collecTorMains.put(Key.BridgedbStatsActivated, + BridgedbStatsProcessor.class); } private static Configuration conf = new Configuration(); diff --git a/src/main/java/org/torproject/metrics/collector/bridgedb/BridgedbStatsProcessor.java b/src/main/java/org/torproject/metrics/collector/bridgedb/BridgedbStatsProcessor.java new file mode 100644 index 0000000..6ba84bb --- /dev/null +++ 
b/src/main/java/org/torproject/metrics/collector/bridgedb/BridgedbStatsProcessor.java @@ -0,0 +1,190 @@ +/* Copyright 2019 The Tor Project + * See LICENSE for licensing information */ + +package org.torproject.metrics.collector.bridgedb; + +import org.torproject.descriptor.BridgedbStats; +import org.torproject.descriptor.Descriptor; +import org.torproject.descriptor.DescriptorSourceFactory; +import org.torproject.descriptor.UnparseableDescriptor; +import org.torproject.metrics.collector.conf.Configuration; +import org.torproject.metrics.collector.conf.ConfigurationException; +import org.torproject.metrics.collector.conf.Key; +import org.torproject.metrics.collector.cron.CollecTorMain; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.time.Instant; +import java.time.format.DateTimeFormatter; +import java.time.temporal.ChronoUnit; +import java.util.Arrays; +import java.util.Stack; + +public class BridgedbStatsProcessor extends CollecTorMain { + + /** + * Class logger. + */ + private static final Logger logger = LoggerFactory.getLogger( + BridgedbStatsProcessor.class); + + /** + * Directory for reading BridgeDB statistics files. + */ + private File inputDirectory; + + /** + * Directory for writing BridgeDB statistics files to be archived in tarballs. + */ + private String outputPathName; + + /** + * Directory for writing recently processed BridgeDB statistics files. + */ + private String recentPathName; + + /** + * File name format. + */ + private DateTimeFormatter filenameFormat = DateTimeFormatter.ofPattern( + "uuuu/MM/dd/uuuu-MM-dd-HH-mm-ss"); + + /** + * Initialize this class with the given configuration. + */ + public BridgedbStatsProcessor(Configuration config) { + super(config); + } + + /** + * Return the module identifier. + * + * @return Module identifier. 
+ */ + @Override + public String module() { + return "BridgedbStats"; + } + + /** + * Return the synchronization marker. + * + * @return Synchronization marker. + */ + @Override + protected String syncMarker() { + return "BridgedbStats"; + } + + /** + * Start processing files, which includes reading BridgeDB statistics files + * from disk, possibly decompressing them and splitting them by date, and + * writing them back to disk. + * + * @throws ConfigurationException Thrown if configuration values cannot be + * obtained. + */ + @Override + protected void startProcessing() throws ConfigurationException { + logger.info("Starting BridgeDB statistics module of CollecTor."); + this.initializeConfiguration(); + logger.info("Reading BridgeDB statistics files in {}.", + this.inputDirectory); + for (Descriptor descriptor + : DescriptorSourceFactory.createDescriptorReader() + .readDescriptors(this.inputDirectory)) { + if (descriptor instanceof BridgedbStats) { + BridgedbStats bridgedbStats = (BridgedbStats) descriptor; + Path tarballPath = Paths.get(this.outputPathName, + bridgedbStats.bridgedbStatsEnd().format(this.filenameFormat)); + Path rsyncPath = Paths.get(this.recentPathName, + bridgedbStats.bridgedbStatsEnd().format(this.filenameFormat)); + this.writeDescriptor(bridgedbStats.getRawDescriptorBytes(), + tarballPath, rsyncPath); + } else if (descriptor instanceof UnparseableDescriptor) { + logger.warn("Skipping unparseable descriptor in file {}.", + descriptor.getDescriptorFile(), + ((UnparseableDescriptor) descriptor).getDescriptorParseException()); + } else { + logger.warn("Skipping unexpected descriptor of type {} in file {}.", + descriptor.getClass(), descriptor.getDescriptorFile()); + } + } + logger.info("Cleaning up directory {} containing recent files.", + this.recentPathName); + this.cleanUpRsyncDirectory(); + logger.info("Finished processing BridgeDB statistics file(s)."); + } + + /** + * Initialize configuration by obtaining current configuration values and + 
* storing them in instance attributes. + */ + private void initializeConfiguration() throws ConfigurationException { + this.outputPathName = Paths.get(config.getPath(Key.OutputPath).toString(), + "bridgedb-stats").toString(); + this.recentPathName = Paths.get(config.getPath(Key.RecentPath).toString(), + "bridgedb-stats").toString(); + this.inputDirectory = + config.getPath(Key.BridgedbStatsLocalOrigins).toFile(); + } + + /** + * Write the given raw descriptor bytes to each of the given files, + * skipping any file that already exists. + * + * @param rawDescriptorBytes Raw descriptor bytes to write. + * @param outputPaths One or more paths to write to. + */ + private void writeDescriptor(byte[] rawDescriptorBytes, + Path ... outputPaths) { + for (Path outputPath : outputPaths) { + try { + File outputFile = outputPath.toFile(); + if (outputFile.exists()) { + continue; + } + if (!outputFile.getParentFile().exists() + && !outputFile.getParentFile().mkdirs()) { + logger.warn("Could not create parent directories of {}.", outputFile); + return; + } + Files.write(outputPath, rawDescriptorBytes); + } catch (IOException e) { + logger.warn("Unable to write descriptor to file {}.", outputPath, e); + } + } + } + + /** + * Delete all files from the rsync directory that have not been modified in + * the last three days. 
+ */ + public void cleanUpRsyncDirectory() { + Instant cutOff = Instant.now().minus(3L, ChronoUnit.DAYS); + Stack<File> allFiles = new Stack<>(); + allFiles.add(new File(this.recentPathName)); + while (!allFiles.isEmpty()) { + File file = allFiles.pop(); + if (file.isDirectory()) { + File[] filesInDirectory = file.listFiles(); + if (null != filesInDirectory) { + allFiles.addAll(Arrays.asList(filesInDirectory)); + } + } else if (Instant.ofEpochMilli(file.lastModified()).isBefore(cutOff)) { + try { + Files.deleteIfExists(file.toPath()); + } catch (IOException e) { + logger.warn("Unable to delete file {} that is apparently older than " + + "three days.", file, e); + } + } + } + } +} diff --git a/src/main/java/org/torproject/metrics/collector/conf/Annotation.java b/src/main/java/org/torproject/metrics/collector/conf/Annotation.java index 8cd3324..9f5ad6b 100644 --- a/src/main/java/org/torproject/metrics/collector/conf/Annotation.java +++ b/src/main/java/org/torproject/metrics/collector/conf/Annotation.java @@ -7,6 +7,7 @@ package org.torproject.metrics.collector.conf; public enum Annotation { BandwidthFile("@type bandwidth-file 1.0\n"), + BridgedbStats("@type bridgedb-stats 1.0\n"), BridgeExtraInfo("@type bridge-extra-info 1.3\n"), BridgeServer("@type bridge-server-descriptor 1.2\n"), Cert("@type dir-key-certificate-3 1.0\n"), diff --git a/src/main/java/org/torproject/metrics/collector/conf/Configuration.java b/src/main/java/org/torproject/metrics/collector/conf/Configuration.java index 27f5125..2307059 100644 --- a/src/main/java/org/torproject/metrics/collector/conf/Configuration.java +++ b/src/main/java/org/torproject/metrics/collector/conf/Configuration.java @@ -88,6 +88,7 @@ public class Configuration extends Observable implements Cloneable { private void anythingActivated() throws ConfigurationException { if (!(this.getBool(Key.RelaydescsActivated) || this.getBool(Key.BridgedescsActivated) + || this.getBool(Key.BridgedbStatsActivated) || 
this.getBool(Key.ExitlistsActivated) || this.getBool(Key.UpdateindexActivated) || this.getBool(Key.OnionPerfActivated) diff --git a/src/main/java/org/torproject/metrics/collector/conf/Key.java b/src/main/java/org/torproject/metrics/collector/conf/Key.java index e683fe2..c02a983 100644 --- a/src/main/java/org/torproject/metrics/collector/conf/Key.java +++ b/src/main/java/org/torproject/metrics/collector/conf/Key.java @@ -72,7 +72,13 @@ public enum Key { SnowflakeStatsPeriodMinutes(Integer.class), SnowflakeStatsUrl(URL.class), SnowflakeStatsSources(SourceType[].class), - SnowflakeStatsSyncOrigins(URL[].class); + SnowflakeStatsSyncOrigins(URL[].class), + BridgedbStatsActivated(Boolean.class), + BridgedbStatsOffsetMinutes(Integer.class), + BridgedbStatsPeriodMinutes(Integer.class), + BridgedbStatsSources(SourceType[].class), + BridgedbStatsLocalOrigins(Path.class), + BridgedbStatsSyncOrigins(URL[].class); private Class clazz; private static Set<String> keys; diff --git a/src/main/java/org/torproject/metrics/collector/persist/BridgedbStatsPersistence.java b/src/main/java/org/torproject/metrics/collector/persist/BridgedbStatsPersistence.java new file mode 100644 index 0000000..14997a2 --- /dev/null +++ b/src/main/java/org/torproject/metrics/collector/persist/BridgedbStatsPersistence.java @@ -0,0 +1,37 @@ +/* Copyright 2019 The Tor Project + * See LICENSE for licensing information */ + +package org.torproject.metrics.collector.persist; + +import org.torproject.descriptor.BridgedbStats; +import org.torproject.metrics.collector.conf.Annotation; + +import java.nio.file.Paths; +import java.time.ZoneOffset; +import java.time.format.DateTimeFormatter; + +public class BridgedbStatsPersistence + extends DescriptorPersistence<BridgedbStats> { + + private static final String BRIDGEDB_STATS = "bridgedb-stats"; + + public BridgedbStatsPersistence(BridgedbStats desc) { + super(desc, Annotation.BridgedbStats.bytes()); + calculatePaths(); + } + + private void calculatePaths() { + 
DateTimeFormatter directoriesFormatter = DateTimeFormatter + .ofPattern("uuuu/MM/dd").withZone(ZoneOffset.UTC); + String[] directories = this.desc.bridgedbStatsEnd() + .format(directoriesFormatter).split("/"); + DateTimeFormatter fileFormatter = DateTimeFormatter + .ofPattern("uuuu-MM-dd-HH-mm-ss").withZone(ZoneOffset.UTC); + String fileOut = this.desc.bridgedbStatsEnd().format(fileFormatter) + + "-bridgedb-stats"; + this.recentPath = Paths.get(BRIDGEDB_STATS, fileOut).toString(); + this.storagePath = Paths.get(BRIDGEDB_STATS, directories[0], directories[1], + directories[2], fileOut).toString(); + } +} + diff --git a/src/main/java/org/torproject/metrics/collector/persist/DescriptorPersistence.java b/src/main/java/org/torproject/metrics/collector/persist/DescriptorPersistence.java index fed4839..5955a36 100644 --- a/src/main/java/org/torproject/metrics/collector/persist/DescriptorPersistence.java +++ b/src/main/java/org/torproject/metrics/collector/persist/DescriptorPersistence.java @@ -18,6 +18,7 @@ public abstract class DescriptorPersistence<T extends Descriptor> { DescriptorPersistence.class); protected static final String BRIDGEDESCS = "bridge-descriptors"; + protected static final String BRIDGEDBSTATS = "bridgedb-stats"; protected static final String DASH = "-"; protected static final String DOT = "."; protected static final String MICRODESC = "microdesc"; diff --git a/src/main/java/org/torproject/metrics/collector/sync/SyncPersistence.java b/src/main/java/org/torproject/metrics/collector/sync/SyncPersistence.java index 4b3b7bc..0f1c157 100644 --- a/src/main/java/org/torproject/metrics/collector/sync/SyncPersistence.java +++ b/src/main/java/org/torproject/metrics/collector/sync/SyncPersistence.java @@ -7,6 +7,7 @@ import org.torproject.descriptor.BandwidthFile; import org.torproject.descriptor.BridgeExtraInfoDescriptor; import org.torproject.descriptor.BridgeNetworkStatus; import org.torproject.descriptor.BridgeServerDescriptor; +import 
org.torproject.descriptor.BridgedbStats; import org.torproject.descriptor.Descriptor; import org.torproject.descriptor.ExitList; import org.torproject.descriptor.RelayExtraInfoDescriptor; @@ -22,6 +23,7 @@ import org.torproject.metrics.collector.conf.Key; import org.torproject.metrics.collector.persist.BandwidthFilePersistence; import org.torproject.metrics.collector.persist.BridgeExtraInfoPersistence; import org.torproject.metrics.collector.persist.BridgeServerDescriptorPersistence; +import org.torproject.metrics.collector.persist.BridgedbStatsPersistence; import org.torproject.metrics.collector.persist.ConsensusPersistence; import org.torproject.metrics.collector.persist.DescriptorPersistence; import org.torproject.metrics.collector.persist.ExitlistPersistence; @@ -148,6 +150,9 @@ public class SyncPersistence { case "SnowflakeStats": descPersist = new SnowflakeStatsPersistence((SnowflakeStats) desc); break; + case "BridgedbStats": + descPersist = new BridgedbStatsPersistence((BridgedbStats) desc); + break; default: log.trace("Invalid descriptor type {} for sync-merge.", clazz.getName()); diff --git a/src/main/resources/collector.properties b/src/main/resources/collector.properties index a4eed7a..59ba546 100644 --- a/src/main/resources/collector.properties +++ b/src/main/resources/collector.properties @@ -53,6 +53,12 @@ SnowflakeStatsActivated = false SnowflakeStatsPeriodMinutes = 480 # offset in minutes since the epoch and SnowflakeStatsOffsetMinutes = 100 +# the following defines, if this module is activated +BridgedbStatsActivated = false +# period in minutes +BridgedbStatsPeriodMinutes = 480 +# offset in minutes since the epoch and +BridgedbStatsOffsetMinutes = 340 ########################################## ## All below can be changed at runtime. @@ -198,3 +204,14 @@ SnowflakeStatsSyncOrigins = https://collector.torproject.org ## Where to download snowflake statistics from. 
SnowflakeStatsUrl = https://snowflake-broker.torproject.net/metrics # +######## BridgeDB statistics ######## +# +## Define descriptor sources +# possible values: Local, Sync +BridgedbStatsSources = Local +## Relative path to directory to import BridgeDB statistics from. +BridgedbStatsLocalOrigins = in/bridgedb-stats +## Retrieve files from the following instances. +## List of URLs separated by comma. +BridgedbStatsSyncOrigins = https://collector.torproject.org +#
\ No newline at end of file diff --git a/src/test/java/org/torproject/metrics/collector/conf/ConfigurationTest.java b/src/test/java/org/torproject/metrics/collector/conf/ConfigurationTest.java index 3a69c0c..201d541 100644 --- a/src/test/java/org/torproject/metrics/collector/conf/ConfigurationTest.java +++ b/src/test/java/org/torproject/metrics/collector/conf/ConfigurationTest.java @@ -38,7 +38,7 @@ public class ConfigurationTest { public void testKeyCount() { assertEquals("The number of properties keys in enum Key changed." + "\n This test class should be adapted.", - 59, Key.values().length); + 65, Key.values().length); } @Test() diff --git a/src/test/java/org/torproject/metrics/collector/cron/CollecTorMainTest.java b/src/test/java/org/torproject/metrics/collector/cron/CollecTorMainTest.java index d0fe173..a353dd9 100644 --- a/src/test/java/org/torproject/metrics/collector/cron/CollecTorMainTest.java +++ b/src/test/java/org/torproject/metrics/collector/cron/CollecTorMainTest.java @@ -69,6 +69,7 @@ public class CollecTorMainTest { switch (marker) { case "Relay": case "Bridge": + case "BridgedbStats": case "Exitlist": case "OnionPerf": case "Webstats": |
