summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKarsten Loesing <karsten.loesing@gmx.net>2019-09-18 15:57:52 +0200
committerKarsten Loesing <karsten.loesing@gmx.net>2019-09-18 17:38:23 +0200
commit3866164a09962e6ed640d70b623c089b680aa4d6 (patch)
treeccf2d200a8f0aea983496517db0e4e24e090a30a
parent16894a4d0096ac76e0ccbba1cd7b593ab1ddab2c (diff)
Add new BridgedbStats module.
Implements part of #19332. Temp commit: build.xml needs an update once metrics-lib 2.8.0 is released.
-rw-r--r--CHANGELOG.md7
-rw-r--r--build.xml2
-rw-r--r--src/main/java/org/torproject/metrics/collector/Main.java3
-rw-r--r--src/main/java/org/torproject/metrics/collector/bridgedb/BridgedbStatsProcessor.java190
-rw-r--r--src/main/java/org/torproject/metrics/collector/conf/Annotation.java1
-rw-r--r--src/main/java/org/torproject/metrics/collector/conf/Configuration.java1
-rw-r--r--src/main/java/org/torproject/metrics/collector/conf/Key.java8
-rw-r--r--src/main/java/org/torproject/metrics/collector/persist/BridgedbStatsPersistence.java37
-rw-r--r--src/main/java/org/torproject/metrics/collector/persist/DescriptorPersistence.java1
-rw-r--r--src/main/java/org/torproject/metrics/collector/sync/SyncPersistence.java5
-rw-r--r--src/main/resources/collector.properties17
-rw-r--r--src/test/java/org/torproject/metrics/collector/conf/ConfigurationTest.java2
-rw-r--r--src/test/java/org/torproject/metrics/collector/cron/CollecTorMainTest.java1
13 files changed, 272 insertions, 3 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index b38b124..c13dd60 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,10 @@
+# Changes in version 1.1?.? - 2019-??-??
+
+ * Medium changes
+ - Archive BridgeDB statistics.
+ - Update to metrics-lib 2.8.0.
+
+
# Changes in version 1.10.0 - 2019-09-12
* Medium changes
diff --git a/build.xml b/build.xml
index 4bada49..57c7b7d 100644
--- a/build.xml
+++ b/build.xml
@@ -12,7 +12,7 @@
<property name="release.version" value="1.10.0-dev" />
<property name="project-main-class" value="org.torproject.metrics.collector.Main" />
<property name="name" value="collector"/>
- <property name="metricslibversion" value="2.7.0" />
+ <property name="metricslibversion" value="2.7.0-dev" />
<property name="jarincludes" value="collector.properties logback.xml" />
<patternset id="runtime" >
diff --git a/src/main/java/org/torproject/metrics/collector/Main.java b/src/main/java/org/torproject/metrics/collector/Main.java
index 6907e93..6f67111 100644
--- a/src/main/java/org/torproject/metrics/collector/Main.java
+++ b/src/main/java/org/torproject/metrics/collector/Main.java
@@ -3,6 +3,7 @@
package org.torproject.metrics.collector;
+import org.torproject.metrics.collector.bridgedb.BridgedbStatsProcessor;
import org.torproject.metrics.collector.bridgedescs.SanitizedBridgesWriter;
import org.torproject.metrics.collector.conf.Configuration;
import org.torproject.metrics.collector.conf.ConfigurationException;
@@ -56,6 +57,8 @@ public class Main {
collecTorMains.put(Key.WebstatsActivated, SanitizeWeblogs.class);
collecTorMains.put(Key.SnowflakeStatsActivated,
SnowflakeStatsDownloader.class);
+ collecTorMains.put(Key.BridgedbStatsActivated,
+ BridgedbStatsProcessor.class);
}
private static Configuration conf = new Configuration();
diff --git a/src/main/java/org/torproject/metrics/collector/bridgedb/BridgedbStatsProcessor.java b/src/main/java/org/torproject/metrics/collector/bridgedb/BridgedbStatsProcessor.java
new file mode 100644
index 0000000..6ba84bb
--- /dev/null
+++ b/src/main/java/org/torproject/metrics/collector/bridgedb/BridgedbStatsProcessor.java
@@ -0,0 +1,190 @@
+/* Copyright 2019 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.metrics.collector.bridgedb;
+
+import org.torproject.descriptor.BridgedbStats;
+import org.torproject.descriptor.Descriptor;
+import org.torproject.descriptor.DescriptorSourceFactory;
+import org.torproject.descriptor.UnparseableDescriptor;
+import org.torproject.metrics.collector.conf.Configuration;
+import org.torproject.metrics.collector.conf.ConfigurationException;
+import org.torproject.metrics.collector.conf.Key;
+import org.torproject.metrics.collector.cron.CollecTorMain;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.time.Instant;
+import java.time.format.DateTimeFormatter;
+import java.time.temporal.ChronoUnit;
+import java.util.Arrays;
+import java.util.Stack;
+
+public class BridgedbStatsProcessor extends CollecTorMain {
+
+ /**
+ * Class logger.
+ */
+ private static final Logger logger = LoggerFactory.getLogger(
+ BridgedbStatsProcessor.class);
+
+ /**
+ * Directory for reading BridgeDB statistics files.
+ */
+ private File inputDirectory;
+
+ /**
+ * Directory for writing BridgeDB statistics files to be archived in tarballs.
+ */
+ private String outputPathName;
+
+ /**
+ * Directory for writing recently processed BridgeDB statistics files.
+ */
+ private String recentPathName;
+
+ /**
+ * File name format.
+ */
+ private DateTimeFormatter filenameFormat = DateTimeFormatter.ofPattern(
+ "uuuu/MM/dd/uuuu-MM-dd-HH-mm-ss");
+
+ /**
+ * Initialize this class with the given configuration.
+ */
+ public BridgedbStatsProcessor(Configuration config) {
+ super(config);
+ }
+
+ /**
+ * Return the module identifier.
+ *
+ * @return Module identifier.
+ */
+ @Override
+ public String module() {
+ return "BridgedbStats";
+ }
+
+ /**
+ * Return the synchronization marker.
+ *
+ * @return Synchronization marker.
+ */
+ @Override
+ protected String syncMarker() {
+ return "BridgedbStats";
+ }
+
+ /**
+ * Start processing files, which includes reading BridgeDB statistics files
+ * from disk, possibly decompressing them and splitting them by date, and
+ * writing them back to disk.
+ *
+ * @throws ConfigurationException Thrown if configuration values cannot be
+ * obtained.
+ */
+ @Override
+ protected void startProcessing() throws ConfigurationException {
+ logger.info("Starting BridgeDB statistics module of CollecTor.");
+ this.initializeConfiguration();
+ logger.info("Reading BridgeDB statistics files in {}.",
+ this.inputDirectory);
+ for (Descriptor descriptor
+ : DescriptorSourceFactory.createDescriptorReader()
+ .readDescriptors(this.inputDirectory)) {
+ if (descriptor instanceof BridgedbStats) {
+ BridgedbStats bridgedbStats = (BridgedbStats) descriptor;
+ Path tarballPath = Paths.get(this.outputPathName,
+ bridgedbStats.bridgedbStatsEnd().format(this.filenameFormat));
+ Path rsyncPath = Paths.get(this.recentPathName,
+ bridgedbStats.bridgedbStatsEnd().format(this.filenameFormat));
+ this.writeDescriptor(bridgedbStats.getRawDescriptorBytes(),
+ tarballPath, rsyncPath);
+ } else if (descriptor instanceof UnparseableDescriptor) {
+ logger.warn("Skipping unparseable descriptor in file {}.",
+ descriptor.getDescriptorFile(),
+ ((UnparseableDescriptor) descriptor).getDescriptorParseException());
+ } else {
+ logger.warn("Skipping unexpected descriptor of type {} in file {}.",
+ descriptor.getClass(), descriptor.getDescriptorFile());
+ }
+ }
+ logger.info("Cleaning up directory {} containing recent files.",
+ this.recentPathName);
+ this.cleanUpRsyncDirectory();
+ logger.info("Finished processing BridgeDB statistics file(s).");
+ }
+
+ /**
+ * Initialize configuration by obtaining current configuration values and
+ * storing them in instance attributes.
+ */
+ private void initializeConfiguration() throws ConfigurationException {
+ this.outputPathName = Paths.get(config.getPath(Key.OutputPath).toString(),
+ "bridgedb-stats").toString();
+ this.recentPathName = Paths.get(config.getPath(Key.RecentPath).toString(),
+ "bridgedb-stats").toString();
+ this.inputDirectory =
+ config.getPath(Key.BridgedbStatsLocalOrigins).toFile();
+ }
+
+ /**
+ * Write the given raw descriptor bytes to the given files, and stop at the
+ * first file that already exists.
+ *
+ * @param rawDescriptorBytes Raw descriptor bytes to write.
+ * @param outputPaths One or more paths to write to.
+ */
+ private void writeDescriptor(byte[] rawDescriptorBytes,
+ Path ... outputPaths) {
+ for (Path outputPath : outputPaths) {
+ try {
+ File outputFile = outputPath.toFile();
+ if (outputFile.exists()) {
+ continue;
+ }
+ if (!outputFile.getParentFile().exists()
+ && !outputFile.getParentFile().mkdirs()) {
+ logger.warn("Could not create parent directories of {}.", outputFile);
+ return;
+ }
+ Files.write(outputPath, rawDescriptorBytes);
+ } catch (IOException e) {
+ logger.warn("Unable to write descriptor to file {}.", outputPath, e);
+ }
+ }
+ }
+
+ /**
+ * Delete all files from the rsync directory that have not been modified in
+ * the last three days.
+ */
+ public void cleanUpRsyncDirectory() {
+ Instant cutOff = Instant.now().minus(3L, ChronoUnit.DAYS);
+ Stack<File> allFiles = new Stack<>();
+ allFiles.add(new File(this.recentPathName));
+ while (!allFiles.isEmpty()) {
+ File file = allFiles.pop();
+ if (file.isDirectory()) {
+ File[] filesInDirectory = file.listFiles();
+ if (null != filesInDirectory) {
+ allFiles.addAll(Arrays.asList(filesInDirectory));
+ }
+ } else if (Instant.ofEpochMilli(file.lastModified()).isBefore(cutOff)) {
+ try {
+ Files.deleteIfExists(file.toPath());
+ } catch (IOException e) {
+ logger.warn("Unable to delete file {} that is apparently older than "
+ + "three days.", file, e);
+ }
+ }
+ }
+ }
+}
diff --git a/src/main/java/org/torproject/metrics/collector/conf/Annotation.java b/src/main/java/org/torproject/metrics/collector/conf/Annotation.java
index 8cd3324..9f5ad6b 100644
--- a/src/main/java/org/torproject/metrics/collector/conf/Annotation.java
+++ b/src/main/java/org/torproject/metrics/collector/conf/Annotation.java
@@ -7,6 +7,7 @@ package org.torproject.metrics.collector.conf;
public enum Annotation {
BandwidthFile("@type bandwidth-file 1.0\n"),
+ BridgedbStats("@type bridgedb-stats 1.0\n"),
BridgeExtraInfo("@type bridge-extra-info 1.3\n"),
BridgeServer("@type bridge-server-descriptor 1.2\n"),
Cert("@type dir-key-certificate-3 1.0\n"),
diff --git a/src/main/java/org/torproject/metrics/collector/conf/Configuration.java b/src/main/java/org/torproject/metrics/collector/conf/Configuration.java
index 27f5125..2307059 100644
--- a/src/main/java/org/torproject/metrics/collector/conf/Configuration.java
+++ b/src/main/java/org/torproject/metrics/collector/conf/Configuration.java
@@ -88,6 +88,7 @@ public class Configuration extends Observable implements Cloneable {
private void anythingActivated() throws ConfigurationException {
if (!(this.getBool(Key.RelaydescsActivated)
|| this.getBool(Key.BridgedescsActivated)
+ || this.getBool(Key.BridgedbStatsActivated)
|| this.getBool(Key.ExitlistsActivated)
|| this.getBool(Key.UpdateindexActivated)
|| this.getBool(Key.OnionPerfActivated)
diff --git a/src/main/java/org/torproject/metrics/collector/conf/Key.java b/src/main/java/org/torproject/metrics/collector/conf/Key.java
index e683fe2..c02a983 100644
--- a/src/main/java/org/torproject/metrics/collector/conf/Key.java
+++ b/src/main/java/org/torproject/metrics/collector/conf/Key.java
@@ -72,7 +72,13 @@ public enum Key {
SnowflakeStatsPeriodMinutes(Integer.class),
SnowflakeStatsUrl(URL.class),
SnowflakeStatsSources(SourceType[].class),
- SnowflakeStatsSyncOrigins(URL[].class);
+ SnowflakeStatsSyncOrigins(URL[].class),
+ BridgedbStatsActivated(Boolean.class),
+ BridgedbStatsOffsetMinutes(Integer.class),
+ BridgedbStatsPeriodMinutes(Integer.class),
+ BridgedbStatsSources(SourceType[].class),
+ BridgedbStatsLocalOrigins(Path.class),
+ BridgedbStatsSyncOrigins(URL[].class);
private Class clazz;
private static Set<String> keys;
diff --git a/src/main/java/org/torproject/metrics/collector/persist/BridgedbStatsPersistence.java b/src/main/java/org/torproject/metrics/collector/persist/BridgedbStatsPersistence.java
new file mode 100644
index 0000000..14997a2
--- /dev/null
+++ b/src/main/java/org/torproject/metrics/collector/persist/BridgedbStatsPersistence.java
@@ -0,0 +1,37 @@
+/* Copyright 2019 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.metrics.collector.persist;
+
+import org.torproject.descriptor.BridgedbStats;
+import org.torproject.metrics.collector.conf.Annotation;
+
+import java.nio.file.Paths;
+import java.time.ZoneOffset;
+import java.time.format.DateTimeFormatter;
+
+public class BridgedbStatsPersistence
+ extends DescriptorPersistence<BridgedbStats> {
+
+ private static final String BRIDGEDB_STATS = "bridgedb-stats";
+
+ public BridgedbStatsPersistence(BridgedbStats desc) {
+ super(desc, Annotation.BridgedbStats.bytes());
+ calculatePaths();
+ }
+
+ private void calculatePaths() {
+ DateTimeFormatter directoriesFormatter = DateTimeFormatter
+ .ofPattern("uuuu/MM/dd").withZone(ZoneOffset.UTC);
+ String[] directories = this.desc.bridgedbStatsEnd()
+ .format(directoriesFormatter).split("/");
+ DateTimeFormatter fileFormatter = DateTimeFormatter
+ .ofPattern("uuuu-MM-dd-HH-mm-ss").withZone(ZoneOffset.UTC);
+ String fileOut = this.desc.bridgedbStatsEnd().format(fileFormatter)
+ + "-bridgedb-stats";
+ this.recentPath = Paths.get(BRIDGEDB_STATS, fileOut).toString();
+ this.storagePath = Paths.get(BRIDGEDB_STATS, directories[0], directories[1],
+ directories[2], fileOut).toString();
+ }
+}
+
diff --git a/src/main/java/org/torproject/metrics/collector/persist/DescriptorPersistence.java b/src/main/java/org/torproject/metrics/collector/persist/DescriptorPersistence.java
index fed4839..5955a36 100644
--- a/src/main/java/org/torproject/metrics/collector/persist/DescriptorPersistence.java
+++ b/src/main/java/org/torproject/metrics/collector/persist/DescriptorPersistence.java
@@ -18,6 +18,7 @@ public abstract class DescriptorPersistence<T extends Descriptor> {
DescriptorPersistence.class);
protected static final String BRIDGEDESCS = "bridge-descriptors";
+ protected static final String BRIDGEDBSTATS = "bridgedb-stats";
protected static final String DASH = "-";
protected static final String DOT = ".";
protected static final String MICRODESC = "microdesc";
diff --git a/src/main/java/org/torproject/metrics/collector/sync/SyncPersistence.java b/src/main/java/org/torproject/metrics/collector/sync/SyncPersistence.java
index 4b3b7bc..0f1c157 100644
--- a/src/main/java/org/torproject/metrics/collector/sync/SyncPersistence.java
+++ b/src/main/java/org/torproject/metrics/collector/sync/SyncPersistence.java
@@ -7,6 +7,7 @@ import org.torproject.descriptor.BandwidthFile;
import org.torproject.descriptor.BridgeExtraInfoDescriptor;
import org.torproject.descriptor.BridgeNetworkStatus;
import org.torproject.descriptor.BridgeServerDescriptor;
+import org.torproject.descriptor.BridgedbStats;
import org.torproject.descriptor.Descriptor;
import org.torproject.descriptor.ExitList;
import org.torproject.descriptor.RelayExtraInfoDescriptor;
@@ -22,6 +23,7 @@ import org.torproject.metrics.collector.conf.Key;
import org.torproject.metrics.collector.persist.BandwidthFilePersistence;
import org.torproject.metrics.collector.persist.BridgeExtraInfoPersistence;
import org.torproject.metrics.collector.persist.BridgeServerDescriptorPersistence;
+import org.torproject.metrics.collector.persist.BridgedbStatsPersistence;
import org.torproject.metrics.collector.persist.ConsensusPersistence;
import org.torproject.metrics.collector.persist.DescriptorPersistence;
import org.torproject.metrics.collector.persist.ExitlistPersistence;
@@ -148,6 +150,9 @@ public class SyncPersistence {
case "SnowflakeStats":
descPersist = new SnowflakeStatsPersistence((SnowflakeStats) desc);
break;
+ case "BridgedbStats":
+ descPersist = new BridgedbStatsPersistence((BridgedbStats) desc);
+ break;
default:
log.trace("Invalid descriptor type {} for sync-merge.",
clazz.getName());
diff --git a/src/main/resources/collector.properties b/src/main/resources/collector.properties
index a4eed7a..59ba546 100644
--- a/src/main/resources/collector.properties
+++ b/src/main/resources/collector.properties
@@ -53,6 +53,12 @@ SnowflakeStatsActivated = false
SnowflakeStatsPeriodMinutes = 480
# offset in minutes since the epoch and
SnowflakeStatsOffsetMinutes = 100
+# the following defines whether this module is activated
+BridgedbStatsActivated = false
+# period in minutes
+BridgedbStatsPeriodMinutes = 480
+# offset in minutes since the epoch
+BridgedbStatsOffsetMinutes = 340
##########################################
## All below can be changed at runtime.
@@ -198,3 +204,14 @@ SnowflakeStatsSyncOrigins = https://collector.torproject.org
## Where to download snowflake statistics from.
SnowflakeStatsUrl = https://snowflake-broker.torproject.net/metrics
#
+######## BridgeDB statistics ########
+#
+## Define descriptor sources
+# possible values: Local, Sync
+BridgedbStatsSources = Local
+## Relative path to directory to import BridgeDB statistics from.
+BridgedbStatsLocalOrigins = in/bridgedb-stats
+## Retrieve files from the following instances.
+## List of URLs separated by comma.
+BridgedbStatsSyncOrigins = https://collector.torproject.org
+# \ No newline at end of file
diff --git a/src/test/java/org/torproject/metrics/collector/conf/ConfigurationTest.java b/src/test/java/org/torproject/metrics/collector/conf/ConfigurationTest.java
index 3a69c0c..201d541 100644
--- a/src/test/java/org/torproject/metrics/collector/conf/ConfigurationTest.java
+++ b/src/test/java/org/torproject/metrics/collector/conf/ConfigurationTest.java
@@ -38,7 +38,7 @@ public class ConfigurationTest {
public void testKeyCount() {
assertEquals("The number of properties keys in enum Key changed."
+ "\n This test class should be adapted.",
- 59, Key.values().length);
+ 65, Key.values().length);
}
@Test()
diff --git a/src/test/java/org/torproject/metrics/collector/cron/CollecTorMainTest.java b/src/test/java/org/torproject/metrics/collector/cron/CollecTorMainTest.java
index d0fe173..a353dd9 100644
--- a/src/test/java/org/torproject/metrics/collector/cron/CollecTorMainTest.java
+++ b/src/test/java/org/torproject/metrics/collector/cron/CollecTorMainTest.java
@@ -69,6 +69,7 @@ public class CollecTorMainTest {
switch (marker) {
case "Relay":
case "Bridge":
+ case "BridgedbStats":
case "Exitlist":
case "OnionPerf":
case "Webstats":