summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Karsten Loesing <karsten.loesing@gmx.net> 2020-01-09 11:28:36 +0100
committer Karsten Loesing <karsten.loesing@gmx.net> 2020-01-09 11:43:04 +0100
commit 94b87099197890c1614b301a3c493f47c8003f02 (patch)
tree 189f32972fe9b071b0380571214d6be304deb16b
parent d7117f8c8ee946748eea4d2f2741195d4dbfe056 (diff)
squash! Avoid reprocessing webstats files.
- Avoid duplicating code from WebServerAccessLogPersistence for calculating storage paths.
-rw-r--r-- src/main/java/org/torproject/metrics/collector/webstats/SanitizeWeblogs.java | 64
-rw-r--r-- src/main/java/org/torproject/metrics/collector/webstats/WebServerAccessLogImpl.java | 19
2 files changed, 45 insertions, 38 deletions
diff --git a/src/main/java/org/torproject/metrics/collector/webstats/SanitizeWeblogs.java b/src/main/java/org/torproject/metrics/collector/webstats/SanitizeWeblogs.java
index 019fe66..e65f834 100644
--- a/src/main/java/org/torproject/metrics/collector/webstats/SanitizeWeblogs.java
+++ b/src/main/java/org/torproject/metrics/collector/webstats/SanitizeWeblogs.java
@@ -17,12 +17,14 @@ import org.torproject.metrics.collector.conf.Key;
import org.torproject.metrics.collector.conf.SourceType;
import org.torproject.metrics.collector.cron.CollecTorMain;
import org.torproject.metrics.collector.persist.PersistenceUtils;
+import org.torproject.metrics.collector.persist.WebServerAccessLogPersistence;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.BufferedReader;
import java.io.ByteArrayOutputStream;
+import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStream;
@@ -41,6 +43,7 @@ import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.SortedSet;
+import java.util.StringJoiner;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.function.UnaryOperator;
@@ -89,10 +92,8 @@ public class SanitizeWeblogs extends CollecTorMain {
Files.createDirectories(this.config.getPath(Key.OutputPath));
Files.createDirectories(this.config.getPath(Key.RecentPath));
Files.createDirectories(this.config.getPath(Key.StatsPath));
- this.outputDirectory = this.config.getPath(Key.OutputPath)
- .resolve(WEBSTATS);
- this.recentDirectory = this.config.getPath(Key.RecentPath)
- .resolve(WEBSTATS);
+ this.outputDirectory = this.config.getPath(Key.OutputPath);
+ this.recentDirectory = this.config.getPath(Key.RecentPath);
this.processedWebstatsFile = this.config.getPath(Key.StatsPath)
.resolve("processed-webstats");
this.limits = this.config.getBool(Key.WebstatsLimits);
@@ -178,8 +179,11 @@ public class SanitizeWeblogs extends CollecTorMain {
LocalDate[] interval = determineInterval(sanitizedLinesByDate.keySet());
for (LocalDate newDate : sanitizedLinesByDate.keySet()) {
if (newDate.isAfter(interval[0]) && newDate.isBefore(interval[1])) {
- Path outputPath = this.calculateOutputPath(virtualHost,
- physicalHost, newDate);
+ WebServerAccessLogPersistence walp
+ = new WebServerAccessLogPersistence(
+ new WebServerAccessLogImpl(virtualHost, physicalHost, newDate));
+ Path outputPath = this.outputDirectory
+ .resolve(walp.getStoragePath());
if (!Files.exists(outputPath)) {
storeDates.add(newDate);
}
@@ -221,44 +225,28 @@ public class SanitizeWeblogs extends CollecTorMain {
return newlySanitizedLinesByDate.keySet();
}
- private static DateTimeFormatter yearPattern
- = DateTimeFormatter.ofPattern("yyyy");
- private static DateTimeFormatter monthPattern
- = DateTimeFormatter.ofPattern("MM");
- private static DateTimeFormatter dayPattern
- = DateTimeFormatter.ofPattern("dd");
- private static DateTimeFormatter datePattern
- = DateTimeFormatter.BASIC_ISO_DATE;
-
- private Path calculateOutputPath(String virtualHost, String physicalHost,
- LocalDate logDate) {
- return this.outputDirectory.resolve(Paths.get(virtualHost,
- logDate.format(yearPattern), logDate.format(monthPattern),
- logDate.format(dayPattern), String.format("%s_%s_access.log_%s.xz",
- virtualHost, physicalHost, logDate.format(datePattern))));
- }
-
- private Path calculateRecentPath(String virtualHost, String physicalHost,
- LocalDate logDate) {
- return this.recentDirectory.resolve(
- Paths.get(String.format("%s_%s_access.log_%s.xz", virtualHost,
- physicalHost, logDate.format(datePattern))));
- }
-
private void storeSortedAndForget(String virtualHost, String physicalHost,
LocalDate date, Map<String, Long> lineCounts) {
+ String name = new StringJoiner(WebServerAccessLogImpl.SEP)
+ .add(virtualHost).add(physicalHost)
+ .add(WebServerAccessLogImpl.MARKER)
+ .add(date.format(DateTimeFormatter.BASIC_ISO_DATE))
+ .toString() + "." + FileType.XZ.name().toLowerCase();
+ log.debug("Storing {}.", name);
Map<String, Long> retainedLines = new TreeMap<>(lineCounts);
lineCounts.clear(); // not needed anymore
try {
- byte[] compressedBytes = toCompressedBytes(retainedLines);
- PersistenceUtils.storeToFileSystem(new byte[0], compressedBytes,
- calculateOutputPath(virtualHost, physicalHost, date),
- StandardOpenOption.CREATE_NEW);
- PersistenceUtils.storeToFileSystem(new byte[0], compressedBytes,
- calculateRecentPath(virtualHost, physicalHost, date),
- StandardOpenOption.CREATE_NEW);
+ WebServerAccessLogPersistence walp
+ = new WebServerAccessLogPersistence(
+ new WebServerAccessLogImpl(toCompressedBytes(retainedLines),
+ new File(name), name));
+ log.debug("Storing {}.", name);
+ walp.storeOut(this.outputDirectory.toString());
+ walp.storeRecent(this.recentDirectory.toString());
+ } catch (DescriptorParseException dpe) {
+ log.error("Cannot store log desriptor {}.", name, dpe);
} catch (Throwable th) { // catch all else
- log.error("Cannot store log descriptor.", th);
+ log.error("Serious problem. Cannot store log desriptor {}.", name, th);
}
}
diff --git a/src/main/java/org/torproject/metrics/collector/webstats/WebServerAccessLogImpl.java b/src/main/java/org/torproject/metrics/collector/webstats/WebServerAccessLogImpl.java
index af77c94..f091aa1 100644
--- a/src/main/java/org/torproject/metrics/collector/webstats/WebServerAccessLogImpl.java
+++ b/src/main/java/org/torproject/metrics/collector/webstats/WebServerAccessLogImpl.java
@@ -104,6 +104,25 @@ public class WebServerAccessLogImpl implements WebServerAccessLog {
}
}
+ /**
+ * Creates an empty WebServerAccessLog from the given filename parts.
+ *
+ * <p>This instance is not intended to be written to disk, as it doesn't have
+ * any content. The main intention of this instance is to compute storage
+ * paths.</p>
+ *
+ * @param virtualHost Virtual host name.
+ * @param physicalHost Physical host name.
+ * @param logDate Log date.
+ */
+ protected WebServerAccessLogImpl(String virtualHost, String physicalHost,
+ LocalDate logDate) {
+ this.descriptorFile = null;
+ this.virtualHost = virtualHost;
+ this.physicalHost = physicalHost;
+ this.logDate = logDate;
+ }
+
@Override
public InputStream decompressedByteStream() throws DescriptorParseException {
try {