diff options
| author | Karsten Loesing <karsten.loesing@gmx.net> | 2020-01-09 11:28:36 +0100 |
|---|---|---|
| committer | Karsten Loesing <karsten.loesing@gmx.net> | 2020-01-09 11:43:04 +0100 |
| commit | 94b87099197890c1614b301a3c493f47c8003f02 (patch) | |
| tree | 189f32972fe9b071b0380571214d6be304deb16b | |
| parent | d7117f8c8ee946748eea4d2f2741195d4dbfe056 (diff) | |
squash! Avoid reprocessing webstats files.
- Avoid duplicating code from WebServerAccessLogPersistence for
calculating storage paths.
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/main/java/org/torproject/metrics/collector/webstats/SanitizeWeblogs.java | 64 |
| -rw-r--r-- | src/main/java/org/torproject/metrics/collector/webstats/WebServerAccessLogImpl.java | 19 |
2 files changed, 45 insertions, 38 deletions
diff --git a/src/main/java/org/torproject/metrics/collector/webstats/SanitizeWeblogs.java b/src/main/java/org/torproject/metrics/collector/webstats/SanitizeWeblogs.java index 019fe66..e65f834 100644 --- a/src/main/java/org/torproject/metrics/collector/webstats/SanitizeWeblogs.java +++ b/src/main/java/org/torproject/metrics/collector/webstats/SanitizeWeblogs.java @@ -17,12 +17,14 @@ import org.torproject.metrics.collector.conf.Key; import org.torproject.metrics.collector.conf.SourceType; import org.torproject.metrics.collector.cron.CollecTorMain; import org.torproject.metrics.collector.persist.PersistenceUtils; +import org.torproject.metrics.collector.persist.WebServerAccessLogPersistence; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.BufferedReader; import java.io.ByteArrayOutputStream; +import java.io.File; import java.io.IOException; import java.io.InputStreamReader; import java.io.OutputStream; @@ -41,6 +43,7 @@ import java.util.Map; import java.util.Optional; import java.util.Set; import java.util.SortedSet; +import java.util.StringJoiner; import java.util.TreeMap; import java.util.TreeSet; import java.util.function.UnaryOperator; @@ -89,10 +92,8 @@ public class SanitizeWeblogs extends CollecTorMain { Files.createDirectories(this.config.getPath(Key.OutputPath)); Files.createDirectories(this.config.getPath(Key.RecentPath)); Files.createDirectories(this.config.getPath(Key.StatsPath)); - this.outputDirectory = this.config.getPath(Key.OutputPath) - .resolve(WEBSTATS); - this.recentDirectory = this.config.getPath(Key.RecentPath) - .resolve(WEBSTATS); + this.outputDirectory = this.config.getPath(Key.OutputPath); + this.recentDirectory = this.config.getPath(Key.RecentPath); this.processedWebstatsFile = this.config.getPath(Key.StatsPath) .resolve("processed-webstats"); this.limits = this.config.getBool(Key.WebstatsLimits); @@ -178,8 +179,11 @@ public class SanitizeWeblogs extends CollecTorMain { LocalDate[] interval = 
determineInterval(sanitizedLinesByDate.keySet()); for (LocalDate newDate : sanitizedLinesByDate.keySet()) { if (newDate.isAfter(interval[0]) && newDate.isBefore(interval[1])) { - Path outputPath = this.calculateOutputPath(virtualHost, - physicalHost, newDate); + WebServerAccessLogPersistence walp + = new WebServerAccessLogPersistence( + new WebServerAccessLogImpl(virtualHost, physicalHost, newDate)); + Path outputPath = this.outputDirectory + .resolve(walp.getStoragePath()); if (!Files.exists(outputPath)) { storeDates.add(newDate); } @@ -221,44 +225,28 @@ public class SanitizeWeblogs extends CollecTorMain { return newlySanitizedLinesByDate.keySet(); } - private static DateTimeFormatter yearPattern - = DateTimeFormatter.ofPattern("yyyy"); - private static DateTimeFormatter monthPattern - = DateTimeFormatter.ofPattern("MM"); - private static DateTimeFormatter dayPattern - = DateTimeFormatter.ofPattern("dd"); - private static DateTimeFormatter datePattern - = DateTimeFormatter.BASIC_ISO_DATE; - - private Path calculateOutputPath(String virtualHost, String physicalHost, - LocalDate logDate) { - return this.outputDirectory.resolve(Paths.get(virtualHost, - logDate.format(yearPattern), logDate.format(monthPattern), - logDate.format(dayPattern), String.format("%s_%s_access.log_%s.xz", - virtualHost, physicalHost, logDate.format(datePattern)))); - } - - private Path calculateRecentPath(String virtualHost, String physicalHost, - LocalDate logDate) { - return this.recentDirectory.resolve( - Paths.get(String.format("%s_%s_access.log_%s.xz", virtualHost, - physicalHost, logDate.format(datePattern)))); - } - private void storeSortedAndForget(String virtualHost, String physicalHost, LocalDate date, Map<String, Long> lineCounts) { + String name = new StringJoiner(WebServerAccessLogImpl.SEP) + .add(virtualHost).add(physicalHost) + .add(WebServerAccessLogImpl.MARKER) + .add(date.format(DateTimeFormatter.BASIC_ISO_DATE)) + .toString() + "." 
+ FileType.XZ.name().toLowerCase(); + log.debug("Storing {}.", name); Map<String, Long> retainedLines = new TreeMap<>(lineCounts); lineCounts.clear(); // not needed anymore try { - byte[] compressedBytes = toCompressedBytes(retainedLines); - PersistenceUtils.storeToFileSystem(new byte[0], compressedBytes, - calculateOutputPath(virtualHost, physicalHost, date), - StandardOpenOption.CREATE_NEW); - PersistenceUtils.storeToFileSystem(new byte[0], compressedBytes, - calculateRecentPath(virtualHost, physicalHost, date), - StandardOpenOption.CREATE_NEW); + WebServerAccessLogPersistence walp + = new WebServerAccessLogPersistence( + new WebServerAccessLogImpl(toCompressedBytes(retainedLines), + new File(name), name)); + log.debug("Storing {}.", name); + walp.storeOut(this.outputDirectory.toString()); + walp.storeRecent(this.recentDirectory.toString()); + } catch (DescriptorParseException dpe) { + log.error("Cannot store log desriptor {}.", name, dpe); } catch (Throwable th) { // catch all else - log.error("Cannot store log descriptor.", th); + log.error("Serious problem. Cannot store log desriptor {}.", name, th); } } diff --git a/src/main/java/org/torproject/metrics/collector/webstats/WebServerAccessLogImpl.java b/src/main/java/org/torproject/metrics/collector/webstats/WebServerAccessLogImpl.java index af77c94..f091aa1 100644 --- a/src/main/java/org/torproject/metrics/collector/webstats/WebServerAccessLogImpl.java +++ b/src/main/java/org/torproject/metrics/collector/webstats/WebServerAccessLogImpl.java @@ -104,6 +104,25 @@ public class WebServerAccessLogImpl implements WebServerAccessLog { } } + /** + * Creates an empty WebServerAccessLog from the given filename parts. + * + * <p>This instance is not intended to be written to disk, as it doesn't have + * any content. The main intention of this instance is to compute storage + * paths.</p> + * + * @param virtualHost Virtual host name. + * @param physicalHost Physical host name. + * @param logDate Log date. 
+ */ + protected WebServerAccessLogImpl(String virtualHost, String physicalHost, + LocalDate logDate) { + this.descriptorFile = null; + this.virtualHost = virtualHost; + this.physicalHost = physicalHost; + this.logDate = logDate; + } + @Override public InputStream decompressedByteStream() throws DescriptorParseException { try { |
