summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKarsten Loesing <karsten.loesing@gmx.net>2020-03-21 15:17:32 +0100
committerKarsten Loesing <karsten.loesing@gmx.net>2020-03-22 18:57:30 +0100
commit9d4e79a57b266a454b7255ea4c31570967cd2983 (patch)
tree45a8553eadad37f3ff877e88db3e4a568f7c9abc
parent22807e8f829912ad521756a8218873079c78c274 (diff)
Encode details statuses and documents as UTF-8.task-21933
Fixes #21933.
-rw-r--r--src/main/java/org/torproject/metrics/onionoo/docs/DetailsDocument.java107
-rw-r--r--src/main/java/org/torproject/metrics/onionoo/docs/DetailsStatus.java109
-rw-r--r--src/main/java/org/torproject/metrics/onionoo/docs/DocumentStore.java57
3 files changed, 66 insertions, 207 deletions
diff --git a/src/main/java/org/torproject/metrics/onionoo/docs/DetailsDocument.java b/src/main/java/org/torproject/metrics/onionoo/docs/DetailsDocument.java
index 81892fc..3e2959c 100644
--- a/src/main/java/org/torproject/metrics/onionoo/docs/DetailsDocument.java
+++ b/src/main/java/org/torproject/metrics/onionoo/docs/DetailsDocument.java
@@ -3,32 +3,13 @@
package org.torproject.metrics.onionoo.docs;
-import org.apache.commons.lang3.StringEscapeUtils;
-
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.SortedSet;
-import java.util.TreeSet;
public class DetailsDocument extends Document {
- /* We must ensure that details files only contain ASCII characters
- * and no UTF-8 characters. While UTF-8 characters are perfectly
- * valid in JSON, this would break compatibility with existing files
- * pretty badly. We do this by escaping non-ASCII characters, e.g.,
- * \u00F2. Gson won't treat this as UTF-8, but will think that we want
- * to write six characters '\', 'u', '0', '0', 'F', '2'. The only thing
- * we'll have to do is to change back the '\\' that Gson writes for the
- * '\'. */
- private static String escapeJson(String stringToEscape) {
- return StringEscapeUtils.escapeJava(stringToEscape);
- }
-
- private static String unescapeJson(String stringToUnescape) {
- return StringEscapeUtils.unescapeJava(stringToUnescape);
- }
-
private String nickname;
public void setNickname(String nickname) {
@@ -155,31 +136,31 @@ public class DetailsDocument extends Document {
private String countryName;
public void setCountryName(String countryName) {
- this.countryName = escapeJson(countryName);
+ this.countryName = countryName;
}
public String getCountryName() {
- return unescapeJson(this.countryName);
+ return this.countryName;
}
private String regionName;
public void setRegionName(String regionName) {
- this.regionName = escapeJson(regionName);
+ this.regionName = regionName;
}
public String getRegionName() {
- return unescapeJson(this.regionName);
+ return this.regionName;
}
private String cityName;
public void setCityName(String cityName) {
- this.cityName = escapeJson(cityName);
+ this.cityName = cityName;
}
public String getCityName() {
- return unescapeJson(this.cityName);
+ return this.cityName;
}
private Float latitude;
@@ -205,21 +186,21 @@ public class DetailsDocument extends Document {
private String as;
public void setAs(String as) {
- this.as = escapeJson(as);
+ this.as = as;
}
public String getAs() {
- return unescapeJson(this.as);
+ return this.as;
}
private String asName;
public void setAsName(String asName) {
- this.asName = escapeJson(asName);
+ this.asName = asName;
}
public String getAsName() {
- return unescapeJson(this.asName);
+ return this.asName;
}
private Long consensusWeight;
@@ -235,79 +216,31 @@ public class DetailsDocument extends Document {
private String hostName;
public void setHostName(String hostName) {
- this.hostName = escapeJson(hostName);
+ this.hostName = hostName;
}
public String getHostName() {
- return unescapeJson(this.hostName);
+ return this.hostName;
}
private SortedSet<String> verifiedHostNames;
- /**
- * Creates a copy of the list with each string escaped for JSON compatibility
- * and sets this as the verified host names, unless the argument was null in
- * which case the verified host names are just set to null.
- */
public void setVerifiedHostNames(SortedSet<String> verifiedHostNames) {
- if (null == verifiedHostNames) {
- this.verifiedHostNames = null;
- return;
- }
- this.verifiedHostNames = new TreeSet<>();
- for (String hostName : verifiedHostNames) {
- this.verifiedHostNames.add(escapeJson(hostName));
- }
+ this.verifiedHostNames = verifiedHostNames;
}
- /**
- * Creates a copy of the list with each string having its escaping for JSON
- * compatibility reversed and returns the copy, unless the held reference was
- * null in which case null is returned.
- */
public SortedSet<String> getVerifiedHostNames() {
- if (null == this.verifiedHostNames) {
- return null;
- }
- SortedSet<String> verifiedHostNames = new TreeSet<>();
- for (String escapedHostName : this.verifiedHostNames) {
- verifiedHostNames.add(unescapeJson(escapedHostName));
- }
- return verifiedHostNames;
+ return this.verifiedHostNames;
}
private SortedSet<String> unverifiedHostNames;
- /**
- * Creates a copy of the list with each string escaped for JSON compatibility
- * and sets this as the unverified host names, unless the argument was null in
- * which case the unverified host names are just set to null.
- */
public void setUnverifiedHostNames(SortedSet<String> unverifiedHostNames) {
- if (null == unverifiedHostNames) {
- this.unverifiedHostNames = null;
- return;
- }
- this.unverifiedHostNames = new TreeSet<>();
- for (String hostName : unverifiedHostNames) {
- this.unverifiedHostNames.add(escapeJson(hostName));
- }
+ this.unverifiedHostNames = unverifiedHostNames;
}
- /**
- * Creates a copy of the list with each string having its escaping for JSON
- * compatibility reversed and returns the copy, unless the held reference was
- * null in which case null is returned.
- */
public SortedSet<String> getUnverifiedHostNames() {
- if (null == this.unverifiedHostNames) {
- return null;
- }
- SortedSet<String> unverifiedHostNames = new TreeSet<>();
- for (String escapedHostName : this.unverifiedHostNames) {
- unverifiedHostNames.add(unescapeJson(escapedHostName));
- }
- return unverifiedHostNames;
+ return this.unverifiedHostNames;
}
private String lastRestarted;
@@ -397,21 +330,21 @@ public class DetailsDocument extends Document {
private String contact;
public void setContact(String contact) {
- this.contact = escapeJson(contact);
+ this.contact = contact;
}
public String getContact() {
- return unescapeJson(this.contact);
+ return this.contact;
}
private String platform;
public void setPlatform(String platform) {
- this.platform = escapeJson(platform);
+ this.platform = platform;
}
public String getPlatform() {
- return unescapeJson(this.platform);
+ return this.platform;
}
private String version;
diff --git a/src/main/java/org/torproject/metrics/onionoo/docs/DetailsStatus.java b/src/main/java/org/torproject/metrics/onionoo/docs/DetailsStatus.java
index 33fef6d..488a84c 100644
--- a/src/main/java/org/torproject/metrics/onionoo/docs/DetailsStatus.java
+++ b/src/main/java/org/torproject/metrics/onionoo/docs/DetailsStatus.java
@@ -3,9 +3,6 @@
package org.torproject.metrics.onionoo.docs;
-import org.apache.commons.lang3.StringEscapeUtils;
-
-import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.SortedSet;
@@ -13,22 +10,6 @@ import java.util.TreeSet;
public class DetailsStatus extends Document {
- /* We must ensure that details files only contain ASCII characters
- * and no UTF-8 characters. While UTF-8 characters are perfectly
- * valid in JSON, this would break compatibility with existing files
- * pretty badly. We do this by escaping non-ASCII characters, e.g.,
- * \u00F2. Gson won't treat this as UTF-8, but will think that we want
- * to write six characters '\', 'u', '0', '0', 'F', '2'. The only thing
- * we'll have to do is to change back the '\\' that Gson writes for the
- * '\'. */
- private static String escapeJson(String stringToEscape) {
- return StringEscapeUtils.escapeJava(stringToEscape);
- }
-
- private static String unescapeJson(String stringToUnescape) {
- return StringEscapeUtils.unescapeJava(stringToUnescape);
- }
-
/* From most recently published server descriptor: */
private String descPublished;
@@ -107,21 +88,21 @@ public class DetailsStatus extends Document {
private String contact;
public void setContact(String contact) {
- this.contact = escapeJson(contact);
+ this.contact = contact;
}
public String getContact() {
- return unescapeJson(this.contact);
+ return this.contact;
}
private String platform;
public void setPlatform(String platform) {
- this.platform = escapeJson(platform);
+ this.platform = platform;
}
public String getPlatform() {
- return unescapeJson(this.platform);
+ return this.platform;
}
private SortedSet<String> allegedFamily;
@@ -493,51 +474,51 @@ public class DetailsStatus extends Document {
private String countryName;
public void setCountryName(String countryName) {
- this.countryName = escapeJson(countryName);
+ this.countryName = countryName;
}
public String getCountryName() {
- return unescapeJson(this.countryName);
+ return this.countryName;
}
private String regionName;
public void setRegionName(String regionName) {
- this.regionName = escapeJson(regionName);
+ this.regionName = regionName;
}
public String getRegionName() {
- return unescapeJson(this.regionName);
+ return this.regionName;
}
private String cityName;
public void setCityName(String cityName) {
- this.cityName = escapeJson(cityName);
+ this.cityName = cityName;
}
public String getCityName() {
- return unescapeJson(this.cityName);
+ return this.cityName;
}
private String asName;
public void setAsName(String asName) {
- this.asName = escapeJson(asName);
+ this.asName = asName;
}
public String getAsName() {
- return unescapeJson(this.asName);
+ return this.asName;
}
private String asNumber;
public void setAsNumber(String asNumber) {
- this.asNumber = escapeJson(asNumber);
+ this.asNumber = asNumber;
}
public String getAsNumber() {
- return unescapeJson(this.asNumber);
+ return this.asNumber;
}
/* Reverse DNS lookup result: */
@@ -545,79 +526,31 @@ public class DetailsStatus extends Document {
private String hostName;
public void setHostName(String hostName) {
- this.hostName = escapeJson(hostName);
+ this.hostName = hostName;
}
public String getHostName() {
- return unescapeJson(this.hostName);
+ return this.hostName;
}
- private List<String> verifiedHostNames;
+ private SortedSet<String> verifiedHostNames;
- /**
- * Creates a copy of the list with each string escaped for JSON compatibility
- * and sets this as the verified host names, unless the argument was null in
- * which case the verified host names are just set to null.
- */
public void setVerifiedHostNames(SortedSet<String> verifiedHostNames) {
- if (null == verifiedHostNames) {
- this.verifiedHostNames = null;
- return;
- }
- this.verifiedHostNames = new ArrayList<>();
- for (String hostName : verifiedHostNames) {
- this.verifiedHostNames.add(escapeJson(hostName));
- }
+ this.verifiedHostNames = verifiedHostNames;
}
- /**
- * Creates a copy of the list with each string having its escaping for JSON
- * compatibility reversed and returns the copy, unless the held reference was
- * null in which case null is returned.
- */
public SortedSet<String> getVerifiedHostNames() {
- if (null == this.verifiedHostNames) {
- return null;
- }
- SortedSet<String> verifiedHostNames = new TreeSet<>();
- for (String escapedHostName : this.verifiedHostNames) {
- verifiedHostNames.add(unescapeJson(escapedHostName));
- }
- return verifiedHostNames;
+ return this.verifiedHostNames;
}
private SortedSet<String> unverifiedHostNames;
- /**
- * Creates a copy of the list with each string escaped for JSON compatibility
- * and sets this as the unverified host names, unless the argument was null in
- * which case the unverified host names are just set to null.
- */
public void setUnverifiedHostNames(SortedSet<String> unverifiedHostNames) {
- if (null == unverifiedHostNames) {
- this.unverifiedHostNames = null;
- return;
- }
- this.unverifiedHostNames = new TreeSet<>();
- for (String hostName : unverifiedHostNames) {
- this.unverifiedHostNames.add(escapeJson(hostName));
- }
+ this.unverifiedHostNames = unverifiedHostNames;
}
- /**
- * Creates a copy of the list with each string having its escaping for JSON
- * compatibility reversed and returns the copy, unless the held reference was
- * null in which case null is returned.
- */
public SortedSet<String> getUnverifiedHostNames() {
- if (null == this.unverifiedHostNames) {
- return null;
- }
- SortedSet<String> unverifiedHostNames = new TreeSet<>();
- for (String escapedHostName : this.unverifiedHostNames) {
- unverifiedHostNames.add(unescapeJson(escapedHostName));
- }
- return unverifiedHostNames;
+ return this.unverifiedHostNames;
}
private List<String> advertisedOrAddresses;
diff --git a/src/main/java/org/torproject/metrics/onionoo/docs/DocumentStore.java b/src/main/java/org/torproject/metrics/onionoo/docs/DocumentStore.java
index 4ad6709..9ad866a 100644
--- a/src/main/java/org/torproject/metrics/onionoo/docs/DocumentStore.java
+++ b/src/main/java/org/torproject/metrics/onionoo/docs/DocumentStore.java
@@ -304,7 +304,9 @@ public class DocumentStore {
String documentString;
if (document.getDocumentString() != null) {
documentString = document.getDocumentString();
- } else if (document instanceof BandwidthDocument
+ } else if (document instanceof DetailsStatus
+ || document instanceof DetailsDocument
+ || document instanceof BandwidthDocument
|| document instanceof WeightsDocument
|| document instanceof ClientsDocument
|| document instanceof UptimeDocument) {
@@ -315,31 +317,6 @@ public class DocumentStore {
document.getClass().getName(), e);
return false;
}
- } else if (document instanceof DetailsStatus
- || document instanceof DetailsDocument) {
- /* We must ensure that details files only contain ASCII characters
- * and no UTF-8 characters. While UTF-8 characters are perfectly
- * valid in JSON, this would break compatibility with existing files
- * pretty badly. We already make sure that all strings in details
- * objects are escaped JSON, e.g., \u00F2. When Gson serlializes
- * this string, it escapes the \ to \\, hence writes \\u00F2. We
- * need to undo this and change \\u00F2 back to \u00F2. */
- try {
- documentString = FormattingUtils.replaceValidUtf(
- objectMapper.writeValueAsString(document));
- } catch (JsonProcessingException e) {
- log.error("Serializing failed for type {}.",
- document.getClass().getName(), e);
- return false;
- }
- /* Existing details statuses don't contain opening and closing curly
- * brackets, so we should remove them from new details statuses,
- * too. */
- if (document instanceof DetailsStatus) {
- documentString = documentString.substring(
- documentString.indexOf("{") + 1,
- documentString.lastIndexOf("}"));
- }
} else if (document instanceof BandwidthStatus
|| document instanceof WeightsStatus
|| document instanceof ClientsStatus
@@ -500,7 +477,7 @@ public class DocumentStore {
/* Document file is empty. */
return null;
}
- documentString = new String(allData, StandardCharsets.US_ASCII);
+ documentString = new String(allData, StandardCharsets.UTF_8);
this.retrievedFiles++;
this.retrievedBytes += documentString.length();
} catch (IOException e) {
@@ -516,7 +493,26 @@ public class DocumentStore {
return this.retrieveUnparsedDocumentFile(documentType,
documentString);
} else if (documentType.equals(DetailsDocument.class)
- || documentType.equals(BandwidthDocument.class)
+ || documentType.equals(DetailsStatus.class)) {
+ /* Details statuses and documents written by older versions had UTF-8
+ * characters escaped as \\u1234 in order to store them encoded as ASCII.
+ * This was undone in memory in getters and setters. This version does not
+ * escape or unescape UTF-8 characters in memory anymore, but it expects
+ * files to either use UTF-8 encoding or to contain valid escaped UTF-8
+ * characters like \u1234. Further, details statuses written by older
+ * versions did not start and end with braces. Fixing both things here. */
+ StringBuilder documentStringBuilder = new StringBuilder();
+ if (!documentString.startsWith("{")) {
+ documentStringBuilder.append('{');
+ }
+ documentStringBuilder.append(documentString.replaceAll(
+ "\\\\(\\\\u[0-9a-fA-F]{4})", "$1"));
+ if (!documentString.endsWith("}")) {
+ documentStringBuilder.append('}');
+ }
+ return this.retrieveParsedDocumentFile(documentType,
+ documentStringBuilder.toString());
+ } else if (documentType.equals(BandwidthDocument.class)
|| documentType.equals(WeightsDocument.class)
|| documentType.equals(ClientsDocument.class)
|| documentType.equals(UptimeDocument.class)) {
@@ -528,9 +524,6 @@ public class DocumentStore {
|| documentType.equals(UptimeStatus.class)
|| documentType.equals(UpdateStatus.class)) {
return this.retrieveParsedStatusFile(documentType, documentString);
- } else if (documentType.equals(DetailsStatus.class)) {
- return this.retrieveParsedDocumentFile(documentType, "{"
- + documentString + "}");
} else {
log.error("Parsing is not supported for type {}.",
documentType.getName());
@@ -783,7 +776,7 @@ public class DocumentStore {
throws IOException {
try (BufferedOutputStream bos = new BufferedOutputStream(
new FileOutputStream(file))) {
- bos.write(content.getBytes(StandardCharsets.US_ASCII));
+ bos.write(content.getBytes(StandardCharsets.UTF_8));
}
}