diff options
| author | Karsten Loesing <karsten.loesing@gmx.net> | 2020-03-21 15:17:32 +0100 |
|---|---|---|
| committer | Karsten Loesing <karsten.loesing@gmx.net> | 2020-03-22 18:57:30 +0100 |
| commit | 9d4e79a57b266a454b7255ea4c31570967cd2983 (patch) | |
| tree | 45a8553eadad37f3ff877e88db3e4a568f7c9abc | |
| parent | 22807e8f829912ad521756a8218873079c78c274 (diff) | |
Encode details statuses and documents as UTF-8.task-21933
Fixes #21933.
3 files changed, 66 insertions, 207 deletions
diff --git a/src/main/java/org/torproject/metrics/onionoo/docs/DetailsDocument.java b/src/main/java/org/torproject/metrics/onionoo/docs/DetailsDocument.java index 81892fc..3e2959c 100644 --- a/src/main/java/org/torproject/metrics/onionoo/docs/DetailsDocument.java +++ b/src/main/java/org/torproject/metrics/onionoo/docs/DetailsDocument.java @@ -3,32 +3,13 @@ package org.torproject.metrics.onionoo.docs; -import org.apache.commons.lang3.StringEscapeUtils; - import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.SortedSet; -import java.util.TreeSet; public class DetailsDocument extends Document { - /* We must ensure that details files only contain ASCII characters - * and no UTF-8 characters. While UTF-8 characters are perfectly - * valid in JSON, this would break compatibility with existing files - * pretty badly. We do this by escaping non-ASCII characters, e.g., - * \u00F2. Gson won't treat this as UTF-8, but will think that we want - * to write six characters '\', 'u', '0', '0', 'F', '2'. The only thing - * we'll have to do is to change back the '\\' that Gson writes for the - * '\'. */ - private static String escapeJson(String stringToEscape) { - return StringEscapeUtils.escapeJava(stringToEscape); - } - - private static String unescapeJson(String stringToUnescape) { - return StringEscapeUtils.unescapeJava(stringToUnescape); - } - private String nickname; public void setNickname(String nickname) { @@ -155,31 +136,31 @@ public class DetailsDocument extends Document { private String countryName; public void setCountryName(String countryName) { - this.countryName = escapeJson(countryName); + this.countryName = countryName; } public String getCountryName() { - return unescapeJson(this.countryName); + return this.countryName; } private String regionName; public void setRegionName(String regionName) { - this.regionName = escapeJson(regionName); + this.regionName = regionName; } public String getRegionName() { - return unescapeJson(this.regionName); + return this.regionName; } private String cityName; public void setCityName(String cityName) { - this.cityName = escapeJson(cityName); + this.cityName = cityName; } public String getCityName() { - return unescapeJson(this.cityName); + return this.cityName; } private Float latitude; @@ -205,21 +186,21 @@ public class DetailsDocument extends Document { private String as; public void setAs(String as) { - this.as = escapeJson(as); + this.as = as; } public String getAs() { - return unescapeJson(this.as); + return this.as; } private String asName; public void setAsName(String asName) { - this.asName = escapeJson(asName); + this.asName = asName; } public String getAsName() { - return unescapeJson(this.asName); + return this.asName; } private Long consensusWeight; @@ -235,79 +216,31 @@ public class DetailsDocument extends Document { private String hostName; public void setHostName(String hostName) { - this.hostName = escapeJson(hostName); + this.hostName = hostName; } public String getHostName() { - return unescapeJson(this.hostName); + return this.hostName; } private SortedSet<String> verifiedHostNames; - /** - * Creates a copy of the list with each string escaped for JSON compatibility - * and sets this as the verified host names, unless the argument was null in - * which case the verified host names are just set to null. - */ public void setVerifiedHostNames(SortedSet<String> verifiedHostNames) { - if (null == verifiedHostNames) { - this.verifiedHostNames = null; - return; - } - this.verifiedHostNames = new TreeSet<>(); - for (String hostName : verifiedHostNames) { - this.verifiedHostNames.add(escapeJson(hostName)); - } + this.verifiedHostNames = verifiedHostNames; } - /** - * Creates a copy of the list with each string having its escaping for JSON - * compatibility reversed and returns the copy, unless the held reference was - * null in which case null is returned. - */ public SortedSet<String> getVerifiedHostNames() { - if (null == this.verifiedHostNames) { - return null; - } - SortedSet<String> verifiedHostNames = new TreeSet<>(); - for (String escapedHostName : this.verifiedHostNames) { - verifiedHostNames.add(unescapeJson(escapedHostName)); - } - return verifiedHostNames; + return this.verifiedHostNames; } private SortedSet<String> unverifiedHostNames; - /** - * Creates a copy of the list with each string escaped for JSON compatibility - * and sets this as the unverified host names, unless the argument was null in - * which case the unverified host names are just set to null. - */ public void setUnverifiedHostNames(SortedSet<String> unverifiedHostNames) { - if (null == unverifiedHostNames) { - this.unverifiedHostNames = null; - return; - } - this.unverifiedHostNames = new TreeSet<>(); - for (String hostName : unverifiedHostNames) { - this.unverifiedHostNames.add(escapeJson(hostName)); - } + this.unverifiedHostNames = unverifiedHostNames; } - /** - * Creates a copy of the list with each string having its escaping for JSON - * compatibility reversed and returns the copy, unless the held reference was - * null in which case null is returned. - */ public SortedSet<String> getUnverifiedHostNames() { - if (null == this.unverifiedHostNames) { - return null; - } - SortedSet<String> unverifiedHostNames = new TreeSet<>(); - for (String escapedHostName : this.unverifiedHostNames) { - unverifiedHostNames.add(unescapeJson(escapedHostName)); - } - return unverifiedHostNames; + return this.unverifiedHostNames; } private String lastRestarted; @@ -397,21 +330,21 @@ public class DetailsDocument extends Document { private String contact; public void setContact(String contact) { - this.contact = escapeJson(contact); + this.contact = contact; } public String getContact() { - return unescapeJson(this.contact); + return this.contact; } private String platform; public void setPlatform(String platform) { - this.platform = escapeJson(platform); + this.platform = platform; } public String getPlatform() { - return unescapeJson(this.platform); + return this.platform; } private String version; diff --git a/src/main/java/org/torproject/metrics/onionoo/docs/DetailsStatus.java b/src/main/java/org/torproject/metrics/onionoo/docs/DetailsStatus.java index 33fef6d..488a84c 100644 --- a/src/main/java/org/torproject/metrics/onionoo/docs/DetailsStatus.java +++ b/src/main/java/org/torproject/metrics/onionoo/docs/DetailsStatus.java @@ -3,9 +3,6 @@ package org.torproject.metrics.onionoo.docs; -import org.apache.commons.lang3.StringEscapeUtils; - -import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.SortedSet; @@ -13,22 +10,6 @@ import java.util.TreeSet; public class DetailsStatus extends Document { - /* We must ensure that details files only contain ASCII characters - * and no UTF-8 characters. While UTF-8 characters are perfectly - * valid in JSON, this would break compatibility with existing files - * pretty badly. We do this by escaping non-ASCII characters, e.g., - * \u00F2. Gson won't treat this as UTF-8, but will think that we want - * to write six characters '\', 'u', '0', '0', 'F', '2'. The only thing - * we'll have to do is to change back the '\\' that Gson writes for the - * '\'. */ - private static String escapeJson(String stringToEscape) { - return StringEscapeUtils.escapeJava(stringToEscape); - } - - private static String unescapeJson(String stringToUnescape) { - return StringEscapeUtils.unescapeJava(stringToUnescape); - } - /* From most recently published server descriptor: */ private String descPublished; @@ -107,21 +88,21 @@ public class DetailsStatus extends Document { private String contact; public void setContact(String contact) { - this.contact = escapeJson(contact); + this.contact = contact; } public String getContact() { - return unescapeJson(this.contact); + return this.contact; } private String platform; public void setPlatform(String platform) { - this.platform = escapeJson(platform); + this.platform = platform; } public String getPlatform() { - return unescapeJson(this.platform); + return this.platform; } private SortedSet<String> allegedFamily; @@ -493,51 +474,51 @@ public class DetailsStatus extends Document { private String countryName; public void setCountryName(String countryName) { - this.countryName = escapeJson(countryName); + this.countryName = countryName; } public String getCountryName() { - return unescapeJson(this.countryName); + return this.countryName; } private String regionName; public void setRegionName(String regionName) { - this.regionName = escapeJson(regionName); + this.regionName = regionName; } public String getRegionName() { - return unescapeJson(this.regionName); + return this.regionName; } private String cityName; public void setCityName(String cityName) { - this.cityName = escapeJson(cityName); + this.cityName = cityName; } public String getCityName() { - return unescapeJson(this.cityName); + return this.cityName; } private String asName; public void setAsName(String asName) { - this.asName = escapeJson(asName); + this.asName = asName; } public String getAsName() { - return unescapeJson(this.asName); + return this.asName; } private String asNumber; public void setAsNumber(String asNumber) { - this.asNumber = escapeJson(asNumber); + this.asNumber = asNumber; } public String getAsNumber() { - return unescapeJson(this.asNumber); + return this.asNumber; } /* Reverse DNS lookup result: */ @@ -545,79 +526,31 @@ public class DetailsStatus extends Document { private String hostName; public void setHostName(String hostName) { - this.hostName = escapeJson(hostName); + this.hostName = hostName; } public String getHostName() { - return unescapeJson(this.hostName); + return this.hostName; } - private List<String> verifiedHostNames; + private SortedSet<String> verifiedHostNames; - /** - * Creates a copy of the list with each string escaped for JSON compatibility - * and sets this as the verified host names, unless the argument was null in - * which case the verified host names are just set to null. - */ public void setVerifiedHostNames(SortedSet<String> verifiedHostNames) { - if (null == verifiedHostNames) { - this.verifiedHostNames = null; - return; - } - this.verifiedHostNames = new ArrayList<>(); - for (String hostName : verifiedHostNames) { - this.verifiedHostNames.add(escapeJson(hostName)); - } + this.verifiedHostNames = verifiedHostNames; } - /** - * Creates a copy of the list with each string having its escaping for JSON - * compatibility reversed and returns the copy, unless the held reference was - * null in which case null is returned. - */ public SortedSet<String> getVerifiedHostNames() { - if (null == this.verifiedHostNames) { - return null; - } - SortedSet<String> verifiedHostNames = new TreeSet<>(); - for (String escapedHostName : this.verifiedHostNames) { - verifiedHostNames.add(unescapeJson(escapedHostName)); - } - return verifiedHostNames; + return this.verifiedHostNames; } private SortedSet<String> unverifiedHostNames; - /** - * Creates a copy of the list with each string escaped for JSON compatibility - * and sets this as the unverified host names, unless the argument was null in - * which case the unverified host names are just set to null. - */ public void setUnverifiedHostNames(SortedSet<String> unverifiedHostNames) { - if (null == unverifiedHostNames) { - this.unverifiedHostNames = null; - return; - } - this.unverifiedHostNames = new TreeSet<>(); - for (String hostName : unverifiedHostNames) { - this.unverifiedHostNames.add(escapeJson(hostName)); - } + this.unverifiedHostNames = unverifiedHostNames; } - /** - * Creates a copy of the list with each string having its escaping for JSON - * compatibility reversed and returns the copy, unless the held reference was - * null in which case null is returned. - */ public SortedSet<String> getUnverifiedHostNames() { - if (null == this.unverifiedHostNames) { - return null; - } - SortedSet<String> unverifiedHostNames = new TreeSet<>(); - for (String escapedHostName : this.unverifiedHostNames) { - unverifiedHostNames.add(unescapeJson(escapedHostName)); - } - return unverifiedHostNames; + return this.unverifiedHostNames; } private List<String> advertisedOrAddresses; diff --git a/src/main/java/org/torproject/metrics/onionoo/docs/DocumentStore.java b/src/main/java/org/torproject/metrics/onionoo/docs/DocumentStore.java index 4ad6709..9ad866a 100644 --- a/src/main/java/org/torproject/metrics/onionoo/docs/DocumentStore.java +++ b/src/main/java/org/torproject/metrics/onionoo/docs/DocumentStore.java @@ -304,7 +304,9 @@ public class DocumentStore { String documentString; if (document.getDocumentString() != null) { documentString = document.getDocumentString(); - } else if (document instanceof BandwidthDocument + } else if (document instanceof DetailsStatus + || document instanceof DetailsDocument + || document instanceof BandwidthDocument || document instanceof WeightsDocument || document instanceof ClientsDocument || document instanceof UptimeDocument) { @@ -315,31 +317,6 @@ public class DocumentStore { document.getClass().getName(), e); return false; } - } else if (document instanceof DetailsStatus - || document instanceof DetailsDocument) { - /* We must ensure that details files only contain ASCII characters - * and no UTF-8 characters. While UTF-8 characters are perfectly - * valid in JSON, this would break compatibility with existing files - * pretty badly. We already make sure that all strings in details - * objects are escaped JSON, e.g., \u00F2. When Gson serlializes - * this string, it escapes the \ to \\, hence writes \\u00F2. We - * need to undo this and change \\u00F2 back to \u00F2. */ - try { - documentString = FormattingUtils.replaceValidUtf( - objectMapper.writeValueAsString(document)); - } catch (JsonProcessingException e) { - log.error("Serializing failed for type {}.", - document.getClass().getName(), e); - return false; - } - /* Existing details statuses don't contain opening and closing curly - * brackets, so we should remove them from new details statuses, - * too. */ - if (document instanceof DetailsStatus) { - documentString = documentString.substring( - documentString.indexOf("{") + 1, - documentString.lastIndexOf("}")); - } } else if (document instanceof BandwidthStatus || document instanceof WeightsStatus || document instanceof ClientsStatus @@ -500,7 +477,7 @@ public class DocumentStore { /* Document file is empty. */ return null; } - documentString = new String(allData, StandardCharsets.US_ASCII); + documentString = new String(allData, StandardCharsets.UTF_8); this.retrievedFiles++; this.retrievedBytes += documentString.length(); } catch (IOException e) { @@ -516,7 +493,26 @@ public class DocumentStore { return this.retrieveUnparsedDocumentFile(documentType, documentString); } else if (documentType.equals(DetailsDocument.class) - || documentType.equals(BandwidthDocument.class) + || documentType.equals(DetailsStatus.class)) { + /* Details statuses and documents written by older versions had UTF-8 + * characters escaped as \\u1234 in order to store them encoded as ASCII. + * This was undone in memory in getters and setters. This version does not + * escape or unescape UTF-8 characters in memory anymore, but it expects + * files to either use UTF-8 encoding or to contain valid escaped UTF-8 + * characters like \u1234. Further, details statuses written by older + * versions did not start and end with braces. Fixing both things here. */ + StringBuilder documentStringBuilder = new StringBuilder(); + if (!documentString.startsWith("{")) { + documentStringBuilder.append('{'); + } + documentStringBuilder.append(documentString.replaceAll( + "\\\\(\\\\u[0-9a-fA-F]{4})", "$1")); + if (!documentString.endsWith("}")) { + documentStringBuilder.append('}'); + } + return this.retrieveParsedDocumentFile(documentType, + documentStringBuilder.toString()); + } else if (documentType.equals(BandwidthDocument.class) || documentType.equals(WeightsDocument.class) || documentType.equals(ClientsDocument.class) || documentType.equals(UptimeDocument.class)) { @@ -528,9 +524,6 @@ public class DocumentStore { || documentType.equals(UptimeStatus.class) || documentType.equals(UpdateStatus.class)) { return this.retrieveParsedStatusFile(documentType, documentString); - } else if (documentType.equals(DetailsStatus.class)) { - return this.retrieveParsedDocumentFile(documentType, "{" - + documentString + "}"); } else { log.error("Parsing is not supported for type {}.", documentType.getName()); @@ -783,7 +776,7 @@ public class DocumentStore { throws IOException { try (BufferedOutputStream bos = new BufferedOutputStream( new FileOutputStream(file))) { - bos.write(content.getBytes(StandardCharsets.US_ASCII)); + bos.write(content.getBytes(StandardCharsets.UTF_8)); } } |
