summaryrefslogtreecommitdiff
path: root/onionperf/onionperf
blob: 6078d46819665e2297a3b74ccccb21af5c1d8eae (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
#!/usr/bin/env python3

'''
  OnionPerf
  Authored by Rob Jansen, 2015
  See LICENSE for licensing information
'''

import sys, os, argparse, logging, re, datetime
from itertools import cycle
from socket import gethostname

import onionperf.util as util
from onionperf.monitor import get_supported_torctl_events

# Description text for the top-level parser (shown by `onionperf --help`).
DESC_MAIN = """
OnionPerf is a utility to monitor, measure, analyze, and visualize the
performance of Tor and Onion Services.

OnionPerf must be run with a subcommand to specify a mode of operation.
The primary mode is measure, but tools to monitor a running Tor process and
to analyze and visualize the Tor and TGen output that is collected in the
measure mode are also available.

For more information, see https://github.com/robgjansen/onionperf and
https://www.torproject.org.
"""
# Help text attached to the group of subcommands of the top-level parser.
HELP_MAIN = """
Use 'onionperf <subcommand> --help' for more info
"""

# Description and short help for the `monitor` subcommand.
DESC_MONITOR = """
This utility connects to a running Tor process on the Tor control port,
registers for several asynchronous events, and logs the events to disk
as they occur over time.
"""
HELP_MONITOR = """
Connect to Tor and log controller events to file
"""

# Description and short help for the `measure` subcommand.
DESC_MEASURE = """
OnionPerf uses multiple processes and threads to download random data through
Tor while tracking the performance of those downloads. The data is served and
fetched on localhost using two TGen (traffic generator) processes, but is
transferred through Tor using an ephemeral Tor Onion Service and a Tor
client process. Tor control information and TGen performance statistics are
logged to disk and can be later analyzed (using `onionperf analyze`) to
visualize changes in Tor performance over time.
"""
HELP_MEASURE = """
Measure Tor and Onion Service Performance using TGen
"""

# Description and short help for the `analyze` subcommand.
DESC_ANALYZE = """
Parse results from the TGen traffic generator and Tor.

This subcommand processes TGen and Tor log files and stores the processed
data in json format for plotting. It was written so that the log files
need never be stored on disk decompressed, which is useful when log file
sizes reach tens of gigabytes.

The standard way to run this subcommand is to give the path to a TGen and/or
a TorCtl file (e.g., those produced with the `measure` subcommand) using the
`--tgen` and `--torctl` options, and the statistics file resulting from the
analysis will be dumped to `onionperf.analysis.json.xz`.
Another way to run this subcommand is to give two paths to directories
containing TGen and TorCtl files to have files matched by filename and analysis
files dumped to `%Y-%m-%d.onionperf.analysis.json.xz`.
(See https://collector.torproject.org/#type-onionperf.)
Stats files in the default Torperf format can also be exported.
"""

HELP_ANALYZE = """
Analyze Tor and TGen output
"""
# Description and short help for the `visualize` subcommand.
DESC_VISUALIZE = """
Loads an OnionPerf json file, e.g., one produced with the `analyze` subcommand,
and plots various interesting performance metrics to PDF files.
"""
HELP_VISUALIZE = """
Visualize OnionPerf analysis results
"""

# Configure the root logger at INFO level; every record carries both a
# human-readable timestamp (asctime) and the raw epoch time (created).
logging.basicConfig(format='%(asctime)s %(created)f [onionperf] [%(levelname)s] %(message)s', level=logging.INFO, datefmt='%Y-%m-%d %H:%M:%S')
# Only let warnings and above through from the "stem" logger.
logging.getLogger("stem").setLevel(logging.WARN)


class CustomHelpFormatter(argparse.ArgumentDefaultsHelpFormatter):
    """Help formatter that appends argument defaults to help strings and
    keeps description text exactly as it was written.

    Two hooks of the base formatter are overridden:

    * ``_fill_text`` indents each line of the description instead of
      re-wrapping it, so the line breaks of the DESC_* strings survive.
    * ``_format_action`` strips the ' [LABEL ...]' suffix from any formatted
      action text that mentions '--data'.
    """

    def _fill_text(self, text, width, indent):
        # keepends=True preserves the original newlines; `width` is
        # intentionally ignored because no wrapping is performed
        indented = []
        for line in text.splitlines(True):
            indented.append(indent + line)
        return ''.join(indented)

    def _format_action(self, action):
        formatted = super()._format_action(action)
        if '--data' not in formatted:
            return formatted
        return formatted.replace(' [LABEL ...]', '')

def main():
    """Build the OnionPerf command line interface and run the chosen subcommand.

    Four subcommands are registered (monitor, measure, analyze, visualize);
    each one stores its handler function in the `func` attribute of the
    parsed arguments, and that handler is called with the parsed arguments.

    When no arguments are given at all, or when arguments are given but no
    subcommand was selected, the top-level help is printed and the process
    exits with status 1.
    """
    hostname = gethostname().split('.')[0]

    # argparse.RawDescriptionHelpFormatter, RawTextHelpFormatter, RawDescriptionHelpFormatter
    my_formatter_class = CustomHelpFormatter

    # construct the options
    main_parser = argparse.ArgumentParser(description=DESC_MAIN, formatter_class=my_formatter_class)

    sub_parser = main_parser.add_subparsers(help=HELP_MAIN)

    # monitor
    monitor_parser = sub_parser.add_parser('monitor', description=DESC_MONITOR, help=HELP_MONITOR,
        formatter_class=my_formatter_class)
    monitor_parser.set_defaults(func=monitor, formatter_class=my_formatter_class)

    monitor_parser.add_argument('-p', '--port',
        help="""the Tor control port number N""",
        metavar="N", type=type_nonnegative_integer,
        action="store", dest="ctlport",
        default=9051)

    monitor_parser.add_argument('-l', '--log',
        help="""a file PATH to log Tor controller output, may be '-' for STDOUT""",
        metavar="PATH", type=type_str_file_path_out,
        action="store", dest="logpath",
        default="-")

    # query the supported events once; they serve as both default and choices
    supported_events = get_supported_torctl_events()

    monitor_parser.add_argument('-e', '--events',
        help="""the Tor control EVENT(s) recognized by stem that should be monitored and logged""",
        metavar="EVENT", nargs='+',
        action="store", dest="events",
        default=supported_events,
        choices=supported_events)

    monitor_parser.add_argument('-c', '--custom-events',
        help="""custom Tor control EVENT(s) not recognized by stem that should be monitored and logged""",
        metavar="CUSTOM_EVENT", nargs='+',
        action="store", dest="custom_events",
        default=[])

    # measure
    measure_parser = sub_parser.add_parser('measure', description=DESC_MEASURE, help=HELP_MEASURE,
        formatter_class=my_formatter_class)
    measure_parser.set_defaults(func=measure, formatter_class=my_formatter_class)

    measure_parser.add_argument('--tor',
        help="""a file PATH to a Tor binary""",
        metavar="PATH", type=type_str_path_in,
        action="store", dest="torpath",
        default=util.which("tor"))

    measure_parser.add_argument('--tgen',
        help="""a file PATH to a TGen binary""",
        metavar="PATH", type=type_str_path_in,
        action="store", dest="tgenpath",
        default=util.which("tgen"))

    measure_parser.add_argument('--oneshot',
        help="""Enables oneshot mode, onionperf closes on successfully downloading a file""",
        action="store_true", dest="oneshot",
        default=False)

    measure_parser.add_argument('--additional-client-conf',
        help="""Additional configuration lines for the Tor client, for example bridge lines""",
        metavar="CONFIG", type=str,
        action="store", dest="additional_client_conf",
        default="")

    measure_parser.add_argument('--torclient-conf-file',
        help="""Configuration file for the Tor client""",
        metavar="CONFIG", type=str,
        action="store", dest="torclient_conf_file",
        default="")

    measure_parser.add_argument('--torserver-conf-file',
        help="""Configuration file for the Tor server""",
        metavar="CONFIG", type=str,
        action="store", dest="torserver_conf_file",
        default="")

    measure_parser.add_argument('--tgen-connect-ip',
        help="""the TGen client connect IP address ADDR, or 0.0.0.0 to do an external IP lookup; must be Internet-accessible for non-onion downloads to work""",
        metavar="ADDR", type=type_str_ip_in,
        action="store", dest="tgenconnectip",
        default="0.0.0.0")

    measure_parser.add_argument('--tgen-listen-port',
        help="""the TGen server listen port number N, must either be equal to '--tgen-connect-port' or a firewall rule should be in place to forward '--tgen-connect-port' to this port""",
        metavar="N", type=type_nonnegative_integer,
        action="store", dest="tgenlistenport",
        default=8080)

    measure_parser.add_argument('--tgen-connect-port',
        help="""the TGen client connect port number N, must be Internet-accessible for non-onion downloads to work""",
        metavar="N", type=type_nonnegative_integer,
        action="store", dest="tgenconnectport",
        default=8080)

    # --onion-only and --inet-only cannot be combined
    onion_or_inet_only_group = measure_parser.add_mutually_exclusive_group()

    onion_or_inet_only_group.add_argument('-o', '--onion-only',
        help="""only measure download times over Tor to an ephemeral onion service""",
        action="store_true", dest="onion_only",
        default=False)

    onion_or_inet_only_group.add_argument('-i', '--inet-only',
        help="""only measure download times over Tor exiting to a public webserver""",
        action="store_true", dest="inet_only",
        default=False)

    measure_parser.add_argument('-n', '--nickname',
        help="""the 'SOURCE' STRING to use in measurement result files produced by OnionPerf""",
        metavar="STRING", type=str,
        action="store", dest="nickname",
        default=hostname)

    measure_parser.add_argument('-p', '--prefix',
        help="""a directory PATH prefix where OnionPerf will run""",
        metavar="PATH", type=type_str_dir_path_out,
        action="store", dest="prefix",
        default=os.getcwd() + "/onionperf-data")

    measure_parser.add_argument('-k', '--key-prefix',
        help="""a directory PATH prefix where OnionPerf will store its private key files""",
        metavar="PATH", type=type_str_dir_path_out,
        action="store", dest="private_prefix",
        default=os.getcwd() + "/onionperf-private")


    # analyze
    analyze_parser = sub_parser.add_parser('analyze', description=DESC_ANALYZE, help=HELP_ANALYZE,
        formatter_class=my_formatter_class)
    analyze_parser.set_defaults(func=analyze, formatter_class=my_formatter_class)

    analyze_parser.add_argument('--tgen',
        help="""a file or directory PATH to a TGen logfile or logfile directory""",
        metavar="PATH", type=type_str_path_in,
        action="store", dest="tgen_logpath",
        default=None)

    analyze_parser.add_argument('--torctl',
        help="""a file or directory PATH to a TorCtl logfile or logfile directory (in the format output by the monitor subcommand)""",
        metavar="PATH", type=type_str_path_in,
        action="store", dest="torctl_logpath",
        default=None)

    analyze_parser.add_argument('-p', '--prefix',
        help="""A directory PATH prefix where the processed data
files generated by this script will be written""",
        metavar="PATH", type=type_str_dir_path_out,
        action="store", dest="prefix",
        default=os.getcwd())

    analyze_parser.add_argument('-a', '--address',
        help="""an IP address STRING that identifies the machine where the input logfiles were produced""",
        metavar="STRING", type=type_str_ip_in,
        action="store", dest="ip_address",
        default=None)

    analyze_parser.add_argument('-n', '--nickname',
        help="""a nickname STRING that identifies the machine where the input logfiles were produced""",
        metavar="STRING", type=str,
        action="store", dest="nickname",
        default=None)

    analyze_parser.add_argument('-d', '--date-filter',
        help="""a DATE string in the form YYYY-MM-DD, all log messages that did not occur on this date will be filtered out of the analysis""",
        metavar="DATE", type=type_str_date_in,
        action="store", dest="date_filter",
        default=None)

    analyze_parser.add_argument('-s', '--do-simple-parse',
        help="""parse and export only summary statistics rather than full transfer/circuit/stream data""",
        action="store_true", dest="do_simple",
        default=False)

    # visualize
    visualize_parser = sub_parser.add_parser('visualize', description=DESC_VISUALIZE, help=HELP_VISUALIZE,
        formatter_class=my_formatter_class)
    visualize_parser.set_defaults(func=visualize, formatter_class=my_formatter_class)

    visualize_parser.add_argument('-d', '--data',
        help="""Appends one or more PATHS to OnionPerf
                analysis results files or directories of such files, and a LABEL
                to be used for the graph legend for this dataset""",
        metavar=("PATH [PATH...] LABEL", "LABEL"),
        nargs='+',
        required=True,
        action=PathStringArgsAction, dest="datasets")

    visualize_parser.add_argument('-p', '--prefix',
        help="a STRING filename prefix for graphs we generate",
        metavar="STRING", type=str,
        action="store", dest="prefix",
        default=None)

    # get args and call the command handler for the chosen mode
    if len(sys.argv) == 1:
        main_parser.print_help()
        sys.exit(1)

    args = main_parser.parse_args()

    # subparsers are optional in Python 3, so `func` is missing when arguments
    # were given but none of them selected a subcommand
    handler = getattr(args, "func", None)
    if handler is None:
        main_parser.print_help()
        sys.exit(1)
    handler(args)

def monitor(args):
    """Handler for the `monitor` subcommand.

    Logs Tor controller events from the control port `args.ctlport` to
    `args.logpath` ('-' is reported as STDOUT) until the monitor returns or
    the user interrupts it with ctrl+c.

    Args:
        args: parsed arguments providing `events`, `custom_events`,
            `logpath` and `ctlport`.
    """
    from onionperf.monitor import TorMonitor

    events = args.events + args.custom_events
    eventstr = ','.join(events)

    writer = util.FileWritable(args.logpath)
    mon = TorMonitor(args.ctlport, writer, events=events)
    try:
        fname = 'STDOUT' if args.logpath == '-' else args.logpath
        startup_msg = "tor-ctl-logger started logging Tor events {0} from port {1} to {2}".format(eventstr, args.ctlport, fname)
        logging.info(startup_msg)
        mon.run()
        logging.info("tor-ctl-logger is done logging Tor events {0} from port {1}".format(eventstr, args.ctlport))
    except KeyboardInterrupt:
        pass  # the user hit ctrl+c
    finally:
        # close the log writer even when the monitor fails with an
        # unexpected error, not only on a clean exit or ctrl+c
        writer.close()

def measure(args):
    """Handler for the `measure` subcommand.

    Resolves the tor and tgen binaries, changes into the `args.prefix`
    directory, picks random free ports for the client TGen/Tor processes and
    for the server Tor process, and runs a Measurement. When either binary
    cannot be resolved, nothing is measured and a message is logged.

    Args:
        args: parsed arguments of the `measure` subcommand.
    """
    from onionperf.measurement import Measurement

    # check paths
    args.torpath = util.find_path(args.torpath, "tor")
    if args.torpath is not None: args.tgenpath = util.find_path(args.tgenpath, "tgen")

    # validate paths and run
    if args.torpath is not None and args.tgenpath is not None:
        os.chdir(args.prefix)

        # NOTE: a placeholder branch on `args.model` used to live here. The
        # measure parser defines no `--model` option, so reading that
        # attribute raised AttributeError before any measurement started;
        # both of its branches were no-ops, so it was removed.

        client_connect_ip = args.tgenconnectip
        client_connect_port = args.tgenconnectport
        client_tgen_port = util.get_random_free_port()
        client_tor_ctl_port = util.get_random_free_port()
        client_tor_socks_port = util.get_random_free_port()

        server_tgen_port = args.tgenlistenport
        server_tor_ctl_port = util.get_random_free_port()
        server_tor_socks_port = util.get_random_free_port()

        meas = Measurement(args.torpath, args.tgenpath, args.prefix, args.private_prefix, args.nickname, args.oneshot, args.additional_client_conf, args.torclient_conf_file, args.torserver_conf_file)
        # --inet-only disables the onion measurement and --onion-only
        # disables the inet measurement; by default both are performed
        meas.run(do_onion=not args.inet_only, do_inet=not args.onion_only,
             client_tgen_listen_port=client_tgen_port, client_tgen_connect_ip=client_connect_ip, client_tgen_connect_port=client_connect_port, client_tor_ctl_port=client_tor_ctl_port, client_tor_socks_port=client_tor_socks_port,
             server_tgen_listen_port=server_tgen_port, server_tor_ctl_port=server_tor_ctl_port, server_tor_socks_port=server_tor_socks_port)
    else:
        logging.info("Please fix path errors to continue")

def analyze(args):
    """Handler for the `analyze` subcommand.

    Three input shapes are recognized:

    * no paths at all: a warning is logged and nothing is analyzed;
    * each given path is a regular file (either may be omitted): a single
      Analysis is built from the file(s), analyzed and saved under
      `args.prefix`;
    * both paths are directories: the logs found in them are matched into
      pairs and reprocessed.

    Any other combination is reported as an error and nothing is analyzed.
    """
    tgen_path = args.tgen_logpath
    torctl_path = args.torctl_logpath

    def _absent_or_file(path):
        return path is None or os.path.isfile(path)

    def _given_dir(path):
        return path is not None and os.path.isdir(path)

    if tgen_path is None and torctl_path is None:
        logging.warning("No logfile paths were given, nothing will be analyzed")
        return

    if _absent_or_file(tgen_path) and _absent_or_file(torctl_path):
        # single-file mode
        from onionperf.analysis import Analysis
        analysis = Analysis(nickname=args.nickname, ip_address=args.ip_address)
        if tgen_path is not None:
            analysis.add_tgen_file(tgen_path)
        if torctl_path is not None:
            analysis.add_torctl_file(torctl_path)
        analysis.analyze(args.do_simple, date_filter=args.date_filter)
        analysis.save(output_prefix=args.prefix)
        return

    if _given_dir(tgen_path) and _given_dir(torctl_path):
        # directory mode
        from onionperf import reprocessing
        tgen_logs = reprocessing.collect_logs(tgen_path, '*tgen.log*')
        torctl_logs = reprocessing.collect_logs(torctl_path, '*torctl.log*')
        log_pairs = reprocessing.match(tgen_logs, torctl_logs, args.date_filter)
        logging.info("Found {0} matching log pairs to be reprocessed".format(len(log_pairs)))
        reprocessing.multiprocess_logs(log_pairs, args.prefix, args.nickname, args.do_simple)
        return

    logging.error("Given paths were an unrecognized mix of file and directory paths, nothing will be analyzed")

def visualize(args):
    """Handler for the `visualize` subcommand.

    Each entry of `args.datasets` is a (paths, label) pair. Every path is
    loaded as an Analysis; loads that return None are skipped. The loaded
    analyses are added to the visualization as one dataset under the label,
    and finally all plots are generated with `args.prefix`.
    """
    from onionperf.visualization import TGenVisualization
    from onionperf.analysis import Analysis

    tgen_viz = TGenVisualization()
    for dataset_paths, dataset_label in args.datasets:
        loaded = (Analysis.load(filename=item) for item in dataset_paths)
        usable = [result for result in loaded if result is not None]
        tgen_viz.add_dataset(usable, dataset_label)
    tgen_viz.plot_all(args.prefix)

def type_nonnegative_integer(value):
    """argparse type: convert `value` with int() and require it to be >= 0.

    Raises:
        argparse.ArgumentTypeError: if the converted number is negative.
    """
    number = int(value)
    if number >= 0:
        return number
    raise argparse.ArgumentTypeError("'%s' is an invalid non-negative int value" % value)

def type_supported_analysis(value):
    """argparse type: lower-case `value` and require it to be one of
    'all', 'tgen' or 'tor'.

    Raises:
        argparse.ArgumentTypeError: for any other value.
    """
    kind = value.lower()
    if kind in ("all", "tgen", "tor"):
        return kind
    raise argparse.ArgumentTypeError("'%s' is an invalid Analysis type" % value)

def type_str_file_path_out(value):
    """argparse type for an output file path.

    '-' is returned unchanged. Any other value is expanded (`~`) and made
    absolute, and util.make_dir_path() is called on its parent directory
    before the absolute path is returned.
    """
    text = str(value)
    if text != "-":
        text = os.path.abspath(os.path.expanduser(text))
        util.make_dir_path(os.path.dirname(text))
    return text

def type_str_dir_path_out(value):
    """argparse type for an output directory path.

    The value is expanded (`~`) and made absolute, util.make_dir_path() is
    called on it, and the absolute path is returned.
    """
    target = os.path.abspath(os.path.expanduser(str(value)))
    util.make_dir_path(target)
    return target

def type_str_path_in(value):
    """argparse type for an input path that must already exist.

    '-' is returned unchanged. Any other value is expanded (`~`) and made
    absolute; the absolute path is returned if it exists.

    Raises:
        argparse.ArgumentTypeError: if the path does not exist.
    """
    raw = str(value)
    if raw == "-":
        return raw
    resolved = os.path.abspath(os.path.expanduser(raw))
    if os.path.exists(resolved):
        return resolved
    raise argparse.ArgumentTypeError("path '%s' does not exist" % raw)

def type_str_ip_in(value):
    """argparse type: validate a dotted-quad IPv4 address.

    Surrounding whitespace is ignored. The whole remaining string must be
    four dot-separated groups of one to three decimal digits, each no larger
    than 255. Previously only a prefix of the string was checked, so input
    such as '1.2.3.4xyz' or '1.2.3.4.5' was silently truncated, and octets
    above 255 were accepted.

    Returns:
        The validated address string.

    Raises:
        argparse.ArgumentTypeError: if the value is not a valid address.
    """
    s = str(value).strip()
    ip = re.fullmatch(r'([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})', s)
    if ip is None or any(int(octet) > 255 for octet in ip.groups()):
        raise argparse.ArgumentTypeError("IP address '%s' is not a valid address" % s)
    return ip.group(0)

def type_str_date_in(value):
    """argparse type: parse a YYYY-MM-DD string into a datetime.date.

    Returns:
        The datetime.date for the given year, month and day.

    Raises:
        argparse.ArgumentTypeError: if the value does not consist of three
            '-'-separated integers, if the year, month or day is out of
            range, or if the day does not exist in the given month.
    """
    s = str(value)
    try:
        # unpacking fails with ValueError on a wrong number of parts, and
        # int() fails with ValueError on non-numeric parts
        y, m, d = (int(part) for part in s.split('-'))
    except ValueError:
        raise argparse.ArgumentTypeError("date '%s' is not in the valid YYYY-MM-DD format" % s) from None

    if y < datetime.MINYEAR or y > datetime.MAXYEAR:
        # the format arguments must be a single tuple; passing them as
        # separate arguments made the '%' operator itself raise TypeError
        raise argparse.ArgumentTypeError("the year portion of date '%s' must be in the range [%d, %d]" % (s, datetime.MINYEAR, datetime.MAXYEAR))
    if m < 1 or m > 12:
        raise argparse.ArgumentTypeError("the month portion of date '%s' must be in the range [1, 12]" % s)
    if d < 1 or d > 31:
        raise argparse.ArgumentTypeError("the day portion of date '%s' must be in the range [1, 31]" % s)

    try:
        return datetime.date(y, m, d)
    except ValueError:
        # e.g. February 30th: within [1, 31] but not a real calendar day
        raise argparse.ArgumentTypeError("the day portion of date '%s' is out of range for that month" % s) from None

# a custom action for passing in experimental data directories when plotting
class PathStringArgsAction(argparse.Action):
    """argparse action collecting `PATH [PATH ...] LABEL` datasets.

    Each use of the option appends one `(paths, label)` tuple to the list
    stored at `dest`. The last value is the label and all preceding values
    are paths. Every path is expanded (`~`) and made absolute; a directory
    is replaced by the analysis files that reprocessing.collect_logs() finds
    in it. Duplicates are dropped while the order in which the paths were
    given is kept, so the resulting dataset is the same on every run.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        # at least two arguments are required
        if len(values) < 2:
            raise argparse.ArgumentError(self, "Must specify at least one path and one data label")

        # extract the paths to our data, and the label for the legend
        label = values[-1]
        given_paths = values[:-1]

        # warn (but do not fail) if the label looks like an existing path,
        # which usually means that the label was forgotten
        check_label = os.path.abspath(os.path.expanduser(label))
        if os.path.exists(check_label) and label not in given_paths:
            logging.warning("The label supplied looks like a path: {}".format(label))

        paths = []
        for item in given_paths:
            # use the same normalized path for validation, for the directory
            # check and for the result, so '~' paths are handled consistently
            p = os.path.abspath(os.path.expanduser(item))
            if not os.path.exists(p):
                raise argparse.ArgumentError(self, "The supplied path does not exist: '{0}'".format(p))
            if os.path.isdir(p):
                # directories contribute the analysis files found inside them
                from onionperf import reprocessing
                paths.extend(reprocessing.collect_logs(p, "*onionperf.analysis.json*"))
            else:
                paths.append(p)

        # remove any duplicates while keeping the first occurrence of each path
        paths = list(dict.fromkeys(paths))

        # remove the default the first time the option is seen
        if "_didremovedefault" not in namespace:
            setattr(namespace, self.dest, [])
            setattr(namespace, "_didremovedefault", True)
        # append our new experiment paths
        dest = getattr(namespace, self.dest)
        dest.append((paths, label))

# run the command line interface when executed as a script
if __name__ == '__main__':
    sys.exit(main())