#!/usr/bin/env python
"""
    Metadata anonymisation toolkit - CLI edition
"""

import sys
import argparse
import os

from libmat import mat
from libmat import archive


def create_arg_parser():
    """ Build the command line parser of the program

    :return: A parser accepting any number of `files`, plus the
        `Options` and `Information` flag groups declared below
    :rtype: argparse.ArgumentParser
    """
    parser = argparse.ArgumentParser(description='Metadata anonymisation toolkit')
    parser.add_argument('files', nargs='*')

    options = parser.add_argument_group('Options')
    options.add_argument('-a', '--add2archive', action='store_true',
                         help='add to output archive non-supported filetypes (Off by default)')
    # '-b' used to be listed twice here, which duplicated it in the help output.
    options.add_argument('-b', '--backup', action='store_true',
                         help='keep a backup copy')
    options.add_argument('-L', '--low-pdf-quality', action='store_true',
                         help='produces a lighter, but lower quality PDF')

    info = parser.add_argument_group('Information')
    info.add_argument('-c', '--check', action='store_true',
                      help='check if a file is free of harmful metadatas')
    info.add_argument('-d', '--display', action='store_true',
                      help='list all the harmful metadata of a file without removing them')
    info.add_argument('-l', '--list', action='store_true',
                      help='list all supported fileformats')
    info.add_argument('-v', '--version', action='version',
                      version='MAT %s' % mat.__version__)
    return parser


def list_meta(class_file, filename, add2archive):
    """ Print all the metadata of $filename on stdout

    :param parser.GenericParser class_file: The class file representing $filename
    :param str filename: File to parse
    :param bool add2archive: Unused parameter; it is only accepted so that
        `main` can call every action with the same arguments
    :return: Always 0
    :rtype: int
    """
    print('[+] File %s :' % filename)
    if class_file.is_clean():
        print('No harmful metadata found')
    else:
        print('Harmful metadata found:')
        meta = class_file.get_meta()
        if meta:
            # Reuse the metadata fetched above instead of querying it again.
            for key, value in meta.items():
                print('\t%s: %s' % (key, value))
    return 0


def is_clean(class_file, filename, add2archive):
    """ Tell if 'filename' is clean or not

    :param parser.GenericParser class_file: The class file representing $filename
    :param str filename: File to parse
    :param bool add2archive: Unused parameter; it is only accepted so that
        `main` can call every action with the same arguments
    :return: Always 0, whether the file is clean or not
    :rtype: int
    """
    if class_file.is_clean():
        print('[+] %s is clean' % filename)
    else:
        print('[+] %s is not clean' % filename)
    return 0


def clean_meta(class_file, filename, add2archive):
    """ Clean the file 'filename'

    :param parser.GenericParser class_file: The class file representing $filename
    :param str filename: File to parse
    :param bool add2archive: When False, an archive (other than a
        TerminalZipStripper) that reports unsupported members is refused
        instead of being cleaned
    :return: 0 if the file was cleaned, 1 otherwise
    :rtype: int
    """
    if not class_file.is_writable:
        print('[-] %s is not writable' % filename)
        return 1

    print('[*] Cleaning %s' % filename)
    if not add2archive:
        is_archive = isinstance(class_file, archive.GenericArchiveStripper)
        is_terminal = isinstance(class_file, archive.TerminalZipStripper)
        if is_archive and not is_terminal:
            unsupported_list = class_file.list_unsupported()
            # Only a non-empty list is treated as "has unsupported members".
            if isinstance(unsupported_list, list) and unsupported_list:
                print('[-] Can not clean: %s. '
                      'It contains unsupported filetypes:' % filename)
                for i in unsupported_list:
                    print('- %s' % i)
                return 1

    if class_file.remove_all():
        print('[+] %s cleaned!' % filename)
    else:
        # The filename used to be passed as a second print() argument, so
        # the '%s' placeholder was never substituted.
        print('[-] Unable to clean %s' % filename)
        return 1
    return 0


def list_supported():
    """ Print all supported fileformat """
    for item in mat.list_supported_formats():
        print('%s (%s)' % (item['name'], item['extension']))
        print('\tsupport: %s' % item['support'])
        print('\tmetadata: %s' % item['metadata'])
        print('\tmethod: %s' % item['method'])
        print('\tremaining: %s' % item['remaining'])
        print('\n')


def main():
    """ Main function: get args and launch the appropriate function

    Every action (`list_meta`, `is_clean`, `clean_meta`) is called as
    `func(class_file, filename, add2archive)`, which is why they all share
    the same signature even when they ignore `add2archive`.

    Exit status: 0 on success, 1 if at least one file could not be
    processed, 2 when the help is shown because of invalid usage.
    """
    argparser = create_arg_parser()
    args = argparser.parse_args()

    # show help if: neither list nor file argument given; no argument at
    # all given or the list argument mixed with some other argument given
    if not (args.list or args.files) or (not sys.argv) or (args.list and len(sys.argv) > 2):
        argparser.print_help()
        sys.exit(2)

    # func receives the function corresponding to the options given as parameters
    if args.display:  # only print metadatas
        func = list_meta
    elif args.check:  # only check if the file is clean
        func = is_clean
    elif args.list:  # print the list of all supported format
        list_supported()
        sys.exit(0)
    else:  # clean the file
        func = clean_meta

    # Track failure as a flag rather than a counter: a raw count handed to
    # sys.exit() may wrap around to 0 once it reaches 256, and a count of 2
    # would be indistinguishable from the usage error above.
    failed = False

    # We're using a while loop, instead of a for,
    # because we support folders. This allow us
    # to add their content, and to process it.
    while args.files:
        filename = args.files.pop()
        if os.path.isdir(filename):
            for root, _sub, files in os.walk(filename):
                for fname in files:
                    args.files.append(os.path.join(root, fname))
            continue

        class_file = mat.create_class_file(filename, args.backup,
                                           add2archive=args.add2archive,
                                           low_pdf_quality=args.low_pdf_quality)
        if class_file:
            if func(class_file, filename, args.add2archive):
                failed = True
        else:
            failed = True
            print('[-] Unable to process %s' % filename)
    sys.exit(1 if failed else 0)


if __name__ == '__main__':
    main()