#!/usr/bin/env python
"""
Metadata anonymisation toolkit - CLI edition
"""
import sys
import argparse
import os
from libmat import mat
from libmat import archive
def create_arg_parser():
    """ Build the command-line parser of the program

    Declares the positional `files` list, an 'Options' group
    (-a/--add2archive, -b/--backup, -L/--low-pdf-quality) and an
    'Information' group (-c/--check, -d/--display, -l/--list, -v/--version).

    :return: The configured parser; the caller is expected to call
        `parse_args()` on it
    :rtype: argparse.ArgumentParser
    """
    parser = argparse.ArgumentParser(description='Metadata anonymisation toolkit')
    parser.add_argument('files', nargs='*')

    options = parser.add_argument_group('Options')
    options.add_argument('-a', '--add2archive', action='store_true',
                         help='add to output archive non-supported filetypes (Off by default)')
    # Each option string is declared once: repeating '-b' made the usage/help
    # output list the flag twice.
    options.add_argument('-b', '--backup', action='store_true',
                         help='keep a backup copy')
    options.add_argument('-L', '--low-pdf-quality', action='store_true',
                         help='produces a lighter, but lower quality PDF')

    info = parser.add_argument_group('Information')
    info.add_argument('-c', '--check', action='store_true',
                      help='check if a file is free of harmful metadatas')
    info.add_argument('-d', '--display', action='store_true',
                      help='list all the harmful metadata of a file without removing them')
    info.add_argument('-l', '--list', action='store_true',
                      help='list all supported fileformats')
    # The 'version' action prints the version string and exits.
    info.add_argument('-v', '--version', action='version',
                      version='MAT %s' % mat.__version__)

    return parser
def list_meta(class_file, filename, add2archive):
    """ Print all the metadata of $filename on stdout

    :param parser.GenericParser class_file: The class file representing $filename
    :param str filename: File to parse
    :param bool add2archive: Unused parameter, check the `main` function for more information
    :return: Always 0
    :rtype: int
    """
    print('[+] File %s :' % filename)
    if class_file.is_clean():
        print('No harmful metadata found')
        return 0

    print('Harmful metadata found:')
    # Query the metadata only once and reuse the result for the listing,
    # instead of asking the parser a second time.
    meta = class_file.get_meta()
    if meta:
        for key, value in meta.items():
            print('\t%s: %s' % (key, value))
    return 0
def is_clean(class_file, filename, add2archive):
    """ Report on stdout whether 'filename' is clean or not

    :param parser.GenericParser class_file: The class file representing $filename
    :param str filename: File to parse
    :param bool add2archive: Unused parameter, check the `main` function for more information
    :return: Always 0, whatever the state of the file
    """
    # Pick the message first, then format it with the file name.
    template = '[+] %s is clean' if class_file.is_clean() else '[+] %s is not clean'
    print(template % filename)
    return 0
def clean_meta(class_file, filename, add2archive):
    """ Clean the file 'filename'

    :param parser.GenericParser class_file: The class file representing $filename
    :param str filename: File to parse
    :param bool add2archive: When false, a non-terminal archive whose
        `list_unsupported()` returns a non-empty list is refused instead of
        being cleaned; when true that check is skipped
    :return: 0 if the file was cleaned, 1 if it is not writable, contains
        unsupported filetypes, or could not be cleaned
    :rtype: int
    """
    if not class_file.is_writable:
        print('[-] %s is not writable' % filename)
        return 1

    print('[*] Cleaning %s' % filename)
    if not add2archive:
        is_archive = isinstance(class_file, archive.GenericArchiveStripper)
        is_terminal = isinstance(class_file, archive.TerminalZipStripper)
        if is_archive and not is_terminal:
            unsupported_list = class_file.list_unsupported()
            # Only a non-empty list is treated as "unsupported members found".
            if isinstance(unsupported_list, list) and unsupported_list:
                # The two sentences need a space between them once the
                # adjacent literals are concatenated.
                print('[-] Can not clean: %s. '
                      'It contains unsupported filetypes:' % filename)
                for unsupported in unsupported_list:
                    print('- %s' % unsupported)
                return 1

    if not class_file.remove_all():
        # Interpolate with '%': a comma here would print the raw '%s'
        # placeholder followed by the file name.
        print('[-] Unable to clean %s' % filename)
        return 1

    print('[+] %s cleaned!' % filename)
    return 0
def list_supported():
    """ Print every supported fileformat, with its details, on stdout """
    detail_fields = ('support', 'metadata', 'method', 'remaining')
    for fmt in mat.list_supported_formats():
        print('%s (%s)' % (fmt['name'], fmt['extension']))
        # One tab-indented "<field>: <value>" line per detail, in fixed order.
        for field in detail_fields:
            print('\t%s: %s' % (field, fmt[field]))
        print('\n')
def main():
    """ Main function: get args and launch the appropriate function

    Always terminates through `sys.exit`:

    * 2 after printing the help, when the command line is not usable;
    * 0 after printing the supported formats (--list);
    * otherwise 0 if every file was processed successfully, 1 if at least
      one of them failed.
    """
    argparser = create_arg_parser()
    args = argparser.parse_args()

    # show help if: neither list nor file argument given (this also covers
    # "no argument at all"), or the list argument mixed with some other
    # argument
    if not (args.list or args.files) or (args.list and len(sys.argv) > 2):
        argparser.print_help()
        sys.exit(2)

    # func receives the function corresponding to the options given as parameters
    if args.display:  # only print metadatas
        func = list_meta
    elif args.check:  # only check if the file is clean
        func = is_clean
    elif args.list:  # print the list of all supported format
        list_supported()
        sys.exit(0)
    else:  # clean the file
        func = clean_meta

    # The exit status is kept to 0/1 rather than a count of failures: the
    # operating system only keeps the low 8 bits of the status, so a summed
    # count could wrap around to 0 ("success") after 256 failures.
    ret = 0

    # We're using a while loop, instead of a for,
    # because we support folders. This allows us
    # to add their content, and to process it.
    while args.files:
        filename = args.files.pop()
        if os.path.isdir(filename):
            # Queue every file found below the folder; they are handled by
            # later iterations of the enclosing loop.
            for root, _, files in os.walk(filename):
                args.files.extend(os.path.join(root, fname) for fname in files)
            continue

        class_file = mat.create_class_file(filename, args.backup,
                                           add2archive=args.add2archive,
                                           low_pdf_quality=args.low_pdf_quality)
        if not class_file:
            ret = 1
            print('[-] Unable to process %s' % filename)
        elif func(class_file, filename, args.add2archive):
            # The handlers return a non-zero value on failure.
            ret = 1
    sys.exit(ret)


if __name__ == '__main__':
    main()