"""Back up a directory tree as compressed, encrypted archives indexed in SQLite.

Small files of a directory are bundled into one tarball, files larger than
``Backup.tarball_size`` are archived individually.  Every archive is gzipped,
encrypted and written to ``Backup.save_location`` under a numeric name that
is recorded in the SQLite index managed by :class:`DataBase`.
"""

import io
import os
import pathlib
import re
import shutil
import sqlite3
import tarfile
import tempfile
from collections import Counter

try:
    # mgzip is an optional multi-threaded drop-in for gzip.
    import mgzip as gzip
except ImportError:
    import gzip

# Buffer size used for every streamed copy.
_CHUNK_SIZE = 64 * 1024
# Temporary buffers stay in memory up to this size and spill to disk beyond
# it.  (The tempfile default, max_size=0, never spills to disk.)
_SPOOL_MAX_SIZE = 16 * 2**20

_SIZE_UNITS = {"B": 1, "K": 2**10, "M": 2**20, "G": 2**30, "T": 2**40}
_SIZE_PATTERN = re.compile(r"([0-9]+(?:\.[0-9]+)?)\s*([BKMGT])", re.I)


def _is_path(obj):
    """Return True when *obj* designates a filesystem path, not a file object."""
    return isinstance(obj, (str, bytes, os.PathLike))


def _new_spool():
    """Return an empty temporary file that spills to disk when it grows large."""
    return tempfile.SpooledTemporaryFile(max_size=_SPOOL_MAX_SIZE)


def _rewind(file):
    """Seek *file* back to its start when it is a seekable file object."""
    if hasattr(file, 'seek'):
        file.seek(0)


def _archive_member_name(file):
    """Return the tar member name used for a ``{'name', 'path'}`` entry.

    Applies the same normalisation tarfile applies to archive names (drive
    removed, forward slashes, no leading slash) so that ``tar`` and ``untar``
    always agree on the name.
    """
    full_path = os.path.join(file['path'], file['name'])
    _, tail = os.path.splitdrive(full_path)
    return tail.replace(os.sep, "/").lstrip("/")


class Backup:
    """Walk a directory tree and store its content as encrypted archives."""

    # Encryption key handed to the crypt backend.
    key = None
    # Files strictly larger than this size are archived on their own.
    tarball_size = "50M"
    # Directory receiving the encrypted archives; must be set before recurse().
    save_location = None

    def __init__(self, key, bdd):
        """Store the encryption *key* and open the index database file *bdd*."""
        self.key = key
        self.bdd = DataBase(bdd)

    def recurse(self, path):
        """Back up every file found under *path*, depth first.

        Files larger than ``tarball_size`` are compressed and encrypted
        individually ("gz"); the remaining files of each directory are bundled
        in a single tarball ("tar.gz").

        Raises:
            ValueError: if ``save_location`` has not been set.
        """
        if self.save_location is None:
            # Fail before touching the index, otherwise archives would be
            # recorded in the database without ever being written.
            raise ValueError("Backup.save_location must be set before recurse()")

        tarball_size = parse_size(self.tarball_size)
        files = []
        print("Start", path, ":", files)
        for f in os.listdir(path):
            uri = os.path.join(path, f)
            if os.path.isfile(uri):
                size = os.path.getsize(uri)
                print(f + " : ", human_size(size))
                entry = {'name': f, 'path': path, 'size': size}
                if size > tarball_size:
                    crypt_name = self.bdd.add([entry], compress_mode="gz")
                    self._encrypt_and_save(uri, crypt_name)
                else:
                    files.append(entry)
            elif os.path.isdir(uri):
                self.recurse(uri)

        if files:
            print("End", path, ":", files)
            crypt_name = self.bdd.add(files, compress_mode="tar.gz")
            with tar(files) as tarball:
                self._encrypt_and_save(tarball, crypt_name)

    def _encrypt_and_save(self, source, crypt_name):
        """Compress and encrypt *source*, then write it as *crypt_name*."""
        # The context managers release the intermediate temporary files as
        # soon as the archive has been written.
        with compress(source) as compressed, \
                crypt(compressed, self.key) as encrypted:
            save(encrypted, os.path.join(self.save_location, crypt_name))


def tar(files):
    """Bundle *files* into an uncompressed tarball held in a temporary file.

    Args:
        files: iterable of dicts with at least ``'name'`` (file name) and
            ``'path'`` (containing directory).

    Returns:
        A temporary file object containing the tar archive.
    """
    tarball = _new_spool()
    try:
        with tarfile.open(fileobj=tarball, mode='w') as archive:
            for file in files:
                # Add the file itself; adding file['path'] would archive the
                # whole directory recursively, once per listed file.
                archive.add(os.path.join(file['path'], file['name']),
                            arcname=_archive_member_name(file))
    except BaseException:
        tarball.close()
        raise
    return tarball


def untar(tar_file, files, save_path):
    """Extract the entries of *files* from *tar_file* under *save_path*.

    Args:
        tar_file: path of a tar archive, or a seekable file object holding one.
        files: iterable of ``{'name', 'path'}`` dicts, as given to ``tar``.
        save_path: directory receiving the extracted files.
    """
    if _is_path(tar_file):
        archive = tarfile.open(tar_file, 'r')
    else:
        tar_file.seek(0)
        archive = tarfile.open(fileobj=tar_file, mode='r')
    with archive:
        for file in files:
            archive.extract(_archive_member_name(file), path=save_path)


def compress(file):
    """Gzip *file* (a path or a seekable file object) into a temporary file.

    Returns:
        A temporary file object containing the compressed data.
    """
    compressed_file = _new_spool()
    try:
        with gzip.open(compressed_file, 'wb') as zipfile:
            if _is_path(file):
                with open(file, 'rb') as infile:
                    shutil.copyfileobj(infile, zipfile, _CHUNK_SIZE)
            else:
                file.seek(0)
                shutil.copyfileobj(file, zipfile, _CHUNK_SIZE)
    except BaseException:
        compressed_file.close()
        raise
    return compressed_file


def uncompress(file):
    """Gunzip *file* (a path or a seekable file object) into a temporary file.

    Returns:
        A temporary file object containing the decompressed data.
    """
    if not _is_path(file):
        file.seek(0)
    decompressed_file = _new_spool()
    try:
        with gzip.open(file, 'rb') as zipfile:
            shutil.copyfileobj(zipfile, decompressed_file, _CHUNK_SIZE)
    except BaseException:
        decompressed_file.close()
        raise
    return decompressed_file


def crypt(file, key):
    """Encrypt *file* with *key* and return the result as a temporary file.

    The work is delegated to ``encrypt_file(key, infile, outfile)`` of the
    importable module named ``crypt``.
    NOTE(review): the standard-library ``crypt`` module has no such function,
    so this presumably targets a project-local module -- confirm.
    """
    # Imported here under an alias: at module scope the name ``crypt`` is
    # bound to this very function, which hides the module.
    import crypt as crypt_backend

    encrypted_file = _new_spool()
    try:
        # Start from the beginning: producers such as compress() leave the
        # stream positioned at its end.
        _rewind(file)
        crypt_backend.encrypt_file(key, file, encrypted_file)
    except BaseException:
        encrypted_file.close()
        raise
    return encrypted_file


def uncrypt(file, key):
    """Decrypt *file* with *key* and return the result as a temporary file.

    The work is delegated to ``decrypt_file(key, infile, outfile)`` of the
    importable module named ``crypt`` (see the note on ``crypt()``).
    """
    # Aliased local import: the module-level name ``crypt`` is the function.
    import crypt as crypt_backend

    decrypted_file = _new_spool()
    try:
        _rewind(file)
        crypt_backend.decrypt_file(key, file, decrypted_file)
    except BaseException:
        decrypted_file.close()
        raise
    return decrypted_file


def save(file, save_path):
    """Write the whole content of the file object *file* to *save_path*.

    Missing parent directories are created.  Objects that cannot be read are
    reported on stdout and skipped.
    """
    if not hasattr(file, 'read'):
        print("Unable to save " + str(file) + " of type " + str(type(file)))
        return

    directory = os.path.dirname(save_path)
    if directory:
        # makedirs copes with nested and already existing directories.
        os.makedirs(directory, exist_ok=True)

    _rewind(file)
    with open(save_path, 'wb') as target:
        shutil.copyfileobj(file, target, _CHUNK_SIZE)


class DataBase:
    """SQLite index mapping each backed-up file to the archive holding it."""

    def __init__(self, base_file):
        """Open (or create) the database *base_file* and ensure its tables."""
        self.conn = sqlite3.connect(base_file)
        self.__create_table()

    def __del__(self):
        # ``conn`` is missing when sqlite3.connect() itself failed.
        conn = getattr(self, "conn", None)
        if conn is None:
            return
        try:
            conn.commit()
        finally:
            conn.close()

    def __create_table(self):
        """Create the ``files`` and ``crypt`` tables when they are missing."""
        cursor = self.conn.cursor()
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS files(
                id INTEGER PRIMARY KEY UNIQUE NOT NULL,
                name TEXT,
                path TEXT,
                size TEXT,
                crypt_id INTEGER,
                CONSTRAINT files_crypt_FK FOREIGN KEY (crypt_id) REFERENCES crypt(id)
            )
        """)
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS crypt(
                id INTEGER PRIMARY KEY UNIQUE NOT NULL,
                compress_mode TEXT
            )
        """)
        self.conn.commit()

    def get_crypt_id(self, list_file):
        """Return the archive id most often associated with *list_file*.

        Returns:
            The most frequent ``crypt_id`` among the entries already indexed,
            or ``None`` when none of them is indexed yet.
        """
        cursor = self.conn.cursor()
        crypt_id_list = []
        for file in list_file:
            cursor.execute(
                """SELECT crypt_id FROM files WHERE name=? AND path=?""",
                (file['name'], file['path']))
            row = cursor.fetchone()
            if row is not None:
                crypt_id_list.append(row[0])
        if crypt_id_list:
            return most_frequent(crypt_id_list)
        return None

    def exist(self, file):
        """Return 1 when *file* (matched on name and path) is indexed, else 0."""
        cursor = self.conn.cursor()
        cursor.execute(
            """SELECT EXISTS (SELECT 1 FROM files WHERE name=? AND path=?)""",
            (file['name'], file['path']))
        return cursor.fetchone()[0]

    def add(self, list_file, compress_mode=None):
        """Index *list_file* as the content of a single archive.

        Known files are updated, new ones inserted.  The archive id already
        used by most of the files is kept; otherwise a new id is allocated.

        Args:
            list_file: list of ``{'name', 'path', 'size'}`` dicts.
            compress_mode: archive format label stored for a new archive id.

        Returns:
            The archive id as a string, used as the archive file name.
        """
        cursor = self.conn.cursor()
        crypt_id = self.get_crypt_id(list_file)
        if crypt_id is None:
            cursor.execute("""SELECT IFNULL(max(id) + 1, 0) FROM crypt""")
            crypt_id = cursor.fetchone()[0]
        cursor.execute("""SELECT IFNULL(max(id) + 1, 0) FROM files""")
        files_id = cursor.fetchone()[0]

        # One transaction: committed on success, rolled back on error, so a
        # failure cannot leave a half-written archive description behind.
        with self.conn:
            for file in list_file:
                if self.exist(file):
                    cursor.execute(
                        """UPDATE files SET size=?, crypt_id=? WHERE name=? AND path=?""",
                        (file['size'], crypt_id, file['name'], file['path']))
                else:
                    cursor.execute(
                        """INSERT INTO files VALUES(?, ?, ?, ?, ?)""",
                        (files_id, file['name'], file['path'], file['size'],
                         crypt_id))
                    files_id += 1
            # NOTE(review): OR IGNORE keeps the compress_mode already stored
            # for a reused id, even when the new archive uses another mode.
            cursor.execute("""INSERT OR IGNORE INTO crypt VALUES(?, ?)""",
                           (crypt_id, compress_mode))
        return str(crypt_id)


def human_size(size, decimal_places=0):
    """Format a byte count with a binary unit suffix (B, K, M, G or T).

    Values of 1024 T and above stay expressed in T.
    """
    units = ('B', 'K', 'M', 'G', 'T')
    for unit in units[:-1]:
        if size < 1024.0:
            break
        size /= 1024.0
    else:
        # Every smaller unit overflowed; do not divide past the last unit.
        unit = units[-1]
    return f"{size:.{decimal_places}f}{unit}"


def parse_size(size):
    """Convert a size string such as ``"50M"`` into a number of bytes.

    A bare number is read as kibibytes.  Units are case-insensitive and the
    number may carry a decimal part.

    Raises:
        ValueError: if *size* is empty or cannot be parsed.
    """
    if not size:
        raise ValueError("empty size")
    if size[-1].isdigit():
        size = size + 'K'
    match = _SIZE_PATTERN.match(size)
    if match is None:
        raise ValueError(f"invalid size: {size!r}")
    number, unit = match.groups()
    # The pattern is case-insensitive, the unit table is upper case.
    return int(float(number) * _SIZE_UNITS[unit.upper()])


def most_frequent(list):
    """Return the most common element of *list* (first seen wins a tie).

    Raises:
        ValueError: if *list* is empty.
    """
    counts = Counter(list)
    if not counts:
        raise ValueError("most_frequent() arg is an empty sequence")
    return counts.most_common(1)[0][0]