"""Incremental, encrypted backup of a directory tree to a B2 bucket.

Files are gzip-compressed, encrypted and uploaded through ``b2sdk``.  A local
SQLite database records which archive holds which file so that unchanged
files are skipped on later runs and single files can be restored.
"""
import io
import os
import pathlib
import re
import sqlite3
import tarfile
import tempfile
from collections import Counter
from datetime import datetime

import mgzip as gzip
from b2sdk.v2 import B2Api

# NOTE(review): presumably a project-local ``crypt`` module (the stdlib module
# of that name has no encrypt_file/decrypt_file) -- confirm it wins on sys.path.
from crypt import encrypt_file, decrypt_file

# Timestamp layout stored in the ``m_date`` / ``c_date`` columns.
_DATE_FORMAT = "%Y-%m-%d %H:%M:%S.%f"

# Multipliers understood by parse_size().
_SIZE_UNITS = {"B": 1, "K": 2**10, "M": 2**20, "G": 2**30, "T": 2**40}


class Backup:
    """Walk a directory tree and push new or modified files to a B2 bucket.

    Files larger than ``tarball_size`` are uploaded one per archive
    (``"gz"`` mode); the remaining files of a directory are bundled into a
    single tarball (``"tar.gz"`` mode).
    """

    key = None
    # Threshold, in parse_size() syntax, above which a file gets its own archive.
    tarball_size = "50M"
    # Only referenced by the disabled local-save code path.
    save_location = None

    def __init__(self, key, bdd, app_key_id=None, app_key=None, bucket_id=None):
        """Open the index database and authorize against B2.

        Args:
            key: Key handed to ``encrypt_file`` / ``decrypt_file``.
            bdd: Path of the SQLite index database.
            app_key_id: B2 application key id.
            app_key: B2 application key.
            bucket_id: Id of the destination bucket.
        """
        self.key = key
        self.bdd = DataBase(bdd)
        self.b2 = B2Api()
        self.b2.authorize_account("production", app_key_id, app_key)
        self.buk = self.b2.get_bucket_by_id(bucket_id)

    @staticmethod
    def _describe(name, uri):
        """Build the index record (name, posix path, size, dates) for a file."""
        return {
            'name': name,
            'path': pathlib.Path(uri).as_posix(),
            'size': os.path.getsize(uri),
            'm_date': datetime.fromtimestamp(os.path.getmtime(uri)).strftime(_DATE_FORMAT),
            'c_date': datetime.fromtimestamp(os.path.getctime(uri)).strftime(_DATE_FORMAT),
        }

    def _upload(self, source, crypt_name):
        """Compress and encrypt ``source`` and upload it as ``crypt_name``.

        ``source`` is a file path or an open binary file object.  The
        temporary buffers are closed once the upload call returns or fails.
        """
        compressed = compress(source)
        try:
            enc = crypt(compressed, self.key)
            try:
                print(" Size : ", get_size(enc))
                # Rewind explicitly rather than relying on get_size() doing it.
                enc.seek(0)
                self.buk.upload_bytes(enc.read(), crypt_name)
                # save(enc, os.path.join(self.save_location, crypt_name))
            finally:
                enc.close()
        finally:
            compressed.close()

    def recurse(self, path):
        """Back up ``path`` and every directory below it.

        NOTE(review): the index entry is committed by ``DataBase.add`` before
        the upload happens, so a failed upload leaves the file recorded as
        backed up.
        """
        threshold = parse_size(self.tarball_size)
        small_files = []
        for entry in os.listdir(path):
            uri = os.path.join(path, entry)
            if os.path.isdir(uri):
                self.recurse(uri)
                continue
            if not os.path.isfile(uri):
                continue
            record = self._describe(entry, uri)
            if record['size'] > threshold:
                # Large file: one archive for this file alone.
                crypt_name = self.bdd.add([record], compress_mode="gz")
                if crypt_name is not None:
                    print("Proceed", uri)
                    self._upload(uri, crypt_name)
            else:
                small_files.append(record)

        if small_files:
            crypt_name = self.bdd.add(small_files, compress_mode="tar.gz")
            if crypt_name is not None:
                print("Proceed", path, ":", [file['name'] for file in small_files])
                tarball = tar([file['path'] for file in small_files])
                try:
                    self._upload(tarball, crypt_name)
                finally:
                    tarball.close()

    def recover_file(self, paths, parents=False, save_path=None):
        """Download and restore the given files.

        Args:
            paths: Iterable of file paths as they were backed up.
            parents: When true, recreate each file's parent directories
                underneath ``save_path`` instead of restoring it flat.
            save_path: Destination directory; defaults to the current
                working directory at call time.
        """
        base_dir = os.getcwd() if save_path is None else save_path
        for file in self.bdd.get_crypt_file(paths):
            if file['crypt'] is None:
                print("Not found in backup:", file['path'])
                continue

            # Computed per file so one file's sub-directory never leaks into
            # the destination of the next one.
            target_dir = base_dir
            if parents:
                parent = os.path.splitdrive(os.path.dirname(file['path']))[1]
                target_dir = os.path.join(base_dir, parent.lstrip('/\\'))

            with tempfile.SpooledTemporaryFile() as dl:
                self.buk.download_file_by_name(file['crypt']).save(dl)
                with uncrypt(dl, self.key) as decrypted, \
                        uncompress(decrypted) as plain:
                    if file.get('compress_mode') == 'gz':
                        # "gz" archives hold the raw file, not a tarball.
                        save(plain, os.path.join(
                            target_dir, os.path.basename(file['path'])))
                    else:
                        untar(plain, file['path'], target_dir)


def get_size(in_file):
    """Return the human-readable (SI, one decimal) size of a path or file object.

    A file object is left positioned at its start.
    """
    if isinstance(in_file, (str, os.PathLike)):
        filesize = os.path.getsize(in_file)
    else:
        in_file.seek(0, os.SEEK_END)
        filesize = in_file.tell()
        in_file.seek(0)
    return human_size(filesize, decimal_places=1, unit='si')


def tar(files):
    """Pack the given paths into an uncompressed tarball held in a temp file."""
    tarball = tempfile.SpooledTemporaryFile()
    with tarfile.open(fileobj=tarball, mode='w') as archive:
        for file in files:
            archive.add(file)
    return tarball


def _tar_member_name(path):
    """Return the member name ``tarfile`` stores for ``path``.

    ``TarFile.add`` drops the drive and leading slashes, so an absolute path
    never equals its member name.
    """
    name = os.path.splitdrive(path)[1]
    return name.replace(os.sep, "/").lstrip("/")


def untar(tar_file, file, save_path):
    """Extract the member ``file`` from ``tar_file`` into ``save_path``.

    Args:
        tar_file: Path of a tarball or an open binary file object.
        file: Path of the wanted file as it was given to :func:`tar`.
        save_path: Destination directory, created if missing.  The member is
            written there under its base name only.
    """
    os.makedirs(save_path, exist_ok=True)
    if isinstance(tar_file, (str, os.PathLike)):
        archive = tarfile.open(tar_file, 'r')
    else:
        tar_file.seek(0)
        archive = tarfile.open(fileobj=tar_file, mode='r')

    wanted = {file, _tar_member_name(file)}
    with archive:
        for member in archive.getmembers():
            if member.name in wanted:
                member.name = os.path.basename(member.name)  # remove the path
                archive.extract(member, path=save_path)


def compress(file):
    """Gzip a path or an open binary file object into a new temp file."""
    compressed_file = tempfile.SpooledTemporaryFile()
    if isinstance(file, (str, os.PathLike)):
        with open(file, 'rb') as infile, \
                gzip.open(compressed_file, 'wb') as gz_out:
            gz_out.write(infile.read())
    else:
        file.seek(0)
        with gzip.open(compressed_file, 'wb') as gz_out:
            gz_out.write(file.read())
    return compressed_file


def uncompress(file):
    """Gunzip a path or an open binary file object into a new temp file."""
    if not isinstance(file, (str, os.PathLike)):
        file.seek(0)
    decompressed_file = tempfile.SpooledTemporaryFile()
    with gzip.open(file, 'rb') as gz_in:
        decompressed_file.write(gz_in.read())
    return decompressed_file


def crypt(file, key):
    """Encrypt ``file`` with ``key`` via ``encrypt_file``; return a temp file."""
    encrypted_file = tempfile.SpooledTemporaryFile()
    encrypt_file(key, file, encrypted_file)
    return encrypted_file


def uncrypt(file, key):
    """Decrypt ``file`` with ``key`` via ``decrypt_file``; return a temp file."""
    decrypted_file = tempfile.SpooledTemporaryFile()
    decrypt_file(key, file, decrypted_file)
    return decrypted_file


def save(file, save_path):
    """Copy a seekable binary file object to ``save_path``.

    Missing parent directories are created.  Objects that cannot be read and
    rewound are reported on stdout and skipped.
    """
    if not (hasattr(file, 'read') and hasattr(file, 'seek')):
        print("Unable to save " + str(file) + " of type " + str(type(file)))
        return

    directory = os.path.dirname(save_path)
    if directory:  # empty when save_path is a bare file name
        os.makedirs(directory, exist_ok=True)

    file.seek(0)
    with open(save_path, 'wb') as out:
        while chunk := file.read(64 * 1024):
            out.write(chunk)


class DataBase:
    """SQLite index mapping backed-up files to the archive that holds them.

    Table ``files`` has one row per file (name, path, size, dates and the
    archive id); table ``crypt`` has one row per archive with its
    compression mode.  Archive names are the id zero-padded to five digits.
    """

    def __init__(self, base_file):
        """Open (or create) the database stored at ``base_file``."""
        self.conn = sqlite3.connect(base_file)
        self.__create_table()

    def close(self):
        """Commit pending changes and close the connection (idempotent)."""
        conn = getattr(self, 'conn', None)
        if conn is not None:
            conn.commit()
            conn.close()
            self.conn = None

    def __del__(self):
        # ``conn`` is missing when sqlite3.connect() failed in __init__;
        # close() copes with that.
        self.close()

    def __create_table(self):
        """Create both tables if needed and switch rows to dictionaries."""
        cursor = self.conn.cursor()
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS files(
                id INTEGER PRIMARY KEY UNIQUE NOT NULL,
                name TEXT,
                path TEXT,
                size INTEGER,
                m_date DATE,
                c_date DATE,
                crypt_id INTEGER,
                CONSTRAINT files_crypt_FK FOREIGN KEY (crypt_id) REFERENCES crypt(id)
            )
        """)
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS crypt(
                id INTEGER PRIMARY KEY UNIQUE NOT NULL,
                compress_mode TEXT
            )
        """)
        self.conn.row_factory = dict_factory
        self.conn.commit()

    def get_crypt_file(self, list_file):
        """Look up the archive holding each path.

        Returns:
            One dict per input path with keys ``path`` (posix form),
            ``crypt`` (archive name, or ``None`` when the path is unknown)
            and ``compress_mode`` (``None`` when unknown).
        """
        cursor = self.conn.cursor()
        crypt_list = []
        for path in list_file:
            path = pathlib.Path(path).as_posix()
            cursor.execute(
                """SELECT files.crypt_id AS crypt_id,
                          crypt.compress_mode AS compress_mode
                   FROM files LEFT JOIN crypt ON crypt.id = files.crypt_id
                   WHERE files.path=?""",
                (path,))
            row = cursor.fetchone()
            if row is None or row['crypt_id'] is None:
                crypt_list.append(
                    {'path': path, 'crypt': None, 'compress_mode': None})
            else:
                crypt_list.append({
                    'path': path,
                    'crypt': str(row['crypt_id']).zfill(5),
                    'compress_mode': row['compress_mode'],
                })
        return crypt_list

    def __get_crypt_id(self, list_file):
        """Pick the archive id for a group of files.

        Returns the id most of them already share; as soon as one file is
        unknown, a fresh id (``max(id) + 1``, or 0 for an empty table) is
        returned instead.
        """
        cursor = self.conn.cursor()
        crypt_id_list = []
        for file in list_file:
            cursor.execute(
                """SELECT crypt_id FROM files WHERE name=? AND path=?""",
                (file['name'], file['path']))
            row = cursor.fetchone()
            if row is None:
                cursor.execute(
                    """SELECT IFNULL(max(id) + 1, 0) as crypt_id FROM crypt""")
                return cursor.fetchone()['crypt_id']
            crypt_id_list.append(row['crypt_id'])
        return most_frequent(crypt_id_list)

    def exist(self, file):
        """Return 1 if a row with this name and path exists, else 0."""
        cursor = self.conn.cursor()
        cursor.execute(
            """SELECT EXISTS (SELECT 1 FROM files WHERE name=? AND path=?) as exist""",
            (file['name'], file['path']))
        return cursor.fetchone()['exist']

    def modified(self, file):
        """Return True when ``file`` differs from its stored row (or has none)."""
        cursor = self.conn.cursor()
        cursor.execute(
            """SELECT name, path, size, m_date, c_date FROM files
               WHERE name=? AND path=?""",
            (file['name'], file['path']))
        stored = cursor.fetchone()
        if stored is None:
            return True
        # Compare only the stored columns so extra keys on ``file`` do not
        # make an unchanged file look modified.
        return any(file.get(column) != value for column, value in stored.items())

    def add(self, list_file, compress_mode=None):
        """Record new or modified files under one archive id.

        Args:
            list_file: Dicts with keys ``name``, ``path``, ``size``,
                ``m_date`` and ``c_date``.
            compress_mode: Stored alongside the archive id.

        Returns:
            The archive name to upload to, or ``None`` when nothing is new
            or modified (nothing is written in that case).
        """
        if not list_file:
            return None

        cursor = self.conn.cursor()
        crypt_id = self.__get_crypt_id(list_file)
        cursor.execute("""SELECT IFNULL(max(id) + 1, 0) as files_id FROM files""")
        file_id = cursor.fetchone()['files_id']

        proceed = False
        for file in list_file:
            if self.exist(file):
                if self.modified(file):
                    cursor.execute(
                        """UPDATE files SET size=?, m_date=?, c_date=?, crypt_id=?
                           WHERE name=? AND path=?""",
                        (file['size'], file['m_date'], file['c_date'], crypt_id,
                         file['name'], file['path']))
                    proceed = True
            else:
                # Bound parameters: file names may contain quotes.
                cursor.execute(
                    """INSERT INTO files VALUES(?, ?, ?, ?, ?, ?, ?)""",
                    (file_id, file['name'], file['path'], file['size'],
                     file['m_date'], file['c_date'], crypt_id))
                file_id += 1
                proceed = True

        if not proceed:
            return None

        # The id already exists when modified files reuse their archive.
        cursor.execute("""INSERT OR REPLACE INTO crypt VALUES(?, ?)""",
                       (crypt_id, compress_mode))
        self.conn.commit()
        return str(crypt_id).zfill(5)


def human_size(size, decimal_places=0, unit=None):
    """Format a byte count with a unit suffix.

    Args:
        size: Number of bytes.
        decimal_places: Digits after the decimal point.
        unit: ``'iec'`` for KiB/MiB (base 1024), ``'si'`` for KB/MB
            (base 1000); anything else gives K/M with base 1024.

    Values beyond the largest unit stay expressed in that unit.
    """
    base = 1024.0
    labels = ['B', 'K', 'M', 'G', 'T']
    if unit == 'iec':
        labels = ['B', 'KiB', 'MiB', 'GiB', 'TiB']
    elif unit == 'si':
        base = 1000.0
        labels = ['B', 'KB', 'MB', 'GB', 'TB']

    for label in labels[:-1]:
        if size < base:
            break
        size /= base
    else:
        # Ran out of smaller units: never divide past the last one.
        label = labels[-1]
    return f"{size:.{decimal_places}f}{label}"


def parse_size(size):
    """Convert a size such as ``"50M"`` into a number of bytes.

    Units B, K, M, G and T (powers of 1024) are accepted in either case; a
    value without a unit is read as kibibytes.

    Raises:
        ValueError: If ``size`` is empty or not a number followed by a unit.
    """
    text = str(size).strip()
    if not text:
        raise ValueError("Empty size")
    if text[-1].isdigit():
        text += 'K'
    match = re.match(r"([0-9]+(?:\.[0-9]+)?)\s*([BKMGT])", text, re.I)
    if match is None:
        raise ValueError(f"Invalid size: {size!r}")
    number, unit = match.groups()
    return int(float(number) * _SIZE_UNITS[unit.upper()])


def most_frequent(list):
    """Return the most common element of ``list``.

    Raises:
        ValueError: If ``list`` is empty.
    """
    # The parameter keeps its historical name for keyword callers.
    counts = Counter(list)
    if not counts:
        raise ValueError("most_frequent() arg is an empty sequence")
    return counts.most_common(1)[0][0]


def dict_factory(cursor, row):
    """sqlite3 row factory returning each row as a ``{column: value}`` dict."""
    return {col[0]: value for col, value in zip(cursor.description, row)}