"""Encrypted, compressed backups to a local directory or a Backblaze B2 bucket.

Files are gzip-compressed (small files of one directory are first grouped in
a tarball), encrypted with the project's ``crypt`` helpers and stored under a
zero-padded numeric name.  A SQLite database keeps the mapping between the
original files and the archive ("crypt") that contains them.
"""

import hashlib
import io
import os
import pathlib
import re
import sqlite3
import tarfile
import tempfile
from datetime import datetime

import mgzip as gzip
from b2sdk.v2 import B2Api

from crypt import encrypt_file, decrypt_file

ZFILL = 5  # archive names are the crypt id zero-padded to this width
BACKBLAZE = 100  # save_mode: archives are uploaded to a B2 bucket
LOCAL = 101  # save_mode: archives are written under a local directory

_CHUNK_SIZE = 64 * 1024
_DATE_FORMAT = "%Y-%m-%d %H:%M:%S.%f"
_SIZE_PATTERN = re.compile(r"\s*([0-9]+(?:\.[0-9]+)?)\s*([BKMGT])", re.I)


def _is_stream(obj):
    """Return True when *obj* looks like a seekable, readable file object."""
    return callable(getattr(obj, 'read', None)) and callable(getattr(obj, 'seek', None))


def _is_path(obj):
    """Return True when *obj* is a filesystem path (``str`` or ``os.PathLike``)."""
    return isinstance(obj, (str, os.PathLike))


def _relative_parent(path):
    """Return the parent directory of *path* as a relative path.

    The drive/root anchor and any ``..`` component are dropped so that the
    result can be joined below a restore directory without escaping it.
    Returns ``""`` when *path* has no parent directory.
    """
    pure = pathlib.PureWindowsPath(path)  # understands both '/' and '\\'
    parts = pure.parts[1:] if pure.anchor else pure.parts
    parents = [part for part in parts[:-1] if part != '..']
    return os.path.join(*parents) if parents else ""


class Backup:
    """Save files into encrypted archives and restore them.

    Class attributes act as defaults:
      key            encryption key handed to the ``crypt`` helpers
      buk            B2 bucket object (Backblaze mode only)
      tarball_size   size threshold (see ``parse_size``); bigger files get
                     their own archive, smaller ones are grouped per directory
      save_location  directory prefix of the archive names
    """

    key = None
    buk = None
    tarball_size = "50M"
    save_location = ""

    def __init__(self, key, bdd=None, save_mode=None, save_location=None,
                 app_key_id=None, app_key=None, bucket_id=None):
        """Configure the storage backend and open the index database.

        :param key: encryption key passed to ``encrypt_file``/``decrypt_file``
        :param bdd: path of the SQLite index database (required)
        :param save_mode: ``BACKBLAZE`` or ``LOCAL``
        :param save_location: target directory (required in ``LOCAL`` mode)
        :param app_key_id: B2 application key id (``BACKBLAZE`` mode)
        :param app_key: B2 application key (``BACKBLAZE`` mode)
        :param bucket_id: B2 bucket id (``BACKBLAZE`` mode)
        :raises Exception: when ``save_mode`` or a required argument is missing
        :raises ValueError: when ``save_mode`` is not a known mode
        """
        self.key = key
        if save_mode is None:
            raise Exception("No save_mode")
        if save_mode == BACKBLAZE:
            required = (app_key_id, app_key, bucket_id, bdd)
            if any(arg is None for arg in required):
                raise Exception("Some arguments are not filled")
            self.b2 = B2Api()
            self.b2.authorize_account("production", app_key_id, app_key)
            self.buk = self.b2.get_bucket_by_id(bucket_id)
        elif save_mode == LOCAL:
            if save_location is None or bdd is None:
                raise Exception("Some arguments are not filled")
            self.save_location = save_location
        else:
            # Fail early instead of leaving an object without database.
            raise ValueError(f"Unknown save_mode: {save_mode!r}")
        self.bdd = DataBase(bdd)
        self.save_mode = save_mode

    def save(self, path, recurse=True):
        """Back up the files of directory *path*; return ``self`` for chaining.

        :param path: directory to scan
        :param recurse: also descend into sub-directories
        """
        self.__save(path, recurse=recurse)
        return self

    def __save(self, path, recurse=True):
        """Back up one directory level, recursing when *recurse* is true."""
        threshold = parse_size(self.tarball_size)
        small_files = []
        for entry in os.listdir(path):
            uri = os.path.join(path, entry)
            if os.path.isfile(uri):
                record = self._describe(entry, uri)
                if record['size'] > threshold:
                    # Big file: stored alone, gzip only.
                    crypt_id = self.bdd.add([record])
                    if crypt_id is not None:
                        print("Proceed", uri, ' ==> ', crypt_id)
                        self._store(uri, crypt_id, "gz")
                else:
                    small_files.append(record)
            elif recurse and os.path.isdir(uri):
                self.__save(uri, recurse=recurse)

        if small_files:
            # Small files of this directory share one tar.gz archive.
            crypt_id = self.bdd.add(small_files)
            if crypt_id is not None:
                print("Proceed", path, ":", [file['name'] for file in small_files], ' ==> ', crypt_id)
                with tar([file['path'] for file in small_files]) as tarball:
                    self._store(tarball, crypt_id, "tar.gz")

    @staticmethod
    def _describe(name, uri):
        """Build the database record (name, path, size, dates) of file *uri*."""
        return {
            'name': name,
            'path': pathlib.Path(uri).as_posix(),
            'size': os.path.getsize(uri),
            'm_date': datetime.fromtimestamp(os.path.getmtime(uri)).strftime(_DATE_FORMAT),
            'c_date': datetime.fromtimestamp(os.path.getctime(uri)).strftime(_DATE_FORMAT),
        }

    def _archive_name(self, crypt_id):
        """Return the storage name of archive *crypt_id*."""
        return os.path.join(self.save_location, str(crypt_id).zfill(ZFILL))

    def _store(self, source, crypt_id, compress_mode):
        """Compress, encrypt, register and upload *source* as archive *crypt_id*."""
        with compress(source) as compressed, crypt(compressed, self.key) as enc:
            self.bdd.set_crypt_attr(crypt_id, compress_mode=compress_mode, sha1sum=get_hash(enc))
            print(" Size : ", get_size(enc))
            upload_file(enc, self.save_mode, file_name=self._archive_name(crypt_id), bucket=self.buk)

    def recover_file(self, paths, parents=False, save_path=None):
        """Restore the files listed in *paths*.

        :param paths: iterable of original file paths
        :param parents: recreate each file's parent directories below
            *save_path* instead of restoring everything flat
        :param save_path: destination directory; defaults to the current
            working directory at call time
        Paths unknown to the database are skipped.
        """
        if save_path is None:
            save_path = os.getcwd()
        for file in self.bdd.get_crypt_name(paths):
            crypt_id = file.get('crypt_id')
            if crypt_id is None:
                continue
            # Computed per file: the destination must not accumulate the
            # directories of previously restored files.
            target_dir = save_path
            if parents:
                relative_parent = _relative_parent(file['path'])
                if relative_parent:
                    target_dir = os.path.join(save_path, relative_parent)
            encrypted_file = download_file(self.save_mode, self._archive_name(crypt_id), bucket=self.buk)
            uncompress(uncrypt(encrypted_file, self.key), file['name'], target_dir, file['compress_mode'])


def upload_file(file, mode, file_name, bucket=None):
    """Send stream *file* to the backend selected by *mode* as *file_name*."""
    if mode == BACKBLAZE:
        # Rewind explicitly rather than relying on a previous helper call.
        file.seek(0)
        bucket.upload_bytes(file.read(), file_name)
    elif mode == LOCAL:
        save(file, file_name)


def download_file(mode, file, bucket=None):
    """Fetch archive *file* from the backend into a temporary file.

    The returned stream is left positioned after the downloaded data.
    """
    dl = tempfile.SpooledTemporaryFile()
    if mode == BACKBLAZE:
        bucket.download_file_by_name(file).save(dl)
    elif mode == LOCAL:
        with open(file, 'rb') as infile:
            while chunk := infile.read(_CHUNK_SIZE):
                dl.write(chunk)
    return dl


def get_size(in_file):
    """Return the human readable (SI) size of a path or of a stream.

    A stream is rewound to its start afterwards.
    :raises TypeError: when *in_file* is neither a path nor a stream
    """
    if _is_path(in_file):
        filesize = os.path.getsize(in_file)
    elif _is_stream(in_file):
        in_file.seek(0, os.SEEK_END)
        filesize = in_file.tell()
        in_file.seek(0)
    else:
        raise TypeError(f"Unsupported input of type {type(in_file)}")
    return human_size(filesize, decimal_places=1, unit='si')


def _update_digest(digest, stream):
    """Feed the remaining content of *stream* to *digest* chunk by chunk."""
    while chunk := stream.read(_CHUNK_SIZE):
        digest.update(chunk)


def get_hash(infile):
    """Return the hexadecimal SHA-1 of a path or of a stream (read from 0).

    :raises TypeError: when *infile* is neither a path nor a stream
    """
    digest = hashlib.sha1()
    if _is_path(infile):
        with open(infile, 'rb') as stream:
            _update_digest(digest, stream)
    elif _is_stream(infile):
        infile.seek(0)
        _update_digest(digest, infile)
    else:
        raise TypeError(f"Unsupported input of type {type(infile)}")
    return digest.hexdigest()


def tar(files):
    """Pack *files* (paths) into a temporary tarball, flattened to basenames."""
    tarball = tempfile.SpooledTemporaryFile()
    with tarfile.open(fileobj=tarball, mode='w') as archive:
        for file in files:
            archive.add(file, arcname=os.path.basename(file))
    return tarball


def untar(tar_file, file, save_path):
    """Extract member *file* of *tar_file* (path or stream) into *save_path*."""
    if _is_stream(tar_file):
        tar_file.seek(0)
        archive = tarfile.open(fileobj=tar_file, mode='r')
    else:
        archive = tarfile.open(tar_file, 'r')
    with archive:
        os.makedirs(save_path, exist_ok=True)
        archive.extract(file, path=save_path)


def compress(file):
    """Gzip a path or a stream into a new temporary file and return it.

    :raises TypeError: when *file* is neither a path nor a stream
    """
    if _is_path(file):
        with open(file, 'rb') as infile:
            data = infile.read()
    elif _is_stream(file):
        file.seek(0)
        data = file.read()
    else:
        raise TypeError(f"Unsupported input of type {type(file)}")
    compressed_file = tempfile.SpooledTemporaryFile()
    with gzip.open(compressed_file, 'wb') as zipfile:
        zipfile.write(data)
    return compressed_file


def uncompress(data, file_name, save_path, compress_mode):
    """Undo the steps named in *compress_mode* and write *file_name*.

    *compress_mode* is a dot separated list such as ``"gz"`` or ``"tar.gz"``.
    """
    modes = (compress_mode or "").split('.')
    if 'gz' in modes:
        data = ungz(data)
    if 'tar' in modes:
        untar(data, file_name, save_path)
    else:
        save(data, os.path.join(save_path, file_name))


def ungz(data):
    """Gunzip *data* (path or stream) into a new temporary file."""
    if _is_stream(data):
        data.seek(0)
    decompressed_file = tempfile.SpooledTemporaryFile()
    with gzip.open(data, 'rb') as zipfile:
        decompressed_file.write(zipfile.read())
    return decompressed_file


def crypt(file, key):
    """Encrypt *file* with *key* into a new temporary file.

    Delegates to ``encrypt_file`` of the project's ``crypt`` module.
    """
    encrypted_file = tempfile.SpooledTemporaryFile()
    encrypt_file(key, file, encrypted_file)
    return encrypted_file


def uncrypt(file, key):
    """Decrypt *file* with *key* into a new temporary file.

    Delegates to ``decrypt_file`` of the project's ``crypt`` module.
    """
    decrypted_file = tempfile.SpooledTemporaryFile()
    decrypt_file(key, file, decrypted_file)
    return decrypted_file


def save(file, save_path):
    """Copy stream *file* (from its start) to *save_path*.

    Missing parent directories are created.  Objects that are not streams
    are reported on stdout and ignored.
    """
    if not _is_stream(file):
        print("Unable to save " + str(file) + " of type " + str(type(file)))
        return
    directory = os.path.dirname(save_path)
    if directory:  # empty when save_path is a bare file name
        os.makedirs(directory, exist_ok=True)
    file.seek(0)
    with open(save_path, 'wb') as out:
        while chunk := file.read(_CHUNK_SIZE):
            out.write(chunk)


class DataBase:
    """SQLite index linking saved files (``files``) to archives (``crypt``)."""

    def __init__(self, base_file):
        """Open (or create) the database *base_file* and its tables."""
        self.conn = sqlite3.connect(base_file)
        self.__create_table()

    def __del__(self):
        """Commit pending changes and close the connection."""
        conn = getattr(self, 'conn', None)  # absent when connect() failed
        if conn is None:
            return
        conn.commit()
        conn.close()

    def __create_table(self):
        """Create the tables when missing and return rows as dictionaries."""
        cursor = self.conn.cursor()
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS files(
                id INTEGER PRIMARY KEY UNIQUE NOT NULL,
                name TEXT,
                path TEXT,
                size INTEGER,
                m_date DATE,
                c_date DATE,
                crypt_id INTEGER,
                CONSTRAINT files_crypt_FK FOREIGN KEY (crypt_id) REFERENCES crypt(id)
            )
        """)
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS crypt(
                id INTEGER PRIMARY KEY UNIQUE NOT NULL,
                compress_mode TEXT,
                sha1sum TEXT
            )
        """)
        self.conn.row_factory = dict_factory
        # Debugging aid: self.conn.set_trace_callback(print)
        self.conn.commit()

    @staticmethod
    def _next_crypt_id(cursor):
        """Return the first unused id of the ``crypt`` table (0 when empty)."""
        cursor.execute("""SELECT IFNULL(max(id) + 1, 0) as crypt_id FROM crypt""")
        return cursor.fetchone()['crypt_id']

    def get_crypt_name(self, list_file):
        """Look up the archive holding each path of *list_file*.

        Returns one dict per path with keys ``name``, ``path``, ``crypt_id``
        and ``compress_mode``.  For an unknown path ``crypt_id`` and
        ``compress_mode`` are ``None`` (the legacy ``crypt`` key is kept).
        """
        cursor = self.conn.cursor()
        crypt_list = []
        for path in list_file:
            path = pathlib.PureWindowsPath(path).as_posix()
            cursor.execute("""SELECT crypt_id, compress_mode FROM files
                              INNER JOIN crypt ON files.crypt_id = crypt.id
                              WHERE path=?""", (path,))
            row = cursor.fetchone()
            entry = {'name': os.path.basename(path), 'path': path}
            if row is None:
                entry.update({'crypt_id': None, 'compress_mode': None, 'crypt': None})
            else:
                entry.update({'crypt_id': row['crypt_id'], 'compress_mode': row['compress_mode']})
            crypt_list.append(entry)
        return crypt_list

    def __get_crypt_id(self, list_file):
        """Choose the archive id under which *list_file* must be stored.

        * no file is known yet: a new id;
        * every known file shares one id: that id;
        * otherwise the most frequent id, unless files outside *list_file*
          also use it, in which case a new id is returned.
        """
        cursor = self.conn.cursor()
        known_ids = []
        for file in list_file:
            cursor.execute("""SELECT crypt_id FROM files WHERE name=? AND path=?""",
                           (file['name'], file['path']))
            row = cursor.fetchone()
            if row is not None:
                known_ids.append(row['crypt_id'])

        if not known_ids:
            return self._next_crypt_id(cursor)
        if len(set(known_ids)) == 1:
            return known_ids[0]

        candidate = most_frequent(known_ids)
        names = {file['name'] for file in list_file}
        paths = {file['path'] for file in list_file}
        # The membership test is done in Python so that file names never end
        # up inside the SQL text.
        cursor.execute("""SELECT name, path FROM files WHERE crypt_id=?""", (candidate,))
        shared_with_others = any(
            row['name'] not in names and row['path'] not in paths
            for row in cursor.fetchall()
        )
        if shared_with_others:
            return self._next_crypt_id(cursor)
        cursor.executemany("""UPDATE files SET crypt_id=? WHERE name=? AND path=?""",
                           [(candidate, file['name'], file['path']) for file in list_file])
        return candidate

    def exist(self, file):
        """Return 1 when *file* (``name``/``path`` keys) is indexed, else 0."""
        cursor = self.conn.cursor()
        cursor.execute("""SELECT EXISTS (SELECT 1 FROM files WHERE name=? AND path=?) as exist""",
                       (file['name'], file['path']))
        return cursor.fetchone()['exist']

    def modified(self, file):
        """Return True when *file* differs from its indexed record."""
        cursor = self.conn.cursor()
        cursor.execute("""SELECT name, path, size, m_date, c_date FROM files WHERE name=? AND path=?""",
                       (file['name'], file['path']))
        return file != cursor.fetchone()

    def set_crypt_attr(self, crypt_id, compress_mode=None, sha1sum=None):
        """Record the compression mode and SHA-1 of archive *crypt_id*."""
        cursor = self.conn.cursor()
        cursor.execute("""UPDATE crypt SET compress_mode=?, sha1sum=? WHERE id=?""",
                       (compress_mode, sha1sum, crypt_id))
        self.conn.commit()

    def add(self, list_file):
        """Index *list_file* and return the archive id to (re)build.

        New files are inserted and modified ones updated.  Returns ``None``
        when nothing is new or modified, i.e. no archive has to be written.
        """
        cursor = self.conn.cursor()
        crypt_id = self.__get_crypt_id(list_file)
        cursor.execute("""SELECT IFNULL(max(id) + 1, 0) as files_id FROM files""")
        file_id = cursor.fetchone()['files_id']
        proceed = False
        for file in list_file:
            if self.exist(file):
                if self.modified(file):
                    cursor.execute("""UPDATE files SET size=?, m_date=?, c_date=?, crypt_id=?
                                      WHERE name=? AND path=?""",
                                   (file['size'], file['m_date'], file['c_date'], crypt_id,
                                    file['name'], file['path']))
                    proceed = True
            else:
                cursor.execute("""INSERT INTO files (id, name, path, size, m_date, c_date, crypt_id)
                                  VALUES(?, ?, ?, ?, ?, ?, ?)""",
                               (file_id, file['name'], file['path'], file['size'],
                                file['m_date'], file['c_date'], crypt_id))
                file_id += 1
                proceed = True
        if not proceed:
            return None
        cursor.execute("""INSERT OR IGNORE INTO crypt (id) VALUES(?)""", (crypt_id,))
        self.conn.commit()
        return crypt_id


def human_size(size, decimal_places=0, unit=None):
    """Format a byte count with a unit suffix.

    :param size: number of bytes
    :param decimal_places: digits after the decimal point
    :param unit: ``'si'`` (1000, KB...), ``'iec'`` (1024, KiB...) or ``None``
        (1024, K...)
    Values beyond the largest unit stay expressed in that unit.
    """
    if unit == 'iec':
        base, suffixes = 1024.0, ['B', 'KiB', 'MiB', 'GiB', 'TiB']
    elif unit == 'si':
        base, suffixes = 1000.0, ['B', 'KB', 'MB', 'GB', 'TB']
    else:
        base, suffixes = 1024.0, ['B', 'K', 'M', 'G', 'T']
    last = len(suffixes) - 1
    for index, suffix in enumerate(suffixes):
        # Never divide past the last unit, the suffix would be wrong.
        if size < base or index == last:
            break
        size /= base
    return f"{size:.{decimal_places}f}{suffix}"


def parse_size(size):
    """Convert a size string such as ``"50M"`` or ``"1.5g"`` to bytes.

    Units are powers of 1024 (B, K, M, G, T, case-insensitive); a string
    ending with a digit is read as kibibytes.
    :raises ValueError: when *size* cannot be parsed
    """
    units = {"B": 1, "K": 2**10, "M": 2**20, "G": 2**30, "T": 2**40}
    if not size:
        raise ValueError(f"Invalid size: {size!r}")
    if size[-1].isdigit():
        size = size + 'K'
    match = _SIZE_PATTERN.match(size)
    if match is None:
        raise ValueError(f"Invalid size: {size!r}")
    number, unit = match.groups()
    return int(float(number) * units[unit.upper()])


def most_frequent(list):
    """Return the most common element of *list*.

    :raises ValueError: when *list* is empty
    """
    return max(set(list), key=list.count)


def dict_factory(cursor, row):
    """sqlite3 row factory mapping column names to the values of *row*."""
    return {col[0]: row[idx] for idx, col in enumerate(cursor.description)}