"""Incremental encrypted backup of a directory tree to a Backblaze B2 bucket.

Files are gzip-compressed (small files are first grouped into one tarball per
directory), encrypted with the helpers star-imported from ``crypt``, uploaded
to B2 and optionally mirrored to a local directory.  A SQLite database records
which file went into which archive.
"""
import contextlib
import hashlib
import io
import os
import pathlib
import re
import sqlite3
import tarfile
import tempfile
from collections import Counter
from datetime import datetime

import b2sdk.v2
import mgzip as gzip
from b2sdk.v2 import B2Api

# Must stay last: provides encrypt_file / decrypt_file used below.
from crypt import *

_CHUNK_SIZE = 64 * 1024
_DATE_FORMAT = "%Y-%m-%d %H:%M:%S.%f"
_SIZE_UNITS = {"B": 1, "K": 2**10, "M": 2**20, "G": 2**30, "T": 2**40}
_SIZE_PATTERN = re.compile(r"([0-9]+(?:\.[0-9]+)?)([BKMGT])", re.I)


def _is_path(obj):
    """Return True when *obj* is a filesystem path (``str`` or ``os.PathLike``)."""
    return isinstance(obj, (str, os.PathLike))


def _is_file_like(obj):
    """Return True when *obj* offers the ``read``/``seek`` methods used here."""
    return callable(getattr(obj, "read", None)) and callable(getattr(obj, "seek", None))


def _tar_member_name(path):
    """Return the member name ``TarFile.add(path)`` stores for *path*.

    ``tarfile`` drops the drive, converts separators to ``/`` and strips
    leading slashes, so an absolute path is not stored verbatim.
    """
    _, name = os.path.splitdrive(path)
    return name.replace(os.sep, "/").lstrip("/")


class Backup:
    """Walk a directory tree and push encrypted archives to a B2 bucket."""

    # Encryption key handed to ``crypt``.
    key = None
    # Files strictly larger than this (see ``parse_size``) get their own archive.
    tarball_size = "50M"
    # Local directory that mirrors every upload; ``None`` disables the mirror.
    save_location = None

    def __init__(self, key, bdd, app_key_id=None, app_key=None, bucket_id=None):
        """Open the tracking database and authorize against B2.

        Args:
            key: encryption key passed to ``crypt``.
            bdd: SQLite database file handed to ``DataBase``.
            app_key_id: B2 application key id.
            app_key: B2 application key.
            bucket_id: id of the destination bucket.
        """
        self.key = key
        self.bdd = DataBase(bdd)
        self.b2 = B2Api()
        self.b2.authorize_account("production", app_key_id, app_key)
        self.buk = self.b2.get_bucket_by_id(bucket_id)

    def recurse(self, path):
        """Back up every file below *path*, depth first.

        Files larger than ``tarball_size`` are archived individually
        (``gz``); the remaining files of each directory are grouped into a
        single ``tar.gz``.  Nothing is uploaded for an entry (or group)
        that ``DataBase.add`` reports as unchanged.
        """
        threshold = parse_size(self.tarball_size)
        small_files = []
        for entry in os.listdir(path):
            uri = os.path.join(path, entry)
            if os.path.isfile(uri):
                record = self._describe(entry, uri)
                if record['size'] > threshold:
                    crypt_name = self.bdd.add([record], compress_mode="gz")
                    if crypt_name is not None:
                        print("Proceed", uri)
                        self._store(uri, crypt_name)
                else:
                    small_files.append(record)
            elif os.path.isdir(uri):
                self.recurse(uri)

        if small_files:
            crypt_name = self.bdd.add(small_files, compress_mode="tar.gz")
            if crypt_name is not None:
                print("Proceed", path, ":", [file['name'] for file in small_files])
                tarball = tar(small_files)
                try:
                    self._store(tarball, crypt_name)
                finally:
                    tarball.close()

    @staticmethod
    def _describe(name, uri):
        """Build the metadata record stored in the database for one file."""
        return {
            'name': name,
            'path': uri,
            'size': os.path.getsize(uri),
            'm_date': datetime.fromtimestamp(os.path.getmtime(uri)).strftime(_DATE_FORMAT),
            'c_date': datetime.fromtimestamp(os.path.getctime(uri)).strftime(_DATE_FORMAT),
        }

    def _store(self, source, crypt_name):
        """Compress, encrypt, upload and optionally mirror *source* as *crypt_name*."""
        compressed = compress(source)
        try:
            enc = crypt(compressed, self.key)
        finally:
            compressed.close()
        try:
            print(" Size : ", get_size(enc))
            upload_b2(self.buk, enc, crypt_name)
            # Without a configured mirror directory, os.path.join(None, ...)
            # would raise after the upload already succeeded.
            if self.save_location is not None:
                save(enc, os.path.join(self.save_location, crypt_name))
        finally:
            enc.close()


def get_size(in_file):
    """Return the size of a path or file-like object as an SI string.

    A file-like object is left rewound to position 0.

    Raises:
        TypeError: *in_file* is neither a path nor a seekable file object.
    """
    if _is_path(in_file):
        filesize = os.path.getsize(in_file)
    elif _is_file_like(in_file):
        in_file.seek(0, os.SEEK_END)
        filesize = in_file.tell()
        in_file.seek(0)
    else:
        raise TypeError("expected a path or a file object, got " + str(type(in_file)))
    return human_size(filesize, decimal_places=1, unit='si')


def upload_b2(buk, file, save_name, chunksize=64 * 1024):
    """Upload the full content of file object *file* to bucket *buk*.

    Args:
        buk: B2 bucket object.
        file: seekable binary file object; it is read from the start.
        save_name: name of the file in the bucket.
        chunksize: unused, kept for interface compatibility.
    """
    if not _is_file_like(file):
        print("Unable to save " + str(file) + " of type " + str(type(file)))
        return
    file.seek(0)
    # Read once; the same bytes feed both the checksum and the upload.
    payload = file.read()
    sha1 = hashlib.sha1(payload).hexdigest()
    buk.upload(b2sdk.v2.UploadSourceBytes(payload, content_sha1=sha1), save_name)


def tar(files):
    """Pack the ``'path'`` of every record in *files* into an uncompressed tar.

    Returns:
        A ``tempfile.SpooledTemporaryFile`` holding the archive; the caller
        owns it and is responsible for closing it.
    """
    tarball = tempfile.SpooledTemporaryFile()
    try:
        with tarfile.open(fileobj=tarball, mode='w') as archive:
            for file in files:
                archive.add(file['path'])
    except BaseException:
        tarball.close()
        raise
    return tarball


def untar(tar_file, files, save_path):
    """Extract the given records from a tar archive into *save_path*.

    Args:
        tar_file: path to the archive or a seekable file object holding it.
        files: records whose ``'path'`` identifies the member to extract.
        save_path: destination directory.

    Raises:
        TypeError: *tar_file* is neither a path nor a file object.
        KeyError: a requested member is not in the archive.
    """
    if _is_path(tar_file):
        archive = tarfile.open(tar_file, 'r')
    elif _is_file_like(tar_file):
        tar_file.seek(0)
        archive = tarfile.open(fileobj=tar_file, mode='r')
    else:
        raise TypeError("expected a path or a file object, got " + str(type(tar_file)))
    with archive:
        for file in files:
            try:
                member = archive.getmember(file['path'])
            except KeyError:
                # tar() adds absolute paths, which tarfile stores without
                # the leading separator.
                member = archive.getmember(_tar_member_name(file['path']))
            archive.extract(member, path=save_path)


def compress(file):
    """Gzip a path or file object into a new temporary file.

    Returns:
        A ``tempfile.SpooledTemporaryFile`` with the compressed bytes,
        positioned wherever the last write left it.

    Raises:
        TypeError: *file* is neither a path nor a file object.
    """
    with contextlib.ExitStack() as stack:
        if _is_path(file):
            # Opened here, so it is closed here.
            infile = stack.enter_context(open(file, 'rb'))
        elif _is_file_like(file):
            file.seek(0)
            infile = file
        else:
            raise TypeError("expected a path or a file object, got " + str(type(file)))
        compressed_file = tempfile.SpooledTemporaryFile()
        with gzip.open(compressed_file, 'wb') as gz:
            while chunk := infile.read(_CHUNK_SIZE):
                gz.write(chunk)
    return compressed_file


def uncompress(file):
    """Gunzip a path or file object into a new temporary file."""
    if _is_file_like(file):
        file.seek(0)
    decompressed_file = tempfile.SpooledTemporaryFile()
    with gzip.open(file, 'rb') as gz:
        while chunk := gz.read(_CHUNK_SIZE):
            decompressed_file.write(chunk)
    return decompressed_file


def crypt(file, key):
    """Encrypt *file* with *key* into a new temporary file and return it.

    NOTE(review): the input is passed to ``encrypt_file`` at its current
    stream position; whether that helper rewinds it is not visible here.
    """
    encrypted_file = tempfile.SpooledTemporaryFile()
    encrypt_file(key, file, encrypted_file)
    return encrypted_file


def uncrypt(file, key):
    """Decrypt *file* with *key* into a new temporary file and return it."""
    decrypted_file = tempfile.SpooledTemporaryFile()
    decrypt_file(key, file, decrypted_file)
    return decrypted_file


def save(file, save_path):
    """Write the full content of file object *file* to *save_path*.

    Missing parent directories are created.  Anything that is not a file
    object is reported on stdout and ignored.
    """
    if not _is_file_like(file):
        print("Unable to save " + str(file) + " of type " + str(type(file)))
        return
    directory = os.path.dirname(save_path)
    if directory:
        # makedirs handles nested paths; a bare filename has no directory part.
        os.makedirs(directory, exist_ok=True)
    file.seek(0)
    with open(save_path, 'wb') as out:
        while chunk := file.read(_CHUNK_SIZE):
            out.write(chunk)


class DataBase:
    """SQLite bookkeeping of backed-up files and the archives holding them."""

    def __init__(self, base_file):
        """Connect to *base_file* and (re)create the schema."""
        self.conn = sqlite3.connect(base_file)
        self.__create_table()

    def __del__(self):
        # __init__ may have failed before the connection existed.
        conn = getattr(self, "conn", None)
        if conn is not None:
            conn.commit()
            conn.close()

    def __create_table(self):
        """Create the ``files`` and ``crypt`` tables and install ``dict_factory``."""
        cursor = self.conn.cursor()
        # NOTE(review): both tables are dropped on every construction, so no
        # state survives between runs -- confirm this is intended.
        cursor.execute("""DROP TABLE IF EXISTS files""")
        cursor.execute("""DROP TABLE IF EXISTS crypt""")
        self.conn.commit()
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS files(
                id INTEGER PRIMARY KEY UNIQUE NOT NULL,
                name TEXT,
                path TEXT,
                size INTEGER,
                m_date DATE,
                c_date DATE,
                crypt_id INTEGER,
                CONSTRAINT files_crypt_FK FOREIGN KEY (crypt_id) REFERENCES crypt(id)
            )
        """)
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS crypt(
                id INTEGER PRIMARY KEY UNIQUE NOT NULL,
                compress_mode TEXT
            )
        """)
        self.conn.row_factory = dict_factory
        self.conn.commit()

    def _next_crypt_id(self, cursor):
        """Return the first unused archive id (0 for an empty table)."""
        cursor.execute("""SELECT IFNULL(max(id) + 1, 0) as crypt_id FROM crypt""")
        return cursor.fetchone()['crypt_id']

    def get_crypt_id(self, list_file):
        """Pick the archive id for a group of files.

        If any file is unknown (or the group is empty) a fresh id is
        returned; otherwise the id most of the files already share.
        """
        cursor = self.conn.cursor()
        known_ids = []
        for file in list_file:
            cursor.execute("""SELECT crypt_id FROM files WHERE name=? AND path=?""",
                           (file['name'], file['path']))
            row = cursor.fetchone()
            if row is None:
                return self._next_crypt_id(cursor)
            known_ids.append(row['crypt_id'])
        if not known_ids:
            return self._next_crypt_id(cursor)
        return most_frequent(known_ids)

    def exist(self, file):
        """Return 1 if a row with this name and path exists, else 0."""
        cursor = self.conn.cursor()
        cursor.execute("""SELECT EXISTS (SELECT 1 FROM files WHERE name=? AND path=?) as exist""",
                       (file['name'], file['path']))
        return cursor.fetchone()['exist']

    def modified(self, file):
        """Return True when *file* differs from its stored row (or has none)."""
        cursor = self.conn.cursor()
        cursor.execute("""SELECT name, path, size, m_date, c_date FROM files WHERE name=? AND path=?""",
                       (file['name'], file['path']))
        return file != cursor.fetchone()

    def add(self, list_file, compress_mode=None):
        """Record a group of files as belonging to one archive.

        New files are inserted, changed files updated.

        Returns:
            The zero-padded archive name when at least one file was new or
            changed, otherwise ``None`` (nothing is written).
        """
        cursor = self.conn.cursor()
        crypt_id = self.get_crypt_id(list_file)
        cursor.execute("""SELECT IFNULL(max(id) + 1, 0) as files_id FROM files""")
        file_id = cursor.fetchone()['files_id']
        proceed = False
        for file in list_file:
            if self.exist(file):
                if self.modified(file):
                    cursor.execute(
                        """UPDATE files SET size=?, m_date=?, c_date=?, crypt_id=? WHERE name=? AND path=?""",
                        (file['size'], file['m_date'], file['c_date'], crypt_id,
                         file['name'], file['path']))
                    proceed = True
            else:
                # Bound parameters: names containing quotes would break
                # string-built SQL.
                cursor.execute(
                    """INSERT INTO files VALUES(?, ?, ?, ?, ?, ?, ?)""",
                    (file_id, file['name'], file['path'], file['size'],
                     file['m_date'], file['c_date'], crypt_id))
                file_id += 1
                proceed = True
        if not proceed:
            return None
        # The id already exists when every file of the group was known and
        # one changed; a plain INSERT would violate the primary key.
        cursor.execute("""INSERT OR REPLACE INTO crypt VALUES(?, ?)""", (crypt_id, compress_mode))
        self.conn.commit()
        return str(crypt_id).zfill(5)


def human_size(size, decimal_places=0, unit=None):
    """Format a byte count with a unit suffix.

    Args:
        size: number of bytes.
        decimal_places: digits after the decimal point.
        unit: ``'iec'`` (1024, ``KiB``...), ``'si'`` (1000, ``KB``...) or
            anything else for 1024 with single-letter suffixes.

    Values beyond the largest unit stay expressed in that unit.
    """
    if unit == 'iec':
        base, labels = 1024.0, ['B', 'KiB', 'MiB', 'GiB', 'TiB']
    elif unit == 'si':
        base, labels = 1000.0, ['B', 'KB', 'MB', 'GB', 'TB']
    else:
        base, labels = 1024.0, ['B', 'K', 'M', 'G', 'T']
    index = 0
    # Stop at the last label so the value is never divided past it.
    while size >= base and index < len(labels) - 1:
        size /= base
        index += 1
    return f"{size:.{decimal_places}f}{labels[index]}"


def parse_size(size):
    """Convert a size string such as ``"50M"`` or ``"1.5g"`` to bytes.

    Units are powers of 1024 and case-insensitive.  A string ending in a
    digit is interpreted as kibibytes.

    Raises:
        ValueError: *size* is empty or not ``<number><unit>``.
    """
    if not size:
        raise ValueError("empty size")
    if size[-1].isdigit():
        size = size + 'K'
    match = _SIZE_PATTERN.match(size)
    if match is None:
        raise ValueError(f"invalid size: {size!r}")
    number, unit = match.groups()
    return int(float(number) * _SIZE_UNITS[unit.upper()])


def most_frequent(list):  # parameter name kept for keyword callers
    """Return the most common element; ties go to the first one seen.

    Raises:
        ValueError: the sequence is empty.
    """
    if not list:
        raise ValueError("most_frequent() arg is an empty sequence")
    return Counter(list).most_common(1)[0][0]


def dict_factory(cursor, row):
    """sqlite3 row factory mapping column names to values."""
    return {column[0]: value for column, value in zip(cursor.description, row)}