"""Incremental encrypted backup.

Walks a directory tree, packs files into gzip-compressed (and, for small
files, tar-grouped) archives, encrypts them, writes them to a save location
and records what was stored in an SQLite index.
"""

import io
import os
import pathlib
import re
import shutil
import sqlite3
import tarfile
import tempfile
from collections import Counter
from datetime import datetime

import mgzip as gzip

# NOTE(review): star import kept exactly as in the original. `encrypt_file`
# and `decrypt_file` (used by crypt()/uncrypt() below) are not defined in this
# file, so they presumably come from here -- confirm against that module.
from crypt import *

# Size of the chunks used when streaming data between file objects.
_CHUNK_SIZE = 64 * 1024
# Format used to store modification/creation dates in the index.
_DATE_FORMAT = "%Y-%m-%d %H:%M:%S.%f"
# Columns of the `files` table that describe a file on disk.
_FILE_FIELDS = ('name', 'path', 'size', 'm_date', 'c_date')


def _is_path(obj):
    """Return True when *obj* is a filesystem path rather than a file object."""
    return isinstance(obj, (str, bytes, os.PathLike))


def _timestamp(seconds):
    """Format a POSIX timestamp the way dates are stored in the index."""
    return datetime.fromtimestamp(seconds).strftime(_DATE_FORMAT)


def _member_name(file):
    """Return the tar member name for a file entry.

    The name is the file's full path (``path`` + ``name``) normalised the same
    way ``tarfile`` normalises names on ``add``: drive removed, ``/`` as
    separator, no leading slash. ``tar`` and ``untar`` both use it so that
    they always agree on member names.
    """
    full_path = os.path.join(file['path'], file['name'])
    _, tail = os.path.splitdrive(full_path)
    return tail.replace(os.sep, "/").lstrip("/")


class Backup:
    """Back up a directory tree as compressed, encrypted archives.

    Attributes:
        key: Encryption key handed to ``crypt``.
        tarball_size: Size threshold (string understood by ``parse_size``).
            Files strictly larger than it are stored on their own ("gz");
            the other files of a directory are grouped in one "tar.gz".
        save_location: Directory receiving the encrypted archives. Must be
            set before ``recurse`` is called.
    """

    key = None
    tarball_size = "50M"
    save_location = None

    def __init__(self, key, bdd):
        """Store the key and open the index database located at *bdd*."""
        self.key = key
        self.bdd = DataBase(bdd)

    def recurse(self, path):
        """Back up *path* and, recursively, all of its sub-directories.

        Only files that are new or modified according to the index are
        (re)written; unchanged ones are skipped.

        Raises:
            ValueError: If ``save_location`` has not been set.
        """
        # Fail before touching the index: otherwise files get recorded as
        # backed up and the run crashes when the archive is about to be saved.
        if self.save_location is None:
            raise ValueError("save_location must be set before calling recurse()")

        tarball_size = parse_size(self.tarball_size)
        files = []
        for f in os.listdir(path):
            uri = os.path.join(path, f)
            if os.path.isfile(uri):
                entry = {
                    'name': f,
                    'path': path,
                    'size': os.path.getsize(uri),
                    'm_date': _timestamp(os.path.getmtime(uri)),
                    'c_date': _timestamp(os.path.getctime(uri)),
                }
                if entry['size'] > tarball_size:
                    # Large file: stored alone, gzip only.
                    crypt_name = self.bdd.add([entry], compress_mode="gz")
                    if crypt_name is not None:
                        print("Proceed", uri)
                        self._store(uri, crypt_name)
                else:
                    files.append(entry)
            elif os.path.isdir(uri):
                self.recurse(uri)

        if files:
            # Small files of this directory share one tarball.
            crypt_name = self.bdd.add(files, compress_mode="tar.gz")
            if crypt_name is not None:
                print("Proceed", path, ":", [file['name'] for file in files])
                tarball = tar(files)
                try:
                    self._store(tarball, crypt_name)
                finally:
                    tarball.close()

    def _store(self, source, crypt_name):
        """Compress and encrypt *source*, then save it under *crypt_name*.

        *source* is a path or a file object. The intermediate temporary files
        are closed once the archive has been written (or on failure).
        """
        compressed = compress(source)
        try:
            encrypted = crypt(compressed, self.key)
            try:
                save(encrypted, os.path.join(self.save_location, crypt_name))
            finally:
                encrypted.close()
        finally:
            compressed.close()


def tar(files):
    """Pack *files* into an uncompressed tar archive held in a temp file.

    Args:
        files: Iterable of dicts with at least ``'path'`` (containing
            directory) and ``'name'`` (file name).

    Returns:
        A ``tempfile.SpooledTemporaryFile`` containing the archive. The
        caller is responsible for closing it.
    """
    tarball = tempfile.SpooledTemporaryFile()
    with tarfile.open(fileobj=tarball, mode='w') as archive:
        for file in files:
            # Add the file itself. Adding file['path'] (the directory) would
            # pull in the whole directory, recursively, once per file.
            archive.add(
                os.path.join(file['path'], file['name']),
                arcname=_member_name(file),
                recursive=False,
            )
    return tarball


def untar(tar_file, files, save_path):
    """Extract *files* from a tar archive into *save_path*.

    Args:
        tar_file: Path of the archive, or a seekable file object holding it.
        files: Iterable of dicts with ``'path'`` and ``'name'``, as given to
            ``tar``.
        save_path: Directory under which the members are extracted.
    """
    if _is_path(tar_file):
        archive = tarfile.open(tar_file, 'r')
    else:
        tar_file.seek(0)
        archive = tarfile.open(fileobj=tar_file, mode='r')
    with archive:
        for file in files:
            archive.extract(_member_name(file), path=save_path)


def compress(file):
    """Gzip *file* into a temporary file.

    Args:
        file: Path of the file to compress, or a seekable binary file object
            (it is rewound first and left open).

    Returns:
        A ``tempfile.SpooledTemporaryFile`` holding the compressed data. Its
        position is left at the end of the data; the caller closes it.
    """
    opened_here = _is_path(file)
    if opened_here:
        infile = open(file, 'rb')
    else:
        file.seek(0)
        infile = file
    try:
        compressed_file = tempfile.SpooledTemporaryFile()
        with gzip.open(compressed_file, 'wb') as zipfile:
            shutil.copyfileobj(infile, zipfile, _CHUNK_SIZE)
    finally:
        # Only close what this function opened; never the caller's object.
        if opened_here:
            infile.close()
    return compressed_file


def uncompress(file):
    """Gunzip *file* into a temporary file.

    Args:
        file: Path of the gzip file, or a seekable binary file object (it is
            rewound first).

    Returns:
        A ``tempfile.SpooledTemporaryFile`` holding the decompressed data.
    """
    if not _is_path(file):
        file.seek(0)
    decompressed_file = tempfile.SpooledTemporaryFile()
    with gzip.open(file, 'rb') as zipfile:
        shutil.copyfileobj(zipfile, decompressed_file, _CHUNK_SIZE)
    return decompressed_file


def crypt(file, key):
    """Encrypt *file* with *key* into a new temporary file and return it.

    NOTE(review): *file* is passed to ``encrypt_file`` without being rewound;
    this assumes ``encrypt_file`` positions its input itself -- confirm.
    """
    encrypted_file = tempfile.SpooledTemporaryFile()
    encrypt_file(key, file, encrypted_file)
    return encrypted_file


def uncrypt(file, key):
    """Decrypt *file* with *key* into a new temporary file and return it.

    NOTE(review): same assumption about input positioning as ``crypt``.
    """
    decrypted_file = tempfile.SpooledTemporaryFile()
    decrypt_file(key, file, decrypted_file)
    return decrypted_file


def save(file, save_path):
    """Write the content of file object *file* to *save_path*.

    Missing parent directories are created. Objects that are not seekable,
    readable file objects are reported on stdout and skipped.
    """
    if not (hasattr(file, 'seek') and hasattr(file, 'read')):
        print("Unable to save " + str(file) + " of type " + str(type(file)))
        return

    directory = os.path.dirname(save_path)
    if directory:
        # makedirs handles nested missing directories; a bare file name has
        # an empty dirname, which must not be passed to mkdir.
        os.makedirs(directory, exist_ok=True)

    file.seek(0)
    with open(save_path, 'wb') as out:
        shutil.copyfileobj(file, out, _CHUNK_SIZE)


class DataBase:
    """SQLite index of the files that were backed up.

    Table ``files`` holds one row per file (name, path, size, dates) with the
    id of the archive that contains it; table ``crypt`` holds one row per
    archive with its compression mode.
    """

    def __init__(self, base_file):
        """Open (or create) the database *base_file* and ensure the schema."""
        self.conn = sqlite3.connect(base_file)
        # Rows are returned as dicts keyed by column name.
        self.conn.row_factory = dict_factory
        self.__create_table()

    def __del__(self):
        """Commit and close the connection when the object is collected."""
        conn = getattr(self, 'conn', None)
        if conn is None:
            # __init__ failed before the connection existed.
            return
        try:
            conn.commit()
            conn.close()
        except sqlite3.Error:
            # Nothing useful can be done during finalisation (e.g. the
            # connection is already closed).
            pass

    def __create_table(self):
        """Create the ``files`` and ``crypt`` tables if they do not exist."""
        cursor = self.conn.cursor()
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS files(
                id INTEGER PRIMARY KEY UNIQUE NOT NULL,
                name TEXT,
                path TEXT,
                size INTEGER,
                m_date DATE,
                c_date DATE,
                crypt_id INTEGER,
                CONSTRAINT files_crypt_FK FOREIGN KEY (crypt_id) REFERENCES crypt(id)
            )
        """)
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS crypt(
                id INTEGER PRIMARY KEY UNIQUE NOT NULL,
                compress_mode TEXT
            )
        """)
        self.conn.commit()

    def get_crypt_id(self, list_file):
        """Return the archive id already used by most of *list_file*.

        Returns:
            The most frequent ``crypt_id`` among the files that are already
            indexed, or ``None`` when none of them is indexed.
        """
        cursor = self.conn.cursor()
        crypt_id_list = []
        for file in list_file:
            cursor.execute(
                """SELECT crypt_id FROM files WHERE name=? AND path=?""",
                (file['name'], file['path']))
            row = cursor.fetchone()
            if row is not None:
                crypt_id_list.append(row['crypt_id'])
        if crypt_id_list:
            return most_frequent(crypt_id_list)
        return None

    def exist(self, file):
        """Return 1 if *file* (matched on name and path) is indexed, else 0."""
        cursor = self.conn.cursor()
        cursor.execute(
            """SELECT EXISTS (SELECT 1 FROM files WHERE name=? AND path=?) as exist""",
            (file['name'], file['path']))
        return cursor.fetchone()['exist']

    def modified(self, file):
        """Return True when *file* differs from its indexed row.

        Size and both dates are compared; a file that is not indexed at all
        counts as modified.
        """
        cursor = self.conn.cursor()
        cursor.execute(
            """SELECT name, path, size, m_date, c_date FROM files WHERE name=? AND path=?""",
            (file['name'], file['path']))
        bdd_file = cursor.fetchone()
        # Compare only the tracked columns so that extra keys on the entry
        # cannot make an unchanged file look modified.
        current = {field: file.get(field) for field in _FILE_FIELDS}
        return current != bdd_file

    def add(self, list_file, compress_mode=None):
        """Record *list_file* as belonging to one archive.

        New files are inserted and modified ones updated. The archive id of
        already-indexed files is reused; otherwise a new id is allocated.

        Args:
            list_file: List of dicts with the keys ``name``, ``path``,
                ``size``, ``m_date`` and ``c_date``.
            compress_mode: Compression label stored for a new archive.

        Returns:
            The archive id as a 5-digit zero-padded string when at least one
            file was new or modified, ``None`` when nothing changed.
        """
        cursor = self.conn.cursor()
        crypt_id = self.get_crypt_id(list_file)
        if crypt_id is None:
            cursor.execute("""SELECT IFNULL(max(id) + 1, 0) as crypt_id FROM crypt""")
            crypt_id = cursor.fetchone()['crypt_id']
        cursor.execute("""SELECT IFNULL(max(id) + 1, 0) as files_id FROM files""")
        file_id = cursor.fetchone()['files_id']

        proceed = False
        for file in list_file:
            if self.exist(file):
                if self.modified(file):
                    cursor.execute(
                        """UPDATE files SET size=?, m_date=?, c_date=?, crypt_id=? WHERE name=? AND path=?""",
                        (file['size'], file['m_date'], file['c_date'], crypt_id,
                         file['name'], file['path']))
                    proceed = True
            else:
                # Bound parameters: file names may contain quotes, which
                # would break (or inject into) a string-formatted statement.
                cursor.execute(
                    """INSERT INTO files VALUES(?, ?, ?, ?, ?, ?, ?)""",
                    (file_id, file['name'], file['path'], file['size'],
                     file['m_date'], file['c_date'], crypt_id))
                file_id += 1
                proceed = True

        if not proceed:
            return None
        cursor.execute("""INSERT OR IGNORE INTO crypt VALUES(?, ?)""",
                       (crypt_id, compress_mode))
        self.conn.commit()
        return str(crypt_id).zfill(5)


def human_size(size, decimal_places=0):
    """Format a byte count with a binary unit suffix (B, K, M, G or T).

    Values of 1024 T and above stay expressed in T.
    """
    units = ['B', 'K', 'M', 'G', 'T']
    unit = units[0]
    for unit in units:
        # Stop on the last unit without dividing again; otherwise the value
        # ends up scaled one step further than the suffix says.
        if size < 1024.0 or unit == units[-1]:
            break
        size /= 1024.0
    return f"{size:.{decimal_places}f}{unit}"


def parse_size(size):
    """Convert a size such as ``"50M"`` into a number of bytes.

    The unit (B, K, M, G, T; powers of 1024) is case-insensitive. A bare
    number is interpreted as kibibytes.

    Raises:
        ValueError: If *size* is empty or not of the form ``<digits><unit>``.
    """
    units = {"B": 1, "K": 2**10, "M": 2**20, "G": 2**30, "T": 2**40}
    text = str(size).strip()
    if text and text[-1].isdigit():
        text += 'K'
    match = re.match(r"([0-9]+)([BKMGT])", text, re.I)
    if match is None:
        raise ValueError(f"Invalid size: {size!r}")
    number, unit = match.groups()
    # The pattern is case-insensitive, so normalise the unit before lookup.
    return int(number) * units[unit.upper()]


def most_frequent(list):
    """Return the most common element of *list* (first seen wins ties).

    The parameter name shadows the builtin; it is kept for compatibility
    with keyword callers.

    Raises:
        ValueError: If *list* is empty.
    """
    if not list:
        raise ValueError("most_frequent() arg is an empty sequence")
    return Counter(list).most_common(1)[0][0]


def dict_factory(cursor, row):
    """sqlite3 row factory returning each row as a ``{column: value}`` dict."""
    return {col[0]: row[idx] for idx, col in enumerate(cursor.description)}