cryptAES/Backup.py
2021-10-14 14:56:54 +02:00

254 lines
8.3 KiB
Python

import io
import os
from datetime import datetime
import re
import tempfile
import mgzip as gzip
import tarfile
import sqlite3
import pathlib
from crypt import *
class Backup:
    """Walk a directory tree and store compressed, encrypted copies of its files.

    Files larger than ``tarball_size`` are gzip-compressed and encrypted on
    their own; the remaining files of each directory are grouped into one
    tarball that is then compressed and encrypted. The ``DataBase`` catalogue
    decides whether anything changed and which archive name to write to.
    """

    # Encryption key passed to ``crypt``; replaced per instance by __init__.
    key = None
    # Size threshold (parsed with ``parse_size``) separating "large" files,
    # archived individually, from small ones grouped per directory.
    tarball_size = "50M"
    # Directory receiving the encrypted archives. It must be assigned before
    # ``recurse`` writes anything: ``os.path.join(None, ...)`` raises TypeError.
    save_location = None

    def __init__(self, key, bdd):
        """Store the key and open the catalogue.

        Args:
            key: Encryption key handed unchanged to ``crypt``.
            bdd: Database location forwarded to ``DataBase``.
        """
        self.key = key
        self.bdd = DataBase(bdd)

    def recurse(self, path):
        """Back up every file found under ``path``, depth first.

        Args:
            path: Directory to scan. Sub-directories are processed
                recursively as they are encountered.
        """
        threshold = parse_size(self.tarball_size)
        small_files = []
        for entry in os.listdir(path):
            uri = os.path.join(path, entry)
            if os.path.isfile(uri):
                size = os.path.getsize(uri)
                m_date = datetime.fromtimestamp(os.path.getmtime(uri)).strftime("%Y-%m-%d %H:%M:%S.%f")
                c_date = datetime.fromtimestamp(os.path.getctime(uri)).strftime("%Y-%m-%d %H:%M:%S.%f")
                record = {'name': entry,
                          'path': path,
                          'size': size,
                          'm_date': m_date,
                          'c_date': c_date}
                if size > threshold:
                    # Large file: gets an archive of its own.
                    crypt_name = self.bdd.add([record], compress_mode="gz")
                    if crypt_name is not None:
                        print("Proceed", uri)
                        # The intermediate buffers are temporary files; close
                        # them as soon as the archive has been written instead
                        # of waiting for garbage collection.
                        with compress(uri) as compressed, \
                                crypt(compressed, self.key) as enc:
                            save(enc, os.path.join(self.save_location, crypt_name))
                else:
                    small_files.append(record)
            elif os.path.isdir(uri):
                self.recurse(uri)
        if small_files:
            crypt_name = self.bdd.add(small_files, compress_mode="tar.gz")
            # ``add`` returns None when no file is new or modified.
            if crypt_name is not None:
                print("Proceed", path, ":", [file['name'] for file in small_files])
                with tar(small_files) as tarball, \
                        compress(tarball) as compressed, \
                        crypt(compressed, self.key) as enc:
                    save(enc, os.path.join(self.save_location, crypt_name))
def tar(files):
    """Bundle the given files into an uncompressed tar held in a temp file.

    Args:
        files: Iterable of dicts with at least ``'path'`` (the directory that
            contains the file) and ``'name'`` (the file name).

    Returns:
        A ``tempfile.SpooledTemporaryFile`` containing the tar data. The
        stream position is left at the end; the caller owns the file and
        must close it.
    """
    tarball = tempfile.SpooledTemporaryFile()
    try:
        with tarfile.open(fileobj=tarball, mode='w') as archive:
            for file in files:
                # 'path' is the containing directory: adding it directly would
                # archive the whole directory tree once per listed file.
                archive.add(os.path.join(file['path'], file['name']))
    except BaseException:
        # Do not leak the temporary buffer when archiving fails.
        tarball.close()
        raise
    return tarball
def untar(tar_file, files, save_path):
    """Extract the listed files from a tar archive into ``save_path``.

    Args:
        tar_file: Either a readable binary file object (rewound before use)
            or the path of a tar file on disk.
        files: Iterable of dicts with ``'path'`` (original directory) and
            ``'name'`` (file name) identifying the members to extract.
        save_path: Directory under which the members are recreated.

    Raises:
        KeyError: If a requested member is not present in the archive.
    """
    # The previous test ``type(x) is A or B`` was always truthy, so the path
    # branch could never run; distinguish file objects by capability instead.
    if hasattr(tar_file, 'read'):
        tar_file.seek(0)
        archive = tarfile.open(fileobj=tar_file, mode='r')
    else:
        archive = tarfile.open(tar_file, 'r')
    with archive:
        for file in files:
            member = os.path.join(file['path'], file['name'])
            # Mirror the normalisation tarfile applies to member names when
            # adding: drop the drive, use '/' separators, strip leading '/'.
            member = os.path.splitdrive(member)[1].replace(os.sep, '/').lstrip('/')
            archive.extract(member, path=save_path)
def compress(file):
    """Gzip-compress a file into a temporary file.

    Args:
        file: Path of the file to read, or a readable and seekable binary
            file object (rewound to the start before reading).

    Returns:
        A ``tempfile.SpooledTemporaryFile`` holding the gzip stream. The
        stream position is left at the end; the caller owns the file and
        must close it.
    """
    opened_here = isinstance(file, (str, os.PathLike))
    if opened_here:
        infile = open(file, 'rb')
    else:
        # Any file-like object is accepted (the old ``type(...) is A or B``
        # test was always truthy, so this has always been the behaviour).
        file.seek(0)
        infile = file
    compressed_file = tempfile.SpooledTemporaryFile()
    try:
        with gzip.open(compressed_file, 'wb') as zipfile:
            while chunk := infile.read(64 * 1024):
                zipfile.write(chunk)
    except BaseException:
        compressed_file.close()
        raise
    finally:
        # Only close what this function opened; caller-supplied objects
        # stay open for the caller to manage.
        if opened_here:
            infile.close()
    return compressed_file
def uncompress(file):
    """Decompress a gzip stream into a temporary file.

    Args:
        file: Path of a gzip file, or a readable binary file object
            (rewound to the start when it supports ``seek``).

    Returns:
        A ``tempfile.SpooledTemporaryFile`` holding the decompressed bytes.
        The stream position is left at the end; the caller owns the file and
        must close it.
    """
    # The old ``type(...) is A or B`` test was always truthy, which made a
    # path argument crash on ``.seek``; only rewind objects that can seek.
    if hasattr(file, 'seek'):
        file.seek(0)
    decompressed_file = tempfile.SpooledTemporaryFile()
    try:
        with gzip.open(file, 'rb') as zipfile:
            while chunk := zipfile.read(64 * 1024):
                decompressed_file.write(chunk)
    except BaseException:
        decompressed_file.close()
        raise
    return decompressed_file
def crypt(file, key):
    """Encrypt ``file`` with ``key`` into a fresh temporary file.

    The work is delegated to ``encrypt_file(key, source, destination)``,
    which is not defined in this module (presumably supplied by the
    ``from crypt import *`` star import - confirm).

    Returns:
        The ``tempfile.SpooledTemporaryFile`` that was passed to
        ``encrypt_file`` as its destination.
    """
    destination = tempfile.SpooledTemporaryFile()
    encrypt_file(key, file, destination)
    return destination
def uncrypt(file, key):
    """Decrypt ``file`` with ``key`` into a fresh temporary file.

    The work is delegated to ``decrypt_file(key, source, destination)``,
    which is not defined in this module (presumably supplied by the
    ``from crypt import *`` star import - confirm).

    Returns:
        The ``tempfile.SpooledTemporaryFile`` that was passed to
        ``decrypt_file`` as its destination.
    """
    destination = tempfile.SpooledTemporaryFile()
    decrypt_file(key, file, destination)
    return destination
def save(file, save_path):
    """Write the whole content of a file object to ``save_path``.

    Missing parent directories of ``save_path`` are created first.

    Args:
        file: Readable binary file object; it is rewound to the start when
            it supports ``seek``. Anything without a ``read`` method is
            reported on stdout and nothing is written.
        save_path: Destination file path.
    """
    directory = os.path.dirname(save_path)
    # ``os.mkdir`` failed for nested missing directories and for a bare file
    # name (empty dirname); ``makedirs`` handles the former, the guard the latter.
    if directory:
        os.makedirs(directory, exist_ok=True)
    # The old ``type(...) is A or B`` test was always truthy, so the error
    # branch below was unreachable; test for the capability actually needed.
    if hasattr(file, 'read'):
        if hasattr(file, 'seek'):
            file.seek(0)
        with open(save_path, 'wb') as target:
            while chunk := file.read(64 * 1024):
                target.write(chunk)
    else:
        print("Unable to save " + str(file) + " of type " + str(type(file)))
    return
class DataBase:
    """SQLite catalogue of backed-up files and the archives that hold them.

    Table ``files`` stores one row per file (name, directory, size, dates and
    the id of its archive); table ``crypt`` stores one row per archive with
    its compression mode. Rows are returned as dicts via ``dict_factory``.
    """

    def __init__(self, base_file):
        """Open (or create) the database at ``base_file`` and its tables."""
        self.conn = sqlite3.connect(base_file)
        self.__create_table()

    def __del__(self):
        """Commit pending changes and close the connection."""
        # ``conn`` is missing when ``sqlite3.connect`` failed in __init__.
        conn = getattr(self, "conn", None)
        if conn is None:
            return
        conn.commit()
        conn.close()

    def __create_table(self):
        """Create both tables when absent and install the dict row factory."""
        cursor = self.conn.cursor()
        cursor.execute("""
        CREATE TABLE IF NOT EXISTS files(
        id INTEGER PRIMARY KEY UNIQUE NOT NULL,
        name TEXT,
        path TEXT,
        size INTEGER,
        m_date DATE,
        c_date DATE,
        crypt_id INTEGER,
        CONSTRAINT files_crypt_FK FOREIGN KEY (crypt_id) REFERENCES crypt(id)
        )
        """)
        cursor.execute("""
        CREATE TABLE IF NOT EXISTS crypt(
        id INTEGER PRIMARY KEY UNIQUE NOT NULL,
        compress_mode TEXT
        )
        """)
        self.conn.row_factory = dict_factory
        self.conn.commit()

    def get_crypt_id(self, list_file):
        """Return the archive id most often associated with ``list_file``.

        Args:
            list_file: Iterable of dicts carrying ``'name'`` and ``'path'``.

        Returns:
            The most frequent ``crypt_id`` among the files already recorded,
            or ``None`` when none of them is in the catalogue.
        """
        cursor = self.conn.cursor()
        crypt_id_list = []
        for file in list_file:
            cursor.execute("""SELECT crypt_id FROM files WHERE name=? AND path=?""",
                           (file['name'], file['path']))
            row = cursor.fetchone()
            # ``fetchone`` yields None for files not recorded yet.
            if row is not None:
                crypt_id_list.append(row['crypt_id'])
        if crypt_id_list:
            return most_frequent(crypt_id_list)
        return None

    def exist(self, file):
        """Return 1 when a row with this name and path exists, else 0."""
        cursor = self.conn.cursor()
        cursor.execute("""SELECT EXISTS (SELECT 1 FROM files WHERE name=? AND path=?) as exist""",
                       (file['name'], file['path']))
        return cursor.fetchone()['exist']

    def modified(self, file):
        """Return True when ``file`` differs from its stored row.

        The comparison is a plain dict equality against the columns
        name, path, size, m_date and c_date, so ``file`` must carry exactly
        those keys to be considered unchanged.
        """
        cursor = self.conn.cursor()
        cursor.execute("""SELECT name, path, size, m_date, c_date FROM files WHERE name=? AND path=?""",
                       (file['name'], file['path']))
        bdd_file = cursor.fetchone()
        return file != bdd_file

    def add(self, list_file, compress_mode=None):
        """Record new or modified files and assign them to an archive.

        Args:
            list_file: Iterable of dicts with keys ``name``, ``path``,
                ``size``, ``m_date`` and ``c_date``.
            compress_mode: Label stored for the archive in table ``crypt``.

        Returns:
            The archive id as a 5-character zero-padded string when at least
            one file was inserted or updated, otherwise ``None``.
        """
        cursor = self.conn.cursor()
        # Reuse the archive these files already belong to, if any.
        crypt_id = self.get_crypt_id(list_file)
        if crypt_id is None:
            cursor.execute("""SELECT IFNULL(max(id) + 1, 0) as crypt_id FROM crypt""")
            crypt_id = cursor.fetchone()['crypt_id']
        cursor.execute("""SELECT IFNULL(max(id) + 1, 0) as files_id FROM files""")
        file_id = cursor.fetchone()['files_id']
        proceed = False
        for file in list_file:
            if self.exist(file):
                if self.modified(file):
                    cursor.execute("""UPDATE files SET size=?, m_date=?, c_date=?, crypt_id=? WHERE name=? AND path=?""",
                                   (file['size'], file['m_date'], file['c_date'], crypt_id, file['name'], file['path']))
                    proceed = True
            else:
                # Bound parameters instead of string formatting: file names
                # come from the filesystem and may contain quotes.
                cursor.execute("""INSERT INTO files VALUES(?, ?, ?, ?, ?, ?, ?)""",
                               (file_id, file['name'], file['path'], file['size'],
                                file['m_date'], file['c_date'], crypt_id))
                file_id += 1
                proceed = True
        if proceed:
            cursor.execute("""INSERT OR IGNORE INTO crypt VALUES(?, ?)""", (crypt_id, compress_mode))
            self.conn.commit()
            return str(crypt_id).zfill(5)
        return None
def human_size(size, decimal_places=0):
    """Format a byte count with a binary unit suffix (B, K, M, G or T).

    Args:
        size: Number of bytes.
        decimal_places: Digits shown after the decimal point.

    Returns:
        The scaled value followed by its unit, e.g. ``"1.5K"``.
    """
    units = ['B', 'K', 'M', 'G', 'T']
    for unit in units[:-1]:
        if size < 1024.0:
            return f"{size:.{decimal_places}f}{unit}"
        size /= 1024.0
    # 'T' is the largest unit: stop dividing here, otherwise values of
    # 1024 T and above would be scaled once too often.
    return f"{size:.{decimal_places}f}{units[-1]}"
def parse_size(size):
    """Convert a size string such as ``"50M"`` into a number of bytes.

    Units are binary multiples (K = 2**10 ... T = 2**40) and are matched
    case-insensitively. A value with no unit is interpreted as kibibytes.
    Anything following the unit letter is ignored (``"50MB"`` reads as
    ``"50M"``).

    Args:
        size: Text made of a non-negative integer or decimal number and an
            optional unit letter among B, K, M, G, T.

    Returns:
        The size in bytes as an ``int`` (fractions are truncated).

    Raises:
        ValueError: If ``size`` does not start with a number and a valid unit.
    """
    units = {"B": 1, "K": 2**10, "M": 2**20, "G": 2**30, "T": 2**40}
    text = size.strip()
    if text and text[-1].isdigit():
        text = text + 'K'
    match = re.match(r"([0-9]+(?:\.[0-9]+)?)\s*([BKMGT])", text, re.I)
    if match is None:
        raise ValueError(f"Invalid size: {size!r}")
    number, unit = match.groups()
    # The pattern accepts lower-case units, so normalise before the lookup.
    factor = units[unit.upper()]
    if number.isdigit():
        # Stay in integer arithmetic to avoid float rounding on huge values.
        return int(number) * factor
    return int(float(number) * factor)
def most_frequent(list):
    """Return the element that occurs most often in ``list``.

    Raises:
        ValueError: If ``list`` is empty (propagated from ``max``).
    """
    distinct_values = set(list)
    return max(distinct_values, key=lambda value: list.count(value))
def dict_factory(cursor, row):
    """Build a ``{column name: value}`` dict from a result row.

    Column names are taken from the first item of each entry of
    ``cursor.description``; values are read from ``row`` by position.
    Its signature matches what ``sqlite3`` expects of a ``row_factory``.
    """
    return {
        column[0]: row[position]
        for position, column in enumerate(cursor.description)
    }