cryptAES/backup.py
2021-11-02 12:32:57 +01:00

419 lines
15 KiB
Python

import io
import os
from datetime import datetime
import re
import tempfile
import mgzip as gzip
import tarfile
import sqlite3
import pathlib
import hashlib
import collections as coll
from b2sdk.v2 import B2Api
from crypt import encrypt_file, decrypt_file
ZFILL = 5
BACKBLAZE = 100
LOCAL = 101
class Backup:
    """Back up a directory tree as encrypted (optionally tarred) gzip blobs.

    Small files in a directory are grouped into one tarball per directory;
    files larger than ``tarball_size`` are stored individually.  Archive
    metadata (paths, checksums, compression mode) lives in a ``DataBase``.
    """

    # Class-level defaults; overwritten per instance in __init__.
    key = None              # encryption key handed to crypt()/uncrypt()
    buk = None              # B2 bucket handle (BACKBLAZE mode only)
    tarball_size = "50M"    # files above this size are stored individually
    save_location = ""      # prefix/directory for stored archives

    def __init__(self, key, bdd=None, save_mode=None, save_location=None,
                 app_key_id=None, app_key=None, bucket_id=None):
        """Configure the storage backend.

        save_mode: BACKBLAZE or LOCAL (required).
        bdd: path to the sqlite metadata database.  Required in both modes;
             the original only validated it for LOCAL and would fail later
             inside DataBase for BACKBLAZE.
        """
        self.key = key
        if save_mode is None:
            raise Exception("No save_mode")
        elif save_mode == BACKBLAZE:
            if None in (app_key_id, app_key, bucket_id, bdd):
                raise Exception("Some arguments are not filled")
            self.b2 = B2Api()
            self.b2.authorize_account("production", app_key_id, app_key)
            self.buk = self.b2.get_bucket_by_id(bucket_id)
            self.bdd = DataBase(bdd)
        elif save_mode == LOCAL:
            if None in (save_location, bdd):
                raise Exception("Some arguments are not filled")
            self.save_location = save_location
            self.bdd = DataBase(bdd)
        else:
            # The original silently accepted unknown modes and left self.bdd
            # unset, failing later with AttributeError.
            raise Exception("Unknown save_mode")
        self.save_mode = save_mode

    def save(self, path, recurse=True):
        """Back up *path* (recursively by default).  Returns self for chaining."""
        self.__save(path, recurse=recurse)
        return self

    def __save(self, path, recurse=True):
        """Walk one directory: big files go out alone, small ones as one tarball."""
        tarball_size = parse_size(self.tarball_size)
        small_files = []
        for f in os.listdir(path):
            uri = os.path.join(path, f)
            if os.path.isfile(uri):
                meta = self.__file_meta(f, uri)
                if meta['size'] > tarball_size:
                    crypt_id = self.bdd.add([meta])
                    # add() returns None when nothing changed since last run.
                    if crypt_id is not None:
                        print("Proceed", uri, ' ==> ', crypt_id)
                        self.__store(crypt_id, compress(uri), "gz")
                else:
                    small_files.append(meta)
            elif os.path.isdir(uri) and recurse:
                self.__save(uri, recurse=recurse)
        if small_files:
            crypt_id = self.bdd.add(small_files)
            if crypt_id is not None:
                print("Proceed", path, ":", [file['name'] for file in small_files], ' ==> ', crypt_id)
                tarball = tar([file['path'] for file in small_files])
                self.__store(crypt_id, compress(tarball), "tar.gz")

    @staticmethod
    def __file_meta(name, uri):
        """Build the metadata row stored in the files table for one file."""
        return {'name': name,
                'path': pathlib.Path(uri).as_posix(),
                'size': os.path.getsize(uri),
                'm_date': datetime.fromtimestamp(os.path.getmtime(uri)).strftime("%Y-%m-%d %H:%M:%S.%f"),
                'c_date': datetime.fromtimestamp(os.path.getctime(uri)).strftime("%Y-%m-%d %H:%M:%S.%f")}

    def __store(self, crypt_id, payload, compress_mode):
        """Encrypt *payload*, record its checksum, and upload it under crypt_id."""
        enc = crypt(payload, self.key)
        self.bdd.set_crypt_attr(crypt_id, compress_mode=compress_mode, sha1sum=get_hash(enc))
        print(" Size : ", get_size(enc))
        upload_file(enc, self.save_mode,
                    file_name=os.path.join(self.save_location, str(crypt_id).zfill(ZFILL)),
                    bucket=self.buk)

    def recover_file(self, paths, parents=False, save_path=None):
        """Restore *paths* from the backend into *save_path* (default: cwd).

        parents=True recreates each file's stored path under save_path.
        Fixes two defects of the original: os.getcwd() was evaluated once at
        import time as a default argument, and save_path was mutated inside
        the loop so later files compounded earlier files' paths.
        """
        if save_path is None:
            save_path = os.getcwd()
        files = self.bdd.get_crypt_name(paths)
        for file in files:
            # .get(): DataBase may return a fallback dict without the key
            # for paths it does not know about.
            if file.get('crypt_id') is None:
                continue
            encrypted_file = download_file(self.save_mode,
                                           os.path.join(self.save_location, str(file['crypt_id']).zfill(ZFILL)),
                                           bucket=self.buk)
            file_hash = get_hash(encrypted_file)
            if file['sha1sum'] != file_hash:
                print("Checksum don't match:")
                print("{} {}".format(file['sha1sum'], "BDD"))
                print("{} {}".format(file_hash, "File"))
                print()
                continue
            target = os.path.join(save_path, file['path']) if parents else save_path
            uncompress(uncrypt(encrypted_file, self.key), file['name'], target, file['compress_mode'])
def upload_file(file, mode, file_name, bucket=None):
    """Persist an encrypted blob under *file_name* on the selected backend.

    LOCAL writes to disk via save(); BACKBLAZE streams the bytes into the
    given bucket.  Any other mode is silently a no-op.
    """
    if mode == LOCAL:
        save(file, file_name)
    elif mode == BACKBLAZE:
        bucket.upload_bytes(file.read(), file_name)
def download_file(mode, file, bucket=None):
    """Fetch an encrypted blob into a spooled temp file and return it.

    *file* is a local path (LOCAL) or a remote object name (BACKBLAZE).
    """
    dest = tempfile.SpooledTemporaryFile()
    if mode == LOCAL:
        with open(file, 'rb') as src:
            dest.write(src.read())
    elif mode == BACKBLAZE:
        bucket.download_file_by_name(file).save(dest)
    return dest
def get_size(in_file):
    """Return the human-readable size (SI units, 1 decimal) of *in_file*.

    Accepts a filesystem path (str) or a SpooledTemporaryFile.  Any other
    type now raises TypeError; the original fell through to a NameError.
    """
    if isinstance(in_file, str):
        filesize = os.path.getsize(in_file)
    elif isinstance(in_file, tempfile.SpooledTemporaryFile):
        in_file.seek(0, 2)          # jump to EOF to measure the stream
        filesize = in_file.tell()
        in_file.seek(0)             # rewind for subsequent readers
    else:
        raise TypeError("unsupported file object: {!r}".format(type(in_file)))
    return human_size(filesize, decimal_places=1, unit='si')
def get_hash(infile):
    """Return the SHA-1 hex digest of a path (str) or a SpooledTemporaryFile.

    Fixes the original's leaked file handle for the str case and hashes in
    chunks so large archives do not get loaded into memory at once.
    """
    if isinstance(infile, str):
        with open(infile, 'rb') as file:   # context manager: no leaked handle
            return _sha1_of(file)
    if isinstance(infile, tempfile.SpooledTemporaryFile):
        infile.seek(0)                     # rewind; caller may have read it
        return _sha1_of(infile)
    raise TypeError("unsupported file object: {!r}".format(type(infile)))
def _sha1_of(file):
    """Hash an already-positioned binary file object chunk by chunk."""
    digest = hashlib.sha1()
    while chunk := file.read(64 * 1024):
        digest.update(chunk)
    return digest.hexdigest()
def tar(files):
    """Bundle *files* into an uncompressed tar archive in a spooled temp file.

    Each entry is stored under its basename only (directory layout dropped).
    """
    archive = tempfile.SpooledTemporaryFile()
    with tarfile.open(fileobj=archive, mode='w') as bundle:
        for path in files:
            bundle.add(path, arcname=os.path.basename(path))
    return archive
def untar(tar_file, file, save_path):
    """Extract the single member *file* from *tar_file* into *save_path*.

    *tar_file* is either a SpooledTemporaryFile or a path.  Uses
    os.makedirs (the original os.mkdir failed when save_path was nested)
    and closes the archive even if extraction raises.
    """
    if isinstance(tar_file, tempfile.SpooledTemporaryFile):
        tar_file.seek(0)
        archive = tarfile.open(fileobj=tar_file, mode='r')
    else:
        archive = tarfile.open(tar_file, 'r')
    try:
        os.makedirs(save_path, exist_ok=True)
        archive.extract(file, path=save_path)
    finally:
        archive.close()
def compress(file):
    """Gzip a path (str) or an open file object into a new spooled temp file.

    Fixes the original's leaked handle for the str case; any readable
    file-like object is now accepted (the original NameError'd on types
    other than str/SpooledTemporaryFile).
    """
    compressed_file = tempfile.SpooledTemporaryFile()
    if isinstance(file, str):
        with open(file, 'rb') as infile, gzip.open(compressed_file, 'wb') as zipped:
            zipped.write(infile.read())
    else:
        file.seek(0)    # rewind; the source may already have been read
        with gzip.open(compressed_file, 'wb') as zipped:
            zipped.write(file.read())
    return compressed_file
def uncompress(data, file_name, save_path, compress_mode):
    """Reverse the compression chain described by *compress_mode*.

    compress_mode is a dotted suffix string such as 'gz' or 'tar.gz':
    'gz' triggers a gunzip pass, 'tar' extracts *file_name* from the
    archive, otherwise the raw stream is written as save_path/file_name.
    """
    steps = compress_mode.split('.')
    if 'gz' in steps:
        data = ungz(data)
    if 'tar' in steps:
        untar(data, file_name, save_path)
        return
    save(data, os.path.join(save_path, file_name))
def ungz(data):
    """Gunzip *data* into a fresh spooled temporary file and return it."""
    if type(data) is tempfile.SpooledTemporaryFile:
        data.seek(0)    # rewind so the whole stream gets decompressed
    plain = tempfile.SpooledTemporaryFile()
    with gzip.open(data, 'rb') as zipped:
        plain.write(zipped.read())
    return plain
def crypt(file, key):
    """Encrypt *file* with *key*; return the ciphertext as a spooled temp file."""
    ciphertext = tempfile.SpooledTemporaryFile()
    encrypt_file(key, file, ciphertext)
    return ciphertext
def uncrypt(file, key):
    """Decrypt *file* with *key*; return the plaintext as a spooled temp file."""
    plaintext = tempfile.SpooledTemporaryFile()
    decrypt_file(key, file, plaintext)
    return plaintext
def save(file, save_path):
    """Write a readable binary file object to *save_path* in 64 KiB chunks.

    Creates missing parent directories.  Two fixes over the original:
    `type(file) is io.BufferedRandom or tempfile.SpooledTemporaryFile`
    was always truthy (the second operand is a bare class), and
    os.mkdir failed for nested parents or an empty dirname.
    """
    parent = os.path.dirname(save_path)
    if parent and not os.path.isdir(parent):
        os.makedirs(parent)
    if isinstance(file, (io.BufferedRandom, tempfile.SpooledTemporaryFile)):
        file.seek(0)    # rewind; the blob was just written by the caller
        with open(save_path, 'wb') as out:
            while chunk := file.read(64 * 1024):
                out.write(chunk)
    else:
        print("Unable to save " + str(file) + " of type " + str(type(file)))
class DataBase:
    """SQLite index mapping backed-up files to their encrypted archive ids.

    Two tables: ``files`` (one row per source file, FK to crypt) and
    ``crypt`` (one row per stored archive: compression mode + sha1 of the
    ciphertext).  All queries are parameterized; the original interpolated
    values with str.format, which broke on apostrophes in filenames and was
    SQL-injectable.
    """

    def __init__(self, base_file):
        self.conn = sqlite3.connect(base_file)
        self.__create_table()

    def __del__(self):
        # Best-effort flush on interpreter teardown.
        self.conn.commit()
        self.conn.close()

    def __create_table(self):
        """Create the two tables if missing and install the dict row factory."""
        cursor = self.conn.cursor()
        cursor.execute("""
        CREATE TABLE IF NOT EXISTS files(
            id INTEGER PRIMARY KEY UNIQUE NOT NULL,
            name TEXT,
            path TEXT,
            size INTEGER,
            m_date DATE,
            c_date DATE,
            crypt_id INTEGER,
            CONSTRAINT files_crypt_FK FOREIGN KEY (crypt_id) REFERENCES crypt(id)
        )
        """)
        cursor.execute("""
        CREATE TABLE IF NOT EXISTS crypt(
            id INTEGER PRIMARY KEY UNIQUE NOT NULL,
            compress_mode TEXT,
            sha1sum TEXT
        )
        """)
        # All cursors created from here on return dict rows.
        self.conn.row_factory = dict_factory
        self.conn.commit()

    def get_crypt_name(self, list_file):
        """For each path, return its archive id, compression mode and checksum.

        Unknown paths yield {'path': ..., 'crypt_id': None} — the original
        fallback lacked the 'crypt_id' key, which made callers KeyError.
        """
        cursor = self.conn.cursor()
        crypt_list = []
        for path in list_file:
            # Normalize Windows separators to the stored posix form.
            path = pathlib.PureWindowsPath(path).as_posix()
            cursor.execute("""SELECT crypt_id, compress_mode, sha1sum FROM files
                           INNER JOIN crypt ON files.crypt_id = crypt.id
                           WHERE path=?""", (path,))
            retval = cursor.fetchone()
            if retval is None:
                crypt_list.append({'path': path, 'crypt_id': None})
            else:
                crypt_list.append({'name': os.path.basename(path),
                                   'path': path,
                                   'crypt_id': retval['crypt_id'],
                                   'compress_mode': retval['compress_mode'],
                                   'sha1sum': retval['sha1sum']})
        return crypt_list

    def __get_crypt_id(self, list_file):
        """Pick the archive id to (re)use for this batch of files.

        Reuses the batch's dominant existing crypt_id unless the archive
        also contains files outside the batch (then a fresh id is allotted
        so the old archive is not clobbered).
        """
        cursor = self.conn.cursor()
        crypt_id_list = []
        for file in list_file:
            cursor.execute("""SELECT crypt_id FROM files WHERE name=? AND path=?""",
                           (file['name'], file['path']))
            row = cursor.fetchone()
            if row is not None:
                crypt_id_list.append(row['crypt_id'])
        if not crypt_id_list:
            # Batch not in the database yet: allot the next free archive id.
            cursor.execute("""SELECT IFNULL(max(id) + 1, 0) as crypt_id FROM crypt""")
            return cursor.fetchone()['crypt_id']
        crypt_id = most_common(crypt_id_list)
        names = [file['name'] for file in list_file]
        paths = [file['path'] for file in list_file]
        marks = ', '.join('?' * len(list_file))
        # Parameterized IN-lists replace the original quoted str.format SQL.
        cursor.execute("""SELECT 1 FROM files
                       WHERE crypt_id=?
                       AND name NOT IN ({0})
                       AND path NOT IN ({1})""".format(marks, marks),
                       [crypt_id] + names + paths)
        neighbour = cursor.fetchall()
        if len(neighbour) > 0:
            # The archive holds files outside this batch: don't overwrite it.
            cursor.execute("""SELECT IFNULL(max(id) + 1, 0) as crypt_id FROM crypt""")
            return cursor.fetchone()['crypt_id']
        if len(set(crypt_id_list)) > 1:
            # Files were split across archives: unify them under one id.
            cursor.execute("""UPDATE files SET crypt_id=?
                           WHERE name IN ({0})
                           AND path IN ({1})""".format(marks, marks),
                           [crypt_id] + names + paths)
        return crypt_id

    def exist(self, file):
        """Return 1 if a row with this name+path is already indexed, else 0."""
        cursor = self.conn.cursor()
        cursor.execute("""SELECT EXISTS (SELECT 1 FROM files WHERE name=? AND path=?) as exist""",
                       (file['name'], file['path']))
        return cursor.fetchone()['exist']

    def modified(self, file):
        """Return True when the stored row differs from *file*'s metadata."""
        cursor = self.conn.cursor()
        cursor.execute("""SELECT name, path, size, m_date, c_date FROM files WHERE name=? AND path=?""",
                       (file['name'], file['path']))
        # Whole-dict comparison: any field difference counts as modified.
        return file != cursor.fetchone()

    def set_crypt_attr(self, crypt_id, compress_mode=None, sha1sum=None):
        """Record the compression mode and ciphertext checksum of an archive."""
        cursor = self.conn.cursor()
        cursor.execute("""UPDATE crypt SET compress_mode=?, sha1sum=? WHERE id=?""",
                       (compress_mode, sha1sum, crypt_id))
        self.conn.commit()

    def add(self, list_file):
        """Upsert a batch of file rows; return their archive id, or None.

        None means every file already exists unmodified, i.e. nothing needs
        to be re-uploaded.
        """
        cursor = self.conn.cursor()
        crypt_id = self.__get_crypt_id(list_file)
        cursor.execute("""SELECT IFNULL(max(id) + 1, 0) as files_id FROM files""")
        file_id = cursor.fetchone()['files_id']
        proceed = False
        for file in list_file:
            if self.exist(file):
                if self.modified(file):
                    cursor.execute("""UPDATE files SET size=?, m_date=?, c_date=?, crypt_id=? WHERE name=? AND path=?""",
                                   (file['size'], file['m_date'], file['c_date'], crypt_id, file['name'], file['path']))
                    proceed = True
            else:
                cursor.execute("""INSERT INTO files VALUES(?, ?, ?, ?, ?, ?, ?)""",
                               (file_id, file['name'], file['path'], file['size'],
                                file['m_date'], file['c_date'], crypt_id))
                file_id += 1
                proceed = True
        if proceed:
            cursor.execute("""INSERT OR IGNORE INTO crypt (id) VALUES(?)""", (crypt_id,))
            self.conn.commit()
            return crypt_id
        return None
def human_size(size, decimal_places=0, unit=None):
    """Format *size* bytes as a human-readable string.

    unit=None uses 1024 steps with single-letter labels, 'iec' uses
    KiB/MiB/..., 'si' uses 1000 steps with KB/MB/....  Fixes the original's
    off-by-one past the largest unit (1024 TiB rendered as "1TiB") and
    stops shadowing the builtin `format`.
    """
    base = 1024.0
    labels = ['B', 'K', 'M', 'G', 'T']
    if unit == 'iec':
        labels = ['B', 'KiB', 'MiB', 'GiB', 'TiB']
    elif unit == 'si':
        base = 1000.0
        labels = ['B', 'KB', 'MB', 'GB', 'TB']
    for label in labels:
        # Stop at the last label instead of dividing one extra time.
        if size < base or label == labels[-1]:
            break
        size /= base
    return f"{size:.{decimal_places}f}{label}"
def parse_size(size):
    """Parse a size string like '50M' into a byte count.

    Units are binary (K=2**10, ...).  A bare number defaults to KiB.
    Fixes the original's case bug: the regex matched lowercase units
    (re.I) but the dict lookup was case-sensitive, so '3b' raised KeyError.
    """
    units = {"B": 1, "K": 2**10, "M": 2**20, "G": 2**30, "T": 2**40}
    if size[-1].isdigit():
        size = size + 'K'   # bare number: interpret as kibibytes
    number, unit = re.match(r"([0-9]+)([BKMGT])", size, re.I).groups()
    return int(float(number) * units[unit.upper()])
def most_frequent(items):
    """Return the value occurring most often in *items* (ties arbitrary).

    Parameter renamed: the original shadowed the builtin `list`.
    """
    return max(set(items), key=items.count)
def most_common(lst):
    """Return the most frequent element of *lst*; ties resolve to the smallest.

    The original required a candidate to beat the current best on BOTH
    count and value, so e.g. [1, 1, 2, 2, 2] wrongly returned 1 instead of
    the most frequent 2.  Elements are expected to be orderable (crypt ids
    here are ints).
    """
    if len(set(lst)) == 1:
        return lst[0]
    counts = coll.Counter(lst)
    best_count = max(counts.values())
    return min(item for item, count in counts.items() if count == best_count)
def dict_factory(cursor, row):
    """sqlite3 row factory: return each row as a {column_name: value} dict."""
    return {column[0]: value for column, value in zip(cursor.description, row)}