514 lines
18 KiB
Python
514 lines
18 KiB
Python
import io
|
|
import os
|
|
from datetime import datetime
|
|
import re
|
|
import tempfile
|
|
import mgzip as gzip
|
|
import tarfile
|
|
import sqlite3
|
|
import pathlib
|
|
import hashlib
|
|
import collections as coll
|
|
|
|
from b2sdk.v2 import B2Api
|
|
from crypt import encrypt_file, decrypt_file
|
|
|
|
ZFILL = 5
|
|
BACKBLAZE = 100
|
|
LOCAL = 101
|
|
|
|
|
|
class Backup:
    """Encrypted, chunked backup of a directory tree.

    Files are gzip-compressed (small files grouped into one tarball per
    directory, up to ``tarball_size``), encrypted with ``self.key`` and
    stored either in a Backblaze B2 bucket or in a local directory.
    Metadata (paths, sizes, dates, checksums) lives in a SQLite database
    managed by :class:`DataBase`.

    Configure with :meth:`backblaze` or :meth:`local` before calling
    :meth:`save`, :meth:`update`, :meth:`clear`, :meth:`check` or
    :meth:`recover_file`.  All configuration methods return ``self`` so
    calls can be chained.
    """

    # Symmetric encryption key, set by backblaze()/local().
    key = None
    # B2 bucket handle (BACKBLAZE mode only).
    buk = None
    # Size threshold above which a file is archived alone instead of
    # being grouped into its directory's tarball.
    tarball_size = "50M"
    # Destination directory (LOCAL mode only).
    save_location = ""

    def backblaze(self, key, bdd=None, app_key_id=None, app_key=None, bucket_id=None):
        """Configure the instance for Backblaze B2 storage.

        Raises ValueError (a subclass of Exception, so existing callers
        catching Exception keep working) when a credential is missing.
        """
        self.key = key
        self.save_mode = BACKBLAZE

        if None in (app_key_id, app_key, bucket_id):
            raise ValueError("Some arguments are not filled")
        self.b2 = B2Api()
        self.b2.authorize_account("production", app_key_id, app_key)
        self.buk = self.b2.get_bucket_by_id(bucket_id)
        self.bdd = DataBase(bdd)

        return self

    def local(self, key, bdd=None, save_location=None):
        """Configure the instance for local-directory storage."""
        self.key = key
        self.save_mode = LOCAL

        if None in (save_location, bdd):
            raise ValueError("Some arguments are not filled")
        self.save_location = save_location
        self.bdd = DataBase(bdd)

        return self

    def update(self, path, recurse=True):
        """Drop entries for files deleted on disk, then re-save ``path``."""
        self.clear(path, recurse=recurse)
        self.__save(path, recurse=recurse)
        return self

    def save(self, path, recurse=True):
        """Back up ``path`` (recursively unless ``recurse`` is False)."""
        self.__save(path, recurse=recurse)
        return self

    def clear(self, path, recurse=True):
        """Purge records for files under ``path`` that no longer exist on
        disk, then remove archives left without files and file records
        left without an archive."""
        files = self.bdd.get_files(path)
        if not recurse:
            files = [f for f in files if (pathlib.Path(f['path'])).parent == pathlib.Path(path)]
        for file in files:
            file_path = pathlib.Path(file['path'])
            if not os.path.isfile(file_path):
                # File vanished from disk: forget it.
                print("Clear deleted file :", file['path'])
                self.bdd.delete_file(file)
        # Archives no longer referenced by any file record.
        orphans = self.bdd.get_orphan_crypt()
        for orphan in orphans:
            self.__delete_file(str(orphan['id']).zfill(ZFILL))
            self.bdd.delete_crypt(orphan['id'])
        # File records whose archive record has disappeared.
        nocrypts = self.bdd.get_file_no_crypt()
        for nocrypt in nocrypts:
            self.bdd.delete_file(nocrypt)
        return self

    def check(self, path, recurse=True):
        """Verify each stored archive under ``path`` against its recorded
        SHA-1 and delete any archive whose checksum no longer matches.

        ``recurse`` is accepted for signature symmetry but the underlying
        query is always a prefix match on ``path``.
        """
        crypts = self.bdd.get_crypts(path)
        for crypt in crypts:
            if crypt['id'] is not None:
                encrypted_file = self.__download_file(str(crypt['id']).zfill(ZFILL))
                file_hash = get_hash(encrypted_file)
                # Release the temp file once hashed (the original leaked it).
                encrypted_file.close()
                if crypt['sha1sum'] != file_hash:
                    print("Hash mismatch", str(crypt['id']).zfill(ZFILL))
                    self.__delete_file(str(crypt['id']).zfill(ZFILL))
                    self.bdd.delete_crypt(crypt['id'])
        return self

    def __save(self, path, recurse=True):
        """Walk ``path``: files bigger than ``tarball_size`` are archived
        alone ("gz"); the rest are grouped into one tarball per directory
        ("tar.gz").  Unchanged files (per DataBase.add) are skipped."""
        tarball_size = parse_size(self.tarball_size)
        files = []
        for f in os.listdir(path):
            uri = os.path.join(path, f)
            if os.path.isfile(uri):
                size = os.path.getsize(uri)
                m_date = datetime.fromtimestamp(os.path.getmtime(uri)).strftime("%Y-%m-%d %H:%M:%S.%f")
                c_date = datetime.fromtimestamp(os.path.getctime(uri)).strftime("%Y-%m-%d %H:%M:%S.%f")
                if size > tarball_size:
                    # Large file: one archive of its own.
                    crypt_id = self.bdd.add([{'name': f,
                                              'path': pathlib.Path(uri).as_posix(),
                                              'size': size,
                                              'm_date': m_date,
                                              'c_date': c_date}])
                    if crypt_id is not None:
                        print("Proceed", uri, ' ==> ', crypt_id)
                        enc = crypt(compress(uri), self.key)
                        self.bdd.set_crypt_attr(crypt_id, compress_mode="gz", sha1sum=get_hash(enc))
                        print(" Size :", get_size(enc))
                        self.__upload_file(enc, file_name=str(crypt_id).zfill(ZFILL))
                else:
                    # Small file: defer, grouped into this directory's tarball.
                    files.append({'name': f,
                                  'path': pathlib.Path(uri).as_posix(),
                                  'size': size,
                                  'm_date': m_date,
                                  'c_date': c_date})
            elif os.path.isdir(uri) and recurse:
                self.__save(uri, recurse=recurse)
        if len(files) > 0:
            crypt_id = self.bdd.add(files)
            if crypt_id is not None:
                print("Proceed", path, ":", [file['name'] for file in files], ' ==> ', crypt_id)
                tarball = tar([file['path'] for file in files])
                enc = crypt(compress(tarball), self.key)
                self.bdd.set_crypt_attr(crypt_id, compress_mode="tar.gz", sha1sum=get_hash(enc))
                print(" Size :", get_size(enc))
                self.__upload_file(enc, file_name=str(crypt_id).zfill(ZFILL))

    def recover_file(self, paths, parents=False, save_path=None):
        """Download, verify and decrypt the files listed in ``paths``.

        With ``parents=True`` each file is restored under
        ``save_path/<original path>``; otherwise directly into
        ``save_path``.  ``save_path`` defaults to the current working
        directory, resolved at call time (the original evaluated
        ``os.getcwd()`` once at import time).
        """
        if save_path is None:
            save_path = os.getcwd()
        files = self.bdd.get_crypt_name(paths)
        for file in files:
            if file['crypt_id'] is not None:
                encrypted_file = self.__download_file(str(file['crypt_id']).zfill(ZFILL))
                file_hash = get_hash(encrypted_file)
                if file['sha1sum'] == file_hash:
                    # Bug fix: compute a per-file destination instead of
                    # mutating save_path cumulatively across iterations,
                    # which nested every subsequent file ever deeper.
                    target = os.path.join(save_path, file['path']) if parents else save_path
                    uncompress(uncrypt(encrypted_file, self.key), file['name'], target, file['compress_mode'])
                    print("Recovered :", file['name'])
                else:
                    print("Checksum don't match:")
                    print("{} {}".format(file['sha1sum'], "BDD"))
                    print("{} {}".format(file_hash, "File"))
                    print()

    def __upload_file(self, file, file_name):
        """Store the stream ``file`` under ``file_name`` on the backend."""
        if self.save_mode == BACKBLAZE:
            # Bug fix: get_hash() is always called before upload and leaves
            # the stream at EOF, so without this rewind upload_bytes()
            # received empty content.
            file.seek(0)
            self.buk.upload_bytes(file.read(), file_name)
        elif self.save_mode == LOCAL:
            # save() rewinds the stream itself.
            save(file, os.path.join(self.save_location, file_name))

    def __download_file(self, file):
        """Fetch archive ``file`` into a SpooledTemporaryFile and return it.

        In LOCAL mode a missing archive is reported and an empty stream is
        returned (best-effort, matching the original behaviour).
        """
        dl = tempfile.SpooledTemporaryFile()
        if self.save_mode == BACKBLAZE:
            self.buk.download_file_by_name(file).save(dl)
        elif self.save_mode == LOCAL:
            try:
                with open(os.path.join(self.save_location, file), 'rb') as infile:
                    dl.write(infile.read())
            except FileNotFoundError:
                print("Fichier", file, "introuvable")
        return dl

    def __delete_file(self, file):
        """Delete archive ``file`` from the backend.

        NOTE(review): B2 deletion is intentionally disabled — only the
        file-info lookup runs (it raises if the file is unknown); confirm
        before enabling the commented delete_file_version call.
        """
        if self.save_mode == BACKBLAZE:
            self.buk.get_file_info_by_name(file)
            # self.buk.delete_file_version(None, file)
        elif self.save_mode == LOCAL:
            try:
                os.remove(os.path.join(self.save_location, file))
            except FileNotFoundError:
                print("Fichier", file, "introuvable")
|
|
|
|
|
|
def get_size(in_file):
    """Return the human-readable size of ``in_file`` (SI units, 1 decimal).

    ``in_file`` is either a filesystem path or a SpooledTemporaryFile;
    a spooled file is measured by seeking to EOF and then rewound to 0.

    Raises TypeError for any other input (the original fell through and
    crashed with an unrelated NameError on the unbound local).
    """
    if isinstance(in_file, str):
        filesize = os.path.getsize(in_file)
    elif isinstance(in_file, tempfile.SpooledTemporaryFile):
        in_file.seek(0, 2)
        filesize = in_file.tell()
        in_file.seek(0)
    else:
        raise TypeError("Unsupported input type: " + str(type(in_file)))

    return human_size(filesize, decimal_places=1, unit='si')
|
|
|
|
|
|
def get_hash(infile):
    """Return the SHA-1 hex digest of ``infile``'s full content.

    ``infile`` is either a filesystem path or a SpooledTemporaryFile.
    A spooled file is rewound before hashing and left positioned at EOF
    (callers such as __upload_file rely on rewinding themselves).

    Raises TypeError for any other input (the original fell through and
    crashed with a NameError on the unbound local instead).
    """
    if isinstance(infile, str):
        # Context manager: the original leaked this file handle.
        with open(infile, 'rb') as file:
            return hashlib.sha1(file.read()).hexdigest()
    elif isinstance(infile, tempfile.SpooledTemporaryFile):
        infile.seek(0)
        return hashlib.sha1(infile.read()).hexdigest()
    raise TypeError("Unsupported input type: " + str(type(infile)))
|
|
|
|
|
|
def tar(files):
    """Bundle the given file paths into an in-memory uncompressed tar.

    Each entry is stored under its basename only (directory structure is
    flattened).  Returns a SpooledTemporaryFile positioned at EOF.
    """
    archive = tempfile.SpooledTemporaryFile()
    with tarfile.open(fileobj=archive, mode='w') as bundle:
        for path in files:
            bundle.add(path, arcname=os.path.basename(path))
    return archive
|
|
|
|
|
|
def untar(tar_file, file, save_path):
    """Extract the single member ``file`` from ``tar_file`` into ``save_path``.

    ``tar_file`` is either a path or a SpooledTemporaryFile (rewound
    before reading).  ``save_path`` is created on demand, including
    missing parents (the original plain os.mkdir failed on nested paths).
    """
    if isinstance(tar_file, tempfile.SpooledTemporaryFile):
        tar_file.seek(0)
        archive = tarfile.open(fileobj=tar_file, mode='r')
    else:
        archive = tarfile.open(tar_file, 'r')
    os.makedirs(save_path, exist_ok=True)
    # `with` guarantees the archive is closed even if extract() raises
    # (the original leaked the handle on error).
    with archive:
        archive.extract(file, path=save_path)
|
|
|
|
|
|
def compress(file):
    """Gzip-compress ``file`` into a new SpooledTemporaryFile.

    ``file`` is either a filesystem path or a SpooledTemporaryFile (which
    is rewound first).  The compressed stream is returned positioned at
    EOF; closing the gzip wrapper does not close the underlying spool.

    Raises TypeError for any other input (the original fell through and
    crashed with a NameError on the unbound local).
    """
    compressed_file = tempfile.SpooledTemporaryFile()
    if isinstance(file, str):
        # Context manager: the original leaked this file handle.
        with open(file, 'rb') as infile:
            with gzip.open(compressed_file, 'wb') as zipfile:
                zipfile.write(infile.read())
    elif isinstance(file, tempfile.SpooledTemporaryFile):
        file.seek(0)
        with gzip.open(compressed_file, 'wb') as zipfile:
            zipfile.write(file.read())
    else:
        raise TypeError("Unsupported input type: " + str(type(file)))
    return compressed_file
|
|
|
|
|
|
def uncompress(data, file_name, save_path, compress_mode):
    """Reverse the storage chain described by ``compress_mode``.

    ``compress_mode`` is a dot-separated mode string ("gz" or "tar.gz"):
    a "gz" component is gunzipped first, then a "tar" component has
    member ``file_name`` extracted into ``save_path``; with no "tar"
    component the data is written straight to ``save_path/file_name``.
    """
    steps = compress_mode.split('.')
    if 'gz' in steps:
        data = ungz(data)
    if 'tar' in steps:
        untar(data, file_name, save_path)
    else:
        save(data, os.path.join(save_path, file_name))
|
|
|
|
|
|
def ungz(data):
    """Gunzip ``data`` into a fresh SpooledTemporaryFile.

    ``data`` may be a path or a SpooledTemporaryFile (rewound before
    reading).  The decompressed stream is returned positioned at EOF;
    callers rewind it themselves.
    """
    if isinstance(data, tempfile.SpooledTemporaryFile):
        data.seek(0)
    plain = tempfile.SpooledTemporaryFile()
    with gzip.open(data, 'rb') as zipped:
        plain.write(zipped.read())
    return plain
|
|
|
|
|
|
def crypt(file, key):
    """Encrypt ``file`` with ``key`` via encrypt_file.

    Returns the ciphertext as a new SpooledTemporaryFile.
    """
    ciphertext = tempfile.SpooledTemporaryFile()
    encrypt_file(key, file, ciphertext)
    return ciphertext
|
|
|
|
|
|
def uncrypt(file, key):
    """Decrypt ``file`` with ``key`` via decrypt_file.

    Returns the plaintext as a new SpooledTemporaryFile.
    """
    plaintext = tempfile.SpooledTemporaryFile()
    decrypt_file(key, file, plaintext)
    return plaintext
|
|
|
|
|
|
def save(file, save_path):
    """Write the readable stream ``file`` to ``save_path`` in 64 KiB chunks.

    The destination directory is created on demand (including missing
    parents; the original plain os.mkdir failed on nested paths and on an
    empty dirname when save_path is a bare filename).  Unsupported input
    types are reported and skipped without raising, matching the
    original's best-effort contract.
    """
    directory = os.path.dirname(save_path)
    if directory and not os.path.isdir(directory):
        os.makedirs(directory, exist_ok=True)

    # Bug fix: the original condition `type(file) is io.BufferedRandom or
    # tempfile.SpooledTemporaryFile` was always truthy (the class object
    # is the second operand of `or`), so the else branch was unreachable
    # and non-stream inputs crashed on .seek().
    if isinstance(file, (io.BufferedRandom, tempfile.SpooledTemporaryFile)):
        file.seek(0)
        with open(save_path, 'wb') as outfile:
            while chunk := file.read(64 * 1024):
                outfile.write(chunk)
    else:
        print("Unable to save " + str(file) + " of type " + str(type(file)))
    return
|
|
|
|
|
|
class DataBase:
    """SQLite persistence layer mapping backed-up files to their encrypted
    archive ("crypt") records.

    Tables:
      files(id, name, path, size, m_date, c_date, crypt_id) -- one row per file
      crypt(id, compress_mode, sha1sum)                     -- one row per archive

    Rows are returned as dicts (see dict_factory, installed on the
    connection in __create_table).
    """

    def __init__(self, base_file):
        # base_file: path of the SQLite database file (created on demand).
        self.conn = sqlite3.connect(base_file)
        self.__create_table()

    def __del__(self):
        # NOTE(review): relying on __del__ for commit/close is fragile --
        # it is not guaranteed to run at interpreter shutdown.  Mutating
        # methods commit individually, so only uncommitted work is at risk.
        self.conn.commit()
        self.conn.close()

    def __create_table(self):
        """Create the files/crypt tables if absent and install the dict
        row factory on the connection."""
        cursor = self.conn.cursor()

        # cursor.execute("""DROP TABLE IF EXISTS files""")
        #
        # cursor.execute("""DROP TABLE IF EXISTS crypt""")
        #
        # self.conn.commit()

        cursor.execute("""
        CREATE TABLE IF NOT EXISTS files(
            id INTEGER PRIMARY KEY UNIQUE NOT NULL,
            name TEXT,
            path TEXT,
            size INTEGER,
            m_date DATE,
            c_date DATE,
            crypt_id INTEGER,
            CONSTRAINT files_crypt_FK FOREIGN KEY (crypt_id) REFERENCES crypt(id)
        )
        """)

        cursor.execute("""
        CREATE TABLE IF NOT EXISTS crypt(
            id INTEGER PRIMARY KEY UNIQUE NOT NULL,
            compress_mode TEXT,
            sha1sum TEXT
        )
        """)

        # All cursors created after this point return rows as dicts.
        self.conn.row_factory = dict_factory
        # self.conn.set_trace_callback(print)

        self.conn.commit()

    def get_crypt_name(self, list_file):
        """For each path in ``list_file``, return a dict with its archive
        id, compression mode and checksum -- or {'path': ..., 'crypt': None}
        when the path is not in the database."""
        cursor = self.conn.cursor()
        crypt_list = []
        # for path in [file['path'] for file in list_file]:
        for path in list_file:
            # Normalise Windows separators to the POSIX form stored in the DB.
            path = pathlib.PureWindowsPath(path).as_posix()
            cursor.execute("""SELECT crypt_id, compress_mode, sha1sum FROM files
                           INNER JOIN crypt ON files.crypt_id = crypt.id
                           WHERE path=?""", (path,))
            retval = cursor.fetchone()
            try:
                crypt_list.append({'name': os.path.basename(path),
                                   'path': path,
                                   'crypt_id': retval['crypt_id'],
                                   'compress_mode': retval['compress_mode'],
                                   'sha1sum': retval['sha1sum']})
            except TypeError:
                # fetchone() returned None: unknown path.
                crypt_list.append({'path': path, 'crypt': None})
        return crypt_list

    def get_crypts(self, path):
        """Return distinct {id, sha1sum} of archives whose files live
        under ``path`` (prefix match via LIKE)."""
        cursor = self.conn.cursor()
        cursor.execute("""SELECT DISTINCT crypt.id, crypt.sha1sum FROM crypt
                       LEFT JOIN files ON files.crypt_id = crypt.id
                       WHERE path LIKE ?""",
                       (path + "%", ))
        return cursor.fetchall()

    def __get_crypt_id(self, list_file):
        """Choose the archive id for this batch of files: reuse the id
        most of them already carry, otherwise allocate max(id)+1."""
        cursor = self.conn.cursor()
        crypt_id_list = []
        for file in list_file:
            cursor.execute("""SELECT crypt_id FROM files WHERE name=? AND path=?""",
                           (file['name'], file['path']))
            try:
                crypt_id_list.append(cursor.fetchone()['crypt_id'])
            except TypeError:
                # Unknown file: nothing to collect.
                pass
        if len(crypt_id_list) != 0:
            id = most_common(crypt_id_list)
        else:
            # if not already/find in bdd
            cursor.execute("""SELECT IFNULL(max(id) + 1, 0) as crypt_id FROM crypt""")
            return cursor.fetchone()['crypt_id']
        params = {'id': id,
                  'name': ', '.join([f"'{file['name']}'" for file in list_file]),
                  'path': ', '.join([f"'{file['path']}'" for file in list_file])}
        # NOTE(review): each comma-joined string binds as ONE parameter,
        # so NOT IN (?) compares against a single concatenated value, not
        # a list of names/paths -- this almost certainly does not filter
        # as intended; verify before relying on the "neighbour" check.
        cursor.execute("""SELECT 1 FROM files WHERE crypt_id=? AND name NOT IN (?) AND path NOT IN (?)""",
                       (params['id'], params['name'], params['path']))
        neighbour = cursor.fetchall()
        # if they have a neighbour don't overwrite it
        if len(neighbour) > 0:
            cursor.execute("""SELECT IFNULL(max(id) + 1, 0) as crypt_id FROM crypt""")
            return cursor.fetchone()['crypt_id']
        else:
            # if they are different, define the same id for the files of this archive
            if len(set(crypt_id_list)) > 1:
                # NOTE(review): same single-parameter IN (?) issue as above.
                cursor.execute("""UPDATE files SET crypt_id=? WHERE name IN (?) AND path IN (?)""",
                               (params['id'], params['name'], params['path']))
            return id

    def get_files(self, path):
        """Return {id, name, path} rows for files under ``path`` (prefix
        match via LIKE)."""
        cursor = self.conn.cursor()
        cursor.execute("""SELECT id, name, path FROM files WHERE path LIKE ?""",
                       (path + "%", ))
        list_file = cursor.fetchall()
        return list_file

    def delete_file(self, file):
        """Delete the file row matching ``file``'s id, name and path."""
        cursor = self.conn.cursor()
        cursor.execute("""DELETE FROM files WHERE id=? AND name=? AND path=?""",
                       (file['id'], file['name'], file['path']))
        self.conn.commit()

    def get_orphan_crypt(self):
        """Return ids of crypt rows no file row references any more."""
        cursor = self.conn.cursor()
        cursor.execute("""SELECT crypt.id FROM crypt
                       LEFT JOIN files ON files.crypt_id = crypt.id
                       WHERE files.id IS NULL""")
        return cursor.fetchall()

    def get_file_no_crypt(self):
        """Return {id, name, path} of file rows whose crypt row is gone."""
        cursor = self.conn.cursor()
        cursor.execute("""SELECT files.id, files.name, files.path FROM files
                       LEFT JOIN crypt ON files.crypt_id = crypt.id
                       WHERE crypt.id IS NULL""")
        return cursor.fetchall()

    def delete_crypt(self, crypt_id):
        """Delete the crypt row with the given id."""
        cursor = self.conn.cursor()
        cursor.execute("""DELETE FROM crypt WHERE id=?""", (crypt_id,))
        self.conn.commit()

    def exist(self, file):
        """Return 1 if a file row with this name and path exists, else 0."""
        cursor = self.conn.cursor()
        cursor.execute("""SELECT EXISTS (SELECT 1 FROM files WHERE name=? AND path=?) as exist""",
                       (file['name'], file['path']))
        return cursor.fetchone()['exist']

    def modified(self, file):
        """Return True when ``file`` differs from its stored row.

        ``file`` must carry exactly the keys name/path/size/m_date/c_date
        (as built by Backup.__save); the comparison is plain dict equality
        against the raw DB values (dates as strings, size as stored).
        """
        cursor = self.conn.cursor()
        cursor.execute("""SELECT name, path, size, m_date, c_date FROM files WHERE name=? AND path=?""",
                       (file['name'], file['path']))
        bdd_file = cursor.fetchone()
        # for key in ['m_date', 'c_date']:
        #     bdd_file[key] = datetime.strptime(bdd_file[key], "%Y-%m-%d %H:%M:%S.%f")
        # bdd_file['size'] = int(bdd_file['size'])
        if file == bdd_file:
            return False
        else:
            return True

    def set_crypt_attr(self, crypt_id, compress_mode=None, sha1sum=None):
        """Record the compression mode and checksum of archive ``crypt_id``."""
        cursor = self.conn.cursor()
        cursor.execute("""UPDATE crypt SET compress_mode=?, sha1sum=? WHERE id=?""",
                       (compress_mode, sha1sum, crypt_id))
        self.conn.commit()

    def add(self, list_file):
        """Insert or refresh the rows for ``list_file``.

        Returns the shared crypt id when at least one file was new or
        modified (the caller must then build and upload the archive), or
        None when everything was already up to date.
        """
        cursor = self.conn.cursor()
        crypt_id = self.__get_crypt_id(list_file)
        cursor.execute("""SELECT IFNULL(max(id) + 1, 0) as files_id FROM files""")
        file_id = cursor.fetchone()['files_id']
        proceed = False
        for file in list_file:
            if self.exist(file):
                if self.modified(file):
                    cursor.execute("""UPDATE files SET size=?, m_date=?, c_date=?, crypt_id=? WHERE name=? AND path=?""",
                                   (file['size'], file['m_date'], file['c_date'], crypt_id, file['name'], file['path']))
                    proceed = True
            else:
                cursor.execute("""INSERT INTO files VALUES(?, ?, ?, ?, ?, ?, ?)""",
                               (file_id, file['name'], file['path'], file['size'], file['m_date'], file['c_date'], crypt_id))
                file_id += 1
                proceed = True
        if proceed:
            # Ensure the crypt row exists before set_crypt_attr() updates it.
            cursor.execute("""INSERT OR IGNORE INTO crypt (id) VALUES(?)""", (crypt_id,))
            self.conn.commit()
            return crypt_id
        else:
            return None
|
|
|
|
|
|
def human_size(size, decimal_places=0, unit=None):
    """Format a byte count as a short human-readable string.

    unit=None  -> divide by 1024, suffixes B/K/M/G/T
    unit='iec' -> divide by 1024, suffixes B/KiB/MiB/GiB/TiB
    unit='si'  -> divide by 1000, suffixes B/KB/MB/GB/TB
    """
    if unit == 'iec':
        step, suffixes = 1024.0, ['B', 'KiB', 'MiB', 'GiB', 'TiB']
    elif unit == 'si':
        step, suffixes = 1000.0, ['B', 'KB', 'MB', 'GB', 'TB']
    else:
        step, suffixes = 1024.0, ['B', 'K', 'M', 'G', 'T']

    for suffix in suffixes:
        if size < step:
            break
        size /= step

    return f"{size:.{decimal_places}f}{suffix}"
|
|
|
|
|
|
def parse_size(size):
    """Parse a size string such as "50M" into a number of bytes.

    Units are binary (K=2**10, M=2**20, ...).  A bare number defaults to
    kibibytes ("10" -> 10 * 1024).  Unit letters are case-insensitive:
    the regex already matched lowercase (re.I) but the lookup table only
    has uppercase keys, so "5k" used to raise KeyError -- fixed by
    upper-casing the matched unit.
    """
    units = {"B": 1, "K": 2**10, "M": 2**20, "G": 2**30, "T": 2**40}
    if size[-1].isdigit():
        size = size + 'K'
    number, unit = re.match(r"([0-9]+)([BKMGT])", size, re.I).groups()
    return int(float(number)*units[unit.upper()])
|
|
|
|
|
|
def most_frequent(list):
    """Return the element with the highest occurrence count.

    Ties are broken arbitrarily.  NOTE(review): the parameter shadows the
    ``list`` builtin; kept for signature compatibility.
    """
    counts = coll.Counter(list)
    return max(counts, key=counts.get)
|
|
|
|
|
|
def most_common(lst):
    """Return the most frequent element of ``lst``; ties go to the
    smallest element.

    Fixes the original selection condition
    ``count > most['count'] and item < most['item']``, which required a
    candidate to beat BOTH criteria at once and therefore skipped a
    strictly more frequent element whenever it was numerically larger
    (e.g. most_common([1, 2, 2]) returned 1).
    """
    if len(set(lst)) == 1:
        return lst[0]
    counts = coll.Counter(lst)
    # Highest count first; on equal counts, the smallest item wins.
    return min(counts.items(), key=lambda kv: (-kv[1], kv[0]))[0]
|
|
|
|
|
|
def dict_factory(cursor, row):
    """sqlite3 row factory: map each column name to its row value."""
    return {column[0]: value for column, value in zip(cursor.description, row)}
|