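"""Simple encrypted backup tool.

Walks a directory tree, records file metadata in a SQLite database,
bundles small files into per-directory tarballs, gzip-compresses and
encrypts the result, and writes it to a save location.
"""
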
import io
import os
from datetime import datetime
import re
import tempfile
import mgzip as gzip  # drop-in, multi-threaded replacement for the stdlib gzip module
import tarfile
import sqlite3
import pathlib

from crypt import *  # local module providing encrypt_file() and decrypt_file()


class Backup:
    # Default settings; save_location must point to the backup target
    # directory before recurse() is called.
    key = None
    tarball_size = "50M"
    save_location = None

    def __init__(self, key, bdd):
        self.key = key
        self.bdd = DataBase(bdd)

    def recurse(self, path):
        """Walk `path` recursively, backing up new or modified files."""
        tarball_size = parse_size(self.tarball_size)
        files = []
        print("Start", path, ":", files)
        for f in os.listdir(path):
            uri = os.path.join(path, f)
            if os.path.isfile(uri):
                size = os.path.getsize(uri)
                m_date = datetime.fromtimestamp(os.path.getmtime(uri))
                c_date = datetime.fromtimestamp(os.path.getctime(uri))
                print(f + " : ", human_size(size))
                if size > tarball_size:
                    # Large files are compressed and encrypted individually.
                    crypt_name = self.bdd.add([{'name': f,
                                                'path': path,
                                                'size': size,
                                                'm_date': m_date,
                                                'c_date': c_date}],
                                              compress_mode="gz")
                    if crypt_name is not None:
                        print("proceed")
                        enc = crypt(compress(uri), self.key)
                        save(enc, os.path.join(self.save_location, crypt_name))
                else:
                    # Small files are grouped into one tarball per directory.
                    files.append({'name': f,
                                  'path': path,
                                  'size': size,
                                  'm_date': m_date,
                                  'c_date': c_date})
            elif os.path.isdir(uri):
                self.recurse(uri)
        if len(files) > 0:
            print("End", path, ":", [file['name'] for file in files])
            crypt_name = self.bdd.add(files, compress_mode="tar.gz")
            if crypt_name is not None:
                print("proceed")
                tarball = tar(files)
                enc = crypt(compress(tarball), self.key)
                save(enc, os.path.join(self.save_location, crypt_name))


def tar(files):
    """Bundle the given files into an in-memory (spooled) tar archive."""
    tarball = tempfile.SpooledTemporaryFile()
    with tarfile.open(fileobj=tarball, mode='w') as archive:
        for file in files:
            # Add each file by its full path (directory + name).
            archive.add(os.path.join(file['path'], file['name']))
    return tarball


def untar(tar_file, files, save_path):
    """Extract the given files from a tar archive (path or file object) into save_path."""
    if isinstance(tar_file, (io.BufferedRandom, tempfile.SpooledTemporaryFile)):
        tar_file.seek(0)
        archive = tarfile.open(fileobj=tar_file, mode='r')
    else:
        archive = tarfile.open(tar_file, 'r')
    for file in files:
        # Member names are the full paths stored by tar().
        archive.extract(os.path.join(file['path'], file['name']), path=save_path)
    archive.close()


def compress(file):
    """Gzip-compress a file (path or file object) into a spooled temporary file."""
    if isinstance(file, str):
        infile = open(file, 'rb')
    elif isinstance(file, (io.BufferedRandom, tempfile.SpooledTemporaryFile)):
        file.seek(0)
        infile = file

    compressed_file = tempfile.SpooledTemporaryFile()
    with gzip.open(compressed_file, 'wb') as zipfile:
        while chunk := infile.read(64 * 1024):
            zipfile.write(chunk)
    return compressed_file


def uncompress(file):
    """Gzip-decompress a file (path or file object) into a spooled temporary file."""
    if isinstance(file, (io.BufferedRandom, tempfile.SpooledTemporaryFile)):
        file.seek(0)

    decompressed_file = tempfile.SpooledTemporaryFile()
    with gzip.open(file, 'rb') as zipfile:
        while chunk := zipfile.read(64 * 1024):
            decompressed_file.write(chunk)
    return decompressed_file


def crypt(file, key):
    """Encrypt a file object with the given key into a spooled temporary file."""
    encrypted_file = tempfile.SpooledTemporaryFile()
    encrypt_file(key, file, encrypted_file)
    return encrypted_file


def uncrypt(file, key):
    """Decrypt a file object with the given key into a spooled temporary file."""
    decrypted_file = tempfile.SpooledTemporaryFile()
    decrypt_file(key, file, decrypted_file)
    return decrypted_file


def save(file, save_path):
    """Write a file object to save_path, creating the parent directory if needed."""
    if not os.path.isdir(os.path.dirname(save_path)):
        os.mkdir(os.path.dirname(save_path))

    if isinstance(file, (io.BufferedRandom, tempfile.SpooledTemporaryFile)):
        file.seek(0)
        with open(save_path, 'wb') as outfile:
            while chunk := file.read(64 * 1024):
                outfile.write(chunk)
    else:
        print("Unable to save " + str(file) + " of type " + str(type(file)))
    return


class DataBase:
    """SQLite-backed index of backed-up files and their encrypted archives."""

    def __init__(self, base_file):
        self.conn = sqlite3.connect(base_file)
        self.__create_table()

    def __del__(self):
        self.conn.commit()
        self.conn.close()

    def __create_table(self):
        cursor = self.conn.cursor()

        cursor.execute("""
            CREATE TABLE IF NOT EXISTS files(
                id INTEGER PRIMARY KEY UNIQUE NOT NULL,
                name TEXT,
                path TEXT,
                size TEXT,
                m_date DATE,
                c_date DATE,
                crypt_id INTEGER,
                CONSTRAINT files_crypt_FK FOREIGN KEY (crypt_id) REFERENCES crypt(id)
            )
        """)

        cursor.execute("""
            CREATE TABLE IF NOT EXISTS crypt(
                id INTEGER PRIMARY KEY UNIQUE NOT NULL,
                compress_mode TEXT
            )
        """)

        # Return rows as dicts for all cursors created after this point.
        self.conn.row_factory = dict_factory

        self.conn.commit()

    def get_crypt_id(self, list_file):
        """Return the most common existing crypt_id for the given files, or None."""
        cursor = self.conn.cursor()
        crypt_id_list = []
        for file in list_file:
            cursor.execute("""SELECT crypt_id FROM files WHERE name=? AND path=?""",
                           (file['name'], file['path']))
            try:
                crypt_id_list.append(cursor.fetchone()['crypt_id'])
            except TypeError:
                # File not yet recorded in the database.
                pass
        if len(crypt_id_list) > 0:
            return most_frequent(crypt_id_list)
        else:
            return None

    def exist(self, file):
        """Return 1 if the file is already recorded in the database, else 0."""
        cursor = self.conn.cursor()
        cursor.execute("""SELECT EXISTS (SELECT 1 FROM files WHERE name=? AND path=?) as exist""",
                       (file['name'], file['path']))
        return cursor.fetchone()['exist']

    def modified(self, file):
        """Return True if the file's metadata differs from its recorded entry."""
        cursor = self.conn.cursor()
        cursor.execute("""SELECT name, path, size, m_date, c_date FROM files WHERE name=? AND path=?""",
                       (file['name'], file['path']))
        bdd_file = cursor.fetchone()
        # Dates and size are stored as text; convert them back before comparing.
        for key in ['m_date', 'c_date']:
            bdd_file[key] = datetime.strptime(bdd_file[key], "%Y-%m-%d %H:%M:%S.%f")
        bdd_file['size'] = int(bdd_file['size'])
        if file != bdd_file:
            return True
        else:
            return False

    def add(self, list_file, compress_mode=None):
        """Record the files under a crypt_id and return it as a string, or None if nothing changed."""
        cursor = self.conn.cursor()
        crypt_id = self.get_crypt_id(list_file)
        if crypt_id is None:
            cursor.execute("""SELECT IFNULL(max(id) + 1, 0) as crypt_id FROM crypt""")
            crypt_id = cursor.fetchone()['crypt_id']
        for file in list_file:
            if self.exist(file):
                if self.modified(file):
                    cursor.execute("""UPDATE files SET size=?, m_date=?, c_date=?, crypt_id=?
                                      WHERE name=? AND path=?""",
                                   (file['size'], file['m_date'], file['c_date'], crypt_id, file['name'], file['path']))
                else:
                    # Already backed up and unchanged: nothing to do for this batch.
                    return None
            else:
                cursor.execute("""INSERT INTO files (name, path, size, m_date, c_date, crypt_id)
                                  VALUES(?, ?, ?, ?, ?, ?)""",
                               (file['name'], file['path'], file['size'], file['m_date'], file['c_date'], crypt_id))

        cursor.execute("""INSERT OR IGNORE INTO crypt VALUES(?, ?)""", (crypt_id, compress_mode))
        self.conn.commit()
        return str(crypt_id)


def human_size(size, decimal_places=0):
    """Format a byte count as a short human-readable string (e.g. 50M)."""
    for unit in ['B', 'K', 'M', 'G', 'T']:
        if size < 1024.0:
            break
        size /= 1024.0
    return f"{size:.{decimal_places}f}{unit}"


def parse_size(size):
    """Parse a size string such as '50M' into a number of bytes."""
    units = {"B": 1, "K": 2**10, "M": 2**20, "G": 2**30, "T": 2**40}
    if size[-1].isdigit():
        # Bare numbers are interpreted as kibibytes.
        size = size + 'K'
    number, unit = re.match(r"([0-9]+)([BKMGT])", size, re.I).groups()
    return int(float(number) * units[unit.upper()])


def most_frequent(items):
    """Return the most frequent element of a list."""
    return max(set(items), key=items.count)


def dict_factory(cursor, row):
    """sqlite3 row factory that maps column names to values."""
    d = {}
    for idx, col in enumerate(cursor.description):
        d[col[0]] = row[idx]
    return d
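

# Illustrative usage sketch (not part of the original module): the key value,
# database filename, and paths below are assumptions for demonstration only.
if __name__ == "__main__":
    backup = Backup("my-secret-key", "backup_index.sqlite")  # hypothetical key and index file
    backup.save_location = "/tmp/backup_target"              # hypothetical backup target directory
    backup.recurse("data_to_backup")                         # hypothetical source directory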