#!/usr/bin/env python3
"""
Sincronizzazione completa degli archivi MDB su MySQL
- Replica fedele di tutte le tabelle e dati da ogni file MDB trovato in INPUT_ROOT.
- Deduplica tramite hash di riga.
- Aggiornamento struttura tabelle MySQL se cambia il modello dati nei MDB.
- Logging dettagliato.
- Pronto per essere eseguito regolarmente (cron/service).
"""
|
|
|
|
import csv
import datetime
import hashlib
import itertools
import json
import os
import re
import subprocess

import pymysql
|
|
|
|
# --- CONFIGURATION ---

# All paths are resolved relative to this script's own directory so the job
# behaves the same when launched by cron/systemd with an arbitrary CWD.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
CONFIG_PATH = os.path.join(SCRIPT_DIR, "agent_config.json")
LOGDIR = os.path.join(SCRIPT_DIR, "log")
os.makedirs(LOGDIR, exist_ok=True)  # import-time side effect: ensure log dir exists
LOGFILE = os.path.join(LOGDIR, "import_mdb_to_mysql.jsonlog")

# Load the configuration; each setting falls back to a default when missing.
with open(CONFIG_PATH) as f:
    config = json.load(f)
MYSQL_HOST = config.get("MySQLHost", "localhost")
MYSQL_DB = config.get("MySQLDatabase", "netgescon")
MYSQL_USER = config.get("MySQLUser", "user")
MYSQL_PW = config.get("MySQLPassword", "password")
# Root directory that is scanned recursively for *.mdb files.
INPUT_ROOT = config.get("InputDirectory", SCRIPT_DIR)
|
# --- UTILITY ---
|
|
|
|
def log_event(event, **kwargs):
    """Append one JSON line describing *event* to LOGFILE.

    Extra keyword arguments are merged into the logged record alongside the
    event name and a wall-clock timestamp. The file is opened in append mode
    per call, so repeated runs keep accumulating history.
    """
    row = {"event": event, "timestamp": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")}
    row.update(kwargs)
    # ensure_ascii=False emits raw UTF-8 (accented characters from the MDB
    # data), so the file must be opened with an explicit utf-8 encoding —
    # relying on the locale default can raise UnicodeEncodeError under a
    # C/POSIX locale, which is common for cron jobs.
    with open(LOGFILE, "a", encoding="utf-8") as f:
        f.write(json.dumps(row, ensure_ascii=False) + "\n")
|
|
|
|
def connect_mysql():
    """Open a MySQL connection using the credentials from agent_config.json.

    Autocommit is enabled: the importer relies on INSERT IGNORE plus the
    per-row content hash for idempotence rather than on transactions.
    """
    params = {
        "host": MYSQL_HOST,
        "user": MYSQL_USER,
        "password": MYSQL_PW,
        "database": MYSQL_DB,
        "charset": "utf8mb4",
        "autocommit": True,
    }
    return pymysql.connect(**params)
|
|
|
|
def infer_type(val):
    """Guess a MySQL column type from a single sample value.

    Returns "INT" for pure (optionally signed) integers, "FLOAT" for values
    that parse as a float — a decimal comma is tolerated — "TEXT" for strings
    longer than 255 characters, and "VARCHAR(255)" for everything else,
    including None/blank samples.
    """
    if val is None or str(val).strip() == "":
        return "VARCHAR(255)"
    text = str(val)
    if re.fullmatch(r"-?\d+", text):
        return "INT"
    try:
        float(text.replace(",", "."))
    except Exception:
        pass
    else:
        return "FLOAT"
    return "TEXT" if len(text) > 255 else "VARCHAR(255)"
|
|
|
|
def get_existing_columns(cur, table_name):
    """Return the set of column names of *table_name*, or an empty set.

    A missing table (or any other SHOW COLUMNS failure) is deliberately
    treated as "no columns": callers compare against this set to decide
    which columns to ADD.
    """
    try:
        cur.execute(f"SHOW COLUMNS FROM `{table_name}`;")
        return {row[0] for row in cur.fetchall()}
    except Exception:
        return set()
|
|
|
|
def alter_table_add_columns(cur, table_name, header, sample_row):
    """Add every column from *header* that the MySQL table is missing.

    Column types are inferred from *sample_row*; new columns are created
    NULLable so rows imported before the schema change stay valid.
    """
    known = get_existing_columns(cur, table_name)
    missing = [col for col in header if col not in known]
    for col in missing:
        col_type = infer_type(sample_row.get(col, ""))
        cur.execute(f"ALTER TABLE `{table_name}` ADD COLUMN `{col}` {col_type} NULL")
|
|
|
|
def create_table(cur, table_name, header, sample_row):
    """CREATE TABLE IF NOT EXISTS with one column per *header* entry.

    Besides the data columns (types inferred from *sample_row*) three
    bookkeeping columns are added: an auto-increment id, the UNIQUE
    _hash_row that drives INSERT IGNORE deduplication, and an
    _imported_at timestamp.
    """
    column_defs = ["id INT AUTO_INCREMENT PRIMARY KEY"]
    for col in header:
        column_defs.append(f"`{col}` {infer_type(sample_row.get(col, ''))}")
    column_defs.append("_hash_row CHAR(64) UNIQUE")
    column_defs.append("_imported_at DATETIME")
    cur.execute(f"CREATE TABLE IF NOT EXISTS `{table_name}` ({', '.join(column_defs)})")
|
|
|
|
def ensure_table_structure(cur, table_name, header, sample_row):
    """Create *table_name* if absent, otherwise add any newly-seen columns.

    This keeps the MySQL schema in sync when the MDB data model gains
    fields between runs.
    """
    cur.execute("SHOW TABLES LIKE %s", (table_name,))
    if cur.fetchone():
        alter_table_add_columns(cur, table_name, header, sample_row)
        return
    create_table(cur, table_name, header, sample_row)
|
|
|
|
def calc_row_hash(header, row):
    """Return a SHA-256 hex digest identifying *row*'s content.

    Values are taken in *header* order, stringified and stripped, then
    joined with '|'. Identical rows therefore hash identically across runs,
    which is what makes INSERT IGNORE deduplication work.
    """
    joined = "|".join(str(row.get(col, "")).strip() for col in header)
    return hashlib.sha256(joined.encode("utf-8")).hexdigest()
|
|
|
|
def insert_row(cur, table_name, header, row):
    """INSERT IGNORE one MDB row plus its dedup hash and import timestamp.

    Rows whose _hash_row already exists in the table are silently skipped
    by MySQL, making repeated imports idempotent.
    """
    now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    columns = [f"`{col}`" for col in header] + ["_hash_row", "_imported_at"]
    values = [row.get(col, None) for col in header] + [calc_row_hash(header, row), now]
    placeholders = ", ".join(["%s"] * len(columns))
    cur.execute(
        f"INSERT IGNORE INTO `{table_name}` ({', '.join(columns)}) VALUES ({placeholders})",
        values,
    )
|
|
|
|
def safe_name(s):
    """Lower-case *s*, mapping every character outside [a-zA-Z0-9_] to '_'.

    The result is safe to embed in a backtick-quoted MySQL identifier.
    """
    cleaned = "".join(
        ch if (ch == "_" or (ch.isascii() and ch.isalnum())) else "_"
        for ch in str(s)
    )
    return cleaned.lower()
|
|
|
|
def parse_info_from_path(path):
    """Extract (admin_code, cond_code) from an MDB file path.

    The directory layout encodes the administrator and condominium codes as
    purely numeric path components of 4-8 digits; the last two such
    components win. Placeholder values are returned when fewer than two
    numeric components are present.
    """
    admin_code, cond_code = "unknownadmin", "unknowncond"
    numeric_parts = [
        part for part in path.split(os.sep) if re.fullmatch(r"\d{4,8}", part)
    ]
    if len(numeric_parts) >= 2:
        admin_code, cond_code = numeric_parts[-2:]
    return admin_code, cond_code
|
|
|
|
def stream_csv_from_mdb(mdb_path, tab):
    """Yield the CSV export of table *tab* from *mdb_path*, line by line.

    Runs the external `mdb-export` tool and streams its stdout so large
    tables never have to fit in memory.

    NOTE(review): `-H` normally tells mdb-export to SUPPRESS the header row,
    yet callers feed this stream to csv.DictReader, which needs a header; the
    extra "csv" positional argument also looks suspicious. Verify against the
    mdb-tools version actually deployed before touching the command line.
    """
    proc = subprocess.Popen(["mdb-export", "-H", "csv", mdb_path, tab], stdout=subprocess.PIPE)
    try:
        for line in proc.stdout:
            yield line.decode(errors="ignore")
    finally:
        # The original never reaped the child: every exported table left a
        # zombie process behind for the lifetime of the importer.
        proc.stdout.close()
        proc.wait()
|
|
|
|
def process_mdb(mdb_path, admin_code, cond_code, cur):
    """Import every table of one MDB file into MySQL.

    Each MDB table maps to its own MySQL table named
    mdb_<admin>_<cond>_<file>_<table>. Rows are deduplicated by content hash
    via INSERT IGNORE, so re-processing the same file is idempotent.
    Failures are logged per table and per row and never abort the whole file.
    """
    base = os.path.splitext(os.path.basename(mdb_path))[0]
    log_event("start_mdb", file=mdb_path)
    try:
        raw = subprocess.check_output(["mdb-tables", "-1", mdb_path]).decode()
        # -1 prints one table name per line; split on newlines (not on all
        # whitespace, as before) so table names containing spaces survive.
        tables = [name for name in raw.splitlines() if name.strip()]
    except Exception as e:
        log_event("error_mdb", file=mdb_path, error=str(e))
        print(f"Errore lettura tabelle: {e}")
        return
    for tab in tables:
        safe_tab = safe_name(tab)
        table_name = f"mdb_{safe_name(admin_code)}_{safe_name(cond_code)}_{safe_name(base)}_{safe_tab}"
        log_event("import_table", mdb=mdb_path, admin=admin_code, condominio=cond_code, table_mdb=tab, table_mysql=table_name)
        try:
            reader = csv.DictReader(stream_csv_from_mdb(mdb_path, tab), delimiter=",")
            # Peek at the first row to infer the schema, then stream the rest.
            # The original buffered the entire table in a list before the
            # first insert, which blows up memory on large archives.
            first_row = next(reader, None)
            if first_row is None:
                log_event("empty_table", table=tab)
                continue
            header = list(first_row.keys())
            ensure_table_structure(cur, table_name, header, first_row)
            imported = 0
            for row in itertools.chain([first_row], reader):
                try:
                    insert_row(cur, table_name, header, row)
                    imported += 1
                except Exception as row_e:
                    # One bad row must not stop the table import.
                    log_event("row_error", table=table_name, data=row, error=str(row_e))
            log_event("imported_rows", table=table_name, rows=imported)
            print(f"Importate {imported} righe in {table_name}")
        except Exception as e:
            log_event("table_error", table=table_name, error=str(e))
            print(f"Errore su tabella {tab}: {e}")
|
|
|
|
def main():
    """Walk INPUT_ROOT recursively and import every *.mdb file into MySQL.

    Administrator/condominium codes are derived from each file's path.
    The cursor and connection are now closed even if an import raises,
    instead of leaking on the error path.
    """
    conn = connect_mysql()
    try:
        cur = conn.cursor()
        try:
            for root, _, files in os.walk(INPUT_ROOT):
                for fname in files:
                    if fname.lower().endswith(".mdb"):
                        mdb_path = os.path.join(root, fname)
                        admin_code, cond_code = parse_info_from_path(mdb_path)
                        process_mdb(mdb_path, admin_code, cond_code, cur)
        finally:
            cur.close()
    finally:
        conn.close()
    log_event("import_complete")
|
|
|
|
# Script entry point: run one full MDB -> MySQL synchronization pass.
if __name__ == "__main__":
    main()