import os
import pickle
from typing import Dict, Optional

import pandas as pd

BASE_PATH = "../estratti"
OUT_PATH = "estratti_serializzati"


def safe_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Rename any column called "id" (case-insensitive) to "id_csv".

    The DataFrame is modified in place and also returned, so both
    ``safe_columns(df)`` and ``df = safe_columns(df)`` work.

    Non-string column labels (e.g. integer labels) are left untouched
    instead of raising ``AttributeError`` on ``.lower()``.

    Note: if several columns match (e.g. both "id" and "ID"), all of them
    are renamed to "id_csv", which produces duplicate labels.

    Args:
        df: The table whose column labels should be sanitised.

    Returns:
        The same DataFrame object, with its columns renamed.
    """
    df.columns = [
        "id_csv" if isinstance(col, str) and col.lower() == "id" else col
        for col in df.columns
    ]
    return df


def scan_and_save(
    base_path: Optional[str] = None,
    out_path: Optional[str] = None,
) -> Dict[str, pd.DataFrame]:
    """Read every CSV under *base_path* and serialise it as a pickle.

    Each CSV is loaded with all values as strings (empty cells stay empty
    strings rather than NaN), passed through :func:`safe_columns`, and
    written to ``<out_path>/<key>.pkl``. The key is the file's path
    relative to *base_path* with path separators replaced by ``"__"``.
    A combined ``all_tables.pkl`` holding the ``{key: DataFrame}`` dict is
    written at the end.

    Files that fail to load or save are reported on stdout and skipped;
    they do not abort the scan.

    Args:
        base_path: Directory to scan recursively. Defaults to ``BASE_PATH``.
        out_path: Directory that receives the pickles (created if missing).
            Defaults to ``OUT_PATH``.

    Returns:
        The dict of tables that were read, keyed as described above.
    """
    # Resolve defaults at call time so overriding the module constants
    # after import is still honoured.
    base_path = BASE_PATH if base_path is None else base_path
    out_path = OUT_PATH if out_path is None else out_path

    all_tables: Dict[str, pd.DataFrame] = {}
    os.makedirs(out_path, exist_ok=True)

    # os.walk yields nothing for a missing directory; make that visible
    # instead of silently reporting success on zero files.
    if not os.path.isdir(base_path):
        print(f"[ATTENZIONE] Cartella sorgente non trovata: {base_path}")

    for root, dirs, files in os.walk(base_path):
        # Sort in place so the traversal (and thus the output) is
        # reproducible regardless of filesystem ordering.
        dirs.sort()
        for filename in sorted(files):
            if not filename.lower().endswith(".csv"):
                continue

            full_path = os.path.join(root, filename)
            key = os.path.relpath(full_path, base_path).replace(os.sep, "__")
            try:
                df = pd.read_csv(full_path, dtype=str, keep_default_na=False)
                df = safe_columns(df)
                all_tables[key] = df
                df.to_pickle(os.path.join(out_path, f"{key}.pkl"))
                print(f"[OK] Letto e salvato: {key}")
            except Exception as e:
                # Best-effort batch job: report the failure and move on.
                print(f"[ERRORE] {key}: {e}")

    # Optional: also save everything in a single file.
    # NOTE: pickle files must only ever be loaded from trusted sources.
    with open(os.path.join(out_path, "all_tables.pkl"), "wb") as fh:
        pickle.dump(all_tables, fh)
    print("Tutto serializzato!")

    return all_tables


if __name__ == "__main__":
    scan_and_save()