35 lines
1.2 KiB
Python
35 lines
1.2 KiB
Python
import os
|
|
import pandas as pd
|
|
import pickle
|
|
|
|
# Root directory that scan_and_save() walks recursively looking for CSV files.
# The path is relative, so it is resolved against the current working directory.
BASE_PATH = "../estratti"

# Directory that receives the per-table pickles and the combined all_tables.pkl.
OUT_PATH = "estratti_serializzati"
|
|
|
|
def safe_columns(df):
    """Rename every column called "id" (case-insensitive) to "id_csv".

    The column labels are replaced on the DataFrame that was passed in, and
    that same object is returned so the call can be used in an assignment.

    Args:
        df: DataFrame whose column labels are rewritten in place.

    Returns:
        The same DataFrame object, with the renamed column labels.
    """
    # Non-string labels (e.g. integer positions) can never be "id"; leave them
    # untouched instead of failing on the missing .lower() method.
    df.columns = [
        "id_csv" if isinstance(col, str) and col.lower() == "id" else col
        for col in df.columns
    ]
    return df
|
|
|
|
def scan_and_save():
    """Serialize every CSV file found under BASE_PATH into OUT_PATH.

    Each CSV is read with all values as strings (empty cells stay empty
    strings instead of becoming NaN), passed through safe_columns() and
    written to ``OUT_PATH/<key>.pkl``. ``<key>`` is the file's path relative
    to BASE_PATH with every path separator replaced by ``__``; the ``.csv``
    extension is kept as part of the key. A dictionary mapping each key to its
    DataFrame is additionally pickled to ``OUT_PATH/all_tables.pkl``.

    A failure while reading or writing one file is reported on stdout and the
    scan continues with the next file.
    """
    all_tables = {}
    os.makedirs(OUT_PATH, exist_ok=True)

    for root, dirs, files in os.walk(BASE_PATH):
        # Sorting in place fixes the traversal order, which os.walk otherwise
        # inherits from the file system, so repeated runs produce the same log
        # and the same key order in all_tables.
        dirs.sort()
        for filename in sorted(files):
            if not filename.lower().endswith(".csv"):
                continue

            full_path = os.path.join(root, filename)
            key = os.path.relpath(full_path, BASE_PATH).replace(os.sep, "__")
            try:
                df = pd.read_csv(full_path, dtype=str, keep_default_na=False)
                df = safe_columns(df)
                # Registered before the write: a table that was read but could
                # not be pickled on its own still ends up in all_tables.pkl.
                all_tables[key] = df
                df.to_pickle(os.path.join(OUT_PATH, f"{key}.pkl"))
                print(f"[OK] Letto e salvato: {key}")
            except Exception as e:
                # Best effort: one bad file must not stop the whole scan.
                print(f"[ERRORE] {key}: {e}")

    # Optional: also save everything in a single file.
    with open(os.path.join(OUT_PATH, "all_tables.pkl"), "wb") as out_file:
        pickle.dump(all_tables, out_file)
    print("Tutto serializzato!")
|
|
|
|
# Run the conversion only when the file is executed as a script, not on import.
if __name__ == "__main__":
    scan_and_save()