#!/bin/bash
# Script: 01_estrai_e_hash_debug.sh
#
# Recursively scans the input directory configured in agent_config.json for
# MDB files, exports every table to CSV and stores an MD5 hash next to it so
# that tables whose content did not change since the previous run are skipped.
# PDF files found in the same tree are mirrored to the output directory using
# the same hash-based skip logic.
# Verbose debugging through echo and a log file.
#
# Requires: jq, mdb-tables, mdb-export, md5sum.
# Author: Pikappa2 – 2025-05-27

set -euo pipefail

CONFIG="$HOME/netgescon/agent_config.json"

# --- Dependency checks (the log file is not available yet, so use stderr) ---
if ! command -v jq &> /dev/null; then
  echo "[ERRORE] Il programma 'jq' non è installato. Installa con: sudo apt-get install jq" >&2
  exit 1
fi
for tool in mdb-tables mdb-export md5sum; do
  if ! command -v "$tool" &> /dev/null; then
    echo "[ERRORE] Il programma '$tool' non è installato." >&2
    exit 1
  fi
done

# Prints $1 without trailing slashes (a lone "/" is left untouched), so that
# stripping the "$IN/" prefix from paths printed by find works reliably.
strip_trailing_slash() {
  local path="$1"
  while [[ "$path" == */ && "$path" != "/" ]]; do
    path="${path%/}"
  done
  printf '%s\n' "$path"
}

# --- Read parameters from the configuration file ---
if [[ ! -r "$CONFIG" ]]; then
  echo "[ERRORE] File di configurazione non leggibile: $CONFIG" >&2
  exit 1
fi

# '// empty' turns a missing/null key into an empty string instead of the
# literal text "null", which would otherwise be used as a directory name.
IN=$(jq -r '.InputDirectory // empty' "$CONFIG")
OUT=$(jq -r '.OutputDirectory // empty' "$CONFIG")
if [[ -z "$IN" || -z "$OUT" ]]; then
  echo "[ERRORE] 'InputDirectory' e 'OutputDirectory' devono essere valorizzati in $CONFIG" >&2
  exit 1
fi
IN=$(strip_trailing_slash "$IN")
OUT=$(strip_trailing_slash "$OUT")

HASH="$OUT/hash"
LOG="$HOME/netgescon/log/estrazione.log"
mkdir -p "$IN" "$OUT" "$HASH" "$(dirname "$LOG")"

# Temporary CSV currently in use; removed by the EXIT trap if the script
# stops while an export is in progress.
TMP_CSV=""
cleanup() {
  if [[ -n "$TMP_CSV" ]]; then
    rm -f -- "$TMP_CSV"
  fi
}
trap cleanup EXIT

# Writes a timestamped message to stdout and appends it to $LOG.
#   $1 = level (INFO, WARN, ERR, REPORT, DEBUG)
#   $2 = message
logmsg() {
  local LEVEL="$1"
  local MSG="$2"
  local TS
  TS=$(date "+%Y-%m-%d %H:%M:%S")
  printf '%s\n' "[$TS][$LEVEL] $MSG" | tee -a "$LOG"
}

# Prints the MD5 digest of file $1; returns non-zero if it cannot be read.
# Reading from stdin keeps the file name out of md5sum's output.
file_md5() {
  local sum
  sum=$(md5sum < "$1") || return 1
  printf '%s\n' "${sum%% *}"
}

logmsg "INFO" "==== AVVIO ESTRAZIONE DATI ===="
logmsg "INFO" "Input:  $IN"
logmsg "INFO" "Output: $OUT"
logmsg "INFO" "Hash:   $HASH"
logmsg "INFO" "Log:    $LOG"

# Global counters for the final report. They are incremented with
# VAR=$((VAR + 1)) because ((VAR++)) returns status 1 when VAR is 0, which
# aborts the script under 'set -e'.
MDB_FOUND=0
MDB_PROCESSED=0
TBL_EXTRACTED=0
TBL_SKIPPED=0
PDF_FOUND=0
PDF_COPIED=0
PDF_SKIPPED=0
ERROR_MDB=0

# --- MDB TABLE EXTRACTION ---
# Exports every table of one MDB file to "<DESTDIR>/<table>.csv" and records
# its MD5 in "$HASH/<MDBNAME>.<table>.csv.md5". A table whose CSV and hash
# file both exist and whose hash is unchanged is skipped.
#   $1 = path of the MDB file
#   $2 = MDB name (used in hash file names and log messages)
#   $3 = destination directory for the CSV files
# Globals written: TBL_EXTRACTED, TBL_SKIPPED, TMP_CSV.
# Returns 0 when the MDB could be read (individual table failures are only
# logged as WARN), 1 when the MDB cannot be processed at all. The caller
# treats any non-zero status as a problematic MDB.
extract_tables() {
  local MDB="$1"
  local MDBNAME="$2"
  local DESTDIR="$3"
  local TBL_EXTR=0
  local TBL_SKIP=0
  local TABLES TBL SAFE_TBL CSV HASHFILE HASHVAL OLDHASH

  echo "DEBUG: [extract_tables] MDB=$MDB, MDBNAME=$MDBNAME, DESTDIR=$DESTDIR"
  logmsg "DEBUG" "Entra in extract_tables con MDB=$MDB, MDBNAME=$MDBNAME, DESTDIR=$DESTDIR"

  if ! mkdir -p "$DESTDIR"; then
    logmsg "ERR" "Impossibile creare la directory $DESTDIR"
    return 1
  fi

  # List the tables once; the captured output is both the readability check
  # and the input of the loop below.
  if ! TABLES=$(mdb-tables -1 "$MDB" 2> /dev/null); then
    logmsg "ERR" "Impossibile leggere tabelle da $MDB"
    echo "DEBUG: [extract_tables] ERRORE su mdb-tables $MDB"
    return 1
  fi

  # The loop reads from a here-string rather than a pipe, so it runs in the
  # current shell and the counters updated inside it survive.
  while IFS= read -r TBL; do
    [[ -z "$TBL" ]] && continue

    # A '/' in a table name would otherwise point into a missing directory.
    SAFE_TBL="${TBL//\//_}"
    CSV="$DESTDIR/$SAFE_TBL.csv"
    HASHFILE="$HASH/$MDBNAME.$SAFE_TBL.csv.md5"

    if ! TMP_CSV=$(mktemp); then
      TMP_CSV=""
      logmsg "ERR" "Impossibile creare un file temporaneo per $MDBNAME"
      return 1
    fi

    echo "DEBUG: [extract_tables] Estrai tabella $TBL da $MDB" | tee -a "$LOG"

    if ! mdb-export "$MDB" "$TBL" > "$TMP_CSV" 2> /dev/null; then
      logmsg "WARN" "Errore nell'estrazione della tabella $TBL da $MDBNAME"
      rm -f -- "$TMP_CSV"
      TMP_CSV=""
      continue
    fi

    if ! HASHVAL=$(file_md5 "$TMP_CSV"); then
      logmsg "WARN" "Impossibile calcolare l'hash della tabella $TBL ($MDBNAME)"
      rm -f -- "$TMP_CSV"
      TMP_CSV=""
      continue
    fi

    if [[ -f "$CSV" && -f "$HASHFILE" ]]; then
      OLDHASH=$(cat -- "$HASHFILE" 2> /dev/null) || OLDHASH=""
      if [[ "$OLDHASH" == "$HASHVAL" ]]; then
        TBL_SKIPPED=$((TBL_SKIPPED + 1))
        TBL_SKIP=$((TBL_SKIP + 1))
        rm -f -- "$TMP_CSV"
        TMP_CSV=""
        logmsg "DEBUG" "Tabella $TBL ($MDBNAME) invariata, saltata."
        continue
      fi
    fi

    # Count the table as extracted only if both the CSV and its hash were
    # written; 'set -e' is inactive here because the caller invokes this
    # function as an 'if' condition.
    if ! cp -- "$TMP_CSV" "$CSV" || ! printf '%s\n' "$HASHVAL" > "$HASHFILE"; then
      logmsg "WARN" "Impossibile salvare la tabella $TBL di $MDBNAME in $CSV"
      rm -f -- "$TMP_CSV"
      TMP_CSV=""
      continue
    fi

    TBL_EXTRACTED=$((TBL_EXTRACTED + 1))
    TBL_EXTR=$((TBL_EXTR + 1))
    logmsg "INFO" "Estratta tabella $TBL da $MDBNAME (hash $HASHVAL)"
    rm -f -- "$TMP_CSV"
    TMP_CSV=""
  done <<< "$TABLES"

  if ((TBL_EXTR > 0)); then
    logmsg "REPORT" "MDB $MDBNAME: $TBL_EXTR tabelle estratte/aggiornate, $TBL_SKIP skippate."
  fi
  return 0
}

# --- PDF COPY AND HASH ---
# Copies one PDF to "$OUT/<relative path>" unless the destination and its
# hash file both exist and the stored MD5 equals the source's.
#   $1 = source PDF path
#   $2 = path of the PDF relative to $IN
# Globals written: PDF_COPIED, PDF_SKIPPED.
# Always returns 0: a failure on a single PDF is logged as WARN so that the
# remaining files are still processed.
copy_and_hash_pdf() {
  local SRC="$1"
  local REL="$2"
  local DST="$OUT/$REL"
  # Flatten the relative path into one file name ('/' -> '_').
  local HASHFILE="$HASH/${REL//\//_}.pdf.md5"
  local HASHVAL OLDHASH

  if ! HASHVAL=$(file_md5 "$SRC"); then
    logmsg "WARN" "Impossibile calcolare l'hash del PDF $REL"
    return 0
  fi

  if [[ -f "$DST" && -f "$HASHFILE" ]]; then
    OLDHASH=$(cat -- "$HASHFILE" 2> /dev/null) || OLDHASH=""
    if [[ "$OLDHASH" == "$HASHVAL" ]]; then
      PDF_SKIPPED=$((PDF_SKIPPED + 1))
      logmsg "DEBUG" "PDF $REL invariato, saltato."
      return 0
    fi
  fi

  if ! mkdir -p "$(dirname "$DST")" \
    || ! cp -- "$SRC" "$DST" \
    || ! printf '%s\n' "$HASHVAL" > "$HASHFILE"; then
    logmsg "WARN" "Impossibile copiare il PDF $REL in $DST"
    return 0
  fi

  PDF_COPIED=$((PDF_COPIED + 1))
  logmsg "INFO" "PDF copiato/aggiornato: $REL (hash $HASHVAL)"
}

# --- MDB: RECURSIVE EXTRACTION ---
# find's output is NUL-delimited and fed through process substitution: paths
# containing newlines stay intact and the loop runs in the current shell, so
# the counters are still set when the final report is printed.
while IFS= read -r -d '' MDB; do
  MDB_FOUND=$((MDB_FOUND + 1))
  # The quoted "$IN" is matched literally, not as a glob pattern.
  RELPATH="${MDB#"$IN"/}"
  # NOTE(review): only a lowercase ".mdb" suffix is stripped, while find
  # matches case-insensitively; "X.MDB" keeps its extension in MDBNAME.
  MDBNAME=$(basename -- "$MDB" .mdb)
  DESTDIR="$OUT/$(dirname -- "$RELPATH")/$MDBNAME"
  echo "DEBUG: [main loop] Trovato MDB: $MDB RELPATH=$RELPATH MDBNAME=$MDBNAME DESTDIR=$DESTDIR"
  logmsg "DEBUG" "Trovato MDB: $MDB RELPATH=$RELPATH MDBNAME=$MDBNAME DESTDIR=$DESTDIR"
  if extract_tables "$MDB" "$MDBNAME" "$DESTDIR"; then
    MDB_PROCESSED=$((MDB_PROCESSED + 1))
  else
    ERROR_MDB=$((ERROR_MDB + 1))
    logmsg "ERR" "File MDB problematico: $MDB"
    echo "DEBUG: [main loop] ERRORE su extract_tables $MDB"
  fi
done < <(find "$IN" -type f -iname '*.mdb' -print0)

# --- PDF: RECURSIVE SEARCH AND COPY ---
while IFS= read -r -d '' PDF; do
  PDF_FOUND=$((PDF_FOUND + 1))
  RELPATH="${PDF#"$IN"/}"
  copy_and_hash_pdf "$PDF" "$RELPATH"
done < <(find "$IN" -type f -iname '*.pdf' -print0)

# --- FINAL REPORT ---
logmsg "REPORT" "==== REPORT ESTRAZIONE ===="
logmsg "REPORT" "MDB trovati: $MDB_FOUND"
logmsg "REPORT" "MDB processati: $MDB_PROCESSED"
logmsg "REPORT" "MDB con errori: $ERROR_MDB"
logmsg "REPORT" "Tabelle estratte/aggiornate: $TBL_EXTRACTED"
logmsg "REPORT" "Tabelle skippate: $TBL_SKIPPED"
logmsg "REPORT" "PDF trovati: $PDF_FOUND"
logmsg "REPORT" "PDF copiati/aggiornati: $PDF_COPIED"
logmsg "REPORT" "PDF skippati: $PDF_SKIPPED"

if ((TBL_EXTRACTED == 0 && PDF_COPIED == 0)); then
  logmsg "REPORT" "Nessuna tabella estratta/aggiornata e nessun PDF copiato (tutto invariato)."
fi

logmsg "INFO" "==== FINE ESTRAZIONE ===="