netgescon-master/scripts/01_estrai_e_hash_debug.sh

232 lines
6.7 KiB
Bash
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
# Script: 01_estrai_e_hash_debug.sh
# Extracts the MDB files found (recursively) under the directory configured in
# agent_config.json, saves every table as CSV together with its hash, and
# skips anything already extracted that has not changed since.
# Verbose debugging through echo/log, a progress bar and precise counters;
# errors are always recorded and never stop the run.
# Author: Pikappa2 2025-05-28
# Only -u is enabled (expanding an unset variable is an error). -e is not set,
# which is consistent with the "errors never stop the run" goal stated above.
set -u
# --- Timestamped logger ---
#######################################
# Print one timestamped, level-tagged line on stdout and append the same line
# to the log file.
# Globals:   LOG (read) - path of the log file that tee appends to
# Arguments: $1 - level tag (INFO, WARN, ERR, REPORT, DEBUG)
#            $2 - message text
# Outputs:   "[YYYY-mm-dd HH:MM:SS][LEVEL] message" on stdout and in $LOG
#######################################
logmsg() {
  local level="$1" text="$2"
  local stamp
  stamp=$(date "+%Y-%m-%d %H:%M:%S")
  # The message is passed as a printf argument, never as the format string.
  printf '[%s][%s] %s\n' "$stamp" "$level" "$text" | tee -a "$LOG"
}
# --- Conditional debug logger ---
#######################################
# Forward a message to logmsg at DEBUG level, but only when debugging is on.
# Globals:   DEBUG (read) - output is produced only when it equals "true"
# Arguments: $1 - message text
# Returns:   0 when debugging is off, otherwise the status of logmsg
#######################################
debugmsg() {
  [[ "$DEBUG" == "true" ]] || return 0
  logmsg "DEBUG" "$1"
}
# --- Load configuration ---
CONFIG="$HOME/netgescon/agent_config.json"

# jq is required to parse the JSON configuration; stop early without it.
command -v jq &>/dev/null || {
  echo "[ERRORE] jq non trovato! Installa con: sudo apt-get install jq"
  exit 1
}
[[ -f "$CONFIG" ]] || {
  echo "[ERRORE] File di configurazione $CONFIG non trovato!"
  exit 1
}

# "// empty" makes jq print nothing for a missing or null key, so the -z
# checks below catch an unset directory.
IN=$(jq -r '.InputDirectory // empty' "$CONFIG")
OUT=$(jq -r '.OutputDirectory // empty' "$CONFIG")
# Debug falls back to "false"; the value is lower-cased before it is stored.
DEBUG=$(jq -r '.Debug // "false"' "$CONFIG" | tr '[:upper:]' '[:lower:]')
if [[ -z "$IN" ]] || [[ -z "$OUT" ]]; then
  echo "[ERRORE] InputDirectory o OutputDirectory non valorizzati nel file di configurazione!"
  exit 1
fi

# Hash files live under the output tree; the log lives under $HOME/netgescon.
HASH="$OUT/hash"
LOG="$HOME/netgescon/log/estrazione.log"
if ! mkdir -p "$OUT" "$HASH" "$(dirname "$LOG")" 2>/dev/null; then
  echo "[ERRORE] Impossibile creare cartelle di output."
  exit 1
fi

# Startup banner: logging starts only now that $LOG's directory exists.
logmsg "INFO" "==== AVVIO ESTRAZIONE DATI ===="
for startup_line in "Input: $IN" "Output: $OUT" "Hash: $HASH" "Log: $LOG" "Debug: $DEBUG"; do
  logmsg "INFO" "$startup_line"
done

# Run-wide counters, all starting from zero.
MDB_FOUND=0 MDB_PROCESSED=0
TBL_EXTRACTED=0 TBL_SKIPPED=0
PDF_FOUND=0 PDF_COPIED=0 PDF_SKIPPED=0
ERROR_MDB=0
# --- MDB table extraction ---
#######################################
# Export every table of one MDB file to CSV. A table is skipped when its CSV
# already exists and the MD5 of the fresh export equals the recorded hash.
# Globals:   HASH (read)             - directory holding the .md5 files
#            TBL_EXTRACTED (written) - incremented for each table written
#            TBL_SKIPPED (written)   - incremented for each unchanged table
# Arguments: $1 - path of the MDB file
#            $2 - MDB base name, used as prefix of the hash file names
#            $3 - destination directory for the CSV files
# Returns:   0 when the table list could be read (failures on individual
#            tables are logged as WARN and do not change the status);
#            1 when the destination directory cannot be created or the table
#            list cannot be read.
# NOTE(review): hash files are keyed only by "$2.<table>", so two MDB files
# with the same base name in different directories share hash files.
#######################################
extract_tables() {
  local MDB="$1"
  local MDBNAME="$2"
  local DESTDIR="$3"
  local TBL_EXTR=0
  local TBL_SKIP=0
  local TBL TBL_LIST CSV HASHFILE TMP_CSV EXPORT_OUT HASHVAL OLDHASH

  debugmsg "Entra in extract_tables con MDB=$MDB, MDBNAME=$MDBNAME, DESTDIR=$DESTDIR"
  if ! mkdir -p "$DESTDIR" 2>/dev/null; then
    logmsg "ERR" "Impossibile creare cartella $DESTDIR"
    return 1
  fi

  # stderr is captured too, so the tool's own message ends up in the log.
  if ! TBL_LIST=$(mdb-tables -1 "$MDB" 2>&1) || [[ -z "$TBL_LIST" ]]; then
    logmsg "ERR" "Impossibile leggere tabelle da $MDB: $TBL_LIST"
    return 1
  fi

  while read -r TBL; do
    [[ -z "$TBL" ]] && continue
    CSV="$DESTDIR/$TBL.csv"
    HASHFILE="$HASH/$MDBNAME.$TBL.csv.md5"
    if ! TMP_CSV=$(mktemp); then
      logmsg "WARN" "Impossibile creare file temporaneo per la tabella $TBL di $MDBNAME"
      continue
    fi
    debugmsg "Estrazione tabella $TBL da $MDB"
    # Redirection order matters: stderr goes to the capture, stdout to the file.
    if ! EXPORT_OUT=$(mdb-export "$MDB" "$TBL" 2>&1 >"$TMP_CSV"); then
      logmsg "WARN" "Errore nell'estrazione della tabella $TBL da $MDBNAME: $EXPORT_OUT"
      rm -f "$TMP_CSV"
      continue
    fi
    HASHVAL=$(md5sum "$TMP_CSV" | cut -d' ' -f1)
    if [[ -f "$CSV" && -f "$HASHFILE" ]]; then
      OLDHASH=$(<"$HASHFILE")
      if [[ "$OLDHASH" == "$HASHVAL" ]]; then
        TBL_SKIPPED=$((TBL_SKIPPED + 1))
        TBL_SKIP=$((TBL_SKIP + 1))
        rm -f "$TMP_CSV"
        debugmsg "Tabella $TBL ($MDBNAME) invariata, saltata."
        continue
      fi
    fi
    # Record the hash only after the CSV is really in place; otherwise a
    # failed copy would leave a stale CSV that the next run skips.
    if ! cp "$TMP_CSV" "$CSV" 2>/dev/null; then
      logmsg "WARN" "Impossibile copiare $TMP_CSV -> $CSV"
      rm -f "$TMP_CSV"
      continue
    fi
    printf '%s\n' "$HASHVAL" >"$HASHFILE"
    TBL_EXTRACTED=$((TBL_EXTRACTED + 1))
    TBL_EXTR=$((TBL_EXTR + 1))
    logmsg "INFO" "Estratta tabella $TBL da $MDBNAME (hash $HASHVAL)"
    rm -f "$TMP_CSV"
  done <<< "$TBL_LIST"

  if (( TBL_EXTR > 0 )); then
    logmsg "REPORT" "MDB $MDBNAME: $TBL_EXTR tabelle estratte/aggiornate, $TBL_SKIP skippate."
  fi
  # Success regardless of whether anything changed: the caller treats any
  # non-zero status as a broken MDB.
  return 0
}
# --- PDF copy ---
#######################################
# Copy one PDF into the output tree unless the destination already exists and
# the recorded MD5 equals the MD5 of the source.
# Globals:   OUT (read)            - root of the output tree
#            HASH (read)           - directory holding the .md5 files
#            PDF_COPIED (written)  - incremented for each file copied
#            PDF_SKIPPED (written) - incremented for each unchanged file
# Arguments: $1 - source path of the PDF
#            $2 - path of the PDF relative to the input root
# Returns:   0 when the file was copied or skipped, 1 when hashing or
#            copying failed (nothing is recorded in that case).
#######################################
copy_and_hash_pdf() {
  local SRC="$1"
  local REL="$2"
  local DST="$OUT/$REL"
  # The relative path is flattened ("/" -> "_") into one file name. The extra
  # ".pdf" suffix is kept so hash files written by earlier runs still match.
  local HASHFILE="$HASH/${REL//\//_}.pdf.md5"
  local HASHVAL OLDHASH

  # A failure here is not fatal by itself: it surfaces through cp below.
  mkdir -p "$(dirname "$DST")" 2>/dev/null

  HASHVAL=$(md5sum "$SRC" | cut -d' ' -f1)
  if [[ -z "$HASHVAL" ]]; then
    logmsg "WARN" "Impossibile calcolare hash di $SRC"
    return 1
  fi

  if [[ -f "$DST" && -f "$HASHFILE" ]]; then
    OLDHASH=$(<"$HASHFILE")
    if [[ "$OLDHASH" == "$HASHVAL" ]]; then
      PDF_SKIPPED=$((PDF_SKIPPED + 1))
      debugmsg "PDF $REL invariato, saltato."
      return 0
    fi
  fi

  # Record the hash only after a successful copy; otherwise an outdated
  # destination would be treated as up to date on the next run.
  if ! cp "$SRC" "$DST" 2>/dev/null; then
    logmsg "WARN" "Impossibile copiare $SRC -> $DST"
    return 1
  fi
  printf '%s\n' "$HASHVAL" >"$HASHFILE"
  PDF_COPIED=$((PDF_COPIED + 1))
  logmsg "INFO" "PDF copiato/aggiornato: $REL (hash $HASHVAL)"
}
# --- MDB: recursive extraction with progress bar ---
#######################################
# Find every *.mdb file (case-insensitive, recursive) under $IN and hand each
# one to extract_tables, drawing a text progress bar as it goes.
# Globals:   IN, OUT (read)
#            MDB_FOUND, MDB_PROCESSED, ERROR_MDB (written)
# Outputs:   a 30-column progress bar on stdout, redrawn in place with \r,
#            followed by a newline
# NOTE(review): basename strips only a lower-case ".mdb" suffix while find
# matches case-insensitively, so "X.MDB" keeps its extension in the name
# passed on; left as is to keep the existing output layout.
#######################################
process_mdb_files() {
  local -r bar_width=30
  local -a mdb_list=()
  local mdb relpath mdbname destdir bar
  local total percent bar_done
  local current=0
  # Drop one trailing slash so the prefix removal below also matches when
  # InputDirectory is written as "dir/".
  local in_root="${IN%/}"

  debugmsg "Inizio ricerca file MDB..."
  # NUL-delimited so that unusual file names are read back intact.
  while IFS= read -r -d '' mdb; do
    mdb_list+=("$mdb")
  done < <(find "$IN" -type f -iname '*.mdb' -print0)
  total=${#mdb_list[@]}

  # Guarded: expanding an empty array under `set -u` is an "unbound variable"
  # error on bash older than 4.4.
  if (( total > 0 )); then
    for mdb in "${mdb_list[@]}"; do
      current=$((current + 1))
      percent=$((current * 100 / total))
      bar_done=$((current * bar_width / total))
      # Build bar_done '#' characters, then left-justify them in the bar.
      printf -v bar '%*s' "$bar_done" ''
      bar=${bar// /#}
      printf '[%-*s] %d%% (%d/%d) MDB\r' "$bar_width" "$bar" "$percent" "$current" "$total"

      MDB_FOUND=$((MDB_FOUND + 1))
      # The inner quotes make the prefix literal; unquoted it would be a glob
      # pattern and fail on paths containing [, * or ?.
      relpath="${mdb#"$in_root"/}"
      mdbname=$(basename "$mdb" .mdb)
      destdir="$OUT/$(dirname "$relpath")/$mdbname"
      debugmsg "Trovato MDB: $mdb RELPATH=$relpath MDBNAME=$mdbname DESTDIR=$destdir"
      if extract_tables "$mdb" "$mdbname" "$destdir"; then
        MDB_PROCESSED=$((MDB_PROCESSED + 1))
      else
        ERROR_MDB=$((ERROR_MDB + 1))
        logmsg "ERR" "File MDB problematico: $mdb"
        debugmsg "Errore su extract_tables $mdb"
      fi
    done
  fi
  echo # newline after the progress bar
}
process_mdb_files
# --- PDF: recursive search and copy with progress bar ---
#######################################
# Find every *.pdf file (case-insensitive, recursive) under $IN and hand each
# one to copy_and_hash_pdf together with its path relative to $IN, drawing a
# text progress bar as it goes.
# Globals:   IN (read); PDF_FOUND (written)
# Outputs:   a 30-column progress bar on stdout, redrawn in place with \r,
#            followed by a newline
#######################################
process_pdf_files() {
  local -r bar_width=30
  local -a pdf_list=()
  local pdf relpath bar
  local total percent bar_done
  local current=0
  # Drop one trailing slash so the prefix removal below also matches when
  # InputDirectory is written as "dir/".
  local in_root="${IN%/}"

  debugmsg "Inizio ricerca file PDF..."
  # NUL-delimited so that unusual file names are read back intact.
  while IFS= read -r -d '' pdf; do
    pdf_list+=("$pdf")
  done < <(find "$IN" -type f -iname '*.pdf' -print0)
  total=${#pdf_list[@]}

  # Guarded: expanding an empty array under `set -u` is an "unbound variable"
  # error on bash older than 4.4.
  if (( total > 0 )); then
    for pdf in "${pdf_list[@]}"; do
      current=$((current + 1))
      percent=$((current * 100 / total))
      bar_done=$((current * bar_width / total))
      # Build bar_done '#' characters, then left-justify them in the bar.
      printf -v bar '%*s' "$bar_done" ''
      bar=${bar// /#}
      printf '[%-*s] %d%% (%d/%d) PDF\r' "$bar_width" "$bar" "$percent" "$current" "$total"

      PDF_FOUND=$((PDF_FOUND + 1))
      # The inner quotes make the prefix literal; unquoted it would be a glob
      # pattern and fail on paths containing [, * or ?.
      relpath="${pdf#"$in_root"/}"
      copy_and_hash_pdf "$pdf" "$relpath"
    done
  fi
  echo # newline after the progress bar
}
process_pdf_files
# --- Final report ---
# Emit the counters gathered during the run, one REPORT line each, in a fixed
# order.
report_lines=(
  "==== REPORT ESTRAZIONE ===="
  "MDB trovati: $MDB_FOUND"
  "MDB processati: $MDB_PROCESSED"
  "MDB con errori: $ERROR_MDB"
  "Tabelle estratte/aggiornate: $TBL_EXTRACTED"
  "Tabelle skippate: $TBL_SKIPPED"
  "PDF trovati: $PDF_FOUND"
  "PDF copiati/aggiornati: $PDF_COPIED"
  "PDF skippati: $PDF_SKIPPED"
)
for report_line in "${report_lines[@]}"; do
  logmsg "REPORT" "$report_line"
done
# Extra line when the run wrote nothing at all.
if (( TBL_EXTRACTED == 0 && PDF_COPIED == 0 )); then
  logmsg "REPORT" "Nessuna tabella estratta/aggiornata e nessun PDF copiato (tutto invariato)."
fi
logmsg "INFO" "==== FINE ESTRAZIONE ===="