netgescon-master/scripts/01_estrai_e_hash.sh

167 lines
5.2 KiB
Bash
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
# Script: 01_estrai_e_hash_debug.sh
# Extracts the MDB files found (recursively) under the directory configured in
# agent_config.json, saves every table as CSV together with an MD5 hash, and
# skips tables whose content has not changed since the previous run.
# Verbose debugging through echo and a log file.
# Author: Pikappa2 2025-05-27
set -e
CONFIG="$HOME/netgescon/agent_config.json"
# jq is required to read the JSON configuration.
if ! command -v jq &> /dev/null; then
  echo "[ERRORE] Il programma 'jq' non è installato. Installa con: sudo apt-get install jq" >&2
  exit 1
fi
# Fail early with a clear message when the configuration cannot be read.
if [[ ! -r "$CONFIG" ]]; then
  echo "[ERRORE] File di configurazione non trovato o non leggibile: $CONFIG" >&2
  exit 1
fi
# Read the parameters from the config. '// empty' maps a missing or null key
# to an empty string; plain 'jq -r' would print the literal text "null", which
# the mkdir below would create as a directory.
IN=$(jq -r '.InputDirectory // empty' "$CONFIG")
OUT=$(jq -r '.OutputDirectory // empty' "$CONFIG")
if [[ -z "$IN" || -z "$OUT" ]]; then
  echo "[ERRORE] InputDirectory e OutputDirectory devono essere definiti in $CONFIG" >&2
  exit 1
fi
# Drop a single trailing slash: the script later strips "$IN/" from the paths
# printed by find, and that prefix does not match when IN already ends in '/'.
if [[ "$IN" != "/" ]]; then
  IN="${IN%/}"
fi
HASH="$OUT/hash"
LOG="$HOME/netgescon/log/estrazione.log"
mkdir -p "$IN" "$OUT" "$HASH" "$(dirname "$LOG")"
# Logging helper.
# Usage: logmsg LEVEL MESSAGE
#   LEVEL   - severity tag (INFO, WARN, ERR, REPORT, DEBUG)
#   MESSAGE - text to record
# Prints "[timestamp][LEVEL] MESSAGE" on stdout and appends the same line to
# the file named by the global LOG.
logmsg() {
  local stamp
  stamp="$(date "+%Y-%m-%d %H:%M:%S")"
  printf '%s\n' "[${stamp}][${1}] ${2}" | tee -a "$LOG"
}
# Startup banner: record the effective paths used by this run.
logmsg INFO "==== AVVIO ESTRAZIONE DATI ===="
logmsg INFO "Input: ${IN}"
logmsg INFO "Output: ${OUT}"
logmsg INFO "Hash: ${HASH}"
logmsg INFO "Log: ${LOG}"
# Run-wide counters, grouped by subject (MDB files, tables, PDF files).
MDB_FOUND=0 MDB_PROCESSED=0 ERROR_MDB=0
TBL_EXTRACTED=0 TBL_SKIPPED=0
PDF_FOUND=0 PDF_COPIED=0 PDF_SKIPPED=0
# --- MDB TABLE EXTRACTION ---
# Exports every table of one MDB file to CSV, skipping the tables whose MD5
# matches the hash recorded by a previous run.
# Globals:
#   HASH           (read)    directory holding the per-table .md5 files
#   LOG            (read)    log file the debug echo is appended to
#   TBL_EXTRACTED  (written) run-wide count of exported/updated tables
#   TBL_SKIPPED    (written) run-wide count of unchanged tables
# Arguments:
#   $1 - path of the MDB file
#   $2 - MDB name used in log messages and in the hash file names
#   $3 - directory receiving one <table>.csv per table
# Returns:
#   0 when the table list could be read (a failure on a single table is only
#   logged as WARN), 1 when mdb-tables cannot read the file.
extract_tables() {
  local MDB="$1"
  local MDBNAME="$2"
  local DESTDIR="$3"
  local TBL_EXTR=0
  local TBL_SKIP=0
  local TABLES TBL CSV HASHFILE TMP_CSV HASHVAL OLDHASH
  echo "DEBUG: [extract_tables] MDB=$MDB, MDBNAME=$MDBNAME, DESTDIR=$DESTDIR"
  logmsg "DEBUG" "Entra in extract_tables con MDB=$MDB, MDBNAME=$MDBNAME, DESTDIR=$DESTDIR"
  mkdir -p "$DESTDIR"
  # List the tables once; the same call doubles as the readability check.
  if ! TABLES=$(mdb-tables -1 "$MDB" 2> /dev/null); then
    logmsg "ERR" "Impossibile leggere tabelle da $MDB"
    echo "DEBUG: [extract_tables] ERRORE su mdb-tables $MDB"
    return 1
  fi
  # The loop is fed by a here-string rather than a pipe: a piped loop runs in
  # a subshell and every counter updated inside it would be lost on exit.
  while IFS= read -r TBL; do
    [[ -n "$TBL" ]] || continue
    CSV="$DESTDIR/$TBL.csv"
    # NOTE(review): the hash key is only MDBNAME + table name, so two MDB files
    # with the same basename in different subdirectories share hash files.
    # Left unchanged to stay compatible with existing hash files - confirm.
    HASHFILE="$HASH/$MDBNAME.$TBL.csv.md5"
    TMP_CSV="$(mktemp)" || return 1
    echo "DEBUG: [extract_tables] Estrai tabella $TBL da $MDB" | tee -a "$LOG"
    if ! mdb-export "$MDB" "$TBL" > "$TMP_CSV" 2> /dev/null; then
      logmsg "WARN" "Errore nell'estrazione della tabella $TBL da $MDBNAME"
      rm -f "$TMP_CSV"
      continue
    fi
    HASHVAL=$(md5sum "$TMP_CSV" | cut -d' ' -f1)
    if [[ -f "$CSV" && -f "$HASHFILE" ]]; then
      OLDHASH=$(cat "$HASHFILE")
      if [[ "$OLDHASH" == "$HASHVAL" ]]; then
        # var=$((var + 1)) is used instead of ((var++)): the latter returns a
        # non-zero status when the old value is 0, which trips 'set -e'.
        TBL_SKIPPED=$((TBL_SKIPPED + 1))
        TBL_SKIP=$((TBL_SKIP + 1))
        rm -f "$TMP_CSV"
        logmsg "DEBUG" "Tabella $TBL ($MDBNAME) invariata, saltata."
        continue
      fi
    fi
    # Record the hash only once the CSV is really in place.
    if ! cp -- "$TMP_CSV" "$CSV"; then
      logmsg "WARN" "Impossibile scrivere $CSV"
      rm -f "$TMP_CSV"
      continue
    fi
    echo "$HASHVAL" > "$HASHFILE"
    TBL_EXTRACTED=$((TBL_EXTRACTED + 1))
    TBL_EXTR=$((TBL_EXTR + 1))
    logmsg "INFO" "Estratta tabella $TBL da $MDBNAME (hash $HASHVAL)"
    rm -f "$TMP_CSV"
  done <<< "$TABLES"
  if [[ "$TBL_EXTR" -gt 0 ]]; then
    logmsg "REPORT" "MDB $MDBNAME: $TBL_EXTR tabelle estratte/aggiornate, $TBL_SKIP skippate."
  fi
  # Always 0 here: the caller treats any non-zero status as a broken MDB, so
  # "something changed" must not be reported through the exit status.
  return 0
}
# --- PDF COPY AND HASH ---
# Copies one PDF into the output tree unless an identical copy (same MD5 as
# the recorded hash) is already there.
# Globals:
#   OUT          (read)    root of the output tree
#   HASH         (read)    directory holding the .md5 files
#   PDF_COPIED   (written) run-wide count of copied/updated PDFs
#   PDF_SKIPPED  (written) run-wide count of unchanged PDFs
# Arguments:
#   $1 - path of the source PDF
#   $2 - destination path relative to OUT (also used to build the hash name)
# Returns:
#   0 when the PDF was copied or skipped, 1 when it could not be hashed or
#   copied.
copy_and_hash_pdf() {
  local SRC="$1"
  local REL="$2"
  local DST="$OUT/$REL"
  # Flatten the relative path into one file name ('/' -> '_'). The name keeps
  # REL's own extension in front of ".pdf.md5"; it is left as is so that hash
  # files written by earlier runs still match.
  local HASHFILE="$HASH/${REL//\//_}.pdf.md5"
  local HASHVAL OLDHASH
  mkdir -p "$(dirname "$DST")"
  HASHVAL=$(md5sum "$SRC" | cut -d' ' -f1)
  # The pipeline status is cut's, so an md5sum failure shows up only as an
  # empty hash.
  if [[ -z "$HASHVAL" ]]; then
    logmsg "ERR" "Impossibile calcolare l'hash di $SRC"
    return 1
  fi
  if [[ -f "$DST" && -f "$HASHFILE" ]]; then
    OLDHASH=$(cat "$HASHFILE")
    if [[ "$OLDHASH" == "$HASHVAL" ]]; then
      # var=$((var + 1)) instead of ((var++)): the latter returns a non-zero
      # status when the old value is 0 and would abort the run under 'set -e'.
      PDF_SKIPPED=$((PDF_SKIPPED + 1))
      logmsg "DEBUG" "PDF $REL invariato, saltato."
      return 0
    fi
  fi
  # Record the hash only once the copy is really in place.
  if ! cp -- "$SRC" "$DST"; then
    logmsg "ERR" "Impossibile copiare $SRC in $DST"
    return 1
  fi
  echo "$HASHVAL" > "$HASHFILE"
  PDF_COPIED=$((PDF_COPIED + 1))
  logmsg "INFO" "PDF copiato/aggiornato: $REL (hash $HASHVAL)"
}
# --- MDB: RECURSIVE EXTRACTION ---
# The loop reads from a process substitution instead of a pipe: a piped loop
# runs in a subshell, so the counters updated here (and inside extract_tables)
# would be lost before the final report. Paths are NUL-delimited so that any
# file name is handled.
while IFS= read -r -d '' MDB; do
  # var=$((var + 1)) instead of ((var++)): the latter returns a non-zero
  # status when the old value is 0 and aborts the script under 'set -e'.
  MDB_FOUND=$((MDB_FOUND + 1))
  # Quoted pattern: IN is stripped literally, and a trailing slash in IN no
  # longer prevents the prefix from matching.
  RELPATH="${MDB#"${IN%/}"/}"
  # NOTE(review): find matches the extension case-insensitively, but basename
  # strips only a lowercase ".mdb", so "FILE.MDB" keeps its extension in
  # MDBNAME/DESTDIR. Left unchanged to keep existing output paths - confirm.
  MDBNAME=$(basename -- "$MDB" .mdb)
  DESTDIR="$OUT/$(dirname -- "$RELPATH")/$MDBNAME"
  echo "DEBUG: [main loop] Trovato MDB: $MDB RELPATH=$RELPATH MDBNAME=$MDBNAME DESTDIR=$DESTDIR"
  logmsg "DEBUG" "Trovato MDB: $MDB RELPATH=$RELPATH MDBNAME=$MDBNAME DESTDIR=$DESTDIR"
  if extract_tables "$MDB" "$MDBNAME" "$DESTDIR"; then
    MDB_PROCESSED=$((MDB_PROCESSED + 1))
  else
    ERROR_MDB=$((ERROR_MDB + 1))
    logmsg "ERR" "File MDB problematico: $MDB"
    echo "DEBUG: [main loop] ERRORE su extract_tables $MDB"
  fi
done < <(find "$IN" -type f -iname '*.mdb' -print0)
# --- PDF: RECURSIVE SEARCH AND COPY ---
# Same loop shape as the MDB pass: process substitution keeps the loop in the
# current shell so the PDF counters survive until the final report, and
# NUL-delimited paths make any file name safe.
while IFS= read -r -d '' PDF; do
  # var=$((var + 1)) instead of ((var++)): the latter returns a non-zero
  # status when the old value is 0 and aborts the script under 'set -e'.
  PDF_FOUND=$((PDF_FOUND + 1))
  # Quoted pattern: IN is stripped literally, trailing slash tolerated.
  RELPATH="${PDF#"${IN%/}"/}"
  # A single PDF that cannot be copied is logged and skipped instead of
  # aborting the whole run, mirroring how the MDB pass treats a broken file.
  if ! copy_and_hash_pdf "$PDF" "$RELPATH"; then
    logmsg "ERR" "File PDF problematico: $PDF"
  fi
done < <(find "$IN" -type f -iname '*.pdf' -print0)
# --- FINAL REPORT ---
# Dump every run-wide counter, then close the log section.
logmsg REPORT "==== REPORT ESTRAZIONE ===="
logmsg REPORT "MDB trovati: ${MDB_FOUND}"
logmsg REPORT "MDB processati: ${MDB_PROCESSED}"
logmsg REPORT "MDB con errori: ${ERROR_MDB}"
logmsg REPORT "Tabelle estratte/aggiornate: ${TBL_EXTRACTED}"
logmsg REPORT "Tabelle skippate: ${TBL_SKIPPED}"
logmsg REPORT "PDF trovati: ${PDF_FOUND}"
logmsg REPORT "PDF copiati/aggiornati: ${PDF_COPIED}"
logmsg REPORT "PDF skippati: ${PDF_SKIPPED}"
# Extra line when the run produced nothing new at all.
if (( TBL_EXTRACTED == 0 && PDF_COPIED == 0 )); then
  logmsg REPORT "Nessuna tabella estratta/aggiornata e nessun PDF copiato (tutto invariato)."
fi
logmsg INFO "==== FINE ESTRAZIONE ===="