# controllore_vram_ollama/controlla_vram_ollama.py

import subprocess
from datetime import datetime, timedelta

def esegui_comando(comando):
    """Run *comando* through the system shell and return its stdout lines.

    Args:
        comando: Command line handed to the shell as a single string, so
            pipelines and redirections are interpreted by the shell.

    Returns:
        The captured standard output split into a list of lines (without
        line terminators). Standard error is captured and discarded, and the
        exit status is not inspected.
    """
    risultato = subprocess.run(
        comando,
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,  # decode the output to str
    )
    righe = risultato.stdout.splitlines()
    return righe

def ottieni_modelli_attivi():
    """Return the model names listed by ``ollama ps``.

    The first output line is treated as the table header and skipped. For
    every remaining non-blank line, the first whitespace-separated field is
    taken as the model name.

    Returns:
        A list of model names; empty when the command prints fewer than two
        lines (no header, or a header with no rows).
    """
    righe = esegui_comando("ollama ps")

    # A usable listing needs the header plus at least one data row.
    if len(righe) < 2:
        return []

    campi_per_riga = (riga.split() for riga in righe[1:])
    return [campi[0] for campi in campi_per_riga if campi]

def ottieni_ultimo_timestamp(api_path):
    """Return when *api_path* last appears in the ``ollama.service`` status.

    The output of ``systemctl status ollama.service`` is filtered for lines
    containing *api_path*; the third space-separated field of the last
    matching line is parsed as an ``HH:MM:SS`` time of day.

    Args:
        api_path: Text to look for in the status output (for example
            ``"/api/chat"``). It is matched literally, not as a regex.

    Returns:
        A naive ``datetime`` built from today's date and the parsed time,
        moved back one day when that would lie in the future; ``None`` when
        no line matches or the field is not a valid ``HH:MM:SS`` value.
        Only the time of day is read, so an entry older than 24 hours cannot
        be told apart from a recent one.
    """
    import shlex  # local import: only needed to build the shell pipeline

    # Quote the path and match it as a fixed string (-F) so quotes, dots or
    # brackets in api_path can neither break the pipeline nor be interpreted
    # as a regular expression. "--" keeps a leading "-" from being read as a
    # grep option.
    comando = (
        "systemctl status ollama.service"
        f" | grep -F -- {shlex.quote(api_path)}"
        " | cut -d' ' -f3"
    )
    output = esegui_comando(comando)

    if not output:
        print(f"Nessun timestamp per {api_path}")
        return None

    # The most recent matching log entry is the last line.
    time_str = output[-1].strip()

    try:
        ora_log = datetime.strptime(time_str, "%H:%M:%S").time()
    except ValueError as e:
        print(f"Formato non valido per {api_path}: '{time_str}'. Errore: {e}")
        return None

    oggi = datetime.now()
    timestamp_log = datetime.combine(oggi.date(), ora_log)

    # A time later than "now" must belong to the previous day (the entry was
    # logged before midnight).
    if timestamp_log > oggi:
        timestamp_log -= timedelta(days=1)

    return timestamp_log

def main(soglia_minuti=5):
    """Stop all running Ollama models when both APIs have been idle.

    Reads the last ``/api/chat`` and ``/api/embeddings`` timestamps. When
    both are older than *soglia_minuti* minutes, every model reported by
    ``ottieni_modelli_attivi`` is stopped with ``ollama stop``. Progress and
    errors are reported on standard output.

    Args:
        soglia_minuti: Idle time, in minutes, that both APIs must exceed
            before the models are stopped. Defaults to 5.

    Returns:
        None. Nothing is stopped when either timestamp is unavailable.
    """
    chat_time = ottieni_ultimo_timestamp("/api/chat")
    embeddings_time = ottieni_ultimo_timestamp("/api/embeddings")

    if chat_time is None or embeddings_time is None:
        print("Dati mancanti per una o entrambe le API")
        return

    now = datetime.now()
    minuti_chat = (now - chat_time).total_seconds() / 60
    minuti_embeddings = (now - embeddings_time).total_seconds() / 60

    # Keep the models loaded if either API was used within the threshold.
    if minuti_chat <= soglia_minuti or minuti_embeddings <= soglia_minuti:
        print("\nModelli mantenuti attivi: richieste recenti")
        return

    print("\nStop modelli per inattività")
    modelli_attivi = ottieni_modelli_attivi()

    if not modelli_attivi:
        print("Nessun modello attivo da fermare")
        return

    for modello in modelli_attivi:
        print(f"Fermando {modello}...")
        try:
            # Pass an argument list (no shell) so a model name parsed from
            # command output can never be interpreted as shell syntax.
            processo = subprocess.run(
                ["ollama", "stop", modello],
                capture_output=True,
                text=True,
            )
        except OSError as errore:
            # E.g. the ollama executable is missing; report it and move on.
            print(f"❌ Errore fermando {modello}: {errore}")
            continue

        if processo.returncode == 0:
            print(f"✅ {modello} fermato con successo")
        else:
            print(f"❌ Errore fermando {modello}: {processo.stderr.strip()}")

# Run the idle check only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()