2025-12-09 10:28:18 -03:00
using System.Runtime.CompilerServices ;
2025-11-18 14:34:26 -03:00
using System.Text ;
2025-12-09 10:28:18 -03:00
using System.Text.Json ;
using ChatbotApi.Data.Models ;
2025-11-18 14:34:26 -03:00
using HtmlAgilityPack ;
using Microsoft.Extensions.Caching.Memory ;
2025-12-05 13:02:23 -03:00
using Microsoft.EntityFrameworkCore ;
2025-12-09 10:28:18 -03:00
using System.Net ;
using System.Globalization ;
2025-12-05 13:02:23 -03:00
2025-12-09 10:28:18 -03:00
namespace ChatbotApi.Services
2025-11-21 12:51:00 -03:00
{
2025-12-09 10:28:18 -03:00
/// <summary>
/// Contract for a chat backend that answers a single request as an asynchronous stream of text.
/// </summary>
public interface IChatService
{
/// <summary>
/// Produces the reply to <paramref name="request"/> as an asynchronous sequence of string fragments.
/// </summary>
/// <param name="request">The incoming chat request.</param>
/// <param name="cancellationToken">Token used to stop the enumeration.</param>
/// <returns>An asynchronous sequence of string fragments.</returns>
IAsyncEnumerable < string > StreamMessageAsync ( ChatRequest request , CancellationToken cancellationToken ) ;
}
2025-11-18 14:34:26 -03:00
2025-12-09 10:28:18 -03:00
public class ChatService : IChatService
2025-11-18 14:34:26 -03:00
{
2025-12-09 10:28:18 -03:00
// --- Collaborators assigned in the constructor ---
private readonly ILogger<ChatService> _logger;
private readonly IMemoryCache _cache;
private readonly IServiceProvider _serviceProvider;
private readonly IHttpClientFactory _httpClientFactory;
private readonly AppContexto _dbContext;

// Gemini endpoint with the API key appended; composed once in the constructor.
private readonly string _apiUrl;

// Front page scraped for headlines when the request carries no usable article.
private static readonly string _siteUrl = "https://www.eldia.com/";

// Media markers removed from the start of scraped headlines.
private static readonly string[] PrefijosAQuitar = { "VIDEO.- ", "VIDEO. ", "FOTOS.- ", "FOTOS. " };

// Value sent as MaxOutputTokens in the generation config of the main Gemini request.
private const int OutTokens = 8192;

// Memory-cache key under which the joined active system prompts are stored.
private const string SystemPromptsCacheKey = "ActiveSystemPrompts";
2025-12-09 10:28:18 -03:00
/// <summary>
/// Creates the service and composes the Gemini endpoint from configuration.
/// </summary>
/// <param name="configuration">Source of <c>Gemini:GeminiApiKey</c> and <c>Gemini:GeminiApiUrl</c>.</param>
/// <param name="memoryCache">Cache used for system prompts, knowledge items and scraped content.</param>
/// <param name="serviceProvider">Provider used to create scopes for database access outside the injected context.</param>
/// <param name="logger">Logger for this service.</param>
/// <param name="httpClientFactory">Factory for the HTTP clients that call Gemini.</param>
/// <param name="dbContext">Database context used by the methods that query through the injected instance.</param>
/// <exception cref="InvalidOperationException">
/// Thrown when the API key or the API base URL is missing or blank.
/// </exception>
public ChatService(
    IConfiguration configuration,
    IMemoryCache memoryCache,
    IServiceProvider serviceProvider,
    ILogger<ChatService> logger,
    IHttpClientFactory httpClientFactory,
    AppContexto dbContext)
{
    _logger = logger;
    _cache = memoryCache;
    _serviceProvider = serviceProvider;
    _httpClientFactory = httpClientFactory;
    _dbContext = dbContext;

    var apiKey = configuration["Gemini:GeminiApiKey"];
    if (string.IsNullOrWhiteSpace(apiKey))
    {
        throw new InvalidOperationException("La API Key de Gemini no está configurada en .env");
    }

    // Fail fast: without a base URL the composed endpoint would consist of the key alone
    // and every later HTTP call would fail with a far less obvious error.
    var baseUrl = configuration["Gemini:GeminiApiUrl"];
    if (string.IsNullOrWhiteSpace(baseUrl))
    {
        throw new InvalidOperationException("La URL de la API de Gemini no está configurada en .env");
    }

    _apiUrl = $"{baseUrl}{apiKey}";
}
2025-11-20 12:39:23 -03:00
2025-12-09 14:05:53 -03:00
// Modelo de respuesta para JSON estructurado de Gemini
2025-12-09 12:36:04 -03:00
/// <summary>
/// Target type for deserializing the JSON object that the unified prompt asks the model to return.
/// Property names are lowercase to mirror the JSON keys "intent", "reply" and "summary".
/// </summary>
private class GeminiStructuredResponse
{
// Intent label; the initializer supplies "NOTICIAS_PORTADA" when the payload omits the key.
public string intent { get ; set ; } = "NOTICIAS_PORTADA" ;
// Text of the answer; initialized to an empty string.
public string reply { get ; set ; } = "" ;
// Conversation summary returned alongside the answer; initialized to an empty string.
public string summary { get ; set ; } = "" ;
}
2025-12-09 10:28:18 -03:00
// Reused for every structured reply; building options per call defeats System.Text.Json's metadata cache.
private static readonly JsonSerializerOptions StructuredResponseJsonOptions = new JsonSerializerOptions
{
    PropertyNameCaseInsensitive = true
};

// A reply fragment is emitted at the first space once it has reached this many characters.
private const int StreamChunkMinLength = 20;

// Pause between fragments, in milliseconds, used to simulate streaming.
private const int StreamChunkDelayMs = 30;

/// <summary>
/// Answers a chat request with a single Gemini call and emits the result as a sequence of fragments.
/// </summary>
/// <remarks>
/// On success the sequence is: one <c>INTENT::&lt;intent&gt;</c> item, the reply split into fragments
/// whose concatenation equals the reply exactly, and finally one <c>SUMMARY::&lt;summary&gt;</c> item.
/// On failure (empty message, context error, provider error, unparseable reply) the sequence
/// consists of a single user-facing error message.
/// </remarks>
/// <param name="request">Chat request; a null request or blank message yields an error message.</param>
/// <param name="cancellationToken">Cancels the Gemini call and the pauses between fragments.</param>
/// <returns>The fragments described above.</returns>
/// <exception cref="OperationCanceledException">Thrown when <paramref name="cancellationToken"/> is cancelled.</exception>
public async IAsyncEnumerable<string> StreamMessageAsync(ChatRequest request, [EnumeratorCancellation] CancellationToken cancellationToken)
{
    if (string.IsNullOrWhiteSpace(request?.Message))
    {
        yield return "Error: No he recibido ningún mensaje.";
        yield break;
    }

    const string serviceUnavailableMessage = "Lo siento, el servicio está temporalmente no disponible. Por favor, intenta de nuevo en unos minutos.";

    string safeUserMessage = SanitizeInput(request.Message);
    string context = "";
    string? errorMessage = null;

    // Start loading the stored system prompts while the context is being built.
    // Skipped when the request overrides them, since the result would be discarded.
    Task<string>? systemPromptsTask = string.IsNullOrWhiteSpace(request.SystemPromptOverride)
        ? GetActiveSystemPromptsAsync()
        : null;

    // "yield" is not allowed inside a try block that has a catch clause, so failures are
    // recorded in errorMessage and reported after the block.
    try
    {
        context = await BuildContextAsync(request, safeUserMessage);
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Error construyendo contexto.");
        errorMessage = "Lo siento, hubo un problema técnico procesando tu solicitud.";
    }

    if (!string.IsNullOrEmpty(errorMessage))
    {
        yield return errorMessage;
        yield break;
    }

    // ========== Unified API call ==========
    var httpClient = _httpClientFactory.CreateClient();
    httpClient.Timeout = TimeSpan.FromSeconds(45);
    string? jsonText = null;

    try
    {
        var systemInstructions = systemPromptsTask is null
            ? request.SystemPromptOverride
            : await systemPromptsTask;

        var requestData = new GeminiRequest
        {
            Contents = new[]
            {
                new Content
                {
                    Parts = new[]
                    {
                        new Part { Text = BuildUnifiedPrompt(systemInstructions, request.ConversationSummary, context, safeUserMessage) }
                    }
                }
            },
            GenerationConfig = new GenerationConfig { MaxOutputTokens = OutTokens },
            SafetySettings = GetDefaultSafetySettings()
        };

        // Use the non-streaming endpoint: the whole JSON object is needed before anything can be emitted.
        var nonStreamingApiUrl = _apiUrl.Replace(":streamGenerateContent?alt=sse&", ":generateContent?");

        using var response = await httpClient.PostAsJsonAsync(nonStreamingApiUrl, requestData, cancellationToken);
        if (!response.IsSuccessStatusCode)
        {
            _logger.LogWarning("Error API Gemini: {StatusCode}", response.StatusCode);
            errorMessage = serviceUnavailableMessage;
        }
        else
        {
            var geminiResponse = await response.Content.ReadFromJsonAsync<GeminiResponse>(cancellationToken: cancellationToken);
            jsonText = geminiResponse?.Candidates?.FirstOrDefault()?.Content?.Parts?.FirstOrDefault()?.Text?.Trim();
            if (string.IsNullOrEmpty(jsonText))
            {
                _logger.LogWarning("Respuesta vacía de Gemini");
                errorMessage = "Lo siento, hubo un problema al procesar la respuesta.";
            }
        }
    }
    catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
    {
        // The caller went away; propagate instead of reporting a provider outage.
        throw;
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Error en llamada unificada a Gemini.");
        errorMessage = serviceUnavailableMessage;
    }

    if (!string.IsNullOrEmpty(errorMessage))
    {
        yield return errorMessage;
        yield break;
    }

    // Parse the structured reply (kept in its own try so the yields below stay outside).
    GeminiStructuredResponse? apiResponse = null;
    try
    {
        apiResponse = JsonSerializer.Deserialize<GeminiStructuredResponse>(ExtractJsonPayload(jsonText!), StructuredResponseJsonOptions);
    }
    catch (JsonException ex)
    {
        _logger.LogError(ex, "Error al parsear JSON de Gemini. Respuesta raw: {JsonText}", jsonText);
    }

    if (apiResponse == null || string.IsNullOrEmpty(apiResponse.reply))
    {
        yield return "Lo siento, tuve un problema al procesar la respuesta. Por favor, intenta de nuevo.";
        yield break;
    }

    // Intent metadata goes first.
    yield return $"INTENT::{apiResponse.intent}";

    // Simulated streaming. Fragments are cut right after a space and are exact substrings of the
    // reply, so Markdown-significant whitespace (indentation, double spaces, line breaks) survives.
    string fullReply = apiResponse.reply;
    int chunkStart = 0;
    for (int i = 0; i < fullReply.Length; i++)
    {
        int chunkLength = i + 1 - chunkStart;
        if (fullReply[i] != ' ' || chunkLength < StreamChunkMinLength)
        {
            continue;
        }

        yield return fullReply.Substring(chunkStart, chunkLength);
        chunkStart = i + 1;
        await Task.Delay(StreamChunkDelayMs, cancellationToken);
    }

    if (chunkStart < fullReply.Length)
    {
        yield return fullReply.Substring(chunkStart);
    }

    LogConversationInBackground(safeUserMessage, fullReply);

    // Updated conversation summary goes last.
    yield return $"SUMMARY::{apiResponse.summary}";
}

/// <summary>
/// Builds the context section of the prompt: the requested article when available, otherwise
/// the best matching front-page article or a headline list, followed by the knowledge base.
/// </summary>
private async Task<string> BuildContextAsync(ChatRequest request, string safeUserMessage)
{
    string? articleContext = null;
    if (!string.IsNullOrEmpty(request.ContextUrl) && await UrlSecurity.IsSafeUrlAsync(request.ContextUrl))
    {
        articleContext = await GetArticleContentAsync(request.ContextUrl);
    }

    // Scraped text is untrusted; sanitize it the same way the matched-article branch does.
    string context = !string.IsNullOrEmpty(articleContext)
        ? SanitizeInput(articleContext)
        : await BuildNewsContextAsync(request, safeUserMessage);

    var knowledgeItems = await GetKnowledgeItemsAsync();
    if (knowledgeItems.Any())
    {
        var kbBuilder = new StringBuilder("\n\nBASE DE CONOCIMIENTO:");
        foreach (var item in knowledgeItems.Values)
        {
            kbBuilder.AppendLine($"\n- {item.Descripcion}: {item.Valor}");
        }
        context += kbBuilder.ToString();
    }

    return context;
}

/// <summary>
/// Builds context from the front page: the content of the best matching article (local match
/// first, AI match as fallback for specific messages) or, failing that, a list of headlines.
/// </summary>
private async Task<string> BuildNewsContextAsync(ChatRequest request, string safeUserMessage)
{
    var articles = await GetWebsiteNewsAsync(_siteUrl, 50);

    // Drop articles the client reports as already shown.
    if (request.ShownArticles != null && request.ShownArticles.Any())
    {
        articles = articles.Where(a => !request.ShownArticles.Contains(a.Url)).ToList();
    }

    var bestMatch = FindBestMatchingArticleLocal(safeUserMessage, articles);
    if (bestMatch == null)
    {
        // Only spend an AI call when the message looks specific (not a greeting or short acknowledgement).
        if (RequiresAIMatching(safeUserMessage))
        {
            bestMatch = await FindBestMatchingArticleAIAsync(safeUserMessage, articles, request.ConversationSummary);
        }
        else
        {
            _logger.LogInformation("Mensaje genérico detectado: '{Message}'. Skipping AI matching.", safeUserMessage);
        }
    }

    if (bestMatch != null && await UrlSecurity.IsSafeUrlAsync(bestMatch.Url))
    {
        string rawContent = await GetArticleContentAsync(bestMatch.Url) ?? "";
        return $"ARTÍCULO ENCONTRADO: {bestMatch.Title}\nURL: {bestMatch.Url}\n\nCONTENIDO:\n{SanitizeInput(rawContent)}";
    }

    var sb = new StringBuilder();
    sb.AppendLine("NOTICIAS DISPONIBLES:");
    foreach (var article in articles.Take(15))
    {
        sb.AppendLine($"- {article.Title} ({article.Url})");
    }
    return sb.ToString();
}

/// <summary>
/// Composes the single prompt sent to Gemini: system instructions and JSON field rules,
/// the optional conversation history, the context and the user question.
/// </summary>
private string BuildUnifiedPrompt(string? systemInstructions, string? conversationSummary, string context, string safeUserMessage)
{
    var promptBuilder = new StringBuilder();
    promptBuilder.AppendLine("<instrucciones_sistema>");
    promptBuilder.AppendLine("Eres DiaBot, asistente virtual de El Día (La Plata, Argentina).");
    promptBuilder.AppendLine();
    promptBuilder.AppendLine("FORMATO DE RESPUESTA:");
    promptBuilder.AppendLine("Debes responder en formato JSON con esta estructura EXACTA:");
    promptBuilder.AppendLine("{\"intent\": \"...\", \"reply\": \"...\", \"summary\": \"...\"}");
    promptBuilder.AppendLine();
    promptBuilder.AppendLine("INSTRUCCIONES GENERALES:");
    promptBuilder.AppendLine(systemInstructions);
    promptBuilder.AppendLine("- NO uses formatos de email/carta ('Estimado/a', 'Atentamente')");
    promptBuilder.AppendLine("- NO saludes de nuevo si ya saludaste o si la pregunta es directa");
    promptBuilder.AppendLine("- Sé conciso, directo y natural");
    promptBuilder.AppendLine();
    promptBuilder.AppendLine("--- REGLAS PARA CADA CAMPO JSON ---");
    promptBuilder.AppendLine();
    promptBuilder.AppendLine("1. 'intent': Clasifica la intención usando SOLO uno de estos valores:");
    promptBuilder.AppendLine(" - \"ARTICULO_ACTUAL\": Si la pregunta es sobre el tema del artículo en <contexto>");
    promptBuilder.AppendLine(" - \"BASE_DE_CONOCIMIENTO\": Para preguntas sobre 'El Día' como empresa/organización");
    promptBuilder.AppendLine(" - \"NOTICIAS_PORTADA\": Para todo lo demás (este es el default si dudas)");
    promptBuilder.AppendLine();
    promptBuilder.AppendLine("2. 'reply': Tu respuesta en texto Markdown para el usuario.");
    promptBuilder.AppendLine(" - Si es un artículo específico: Resume brevemente e INCLUYE el enlace [Título](URL)");
    promptBuilder.AppendLine(" - Si son noticias generales: Selecciona las 3 más relevantes, breve frase c/u + enlace");
    promptBuilder.AppendLine(" - Si la pregunta refiere a algo del <historial_conversacion>, úsalo (ej: 'dónde leerla' → dale el link)");
    promptBuilder.AppendLine();
    promptBuilder.AppendLine("3. 'summary': Actualiza el historial de conversación.");
    promptBuilder.AppendLine(" - Resume el intercambio actual (pregunta + respuesta) en 1-2 líneas");
    promptBuilder.AppendLine(" - Integra con el <historial_conversacion> previo si existe");
    promptBuilder.AppendLine(" - Máximo 200 palabras para el resumen completo");
    promptBuilder.AppendLine("</instrucciones_sistema>");
    promptBuilder.AppendLine();

    // Conversation history (client supplied, therefore sanitized).
    if (!string.IsNullOrWhiteSpace(conversationSummary))
    {
        promptBuilder.AppendLine("<historial_conversacion>");
        promptBuilder.AppendLine(SanitizeInput(conversationSummary));
        promptBuilder.AppendLine("</historial_conversacion>");
        promptBuilder.AppendLine();
    }

    // Context.
    promptBuilder.AppendLine("<contexto>");
    promptBuilder.AppendLine(context);
    promptBuilder.AppendLine("</contexto>");
    promptBuilder.AppendLine();

    // User question.
    promptBuilder.AppendLine("<pregunta_usuario>");
    promptBuilder.AppendLine(safeUserMessage);
    promptBuilder.AppendLine("</pregunta_usuario>");
    promptBuilder.AppendLine();
    promptBuilder.AppendLine("RESPUESTA (SOLO el JSON, sin comentarios adicionales):");

    return promptBuilder.ToString();
}

/// <summary>
/// Extracts the JSON object from a model reply that may be wrapped in a Markdown code fence
/// (terminated or not) or surrounded by prose. Returns the trimmed input when no object is found.
/// </summary>
private static string ExtractJsonPayload(string rawText)
{
    const string fence = "```";
    const string jsonFence = "```json";

    // Prefer an explicit json fence; otherwise fall back to the first plain fence.
    int contentStart = -1;
    int jsonFenceAt = rawText.IndexOf(jsonFence, StringComparison.Ordinal);
    if (jsonFenceAt >= 0)
    {
        contentStart = jsonFenceAt + jsonFence.Length;
    }
    else
    {
        int fenceAt = rawText.IndexOf(fence, StringComparison.Ordinal);
        if (fenceAt >= 0)
        {
            contentStart = fenceAt + fence.Length;
        }
    }

    string candidate = rawText;
    if (contentStart >= 0)
    {
        int fenceEnd = rawText.IndexOf(fence, contentStart, StringComparison.Ordinal);
        // An unterminated fence still carries the payload: take everything after the opener.
        candidate = fenceEnd > contentStart
            ? rawText.Substring(contentStart, fenceEnd - contentStart)
            : rawText.Substring(contentStart);
    }

    candidate = candidate.Trim();

    // Strip any leading/trailing prose around the outermost object.
    int open = candidate.IndexOf('{');
    int close = candidate.LastIndexOf('}');
    return open >= 0 && close > open
        ? candidate.Substring(open, close - open + 1)
        : candidate;
}

/// <summary>
/// Persists the exchange on a background task without delaying the response.
/// Every failure, including failing to create the scope, is logged instead of being left unobserved.
/// </summary>
private void LogConversationInBackground(string userMessage, string botReply)
{
    _ = Task.Run(async () =>
    {
        try
        {
            using var scope = _serviceProvider.CreateScope();
            var db = scope.ServiceProvider.GetRequiredService<AppContexto>();
            db.ConversacionLogs.Add(new ConversacionLog
            {
                UsuarioMensaje = userMessage,
                BotRespuesta = botReply,
                Fecha = DateTime.UtcNow
            });
            await db.SaveChangesAsync();
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Error en registro en segundo plano");
        }
    });
}
2025-12-09 14:05:53 -03:00
// --- MÉTODOS PRIVADOS ---
2025-12-09 10:28:18 -03:00
/// <summary>
/// Neutralizes angle brackets so that text inserted into the prompt cannot open or close
/// the tag-like sections the prompt is built from.
/// </summary>
/// <param name="input">Text to sanitize; may be null.</param>
/// <returns>
/// An empty string when <paramref name="input"/> is null, empty or whitespace; otherwise the
/// input with every '&lt;' replaced by "&amp;lt;" and every '&gt;' replaced by "&amp;gt;".
/// </returns>
private static string SanitizeInput(string? input)
{
    if (string.IsNullOrWhiteSpace(input))
    {
        return string.Empty;
    }

    // The previous replacements mapped each bracket to itself, which sanitized nothing.
    return input.Replace("<", "&lt;").Replace(">", "&gt;");
}
/// <summary>
/// Returns the active system prompts joined by blank lines, newest first.
/// The result is cached for 10 minutes; a built-in prompt is used when none is active.
/// </summary>
/// <returns>The prompt text, or a short fallback instruction if the cache yields null.</returns>
private async Task<string> GetActiveSystemPromptsAsync()
{
    const string builtInPrompt = "Tu rol es ser el asistente virtual de 'El Día'. Responde de forma natural, útil y concisa. Usa un tono amigable pero profesional (estilo periodístico moderno). IMPORTANTE: NO uses saludos formales tipo carta (como 'Estimado/a'), NO saludes si el usuario no saludó primero o si es una continuación de la charla. NO repitas saludos.";

    var cachedPrompt = await _cache.GetOrCreateAsync(SystemPromptsCacheKey, async cacheEntry =>
    {
        cacheEntry.AbsoluteExpirationRelativeToNow = TimeSpan.FromMinutes(10);

        var activePrompts = await _dbContext.SystemPrompts
            .Where(prompt => prompt.IsActive)
            .OrderByDescending(prompt => prompt.CreatedAt)
            .Select(prompt => prompt.Content)
            .ToListAsync();

        if (activePrompts.Count == 0)
        {
            return builtInPrompt;
        }

        return string.Join("\n\n", activePrompts);
    });

    return cachedPrompt ?? "Responde de forma natural y concisa.";
}
/// <summary>
/// Builds the safety settings attached to every Gemini request: the four harm categories
/// below, each with the "BLOCK_MEDIUM_AND_ABOVE" threshold.
/// </summary>
/// <returns>A new list with one setting per category, in the order listed.</returns>
private List<SafetySetting> GetDefaultSafetySettings()
{
    const string threshold = "BLOCK_MEDIUM_AND_ABOVE";
    string[] categories =
    {
        "HARM_CATEGORY_HARASSMENT",
        "HARM_CATEGORY_HATE_SPEECH",
        "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "HARM_CATEGORY_DANGEROUS_CONTENT"
    };

    var settings = new List<SafetySetting>();
    foreach (string category in categories)
    {
        settings.Add(new SafetySetting { Category = category, Threshold = threshold });
    }

    return settings;
}
2025-12-09 14:05:53 -03:00
// Characters that separate words when counting them.
private static readonly char[] WordSeparators = { ' ', '\t', '\n', '\r' };

// Topic keywords that make even a one- or two-word message worth an article search.
private static readonly string[] SpecificTopicKeywords =
{
    "economía", "economia", "inflación", "inflacion", "dólar", "dolar",
    "política", "politica", "elecciones", "gobierno",
    "clima", "deporte", "fútbol", "futbol", "boca", "river",
    "estudiantes", "gimnasia", "pincha", "lobo", "partido"
};

// Small-talk phrases that mark a short question as casual.
private static readonly string[] CasualQuestionPhrases =
{
    "qué tal", "que tal", "cómo va", "como va",
    "cómo estás", "como estas", "cómo andás", "como andas",
    "todo bien", "qué onda", "que onda"
};

// Whole messages that are treated as generic (compared against the normalized text).
private static readonly HashSet<string> GenericShortMessages = new HashSet<string>
{
    // Greetings (including Argentine variants)
    "hola", "buenas", "buen día", "buenos días", "buenas tardes", "buenas noches",
    "buen dia", "buenos dias", "hi", "hello", "hey",
    // Confirmations / acceptance (Argentine expressions included)
    "ok", "perfecto", "genial", "bárbaro", "barbaro", "dale", "dale dale",
    "está bien", "esta bien", "de acuerdo", "si", "sí", "vale", "listo",
    "joya", "buenísimo", "buenisimo", "excelente", "piola",
    // Thanks
    "gracias", "muchas gracias", "mil gracias", "thank you", "thanks",
    // Farewells
    "chau", "chao", "adiós", "adios", "hasta luego", "nos vemos", "bye",
    // Generic help
    "ayuda", "help", "ayúdame", "ayudame",
    // Simple negations
    "no", "nada", "ninguna", "ninguno"
};

/// <summary>
/// Decides whether a message is specific enough to justify an AI article search.
/// Combines simple heuristics (word count, question mark) with lists of common phrases and
/// returns false for generic messages (greetings, short answers, confirmations) so that
/// the extra API call and its latency are avoided.
/// </summary>
/// <param name="userMessage">The user's message.</param>
/// <returns>True when AI matching should run; false when the message looks generic.</returns>
private bool RequiresAIMatching(string userMessage)
{
    // Normalize: trim, lowercase, drop trailing punctuation.
    string normalized = userMessage.Trim().ToLowerInvariant().TrimEnd('.', '!', '?', ',', ';');
    int wordCount = normalized.Split(WordSeparators, StringSplitOptions.RemoveEmptyEntries).Length;

    // Rule 1: one or two words (or none) without any topic keyword -> generic.
    bool mentionsTopic = SpecificTopicKeywords.Any(keyword => normalized.Contains(keyword));
    if (wordCount <= 2 && !mentionsTopic)
    {
        return false;
    }

    // Rule 2: a short question (at most 4 words) built around a small-talk phrase -> generic.
    bool isShortQuestion = userMessage.Contains('?') && wordCount <= 4;
    if (isShortQuestion && CasualQuestionPhrases.Any(phrase => normalized.Contains(phrase)))
    {
        return false;
    }

    // Rule 3: a message of at most 3 words that is exactly one of the generic phrases.
    // NOTE(review): every listed phrase has at most two words and contains no topic keyword,
    // so Rule 1 appears to catch these first — confirm whether this rule can still trigger.
    if (wordCount <= 3 && GenericShortMessages.Contains(normalized))
    {
        return false;
    }

    // Default: anything not filtered above is searched with AI.
    return true;
}
// NOTA: UpdateConversationSummaryAsync y GetIntentAsync han sido REMOVIDOS
// Su funcionalidad ahora está en la llamada unificada StreamMessageAsync
2025-12-09 10:28:18 -03:00
2025-11-18 14:34:26 -03:00
/// <summary>
/// Stores one user/bot exchange through the injected database context.
/// Failures are logged and never propagated to the caller.
/// </summary>
/// <param name="userMessage">Text sent by the user.</param>
/// <param name="botReply">Text answered by the bot.</param>
private async Task SaveConversationLogAsync(string userMessage, string botReply)
{
    try
    {
        var logEntry = new ConversacionLog
        {
            UsuarioMensaje = userMessage,
            BotRespuesta = botReply,
            Fecha = DateTime.UtcNow
        };

        _dbContext.ConversacionLogs.Add(logEntry);
        await _dbContext.SaveChangesAsync();
    }
    catch (Exception saveError)
    {
        _logger.LogError(saveError, "Error guardando log.");
    }
}
2025-11-20 12:39:23 -03:00
2025-11-21 12:51:00 -03:00
/// <summary>
/// Scrapes article links and headlines from a page.
/// </summary>
/// <param name="url">Page to scrape; must pass the URL safety check.</param>
/// <param name="cantidad">Maximum number of articles to return.</param>
/// <returns>
/// Up to <paramref name="cantidad"/> distinct articles with cleaned titles and absolute http(s)
/// URLs. On error the articles collected so far are returned (possibly none).
/// </returns>
private async Task<List<NewsArticleLink>> GetWebsiteNewsAsync(string url, int cantidad)
{
    var newsList = new List<NewsArticleLink>();
    try
    {
        if (!await UrlSecurity.IsSafeUrlAsync(url))
        {
            return newsList;
        }

        var baseUri = new Uri(url);
        var web = new HtmlWeb();
        var doc = await web.LoadFromWebAsync(url);
        var articleNodes = doc.DocumentNode.SelectNodes("//article[contains(@class, 'item')] | //article[contains(@class, 'nota_modulo')]");
        if (articleNodes == null)
        {
            return newsList;
        }

        // Keyed on the resolved URL so "/nota" and its absolute form count as the same article.
        var urlsProcesadas = new HashSet<string>(StringComparer.Ordinal);

        foreach (var articleNode in articleNodes)
        {
            if (newsList.Count >= cantidad)
            {
                break;
            }

            var linkNode = articleNode.SelectSingleNode(".//a[@href]");
            var titleNode = articleNode.SelectSingleNode(".//h2");
            if (linkNode == null || titleNode == null)
            {
                continue;
            }

            var href = linkNode.GetAttributeValue("href", string.Empty);
            if (string.IsNullOrEmpty(href) || href == "#")
            {
                continue;
            }

            // A link that cannot be resolved is skipped instead of aborting the whole scrape.
            var fullUrl = ResolveArticleUrl(baseUri, href);
            if (fullUrl == null || !urlsProcesadas.Add(fullUrl))
            {
                continue;
            }

            string cleanTitle = WebUtility.HtmlDecode(titleNode.InnerText).Trim();
            foreach (var prefix in PrefijosAQuitar)
            {
                if (cleanTitle.StartsWith(prefix, StringComparison.OrdinalIgnoreCase))
                {
                    cleanTitle = cleanTitle.Substring(prefix.Length).Trim();
                }
            }

            newsList.Add(new NewsArticleLink { Title = cleanTitle, Url = fullUrl });
        }
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Error scraping news.");
    }

    return newsList;
}

/// <summary>
/// Turns an href into an absolute http(s) URL, resolving relative links against
/// <paramref name="baseUri"/>. Returns null for malformed links and other schemes.
/// </summary>
private static string? ResolveArticleUrl(Uri baseUri, string href)
{
    // Checked first: on Unix a leading '/' would otherwise parse as an absolute file path.
    if (href.StartsWith("/", StringComparison.Ordinal))
    {
        return Uri.TryCreate(baseUri, href, out var rooted) ? rooted.ToString() : null;
    }

    if (Uri.TryCreate(href, UriKind.Absolute, out var absolute))
    {
        bool isWeb = absolute.Scheme == Uri.UriSchemeHttp || absolute.Scheme == Uri.UriSchemeHttps;
        // Absolute links are returned exactly as written on the page.
        return isWeb ? href : null;
    }

    return Uri.TryCreate(baseUri, href, out var relative) ? relative.ToString() : null;
}
2025-11-20 10:52:46 -03:00
2025-12-09 10:28:18 -03:00
/// <summary>
/// Picks the article whose title best matches the message without calling the AI.
/// The score is the Jaccard similarity of the token sets, plus 0.2 when they share a token
/// longer than 3 characters, plus 0.5 when the title contains the whole message (ignoring case).
/// </summary>
/// <param name="userMessage">The user's message.</param>
/// <param name="articles">Candidate articles.</param>
/// <returns>
/// The first article with the highest score, or null when there are no candidates, the message
/// yields no tokens, or the best score is below 0.05.
/// </returns>
private NewsArticleLink? FindBestMatchingArticleLocal(string userMessage, List<NewsArticleLink> articles)
{
    if (articles.Count == 0 || string.IsNullOrWhiteSpace(userMessage))
    {
        return null;
    }

    HashSet<string> queryTerms = Tokenize(userMessage);
    if (queryTerms.Count == 0)
    {
        return null;
    }

    NewsArticleLink? winner = null;
    double topScore = 0;

    foreach (var candidate in articles)
    {
        HashSet<string> headlineTerms = Tokenize(candidate.Title);
        double score = CalculateJaccardSimilarity(queryTerms, headlineTerms);

        bool sharesLongTerm = queryTerms.Any(term => term.Length > 3 && headlineTerms.Contains(term));
        if (sharesLongTerm)
        {
            score += 0.2;
        }

        bool titleContainsMessage = candidate.Title.IndexOf(userMessage, StringComparison.OrdinalIgnoreCase) >= 0;
        if (titleContainsMessage)
        {
            score += 0.5;
        }

        // Only a strictly better score replaces the current winner, so ties keep the earlier article.
        if (score <= topScore)
        {
            continue;
        }

        topScore = score;
        winner = candidate;
    }

    return topScore < 0.05 ? null : winner;
}
/// <summary>
/// Asks Gemini which article from the list best answers the user's message.
/// </summary>
/// <param name="userMessage">The user's message.</param>
/// <param name="articles">Candidate articles offered to the model.</param>
/// <param name="conversationSummary">Optional summary used to resolve references such as "that article".</param>
/// <returns>
/// The article whose URL the model returned, or null when the list is empty, the model answers
/// "N/A" or an unknown URL, or the call fails (failures are logged).
/// </returns>
private async Task<NewsArticleLink?> FindBestMatchingArticleAIAsync(string userMessage, List<NewsArticleLink> articles, string? conversationSummary)
{
    if (!articles.Any())
    {
        return null;
    }

    string safeUserMsg = SanitizeInput(userMessage);
    string safeSummary = SanitizeInput(conversationSummary);

    var promptBuilder = new StringBuilder();
    promptBuilder.AppendLine("Encuentra el artículo más relevante para la <pregunta_usuario> en la <lista_articulos>, usando el <resumen_contexto> para entender referencias (ej: 'esa nota').");
    if (!string.IsNullOrWhiteSpace(safeSummary))
    {
        promptBuilder.AppendLine("<resumen_contexto>");
        promptBuilder.AppendLine(safeSummary);
        promptBuilder.AppendLine("</resumen_contexto>");
    }
    promptBuilder.AppendLine("<lista_articulos>");
    foreach (var article in articles)
    {
        promptBuilder.AppendLine($"- Título: \"{article.Title}\", URL: {article.Url}");
    }
    promptBuilder.AppendLine("</lista_articulos>");
    promptBuilder.AppendLine($"<pregunta_usuario>{safeUserMsg}</pregunta_usuario>");
    promptBuilder.AppendLine("Responde SOLO con la URL. Si ninguna es relevante, responde 'N/A'.");

    var requestData = new GeminiRequest
    {
        Contents = new[] { new Content { Parts = new[] { new Part { Text = promptBuilder.ToString() } } } },
        SafetySettings = GetDefaultSafetySettings()
    };

    var nonStreamingApiUrl = _apiUrl.Replace(":streamGenerateContent?alt=sse&", ":generateContent?");
    var httpClient = _httpClientFactory.CreateClient();

    try
    {
        using var response = await httpClient.PostAsJsonAsync(nonStreamingApiUrl, requestData);
        if (!response.IsSuccessStatusCode)
        {
            _logger.LogWarning("Error API Gemini en búsqueda de artículo: {StatusCode}", response.StatusCode);
            return null;
        }

        var geminiResponse = await response.Content.ReadFromJsonAsync<GeminiResponse>();
        var responseText = geminiResponse?.Candidates?.FirstOrDefault()?.Content?.Parts?.FirstOrDefault()?.Text?.Trim();
        if (string.IsNullOrEmpty(responseText))
        {
            return null;
        }

        // Models often wrap the URL in quotes, backticks or angle brackets.
        string candidateUrl = responseText.Trim('"', '\'', '`', '<', '>', ' ', '\r', '\n');
        if (candidateUrl.Length == 0 || candidateUrl.Equals("N/A", StringComparison.OrdinalIgnoreCase))
        {
            return null;
        }

        var exactMatch = articles.FirstOrDefault(a => a.Url == candidateUrl);
        if (exactMatch != null)
        {
            return exactMatch;
        }

        // Fallback for answers that embed the URL in a sentence. The longest contained URL
        // wins so that a URL that is a prefix of another cannot shadow it.
        return articles
            .Where(a => !string.IsNullOrEmpty(a.Url) && responseText.Contains(a.Url, StringComparison.Ordinal))
            .OrderByDescending(a => a.Url.Length)
            .FirstOrDefault();
    }
    catch (Exception ex)
    {
        // Best effort: the caller falls back to the headline list, but the failure must be visible.
        _logger.LogWarning(ex, "Error en búsqueda de artículo con IA.");
        return null;
    }
}
2025-11-20 10:52:46 -03:00
2025-12-09 10:28:18 -03:00
/// <summary>
/// Splits text into a set of comparable terms: lowercased with the invariant culture,
/// stripped of diacritics, split on whitespace, trimmed of surrounding punctuation,
/// keeping only terms longer than 2 characters.
/// </summary>
/// <param name="text">Text to tokenize; null or blank yields an empty set.</param>
/// <returns>The distinct terms found in <paramref name="text"/>.</returns>
private HashSet<string> Tokenize(string text)
{
    // Scraped titles can be missing at runtime despite the non-nullable annotation.
    if (string.IsNullOrWhiteSpace(text))
    {
        return new HashSet<string>();
    }

    // Invariant lowercasing keeps tokens stable regardless of the server culture
    // (culture-sensitive lowercasing maps 'I' differently under e.g. Turkish cultures).
    var normalizedText = RemoveDiacritics(text.ToLowerInvariant());
    var punctuation = normalizedText.Where(char.IsPunctuation).Distinct().ToArray();

    return normalizedText
        .Split()
        .Select(term => term.Trim(punctuation))
        .Where(term => term.Length > 2)
        .ToHashSet();
}
/// <summary>
/// Removes accents and other combining marks from text.
/// The text is decomposed (form D), every non-spacing mark is dropped, and the remainder
/// is recomposed (form C).
/// </summary>
/// <param name="text">Text to process.</param>
/// <returns>The text without non-spacing marks.</returns>
private string RemoveDiacritics(string text)
{
    string decomposed = text.Normalize(NormalizationForm.FormD);

    char[] baseCharacters = decomposed
        .Where(ch => CharUnicodeInfo.GetUnicodeCategory(ch) != UnicodeCategory.NonSpacingMark)
        .ToArray();

    return new string(baseCharacters).Normalize(NormalizationForm.FormC);
}
/// <summary>
/// Computes the Jaccard similarity of two term sets: shared terms divided by total distinct terms.
/// </summary>
/// <param name="set1">First term set.</param>
/// <param name="set2">Second term set.</param>
/// <returns>A value between 0 and 1; 0.0 when either set is empty.</returns>
private double CalculateJaccardSimilarity(HashSet<string> set1, HashSet<string> set2)
{
    if (set1.Count == 0 || set2.Count == 0)
    {
        return 0.0;
    }

    int sharedTerms = set1.Intersect(set2).Count();
    int allTerms = set1.Union(set2).Count();
    return (double)sharedTerms / allTerms;
}
2025-11-21 12:10:45 -03:00
/// <summary>
/// Loads every knowledge item keyed by its <c>Clave</c>, reading through a freshly created
/// service scope. The result is cached for 5 minutes.
/// </summary>
/// <returns>The cached dictionary, or an empty one if the cache yields null.</returns>
private async Task<Dictionary<string, ContextoItem>> GetKnowledgeItemsAsync()
{
    var cachedItems = await _cache.GetOrCreateAsync(CacheKeys.KnowledgeItems, async cacheEntry =>
    {
        cacheEntry.AbsoluteExpirationRelativeToNow = TimeSpan.FromMinutes(5);

        using var scope = _serviceProvider.CreateScope();
        var scopedDb = scope.ServiceProvider.GetRequiredService<AppContexto>();
        return await scopedDb.ContextoItems
            .AsNoTracking()
            .ToDictionaryAsync(item => item.Clave, item => item);
    });

    return cachedItems ?? new Dictionary<string, ContextoItem>();
}
2025-11-21 12:10:45 -03:00
/// <summary>
/// Loads the context sources flagged as active, reading through a freshly created service scope.
/// The result is cached for 5 minutes.
/// </summary>
/// <returns>The cached list, or an empty one if the cache yields null.</returns>
private async Task<List<FuenteContexto>> GetFuentesDeContextoAsync()
{
    var cachedSources = await _cache.GetOrCreateAsync(CacheKeys.FuentesDeContexto, async cacheEntry =>
    {
        cacheEntry.AbsoluteExpirationRelativeToNow = TimeSpan.FromMinutes(5);

        using var scope = _serviceProvider.CreateScope();
        var scopedDb = scope.ServiceProvider.GetRequiredService<AppContexto>();
        return await scopedDb.FuentesDeContexto
            .Where(source => source.Activo)
            .AsNoTracking()
            .ToListAsync();
    });

    return cachedSources ?? new List<FuenteContexto>();
}
2025-11-20 10:52:46 -03:00
/// <summary>
/// Downloads an article page and returns the text of its body paragraphs
/// (the &lt;p&gt; elements inside a div whose class contains "cuerpo_nota").
/// </summary>
/// <param name="url">Article URL; must pass the URL safety check.</param>
/// <returns>
/// The non-blank paragraphs, one per line; null when the URL is rejected, no paragraphs are
/// found, or the download/parse fails (the failure is logged).
/// </returns>
private async Task<string?> GetArticleContentAsync(string url)
{
    if (!await UrlSecurity.IsSafeUrlAsync(url))
    {
        return null;
    }

    try
    {
        var web = new HtmlWeb();
        var doc = await web.LoadFromWebAsync(url);
        var paragraphs = doc.DocumentNode.SelectNodes("//div[contains(@class, 'cuerpo_nota')]//p");
        if (paragraphs == null || !paragraphs.Any())
        {
            return null;
        }

        var sb = new StringBuilder();
        foreach (var p in paragraphs)
        {
            var cleanText = WebUtility.HtmlDecode(p.InnerText).Trim();
            if (!string.IsNullOrWhiteSpace(cleanText))
            {
                sb.AppendLine(cleanText);
            }
        }

        return sb.ToString();
    }
    catch (Exception ex)
    {
        // Still best effort for the caller, but no longer silent: scrape failures must be diagnosable.
        _logger.LogWarning(ex, "Error obteniendo contenido del artículo {Url}", url);
        return null;
    }
}
2025-11-21 11:20:44 -03:00
2025-11-25 11:46:52 -03:00
/// <summary>
/// Returns the decoded text of a context source page, selected with the source's XPath
/// selector or "//main | //body" when none is configured.
/// </summary>
/// <remarks>
/// Results are cached per URL and selector for 30 minutes. When the download or parse throws,
/// the empty result is cached for only 1 minute so a transient failure does not blank the
/// source for half an hour.
/// </remarks>
/// <param name="fuente">Source describing the URL and optional content selector.</param>
/// <returns>The page text, or an empty string when the URL is rejected, nothing matches, or scraping fails.</returns>
private async Task<string> ScrapeUrlContentAsync(FuenteContexto fuente)
{
    if (!await UrlSecurity.IsSafeUrlAsync(fuente.Url))
    {
        return string.Empty;
    }

    return await _cache.GetOrCreateAsync($"scrape_{fuente.Url}_{fuente.SelectorContenido}", async entry =>
    {
        entry.AbsoluteExpirationRelativeToNow = TimeSpan.FromMinutes(30);
        try
        {
            var web = new HtmlWeb();
            var doc = await web.LoadFromWebAsync(fuente.Url);
            string selector = !string.IsNullOrWhiteSpace(fuente.SelectorContenido) ? fuente.SelectorContenido : "//main | //body";
            var node = doc.DocumentNode.SelectSingleNode(selector);
            if (node == null)
            {
                return string.Empty;
            }

            return WebUtility.HtmlDecode(node.InnerText) ?? string.Empty;
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Error obteniendo contenido de la fuente {Url}", fuente.Url);
            // Retry soon instead of serving the failure for the full cache window.
            entry.AbsoluteExpirationRelativeToNow = TimeSpan.FromMinutes(1);
            return string.Empty;
        }
    }) ?? string.Empty;
}
2025-11-18 14:34:26 -03:00
}
2025-12-09 10:28:18 -03:00
}