2025-12-09 10:28:18 -03:00
using System.Runtime.CompilerServices ;
2025-11-18 14:34:26 -03:00
using System.Text ;
2025-12-09 10:28:18 -03:00
using System.Text.Json ;
using ChatbotApi.Data.Models ;
2025-11-18 14:34:26 -03:00
using HtmlAgilityPack ;
using Microsoft.Extensions.Caching.Memory ;
2025-12-05 13:02:23 -03:00
using Microsoft.EntityFrameworkCore ;
2025-12-09 10:28:18 -03:00
using System.Net ;
using System.Globalization ;
2025-12-05 13:02:23 -03:00
2025-12-09 10:28:18 -03:00
namespace ChatbotApi.Services
2025-11-21 12:51:00 -03:00
{
2025-12-09 10:28:18 -03:00
/// <summary>
/// Contract for a chat service that answers a user message as an asynchronous stream of strings.
/// </summary>
public interface IChatService
{
/// <summary>
/// Produces the reply for <paramref name="request"/> as an asynchronous sequence of strings.
/// </summary>
/// <param name="request">The incoming chat request.</param>
/// <param name="cancellationToken">Token the caller can use to cancel the stream.</param>
/// <returns>An asynchronous sequence of string fragments.</returns>
IAsyncEnumerable < string > StreamMessageAsync ( ChatRequest request , CancellationToken cancellationToken ) ;
}
2025-11-18 14:34:26 -03:00
2025-12-09 10:28:18 -03:00
public class ChatService : IChatService
2025-11-18 14:34:26 -03:00
{
2025-12-09 10:28:18 -03:00
// ---- Dependencies received through the constructor ----

/// <summary>Creates the HTTP clients used for outbound API calls.</summary>
private readonly IHttpClientFactory _httpClientFactory;

/// <summary>In-memory cache shared by the lookup helpers of this class.</summary>
private readonly IMemoryCache _cache;

/// <summary>Service provider used to open dependency-injection scopes on demand.</summary>
private readonly IServiceProvider _serviceProvider;

/// <summary>Logger for this service.</summary>
private readonly ILogger<ChatService> _logger;

/// <summary>Gemini endpoint: the configured base URL followed by the API key.</summary>
private readonly string _apiUrl;

/// <summary>Database context supplied through constructor injection.</summary>
private readonly AppContexto _dbContext;

// ---- Static configuration ----

/// <summary>Front page that is scraped for news headlines.</summary>
private static readonly string _siteUrl = "https://www.eldia.com/";

/// <summary>Media prefixes stripped from the beginning of scraped headlines.</summary>
private static readonly string[] PrefijosAQuitar = new[] { "VIDEO.- ", "VIDEO. ", "FOTOS.- ", "FOTOS. " };

/// <summary>Value sent as <c>MaxOutputTokens</c> for the streamed answer.</summary>
private const int OutTokens = 8192;

/// <summary>Cache key under which the joined active system prompts are stored.</summary>
private const string SystemPromptsCacheKey = "ActiveSystemPrompts";
2025-12-09 10:28:18 -03:00
/// <summary>
/// Creates the service and builds the Gemini endpoint from configuration.
/// </summary>
/// <param name="configuration">Source of <c>Gemini:GeminiApiKey</c> and <c>Gemini:GeminiApiUrl</c>.</param>
/// <param name="memoryCache">Cache used by the lookup helpers.</param>
/// <param name="serviceProvider">Provider used to open dependency-injection scopes.</param>
/// <param name="logger">Logger for this service.</param>
/// <param name="httpClientFactory">Factory for outbound HTTP clients.</param>
/// <param name="dbContext">Injected database context.</param>
/// <exception cref="InvalidOperationException">
/// Thrown when the API key or the API URL is missing or blank.
/// </exception>
public ChatService(
    IConfiguration configuration,
    IMemoryCache memoryCache,
    IServiceProvider serviceProvider,
    ILogger<ChatService> logger,
    IHttpClientFactory httpClientFactory,
    AppContexto dbContext)
{
    _logger = logger;
    _cache = memoryCache;
    _serviceProvider = serviceProvider;
    _httpClientFactory = httpClientFactory;
    _dbContext = dbContext;

    var apiKey = configuration["Gemini:GeminiApiKey"];
    if (string.IsNullOrWhiteSpace(apiKey))
    {
        throw new InvalidOperationException("La API Key de Gemini no está configurada en .env");
    }

    // Fail fast here as well: without the base URL the endpoint would consist of the key alone
    // and every request would fail much later with a far less obvious error.
    var baseUrl = configuration["Gemini:GeminiApiUrl"];
    if (string.IsNullOrWhiteSpace(baseUrl))
    {
        throw new InvalidOperationException("La URL de la API de Gemini no está configurada en .env");
    }

    _apiUrl = $"{baseUrl}{apiKey}";
}
2025-11-20 12:39:23 -03:00
2025-12-09 10:28:18 -03:00
/// <summary>
/// Answers a chat request as a stream of strings.
/// </summary>
/// <remarks>
/// The sequence is: an optional validation error; otherwise one <c>INTENT::{intent}</c> marker,
/// then either a single error message or the answer fragments as they arrive from the model,
/// and finally one <c>SUMMARY::{summary}</c> marker when at least one fragment was produced.
/// </remarks>
/// <param name="request">Chat request carrying the message, optional context URL, summary and shown articles.</param>
/// <param name="cancellationToken">Cancels the model request and the reading of its response stream.</param>
/// <returns>The markers and answer fragments described above.</returns>
public async IAsyncEnumerable<string> StreamMessageAsync(ChatRequest request, [EnumeratorCancellation] CancellationToken cancellationToken)
{
    if (string.IsNullOrWhiteSpace(request?.Message))
    {
        yield return "Error: No he recibido ningún mensaje.";
        yield break;
    }

    string safeUserMessage = SanitizeInput(request.Message);
    string context = "";
    string promptInstructions = "";
    string? errorMessage = null;
    IntentType intent = IntentType.Homepage;

    // Start loading the stored system prompts now so the lookup overlaps with intent detection
    // and scraping. Skipped entirely when the request overrides the system prompt.
    Task<string>? systemPromptsTask = string.IsNullOrWhiteSpace(request.SystemPromptOverride)
        ? GetActiveSystemPromptsAsync()
        : null;

    try
    {
        string? articleContext = null;
        if (!string.IsNullOrEmpty(request.ContextUrl) && await UrlSecurity.IsSafeUrlAsync(request.ContextUrl))
        {
            articleContext = await GetArticleContentAsync(request.ContextUrl);
        }

        intent = await GetIntentAsync(safeUserMessage, articleContext, request.ConversationSummary);

        // An "Article" intent without article content means the user asked about a specific
        // topic without having a story open: treat it as a front-page search instead.
        if (intent == IntentType.Article && string.IsNullOrEmpty(articleContext))
        {
            intent = IntentType.Homepage;
        }

        switch (intent)
        {
            case IntentType.Article:
            {
                // Sanitized like every other context source so scraped markup cannot close <contexto>.
                context = SanitizeInput(articleContext ?? "No se pudo cargar el artículo.");
                promptInstructions = "Responde la pregunta dentro de <pregunta_usuario> basándote ESTRICTA Y ÚNICAMENTE en la información dentro de <contexto>.";
                break;
            }

            case IntentType.KnowledgeBase:
            {
                // Both lookups are independent, so run them concurrently.
                var knowledgeTask = GetKnowledgeItemsAsync();
                var fuentesTask = GetFuentesDeContextoAsync();
                await Task.WhenAll(knowledgeTask, fuentesTask);

                var contextBuilder = new StringBuilder();
                foreach (var item in (await knowledgeTask).Values)
                {
                    contextBuilder.AppendLine($"- TEMA: {item.Descripcion}\n INFORMACIÓN: {item.Valor}");
                }

                foreach (var fuente in await fuentesTask)
                {
                    if (await UrlSecurity.IsSafeUrlAsync(fuente.Url))
                    {
                        contextBuilder.AppendLine($"\n--- {fuente.Nombre} ---");
                        string scrapedContent = await ScrapeUrlContentAsync(fuente);
                        contextBuilder.AppendLine(SanitizeInput(scrapedContent));
                    }
                }

                context = contextBuilder.ToString();
                promptInstructions = "Responde basándote ESTRICTA Y ÚNICAMENTE en la información proporcionada en <contexto>.";
                break;
            }

            default:
            {
                var articles = await GetWebsiteNewsAsync(_siteUrl, 50);

                // Do not offer again the stories the client says it has already shown.
                if (request.ShownArticles != null && request.ShownArticles.Any())
                {
                    articles = articles
                        .Where(a => !request.ShownArticles.Contains(a.Url))
                        .ToList();
                }

                // Hybrid search: cheap local matching first, the model only when that finds nothing.
                var bestMatch = FindBestMatchingArticleLocal(safeUserMessage, articles)
                    ?? await FindBestMatchingArticleAIAsync(safeUserMessage, articles, request.ConversationSummary);

                if (bestMatch != null && await UrlSecurity.IsSafeUrlAsync(bestMatch.Url))
                {
                    string rawContent = await GetArticleContentAsync(bestMatch.Url) ?? "";
                    context = SanitizeInput(rawContent);
                    promptInstructions = $"La pregunta es sobre el artículo '{bestMatch.Title}'. Responde con un resumen conciso y ofrece el enlace: [{bestMatch.Title}]({bestMatch.Url}).";
                }
                else
                {
                    // No match, or a match whose URL failed the safety check: fall back to the
                    // headline list (first 15 only, to keep the prompt small) rather than sending
                    // the model an empty context with no instructions.
                    var sb = new StringBuilder();
                    foreach (var article in articles.Take(15))
                    {
                        sb.AppendLine($"- {article.Title} ({article.Url})");
                    }

                    context = sb.ToString();
                    promptInstructions = "Responde la pregunta del usuario. Si la pregunta es sobre algo mencionado en el <historial_conversacion>, responde basándote en eso (por ejemplo, si pregunta 'dónde puedo leerla', proporciona el enlace mencionado anteriormente). Si la pregunta es sobre noticias actuales, selecciona las 3 más relevantes del <contexto>, escribe una frase breve para cada una e INCLUYE el enlace con formato [Título](URL).";
                }

                break;
            }
        }
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Error procesando intención.");
        errorMessage = "Lo siento, hubo un problema técnico procesando tu solicitud.";
    }

    yield return $"INTENT::{intent}";

    if (!string.IsNullOrEmpty(errorMessage))
    {
        yield return errorMessage;
        yield break;
    }

    HttpResponseMessage? response = null;
    Stream? responseStream = null;
    var fullBotReply = new StringBuilder();
    var httpClient = _httpClientFactory.CreateClient();
    httpClient.Timeout = TimeSpan.FromSeconds(30);

    // "yield return" is not allowed inside a try block that has a catch clause, so the request
    // is prepared and sent here and the response is streamed in a separate section below.
    try
    {
        string? systemInstructions = systemPromptsTask is null
            ? request.SystemPromptOverride
            : await systemPromptsTask;

        var promptBuilder = new StringBuilder();
        promptBuilder.AppendLine("<instrucciones_sistema>");
        promptBuilder.AppendLine("Eres DiaBot, asistente virtual de El Día (La Plata, Argentina).");
        promptBuilder.AppendLine(systemInstructions);
        promptBuilder.AppendLine("IMPORTANTE:");
        promptBuilder.AppendLine("- NO uses formatos de email/carta ('Estimado/a', 'Atentamente').");
        promptBuilder.AppendLine("- NO saludes de nuevo si ya saludaste o si la pregunta es directa, ve al grano.");
        promptBuilder.AppendLine("- Sé conciso, directo y natural.");
        promptBuilder.AppendLine("- Si el usuario pregunta '¿algo más?' o '¿qué más?', asume que pide más noticias de la portada y no saludes.");
        promptBuilder.AppendLine(promptInstructions);

        try
        {
            var timeInfo = TimeZoneInfo.ConvertTimeFromUtc(DateTime.UtcNow, TimeZoneInfo.FindSystemTimeZoneById("America/Argentina/Buenos_Aires"));
            promptBuilder.AppendLine($"Fecha y hora actual: {timeInfo:dd/MM/yyyy HH:mm}");
        }
        catch (Exception ex)
        {
            // Best effort: the prompt still works without the date, but leave a trace.
            _logger.LogWarning(ex, "No se pudo resolver la zona horaria; se omite la fecha en el prompt.");
        }

        promptBuilder.AppendLine("</instrucciones_sistema>");

        // Conversation history lets the model resolve references to earlier turns.
        if (!string.IsNullOrWhiteSpace(request.ConversationSummary))
        {
            promptBuilder.AppendLine("<historial_conversacion>");
            promptBuilder.AppendLine(SanitizeInput(request.ConversationSummary));
            promptBuilder.AppendLine("</historial_conversacion>");
        }

        promptBuilder.AppendLine("<contexto>");
        promptBuilder.AppendLine(context);
        promptBuilder.AppendLine("</contexto>");
        promptBuilder.AppendLine("<pregunta_usuario>");
        promptBuilder.AppendLine(safeUserMessage);
        promptBuilder.AppendLine("</pregunta_usuario>");
        promptBuilder.AppendLine("RESPUESTA:");

        var requestData = new GeminiRequest
        {
            Contents = new[] { new Content { Parts = new[] { new Part { Text = promptBuilder.ToString() } } } },
            GenerationConfig = new GenerationConfig { MaxOutputTokens = OutTokens },
            SafetySettings = GetDefaultSafetySettings()
        };

        using var httpRequestMessage = new HttpRequestMessage(HttpMethod.Post, _apiUrl)
        {
            Content = JsonContent.Create(requestData)
        };

        response = await httpClient.SendAsync(httpRequestMessage, HttpCompletionOption.ResponseHeadersRead, cancellationToken);
        if (!response.IsSuccessStatusCode)
        {
            _logger.LogWarning("Error API Gemini: {StatusCode}", response.StatusCode);
            throw new HttpRequestException("Error en proveedor de IA.");
        }

        responseStream = await response.Content.ReadAsStreamAsync(cancellationToken);
    }
    catch (Exception ex)
    {
        // With ResponseHeadersRead the connection stays tied to the response until it is disposed.
        response?.Dispose();
        _logger.LogError(ex, "Error en stream.");
        errorMessage = "Lo siento, servicio temporalmente no disponible.";
    }

    if (!string.IsNullOrEmpty(errorMessage))
    {
        yield return errorMessage;
        yield break;
    }

    if (responseStream != null)
    {
        using (response)
        await using (responseStream)
        using (var reader = new StreamReader(responseStream))
        {
            string? line;
            while ((line = await reader.ReadLineAsync(cancellationToken)) != null)
            {
                // Only server-sent-event data lines carry a JSON payload.
                if (!line.StartsWith("data: ", StringComparison.Ordinal))
                {
                    continue;
                }

                string? chunk = null;
                try
                {
                    var geminiResponse = JsonSerializer.Deserialize<GeminiStreamingResponse>(line.Substring(6));
                    chunk = geminiResponse?.Candidates?.FirstOrDefault()?.Content?.Parts?.FirstOrDefault()?.Text;
                }
                catch (JsonException)
                {
                    // Skip payloads that are not valid JSON and keep reading.
                    continue;
                }

                if (chunk != null)
                {
                    fullBotReply.Append(chunk);
                    yield return chunk;
                }
            }
        }
    }

    if (fullBotReply.Length > 0)
    {
        // Snapshot once so the background task never reads the builder.
        string botReply = fullBotReply.ToString();

        // Fire-and-forget persistence of the exchange, using its own DI scope.
        _ = Task.Run(async () =>
        {
            try
            {
                using var scope = _serviceProvider.CreateScope();
                var db = scope.ServiceProvider.GetRequiredService<AppContexto>();
                db.ConversacionLogs.Add(new ConversacionLog
                {
                    UsuarioMensaje = safeUserMessage,
                    BotRespuesta = botReply,
                    Fecha = DateTime.UtcNow
                });
                await db.SaveChangesAsync();
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Error in background logging");
            }
        });

        // The summary stays in the foreground because the client receives it in this stream.
        var newSummary = await UpdateConversationSummaryAsync(request.ConversationSummary, safeUserMessage, botReply);
        yield return $"SUMMARY::{newSummary}";
    }
}
2025-12-09 10:28:18 -03:00
// --- PRIVATE METHODS ---
/// <summary>
/// Neutralises angle brackets so text embedded in a prompt cannot open or close the
/// XML-like delimiters (for example <c>&lt;contexto&gt;</c>) used to structure it.
/// </summary>
/// <param name="input">Raw text; may be <c>null</c>.</param>
/// <returns>
/// An empty string when <paramref name="input"/> is null, empty or whitespace; otherwise the
/// text with every <c>&lt;</c> and <c>&gt;</c> replaced by its HTML entity.
/// </returns>
private string SanitizeInput(string? input)
{
    if (string.IsNullOrWhiteSpace(input)) return string.Empty;

    // '&' is deliberately left alone: already-sanitized text is sanitized again by other
    // helpers in this class, and escaping '&' would double-encode it on the second pass.
    return input.Replace("<", "&lt;").Replace(">", "&gt;");
}
/// <summary>
/// Returns the active system prompts joined by blank lines, cached for ten minutes.
/// </summary>
/// <remarks>
/// The query runs on a context resolved from its own scope, like the other cached loaders in
/// this class. The streaming entry point starts this method without awaiting it immediately,
/// so it must not share a context instance with code that may be running at the same time.
/// </remarks>
/// <returns>
/// The joined prompts, a built-in default prompt when none is active, or a short generic
/// instruction when the cache yields <c>null</c>.
/// </returns>
private async Task<string> GetActiveSystemPromptsAsync()
{
    return await _cache.GetOrCreateAsync(SystemPromptsCacheKey, async entry =>
    {
        entry.AbsoluteExpirationRelativeToNow = TimeSpan.FromMinutes(10);

        using var scope = _serviceProvider.CreateScope();
        var db = scope.ServiceProvider.GetRequiredService<AppContexto>();

        var prompts = await db.SystemPrompts
            .AsNoTracking()
            .Where(p => p.IsActive)
            .OrderByDescending(p => p.CreatedAt)
            .Select(p => p.Content)
            .ToListAsync();

        if (prompts.Count == 0)
        {
            return "Tu rol es ser el asistente virtual de 'El Día'. Responde de forma natural, útil y concisa. Usa un tono amigable pero profesional (estilo periodístico moderno). IMPORTANTE: NO uses saludos formales tipo carta (como 'Estimado/a'), NO saludes si el usuario no saludó primero o si es una continuación de la charla. NO repitas saludos.";
        }

        return string.Join("\n\n", prompts);
    }) ?? "Responde de forma natural y concisa.";
}
/// <summary>
/// Builds the safety settings attached to every Gemini request: the four harm categories,
/// each with the same blocking threshold.
/// </summary>
/// <returns>A new list with one <see cref="SafetySetting"/> per category.</returns>
private List<SafetySetting> GetDefaultSafetySettings()
{
    const string threshold = "BLOCK_MEDIUM_AND_ABOVE";
    string[] categories =
    {
        "HARM_CATEGORY_HARASSMENT",
        "HARM_CATEGORY_HATE_SPEECH",
        "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "HARM_CATEGORY_DANGEROUS_CONTENT"
    };

    var settings = new List<SafetySetting>(categories.Length);
    foreach (string category in categories)
    {
        settings.Add(new SafetySetting { Category = category, Threshold = threshold });
    }

    return settings;
}
/// <summary>
/// Asks the model for an updated conversation summary based on the previous summary and the
/// latest user/bot exchange.
/// </summary>
/// <param name="oldSummary">Previous summary, or <c>null</c> for a new conversation.</param>
/// <param name="userMessage">The user's latest message.</param>
/// <param name="botResponse">The bot's latest reply; only its first 300 characters are sent.</param>
/// <returns>
/// The new summary, or the sanitized previous summary when the call fails, returns a
/// non-success status, or yields no text.
/// </returns>
private async Task<string> UpdateConversationSummaryAsync(string? oldSummary, string userMessage, string botResponse)
{
    string safeOldSummary = SanitizeInput(oldSummary ?? "Esta es una nueva conversación.");
    string safeUserMsg = SanitizeInput(userMessage);

    // Truncate on a character boundary: cutting between the two halves of a surrogate pair
    // would leave a lone surrogate, which is not valid text to serialize.
    int cut = Math.Min(botResponse.Length, 300);
    if (cut > 0 && cut < botResponse.Length && char.IsHighSurrogate(botResponse[cut - 1]))
    {
        cut--;
    }
    string safeBotMsg = SanitizeInput(botResponse.Substring(0, cut));

    var promptBuilder = new StringBuilder();
    promptBuilder.AppendLine("Tu tarea es actualizar un resumen de conversación. Basado en el <resumen_anterior> y el <ultimo_intercambio>, crea un nuevo resumen conciso.");
    promptBuilder.AppendLine($"<resumen_anterior>{safeOldSummary}</resumen_anterior>");
    promptBuilder.AppendLine("<ultimo_intercambio>");
    promptBuilder.AppendLine($"Usuario: {safeUserMsg}");
    promptBuilder.AppendLine($"Bot: {safeBotMsg}...");
    promptBuilder.AppendLine("</ultimo_intercambio>");
    promptBuilder.AppendLine("\nResponde SOLO con el nuevo resumen.");

    var requestData = new GeminiRequest
    {
        Contents = new[] { new Content { Parts = new[] { new Part { Text = promptBuilder.ToString() } } } },
        SafetySettings = GetDefaultSafetySettings()
    };

    // The configured URL targets the streaming endpoint; this call needs the plain one.
    var nonStreamingApiUrl = _apiUrl.Replace(":streamGenerateContent?alt=sse&", ":generateContent?");
    var httpClient = _httpClientFactory.CreateClient();

    try
    {
        using var response = await httpClient.PostAsJsonAsync(nonStreamingApiUrl, requestData);
        if (!response.IsSuccessStatusCode) return safeOldSummary;

        var geminiResponse = await response.Content.ReadFromJsonAsync<GeminiResponse>();
        var newSummary = geminiResponse?.Candidates?.FirstOrDefault()?.Content?.Parts?.FirstOrDefault()?.Text?.Trim();
        return newSummary ?? safeOldSummary;
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Excepción en UpdateConversationSummaryAsync.");
        return safeOldSummary;
    }
}
/// <summary>
/// Asks the model to classify the user's question into one of the three information sources.
/// </summary>
/// <param name="userMessage">The user's question.</param>
/// <param name="activeArticleContent">Text of the article currently open, if any; only the first 1000 characters are sent.</param>
/// <param name="conversationSummary">Summary of the conversation so far, if any.</param>
/// <returns>
/// <see cref="IntentType.Article"/> or <see cref="IntentType.KnowledgeBase"/> when the reply
/// names that category; <see cref="IntentType.Homepage"/> otherwise, including on any failure.
/// </returns>
private async Task<IntentType> GetIntentAsync(string userMessage, string? activeArticleContent, string? conversationSummary)
{
    string safeUserMsg = SanitizeInput(userMessage);
    string safeSummary = SanitizeInput(conversationSummary);

    // Truncate on a character boundary so a surrogate pair is never split in half.
    string article = activeArticleContent ?? "";
    int cut = Math.Min(article.Length, 1000);
    if (cut > 0 && cut < article.Length && char.IsHighSurrogate(article[cut - 1]))
    {
        cut--;
    }
    string safeArticle = SanitizeInput(article.Substring(0, cut));

    var promptBuilder = new StringBuilder();
    promptBuilder.AppendLine("Actúa como un router de intenciones. Analiza la <pregunta_usuario> y decide qué fuente de información usar.");
    promptBuilder.AppendLine("Categorías posibles: [ARTICULO_ACTUAL], [BASE_DE_CONOCIMIENTO], [NOTICIAS_PORTADA].");

    if (!string.IsNullOrWhiteSpace(safeSummary))
    {
        promptBuilder.AppendLine($"<resumen_conversacion>{safeSummary}</resumen_conversacion>");
    }

    if (!string.IsNullOrEmpty(safeArticle))
    {
        promptBuilder.AppendLine($"<contexto_articulo>{safeArticle}...</contexto_articulo>");
    }

    promptBuilder.AppendLine("\n--- CRITERIOS DE DECISIÓN ESTRICTOS ---");
    promptBuilder.AppendLine("1. [ARTICULO_ACTUAL]: SOLO si la pregunta es sobre el MISMO TEMA del <contexto_articulo>.");
    promptBuilder.AppendLine(" Ejemplos: '¿qué más dice?', 'cuándo pasó?', 'quién es?', 'dame detalles'.");
    promptBuilder.AppendLine(" IMPORTANTE: Si la pregunta menciona un tema DIFERENTE al artículo, NO uses esta categoría.");
    promptBuilder.AppendLine("");
    promptBuilder.AppendLine("2. [NOTICIAS_PORTADA]: Si la pregunta es sobre:");
    promptBuilder.AppendLine(" - Noticias generales ('¿qué hay?', '¿algo más?', 'novedades')");
    promptBuilder.AppendLine(" - Un tema DIFERENTE al del artículo actual");
    promptBuilder.AppendLine(" - Cualquier tema que NO esté en el <contexto_articulo>");
    promptBuilder.AppendLine("");
    promptBuilder.AppendLine("3. [BASE_DE_CONOCIMIENTO]: Solo para preguntas sobre el diario 'El Día' como empresa/organización.");
    promptBuilder.AppendLine($"\n<pregunta_usuario>{safeUserMsg}</pregunta_usuario>");
    promptBuilder.AppendLine("\nResponde ÚNICAMENTE con el nombre de la categoría entre corchetes. Si hay duda, usa [NOTICIAS_PORTADA].");

    var requestData = new GeminiRequest
    {
        Contents = new[] { new Content { Parts = new[] { new Part { Text = promptBuilder.ToString() } } } },
        SafetySettings = GetDefaultSafetySettings()
    };

    // The configured URL targets the streaming endpoint; this call needs the plain one.
    var nonStreamingApiUrl = _apiUrl.Replace(":streamGenerateContent?alt=sse&", ":generateContent?");
    var httpClient = _httpClientFactory.CreateClient();

    try
    {
        using var response = await httpClient.PostAsJsonAsync(nonStreamingApiUrl, requestData);
        if (!response.IsSuccessStatusCode) return IntentType.Homepage;

        var geminiResponse = await response.Content.ReadFromJsonAsync<GeminiResponse>();
        var responseText = geminiResponse?.Candidates?.FirstOrDefault()?.Content?.Parts?.FirstOrDefault()?.Text?.Trim() ?? "";

        if (responseText.Contains("ARTICULO_ACTUAL", StringComparison.Ordinal)) return IntentType.Article;
        if (responseText.Contains("BASE_DE_CONOCIMIENTO", StringComparison.Ordinal)) return IntentType.KnowledgeBase;
        return IntentType.Homepage;
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Excepción en GetIntentAsync.");
        return IntentType.Homepage;
    }
}
2025-11-18 14:34:26 -03:00
/// <summary>
/// Stores one user/bot exchange through the injected database context.
/// Failures are logged and never propagated to the caller.
/// </summary>
/// <param name="userMessage">Text stored as the user's message.</param>
/// <param name="botReply">Text stored as the bot's reply.</param>
private async Task SaveConversationLogAsync(string userMessage, string botReply)
{
    try
    {
        // Uses the constructor-injected context directly rather than opening a new scope.
        var logEntry = new ConversacionLog
        {
            UsuarioMensaje = userMessage,
            BotRespuesta = botReply,
            Fecha = DateTime.UtcNow
        };

        _dbContext.ConversacionLogs.Add(logEntry);
        await _dbContext.SaveChangesAsync();
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Error guardando log.");
    }
}
2025-11-20 12:39:23 -03:00
2025-11-21 12:51:00 -03:00
/// <summary>
/// Scrapes headline links from the page at <paramref name="url"/>.
/// </summary>
/// <param name="url">Page to load; nothing is scraped when it fails the URL safety check.</param>
/// <param name="cantidad">Maximum number of links to return.</param>
/// <returns>
/// Up to <paramref name="cantidad"/> links, one per distinct href. Any exception is logged and
/// the links collected so far are returned.
/// </returns>
private async Task<List<NewsArticleLink>> GetWebsiteNewsAsync(string url, int cantidad)
{
    var links = new List<NewsArticleLink>();

    try
    {
        bool isSafe = await UrlSecurity.IsSafeUrlAsync(url);
        if (!isSafe) return links;

        var document = await new HtmlWeb().LoadFromWebAsync(url);
        var articleNodes = document.DocumentNode.SelectNodes("//article[contains(@class, 'item')] | //article[contains(@class, 'nota_modulo')]");
        if (articleNodes == null) return links;

        var seenHrefs = new HashSet<string>();
        foreach (var node in articleNodes)
        {
            if (links.Count >= cantidad) break;

            var anchor = node.SelectSingleNode(".//a[@href]");
            var heading = node.SelectSingleNode(".//h2");
            if (anchor == null || heading == null) continue;

            var href = anchor.GetAttributeValue("href", string.Empty);
            if (string.IsNullOrEmpty(href) || href == "#" || seenHrefs.Contains(href)) continue;

            // Root-relative links are resolved against the page URL; anything else is kept as found.
            string absoluteUrl = href;
            if (href.StartsWith("/"))
            {
                absoluteUrl = new Uri(new Uri(url), href).ToString();
            }

            // Strip media prefixes such as "VIDEO.- " from the headline.
            string title = WebUtility.HtmlDecode(heading.InnerText).Trim();
            foreach (var prefix in PrefijosAQuitar)
            {
                if (title.StartsWith(prefix, StringComparison.OrdinalIgnoreCase))
                {
                    title = title.Substring(prefix.Length).Trim();
                }
            }

            links.Add(new NewsArticleLink { Title = title, Url = absoluteUrl });
            seenHrefs.Add(href);
        }
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Error scraping news.");
    }

    return links;
}
2025-11-20 10:52:46 -03:00
2025-12-09 10:28:18 -03:00
/// <summary>
/// Picks the article whose title best matches the user's message without calling the model.
/// </summary>
/// <remarks>
/// Each title is scored with the Jaccard similarity of the tokenized message and title,
/// plus 0.2 when they share a term longer than three characters and plus 0.5 when the whole
/// message appears inside the title (case-insensitive). The first article with the highest
/// score wins.
/// </remarks>
/// <param name="userMessage">The user's message.</param>
/// <param name="articles">Candidate articles.</param>
/// <returns>The best article, or <c>null</c> when there is no input or the best score is below 0.05.</returns>
private NewsArticleLink? FindBestMatchingArticleLocal(string userMessage, List<NewsArticleLink> articles)
{
    if (!articles.Any() || string.IsNullOrWhiteSpace(userMessage)) return null;

    var queryTerms = Tokenize(userMessage);
    if (!queryTerms.Any()) return null;

    double ScoreTitle(string title)
    {
        var titleTerms = Tokenize(title);
        double score = CalculateJaccardSimilarity(queryTerms, titleTerms);

        // Boost: the message and the title share a meaningful (longer than 3 chars) term.
        bool sharesKeyword = queryTerms.Intersect(titleTerms).Any(term => term.Length > 3);
        if (sharesKeyword) score += 0.2;

        // Boost: the whole message appears verbatim in the title (phrase match).
        bool containsPhrase = title.IndexOf(userMessage, StringComparison.OrdinalIgnoreCase) >= 0;
        if (containsPhrase) score += 0.5;

        return score;
    }

    NewsArticleLink? winner = null;
    double winnerScore = 0;
    for (int i = 0; i < articles.Count; i++)
    {
        var candidate = articles[i];
        double candidateScore = ScoreTitle(candidate.Title);
        if (candidateScore > winnerScore)
        {
            winner = candidate;
            winnerScore = candidateScore;
        }
    }

    // Low relevance floor so a single shared word in a long title still counts as a match.
    return winnerScore >= 0.05 ? winner : null;
}
/// <summary>
/// Asks the model which of the given articles best answers the user's message.
/// </summary>
/// <param name="userMessage">The user's message.</param>
/// <param name="articles">Candidate articles offered to the model.</param>
/// <param name="conversationSummary">Conversation summary used to resolve references, if any.</param>
/// <returns>
/// The article whose URL equals the model's reply, or <c>null</c> when there are no candidates,
/// the model answers <c>N/A</c>, the reply matches no candidate, or the call fails.
/// </returns>
private async Task<NewsArticleLink?> FindBestMatchingArticleAIAsync(string userMessage, List<NewsArticleLink> articles, string? conversationSummary)
{
    if (!articles.Any()) return null;

    string safeUserMsg = SanitizeInput(userMessage);
    string safeSummary = SanitizeInput(conversationSummary);

    var promptBuilder = new StringBuilder();
    promptBuilder.AppendLine("Encuentra el artículo más relevante para la <pregunta_usuario> en la <lista_articulos>, usando el <resumen_contexto> para entender referencias (ej: 'esa nota').");

    if (!string.IsNullOrWhiteSpace(safeSummary))
    {
        promptBuilder.AppendLine("<resumen_contexto>");
        promptBuilder.AppendLine(safeSummary);
        promptBuilder.AppendLine("</resumen_contexto>");
    }

    promptBuilder.AppendLine("<lista_articulos>");
    foreach (var article in articles)
    {
        promptBuilder.AppendLine($"- Título: \"{article.Title}\", URL: {article.Url}");
    }
    promptBuilder.AppendLine("</lista_articulos>");
    promptBuilder.AppendLine($"<pregunta_usuario>{safeUserMsg}</pregunta_usuario>");
    promptBuilder.AppendLine("Responde SOLO con la URL. Si ninguna es relevante, responde 'N/A'.");

    var requestData = new GeminiRequest
    {
        Contents = new[] { new Content { Parts = new[] { new Part { Text = promptBuilder.ToString() } } } },
        SafetySettings = GetDefaultSafetySettings()
    };

    // The configured URL targets the streaming endpoint; this call needs the plain one.
    var nonStreamingApiUrl = _apiUrl.Replace(":streamGenerateContent?alt=sse&", ":generateContent?");
    var httpClient = _httpClientFactory.CreateClient();

    try
    {
        using var response = await httpClient.PostAsJsonAsync(nonStreamingApiUrl, requestData);
        if (!response.IsSuccessStatusCode) return null;

        var geminiResponse = await response.Content.ReadFromJsonAsync<GeminiResponse>();
        var responseUrl = geminiResponse?.Candidates?.FirstOrDefault()?.Content?.Parts?.FirstOrDefault()?.Text?.Trim();
        if (string.IsNullOrEmpty(responseUrl) || responseUrl == "N/A") return null;

        return articles.FirstOrDefault(a => string.Equals(a.Url, responseUrl, StringComparison.Ordinal));
    }
    catch (Exception ex)
    {
        // Still best effort (the caller falls back to the headline list), but no longer silent.
        _logger.LogError(ex, "Excepción en FindBestMatchingArticleAIAsync.");
        return null;
    }
}
2025-11-20 10:52:46 -03:00
2025-12-09 10:28:18 -03:00
/// <summary>
/// Splits text into a set of normalized search terms: lower-cased, without diacritics,
/// with surrounding punctuation trimmed, and longer than two characters.
/// </summary>
/// <param name="text">Text to tokenize.</param>
/// <returns>The distinct terms; empty when <paramref name="text"/> is null or whitespace.</returns>
private HashSet<string> Tokenize(string text)
{
    if (string.IsNullOrWhiteSpace(text)) return new HashSet<string>();

    // Invariant casing keeps the result independent of the server's current culture.
    var normalizedText = RemoveDiacritics(text.ToLowerInvariant());

    // Trim only the punctuation characters that actually occur in this text.
    var punctuation = normalizedText.Where(char.IsPunctuation).Distinct().ToArray();

    return normalizedText
        .Split()
        .Select(word => word.Trim(punctuation))
        .Where(word => word.Length > 2) // ignore very short words
        .ToHashSet();
}
/// <summary>
/// Removes accents and other combining marks from <paramref name="text"/>.
/// </summary>
/// <param name="text">Text to strip.</param>
/// <returns>The text without non-spacing marks, recomposed to normalization form C.</returns>
private string RemoveDiacritics(string text)
{
    // Decompose so each accented letter becomes a base letter followed by its combining marks.
    string decomposed = text.Normalize(NormalizationForm.FormD);

    var kept = new StringBuilder(decomposed.Length);
    foreach (char ch in decomposed)
    {
        bool isCombiningMark = CharUnicodeInfo.GetUnicodeCategory(ch) == UnicodeCategory.NonSpacingMark;
        if (isCombiningMark) continue;

        kept.Append(ch);
    }

    return kept.ToString().Normalize(NormalizationForm.FormC);
}
/// <summary>
/// Computes the Jaccard similarity of two term sets: shared terms divided by distinct terms overall.
/// </summary>
/// <param name="set1">First set of terms.</param>
/// <param name="set2">Second set of terms.</param>
/// <returns>A value between 0 and 1; 0 when either set is empty.</returns>
private double CalculateJaccardSimilarity(HashSet<string> set1, HashSet<string> set2)
{
    if (set1.Count == 0 || set2.Count == 0) return 0.0;

    int shared = 0;
    foreach (string term in set1)
    {
        if (set2.Contains(term)) shared++;
    }

    // |A ∪ B| = |A| + |B| - |A ∩ B|
    int total = set1.Count + set2.Count - shared;
    return (double)shared / total;
}
2025-11-21 12:10:45 -03:00
/// <summary>
/// Returns the knowledge-base items keyed by <c>Clave</c>, cached for five minutes.
/// </summary>
/// <returns>The cached dictionary, or an empty one when the cache yields <c>null</c>.</returns>
private async Task<Dictionary<string, ContextoItem>> GetKnowledgeItemsAsync()
{
    var items = await _cache.GetOrCreateAsync(CacheKeys.KnowledgeItems, async entry =>
    {
        entry.AbsoluteExpirationRelativeToNow = TimeSpan.FromMinutes(5);

        // The query runs on a context resolved from its own scope.
        using var scope = _serviceProvider.CreateScope();
        var db = scope.ServiceProvider.GetRequiredService<AppContexto>();
        return await db.ContextoItems
            .AsNoTracking()
            .ToDictionaryAsync(item => item.Clave, item => item);
    });

    return items ?? new Dictionary<string, ContextoItem>();
}
2025-11-21 12:10:45 -03:00
/// <summary>
/// Returns the active context sources, cached for five minutes.
/// </summary>
/// <returns>The cached list, or an empty one when the cache yields <c>null</c>.</returns>
private async Task<List<FuenteContexto>> GetFuentesDeContextoAsync()
{
    var fuentes = await _cache.GetOrCreateAsync(CacheKeys.FuentesDeContexto, async entry =>
    {
        entry.AbsoluteExpirationRelativeToNow = TimeSpan.FromMinutes(5);

        // The query runs on a context resolved from its own scope.
        using var scope = _serviceProvider.CreateScope();
        var db = scope.ServiceProvider.GetRequiredService<AppContexto>();
        return await db.FuentesDeContexto
            .Where(f => f.Activo)
            .AsNoTracking()
            .ToListAsync();
    });

    return fuentes ?? new List<FuenteContexto>();
}
2025-11-20 10:52:46 -03:00
/// <summary>
/// Downloads an article page and returns the text of its body paragraphs.
/// </summary>
/// <param name="url">Article URL; must pass the URL safety check.</param>
/// <returns>
/// The non-blank paragraphs found under the <c>cuerpo_nota</c> container, one per line, or
/// <c>null</c> when the URL is unsafe, no paragraph is found, or loading fails.
/// </returns>
private async Task<string?> GetArticleContentAsync(string url)
{
    if (!await UrlSecurity.IsSafeUrlAsync(url)) return null;

    try
    {
        var web = new HtmlWeb();
        var doc = await web.LoadFromWebAsync(url);
        var paragraphs = doc.DocumentNode.SelectNodes("//div[contains(@class, 'cuerpo_nota')]//p");
        if (paragraphs == null || !paragraphs.Any()) return null;

        var sb = new StringBuilder();
        foreach (var p in paragraphs)
        {
            var cleanText = WebUtility.HtmlDecode(p.InnerText).Trim();
            if (!string.IsNullOrWhiteSpace(cleanText)) sb.AppendLine(cleanText);
        }

        return sb.ToString();
    }
    catch (Exception ex)
    {
        // Callers treat null as "no content"; log so scraping failures remain diagnosable.
        _logger.LogWarning(ex, "Error obteniendo el contenido del artículo {Url}.", url);
        return null;
    }
}
2025-11-21 11:20:44 -03:00
2025-11-25 11:46:52 -03:00
/// <summary>
/// Returns the decoded text of a context source's page, cached per URL and selector.
/// </summary>
/// <param name="fuente">
/// Source to scrape. Its <c>SelectorContenido</c> is used as the XPath when set; otherwise
/// <c>//main | //body</c> is used.
/// </param>
/// <returns>
/// The decoded inner text of the selected node, or an empty string when the URL is unsafe,
/// the selector matches nothing, or loading fails.
/// </returns>
private async Task<string> ScrapeUrlContentAsync(FuenteContexto fuente)
{
    if (!await UrlSecurity.IsSafeUrlAsync(fuente.Url)) return string.Empty;

    return await _cache.GetOrCreateAsync($"scrape_{fuente.Url}_{fuente.SelectorContenido}", async entry =>
    {
        entry.AbsoluteExpirationRelativeToNow = TimeSpan.FromMinutes(30);

        try
        {
            var web = new HtmlWeb();
            var doc = await web.LoadFromWebAsync(fuente.Url);
            string selector = !string.IsNullOrWhiteSpace(fuente.SelectorContenido) ? fuente.SelectorContenido : "//main | //body";
            var node = doc.DocumentNode.SelectSingleNode(selector);
            if (node == null) return string.Empty;

            return WebUtility.HtmlDecode(node.InnerText) ?? string.Empty;
        }
        catch (Exception ex)
        {
            // A failed download must not blank this source for the full 30 minutes:
            // keep the empty result only briefly so the next requests retry.
            entry.AbsoluteExpirationRelativeToNow = TimeSpan.FromMinutes(1);
            _logger.LogWarning(ex, "Error obteniendo contenido de la fuente {Url}.", fuente.Url);
            return string.Empty;
        }
    }) ?? string.Empty;
}
2025-11-18 14:34:26 -03:00
}
2025-12-09 10:28:18 -03:00
}