using System.Runtime.CompilerServices;
using System.Text;
using System.Text.Json;
using ChatbotApi.Data.Models;
using HtmlAgilityPack;
using Microsoft.Extensions.Caching.Memory;
using Microsoft.EntityFrameworkCore;
using System.Net;
using System.Globalization;

namespace ChatbotApi.Services
{
    /// <summary>
    /// Chat entry point: turns one user request into a stream of text fragments.
    /// </summary>
    public interface IChatService
    {
        /// <summary>
        /// Answers <paramref name="request"/> and yields the answer piece by piece.
        /// </summary>
        /// <param name="request">The incoming chat request.</param>
        /// <param name="cancellationToken">Cancels the remote call and the chunked emission.</param>
        /// <returns>An asynchronous sequence of string fragments.</returns>
        IAsyncEnumerable<string> StreamMessageAsync(ChatRequest request, CancellationToken cancellationToken);
    }

    /// <summary>
    /// <see cref="IChatService"/> implementation that builds a context (article text, scraped
    /// headlines, knowledge-base entries), sends a single prompt to the configured Gemini
    /// endpoint and replays the structured JSON answer as a simulated stream.
    /// </summary>
    public class ChatService : IChatService
    {
        private readonly IHttpClientFactory _httpClientFactory;
        private readonly IMemoryCache _cache;
        private readonly IServiceProvider _serviceProvider;
        private readonly ILogger<ChatService> _logger;
        private readonly string _apiUrl;
        private readonly string _nonStreamingApiUrl;
        private readonly AppContexto _dbContext;

        private static readonly string _siteUrl = "https://www.eldia.com/";

        // Title prefixes stripped from scraped headlines.
        private static readonly string[] PrefijosAQuitar = { "VIDEO.- ", "VIDEO. ", "FOTOS.- ", "FOTOS. " };

        // Reused for every structured-response parse instead of allocating options per call.
        private static readonly JsonSerializerOptions StructuredResponseJsonOptions = new()
        {
            PropertyNameCaseInsensitive = true
        };

        private const int OutTokens = 8192;
        private const string SystemPromptsCacheKey = "ActiveSystemPrompts";

        /// <summary>
        /// Stores the injected collaborators and composes the Gemini endpoint URL from configuration.
        /// </summary>
        /// <exception cref="InvalidOperationException">
        /// Thrown when <c>Gemini:GeminiApiKey</c> is not configured.
        /// </exception>
        public ChatService(
            IConfiguration configuration,
            IMemoryCache memoryCache,
            IServiceProvider serviceProvider,
            ILogger<ChatService> logger,
            IHttpClientFactory httpClientFactory,
            AppContexto dbContext)
        {
            _logger = logger;
            _cache = memoryCache;
            _serviceProvider = serviceProvider;
            _httpClientFactory = httpClientFactory;
            _dbContext = dbContext;

            var apiKey = configuration["Gemini:GeminiApiKey"]
                ?? throw new InvalidOperationException("La API Key de Gemini no está configurada en .env");
            var baseUrl = configuration["Gemini:GeminiApiUrl"];

            // The key is appended directly to the configured URL.
            // NOTE(review): presumably the configured URL ends with "key=" - confirm.
            _apiUrl = $"{baseUrl}{apiKey}";

            // Every call in this class uses the non-streaming endpoint; derive it once.
            _nonStreamingApiUrl = _apiUrl.Replace(":streamGenerateContent?alt=sse&", ":generateContent?");
        }

        /// <summary>Shape of the JSON object the model is instructed to return.</summary>
        private class GeminiStructuredResponse
        {
            public string intent { get; set; } = "NOTICIAS_PORTADA";
            public string reply { get; set; } = "";
            public string summary { get; set; } = "";
        }

        /// <summary>
        /// Builds the context, performs one Gemini call and yields the result.
        /// </summary>
        /// <remarks>
        /// On success the sequence is: one <c>INTENT::...</c> item, the reply split into chunks of
        /// roughly 20 characters, then one <c>SUMMARY::...</c> item. On failure a single
        /// user-facing error message is yielded instead. Exceptions raised while building the
        /// context or calling the API are logged and converted to such a message.
        /// </remarks>
        /// <param name="request">Chat request; an empty message yields an error item.</param>
        /// <param name="cancellationToken">Flows into the HTTP call and the inter-chunk delay.</param>
        public async IAsyncEnumerable<string> StreamMessageAsync(
            ChatRequest request,
            [EnumeratorCancellation] CancellationToken cancellationToken)
        {
            if (string.IsNullOrWhiteSpace(request?.Message))
            {
                yield return "Error: No he recibido ningún mensaje.";
                yield break;
            }

            string safeUserMessage = SanitizeInput(request.Message);
            string context = "";
            string? errorMessage = null;

            // `yield` is not allowed inside a try block that has a catch clause, so failures
            // are recorded in errorMessage and reported after the block.
            try
            {
                context = await BuildContextAsync(request, safeUserMessage, cancellationToken);
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Error construyendo contexto.");
                errorMessage = "Lo siento, hubo un problema técnico procesando tu solicitud.";
            }

            if (!string.IsNullOrEmpty(errorMessage))
            {
                yield return errorMessage;
                yield break;
            }

            // ========== UNIFIED API CALL ==========
            string? jsonText = null;
            try
            {
                // System prompts are loaded here, and only when no override was supplied, so no
                // query on the injected DbContext is left running unobserved when the method
                // exits early or the override makes the result unnecessary.
                var systemInstructions = !string.IsNullOrWhiteSpace(request.SystemPromptOverride)
                    ? request.SystemPromptOverride
                    : await GetActiveSystemPromptsAsync();

                string prompt = BuildUnifiedPrompt(
                    systemInstructions, request.ConversationSummary, context, safeUserMessage);

                var requestData = new GeminiRequest
                {
                    Contents = new[] { new Content { Parts = new[] { new Part { Text = prompt } } } },
                    GenerationConfig = new GenerationConfig { MaxOutputTokens = OutTokens },
                    SafetySettings = GetDefaultSafetySettings()
                };

                var httpClient = _httpClientFactory.CreateClient();
                httpClient.Timeout = TimeSpan.FromSeconds(45);

                using var response = await httpClient.PostAsJsonAsync(_nonStreamingApiUrl, requestData, cancellationToken);
                if (!response.IsSuccessStatusCode)
                {
                    _logger.LogWarning("Error API Gemini: {StatusCode}", response.StatusCode);
                    throw new HttpRequestException($"Error en proveedor de IA: {response.StatusCode}");
                }

                // NOTE(review): the generic argument was missing in the reviewed source;
                // GeminiResponse is assumed as the counterpart of GeminiRequest - confirm.
                var geminiResponse = await response.Content.ReadFromJsonAsync<GeminiResponse>(cancellationToken: cancellationToken);
                jsonText = geminiResponse?.Candidates?.FirstOrDefault()?.Content?.Parts?.FirstOrDefault()?.Text?.Trim();

                if (string.IsNullOrEmpty(jsonText))
                {
                    _logger.LogWarning("Respuesta vacía de Gemini");
                    errorMessage = "Lo siento, hubo un problema al procesar la respuesta.";
                }
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Error en llamada unificada a Gemini.");
                errorMessage = "Lo siento, el servicio está temporalmente no disponible. Por favor, intenta de nuevo.";
            }

            if (!string.IsNullOrEmpty(errorMessage))
            {
                yield return errorMessage;
                yield break;
            }

            // Parse the structured answer; a malformed payload falls through to the error below.
            GeminiStructuredResponse? apiResponse = null;
            try
            {
                apiResponse = JsonSerializer.Deserialize<GeminiStructuredResponse>(
                    ExtractJsonPayload(jsonText!), StructuredResponseJsonOptions);
            }
            catch (JsonException ex)
            {
                _logger.LogError(ex, "Failed to parse Gemini JSON. Raw response: {JsonText}", jsonText);
            }

            if (apiResponse == null || string.IsNullOrEmpty(apiResponse.reply))
            {
                yield return "Lo siento, tuve un problema al procesar la respuesta. Por favor, intenta de nuevo.";
                yield break;
            }

            // Intent metadata goes first.
            yield return $"INTENT::{apiResponse.intent}";

            // Simulate streaming: emit the reply in chunks of about 20 characters.
            string fullReply = apiResponse.reply;
            var chunkBuilder = new StringBuilder();
            foreach (var word in fullReply.Split(' ', StringSplitOptions.RemoveEmptyEntries))
            {
                chunkBuilder.Append(word).Append(' ');
                if (chunkBuilder.Length >= 20)
                {
                    yield return chunkBuilder.ToString();
                    chunkBuilder.Clear();
                    await Task.Delay(30, cancellationToken);
                }
            }

            if (chunkBuilder.Length > 0)
            {
                yield return chunkBuilder.ToString();
            }

            // Persist the exchange without delaying the response.
            LogConversationInBackground(safeUserMessage, fullReply);

            yield return $"SUMMARY::{apiResponse.summary}";
        }

        // --- PRIVATE METHODS ---

        /// <summary>
        /// Produces the context text for the prompt: the article at <c>request.ContextUrl</c> when
        /// it can be fetched; otherwise the best-matching front-page article, or a list of up to
        /// 15 headlines when nothing matches. Knowledge-base entries are appended when present.
        /// </summary>
        private async Task<string> BuildContextAsync(ChatRequest request, string safeUserMessage, CancellationToken cancellationToken)
        {
            string context;
            string? articleContext = null;

            if (!string.IsNullOrEmpty(request.ContextUrl) && await UrlSecurity.IsSafeUrlAsync(request.ContextUrl))
            {
                articleContext = await GetArticleContentAsync(request.ContextUrl);
            }

            if (!string.IsNullOrEmpty(articleContext))
            {
                // The URL comes from the caller, so the fetched text is escaped the same way as
                // the matched-article branch below.
                context = SanitizeInput(articleContext);
            }
            else
            {
                var articles = await GetWebsiteNewsAsync(_siteUrl, 50);
                if (request.ShownArticles != null && request.ShownArticles.Any())
                {
                    articles = articles.Where(a => !request.ShownArticles.Contains(a.Url)).ToList();
                }

                // Hybrid search: local scoring first, model-assisted lookup as fallback.
                var bestMatch = FindBestMatchingArticleLocal(safeUserMessage, articles)
                    ?? await FindBestMatchingArticleAIAsync(safeUserMessage, articles, request.ConversationSummary, cancellationToken);

                if (bestMatch != null && await UrlSecurity.IsSafeUrlAsync(bestMatch.Url))
                {
                    string rawContent = await GetArticleContentAsync(bestMatch.Url) ?? "";
                    context = $"ARTÍCULO ENCONTRADO: {bestMatch.Title}\nURL: {bestMatch.Url}\n\nCONTENIDO:\n{SanitizeInput(rawContent)}";
                }
                else
                {
                    var sb = new StringBuilder();
                    sb.AppendLine("NOTICIAS DISPONIBLES:");
                    foreach (var article in articles.Take(15))
                    {
                        sb.AppendLine($"- {article.Title} ({article.Url})");
                    }
                    context = sb.ToString();
                }
            }

            var knowledgeItems = await GetKnowledgeItemsAsync();
            if (knowledgeItems.Any())
            {
                var kbBuilder = new StringBuilder("\n\nBASE DE CONOCIMIENTO:");
                foreach (var item in knowledgeItems.Values)
                {
                    kbBuilder.AppendLine($"\n- {item.Descripcion}: {item.Valor}");
                }
                context += kbBuilder.ToString();
            }

            return context;
        }

        /// <summary>
        /// Assembles the single prompt sent to the model: fixed instructions, the optional
        /// conversation summary, the context and the user's question.
        /// </summary>
        private static string BuildUnifiedPrompt(string? systemInstructions, string? conversationSummary, string context, string safeUserMessage)
        {
            // NOTE(review): several lines below are empty strings or contain a gap where a
            // section name seems to be missing ("...artículo en ", "...algo del , úsalo").
            // They look like section delimiters whose text was lost; they are reproduced exactly
            // as found. Confirm against version control before relying on them.
            var promptBuilder = new StringBuilder();
            promptBuilder.AppendLine("");
            promptBuilder.AppendLine("Eres DiaBot, asistente virtual de El Día (La Plata, Argentina).");
            promptBuilder.AppendLine();
            promptBuilder.AppendLine("FORMATO DE RESPUESTA:");
            promptBuilder.AppendLine("Debes responder en formato JSON con esta estructura EXACTA:");
            promptBuilder.AppendLine("{\"intent\": \"...\", \"reply\": \"...\", \"summary\": \"...\"}");
            promptBuilder.AppendLine();
            promptBuilder.AppendLine("INSTRUCCIONES GENERALES:");
            promptBuilder.AppendLine(systemInstructions);
            promptBuilder.AppendLine("- NO uses formatos de email/carta ('Estimado/a', 'Atentamente')");
            promptBuilder.AppendLine("- NO saludes de nuevo si ya saludaste o si la pregunta es directa");
            promptBuilder.AppendLine("- Sé conciso, directo y natural");
            promptBuilder.AppendLine();
            promptBuilder.AppendLine("--- REGLAS PARA CADA CAMPO JSON ---");
            promptBuilder.AppendLine();
            promptBuilder.AppendLine("1. 'intent': Clasifica la intención usando SOLO uno de estos valores:");
            promptBuilder.AppendLine(" - \"ARTICULO_ACTUAL\": Si la pregunta es sobre el tema del artículo en ");
            promptBuilder.AppendLine(" - \"BASE_DE_CONOCIMIENTO\": Para preguntas sobre 'El Día' como empresa/organización");
            promptBuilder.AppendLine(" - \"NOTICIAS_PORTADA\": Para todo lo demás (este es el default si dudas)");
            promptBuilder.AppendLine();
            promptBuilder.AppendLine("2. 'reply': Tu respuesta en texto Markdown para el usuario.");
            promptBuilder.AppendLine(" - Si es un artículo específico: Resume brevemente e INCLUYE el enlace [Título](URL)");
            promptBuilder.AppendLine(" - Si son noticias generales: Selecciona las 3 más relevantes, breve frase c/u + enlace");
            promptBuilder.AppendLine(" - Si la pregunta refiere a algo del , úsalo (ej: 'dónde leerla' → dale el link)");
            promptBuilder.AppendLine();
            promptBuilder.AppendLine("3. 'summary': Actualiza el historial de conversación.");
            promptBuilder.AppendLine(" - Resume el intercambio actual (pregunta + respuesta) en 1-2 líneas");
            promptBuilder.AppendLine(" - Integra con el previo si existe");
            promptBuilder.AppendLine(" - Máximo 200 palabras para el resumen completo");
            promptBuilder.AppendLine("");
            promptBuilder.AppendLine();

            // Conversation history
            if (!string.IsNullOrWhiteSpace(conversationSummary))
            {
                promptBuilder.AppendLine("");
                promptBuilder.AppendLine(SanitizeInput(conversationSummary));
                promptBuilder.AppendLine("");
                promptBuilder.AppendLine();
            }

            // Context
            promptBuilder.AppendLine("");
            promptBuilder.AppendLine(context);
            promptBuilder.AppendLine("");
            promptBuilder.AppendLine();

            // User question
            promptBuilder.AppendLine("");
            promptBuilder.AppendLine(safeUserMessage);
            promptBuilder.AppendLine("");
            promptBuilder.AppendLine();
            promptBuilder.AppendLine("RESPUESTA (SOLO el JSON, sin comentarios adicionales):");

            return promptBuilder.ToString();
        }

        /// <summary>
        /// Returns the text between the first Markdown code fence (<c>```json</c> preferred,
        /// otherwise <c>```</c>) and the next fence, trimmed. When there is no fence, or no
        /// closing fence after a non-empty body, <paramref name="text"/> is returned unchanged.
        /// </summary>
        private static string ExtractJsonPayload(string text)
        {
            const string Fence = "```";
            const string JsonFence = "```json";

            int contentStart;
            int fenceStart = text.IndexOf(JsonFence, StringComparison.Ordinal);
            if (fenceStart >= 0)
            {
                contentStart = fenceStart + JsonFence.Length;
            }
            else
            {
                fenceStart = text.IndexOf(Fence, StringComparison.Ordinal);
                if (fenceStart < 0)
                {
                    return text;
                }
                contentStart = fenceStart + Fence.Length;
            }

            int fenceEnd = text.IndexOf(Fence, contentStart, StringComparison.Ordinal);
            return fenceEnd > contentStart
                ? text.Substring(contentStart, fenceEnd - contentStart).Trim()
                : text;
        }

        /// <summary>
        /// Saves the exchange to <c>ConversacionLogs</c> on a background task, using a fresh DI
        /// scope. Best effort: any failure is logged and otherwise ignored.
        /// </summary>
        private void LogConversationInBackground(string userMessage, string botReply)
        {
            IServiceScopeFactory scopeFactory;
            try
            {
                // Resolved before the task starts so the background work does not touch
                // _serviceProvider later.
                // NOTE(review): if this service is registered as scoped, _serviceProvider may
                // already be disposed by the time the task runs - confirm the registration.
                scopeFactory = _serviceProvider.GetRequiredService<IServiceScopeFactory>();
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Error in background logging");
                return;
            }

            _ = Task.Run(async () =>
            {
                try
                {
                    using var scope = scopeFactory.CreateScope();
                    var db = scope.ServiceProvider.GetRequiredService<AppContexto>();
                    db.ConversacionLogs.Add(new ConversacionLog
                    {
                        UsuarioMensaje = userMessage,
                        BotRespuesta = botReply,
                        Fecha = DateTime.UtcNow
                    });
                    await db.SaveChangesAsync();
                }
                catch (Exception ex)
                {
                    // Nothing awaits this task, so every failure must be handled here.
                    _logger.LogError(ex, "Error in background logging");
                }
            });
        }

        /// <summary>
        /// Escapes angle brackets so text cannot introduce markup-like tags into a prompt.
        /// Returns an empty string for null or whitespace-only input.
        /// </summary>
        /// <remarks>
        /// Only <c>&lt;</c> and <c>&gt;</c> are replaced; <c>&amp;</c> is left alone so that
        /// sanitizing an already sanitized string gives the same result (some call paths apply
        /// this method twice to the same text).
        /// </remarks>
        private static string SanitizeInput(string? input)
        {
            if (string.IsNullOrWhiteSpace(input))
            {
                return string.Empty;
            }

            return input.Replace("<", "&lt;").Replace(">", "&gt;");
        }

        /// <summary>
        /// Returns the active system prompts joined by blank lines, newest first, cached for
        /// 10 minutes. A built-in default prompt is used when no prompt is active.
        /// </summary>
        private async Task<string> GetActiveSystemPromptsAsync()
        {
            return await _cache.GetOrCreateAsync(SystemPromptsCacheKey, async entry =>
            {
                entry.AbsoluteExpirationRelativeToNow = TimeSpan.FromMinutes(10);

                var prompts = await _dbContext.SystemPrompts
                    .Where(p => p.IsActive)
                    .OrderByDescending(p => p.CreatedAt)
                    .Select(p => p.Content)
                    .ToListAsync();

                if (!prompts.Any())
                {
                    return "Tu rol es ser el asistente virtual de 'El Día'. Responde de forma natural, útil y concisa. Usa un tono amigable pero profesional (estilo periodístico moderno). IMPORTANTE: NO uses saludos formales tipo carta (como 'Estimado/a'), NO saludes si el usuario no saludó primero o si es una continuación de la charla. NO repitas saludos.";
                }

                return string.Join("\n\n", prompts);
            }) ?? "Responde de forma natural y concisa.";
        }

        /// <summary>Safety settings sent with every Gemini request built in this class.</summary>
        private List<SafetySetting> GetDefaultSafetySettings()
        {
            return new List<SafetySetting>
            {
                new SafetySetting { Category = "HARM_CATEGORY_HARASSMENT", Threshold = "BLOCK_MEDIUM_AND_ABOVE" },
                new SafetySetting { Category = "HARM_CATEGORY_HATE_SPEECH", Threshold = "BLOCK_MEDIUM_AND_ABOVE" },
                new SafetySetting { Category = "HARM_CATEGORY_SEXUALLY_EXPLICIT", Threshold = "BLOCK_MEDIUM_AND_ABOVE" },
                new SafetySetting { Category = "HARM_CATEGORY_DANGEROUS_CONTENT", Threshold = "BLOCK_MEDIUM_AND_ABOVE" }
            };
        }

        // NOTE: UpdateConversationSummaryAsync and GetIntentAsync have been REMOVED.
        // Their functionality is now in the unified StreamMessageAsync call.

        /// <summary>
        /// Saves one exchange through the injected <c>_dbContext</c>; failures are logged, not thrown.
        /// </summary>
        private async Task SaveConversationLogAsync(string userMessage, string botReply)
        {
            try
            {
                _dbContext.ConversacionLogs.Add(new ConversacionLog
                {
                    UsuarioMensaje = userMessage,
                    BotRespuesta = botReply,
                    Fecha = DateTime.UtcNow
                });
                await _dbContext.SaveChangesAsync();
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Error guardando log.");
            }
        }

        /// <summary>
        /// Scrapes up to <paramref name="cantidad"/> headline links from <paramref name="url"/>.
        /// Returns whatever was collected so far (possibly an empty list) when the URL is
        /// rejected, nothing matches, or scraping throws.
        /// </summary>
        private async Task<List<NewsArticleLink>> GetWebsiteNewsAsync(string url, int cantidad)
        {
            var newsList = new List<NewsArticleLink>();
            try
            {
                if (!await UrlSecurity.IsSafeUrlAsync(url))
                {
                    return newsList;
                }

                var web = new HtmlWeb();
                var doc = await web.LoadFromWebAsync(url);
                var articleNodes = doc.DocumentNode.SelectNodes("//article[contains(@class, 'item')] | //article[contains(@class, 'nota_modulo')]");
                if (articleNodes == null)
                {
                    return newsList;
                }

                var baseUri = new Uri(url);
                var urlsProcesadas = new HashSet<string>();

                foreach (var articleNode in articleNodes)
                {
                    if (newsList.Count >= cantidad)
                    {
                        break;
                    }

                    var linkNode = articleNode.SelectSingleNode(".//a[@href]");
                    var titleNode = articleNode.SelectSingleNode(".//h2");
                    if (linkNode == null || titleNode == null)
                    {
                        continue;
                    }

                    var relativeUrl = linkNode.GetAttributeValue("href", string.Empty);

                    // HashSet.Add returns false for a href that was already seen, so it doubles
                    // as the duplicate check.
                    if (string.IsNullOrEmpty(relativeUrl) || relativeUrl == "#" || !urlsProcesadas.Add(relativeUrl))
                    {
                        continue;
                    }

                    // Only root-relative hrefs are resolved; anything else is kept verbatim.
                    var fullUrl = relativeUrl.StartsWith('/')
                        ? new Uri(baseUri, relativeUrl).ToString()
                        : relativeUrl;

                    string cleanTitle = WebUtility.HtmlDecode(titleNode.InnerText).Trim();
                    foreach (var prefijo in PrefijosAQuitar)
                    {
                        if (cleanTitle.StartsWith(prefijo, StringComparison.OrdinalIgnoreCase))
                        {
                            cleanTitle = cleanTitle.Substring(prefijo.Length).Trim();
                        }
                    }

                    newsList.Add(new NewsArticleLink { Title = cleanTitle, Url = fullUrl });
                }
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Error scraping news.");
            }

            return newsList;
        }

        /// <summary>
        /// Scores every article title against the user's message and returns the best one, or
        /// null when the best score is below 0.05.
        /// </summary>
        /// <remarks>
        /// Score = Jaccard similarity of the token sets, +0.2 when a shared token is longer than
        /// three characters, +0.5 when the title contains the whole message (case-insensitive).
        /// </remarks>
        private NewsArticleLink? FindBestMatchingArticleLocal(string userMessage, List<NewsArticleLink> articles)
        {
            if (!articles.Any() || string.IsNullOrWhiteSpace(userMessage))
            {
                return null;
            }

            var userTerms = Tokenize(userMessage);
            if (!userTerms.Any())
            {
                return null;
            }

            NewsArticleLink? bestMatch = null;
            double maxScore = 0;

            foreach (var article in articles)
            {
                var titleTerms = Tokenize(article.Title);
                double score = CalculateJaccardSimilarity(userTerms, titleTerms);

                if (userTerms.Intersect(titleTerms).Any(t => t.Length > 3))
                {
                    score += 0.2;
                }

                if (article.Title.Contains(userMessage, StringComparison.OrdinalIgnoreCase))
                {
                    score += 0.5;
                }

                if (score > maxScore)
                {
                    maxScore = score;
                    bestMatch = article;
                }
            }

            return maxScore >= 0.05 ? bestMatch : null;
        }

        /// <summary>
        /// Asks the model to pick the most relevant article URL from <paramref name="articles"/>.
        /// Returns null when the list is empty, the call fails, the model answers <c>N/A</c>, or
        /// the returned URL is not one of the listed articles.
        /// </summary>
        /// <param name="cancellationToken">
        /// Optional; when the caller cancels, the cancellation propagates instead of being
        /// treated as "no match".
        /// </param>
        private async Task<NewsArticleLink?> FindBestMatchingArticleAIAsync(
            string userMessage,
            List<NewsArticleLink> articles,
            string? conversationSummary,
            CancellationToken cancellationToken = default)
        {
            if (!articles.Any())
            {
                return null;
            }

            string safeUserMsg = SanitizeInput(userMessage);
            string safeSummary = SanitizeInput(conversationSummary);

            // NOTE(review): as in the main prompt, the empty strings and the gaps in the first
            // sentence look like lost section delimiters; reproduced exactly as found.
            var promptBuilder = new StringBuilder();
            promptBuilder.AppendLine("Encuentra el artículo más relevante para la en la , usando el para entender referencias (ej: 'esa nota').");
            if (!string.IsNullOrWhiteSpace(safeSummary))
            {
                promptBuilder.AppendLine("");
                promptBuilder.AppendLine(safeSummary);
                promptBuilder.AppendLine("");
            }
            promptBuilder.AppendLine("");
            foreach (var article in articles)
            {
                promptBuilder.AppendLine($"- Título: \"{article.Title}\", URL: {article.Url}");
            }
            promptBuilder.AppendLine("");
            promptBuilder.AppendLine($"{safeUserMsg}");
            promptBuilder.AppendLine("Responde SOLO con la URL. Si ninguna es relevante, responde 'N/A'.");

            var requestData = new GeminiRequest
            {
                Contents = new[] { new Content { Parts = new[] { new Part { Text = promptBuilder.ToString() } } } },
                SafetySettings = GetDefaultSafetySettings()
            };

            var httpClient = _httpClientFactory.CreateClient();
            try
            {
                using var response = await httpClient.PostAsJsonAsync(_nonStreamingApiUrl, requestData, cancellationToken);
                if (!response.IsSuccessStatusCode)
                {
                    return null;
                }

                // NOTE(review): GeminiResponse is an assumed type name - confirm.
                var geminiResponse = await response.Content.ReadFromJsonAsync<GeminiResponse>(cancellationToken: cancellationToken);
                var responseUrl = geminiResponse?.Candidates?.FirstOrDefault()?.Content?.Parts?.FirstOrDefault()?.Text?.Trim();
                if (string.IsNullOrEmpty(responseUrl) || responseUrl == "N/A")
                {
                    return null;
                }

                return articles.FirstOrDefault(a => a.Url == responseUrl);
            }
            catch (Exception ex) when (!cancellationToken.IsCancellationRequested)
            {
                // Best-effort lookup: the caller falls back to the headline list.
                _logger.LogWarning(ex, "AI article lookup failed.");
                return null;
            }
        }

        /// <summary>
        /// Lower-cases, strips diacritics, splits on whitespace, trims punctuation from each
        /// piece and keeps the pieces longer than two characters.
        /// </summary>
        private static HashSet<string> Tokenize(string text)
        {
            // Invariant casing keeps tokenization independent of the server's culture.
            var normalizedText = RemoveDiacritics(text.ToLowerInvariant());
            var punctuation = normalizedText.Where(char.IsPunctuation).Distinct().ToArray();

            return normalizedText
                .Split()
                .Select(x => x.Trim(punctuation))
                .Where(x => x.Length > 2)
                .ToHashSet();
        }

        /// <summary>
        /// Removes non-spacing marks by decomposing to form D, dropping them, and recomposing to form C.
        /// </summary>
        private static string RemoveDiacritics(string text)
        {
            var normalizedString = text.Normalize(NormalizationForm.FormD);
            var stringBuilder = new StringBuilder(capacity: normalizedString.Length);

            foreach (char c in normalizedString)
            {
                if (CharUnicodeInfo.GetUnicodeCategory(c) != UnicodeCategory.NonSpacingMark)
                {
                    stringBuilder.Append(c);
                }
            }

            return stringBuilder.ToString().Normalize(NormalizationForm.FormC);
        }

        /// <summary>
        /// Returns |intersection| / |union| of the two sets, or 0 when either set is empty.
        /// </summary>
        private static double CalculateJaccardSimilarity(HashSet<string> set1, HashSet<string> set2)
        {
            if (!set1.Any() || !set2.Any())
            {
                return 0.0;
            }

            var intersection = new HashSet<string>(set1);
            intersection.IntersectWith(set2);

            var union = new HashSet<string>(set1);
            union.UnionWith(set2);

            return (double)intersection.Count / union.Count;
        }

        /// <summary>
        /// Loads every <c>ContextoItems</c> row keyed by <c>Clave</c>, cached for 5 minutes,
        /// using its own DI scope.
        /// </summary>
        // NOTE(review): the generic arguments were missing in the reviewed source; the key type
        // (string) and entity name (ContextoItem) are assumptions - confirm against the model.
        private async Task<Dictionary<string, ContextoItem>> GetKnowledgeItemsAsync()
        {
            return await _cache.GetOrCreateAsync(CacheKeys.KnowledgeItems, async entry =>
            {
                entry.AbsoluteExpirationRelativeToNow = TimeSpan.FromMinutes(5);
                using (var scope = _serviceProvider.CreateScope())
                {
                    var dbContext = scope.ServiceProvider.GetRequiredService<AppContexto>();
                    return await dbContext.ContextoItems
                        .AsNoTracking()
                        .ToDictionaryAsync(item => item.Clave, item => item);
                }
            }) ?? new Dictionary<string, ContextoItem>();
        }

        /// <summary>
        /// Loads the active <c>FuentesDeContexto</c> rows, cached for 5 minutes, using its own DI scope.
        /// </summary>
        private async Task<List<FuenteContexto>> GetFuentesDeContextoAsync()
        {
            return await _cache.GetOrCreateAsync(CacheKeys.FuentesDeContexto, async entry =>
            {
                entry.AbsoluteExpirationRelativeToNow = TimeSpan.FromMinutes(5);
                using (var scope = _serviceProvider.CreateScope())
                {
                    var dbContext = scope.ServiceProvider.GetRequiredService<AppContexto>();
                    return await dbContext.FuentesDeContexto
                        .Where(f => f.Activo)
                        .AsNoTracking()
                        .ToListAsync();
                }
            }) ?? new List<FuenteContexto>();
        }

        /// <summary>
        /// Downloads <paramref name="url"/> and returns the non-blank paragraphs found under an
        /// element whose class contains <c>cuerpo_nota</c>, one per line. Returns null when the
        /// URL is rejected, no paragraph is found, or the download/parse fails.
        /// </summary>
        private async Task<string?> GetArticleContentAsync(string url)
        {
            if (!await UrlSecurity.IsSafeUrlAsync(url))
            {
                return null;
            }

            try
            {
                var web = new HtmlWeb();
                var doc = await web.LoadFromWebAsync(url);
                var paragraphs = doc.DocumentNode.SelectNodes("//div[contains(@class, 'cuerpo_nota')]//p");
                if (paragraphs == null || !paragraphs.Any())
                {
                    return null;
                }

                var sb = new StringBuilder();
                foreach (var p in paragraphs)
                {
                    var cleanText = WebUtility.HtmlDecode(p.InnerText).Trim();
                    if (!string.IsNullOrWhiteSpace(cleanText))
                    {
                        sb.AppendLine(cleanText);
                    }
                }

                return sb.ToString();
            }
            catch (Exception ex)
            {
                // Best effort: callers treat null as "no article content".
                _logger.LogWarning(ex, "Error fetching article content from {Url}", url);
                return null;
            }
        }

        /// <summary>
        /// Returns the decoded inner text of the node selected by <c>fuente.SelectorContenido</c>
        /// (or <c>//main | //body</c> when no selector is set), cached for 30 minutes per
        /// URL/selector pair. Returns an empty string when the URL is rejected, nothing is
        /// selected, or scraping fails.
        /// </summary>
        private async Task<string> ScrapeUrlContentAsync(FuenteContexto fuente)
        {
            if (!await UrlSecurity.IsSafeUrlAsync(fuente.Url))
            {
                return string.Empty;
            }

            return await _cache.GetOrCreateAsync($"scrape_{fuente.Url}_{fuente.SelectorContenido}", async entry =>
            {
                entry.AbsoluteExpirationRelativeToNow = TimeSpan.FromMinutes(30);
                try
                {
                    var web = new HtmlWeb();
                    var doc = await web.LoadFromWebAsync(fuente.Url);
                    string selector = !string.IsNullOrWhiteSpace(fuente.SelectorContenido)
                        ? fuente.SelectorContenido
                        : "//main | //body";

                    var node = doc.DocumentNode.SelectSingleNode(selector);
                    if (node == null)
                    {
                        return string.Empty;
                    }

                    return WebUtility.HtmlDecode(node.InnerText) ?? string.Empty;
                }
                catch (Exception ex)
                {
                    _logger.LogWarning(ex, "Error scraping context source {Url}", fuente.Url);
                    return string.Empty;
                }
            }) ?? string.Empty;
        }
    }
}