- 3 → 1 llamada API por mensaje (-66,6% de costo) - Métodos GetIntentAsync y UpdateConversationSummaryAsync eliminados - Prompt unificado con respuesta JSON estructurada
635 lines
28 KiB
C#
635 lines
28 KiB
C#
using System.Runtime.CompilerServices;
|
|
using System.Text;
|
|
using System.Text.Json;
|
|
using ChatbotApi.Data.Models;
|
|
using HtmlAgilityPack;
|
|
using Microsoft.Extensions.Caching.Memory;
|
|
using Microsoft.EntityFrameworkCore;
|
|
using System.Net;
|
|
using System.Globalization;
|
|
|
|
namespace ChatbotApi.Services
|
|
{
|
|
/// <summary>
/// Contract for the chat service: produces the answer to a chat request as an
/// asynchronous sequence of text fragments.
/// </summary>
public interface IChatService
{
    /// <summary>
    /// Processes <paramref name="request"/> and yields the resulting text fragments as they become available.
    /// </summary>
    /// <param name="request">The incoming chat request.</param>
    /// <param name="cancellationToken">Token used to cancel the enumeration.</param>
    /// <returns>An asynchronous stream of string fragments.</returns>
    IAsyncEnumerable<string> StreamMessageAsync(ChatRequest request, CancellationToken cancellationToken);
}
|
|
|
|
public class ChatService : IChatService
|
|
{
|
|
private readonly IHttpClientFactory _httpClientFactory;
private readonly IMemoryCache _cache;
// Used to open fresh DI scopes for work that must not share the injected DbContext.
private readonly IServiceProvider _serviceProvider;
private readonly ILogger<ChatService> _logger;
// Configured endpoint URL with the API key appended (see constructor).
private readonly string _apiUrl;
private readonly AppContexto _dbContext;

// Front page that is scraped for the list of current news articles.
private static readonly string _siteUrl = "https://www.eldia.com/";
// Media-type prefixes stripped from scraped headlines.
private static readonly string[] PrefijosAQuitar = { "VIDEO.- ", "VIDEO. ", "FOTOS.- ", "FOTOS. " };
// Value sent as MaxOutputTokens in the generation config.
private const int OutTokens = 8192;
private const string SystemPromptsCacheKey = "ActiveSystemPrompts";

/// <summary>
/// Creates the service and builds the Gemini endpoint from configuration.
/// </summary>
/// <param name="configuration">Source of <c>Gemini:GeminiApiKey</c> and <c>Gemini:GeminiApiUrl</c>.</param>
/// <param name="memoryCache">Cache used for prompts, knowledge items and scraped content.</param>
/// <param name="serviceProvider">Provider used to create scopes for isolated database access.</param>
/// <param name="logger">Logger for this service.</param>
/// <param name="httpClientFactory">Factory for the HTTP clients that call the Gemini API.</param>
/// <param name="dbContext">Database context injected for this service instance.</param>
/// <exception cref="InvalidOperationException">
/// Thrown when the API key or the API base URL is missing from configuration.
/// </exception>
public ChatService(
    IConfiguration configuration,
    IMemoryCache memoryCache,
    IServiceProvider serviceProvider,
    ILogger<ChatService> logger,
    IHttpClientFactory httpClientFactory,
    AppContexto dbContext)
{
    _logger = logger;
    _cache = memoryCache;
    _serviceProvider = serviceProvider;
    _httpClientFactory = httpClientFactory;
    _dbContext = dbContext;

    var apiKey = configuration["Gemini:GeminiApiKey"] ?? throw new InvalidOperationException("La API Key de Gemini no está configurada en .env");
    var baseUrl = configuration["Gemini:GeminiApiUrl"];

    // Fail fast: without a base URL the endpoint would silently become just the API key
    // and every request would fail later with a much less obvious error.
    if (string.IsNullOrWhiteSpace(baseUrl))
    {
        throw new InvalidOperationException("La URL de la API de Gemini (Gemini:GeminiApiUrl) no está configurada.");
    }

    _apiUrl = $"{baseUrl}{apiKey}";
}
|
|
|
|
// Response model for structured JSON from Gemini
|
|
/// <summary>
/// Shape of the JSON object the model is asked to return: an intent label,
/// the Markdown reply for the user, and the updated conversation summary.
/// Property names are lower-case to mirror the JSON field names.
/// </summary>
private class GeminiStructuredResponse
{
    /// <summary>Intent label; defaults to the front-page news intent.</summary>
    public string intent { get; set; } = "NOTICIAS_PORTADA";

    /// <summary>Reply text shown to the user.</summary>
    public string reply { get; set; } = string.Empty;

    /// <summary>Updated conversation summary.</summary>
    public string summary { get; set; } = string.Empty;
}
|
|
|
|
/// <summary>
/// Answers a chat message with a single Gemini call and streams the result.
/// </summary>
/// <remarks>
/// The yielded sequence is: one <c>INTENT::&lt;value&gt;</c> item, the reply split into
/// word-aligned chunks, and finally one <c>SUMMARY::&lt;value&gt;</c> item. On any failure a
/// single user-facing error message is yielded instead. The conversation is written to the
/// database in a fire-and-forget background task after the reply has been streamed.
/// </remarks>
/// <param name="request">Chat request; its <c>Message</c> must be non-blank.</param>
/// <param name="cancellationToken">Cancels the Gemini call and the pacing delays.</param>
/// <returns>An asynchronous stream of protocol items and reply chunks.</returns>
public async IAsyncEnumerable<string> StreamMessageAsync(ChatRequest request, [EnumeratorCancellation] CancellationToken cancellationToken)
{
    if (string.IsNullOrWhiteSpace(request?.Message))
    {
        yield return "Error: No he recibido ningún mensaje.";
        yield break;
    }

    // Minimum size of each streamed chunk; chunks are extended to the next space.
    const int ReplyChunkLength = 20;

    string safeUserMessage = SanitizeInput(request.Message);
    string context = "";
    string? errorMessage = null;

    // Start loading the system prompts now so it overlaps with context building.
    var systemPromptsTask = GetActiveSystemPromptsAsync();

    // 'yield' is not allowed inside a try block that has a catch clause, so failures are
    // recorded in errorMessage and reported after the block.
    try
    {
        context = await BuildContextAsync(request, safeUserMessage);
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Error construyendo contexto.");
        errorMessage = "Lo siento, hubo un problema técnico procesando tu solicitud.";
    }

    if (!string.IsNullOrEmpty(errorMessage))
    {
        yield return errorMessage;
        yield break;
    }

    // ========== UNIFIED API CALL ==========
    var httpClient = _httpClientFactory.CreateClient();
    httpClient.Timeout = TimeSpan.FromSeconds(45);

    string? jsonText = null;

    try
    {
        var systemInstructions = !string.IsNullOrWhiteSpace(request.SystemPromptOverride)
            ? request.SystemPromptOverride
            : await systemPromptsTask;

        string prompt = BuildUnifiedPrompt(systemInstructions, request.ConversationSummary, context, safeUserMessage);

        var requestData = new GeminiRequest
        {
            Contents = new[] { new Content { Parts = new[] { new Part { Text = prompt } } } },
            GenerationConfig = new GenerationConfig { MaxOutputTokens = OutTokens },
            SafetySettings = GetDefaultSafetySettings()
        };

        // Use non-streaming endpoint
        var nonStreamingApiUrl = _apiUrl.Replace(":streamGenerateContent?alt=sse&", ":generateContent?");

        // Dispose the response so the underlying connection is released promptly.
        using var response = await httpClient.PostAsJsonAsync(nonStreamingApiUrl, requestData, cancellationToken);

        if (!response.IsSuccessStatusCode)
        {
            _logger.LogWarning("Error API Gemini: {StatusCode}", response.StatusCode);
            throw new HttpRequestException($"Error en proveedor de IA: {response.StatusCode}");
        }

        var geminiResponse = await response.Content.ReadFromJsonAsync<GeminiResponse>(cancellationToken: cancellationToken);
        jsonText = geminiResponse?.Candidates?.FirstOrDefault()?.Content?.Parts?.FirstOrDefault()?.Text?.Trim();

        if (string.IsNullOrEmpty(jsonText))
        {
            _logger.LogWarning("Respuesta vacía de Gemini");
            errorMessage = "Lo siento, hubo un problema al procesar la respuesta.";
        }
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Error en llamada unificada a Gemini.");
        errorMessage = "Lo siento, el servicio está temporalmente no disponible. Por favor, intenta de nuevo.";
    }

    if (!string.IsNullOrEmpty(errorMessage))
    {
        yield return errorMessage;
        yield break;
    }

    // Parse the structured JSON answer (kept in its own try so the yields stay outside).
    GeminiStructuredResponse? apiResponse = null;
    try
    {
        apiResponse = JsonSerializer.Deserialize<GeminiStructuredResponse>(
            ExtractJsonPayload(jsonText!),
            new JsonSerializerOptions { PropertyNameCaseInsensitive = true });
    }
    catch (JsonException ex)
    {
        _logger.LogError(ex, "Failed to parse Gemini JSON. Raw response: {JsonText}", jsonText);
    }

    if (apiResponse == null || string.IsNullOrEmpty(apiResponse.reply))
    {
        yield return "Lo siento, tuve un problema al procesar la respuesta. Por favor, intenta de nuevo.";
        yield break;
    }

    // Send intent metadata
    yield return $"INTENT::{apiResponse.intent}";

    // Simulate streaming. The reply is sliced in place (each chunk ends right after a space)
    // so that no character is dropped or added: consecutive spaces, which are significant in
    // Markdown, survive, and no trailing space is appended.
    string fullReply = apiResponse.reply;
    int position = 0;
    while (position < fullReply.Length)
    {
        int searchFrom = Math.Min(position + ReplyChunkLength - 1, fullReply.Length);
        int spaceIndex = fullReply.IndexOf(' ', searchFrom);
        if (spaceIndex < 0)
        {
            // No further word boundary: send whatever is left.
            yield return fullReply.Substring(position);
            break;
        }

        yield return fullReply.Substring(position, spaceIndex - position + 1);
        position = spaceIndex + 1;
        await Task.Delay(30, cancellationToken);
    }

    // Log conversation (fire-and-forget). A dedicated scope is used because the injected
    // DbContext may no longer be usable once the request has finished.
    _ = Task.Run(async () =>
    {
        using (var scope = _serviceProvider.CreateScope())
        {
            var db = scope.ServiceProvider.GetRequiredService<AppContexto>();
            try
            {
                db.ConversacionLogs.Add(new ConversacionLog
                {
                    UsuarioMensaje = safeUserMessage,
                    BotRespuesta = fullReply,
                    Fecha = DateTime.UtcNow
                });
                await db.SaveChangesAsync();
            }
            catch (Exception ex)
            {
                var logger = scope.ServiceProvider.GetRequiredService<ILogger<ChatService>>();
                logger.LogError(ex, "Error in background logging");
            }
        }
    });

    // Send summary
    yield return $"SUMMARY::{apiResponse.summary}";
}

/// <summary>
/// Builds the text placed inside the prompt's context section: the article at the request's
/// context URL when it can be loaded, otherwise the best matching front-page article (or a
/// list of headlines), followed by the knowledge base entries when there are any.
/// </summary>
private async Task<string> BuildContextAsync(ChatRequest request, string safeUserMessage)
{
    string context;
    string? articleContext = null;

    // Load article if URL provided
    if (!string.IsNullOrEmpty(request.ContextUrl) && await UrlSecurity.IsSafeUrlAsync(request.ContextUrl))
    {
        articleContext = await GetArticleContentAsync(request.ContextUrl);
    }

    if (!string.IsNullOrEmpty(articleContext))
    {
        // The URL comes from the caller, so the scraped text is sanitized exactly like the
        // content of a matched article below.
        context = SanitizeInput(articleContext);
    }
    else
    {
        var articles = await GetWebsiteNewsAsync(_siteUrl, 50);

        if (request.ShownArticles != null && request.ShownArticles.Any())
        {
            articles = articles.Where(a => !request.ShownArticles.Contains(a.Url)).ToList();
        }

        // Hybrid search: local scoring first, AI lookup only when nothing matches locally.
        var bestMatch = FindBestMatchingArticleLocal(safeUserMessage, articles)
            ?? await FindBestMatchingArticleAIAsync(safeUserMessage, articles, request.ConversationSummary);

        if (bestMatch != null && await UrlSecurity.IsSafeUrlAsync(bestMatch.Url))
        {
            string rawContent = await GetArticleContentAsync(bestMatch.Url) ?? "";
            context = $"ARTÍCULO ENCONTRADO: {bestMatch.Title}\nURL: {bestMatch.Url}\n\nCONTENIDO:\n{SanitizeInput(rawContent)}";
        }
        else
        {
            var sb = new StringBuilder();
            sb.AppendLine("NOTICIAS DISPONIBLES:");
            foreach (var article in articles.Take(15))
            {
                sb.AppendLine($"- {article.Title} ({article.Url})");
            }
            context = sb.ToString();
        }
    }

    // Add knowledge base if available
    var knowledgeItems = await GetKnowledgeItemsAsync();
    if (knowledgeItems.Any())
    {
        var kbBuilder = new StringBuilder("\n\nBASE DE CONOCIMIENTO:");
        foreach (var item in knowledgeItems.Values)
        {
            kbBuilder.AppendLine($"\n- {item.Descripcion}: {item.Valor}");
        }
        context += kbBuilder.ToString();
    }

    return context;
}

/// <summary>
/// Assembles the single meta-prompt: system instructions and JSON field rules, the optional
/// conversation history, the context, and the user's question.
/// </summary>
private string BuildUnifiedPrompt(string? systemInstructions, string? conversationSummary, string context, string safeUserMessage)
{
    var promptBuilder = new StringBuilder();

    promptBuilder.AppendLine("<instrucciones_sistema>");
    promptBuilder.AppendLine("Eres DiaBot, asistente virtual de El Día (La Plata, Argentina).");
    promptBuilder.AppendLine();
    promptBuilder.AppendLine("FORMATO DE RESPUESTA:");
    promptBuilder.AppendLine("Debes responder en formato JSON con esta estructura EXACTA:");
    promptBuilder.AppendLine("{\"intent\": \"...\", \"reply\": \"...\", \"summary\": \"...\"}");
    promptBuilder.AppendLine();

    promptBuilder.AppendLine("INSTRUCCIONES GENERALES:");
    promptBuilder.AppendLine(systemInstructions);
    promptBuilder.AppendLine("- NO uses formatos de email/carta ('Estimado/a', 'Atentamente')");
    promptBuilder.AppendLine("- NO saludes de nuevo si ya saludaste o si la pregunta es directa");
    promptBuilder.AppendLine("- Sé conciso, directo y natural");
    promptBuilder.AppendLine();

    promptBuilder.AppendLine("--- REGLAS PARA CADA CAMPO JSON ---");
    promptBuilder.AppendLine();
    promptBuilder.AppendLine("1. 'intent': Clasifica la intención usando SOLO uno de estos valores:");
    promptBuilder.AppendLine(" - \"ARTICULO_ACTUAL\": Si la pregunta es sobre el tema del artículo en <contexto>");
    promptBuilder.AppendLine(" - \"BASE_DE_CONOCIMIENTO\": Para preguntas sobre 'El Día' como empresa/organización");
    promptBuilder.AppendLine(" - \"NOTICIAS_PORTADA\": Para todo lo demás (este es el default si dudas)");
    promptBuilder.AppendLine();

    promptBuilder.AppendLine("2. 'reply': Tu respuesta en texto Markdown para el usuario.");
    promptBuilder.AppendLine(" - Si es un artículo específico: Resume brevemente e INCLUYE el enlace [Título](URL)");
    promptBuilder.AppendLine(" - Si son noticias generales: Selecciona las 3 más relevantes, breve frase c/u + enlace");
    promptBuilder.AppendLine(" - Si la pregunta refiere a algo del <historial_conversacion>, úsalo (ej: 'dónde leerla' → dale el link)");
    promptBuilder.AppendLine();

    promptBuilder.AppendLine("3. 'summary': Actualiza el historial de conversación.");
    promptBuilder.AppendLine(" - Resume el intercambio actual (pregunta + respuesta) en 1-2 líneas");
    promptBuilder.AppendLine(" - Integra con el <historial_conversacion> previo si existe");
    promptBuilder.AppendLine(" - Máximo 200 palabras para el resumen completo");
    promptBuilder.AppendLine("</instrucciones_sistema>");
    promptBuilder.AppendLine();

    // Conversation history
    if (!string.IsNullOrWhiteSpace(conversationSummary))
    {
        promptBuilder.AppendLine("<historial_conversacion>");
        promptBuilder.AppendLine(SanitizeInput(conversationSummary));
        promptBuilder.AppendLine("</historial_conversacion>");
        promptBuilder.AppendLine();
    }

    // Context
    promptBuilder.AppendLine("<contexto>");
    promptBuilder.AppendLine(context);
    promptBuilder.AppendLine("</contexto>");
    promptBuilder.AppendLine();

    // User question
    promptBuilder.AppendLine("<pregunta_usuario>");
    promptBuilder.AppendLine(safeUserMessage);
    promptBuilder.AppendLine("</pregunta_usuario>");
    promptBuilder.AppendLine();

    promptBuilder.AppendLine("RESPUESTA (SOLO el JSON, sin comentarios adicionales):");

    return promptBuilder.ToString();
}

/// <summary>
/// Returns the text between the first Markdown code fence (<c>```json</c> or plain <c>```</c>)
/// and the next closing fence, trimmed; returns the input unchanged when it is not fenced.
/// </summary>
private static string ExtractJsonPayload(string rawText)
{
    const string Fence = "```";
    const string JsonFence = "```json";

    int contentStart;
    int fenceStart = rawText.IndexOf(JsonFence, StringComparison.Ordinal);
    if (fenceStart >= 0)
    {
        contentStart = fenceStart + JsonFence.Length;
    }
    else
    {
        fenceStart = rawText.IndexOf(Fence, StringComparison.Ordinal);
        if (fenceStart < 0)
        {
            return rawText;
        }
        contentStart = fenceStart + Fence.Length;
    }

    int contentEnd = rawText.IndexOf(Fence, contentStart, StringComparison.Ordinal);
    return contentEnd > contentStart
        ? rawText.Substring(contentStart, contentEnd - contentStart).Trim()
        : rawText;
}
|
|
|
|
// --- PRIVATE METHODS ---
|
|
|
|
/// <summary>
/// Neutralizes angle brackets in text that is embedded in the tag-delimited prompt, so the
/// text cannot open or close prompt sections such as <c>&lt;contexto&gt;</c>.
/// </summary>
/// <param name="input">Text to sanitize; may be null.</param>
/// <returns>
/// An empty string for null or blank input; otherwise the input with every <c>&lt;</c>
/// replaced by <c>&amp;lt;</c> and every <c>&gt;</c> by <c>&amp;gt;</c>.
/// </returns>
private string SanitizeInput(string? input)
{
    if (string.IsNullOrWhiteSpace(input))
    {
        return string.Empty;
    }

    // Replacing "<" with "<" (and ">" with ">") would leave the text untouched; the
    // brackets have to become their entity forms for the sanitization to have any effect.
    return input.Replace("<", "&lt;").Replace(">", "&gt;");
}
|
|
|
|
/// <summary>
/// Returns the active system prompts joined by blank lines, newest first, cached for ten
/// minutes. A built-in default prompt is used when no prompt is active.
/// </summary>
private async Task<string> GetActiveSystemPromptsAsync()
{
    var cachedPrompts = await _cache.GetOrCreateAsync(SystemPromptsCacheKey, async cacheEntry =>
    {
        cacheEntry.AbsoluteExpirationRelativeToNow = TimeSpan.FromMinutes(10);

        var activePrompts = await _dbContext.SystemPrompts
            .Where(p => p.IsActive)
            .OrderByDescending(p => p.CreatedAt)
            .Select(p => p.Content)
            .ToListAsync();

        return activePrompts.Count == 0
            ? "Tu rol es ser el asistente virtual de 'El Día'. Responde de forma natural, útil y concisa. Usa un tono amigable pero profesional (estilo periodístico moderno). IMPORTANTE: NO uses saludos formales tipo carta (como 'Estimado/a'), NO saludes si el usuario no saludó primero o si es una continuación de la charla. NO repitas saludos."
            : string.Join("\n\n", activePrompts);
    });

    // The cache may hand back null; fall back to a minimal instruction in that case.
    return cachedPrompts ?? "Responde de forma natural y concisa.";
}
|
|
|
|
/// <summary>
/// Builds the safety settings sent with every Gemini request: one entry per harm category,
/// all using the same blocking threshold.
/// </summary>
private List<SafetySetting> GetDefaultSafetySettings()
{
    const string blockingThreshold = "BLOCK_MEDIUM_AND_ABOVE";
    string[] harmCategories =
    {
        "HARM_CATEGORY_HARASSMENT",
        "HARM_CATEGORY_HATE_SPEECH",
        "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "HARM_CATEGORY_DANGEROUS_CONTENT"
    };

    var settings = new List<SafetySetting>();
    foreach (var harmCategory in harmCategories)
    {
        settings.Add(new SafetySetting { Category = harmCategory, Threshold = blockingThreshold });
    }

    return settings;
}
|
|
|
|
// NOTE: UpdateConversationSummaryAsync and GetIntentAsync have been REMOVED
|
|
// Their functionality is now in the unified StreamMessageAsync call
|
|
|
|
/// <summary>
/// Stores one user/bot exchange through the injected database context.
/// Failures are logged and never propagated to the caller.
/// </summary>
/// <param name="userMessage">The user's message.</param>
/// <param name="botReply">The reply given by the bot.</param>
private async Task SaveConversationLogAsync(string userMessage, string botReply)
{
    try
    {
        var logEntry = new ConversacionLog
        {
            UsuarioMensaje = userMessage,
            BotRespuesta = botReply,
            Fecha = DateTime.UtcNow
        };

        _dbContext.ConversacionLogs.Add(logEntry);
        await _dbContext.SaveChangesAsync();
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Error guardando log.");
    }
}
|
|
|
|
/// <summary>
/// Scrapes article links (title + URL) from the page at <paramref name="url"/>.
/// </summary>
/// <param name="url">Page to scrape; must pass the URL safety check.</param>
/// <param name="cantidad">Maximum number of articles to return.</param>
/// <returns>
/// The articles found, in document order. The list is empty when the URL is rejected or no
/// article nodes exist; if an error occurs, whatever was collected so far is returned.
/// </returns>
private async Task<List<NewsArticleLink>> GetWebsiteNewsAsync(string url, int cantidad)
{
    var collected = new List<NewsArticleLink>();

    try
    {
        if (!await UrlSecurity.IsSafeUrlAsync(url))
        {
            return collected;
        }

        var document = await new HtmlWeb().LoadFromWebAsync(url);
        var candidates = document.DocumentNode.SelectNodes("//article[contains(@class, 'item')] | //article[contains(@class, 'nota_modulo')]");
        if (candidates == null)
        {
            return collected;
        }

        // Raw href values already turned into results, to skip duplicates.
        var seenHrefs = new HashSet<string>();

        foreach (var candidate in candidates)
        {
            if (collected.Count >= cantidad)
            {
                break;
            }

            var anchor = candidate.SelectSingleNode(".//a[@href]");
            var heading = candidate.SelectSingleNode(".//h2");
            if (anchor == null || heading == null)
            {
                continue;
            }

            var href = anchor.GetAttributeValue("href", string.Empty);
            if (string.IsNullOrEmpty(href) || href == "#" || seenHrefs.Contains(href))
            {
                continue;
            }

            // Root-relative links are resolved against the scraped page; anything else is kept as-is.
            string absoluteUrl = href;
            if (href.StartsWith("/"))
            {
                absoluteUrl = new Uri(new Uri(url), href).ToString();
            }

            string headline = WebUtility.HtmlDecode(heading.InnerText).Trim();
            foreach (var prefix in PrefijosAQuitar)
            {
                if (headline.StartsWith(prefix, StringComparison.OrdinalIgnoreCase))
                {
                    headline = headline.Substring(prefix.Length).Trim();
                }
            }

            collected.Add(new NewsArticleLink { Title = headline, Url = absoluteUrl });
            seenHrefs.Add(href);
        }
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Error scraping news.");
    }

    return collected;
}
|
|
|
|
/// <summary>
/// Picks the article whose title best matches the user's message without calling the AI.
/// </summary>
/// <remarks>
/// Score = Jaccard similarity between message and title tokens, plus 0.2 when they share a
/// token longer than three characters, plus 0.5 when the title contains the whole message
/// (case-insensitive). The first article with the highest score wins.
/// </remarks>
/// <param name="userMessage">The user's message.</param>
/// <param name="articles">Candidate articles.</param>
/// <returns>The best article, or null when nothing reaches a score of 0.05.</returns>
private NewsArticleLink? FindBestMatchingArticleLocal(string userMessage, List<NewsArticleLink> articles)
{
    if (string.IsNullOrWhiteSpace(userMessage) || !articles.Any())
    {
        return null;
    }

    var queryTokens = Tokenize(userMessage);
    if (queryTokens.Count == 0)
    {
        return null;
    }

    NewsArticleLink? winner = null;
    double topScore = 0;

    foreach (var candidate in articles)
    {
        var titleTokens = Tokenize(candidate.Title);

        bool sharesLongToken = queryTokens.Any(token => token.Length > 3 && titleTokens.Contains(token));
        bool titleContainsMessage = candidate.Title.Contains(userMessage, StringComparison.OrdinalIgnoreCase);

        double candidateScore = CalculateJaccardSimilarity(queryTokens, titleTokens)
            + (sharesLongToken ? 0.2 : 0.0)
            + (titleContainsMessage ? 0.5 : 0.0);

        if (candidateScore > topScore)
        {
            topScore = candidateScore;
            winner = candidate;
        }
    }

    if (topScore < 0.05)
    {
        return null;
    }

    return winner;
}
|
|
|
|
/// <summary>
/// Asks Gemini which article in <paramref name="articles"/> is most relevant to the user's
/// message, using the conversation summary to resolve references such as "that story".
/// </summary>
/// <param name="userMessage">The user's message.</param>
/// <param name="articles">Candidate articles offered to the model.</param>
/// <param name="conversationSummary">Optional summary of the conversation so far.</param>
/// <returns>
/// The article whose URL the model returned, or null when the list is empty, the model
/// answers "N/A" or an unknown URL, or the call fails (failures are logged).
/// </returns>
private async Task<NewsArticleLink?> FindBestMatchingArticleAIAsync(string userMessage, List<NewsArticleLink> articles, string? conversationSummary)
{
    if (!articles.Any()) return null;

    string safeUserMsg = SanitizeInput(userMessage);
    string safeSummary = SanitizeInput(conversationSummary);

    var promptBuilder = new StringBuilder();
    promptBuilder.AppendLine("Encuentra el artículo más relevante para la <pregunta_usuario> en la <lista_articulos>, usando el <resumen_contexto> para entender referencias (ej: 'esa nota').");

    if (!string.IsNullOrWhiteSpace(safeSummary))
    {
        promptBuilder.AppendLine("<resumen_contexto>");
        promptBuilder.AppendLine(safeSummary);
        promptBuilder.AppendLine("</resumen_contexto>");
    }

    promptBuilder.AppendLine("<lista_articulos>");
    foreach (var article in articles)
    {
        promptBuilder.AppendLine($"- Título: \"{article.Title}\", URL: {article.Url}");
    }
    promptBuilder.AppendLine("</lista_articulos>");
    promptBuilder.AppendLine($"<pregunta_usuario>{safeUserMsg}</pregunta_usuario>");
    promptBuilder.AppendLine("Responde SOLO con la URL. Si ninguna es relevante, responde 'N/A'.");

    var requestData = new GeminiRequest
    {
        Contents = new[] { new Content { Parts = new[] { new Part { Text = promptBuilder.ToString() } } } },
        SafetySettings = GetDefaultSafetySettings()
    };
    var nonStreamingApiUrl = _apiUrl.Replace(":streamGenerateContent?alt=sse&", ":generateContent?");
    var httpClient = _httpClientFactory.CreateClient();

    try
    {
        // Dispose the response so the underlying connection is released promptly.
        using var response = await httpClient.PostAsJsonAsync(nonStreamingApiUrl, requestData);
        if (!response.IsSuccessStatusCode)
        {
            _logger.LogWarning("Error API Gemini al buscar artículo relevante: {StatusCode}", response.StatusCode);
            return null;
        }

        var geminiResponse = await response.Content.ReadFromJsonAsync<GeminiResponse>();

        // The model sometimes wraps the URL in quotes, backticks or angle brackets; strip
        // them so the exact-match lookup below still succeeds.
        var responseUrl = geminiResponse?.Candidates?.FirstOrDefault()?.Content?.Parts?.FirstOrDefault()?.Text?
            .Trim()
            .Trim('`', '"', '\'', '<', '>')
            .Trim();

        if (string.IsNullOrEmpty(responseUrl) || string.Equals(responseUrl, "N/A", StringComparison.OrdinalIgnoreCase))
        {
            return null;
        }

        return articles.FirstOrDefault(a => a.Url == responseUrl);
    }
    catch (Exception ex)
    {
        // Best-effort lookup: the caller falls back to the headline list, but the failure
        // should still be visible in the logs.
        _logger.LogWarning(ex, "Error buscando artículo relevante con IA.");
        return null;
    }
}
|
|
|
|
/// <summary>
/// Splits text into a set of normalized search terms: lower-cased, without diacritics,
/// with leading/trailing punctuation removed, keeping only terms longer than two characters.
/// </summary>
/// <param name="text">Text to tokenize.</param>
/// <returns>The distinct terms; empty when the text is null or blank.</returns>
private HashSet<string> Tokenize(string text)
{
    if (string.IsNullOrWhiteSpace(text))
    {
        return new HashSet<string>();
    }

    // Invariant casing keeps tokens identical regardless of the server's current culture
    // (culture-sensitive lower-casing maps some letters differently, e.g. under tr-TR).
    var normalizedText = RemoveDiacritics(text.ToLowerInvariant());

    // Only punctuation that actually occurs in the text needs to be trimmed from the tokens.
    var punctuation = normalizedText.Where(char.IsPunctuation).Distinct().ToArray();

    return normalizedText
        .Split()
        .Select(x => x.Trim(punctuation))
        .Where(x => x.Length > 2)
        .ToHashSet();
}
|
|
|
|
/// <summary>
/// Strips accents and other combining marks from <paramref name="text"/>.
/// </summary>
/// <param name="text">Text to process.</param>
/// <returns>The text without non-spacing marks, recomposed to normalization form C.</returns>
private string RemoveDiacritics(string text)
{
    // Decompose first so every accented letter becomes base letter + combining mark.
    string decomposed = text.Normalize(NormalizationForm.FormD);
    var kept = new StringBuilder(capacity: decomposed.Length);

    foreach (char current in decomposed)
    {
        if (CharUnicodeInfo.GetUnicodeCategory(current) == UnicodeCategory.NonSpacingMark)
        {
            continue;
        }

        kept.Append(current);
    }

    return kept.ToString().Normalize(NormalizationForm.FormC);
}
|
|
|
|
/// <summary>
/// Computes the Jaccard similarity of two term sets: size of the intersection divided by
/// size of the union.
/// </summary>
/// <param name="set1">First set of terms.</param>
/// <param name="set2">Second set of terms.</param>
/// <returns>A value between 0 and 1; 0 when either set is empty.</returns>
private double CalculateJaccardSimilarity(HashSet<string> set1, HashSet<string> set2)
{
    if (set1.Count == 0 || set2.Count == 0)
    {
        return 0.0;
    }

    int sharedCount = set1.Intersect(set2).Count();
    int combinedCount = set1.Union(set2).Count();

    return (double)sharedCount / combinedCount;
}
|
|
|
|
/// <summary>
/// Returns the knowledge base items keyed by <c>Clave</c>, cached for five minutes.
/// The items are read through a context resolved from a fresh DI scope.
/// </summary>
private async Task<Dictionary<string, ContextoItem>> GetKnowledgeItemsAsync()
{
    var cachedItems = await _cache.GetOrCreateAsync(CacheKeys.KnowledgeItems, async cacheEntry =>
    {
        cacheEntry.AbsoluteExpirationRelativeToNow = TimeSpan.FromMinutes(5);

        using var scope = _serviceProvider.CreateScope();
        var scopedContext = scope.ServiceProvider.GetRequiredService<AppContexto>();

        return await scopedContext.ContextoItems
            .AsNoTracking()
            .ToDictionaryAsync(item => item.Clave, item => item);
    });

    // The cache may hand back null; callers always get a usable dictionary.
    return cachedItems ?? new Dictionary<string, ContextoItem>();
}
|
|
|
|
/// <summary>
/// Returns the active context sources, cached for five minutes.
/// The sources are read through a context resolved from a fresh DI scope.
/// </summary>
private async Task<List<FuenteContexto>> GetFuentesDeContextoAsync()
{
    var cachedSources = await _cache.GetOrCreateAsync(CacheKeys.FuentesDeContexto, async cacheEntry =>
    {
        cacheEntry.AbsoluteExpirationRelativeToNow = TimeSpan.FromMinutes(5);

        using var scope = _serviceProvider.CreateScope();
        var scopedContext = scope.ServiceProvider.GetRequiredService<AppContexto>();

        return await scopedContext.FuentesDeContexto
            .Where(f => f.Activo)
            .AsNoTracking()
            .ToListAsync();
    });

    // The cache may hand back null; callers always get a usable list.
    return cachedSources ?? new List<FuenteContexto>();
}
|
|
|
|
/// <summary>
/// Downloads an article page and returns the text of the paragraphs inside the element
/// whose class contains <c>cuerpo_nota</c>, one paragraph per line.
/// </summary>
/// <param name="url">Article URL; must pass the URL safety check.</param>
/// <returns>
/// The article text, or null when the URL is rejected, the page has no matching paragraphs,
/// or loading/parsing fails (the failure is logged).
/// </returns>
private async Task<string?> GetArticleContentAsync(string url)
{
    if (!await UrlSecurity.IsSafeUrlAsync(url)) return null;

    try
    {
        var web = new HtmlWeb();
        var doc = await web.LoadFromWebAsync(url);
        var paragraphs = doc.DocumentNode.SelectNodes("//div[contains(@class, 'cuerpo_nota')]//p");
        if (paragraphs == null || !paragraphs.Any()) return null;

        var sb = new StringBuilder();
        foreach (var p in paragraphs)
        {
            var cleanText = WebUtility.HtmlDecode(p.InnerText).Trim();
            if (!string.IsNullOrWhiteSpace(cleanText))
            {
                sb.AppendLine(cleanText);
            }
        }

        return sb.ToString();
    }
    catch (Exception ex)
    {
        // Still best-effort (callers treat null as "no content"), but no longer silent.
        _logger.LogWarning(ex, "Error obteniendo contenido del artículo {Url}", url);
        return null;
    }
}
|
|
|
|
/// <summary>
/// Returns the decoded inner text of the node selected by the source's content selector
/// (or <c>//main | //body</c> when no selector is set), cached for thirty minutes per
/// URL/selector pair.
/// </summary>
/// <param name="fuente">Context source providing the URL and the optional XPath selector.</param>
/// <returns>
/// The scraped text, or an empty string when the URL is rejected, the node is not found,
/// or loading/parsing fails (the failure is logged).
/// </returns>
private async Task<string> ScrapeUrlContentAsync(FuenteContexto fuente)
{
    if (!await UrlSecurity.IsSafeUrlAsync(fuente.Url)) return string.Empty;

    return await _cache.GetOrCreateAsync($"scrape_{fuente.Url}_{fuente.SelectorContenido}", async entry =>
    {
        entry.AbsoluteExpirationRelativeToNow = TimeSpan.FromMinutes(30);
        try
        {
            var web = new HtmlWeb();
            var doc = await web.LoadFromWebAsync(fuente.Url);
            string selector = !string.IsNullOrWhiteSpace(fuente.SelectorContenido) ? fuente.SelectorContenido : "//main | //body";
            var node = doc.DocumentNode.SelectSingleNode(selector);
            if (node == null) return string.Empty;
            return WebUtility.HtmlDecode(node.InnerText) ?? string.Empty;
        }
        catch (Exception ex)
        {
            // Still best-effort (an empty string is returned), but no longer silent.
            _logger.LogWarning(ex, "Error obteniendo contenido de la fuente {Url}", fuente.Url);
            return string.Empty;
        }
    }) ?? string.Empty;
}
|
|
}
|
|
}
|