using System.Reflection;
using System.Text.Json;
using JobTrackerApi.Controllers;
using JobTrackerApi.Models;
using JobTrackerApi.Services;
using JobTrackerApi.Tests.TestSupport;
using Microsoft.AspNetCore.Http;
using Microsoft.AspNetCore.Identity;
using Microsoft.AspNetCore.Mvc;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.Hosting;
using Moq;
using Xunit;

namespace JobTrackerApi.Tests;

/// <summary>
/// Opt-in harness that pushes CV files found in a local directory through
/// <c>ProfileCvController</c>'s non-public <c>ExtractTextAsync</c> and
/// <c>BuildStructuredCvAsync</c> methods (invoked via reflection) and writes a
/// JSON summary of the parse results to the system temp directory.
/// </summary>
/// <remarks>
/// NOTE(review): in this view several calls appear without generic type arguments
/// (<c>It.IsAny()</c>, <c>It.Is(...)</c>, <c>new Mock()</c>, <c>new List()</c>,
/// <c>new Dictionary { ... }</c>, and the <c>(Task)</c> casts whose awaited result is
/// assigned to a variable). They were presumably lost when the file was extracted —
/// confirm against the original before building.
/// </remarks>
public sealed class CvCorpusHarnessTests
{
    // Directory scanned for sample CVs. The test returns early when it does not exist.
    private static readonly string CorpusRoot = "/home/pi/cvs";

    /// <summary>
    /// Parses up to eight corpus files and records, per file, the extracted text length,
    /// the first job/education entries, and any location values flagged by
    /// <see cref="LooksSuspiciousLocation"/>.
    /// </summary>
    /// <remarks>
    /// When the corpus directory is missing or holds no matching files the method returns
    /// without asserting anything, so the test is reported as passed rather than skipped.
    /// </remarks>
    [Fact]
    public async Task Local_cv_corpus_harness_produces_repeatable_parse_report_when_available()
    {
        // No corpus on this machine: return without running any assertion.
        if (!Directory.Exists(CorpusRoot)) return;

        // Top-level files only, filtered to .pdf/.docx/.txt/.md (case-insensitive),
        // ordered by path so the same eight files are chosen on every run.
        var files = Directory.EnumerateFiles(CorpusRoot, "*.*", SearchOption.TopDirectoryOnly)
            .Where(path => path.EndsWith(".pdf", StringComparison.OrdinalIgnoreCase)
                || path.EndsWith(".docx", StringComparison.OrdinalIgnoreCase)
                || path.EndsWith(".txt", StringComparison.OrdinalIgnoreCase)
                || path.EndsWith(".md", StringComparison.OrdinalIgnoreCase))
            .OrderBy(path => path, StringComparer.OrdinalIgnoreCase)
            .Take(8)
            .ToList();
        if (files.Count == 0) return;

        // The user manager mock always resolves to this fixed user and reports successful updates.
        var user = new ApplicationUser { Id = "user-1", ProfileCvText = "seed" };
        var userManager = TestHostFactory.CreateUserManager();
        userManager.Setup(x => x.GetUserAsync(It.IsAny())).ReturnsAsync(user);
        userManager.Setup(x => x.UpdateAsync(It.IsAny())).ReturnsAsync(IdentityResult.Success);

        // AI service stub, matched on the instruction text and the two numeric arguments:
        //  - the "Extract this CV into structured JSON" request returns an empty string;
        //  - the "Reconstruct this CV text extracted from a PDF" request returns its second
        //    argument unchanged.
        var aiService = new Mock();
        aiService.Setup(x => x.SummarizeSectionAsync(It.Is(instruction => instruction.Contains("Extract this CV into structured JSON", StringComparison.Ordinal)),
            It.IsAny(), 3200, 900)).ReturnsAsync(string.Empty);
        aiService.Setup(x => x.SummarizeSectionAsync(It.Is(instruction => instruction.Contains("Reconstruct this CV text extracted from a PDF", StringComparison.Ordinal)),
            It.IsAny(), 2800, 900)).ReturnsAsync((string _, string text, int _, int __) => text);

        await using var db = TestHostFactory.CreateInMemoryDb();
        var paths = CreatePaths();
        var controller = new ProfileCvController(userManager.Object, aiService.Object, db, paths, NoOpCvAiClassifier.Instance)
        {
            ControllerContext = new ControllerContext { HttpContext = new DefaultHttpContext() }
        };

        // Both methods under test are non-public, so they are looked up by name via reflection.
        // The NotNull asserts fail the test if either lookup returns null.
        var extractMethod = typeof(ProfileCvController).GetMethod("ExtractTextAsync", BindingFlags.NonPublic | BindingFlags.Static);
        var buildMethod = typeof(ProfileCvController).GetMethod("BuildStructuredCvAsync", BindingFlags.NonPublic | BindingFlags.Instance);
        Assert.NotNull(extractMethod);
        Assert.NotNull(buildMethod);

        var report = new List();
        foreach (var path in files)
        {
            // Wrap the on-disk file in a FormFile before handing it to ExtractTextAsync.
            await using var stream = File.OpenRead(path);
            var fileName = Path.GetFileName(path);
            var formFile = new FormFile(stream, 0, stream.Length, "file", fileName)
            {
                Headers = new HeaderDictionary(),
                ContentType = GuessContentType(path)
            };
            var extension = Path.GetExtension(path);

            // Static method: invoked with a null target and (formFile, extension) as arguments.
            var extractTask = (Task)extractMethod!.Invoke(null, new object[] { formFile, extension })!;
            var text = await extractTask;
            Assert.False(string.IsNullOrWhiteSpace(text));

            // Instance method: invoked on the controller with (text, CancellationToken.None).
            var buildTask = (Task)buildMethod!.Invoke(controller, new object[] { text, CancellationToken.None })!;
            var structured = await buildTask;
            Assert.NotNull(structured);

            // One anonymous-object entry per file; property names become the JSON keys.
            report.Add(new
            {
                file = fileName,
                characters = text.Length,
                contactLocation = structured.Contact.Location,
                firstJob = structured.Jobs.FirstOrDefault()?.Title,
                firstJobLocation = structured.Jobs.FirstOrDefault()?.Location,
                firstEducation = structured.Education.FirstOrDefault()?.Qualification,
                firstEducationLocation = structured.Education.FirstOrDefault()?.Location,
                // All non-blank job, education and contact locations that trip the heuristic.
                suspiciousLocations = structured.Jobs.Select(job => job.Location)
                    .Concat(structured.Education.Select(education => education.Location))
                    .Append(structured.Contact.Location)
                    .Where(value => !string.IsNullOrWhiteSpace(value))
                    .Where(LooksSuspiciousLocation)
                    .ToList()
            });
        }

        // The report goes to the temp directory under a UTC timestamp with one-second resolution.
        // NOTE(review): two runs within the same second would write to the same path.
        var reportPath = Path.Combine(Path.GetTempPath(), $"jobtracker-cv-corpus-{DateTime.UtcNow:yyyyMMddHHmmss}.json");
        await File.WriteAllTextAsync(reportPath, JsonSerializer.Serialize(report, new JsonSerializerOptions { WriteIndented = true }));
        Assert.True(report.Count > 0);
    }

    /// <summary>
    /// Heuristic for location values that look like mis-parsed text.
    /// </summary>
    /// <param name="value">Candidate location string; may be null.</param>
    /// <returns>
    /// <c>false</c> for null or whitespace; otherwise <c>true</c> when the value contains
    /// "Python", "Ruby" or " S A L E S " (case-insensitive) or any digit character.
    /// </returns>
    private static bool LooksSuspiciousLocation(string? value)
    {
        if (string.IsNullOrWhiteSpace(value)) return false;
        return value.Contains("Python", StringComparison.OrdinalIgnoreCase)
            || value.Contains("Ruby", StringComparison.OrdinalIgnoreCase)
            || value.Contains(" S A L E S ", StringComparison.OrdinalIgnoreCase)
            || value.Any(char.IsDigit);
    }

    /// <summary>
    /// Maps a file path's extension (lower-cased) to the content type set on the FormFile.
    /// </summary>
    /// <param name="path">File path whose extension is inspected.</param>
    /// <returns>
    /// The MIME type for .pdf, .docx or .md; "text/plain" for every other extension.
    /// </returns>
    private static string GuessContentType(string path)
    {
        return Path.GetExtension(path).ToLowerInvariant() switch
        {
            ".pdf" => "application/pdf",
            ".docx" => "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            ".md" => "text/markdown",
            _ => "text/plain"
        };
    }

    /// <summary>
    /// Builds an <c>AppPaths</c> whose configuration and content root both point at a freshly
    /// created, uniquely named directory under the system temp path.
    /// </summary>
    /// <returns>An <c>AppPaths</c> built from the in-memory configuration and the mocked environment.</returns>
    /// <remarks>
    /// NOTE(review): nothing in this file deletes the directory created here, so each run
    /// that reaches this method leaves one behind.
    /// </remarks>
    private static AppPaths CreatePaths()
    {
        // The GUID suffix gives each call its own directory name.
        var tempRoot = Path.Combine(Path.GetTempPath(), $"jobtracker-cv-corpus-{Guid.NewGuid():N}");
        Directory.CreateDirectory(tempRoot);
        var config = new ConfigurationBuilder()
            .AddInMemoryCollection(new Dictionary
            {
                ["Data:Root"] = tempRoot,
                ["Data:CvArtifactsRoot"] = Path.Combine(tempRoot, "CvArtifacts")
            })
            .Build();
        var env = new Mock();
        env.SetupGet(x => x.ContentRootPath).Returns(tempRoot);
        return new AppPaths(config, env.Object);
    }
}