refactor, security updates, cv extraction upgrades
This commit is contained in:
@@ -1,3 +1,4 @@
|
||||
using System.IO.Enumeration;
|
||||
using System.Reflection;
|
||||
using System.Security.Cryptography;
|
||||
using System.Text.Json;
|
||||
@@ -24,11 +25,13 @@ public sealed class CvCorpusHarnessTests
|
||||
{
|
||||
if (!Directory.Exists(CorpusRoot)) return;
|
||||
|
||||
var ignoredPatterns = ResolveIgnoredPatterns();
|
||||
var files = Directory.EnumerateFiles(CorpusRoot, "*.*", SearchOption.TopDirectoryOnly)
|
||||
.Where(path => path.EndsWith(".pdf", StringComparison.OrdinalIgnoreCase)
|
||||
|| path.EndsWith(".docx", StringComparison.OrdinalIgnoreCase)
|
||||
|| path.EndsWith(".txt", StringComparison.OrdinalIgnoreCase)
|
||||
|| path.EndsWith(".md", StringComparison.OrdinalIgnoreCase))
|
||||
.Where(path => !IsIgnoredFile(path, ignoredPatterns))
|
||||
.OrderBy(path => path, StringComparer.OrdinalIgnoreCase)
|
||||
.ToList();
|
||||
|
||||
@@ -51,17 +54,20 @@ public sealed class CvCorpusHarnessTests
|
||||
var aiService = new Mock<ISummarizerService>();
|
||||
aiService.Setup(x => x.SummarizeSectionAsync(It.Is<string>(instruction => instruction.Contains("Extract this CV into structured JSON", StringComparison.Ordinal)), It.IsAny<string>(), 3200, 900)).ReturnsAsync(string.Empty);
|
||||
aiService.Setup(x => x.SummarizeSectionAsync(It.Is<string>(instruction => instruction.Contains("Reconstruct this CV text extracted from a PDF", StringComparison.Ordinal)), It.IsAny<string>(), 2800, 900)).ReturnsAsync((string _, string text, int _, int __) => text);
|
||||
var cvAiNormalizer = CreateCvAiNormalizerFromEnvironment();
|
||||
|
||||
await using var db = TestHostFactory.CreateInMemoryDb();
|
||||
var paths = CreatePaths(outputRoot);
|
||||
var controller = new ProfileCvController(userManager.Object, aiService.Object, db, paths, null, NoOpCvAiClassifier.Instance)
|
||||
var controller = new ProfileCvController(userManager.Object, aiService.Object, db, paths, null, NoOpCvAiClassifier.Instance, cvAiNormalizer)
|
||||
{
|
||||
ControllerContext = new ControllerContext { HttpContext = new DefaultHttpContext() }
|
||||
};
|
||||
|
||||
var extractMethod = typeof(ProfileCvController).GetMethod("ExtractTextAsync", BindingFlags.NonPublic | BindingFlags.Static);
|
||||
var reconstructMethod = typeof(ProfileCvController).GetMethod("MaybeReconstructStructuredCvAsync", BindingFlags.NonPublic | BindingFlags.Instance);
|
||||
var buildMethod = typeof(ProfileCvController).GetMethod("BuildStructuredCvAsync", BindingFlags.NonPublic | BindingFlags.Instance);
|
||||
Assert.NotNull(extractMethod);
|
||||
Assert.NotNull(reconstructMethod);
|
||||
Assert.NotNull(buildMethod);
|
||||
|
||||
var entries = new List<CvBenchmarkEntry>();
|
||||
@@ -80,7 +86,11 @@ public sealed class CvCorpusHarnessTests
|
||||
var text = await extractTask;
|
||||
Assert.False(string.IsNullOrWhiteSpace(text));
|
||||
|
||||
var buildTask = (Task<StructuredCvProfile>)buildMethod!.Invoke(controller, new object[] { text, CancellationToken.None })!;
|
||||
var reconstructTask = (Task<string>)reconstructMethod!.Invoke(controller, new object[] { text, CancellationToken.None })!;
|
||||
var normalizedText = await reconstructTask;
|
||||
Assert.False(string.IsNullOrWhiteSpace(normalizedText));
|
||||
|
||||
var buildTask = (Task<StructuredCvProfile>)buildMethod!.Invoke(controller, new object[] { normalizedText, CancellationToken.None })!;
|
||||
var structured = StructuredCvProfileJson.Normalize(await buildTask);
|
||||
Assert.NotNull(structured);
|
||||
|
||||
@@ -199,6 +209,33 @@ public sealed class CvCorpusHarnessTests
|
||||
return Path.Combine(outputRoot, "approved-fixtures");
|
||||
}
|
||||
|
||||
/// <summary>
/// Reads the comma-separated ignore patterns from the <c>CV_BENCHMARK_IGNORE</c>
/// environment variable.
/// </summary>
/// <returns>
/// The trimmed, non-empty patterns in the order they were configured, or an empty
/// list when the variable is unset or blank.
/// </returns>
private static List<string> ResolveIgnoredPatterns()
{
    var patterns = new List<string>();

    var rawValue = Environment.GetEnvironmentVariable("CV_BENCHMARK_IGNORE");
    if (string.IsNullOrWhiteSpace(rawValue))
    {
        return patterns;
    }

    // RemoveEmptyEntries combined with TrimEntries already discards entries that
    // consist only of white space, so no further filtering is needed.
    const StringSplitOptions splitOptions =
        StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries;

    foreach (var entry in rawValue.Split(',', splitOptions))
    {
        patterns.Add(entry);
    }

    return patterns;
}
|
||||
|
||||
/// <summary>
/// Determines whether the file name of <paramref name="path"/> matches any of the
/// supplied ignore patterns.
/// </summary>
/// <param name="path">Path of the candidate file; only its file-name part is matched.</param>
/// <param name="ignoredPatterns">Simple wildcard expressions to test against the file name.</param>
/// <returns>
/// <c>true</c> when at least one pattern matches the file name (case-insensitively);
/// <c>false</c> otherwise, including when no patterns are supplied.
/// </returns>
private static bool IsIgnoredFile(string path, List<string> ignoredPatterns)
{
    if (ignoredPatterns.Count == 0)
    {
        return false;
    }

    // Patterns are matched against the bare file name, never the directory part.
    var candidateName = Path.GetFileName(path);

    return ignoredPatterns.Exists(
        expression => FileSystemName.MatchesSimpleExpression(expression, candidateName, ignoreCase: true));
}
|
||||
|
||||
private static string PrettyJson(string normalizedJson)
|
||||
{
|
||||
using var doc = JsonDocument.Parse(normalizedJson);
|
||||
@@ -327,4 +364,20 @@ public sealed class CvCorpusHarnessTests
|
||||
env.SetupGet(x => x.ContentRootPath).Returns(tempRoot);
|
||||
return new AppPaths(config, env.Object);
|
||||
}
|
||||
|
||||
/// <summary>
/// Builds the CV normalizer used by the harness from the <c>CV_AI_BASE_URL</c>
/// environment variable.
/// </summary>
/// <returns>
/// <see cref="NoOpCvAiNormalizer.Instance"/> when the variable is unset or blank;
/// otherwise a <see cref="CvAiNormalizer"/> backed by an HTTP client factory whose
/// "ai-service" client targets the configured base URL.
/// </returns>
private static ICvAiNormalizer CreateCvAiNormalizerFromEnvironment()
{
    var configuredUrl = Environment.GetEnvironmentVariable("CV_AI_BASE_URL");
    if (string.IsNullOrWhiteSpace(configuredUrl))
    {
        return NoOpCvAiNormalizer.Instance;
    }

    var serviceCollection = new Microsoft.Extensions.DependencyInjection.ServiceCollection();
    serviceCollection.AddHttpClient("ai-service", httpClient =>
    {
        // The Uri constructor throws if the configured value is not a valid absolute URI.
        httpClient.BaseAddress = new Uri(configuredUrl.Trim());
        httpClient.Timeout = TimeSpan.FromSeconds(180);
    });

    // NOTE(review): the service provider is not disposed here; presumably it must
    // outlive this method so the factory stays usable - confirm.
    var httpClientFactory = serviceCollection
        .BuildServiceProvider()
        .GetRequiredService<System.Net.Http.IHttpClientFactory>();

    return new CvAiNormalizer(httpClientFactory);
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user