Evolve summarizer into AI service with OCR support

This commit is contained in:
cesnimda
2026-03-23 20:12:34 +01:00
parent 90fdd8e1a5
commit 653f713a78
20 changed files with 475 additions and 129 deletions
@@ -1,5 +1,6 @@
using System.Text;
using System.Text.RegularExpressions;
using JobTrackerApi.Services;
using JobTrackerApi.Models;
using Microsoft.AspNetCore.Authorization;
using Microsoft.AspNetCore.Identity;
@@ -18,15 +19,21 @@ public sealed class ProfileCvController : ControllerBase
".md",
".pdf",
".docx",
".png",
".jpg",
".jpeg",
".webp",
};
private const long MaxFileSizeBytes = 5 * 1024 * 1024;
private readonly UserManager<ApplicationUser> _users;
private readonly ISummarizerService _aiService;
public ProfileCvController(UserManager<ApplicationUser> users)
/// <summary>
/// Creates the controller and keeps the supplied services for later use.
/// </summary>
/// <param name="users">Identity user manager; stored in <c>_users</c>.</param>
/// <param name="aiService">Summarizer/AI service; stored in <c>_aiService</c>.</param>
public ProfileCvController(UserManager<ApplicationUser> users, ISummarizerService aiService)
{
    // One tuple assignment; equivalent to assigning each readonly field in turn.
    (_users, _aiService) = (users, aiService);
}
[HttpPost("upload")]
@@ -41,10 +48,34 @@ public sealed class ProfileCvController : ControllerBase
var extension = Path.GetExtension(file.FileName ?? string.Empty);
if (!AllowedExtensions.Contains(extension))
{
return BadRequest("Only .txt, .md, .pdf, and .docx CV imports are supported right now.");
return BadRequest("Only .txt, .md, .pdf, .docx, .png, .jpg, .jpeg, and .webp CV imports are supported right now.");
}
var text = (await ExtractTextAsync(file, extension)).Trim();
string text;
var canUseAiExtraction = string.Equals(extension, ".pdf", StringComparison.OrdinalIgnoreCase)
|| string.Equals(extension, ".docx", StringComparison.OrdinalIgnoreCase)
|| string.Equals(extension, ".txt", StringComparison.OrdinalIgnoreCase)
|| string.Equals(extension, ".md", StringComparison.OrdinalIgnoreCase)
|| string.Equals(extension, ".png", StringComparison.OrdinalIgnoreCase)
|| string.Equals(extension, ".jpg", StringComparison.OrdinalIgnoreCase)
|| string.Equals(extension, ".jpeg", StringComparison.OrdinalIgnoreCase)
|| string.Equals(extension, ".webp", StringComparison.OrdinalIgnoreCase);
if (canUseAiExtraction)
{
await using var uploadStream = file.OpenReadStream();
var extracted = await _aiService.ExtractTextAsync(uploadStream, file.FileName ?? $"cv{extension}", file.ContentType, HttpContext.RequestAborted);
text = extracted?.Text?.Trim() ?? string.Empty;
}
else
{
text = string.Empty;
}
if (string.IsNullOrWhiteSpace(text))
{
text = (await ExtractTextAsync(file, extension)).Trim();
}
if (string.IsNullOrWhiteSpace(text))
{
return BadRequest("The uploaded CV file could not be read or was empty.");