// jobtrackingapp/JobTrackerApi/Controllers/ProfileCvController.cs

using System.Text;
using System.Text.RegularExpressions;
using JobTrackerApi.Services;
using JobTrackerApi.Models;
using Microsoft.AspNetCore.Authorization;
using Microsoft.AspNetCore.Identity;
using Microsoft.AspNetCore.Mvc;
namespace JobTrackerApi.Controllers;
/// <summary>
/// Endpoints under <c>api/profile-cv</c> that import a CV file into the signed-in user's
/// <c>ProfileCvText</c> and ask the summarizer service to rewrite that text.
/// </summary>
[ApiController]
[Route("api/profile-cv")]
[Authorize(AuthenticationSchemes = "local")]
public sealed class ProfileCvController : ControllerBase
{
    /// <summary>File extensions accepted by <see cref="Upload"/> (compared case-insensitively).</summary>
    private static readonly HashSet<string> AllowedExtensions = new(StringComparer.OrdinalIgnoreCase)
    {
        ".txt",
        ".md",
        ".pdf",
        ".docx",
        ".png",
        ".jpg",
        ".jpeg",
        ".webp",
    };

    /// <summary>Largest accepted upload, measured on the file part itself.</summary>
    private const long MaxFileSizeBytes = 5 * 1024 * 1024;

    // The request body of a multipart upload is larger than the file it carries (boundaries,
    // part headers, other form fields). Leaving headroom lets a file of exactly the maximum
    // size reach the explicit length check in Upload instead of being cut off earlier.
    private const long MultipartOverheadBytes = 64 * 1024;
    private const long MaxRequestSizeBytes = MaxFileSizeBytes + MultipartOverheadBytes;

    // A PDF literal string: "(" ... ")" where ")" may only appear inside when backslash-escaped.
    private const string PdfLiteralPattern = @"\((?<text>(?:\\.|[^\\)])*)\)";

    /// <summary>Matches a literal string immediately shown with the <c>Tj</c> operator.</summary>
    private static readonly Regex PdfShowText = new(
        PdfLiteralPattern + @"\s*Tj",
        RegexOptions.Singleline | RegexOptions.Compiled);

    /// <summary>Matches an array of strings/adjustments shown with the <c>TJ</c> operator.</summary>
    private static readonly Regex PdfShowTextArray = new(
        @"\[(?<items>(?:\((?:\\.|[^\\)])*\)|[^\[\]()])*)\]\s*TJ",
        RegexOptions.Singleline | RegexOptions.Compiled);

    /// <summary>Matches each literal string inside a <c>TJ</c> array.</summary>
    private static readonly Regex PdfLiteralString = new(
        PdfLiteralPattern,
        RegexOptions.Singleline | RegexOptions.Compiled);

    private static readonly Regex ControlCharacters = new(@"[\x00-\x08\x0B\x0C\x0E-\x1F]", RegexOptions.Compiled);
    private static readonly Regex WhitespaceRun = new(@"\s+", RegexOptions.Compiled);
    private static readonly Regex MarkupTag = new("<[^>]+>", RegexOptions.Compiled);

    private readonly UserManager<ApplicationUser> _users;
    private readonly ISummarizerService _aiService;

    public ProfileCvController(UserManager<ApplicationUser> users, ISummarizerService aiService)
    {
        _users = users;
        _aiService = aiService;
    }

    /// <summary>
    /// Imports an uploaded CV file and stores its extracted text on the current user.
    /// </summary>
    /// <param name="file">The uploaded file from the multipart form.</param>
    /// <returns>
    /// 200 with <c>{ imported, characters }</c> on success; 401 when no user resolves from the
    /// principal; 400 when the file is missing, too large, of an unsupported type, yields no
    /// text, or the user update fails.
    /// </returns>
    [HttpPost("upload")]
    [RequestSizeLimit(MaxRequestSizeBytes)]
    public async Task<IActionResult> Upload([FromForm] IFormFile file)
    {
        var user = await _users.GetUserAsync(User);
        if (user is null) return Unauthorized();
        if (file is null || file.Length == 0) return BadRequest("Select a CV file to upload.");
        if (file.Length > MaxFileSizeBytes) return BadRequest("CV import file is too large. Keep it under 5 MB.");

        var extension = Path.GetExtension(file.FileName ?? string.Empty);
        if (!AllowedExtensions.Contains(extension))
        {
            return BadRequest("Only .txt, .md, .pdf, .docx, .png, .jpg, .jpeg, and .webp CV imports are supported right now.");
        }

        var cancellationToken = HttpContext.RequestAborted;

        // Every allowed extension is first offered to the AI extraction service.
        string text;
        await using (var uploadStream = file.OpenReadStream())
        {
            var extracted = await _aiService.ExtractTextAsync(
                uploadStream,
                file.FileName ?? $"cv{extension}",
                file.ContentType,
                cancellationToken);
            text = extracted?.Text?.Trim() ?? string.Empty;
        }

        // When the service produced nothing, fall back to the built-in best-effort extractors.
        if (string.IsNullOrWhiteSpace(text))
        {
            text = (await ExtractTextAsync(file, extension, cancellationToken)).Trim();
        }

        if (string.IsNullOrWhiteSpace(text))
        {
            return BadRequest("The uploaded CV file could not be read or was empty.");
        }

        user.ProfileCvText = text;
        var result = await _users.UpdateAsync(user);
        if (!result.Succeeded)
        {
            return IdentityFailure(result);
        }

        return Ok(new { imported = true, characters = text.Length });
    }

    /// <summary>
    /// Replaces the current user's CV text with a sectioned rewrite produced by the summarizer service.
    /// </summary>
    /// <returns>
    /// 200 with <c>{ rebuilt, characters, text }</c>; 401 when no user resolves; 400 when there
    /// is no CV text, the service returns nothing, or the user update fails.
    /// </returns>
    [HttpPost("rebuild")]
    public async Task<IActionResult> Rebuild()
    {
        var user = await _users.GetUserAsync(User);
        if (user is null) return Unauthorized();
        if (string.IsNullOrWhiteSpace(user.ProfileCvText)) return BadRequest("Add or import CV text before rebuilding it.");

        // The two numeric arguments are passed straight to the summarizer; their meaning is
        // defined by ISummarizerService, which is not visible in this file.
        var rebuilt = await _aiService.SummarizeSectionAsync(
            "Rewrite this CV into a stronger master CV with clear sections such as Professional Summary, Core Skills, Experience Highlights, and Selected Achievements. Preserve only factual claims, avoid inventing employers or metrics, and make the output clean and ready for tailoring to job applications. Return only the rebuilt CV text.",
            user.ProfileCvText,
            2200,
            700);
        if (string.IsNullOrWhiteSpace(rebuilt))
        {
            return BadRequest("The AI service could not rebuild your CV text right now.");
        }

        user.ProfileCvText = rebuilt.Trim();
        var result = await _users.UpdateAsync(user);
        if (!result.Succeeded)
        {
            return IdentityFailure(result);
        }

        return Ok(new { rebuilt = true, characters = user.ProfileCvText.Length, text = user.ProfileCvText });
    }

    /// <summary>
    /// Replaces the current user's CV text with a tightened rewrite produced by the summarizer service.
    /// </summary>
    /// <returns>
    /// 200 with <c>{ improved, characters, text }</c>; 401 when no user resolves; 400 when there
    /// is no CV text, the service returns nothing, or the user update fails.
    /// </returns>
    [HttpPost("improve")]
    public async Task<IActionResult> Improve()
    {
        var user = await _users.GetUserAsync(User);
        if (user is null) return Unauthorized();
        if (string.IsNullOrWhiteSpace(user.ProfileCvText)) return BadRequest("Add or import CV text before improving it.");

        // The two numeric arguments are passed straight to the summarizer; their meaning is
        // defined by ISummarizerService, which is not visible in this file.
        var improved = await _aiService.SummarizeSectionAsync(
            "Rewrite this CV into a cleaner, better-structured master CV profile. Preserve factual claims, employers, skills, and measurable results. Improve clarity, tighten wording, use strong bullet-style phrasing, and keep it ready for further tailoring to specific roles. Return only the improved CV text.",
            user.ProfileCvText,
            1800,
            500);
        if (string.IsNullOrWhiteSpace(improved))
        {
            return BadRequest("The AI service could not improve your CV text right now.");
        }

        user.ProfileCvText = improved.Trim();
        var result = await _users.UpdateAsync(user);
        if (!result.Succeeded)
        {
            return IdentityFailure(result);
        }

        return Ok(new { improved = true, characters = user.ProfileCvText.Length, text = user.ProfileCvText });
    }

    /// <summary>Turns a failed identity update into a 400 listing every error description.</summary>
    private BadRequestObjectResult IdentityFailure(IdentityResult result) =>
        BadRequest(string.Join("; ", result.Errors.Select(e => e.Description)));

    /// <summary>
    /// Built-in, best-effort text extraction used when the AI service returns no text.
    /// </summary>
    /// <param name="file">The uploaded file; a fresh read stream is opened from it.</param>
    /// <param name="extension">The file extension including the leading dot.</param>
    /// <param name="cancellationToken">Cancels buffering of the upload into memory.</param>
    /// <returns>
    /// The extracted text, or an empty string when the type has no built-in extractor
    /// (images) or nothing readable was found.
    /// </returns>
    private static async Task<string> ExtractTextAsync(IFormFile file, string extension, CancellationToken cancellationToken = default)
    {
        if (string.Equals(extension, ".txt", StringComparison.OrdinalIgnoreCase)
            || string.Equals(extension, ".md", StringComparison.OrdinalIgnoreCase))
        {
            using var stream = file.OpenReadStream();
            using var reader = new StreamReader(stream, Encoding.UTF8, detectEncodingFromByteOrderMarks: true);
            return (await reader.ReadToEndAsync()).Trim();
        }

        var isPdf = string.Equals(extension, ".pdf", StringComparison.OrdinalIgnoreCase);
        var isDocx = string.Equals(extension, ".docx", StringComparison.OrdinalIgnoreCase);
        if (!isPdf && !isDocx)
        {
            // Nothing below can read other formats, so skip buffering the upload.
            return string.Empty;
        }

        await using var memory = new MemoryStream();
        await file.CopyToAsync(memory, cancellationToken);
        var bytes = memory.ToArray();

        return isPdf ? ExtractPdfText(bytes) : await ExtractDocxTextAsync(bytes);
    }

    /// <summary>
    /// Scrapes literal strings shown by <c>Tj</c>/<c>TJ</c> operators out of the PDF bytes.
    /// Only text that appears uncompressed in the file can be found this way.
    /// </summary>
    /// <returns>The fragments in document order joined by spaces, or empty when none were found.</returns>
    private static string ExtractPdfText(byte[] bytes)
    {
        var raw = Encoding.UTF8.GetString(bytes);

        var shown = PdfShowText.Matches(raw)
            .Select(match => (Position: match.Index, Text: match.Groups["text"].Value));

        var shownFromArrays = PdfShowTextArray.Matches(raw)
            .SelectMany(match =>
            {
                var items = match.Groups["items"];
                return PdfLiteralString.Matches(items.Value)
                    .Select(literal => (Position: items.Index + literal.Index, Text: literal.Groups["text"].Value));
            });

        // Sort by position so Tj and TJ fragments interleave in the order they occur in the file.
        var fragments = shown.Concat(shownFromArrays)
            .OrderBy(fragment => fragment.Position)
            .Select(fragment => fragment.Text)
            .Where(value => !string.IsNullOrWhiteSpace(value))
            .Select(UnescapePdfString)
            .ToList();

        // No recognizable text: report "nothing extracted" rather than returning the raw
        // PDF bytes, which would otherwise be saved as the user's CV text.
        if (fragments.Count == 0)
        {
            return string.Empty;
        }

        var joined = string.Join(" ", fragments);
        var scrubbed = ControlCharacters.Replace(joined, " ");
        return WhitespaceRun.Replace(scrubbed, " ").Trim();
    }

    /// <summary>
    /// Resolves backslash escapes in a PDF literal string, keeping the fragment unchanged
    /// when it contains an escape sequence that <see cref="Regex.Unescape"/> rejects.
    /// </summary>
    private static string UnescapePdfString(string value)
    {
        try
        {
            return Regex.Unescape(value);
        }
        catch (ArgumentException)
        {
            return value;
        }
    }

    /// <summary>
    /// Reads <c>word/document.xml</c> from the DOCX package and strips its markup.
    /// </summary>
    /// <returns>The whitespace-collapsed text, or empty when the package is unreadable or has no document part.</returns>
    private static async Task<string> ExtractDocxTextAsync(byte[] bytes)
    {
        try
        {
            using var archive = new System.IO.Compression.ZipArchive(new MemoryStream(bytes), System.IO.Compression.ZipArchiveMode.Read, leaveOpen: false);
            var entry = archive.GetEntry("word/document.xml");
            if (entry is null) return string.Empty;

            using var entryStream = entry.Open();
            using var reader = new StreamReader(entryStream, Encoding.UTF8);
            var xml = await reader.ReadToEndAsync();

            var withoutTags = MarkupTag.Replace(xml, " ");
            var decoded = System.Net.WebUtility.HtmlDecode(withoutTags) ?? string.Empty;
            return WhitespaceRun.Replace(decoded, " ").Trim();
        }
        catch (InvalidDataException)
        {
            // Not a valid zip package (e.g. a renamed or truncated file): treat as unreadable
            // so the caller answers with its "could not be read" response.
            return string.Empty;
        }
    }
}