108 lines
4.3 KiB
C#
108 lines
4.3 KiB
C#
using System.Text;
|
|
using System.Text.RegularExpressions;
|
|
using JobTrackerApi.Models;
|
|
using Microsoft.AspNetCore.Authorization;
|
|
using Microsoft.AspNetCore.Identity;
|
|
using Microsoft.AspNetCore.Mvc;
|
|
|
|
namespace JobTrackerApi.Controllers;
|
|
|
|
/// <summary>
/// API controller that imports a CV file uploaded by the signed-in user and stores the
/// extracted plain text on the user's <c>ProfileCvText</c> property.
/// </summary>
[ApiController]
[Route("api/profile-cv")]
[Authorize(AuthenticationSchemes = "local")]
public sealed class ProfileCvController : ControllerBase
{
    /// <summary>File extensions accepted by <see cref="Upload"/>, compared case-insensitively.</summary>
    private static readonly HashSet<string> AllowedExtensions = new(StringComparer.OrdinalIgnoreCase)
    {
        ".txt",
        ".md",
        ".pdf",
        ".docx",
    };

    /// <summary>Largest accepted upload (5 MiB), enforced against <c>IFormFile.Length</c>.</summary>
    private const long MaxFileSizeBytes = 5 * 1024 * 1024;

    /// <summary>
    /// Request body cap. It is slightly larger than <see cref="MaxFileSizeBytes"/> because the
    /// multipart boundary and part headers count towards the request size; without the slack a
    /// file that passes the file-size check could still be rejected before the action runs.
    /// </summary>
    private const long MaxRequestSizeBytes = MaxFileSizeBytes + (64 * 1024);

    /// <summary>
    /// Upper bound on the decompressed size of <c>word/document.xml</c>, so a small but highly
    /// compressed archive cannot exhaust memory.
    /// </summary>
    private const int MaxDocxXmlBytes = 32 * 1024 * 1024;

    /// <summary>
    /// Body of a PDF literal string: runs of ordinary characters, backslash escapes, and one
    /// level of balanced nested parentheses. The atomic group keeps matching linear.
    /// </summary>
    private const string PdfLiteralBody = @"(?:(?>[^\\()]+)|\\.|\((?:(?>[^\\()]+)|\\.)*\))*";

    /// <summary>Backstop for the PDF regexes, which run over untrusted file content.</summary>
    private static readonly TimeSpan RegexTimeout = TimeSpan.FromSeconds(2);

    /// <summary>
    /// Matches the two text-showing forms handled here, in document order:
    /// <c>(string) Tj</c> (group <c>text</c>) and <c>[ ... ] TJ</c> (group <c>array</c>).
    /// </summary>
    private static readonly Regex PdfTextOperators = new(
        @"\((?<text>" + PdfLiteralBody + @")\)\s*Tj"
        + @"|\[(?<array>(?:(?>[^\[\]()]+)|\(" + PdfLiteralBody + @"\))*)\]\s*TJ",
        RegexOptions.Singleline | RegexOptions.Compiled,
        RegexTimeout);

    /// <summary>Matches one literal string inside the operand array of a <c>TJ</c> operator.</summary>
    private static readonly Regex PdfLiteral = new(
        @"\((?<text>" + PdfLiteralBody + @")\)",
        RegexOptions.Singleline | RegexOptions.Compiled,
        RegexTimeout);

    private static readonly Regex ControlCharacters = new(@"[\x00-\x08\x0B\x0C\x0E-\x1F]", RegexOptions.Compiled);

    private static readonly Regex WhitespaceRuns = new(@"\s+", RegexOptions.Compiled);

    private readonly UserManager<ApplicationUser> _users;

    /// <summary>Creates the controller.</summary>
    /// <param name="users">User manager used to resolve the current user and persist the imported text.</param>
    public ProfileCvController(UserManager<ApplicationUser> users)
    {
        _users = users;
    }

    /// <summary>
    /// Validates the uploaded file, extracts its text and saves it as the current user's CV text.
    /// </summary>
    /// <param name="file">The uploaded CV; must be non-empty, at most 5 MiB, and have an allowed extension.</param>
    /// <returns>
    /// 200 with <c>{ imported, characters }</c> on success; 401 when no user can be resolved;
    /// 400 when the file is missing, too large, of an unsupported type, yields no text, or the
    /// user update fails.
    /// </returns>
    [HttpPost("upload")]
    [RequestSizeLimit(MaxRequestSizeBytes)]
    public async Task<IActionResult> Upload([FromForm] IFormFile file)
    {
        var user = await _users.GetUserAsync(User);
        if (user is null)
        {
            return Unauthorized();
        }

        if (file is null || file.Length == 0)
        {
            return BadRequest("Select a CV file to upload.");
        }

        if (file.Length > MaxFileSizeBytes)
        {
            return BadRequest("CV import file is too large. Keep it under 5 MB.");
        }

        var extension = Path.GetExtension(file.FileName ?? string.Empty);
        if (!AllowedExtensions.Contains(extension))
        {
            return BadRequest("Only .txt, .md, .pdf, and .docx CV imports are supported right now.");
        }

        var text = (await ExtractTextAsync(file, extension)).Trim();
        if (string.IsNullOrWhiteSpace(text))
        {
            return BadRequest("The uploaded CV file could not be read or was empty.");
        }

        user.ProfileCvText = text;
        var result = await _users.UpdateAsync(user);
        if (!result.Succeeded)
        {
            return BadRequest(string.Join("; ", result.Errors.Select(e => e.Description)));
        }

        return Ok(new { imported = true, characters = text.Length });
    }

    /// <summary>
    /// Extracts plain text from the upload according to its extension.
    /// </summary>
    /// <param name="file">The uploaded file.</param>
    /// <param name="extension">The file extension including the leading dot; compared case-insensitively.</param>
    /// <returns>
    /// The trimmed text, or an empty string when the extension is not handled or the content
    /// cannot be read. Unreadable PDF/DOCX content is reported as empty rather than thrown so
    /// the caller can answer with its "could not be read" response.
    /// </returns>
    private static async Task<string> ExtractTextAsync(IFormFile file, string extension)
    {
        bool Is(string candidate) => string.Equals(extension, candidate, StringComparison.OrdinalIgnoreCase);

        if (Is(".txt") || Is(".md"))
        {
            using var stream = file.OpenReadStream();
            using var reader = new StreamReader(stream, Encoding.UTF8, detectEncodingFromByteOrderMarks: true);
            return (await reader.ReadToEndAsync()).Trim();
        }

        await using var memory = new MemoryStream();
        await file.CopyToAsync(memory);
        var bytes = memory.ToArray();

        if (Is(".pdf"))
        {
            return ExtractPdfText(bytes);
        }

        if (Is(".docx"))
        {
            return await ExtractDocxTextAsync(bytes);
        }

        return string.Empty;
    }

    /// <summary>
    /// Best-effort text scrape of a PDF: collects the string operands of <c>Tj</c> and <c>TJ</c>
    /// operators that appear uncompressed in the file. Streams are not inflated, so text held
    /// only in compressed content streams is not found.
    /// </summary>
    /// <param name="bytes">The raw file content.</param>
    /// <returns>
    /// The collected text with control characters removed and whitespace collapsed. When no
    /// text operator is found the result is empty if the content carries a <c>%PDF-</c> header
    /// (so binary data is never stored as CV text); header-less content is returned as-is.
    /// </returns>
    private static string ExtractPdfText(byte[] bytes)
    {
        var raw = Encoding.UTF8.GetString(bytes);
        var fragments = new List<string>();

        void AddFragment(string value)
        {
            if (!string.IsNullOrWhiteSpace(value))
            {
                fragments.Add(value);
            }
        }

        try
        {
            foreach (Match operation in PdfTextOperators.Matches(raw))
            {
                if (operation.Groups["text"].Success)
                {
                    AddFragment(DecodePdfLiteral(operation.Groups["text"].Value));
                    continue;
                }

                // A TJ array splits one run of text into pieces separated by kerning offsets,
                // so the pieces are concatenated without a separator.
                var pieces = new StringBuilder();
                foreach (Match literal in PdfLiteral.Matches(operation.Groups["array"].Value))
                {
                    pieces.Append(DecodePdfLiteral(literal.Groups["text"].Value));
                }

                AddFragment(pieces.ToString());
            }
        }
        catch (RegexMatchTimeoutException)
        {
            // Pathological content: treat the file as unreadable instead of failing the request.
            return string.Empty;
        }

        string joined;
        if (fragments.Count > 0)
        {
            joined = string.Join(" ", fragments);
        }
        else if (raw.IndexOf("%PDF-", 0, Math.Min(raw.Length, 1024), StringComparison.Ordinal) >= 0)
        {
            return string.Empty;
        }
        else
        {
            joined = raw;
        }

        var scrubbed = ControlCharacters.Replace(joined, " ");
        return WhitespaceRuns.Replace(scrubbed, " ").Trim();
    }

    /// <summary>
    /// Decodes the backslash escapes of a PDF literal string: <c>\n \r \t \b \f</c>, one to
    /// three octal digits, and a backslash followed by a line break (line continuation).
    /// Any other escaped character is kept without its backslash. Never throws on malformed input.
    /// </summary>
    /// <param name="literal">The string content between the enclosing parentheses.</param>
    /// <returns>The decoded text.</returns>
    private static string DecodePdfLiteral(string literal)
    {
        if (literal.IndexOf('\\') < 0)
        {
            return literal;
        }

        var builder = new StringBuilder(literal.Length);
        for (var i = 0; i < literal.Length; i++)
        {
            var current = literal[i];
            if (current != '\\')
            {
                builder.Append(current);
                continue;
            }

            i++;
            if (i >= literal.Length)
            {
                // Dangling backslash at the end of the literal: nothing left to decode.
                break;
            }

            switch (literal[i])
            {
                case 'n':
                    builder.Append('\n');
                    break;
                case 'r':
                    builder.Append('\r');
                    break;
                case 't':
                    builder.Append('\t');
                    break;
                case 'b':
                    builder.Append('\b');
                    break;
                case 'f':
                    builder.Append('\f');
                    break;
                case '\r':
                    // Line continuation; also swallow the LF of a CRLF pair.
                    if (i + 1 < literal.Length && literal[i + 1] == '\n')
                    {
                        i++;
                    }

                    break;
                case '\n':
                    break;
                case >= '0' and <= '7':
                {
                    var code = 0;
                    var end = Math.Min(literal.Length, i + 3);
                    while (i < end && literal[i] is >= '0' and <= '7')
                    {
                        code = (code * 8) + (literal[i] - '0');
                        i++;
                    }

                    // Step back so the loop increment lands on the first unconsumed character.
                    i--;
                    builder.Append((char)(code & 0xFF));
                    break;
                }

                default:
                    builder.Append(literal[i]);
                    break;
            }
        }

        return builder.ToString();
    }

    /// <summary>
    /// Reads the text of <c>word/document.xml</c> from a DOCX (zip) archive.
    /// </summary>
    /// <param name="bytes">The raw file content.</param>
    /// <returns>
    /// The document text with whitespace collapsed, or an empty string when the archive is
    /// invalid, the entry is missing, the entry exceeds <see cref="MaxDocxXmlBytes"/> once
    /// decompressed, or the XML is malformed.
    /// </returns>
    private static async Task<string> ExtractDocxTextAsync(byte[] bytes)
    {
        try
        {
            using var archive = new System.IO.Compression.ZipArchive(
                new MemoryStream(bytes, writable: false),
                System.IO.Compression.ZipArchiveMode.Read,
                leaveOpen: false);

            var entry = archive.GetEntry("word/document.xml");
            if (entry is null)
            {
                return string.Empty;
            }

            await using var xmlBuffer = new MemoryStream();
            using (var entryStream = entry.Open())
            {
                if (!await TryCopyBoundedAsync(entryStream, xmlBuffer, MaxDocxXmlBytes))
                {
                    return string.Empty;
                }
            }

            xmlBuffer.Position = 0;
            return ReadDocumentText(xmlBuffer);
        }
        catch (InvalidDataException)
        {
            // Not a readable zip archive, or the entry's compressed data is corrupt.
            return string.Empty;
        }
        catch (System.Xml.XmlException)
        {
            return string.Empty;
        }
    }

    /// <summary>
    /// Copies <paramref name="source"/> to <paramref name="destination"/>, giving up once more
    /// than <paramref name="maxBytes"/> bytes have been read.
    /// </summary>
    /// <returns><c>true</c> when the whole source fit within the limit; otherwise <c>false</c>.</returns>
    private static async Task<bool> TryCopyBoundedAsync(Stream source, Stream destination, int maxBytes)
    {
        var buffer = new byte[81920];
        long total = 0;
        int read;
        while ((read = await source.ReadAsync(buffer.AsMemory())) > 0)
        {
            total += read;
            if (total > maxBytes)
            {
                return false;
            }

            await destination.WriteAsync(buffer.AsMemory(0, read));
        }

        return true;
    }

    /// <summary>
    /// Collects the character data of every <c>t</c> element in the XML, inserting a space for
    /// <c>tab</c>, <c>br</c> and <c>cr</c> elements and at the end of every <c>p</c> element.
    /// Elements are matched by local name, whatever their namespace. Text of adjacent runs is
    /// concatenated directly so a word stored across several runs stays in one piece.
    /// </summary>
    /// <param name="xml">Stream positioned at the start of the XML document.</param>
    /// <returns>The collected text with whitespace collapsed and trimmed.</returns>
    private static string ReadDocumentText(Stream xml)
    {
        var settings = new System.Xml.XmlReaderSettings
        {
            // Untrusted input: no DTDs and no external resource resolution.
            DtdProcessing = System.Xml.DtdProcessing.Prohibit,
            XmlResolver = null,
            IgnoreComments = true,
            IgnoreProcessingInstructions = true,
        };

        var text = new StringBuilder();
        var insideText = false;

        using var reader = System.Xml.XmlReader.Create(xml, settings);
        while (reader.Read())
        {
            switch (reader.NodeType)
            {
                case System.Xml.XmlNodeType.Element:
                    if (reader.LocalName == "t")
                    {
                        // An empty <t/> has no end tag, so it must not open a text span.
                        insideText = !reader.IsEmptyElement;
                    }
                    else if (reader.LocalName is "tab" or "br" or "cr")
                    {
                        text.Append(' ');
                    }

                    break;

                case System.Xml.XmlNodeType.EndElement:
                    if (reader.LocalName == "t")
                    {
                        insideText = false;
                    }
                    else if (reader.LocalName == "p")
                    {
                        text.Append(' ');
                    }

                    break;

                case System.Xml.XmlNodeType.Text:
                case System.Xml.XmlNodeType.CDATA:
                case System.Xml.XmlNodeType.Whitespace:
                case System.Xml.XmlNodeType.SignificantWhitespace:
                    if (insideText)
                    {
                        text.Append(reader.Value);
                    }

                    break;
            }
        }

        return WhitespaceRuns.Replace(text.ToString(), " ").Trim();
    }
}
|