Improve CV parsing and profile editor flow

2026-03-29 14:29:18 +02:00
parent 99fc94bc18
commit 44000f96f2
18 changed files with 1028 additions and 44 deletions
@@ -9,6 +9,9 @@ GOOGLE_GMAIL_CLIENT_SECRET=CHANGE_ME_GOOGLE_OAUTH_CLIENT_SECRET
 # Optional. If omitted, the backend uses https://<your-domain>/api/gmail/oauth/callback
 GOOGLE_GMAIL_REDIRECT_URI=
 AI_SERVICE_BASE_URL=http://ai-service:8001
+# Optional: enables hybrid CV block classification in the local AI service.
+OLLAMA_BASE_URL=http://ollama:11434
+OLLAMA_MODEL=qwen2.5:7b

 # Optional: only needed if you want the UI to call a non-default API base URL.
 # In production the UI defaults to `/api`.
@@ -280,7 +280,7 @@ public sealed class ProfileCvControllerTests
    [Fact]
    public async Task Upload_populates_structured_fields_from_flattened_cv_when_ai_json_is_invalid()
    {
-        var rawExtraction = "connor.babbington@cesnimda.co.uk cesnimda.co.uk +47 41 33 44 70 E D U C A T I O N E X T E N D E D D I P L O M A N V Q L E V E L 3 I N I C T 2012 - 2015 F O L L O W A B O U T M E Mid-level system developer with eight years of experience in UK local government, with expertise in full-stack development, backend, frontend and server administration. I N T E R E S T S I am interested in PC and board games, as well as cooking and learning new skills. E X P E R I E N C E S Y S T E M D E V E L O P E R 2015 - 2023 Developed and maintained multiple full-stack applications using C#, Python, Ruby on Rails, SQL, and JavaScript. + Warwickshire County Council, UK C O N T A C T Native English speaker, Norwegian level A2/B1.";
+        var rawExtraction = "connor.babbington@cesnimda.co.uk cesnimda.co.uk +47 41 33 44 70 E D U C A T I O N E X T E N D E D D I P L O M A N V Q L E V E L 3 I N I C T 2012 - 2015 F O L L O W A B O U T M E Mid-level system developer with eight years of experience in UK local government, with expertise in full-stack development, backend, frontend and server administration. I N T E R E S T S I am interested in PC and board games, as well as cooking and learning new skills. E X P E R I E N C E S Y S T E M D E V E L O P E R 2015 - 2023 Developed and maintained multiple full-stack applications using C#, Python, Ruby on Rails, SQL, and JavaScript. + Warwickshire County Council, UK C O N T A C T Native English speaker, Norwegian level A2/B1, C#, SQL, and public speaking.";

        var user = new ApplicationUser { Id = "user-1" };
        var userManager = CreateUserManager();
@@ -320,9 +320,164 @@ public sealed class ProfileCvControllerTests
        Assert.Contains(structured.Interests, item => item.Contains("board games", StringComparison.OrdinalIgnoreCase) || item.Contains("cooking", StringComparison.OrdinalIgnoreCase));
        Assert.Contains(structured.Languages, item => item.Name != null && item.Name.Equals("English", StringComparison.OrdinalIgnoreCase));
        Assert.Contains(structured.Languages, item => item.Name != null && item.Name.StartsWith("Norwegian", StringComparison.OrdinalIgnoreCase));
+        Assert.DoesNotContain(structured.Languages, item => item.Name != null && item.Name.Equals("C#", StringComparison.OrdinalIgnoreCase));
+        Assert.DoesNotContain(structured.Languages, item => item.Name != null && item.Name.Equals("SQL", StringComparison.OrdinalIgnoreCase));
+        Assert.DoesNotContain(structured.Languages, item => item.Name != null && item.Name.Contains("public speaking", StringComparison.OrdinalIgnoreCase));
        Assert.DoesNotContain(structured.Sections, section => section.Name == "General");
    }

+    // Feeds StructuredCvProfileJson.Deserialize a "languages" array that mixes real human
+    // languages with skill noise and asserts that exactly two entries survive:
+    //   - "English" / "Native" is kept unchanged;
+    //   - "Native Norwegian speaker" (level null) becomes Name "Norwegian", Level "Native".
+    // "French" (no level), "C#" and "Leadership" are expected to be absent, because
+    // Assert.Collection requires the element count to match the two inspectors.
+    [Fact]
+    public void Structured_cv_normalization_keeps_human_languages_and_drops_skill_noise()
+    {
+        var structured = StructuredCvProfileJson.Deserialize("""
+        {
+          "version": "1",
+          "contact": {},
+          "summary": [],
+          "jobs": [],
+          "education": [],
+          "skills": [],
+          "languages": [
+            { "name": "English", "level": "Native" },
+            { "name": "Native Norwegian speaker", "level": null },
+            { "name": "French", "level": null },
+            { "name": "C#", "level": "Advanced" },
+            { "name": "Leadership", "level": null }
+          ],
+          "interests": [],
+          "otherSections": []
+        }
+        """);
+
+        // Sort by name so the inspectors below do not depend on the normalizer's output order.
+        Assert.Collection(
+            structured.Languages.OrderBy(item => item.Name, StringComparer.OrdinalIgnoreCase),
+            first =>
+            {
+                Assert.Equal("English", first.Name);
+                Assert.Equal("Native", first.Level);
+            },
+            second =>
+            {
+                Assert.Equal("Norwegian", second.Name);
+                Assert.Equal("Native", second.Level);
+            });
+    }
+
+    // Exercises job normalization in StructuredCvProfileJson.Deserialize with two jobs:
+    //   1. title and company arrive swapped ("Acme Ltd" / "Senior Backend Developer") and the
+    //      bullets repeat the title, the company and the date range; the test expects the
+    //      fields swapped back and only the real task bullet kept.
+    //   2. the title is "Lead Engineer at Northwind Council" with a null company; the test
+    //      expects it split into title and company, and the "Skills: ..." bullet removed.
+    [Fact]
+    public void Structured_cv_normalization_separates_job_title_company_and_tasks()
+    {
+        var structured = StructuredCvProfileJson.Deserialize("""
+        {
+          "version": "1",
+          "contact": {},
+          "summary": [],
+          "jobs": [
+            {
+              "title": "Acme Ltd",
+              "company": "Senior Backend Developer",
+              "location": "Oslo",
+              "start": "2022",
+              "end": "2024",
+              "isCurrent": false,
+              "bullets": [
+                "Senior Backend Developer",
+                "Acme Ltd",
+                "2022 - 2024",
+                "Built API integrations for recruiter workflows and reduced manual follow-up churn."
+              ],
+              "skills": [".NET", "SQL"]
+            },
+            {
+              "title": "Lead Engineer at Northwind Council",
+              "company": null,
+              "location": "Remote",
+              "start": "2020",
+              "end": "Present",
+              "isCurrent": true,
+              "bullets": [
+                "Led platform delivery across case-management and reporting surfaces.",
+                "Skills: C#, SQL"
+              ],
+              "skills": ["C#", "SQL"]
+            }
+          ],
+          "education": [],
+          "skills": [],
+          "languages": [],
+          "interests": [],
+          "otherSections": []
+        }
+        """);
+
+        // Jobs are checked in input order; each inspector pins title, company and bullets.
+        Assert.Collection(
+            structured.Jobs,
+            first =>
+            {
+                Assert.Equal("Senior Backend Developer", first.Title);
+                Assert.Equal("Acme Ltd", first.Company);
+                Assert.Equal(new[] { "Built API integrations for recruiter workflows and reduced manual follow-up churn." }, first.Bullets);
+            },
+            second =>
+            {
+                Assert.Equal("Lead Engineer", second.Title);
+                Assert.Equal("Northwind Council", second.Company);
+                Assert.Equal(new[] { "Led platform delivery across case-management and reporting surfaces." }, second.Bullets);
+            });
+    }
+
+    // Pins the normalization of contact links, locations and job dates performed by
+    // StructuredCvProfileJson.Deserialize:
+    //   - contact location "Tønsberg, Norway" is kept as-is;
+    //   - website "https://cesnimda.co.uk/about" is expected as "cesnimda.co.uk";
+    //   - LinkedIn "linkedin.com/in/demo-user?trk=foo" is expected as
+    //     "https://www.linkedin.com/in/demo-user";
+    //   - first job: location and the dates "Sept 2023" / "1/1/2024" are kept;
+    //   - second job: "Remote 123", "Spring 2024" and "Later" are all expected to become null.
+    [Fact]
+    public void Structured_cv_normalization_hardens_contact_links_locations_and_dates()
+    {
+        var structured = StructuredCvProfileJson.Deserialize("""
+        {
+          "version": "1",
+          "contact": {
+            "location": "Tønsberg, Norway",
+            "website": "https://cesnimda.co.uk/about",
+            "linkedin": "linkedin.com/in/demo-user?trk=foo"
+          },
+          "summary": [],
+          "jobs": [
+            {
+              "title": "System Developer",
+              "company": "Warwickshire County Council",
+              "location": "Warwickshire, England, UK",
+              "start": "Sept 2023",
+              "end": "1/1/2024",
+              "isCurrent": false,
+              "bullets": ["Built APIs"],
+              "skills": []
+            },
+            {
+              "title": "Developer",
+              "company": "Demo Co",
+              "location": "Remote 123",
+              "start": "Spring 2024",
+              "end": "Later",
+              "isCurrent": false,
+              "bullets": ["Kept services running"],
+              "skills": []
+            }
+          ],
+          "education": [],
+          "skills": [],
+          "languages": [],
+          "interests": [],
+          "otherSections": []
+        }
+        """);
+
+        // Contact fields.
+        Assert.Equal("Tønsberg, Norway", structured.Contact.Location);
+        Assert.Equal("cesnimda.co.uk", structured.Contact.Website);
+        Assert.Equal("https://www.linkedin.com/in/demo-user", structured.Contact.LinkedIn);
+        // First job: well-formed values pass through untouched.
+        Assert.Equal("Warwickshire, England, UK", structured.Jobs[0].Location);
+        Assert.Equal("Sept 2023", structured.Jobs[0].Start);
+        Assert.Equal("1/1/2024", structured.Jobs[0].End);
+        // Second job: values the normalizer rejects are nulled rather than kept.
+        Assert.Null(structured.Jobs[1].Location);
+        Assert.Null(structured.Jobs[1].Start);
+        Assert.Null(structured.Jobs[1].End);
+    }
+
    [Fact]
    public async Task Parse_returns_structured_cv_and_persists_it()
    {
@@ -124,6 +124,10 @@ public sealed class AdminSystemController : ControllerBase
                GpuName: null,
                OcrAvailable: false,
                OcrLanguages: null,
+                OllamaConfigured: null,
+                OllamaReachable: null,
+                OllamaModel: null,
+                OllamaModelAvailable: null,
                HealthLatencyMs: null,
                ProbeLatencyMs: null,
                LastProbeAt: null,
@@ -61,13 +61,15 @@ public sealed class ProfileCvController : ControllerBase

    private readonly UserManager<ApplicationUser> _users;
    private readonly ISummarizerService _aiService;
+    private readonly ICvAiClassifier _cvAiClassifier;
    private readonly JobTrackerContext _db;
    private readonly AppPaths _paths;

-    public ProfileCvController(UserManager<ApplicationUser> users, ISummarizerService aiService, JobTrackerContext db, AppPaths paths)
+    public ProfileCvController(UserManager<ApplicationUser> users, ISummarizerService aiService, JobTrackerContext db, AppPaths paths, ICvAiClassifier? cvAiClassifier = null)
    {
        _users = users;
        _aiService = aiService;
+        _cvAiClassifier = cvAiClassifier ?? NoOpCvAiClassifier.Instance;
        _db = db;
        _paths = paths;
    }
@@ -338,14 +340,7 @@ public sealed class ProfileCvController : ControllerBase
    private async Task<StructuredCvProfile> BuildStructuredCvAsync(string text, CancellationToken cancellationToken)
    {
        var parseSource = NormalizeTextForStructuredParsing(text);
-        var fallbackSections = ParseSections(parseSource)
-            .Select(section => new StructuredCvSection
-            {
-                Name = section.Name,
-                Content = section.Content,
-                WordCount = CountWords(section.Content),
-            })
-            .ToList();
+        var fallbackSections = await BuildFallbackSectionsAsync(parseSource, cancellationToken);

        var sectionFallback = StructuredCvProfileJson.FromSections(fallbackSections);
        AnnotateStructuredCv(sectionFallback, "repair", 0.56);
@@ -729,12 +724,19 @@ public sealed class ProfileCvController : ControllerBase
    private static List<StructuredCvLanguage> ParseLanguagesHeuristically(string content)
    {
        var languages = new List<StructuredCvLanguage>();
-        foreach (Match match in Regex.Matches(content, @"\b(English|Norwegian|Norsk|German|French|Spanish|Swedish|Danish)\b(?:[^\n.,;:]*?\b(Native|Fluent|Advanced|Intermediate|Beginner|A1|A2|B1|B2|C1|C2|Native speaker)\b)?", RegexOptions.IgnoreCase))
+        var candidates = Regex.Split(content.Replace("\r\n", "\n"), @"[\n,;]+|(?<=[.!?])\s+")
+            .Select(item => item.Trim())
+            .Where(item => item.Length > 1);
+
+        foreach (var candidate in candidates)
        {
-            var name = NullIfWhitespace(match.Groups[1].Value);
-            var level = NullIfWhitespace(match.Groups[2].Value);
-            if (name is null) continue;
-            languages.Add(new StructuredCvLanguage { Name = name, Level = level });
+            var level = HumanLanguageCatalog.ExtractLevel(candidate);
+            if (level is null) continue;
+
+            foreach (var name in HumanLanguageCatalog.ExtractLanguageNames(candidate))
+            {
+                languages.Add(new StructuredCvLanguage { Name = name, Level = level });
+            }
        }

        return languages
@@ -872,6 +874,86 @@ public sealed class ProfileCvController : ControllerBase
            .ToList();
    }

+    /// <summary>
+    /// Builds the section list used as the structured-CV fallback.
+    /// </summary>
+    /// <remarks>
+    /// The sections produced by <c>ParseSections</c> are returned whenever at least one of them
+    /// is not named "General" (case-insensitive). Otherwise the text is classified block by
+    /// block, and that result is used only when it contains at least one section.
+    /// </remarks>
+    /// <param name="parseSource">Normalized CV text to split into sections.</param>
+    /// <param name="cancellationToken">Forwarded to the block classification step.</param>
+    /// <returns>The heuristic sections, or the classified sections when those are preferable.</returns>
+    private async Task<List<StructuredCvSection>> BuildFallbackSectionsAsync(string parseSource, CancellationToken cancellationToken)
+    {
+        var heuristicSections = new List<StructuredCvSection>();
+        foreach (var section in ParseSections(parseSource))
+        {
+            heuristicSections.Add(new StructuredCvSection
+            {
+                Name = section.Name,
+                Content = section.Content,
+                WordCount = CountWords(section.Content),
+            });
+        }
+
+        // "General" is the catch-all bucket; anything else means real headings were recognized.
+        var onlyGeneral = heuristicSections.All(section => string.Equals(section.Name, "General", StringComparison.OrdinalIgnoreCase));
+        if (!onlyGeneral)
+        {
+            return heuristicSections;
+        }
+
+        var classified = await ClassifyBlocksIntoSectionsAsync(parseSource, cancellationToken);
+        if (classified.Count > 0)
+        {
+            return classified;
+        }
+
+        return heuristicSections;
+    }
+
+    /// <summary>
+    /// Splits the text into blank-line separated blocks, asks the CV classifier for each block's
+    /// section and merges the blocks into one <see cref="StructuredCvSection"/> per section name.
+    /// </summary>
+    /// <remarks>
+    /// Blocks shorter than 24 characters are ignored. A block with no classification, an empty
+    /// section name or the section "Other" lands in "General". Blocks classified as
+    /// "Work Experience" are re-rendered as a "### title" line, a "company | location | dates"
+    /// line and "- " bullets when the classifier supplied any of those parts.
+    /// </remarks>
+    /// <param name="parseSource">Normalized CV text.</param>
+    /// <param name="cancellationToken">Forwarded to every classifier call.</param>
+    /// <returns>Sections in first-seen order; empty when no block qualifies.</returns>
+    private async Task<List<StructuredCvSection>> ClassifyBlocksIntoSectionsAsync(string parseSource, CancellationToken cancellationToken)
+    {
+        var blocks = Regex.Split(parseSource.Replace("\r\n", "\n"), @"\n\s*\n")
+            .Select(block => block.Trim())
+            .Where(block => block.Length >= 24)
+            .ToList();
+
+        if (blocks.Count == 0) return new List<StructuredCvSection>();
+
+        var sectionBuckets = new List<StructuredCvSection>();
+        foreach (var block in blocks)
+        {
+            var classification = await _cvAiClassifier.ClassifyBlockAsync(block, cancellationToken);
+
+            // Trim before the alias lookup: the name comes from a model and may carry stray whitespace.
+            var sectionName = classification?.Section?.Trim();
+            if (!string.IsNullOrWhiteSpace(sectionName) && SectionAliases.TryGetValue(sectionName, out var canonical))
+            {
+                sectionName = canonical;
+            }
+
+            if (string.IsNullOrWhiteSpace(sectionName) || string.Equals(sectionName, "Other", StringComparison.OrdinalIgnoreCase))
+            {
+                sectionName = "General";
+            }
+
+            var content = block;
+            if (string.Equals(sectionName, "Work Experience", StringComparison.OrdinalIgnoreCase) && classification is not null)
+            {
+                var lines = new List<string>();
+                if (!string.IsNullOrWhiteSpace(classification.Title)) lines.Add($"### {classification.Title.Trim()}");
+                var endIsCurrent = string.Equals(classification.End, "Present", StringComparison.OrdinalIgnoreCase) || string.Equals(classification.End, "Current", StringComparison.OrdinalIgnoreCase);
+                var dateRange = FormatDateRangeForSection(classification.Start, classification.End, endIsCurrent);
+                var meta = string.Join(" | ", new[] { classification.Company, classification.Location, dateRange }.Where(value => !string.IsNullOrWhiteSpace(value)));
+                if (!string.IsNullOrWhiteSpace(meta)) lines.Add(meta);
+                if (classification.Bullets is not null)
+                {
+                    lines.AddRange(classification.Bullets.Where(bullet => !string.IsNullOrWhiteSpace(bullet)).Select(bullet => $"- {bullet.Trim()}"));
+                }
+
+                // Keep the raw block when the classifier returned no usable structured parts.
+                if (lines.Count > 0) content = string.Join("\n", lines);
+            }
+
+            // Merge case-insensitively, like every other section-name comparison in this method,
+            // so "skills" and "Skills" do not end up as two separate sections.
+            var existing = sectionBuckets.FirstOrDefault(section => string.Equals(section.Name, sectionName, StringComparison.OrdinalIgnoreCase));
+            if (existing is null)
+            {
+                sectionBuckets.Add(new StructuredCvSection { Name = sectionName, Content = content, WordCount = CountWords(content) });
+            }
+            else
+            {
+                existing.Content = $"{existing.Content}\n\n{content}".Trim();
+                existing.WordCount = CountWords(existing.Content);
+            }
+        }
+
+        return sectionBuckets.Where(section => !string.IsNullOrWhiteSpace(section.Content)).ToList();
+    }
+
+    /// <summary>
+    /// Renders a job's date range as "start - end" for the section text.
+    /// </summary>
+    /// <param name="start">Start date text; may be null or blank.</param>
+    /// <param name="end">End date text; may be null or blank.</param>
+    /// <param name="isCurrent">When true the end is always rendered as "Present".</param>
+    /// <returns>
+    /// <c>null</c> when both dates are blank, <paramref name="end"/> alone when only the start is
+    /// blank, otherwise "start - end" with a blank end rendered as "Present".
+    /// </returns>
+    private static string? FormatDateRangeForSection(string? start, string? end, bool isCurrent)
+    {
+        var hasStart = !string.IsNullOrWhiteSpace(start);
+        var hasEnd = !string.IsNullOrWhiteSpace(end);
+
+        if (!hasStart && !hasEnd) return null;
+        if (!hasStart) return end;
+
+        // A blank end (not just null) means the range is open; "??" alone would emit "2020 - ".
+        var endLabel = isCurrent || !hasEnd ? "Present" : end;
+        return $"{start} - {endLabel}";
+    }
+
    private async Task<string> MaybeReconstructStructuredCvAsync(string text, CancellationToken cancellationToken)
    {
        var normalized = text.Trim();
@@ -132,6 +132,7 @@ builder.Services.AddHttpClient("ai-service", client =>

 builder.Services.AddMemoryCache();
 builder.Services.AddSingleton<ISummarizerService, SummarizerService>();
+builder.Services.AddSingleton<ICvAiClassifier, CvAiClassifier>();
 builder.Services.AddSingleton<IGoogleTokenValidator, GoogleTokenValidator>();
 builder.Services.AddScoped<IGmailOAuthService, GmailOAuthService>();

@@ -0,0 +1,65 @@
+using System.Net.Http;
+using System.Text;
+using System.Text.Json;
+
+namespace JobTrackerApi.Services;
+
+/// <summary>
+/// Classification of a single CV text block, as deserialized from the AI service's
+/// <c>/cv/classify-block</c> JSON response. Every member is nullable.
+/// </summary>
+/// <param name="Section">Section name assigned to the block.</param>
+/// <param name="Confidence">Confidence value reported by the service (scale not shown here).</param>
+/// <param name="Reason">Explanation text reported by the service.</param>
+/// <param name="Title">Job title extracted from the block, if any.</param>
+/// <param name="Company">Company extracted from the block, if any.</param>
+/// <param name="Location">Location extracted from the block, if any.</param>
+/// <param name="Start">Start date text extracted from the block, if any.</param>
+/// <param name="End">End date text extracted from the block, if any.</param>
+/// <param name="Bullets">Bullet lines extracted from the block, if any.</param>
+public sealed record CvBlockClassificationResult(
+    string? Section,
+    double? Confidence,
+    string? Reason,
+    string? Title,
+    string? Company,
+    string? Location,
+    string? Start,
+    string? End,
+    List<string>? Bullets);
+
+/// <summary>
+/// Classifies individual blocks of CV text into sections.
+/// </summary>
+public interface ICvAiClassifier
+{
+    /// <summary>
+    /// Classifies one block of CV text.
+    /// </summary>
+    /// <param name="block">The text block to classify.</param>
+    /// <param name="cancellationToken">Token used to cancel the operation.</param>
+    /// <returns>The classification, or <c>null</c> when none is available.</returns>
+    Task<CvBlockClassificationResult?> ClassifyBlockAsync(string block, CancellationToken cancellationToken = default);
+}
+
+/// <summary>
+/// <see cref="ICvAiClassifier"/> that posts each block to the <c>/cv/classify-block</c>
+/// endpoint of the "ai-service" named HTTP client and deserializes the JSON response.
+/// </summary>
+/// <remarks>
+/// Classification is best-effort: a non-success status code, a transport failure, a timeout or
+/// malformed JSON all yield <c>null</c>. Only cancellation requested through the caller's token
+/// is propagated.
+/// </remarks>
+public sealed class CvAiClassifier : ICvAiClassifier
+{
+    // Shared across calls: JsonSerializerOptions caches serialization metadata per instance,
+    // so allocating a fresh instance for every request throws that cache away each time.
+    private static readonly JsonSerializerOptions SerializerOptions = new(JsonSerializerDefaults.Web)
+    {
+        PropertyNameCaseInsensitive = true
+    };
+
+    private readonly IHttpClientFactory _httpClientFactory;
+
+    /// <summary>Creates the classifier.</summary>
+    /// <param name="httpClientFactory">Factory used to resolve the "ai-service" client.</param>
+    public CvAiClassifier(IHttpClientFactory httpClientFactory)
+    {
+        _httpClientFactory = httpClientFactory;
+    }
+
+    /// <summary>
+    /// Sends <paramref name="block"/> to the AI service and returns its classification.
+    /// </summary>
+    /// <param name="block">The CV text block; blank input short-circuits to <c>null</c>.</param>
+    /// <param name="cancellationToken">Token used to cancel the HTTP call and the read.</param>
+    /// <returns>The parsed classification, or <c>null</c> when the service gave no usable answer.</returns>
+    /// <exception cref="OperationCanceledException">
+    /// Thrown when <paramref name="cancellationToken"/> was cancelled by the caller.
+    /// </exception>
+    public async Task<CvBlockClassificationResult?> ClassifyBlockAsync(string block, CancellationToken cancellationToken = default)
+    {
+        if (string.IsNullOrWhiteSpace(block)) return null;
+
+        try
+        {
+            var client = _httpClientFactory.CreateClient("ai-service");
+            var payload = JsonSerializer.Serialize(new { block });
+            using var content = new StringContent(payload, Encoding.UTF8, "application/json");
+            using var response = await client.PostAsync("/cv/classify-block", content, cancellationToken);
+            if (!response.IsSuccessStatusCode) return null;
+
+            await using var stream = await response.Content.ReadAsStreamAsync(cancellationToken);
+            return await JsonSerializer.DeserializeAsync<CvBlockClassificationResult>(stream, SerializerOptions, cancellationToken);
+        }
+        catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
+        {
+            // The caller aborted the request: surface that instead of reporting "no
+            // classification", otherwise callers keep issuing requests for an abandoned operation.
+            // HttpClient timeouts also raise a cancellation exception, but without the caller's
+            // token being cancelled, so they fall through to the best-effort handler below.
+            throw;
+        }
+        catch (Exception)
+        {
+            // Best-effort by design: any other failure simply means "no classification".
+            return null;
+        }
+    }
+}
+
+/// <summary>
+/// <see cref="ICvAiClassifier"/> that never classifies anything: every call completes
+/// immediately with <c>null</c>. Exposed as a single shared <see cref="Instance"/>.
+/// </summary>
+public sealed class NoOpCvAiClassifier : ICvAiClassifier
+{
+    // Private so the shared Instance is the only way to obtain this type.
+    private NoOpCvAiClassifier()
+    {
+    }
+
+    /// <summary>The shared instance.</summary>
+    public static NoOpCvAiClassifier Instance { get; } = new();
+
+    /// <summary>Returns an already-completed task whose result is <c>null</c>.</summary>
+    public Task<CvBlockClassificationResult?> ClassifyBlockAsync(string block, CancellationToken cancellationToken = default)
+    {
+        CvBlockClassificationResult? noClassification = null;
+        return Task.FromResult(noClassification);
+    }
+}
@@ -21,6 +21,10 @@ namespace JobTrackerApi.Services
        string? GpuName,
        bool? OcrAvailable,
        string? OcrLanguages,
+        bool? OllamaConfigured,
+        bool? OllamaReachable,
+        string? OllamaModel,
+        bool? OllamaModelAvailable,
        double? HealthLatencyMs,
        double? ProbeLatencyMs,
        DateTimeOffset? LastProbeAt,
@@ -310,6 +314,10 @@ namespace JobTrackerApi.Services
            string? gpuName = null;
            bool? ocrAvailable = null;
            string? ocrLanguages = null;
+            bool? ollamaConfigured = null;
+            bool? ollamaReachable = null;
+            string? ollamaModel = null;
+            bool? ollamaModelAvailable = null;
            double? healthLatencyMs = null;
            var healthy = false;
            string? healthError = null;
@@ -332,6 +340,10 @@ namespace JobTrackerApi.Services
                    if (doc.RootElement.TryGetProperty("gpu_name", out var gpuNameEl)) gpuName = gpuNameEl.GetString();
                    if (doc.RootElement.TryGetProperty("ocr_available", out var ocrAvailableEl) && ocrAvailableEl.ValueKind is JsonValueKind.True or JsonValueKind.False) ocrAvailable = ocrAvailableEl.GetBoolean();
                    if (doc.RootElement.TryGetProperty("ocr_languages", out var ocrLanguagesEl)) ocrLanguages = ocrLanguagesEl.GetString();
+                    if (doc.RootElement.TryGetProperty("ollama_configured", out var ollamaConfiguredEl) && ollamaConfiguredEl.ValueKind is JsonValueKind.True or JsonValueKind.False) ollamaConfigured = ollamaConfiguredEl.GetBoolean();
+                    if (doc.RootElement.TryGetProperty("ollama_reachable", out var ollamaReachableEl) && ollamaReachableEl.ValueKind is JsonValueKind.True or JsonValueKind.False) ollamaReachable = ollamaReachableEl.GetBoolean();
+                    if (doc.RootElement.TryGetProperty("ollama_model", out var ollamaModelEl)) ollamaModel = ollamaModelEl.GetString();
+                    if (doc.RootElement.TryGetProperty("ollama_model_available", out var ollamaModelAvailableEl) && ollamaModelAvailableEl.ValueKind is JsonValueKind.True or JsonValueKind.False) ollamaModelAvailable = ollamaModelAvailableEl.GetBoolean();
                }
                else
                {
@@ -390,6 +402,10 @@ namespace JobTrackerApi.Services
                GpuName: gpuName,
                OcrAvailable: ocrAvailable,
                OcrLanguages: ocrLanguages,
+                OllamaConfigured: ollamaConfigured,
+                OllamaReachable: ollamaReachable,
+                OllamaModel: ollamaModel,
+                OllamaModelAvailable: ollamaModelAvailable,
                HealthLatencyMs: healthLatencyMs,
                ProbeLatencyMs: probeLatencyMs,
                LastProbeAt: lastProbeAt,
@@ -0,0 +1,162 @@
+using System.Globalization;
+using System.Text;
+using System.Text.RegularExpressions;
+
+namespace JobTrackerApi.Models;
+
+/// <summary>
+/// Recognizes human (spoken) language names and proficiency levels in free text.
+/// </summary>
+/// <remarks>
+/// Language names come from the English and native names of the cultures reported by
+/// <see cref="CultureInfo.GetCultures(CultureTypes)"/> plus a few hand-added aliases. Lookups
+/// ignore case and diacritics.
+/// </remarks>
+public static class HumanLanguageCatalog
+{
+    // Normalized alias (see NormalizeKey) -> canonical display name.
+    private static readonly Dictionary<string, string> LanguageLookup = BuildLanguageLookup();
+
+    // A "word" is a maximal run of letters; digits and punctuation separate words.
+    private static readonly Regex WordRegex = new(@"\p{L}+", RegexOptions.Compiled);
+
+    // Regex alternation is ordered, so a CEFR range must be consumed together with its first
+    // level: listing "a2" ahead of "a2/b1" made every range match as its first half only.
+    // The optional "/ level" tail keeps ranges such as "A2/B1" intact.
+    private static readonly Regex LevelRegex = new(
+        @"\b(native(?:\s+speaker)?|fluent|advanced|intermediate|beginner|basic|conversational|elementary|professional\s+working\s+proficiency|working\s+proficiency|limited\s+working\s+proficiency|full\s+professional\s+proficiency|[abc][12](?:\s*/\s*[abc][12])?)\b",
+        RegexOptions.IgnoreCase | RegexOptions.Compiled);
+
+    /// <summary>
+    /// Resolves text to a single canonical language name.
+    /// </summary>
+    /// <param name="raw">Free text, e.g. "Native Norwegian speaker".</param>
+    /// <returns>The canonical name when exactly one language is found; otherwise <c>null</c>.</returns>
+    public static string? NormalizeLanguageName(string? raw)
+    {
+        var matches = ExtractLanguageNames(raw);
+        return matches.Count == 1 ? matches[0] : null;
+    }
+
+    /// <summary>
+    /// Finds every known language name mentioned in the text.
+    /// </summary>
+    /// <param name="raw">Free text to scan; may be null or blank.</param>
+    /// <returns>
+    /// Distinct canonical names in order of first appearance; empty when none are found.
+    /// </returns>
+    public static IReadOnlyList<string> ExtractLanguageNames(string? raw)
+    {
+        if (string.IsNullOrWhiteSpace(raw)) return Array.Empty<string>();
+
+        var words = WordRegex.Matches(raw)
+            .Select(match => match.Value)
+            .Where(value => !string.IsNullOrWhiteSpace(value))
+            .ToList();
+
+        if (words.Count == 0) return Array.Empty<string>();
+
+        // Try the longest phrases first (up to four words) so a multi-word name wins over any
+        // shorter name contained in it; overlapping shorter matches are then rejected.
+        var matches = new List<(int Start, int Size, string Canonical)>();
+        for (var size = Math.Min(4, words.Count); size >= 1; size--)
+        {
+            for (var start = 0; start <= words.Count - size; start++)
+            {
+                var phrase = string.Join(" ", words.Skip(start).Take(size));
+                if (!LanguageLookup.TryGetValue(NormalizeKey(phrase), out var canonical)) continue;
+                if (matches.Any(existing => RangesOverlap(existing.Start, existing.Size, start, size))) continue;
+                matches.Add((start, size, canonical));
+            }
+        }
+
+        return matches
+            .OrderBy(match => match.Start)
+            .Select(match => match.Canonical)
+            .Distinct(StringComparer.OrdinalIgnoreCase)
+            .ToList();
+    }
+
+    /// <summary>
+    /// Tells whether the text contains a proficiency level that <see cref="ExtractLevel"/> recognizes.
+    /// </summary>
+    public static bool HasRecognizedLevel(string? raw)
+    {
+        return ExtractLevel(raw) is not null;
+    }
+
+    /// <summary>
+    /// Extracts the first proficiency level mentioned in the text.
+    /// </summary>
+    /// <param name="raw">Free text, e.g. "Norwegian level A2/B1".</param>
+    /// <returns>
+    /// The level in display form ("Native", "Fluent", "A2", "A2/B1", ...), or <c>null</c> when
+    /// the text is blank or mentions no recognized level.
+    /// </returns>
+    public static string? ExtractLevel(string? raw)
+    {
+        if (string.IsNullOrWhiteSpace(raw)) return null;
+
+        var match = LevelRegex.Match(raw);
+        if (!match.Success) return null;
+
+        var value = match.Groups[1].Value.Trim();
+        var compact = Regex.Replace(value, @"\s+", " ");
+        return compact.ToLowerInvariant() switch
+        {
+            "native speaker" => "Native",
+            "native" => "Native",
+            "fluent" => "Fluent",
+            "advanced" => "Advanced",
+            "intermediate" => "Intermediate",
+            "beginner" => "Beginner",
+            "basic" => "Basic",
+            "conversational" => "Conversational",
+            "elementary" => "Elementary",
+            "professional working proficiency" => "Professional working proficiency",
+            "working proficiency" => "Working proficiency",
+            "limited working proficiency" => "Limited working proficiency",
+            "full professional proficiency" => "Full professional proficiency",
+            // CEFR level or range: upper-case it and drop the spaces around the slash.
+            _ when Regex.IsMatch(compact, @"^[ABC][12](?:\s*/\s*[ABC][12])?$", RegexOptions.IgnoreCase) => compact.ToUpperInvariant().Replace(" ", string.Empty),
+            _ => compact,
+        };
+    }
+
+    // True when the half-open word ranges [startA, startA + sizeA) and [startB, startB + sizeB) intersect.
+    private static bool RangesOverlap(int startA, int sizeA, int startB, int sizeB)
+    {
+        var endA = startA + sizeA;
+        var endB = startB + sizeB;
+        return startA < endB && startB < endA;
+    }
+
+    // Builds the alias table from the runtime's culture list plus hand-added aliases.
+    // TryAdd means the first registration of an alias wins.
+    private static Dictionary<string, string> BuildLanguageLookup()
+    {
+        var map = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);
+
+        void Add(string? alias, string? canonical)
+        {
+            var normalizedAlias = NormalizeKey(alias);
+            var normalizedCanonical = NormalizeDisplayName(canonical);
+            if (string.IsNullOrWhiteSpace(normalizedAlias) || string.IsNullOrWhiteSpace(normalizedCanonical)) return;
+            map.TryAdd(normalizedAlias, normalizedCanonical);
+        }
+
+        foreach (var culture in CultureInfo.GetCultures(CultureTypes.NeutralCultures | CultureTypes.SpecificCultures))
+        {
+            // Both the English and the native spelling resolve to the English display name.
+            var english = CleanCultureLanguageName(culture.EnglishName);
+            var native = CleanCultureLanguageName(culture.NativeName);
+            Add(english, english);
+            Add(native, english);
+        }
+
+        Add("norsk", "Norwegian");
+        Add("bokmal", "Norwegian");
+        Add("bokmål", "Norwegian");
+        Add("nynorsk", "Norwegian");
+        Add("mandarin", "Chinese");
+        Add("cantonese", "Chinese");
+        Add("farsi", "Persian");
+        Add("persian", "Persian");
+
+        return map;
+    }
+
+    // Strips everything from the first "(" or "," onwards from a culture name and normalizes
+    // the casing of what remains.
+    private static string? CleanCultureLanguageName(string? value)
+    {
+        if (string.IsNullOrWhiteSpace(value)) return null;
+
+        var cleaned = value.Trim();
+        var parenIndex = cleaned.IndexOf('(');
+        if (parenIndex > 0) cleaned = cleaned[..parenIndex].Trim();
+        var commaIndex = cleaned.IndexOf(',');
+        if (commaIndex > 0) cleaned = cleaned[..commaIndex].Trim();
+        return NormalizeDisplayName(cleaned);
+    }
+
+    // Collapses whitespace and capitalizes each word; all-upper-case words of up to three
+    // characters are left untouched.
+    private static string? NormalizeDisplayName(string? value)
+    {
+        if (string.IsNullOrWhiteSpace(value)) return null;
+        var cleaned = Regex.Replace(value.Trim(), @"\s+", " ");
+        return string.Join(" ", cleaned.Split(' ', StringSplitOptions.RemoveEmptyEntries)
+            .Select(word => word.Length <= 3 && word.All(char.IsUpper)
+                ? word
+                : char.ToUpperInvariant(word[0]) + word[1..].ToLowerInvariant()));
+    }
+
+    // Lookup key: lower-cased, diacritics removed, every run of non-letters collapsed to one space.
+    private static string NormalizeKey(string? value)
+    {
+        if (string.IsNullOrWhiteSpace(value)) return string.Empty;
+
+        // Decompose so accents become separate combining marks that can be dropped.
+        var decomposed = value.Trim().Normalize(NormalizationForm.FormD);
+        var builder = new StringBuilder(decomposed.Length);
+        foreach (var ch in decomposed)
+        {
+            if (CharUnicodeInfo.GetUnicodeCategory(ch) == UnicodeCategory.NonSpacingMark) continue;
+            builder.Append(char.ToLowerInvariant(ch));
+        }
+
+        return Regex.Replace(builder.ToString().Normalize(NormalizationForm.FormC), @"[^\p{L}]+", " ").Trim();
+    }
+}
@@ -144,7 +144,7 @@ public static class StructuredCvProfileJson
        profile.Version = string.IsNullOrWhiteSpace(profile.Version) ? "1" : profile.Version.Trim();
        profile.Metadata ??= new StructuredCvMetadata();
        profile.Metadata.Fields ??= new Dictionary<string, StructuredCvFieldMetadata>();
-        profile.Contact ??= new StructuredCvContact();
+        profile.Contact = NormalizeContact(profile.Contact);
        profile.Summary = CleanList(profile.Summary);
        profile.Jobs = (profile.Jobs ?? new List<StructuredCvJob>())
            .Select(NormalizeJob)
@@ -178,20 +178,206 @@ public static class StructuredCvProfileJson
        return profile;
    }

+    /// <summary>
+    /// Normalizes every field of the contact section in place, creating an empty
+    /// contact when none was supplied.
+    /// </summary>
+    /// <param name="contact">Contact to normalize; may be null.</param>
+    /// <returns>The supplied instance (or a new one) with normalized fields.</returns>
+    private static StructuredCvContact NormalizeContact(StructuredCvContact? contact)
+    {
+        var result = contact ?? new StructuredCvContact();
+
+        // Free-text fields go through TrimOrNull only.
+        result.FullName = TrimOrNull(result.FullName);
+        result.Headline = TrimOrNull(result.Headline);
+        result.Email = TrimOrNull(result.Email);
+        result.Phone = TrimOrNull(result.Phone);
+
+        // Structured fields go through their dedicated validators.
+        result.Location = NormalizeLocationValue(result.Location);
+        result.Website = NormalizeWebsite(result.Website);
+        result.LinkedIn = NormalizeLinkedIn(result.LinkedIn);
+
+        return result;
+    }
+
    private static StructuredCvJob NormalizeJob(StructuredCvJob? job)
    {
+        // Cleans one job entry in place: validates title/company/location/dates, repairs
+        // title/company mix-ups with heuristics, and filters out non-informative bullets.
        job ??= new StructuredCvJob();
-        job.Title = TrimOrNull(job.Title);
-        job.Company = TrimOrNull(job.Company);
-        job.Location = TrimOrNull(job.Location);
-        job.Start = TrimOrNull(job.Start);
-        job.End = TrimOrNull(job.End);
-        job.Bullets = CleanList(job.Bullets);
+
+        // Field-level cleanup first; each helper returns null for values it rejects.
+        var title = NormalizeJobTitle(job.Title);
+        var company = NormalizeCompanyName(job.Company);
+        var location = NormalizeLocationValue(job.Location);
+
+        // "Title at Company" packed into the title field: split it when no company was supplied.
+        if (!string.IsNullOrWhiteSpace(title) && company is null)
+        {
+            var atSplit = Regex.Match(title, @"^(?<title>.+?)\s+at\s+(?<company>.+)$", RegexOptions.IgnoreCase);
+            if (atSplit.Success)
+            {
+                title = NormalizeJobTitle(atSplit.Groups["title"].Value);
+                company = NormalizeCompanyName(atSplit.Groups["company"].Value);
+            }
+        }
+
+        // Swap only when both heuristics agree that the two fields are reversed.
+        if (!string.IsNullOrWhiteSpace(title) && !string.IsNullOrWhiteSpace(company))
+        {
+            var titleLooksLikeCompany = LooksLikeCompanyName(title) && !LooksLikeJobTitle(title);
+            var companyLooksLikeTitle = LooksLikeJobTitle(company) && !LooksLikeCompanyName(company);
+            if (titleLooksLikeCompany && companyLooksLikeTitle)
+            {
+                (title, company) = (company, title);
+            }
+        }
+
+        // A title that only looks like an organisation fills an empty company slot and is then cleared.
+        // NOTE(review): the title is cleared even when a company is already set, so acronym titles
+        // such as "IT Support" (matched by the [A-Z]{2,} company heuristic and by no role keyword)
+        // are dropped - confirm this data loss is intended.
+        if (!string.IsNullOrWhiteSpace(title) && !LooksLikeJobTitle(title) && LooksLikeCompanyName(title))
+        {
+            if (company is null) company = title;
+            title = null;
+        }
+
+        // Mirror case: a company that only looks like a role becomes the title when no title exists.
+        if (!string.IsNullOrWhiteSpace(company) && !LooksLikeCompanyName(company) && LooksLikeJobTitle(company) && title is null)
+        {
+            title = company;
+            company = null;
+        }
+
+        job.Title = title;
+        job.Company = company;
+        job.Location = location;
+        // Dates that do not match the recognised date formats become null.
+        job.Start = NormalizeDateValue(job.Start);
+        job.End = NormalizeDateValue(job.End);
+        // Strip list markers, then drop bullets that are dates, headings, or repeats of title/company.
+        job.Bullets = CleanList(job.Bullets)
+            .Select(NormalizeBullet)
+            .Where(bullet => bullet is not null)
+            .Select(bullet => bullet!)
+            .Where(bullet => IsUsefulJobBullet(bullet, job.Title, job.Company))
+            .ToList();
        job.Skills = CleanList(job.Skills);
+        // An explicit flag wins; otherwise an end value of "present"/"current" marks the job as current.
        job.IsCurrent = job.IsCurrent || string.Equals(job.End, "present", StringComparison.OrdinalIgnoreCase) || string.Equals(job.End, "current", StringComparison.OrdinalIgnoreCase);
        return job;
    }

+    /// <summary>
+    /// Trims a bullet and removes leading list markers (<c>-</c>, <c>•</c>, <c>*</c>) and spaces.
+    /// </summary>
+    /// <param name="value">Raw bullet text; may be null or blank.</param>
+    /// <returns>
+    /// The cleaned text, or <c>null</c> when the input is null/blank or consists only of
+    /// list markers, so callers filtering on null never receive an empty string.
+    /// </returns>
+    private static string? NormalizeBullet(string? value)
+    {
+        if (string.IsNullOrWhiteSpace(value)) return null;
+
+        var text = value.Trim().TrimStart('-', '•', '*', ' ');
+        // A marker-only line such as "•" leaves nothing behind; report it as absent.
+        return text.Length == 0 ? null : text;
+    }
+
+    /// <summary>
+    /// Decides whether a bullet carries real content for a job entry.
+    /// </summary>
+    /// <param name="value">Bullet text.</param>
+    /// <param name="title">Job title, used to reject bullets that merely repeat it.</param>
+    /// <param name="company">Company name, used to reject bullets that merely repeat it.</param>
+    /// <returns>
+    /// <c>false</c> for empty text, date ranges, section headings, "Skills:" lines, repeats of
+    /// the title/company, and single tokens shorter than 12 characters; <c>true</c> otherwise.
+    /// </returns>
+    private static bool IsUsefulJobBullet(string? value, string? title, string? company)
+    {
+        var text = TrimOrNull(value);
+        if (text is null)
+        {
+            return false;
+        }
+
+        var isStructuralNoise = LooksLikeDateRange(text)
+            || LooksLikeSectionHeading(text)
+            || text.StartsWith("Skills:", StringComparison.OrdinalIgnoreCase);
+        if (isStructuralNoise)
+        {
+            return false;
+        }
+
+        var repeatsTitle = title is not null && text.Equals(title, StringComparison.OrdinalIgnoreCase);
+        var repeatsCompany = company is not null && text.Equals(company, StringComparison.OrdinalIgnoreCase);
+        if (repeatsTitle || repeatsCompany)
+        {
+            return false;
+        }
+
+        // A lone short token is treated as noise; anything longer or multi-word is kept.
+        return text.Length >= 12 || text.Contains(' ');
+    }
+
+    /// <summary>
+    /// Validates and tidies a job title.
+    /// </summary>
+    /// <param name="value">Raw title; may be null.</param>
+    /// <returns>
+    /// <c>null</c> when the value is empty, a date range, a section heading, or a URL/email;
+    /// otherwise the title with whitespace collapsed and surrounding separators removed.
+    /// </returns>
+    private static string? NormalizeJobTitle(string? value)
+    {
+        var candidate = TrimOrNull(value);
+        if (candidate is null)
+        {
+            return null;
+        }
+
+        var isNotATitle = LooksLikeDateRange(candidate)
+            || LooksLikeSectionHeading(candidate)
+            || LooksLikeUrlOrEmail(candidate);
+        if (isNotATitle)
+        {
+            return null;
+        }
+
+        var singleSpaced = Regex.Replace(candidate, @"\s+", " ");
+        var stripped = singleSpaced.Trim(' ', '|', ',', '-', ':');
+        return string.IsNullOrWhiteSpace(stripped) ? null : stripped;
+    }
+
+    /// <summary>
+    /// Validates and tidies a company name.
+    /// </summary>
+    /// <param name="value">Raw company text; may be null.</param>
+    /// <returns>
+    /// <c>null</c> when the value is empty, a date range, a section heading, a URL/email,
+    /// a "Skills:" line, or contains both a period and a space; otherwise the name with
+    /// whitespace collapsed and surrounding separators removed.
+    /// </returns>
+    private static string? NormalizeCompanyName(string? value)
+    {
+        var candidate = TrimOrNull(value);
+        if (candidate is null)
+        {
+            return null;
+        }
+
+        var isNotACompany = LooksLikeDateRange(candidate)
+            || LooksLikeSectionHeading(candidate)
+            || LooksLikeUrlOrEmail(candidate);
+        if (isNotACompany)
+        {
+            return null;
+        }
+
+        if (candidate.StartsWith("Skills:", StringComparison.OrdinalIgnoreCase))
+        {
+            return null;
+        }
+
+        // A period together with a space is rejected (presumably sentence-like text).
+        var hasPeriodAndSpace = candidate.Contains('.') && candidate.Contains(' ');
+        if (hasPeriodAndSpace)
+        {
+            return null;
+        }
+
+        var singleSpaced = Regex.Replace(candidate, @"\s+", " ");
+        var stripped = singleSpaced.Trim(' ', '|', ',', '-', ':');
+        return string.IsNullOrWhiteSpace(stripped) ? null : stripped;
+    }
+
+    /// <summary>
+    /// Accepts a value as a location only when it looks like a short, comma-separated place name.
+    /// </summary>
+    /// <param name="value">Raw location text; may be null.</param>
+    /// <returns>
+    /// The parts re-joined with ", ", or <c>null</c> when the value is empty, a date range,
+    /// a section heading, a URL/email, contains a digit, exceeds 80 characters, has more
+    /// than four comma-separated parts, or has a part that is not letters plus
+    /// apostrophes, hyphens, periods and spaces.
+    /// </returns>
+    private static string? NormalizeLocationValue(string? value)
+    {
+        var candidate = TrimOrNull(value);
+        if (candidate is null)
+        {
+            return null;
+        }
+
+        if (LooksLikeDateRange(candidate) || LooksLikeSectionHeading(candidate) || LooksLikeUrlOrEmail(candidate))
+        {
+            return null;
+        }
+
+        if (candidate.Any(char.IsDigit) || candidate.Length > 80)
+        {
+            return null;
+        }
+
+        var segments = Regex.Replace(candidate, @"\s+", " ")
+            .Trim(' ', '|', ';', ':')
+            .Split(',', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries);
+        if (segments.Length is 0 or > 4)
+        {
+            return null;
+        }
+
+        // Every segment must start with a letter and be at least two characters long.
+        var allPlaceLike = segments.All(segment => Regex.IsMatch(segment, @"^[\p{L}][\p{L}'’\-. ]+$"));
+        return allPlaceLike ? string.Join(", ", segments) : null;
+    }
+
+    /// <summary>
+    /// Reduces a website value to its lower-cased host name.
+    /// </summary>
+    /// <param name="value">Raw website text, with or without a scheme; may be null.</param>
+    /// <returns>
+    /// The host (for example <c>example.co.uk</c>), or <c>null</c> when the value is empty,
+    /// mentions linkedin.com, cannot be parsed as an absolute URI, or the host is not a
+    /// dotted domain ending in at least two letters. Path and query are discarded.
+    /// </returns>
+    private static string? NormalizeWebsite(string? value)
+    {
+        var raw = TrimOrNull(value);
+        if (raw is null || raw.Contains("linkedin.com", StringComparison.OrdinalIgnoreCase))
+        {
+            return null;
+        }
+
+        // Bare domains are given a scheme so Uri can parse them.
+        var absolute = raw.Contains("://", StringComparison.Ordinal) ? raw : $"https://{raw}";
+        if (!Uri.TryCreate(absolute, UriKind.Absolute, out var parsed))
+        {
+            return null;
+        }
+
+        var host = parsed.Host.Trim().Trim('.').ToLowerInvariant();
+        var isDomain = !string.IsNullOrWhiteSpace(host)
+            && Regex.IsMatch(host, @"^(?:[a-z0-9-]+\.)+[a-z]{2,}$", RegexOptions.IgnoreCase);
+        return isDomain ? host : null;
+    }
+
+    /// <summary>
+    /// Canonicalizes a LinkedIn profile link to <c>https://www.linkedin.com/in/...</c>
+    /// (or <c>/pub/...</c>).
+    /// </summary>
+    /// <param name="value">Raw link, with or without a scheme; may be null.</param>
+    /// <returns>
+    /// The canonical profile URL, or <c>null</c> when the value is empty, is not an absolute
+    /// URI, is not hosted on linkedin.com or one of its subdomains, or the path is not an
+    /// <c>/in/</c> or <c>/pub/</c> profile path with at most two extra segments.
+    /// </returns>
+    private static string? NormalizeLinkedIn(string? value)
+    {
+        var trimmed = TrimOrNull(value);
+        if (trimmed is null) return null;
+
+        // Bare "linkedin.com/in/..." values are given a scheme so Uri can parse them.
+        var candidate = trimmed;
+        if (!candidate.Contains("://", StringComparison.Ordinal)) candidate = $"https://{candidate}";
+        if (!Uri.TryCreate(candidate, UriKind.Absolute, out var uri)) return null;
+
+        // Match the registrable domain exactly: a substring test would also accept look-alike
+        // hosts such as "notlinkedin.com" or "linkedin.com.example.org" and then rewrite them
+        // as if they were genuine LinkedIn profiles.
+        var host = uri.Host;
+        var isLinkedInHost = host.Equals("linkedin.com", StringComparison.OrdinalIgnoreCase)
+            || host.EndsWith(".linkedin.com", StringComparison.OrdinalIgnoreCase);
+        if (!isLinkedInHost) return null;
+
+        var path = uri.AbsolutePath.TrimEnd('/');
+        if (!Regex.IsMatch(path, @"^/(in|pub)/[^/]+(?:/[^/]+){0,2}$", RegexOptions.IgnoreCase)) return null;
+        return $"https://www.linkedin.com{path}";
+    }
+
+    /// <summary>
+    /// Keeps a start/end value only when it is a recognised date or date range.
+    /// </summary>
+    /// <param name="value">Raw date text; may be null.</param>
+    /// <returns>The trimmed value when <c>LooksLikeDateRange</c> accepts it; otherwise <c>null</c>.</returns>
+    private static string? NormalizeDateValue(string? value)
+    {
+        var candidate = TrimOrNull(value);
+        if (candidate is null)
+        {
+            return null;
+        }
+
+        return LooksLikeDateRange(candidate) ? candidate : null;
+    }
+
+    /// <summary>
+    /// Tests whether the whole string is one date token, or two date tokens separated by a
+    /// hyphen or en dash. A token is <c>d/m/yyyy</c>, an English month name or abbreviation
+    /// followed by a four-digit year, a four-digit year, or the words Present/Current.
+    /// Matching is case-insensitive.
+    /// </summary>
+    private static bool LooksLikeDateRange(string value)
+    {
+        const string dateOrRangePattern = @"^(?:\d{1,2}/\d{1,2}/\d{4}|(?:Jan|January|Feb|February|Mar|March|Apr|April|May|Jun|June|Jul|July|Aug|August|Sep|Sept|September|Oct|October|Nov|November|Dec|December)\s+\d{4}|\d{4}|Present|Current)(?:\s*[-–]\s*(?:\d{1,2}/\d{1,2}/\d{4}|(?:Jan|January|Feb|February|Mar|March|Apr|April|May|Jun|June|Jul|July|Aug|August|Sep|Sept|September|Oct|October|Nov|November|Dec|December)\s+\d{4}|\d{4}|Present|Current))?$";
+        return Regex.IsMatch(value, dateOrRangePattern, RegexOptions.IgnoreCase);
+    }
+
+    /// <summary>
+    /// Cheap check for web or email content: an <c>@</c> sign, or a case-insensitive
+    /// occurrence of <c>www.</c>, <c>http://</c> or <c>https://</c> anywhere in the value.
+    /// </summary>
+    private static bool LooksLikeUrlOrEmail(string value)
+    {
+        if (value.Contains('@'))
+        {
+            return true;
+        }
+
+        foreach (var marker in new[] { "www.", "http://", "https://" })
+        {
+            if (value.Contains(marker, StringComparison.OrdinalIgnoreCase))
+            {
+                return true;
+            }
+        }
+
+        return false;
+    }
+
+    /// <summary>
+    /// Tests whether the value is exactly one of the known CV section headings,
+    /// ignoring case.
+    /// </summary>
+    private static bool LooksLikeSectionHeading(string value)
+    {
+        string[] knownHeadings =
+        {
+            "Work Experience",
+            "Experience",
+            "Employment History",
+            "Education",
+            "Skills",
+            "Languages",
+            "Interests",
+            "Contact",
+            "Professional Summary",
+            "Summary",
+        };
+
+        return Array.Exists(knownHeadings, heading => value.Equals(heading, StringComparison.OrdinalIgnoreCase));
+    }
+
+    /// <summary>
+    /// Heuristic: does the value read like a job title?
+    /// </summary>
+    /// <returns>
+    /// <c>false</c> for blank values, date ranges and URLs/emails. Otherwise <c>true</c> when
+    /// the value contains a known role keyword, or when it has at most six space-separated
+    /// words and does not look like a company name.
+    /// </returns>
+    private static bool LooksLikeJobTitle(string value)
+    {
+        if (string.IsNullOrWhiteSpace(value))
+        {
+            return false;
+        }
+
+        if (LooksLikeDateRange(value) || LooksLikeUrlOrEmail(value))
+        {
+            return false;
+        }
+
+        const string roleKeywords = @"\b(developer|engineer|manager|lead|architect|consultant|specialist|analyst|administrator|coordinator|director|designer|intern|officer|owner|founder|teacher|researcher|writer|editor|producer|assistant|technician|supervisor|head)\b";
+        if (Regex.IsMatch(value, roleKeywords, RegexOptions.IgnoreCase))
+        {
+            return true;
+        }
+
+        // Fallback: short phrases count as titles unless they look like an organisation.
+        var wordCount = value.Split(' ', StringSplitOptions.RemoveEmptyEntries).Length;
+        return wordCount <= 6 && !LooksLikeCompanyName(value);
+    }
+
+    /// <summary>
+    /// Heuristic: does the value read like an organisation name?
+    /// </summary>
+    /// <returns>
+    /// <c>false</c> for blank values, date ranges and URLs/emails. Otherwise <c>true</c> when
+    /// the value contains a known organisation keyword (case-insensitive), an ampersand,
+    /// or a standalone run of two or more uppercase ASCII letters.
+    /// </returns>
+    private static bool LooksLikeCompanyName(string value)
+    {
+        if (string.IsNullOrWhiteSpace(value))
+        {
+            return false;
+        }
+
+        if (LooksLikeDateRange(value) || LooksLikeUrlOrEmail(value))
+        {
+            return false;
+        }
+
+        const string organisationKeywords = @"\b(inc|llc|ltd|limited|plc|corp|corporation|company|group|university|college|council|municipality|kommune|bank|studio|agency|institute|hospital|school|technologies|technology|systems|solutions|consulting|consultants|partners|foundation|ministry|government)\b";
+        if (Regex.IsMatch(value, organisationKeywords, RegexOptions.IgnoreCase))
+        {
+            return true;
+        }
+
+        if (value.Contains('&'))
+        {
+            return true;
+        }
+
+        // Case-sensitive on purpose: only genuine all-caps tokens count as acronyms.
+        return Regex.IsMatch(value, @"\b[A-Z]{2,}\b");
+    }
+
    private static StructuredCvEducation NormalizeEducation(StructuredCvEducation? education)
    {
        education ??= new StructuredCvEducation();
@@ -207,8 +393,13 @@ public static class StructuredCvProfileJson
    private static StructuredCvLanguage NormalizeLanguage(StructuredCvLanguage? language)
    {
+        // Normalizes one language entry in place via HumanLanguageCatalog.
        language ??= new StructuredCvLanguage();
-        language.Name = TrimOrNull(language.Name);
-        language.Level = TrimOrNull(language.Level);
+
+        var originalName = TrimOrNull(language.Name);
+        var normalizedName = HumanLanguageCatalog.NormalizeLanguageName(originalName);
+        // The level is taken from the Level field first, then probed for in the name text.
+        var normalizedLevel = HumanLanguageCatalog.ExtractLevel(language.Level) ?? HumanLanguageCatalog.ExtractLevel(originalName);
+
+        // The name survives only when NormalizeLanguageName returned a value AND a level was
+        // found; otherwise it is cleared. Level is assigned regardless.
+        language.Name = normalizedName is not null && normalizedLevel is not null ? normalizedName : null;
+        language.Level = normalizedLevel;
        language.Notes = TrimOrNull(language.Notes);
        return language;
    }
@@ -360,7 +551,13 @@ public static class StructuredCvProfileJson
                    }
                }

-                return new StructuredCvLanguage { Name = name.NullIfWhitespace(), Level = level, Notes = notes };
+                var normalizedLevel = HumanLanguageCatalog.ExtractLevel(level) ?? HumanLanguageCatalog.ExtractLevel(item);
+                return new StructuredCvLanguage
+                {
+                    Name = normalizedLevel is not null ? HumanLanguageCatalog.NormalizeLanguageName(name) : null,
+                    Level = normalizedLevel,
+                    Notes = notes,
+                };
            })
            .Where(language => !string.IsNullOrWhiteSpace(language.Name))
            .ToList();
@@ -25,7 +25,7 @@ Job Tracker is a simple, self-hosted app for tracking job applications with a Re

 ## Quickstart (Docker)

-This runs: frontend (nginx), backend API, and the AI service.
+This runs: frontend (nginx), backend API, the local AI service, and an Ollama container for hybrid CV block classification.

 1) Create a `.env` file next to `docker-compose.yml` (you can start from `.env.example`).

@@ -108,9 +108,15 @@ The API calls a local FastAPI service to generate summaries. If it’s not runni
 With Docker (recommended):

 ```bash
-docker compose up --build ai-service
+# One command for local Ollama startup + pull + AI-service restart
+OLLAMA_MODEL=qwen2.5:7b ./scripts/start-ollama-cv.sh
+
+# Then start the rest of the app if needed
+docker compose up --build -d backend frontend
 ```

+The first Ollama startup is usually quick, but the first model pull and first generation can take a while. After the model is cached in the `ollama_data` volume, later restarts are much faster.
+
 Or run directly from `tools/summarizer/` (see `tools/summarizer/README.md`).

 ## Configuration
@@ -52,6 +52,8 @@ AUTH_ADMIN_EMAIL=you@example.com
 AUTH_ADMIN_PASSWORD=replace_with_strong_password
 APP_PUBLIC_BASE_URL=https://your-domain.example
 AI_SERVICE_BASE_URL=http://ai-service:8001
+OLLAMA_BASE_URL=http://ollama:11434
+OLLAMA_MODEL=qwen2.5:7b
 EMAIL_FOLLOWUPREMINDERS_ENABLED=true
 EMAIL_FOLLOWUPREMINDERS_UPCOMINGDAYS=2
 # Optional backward-compatible alias if older config still references the previous name:
@@ -87,7 +89,8 @@ If this app is going to be a real production service on Ubuntu:
 2. Gitea Actions runs tests
 3. if green, workflow uploads repo to server
 4. `deploy/deploy.sh` links `/opt/job-tracker/shared/.env` into the repo checkout, then runs `docker compose build && docker compose up -d`
-5. workflow checks service status after deployment
+5. if `OLLAMA_MODEL` is set, the deploy script waits for Ollama, pulls the configured model if missing, then restarts `ai-service` so hybrid CV classification can use it
+6. workflow checks service status after deployment

 ## Post-deploy verification you should also do manually the first time
 - confirm reverse proxy routes to the frontend correctly
@@ -96,3 +99,4 @@ If this app is going to be a real production service on Ubuntu:
 - confirm AI service container is reachable from backend
 - confirm reminder and admin/system pages load
 - verify follow-up reminder emails are enabled only when intended and that links open the correct job/tab
@@ -45,6 +45,11 @@ build_with_recovery
 # Force recreation so updated port mappings, env vars, and container config always apply on deploy.
 compose up -d --force-recreate --remove-orphans

+if [ -n "${OLLAMA_MODEL:-}" ]; then
+  echo "Post-deploy Ollama warmup enabled for model: ${OLLAMA_MODEL}"
+  ./scripts/start-ollama-cv.sh
+fi
+
 sleep 5
 compose ps

@@ -71,8 +71,13 @@ services:
    build:
      context: ./tools/summarizer
      dockerfile: Dockerfile
+    environment:
+      - OLLAMA_BASE_URL=${OLLAMA_BASE_URL:-http://ollama:11434}
+      - OLLAMA_MODEL=${OLLAMA_MODEL:-qwen2.5:7b}
    ports:
      - "8001:8001"
+    depends_on:
+      - ollama
    networks:
      - default
      - shared_services
@@ -83,8 +88,29 @@ services:
      timeout: 10s
      retries: 3

+  # Ollama runtime; ai-service depends on it and reaches it through OLLAMA_BASE_URL.
+  ollama:
+    # NOTE(review): floating tag - consider pinning a version for reproducible deploys.
+    image: ollama/ollama:latest
+    ports:
+      # NOTE(review): publishes the Ollama API on the host; containers on the compose network
+      # already reach it by service name. Confirm host exposure is intended, or bind to 127.0.0.1.
+      - "11434:11434"
+    environment:
+      # Listen on all container interfaces so other services can connect.
+      - OLLAMA_HOST=0.0.0.0:11434
+    volumes:
+      # Pulled models persist here across container recreation.
+      - ollama_data:/root/.ollama
+    networks:
+      - default
+      - shared_services
+    restart: unless-stopped
+    # NOTE(review): requests every GPU; presumably container start fails on hosts without a
+    # GPU runtime - confirm CPU-only deployments are not expected.
+    gpus: all
+    healthcheck:
+      # Healthy once the Ollama CLI can talk to the server inside the container.
+      test: ["CMD", "ollama", "list"]
+      interval: 20s
+      timeout: 15s
+      retries: 10
+      start_period: 20s
+
 volumes:
  jobtracker_data:
+  ollama_data:

 networks:
  shared_services:
@@ -1,8 +1,9 @@
 import React, { useCallback, useEffect, useMemo, useRef, useState } from "react";

-import { Alert, Avatar, Box, Button, Chip, Divider, FormControl, InputLabel, LinearProgress, MenuItem, Paper, Select, TextField, Typography } from "@mui/material";
+import { Accordion, AccordionDetails, AccordionSummary, Alert, Avatar, Box, Button, Chip, Divider, FormControl, InputLabel, LinearProgress, MenuItem, Paper, Select, TextField, Typography } from "@mui/material";

 import DeleteOutlineIcon from "@mui/icons-material/DeleteOutline";
+import ExpandMoreIcon from "@mui/icons-material/ExpandMore";
 import PhotoCameraOutlinedIcon from "@mui/icons-material/PhotoCameraOutlined";

 import { api } from "../api";
@@ -399,22 +400,40 @@ export default function ProfilePage() {
              >
                {reprocessingCv ? t("profileCvReprocessing") : t("profileCvReprocess")}
              </Button>
-              <Button variant="text" disabled={!profileCvText.trim()} onClick={() => navigator.clipboard.writeText(profileCvText)}>
-                {t("profileCopyCvText")}
-              </Button>
            </Box>
          </Box>
          {uploadingCv ? <LinearProgress sx={{ mb: 1.5 }} /> : null}
-          <TextField
-            label={t("profileCvTextLabel")}
-            value={profileCvText}
-            onChange={(e) => setProfileCvText(e.target.value)}
-            helperText={t("profileCvTextHelp")}
-            multiline
-            minRows={12}
-            disabled={!isLocal}
-            fullWidth
-          />
+          <Alert severity="info" sx={{ mb: 2, borderRadius: 2.5 }}>
+            {t("profileCvStructuredDefaultHint")}
+          </Alert>
+          <Accordion disableGutters elevation={0} sx={{ mb: 2, borderRadius: 3, border: "1px solid", borderColor: "divider", backgroundColor: "background.paper", "&:before": { display: "none" } }}>
+            <AccordionSummary expandIcon={<ExpandMoreIcon />}>
+              <Box sx={{ display: "flex", justifyContent: "space-between", gap: 1.5, alignItems: "center", width: "100%", pr: 1 }}>
+                <Box>
+                  <Typography variant="subtitle1" sx={{ fontWeight: 800 }}>{t("profileCvRawPanelTitle")}</Typography>
+                  <Typography variant="body2" sx={{ color: "text.secondary" }}>{t("profileCvRawPanelHelp")}</Typography>
+                </Box>
+                <Chip size="small" label={t("profileCvSectionWordCount", { count: cvWordCount })} />
+              </Box>
+            </AccordionSummary>
+            <AccordionDetails>
+              <TextField
+                label={t("profileCvTextLabel")}
+                value={profileCvText}
+                onChange={(e) => setProfileCvText(e.target.value)}
+                helperText={t("profileCvTextHelp")}
+                multiline
+                minRows={12}
+                disabled={!isLocal}
+                fullWidth
+              />
+              <Box sx={{ mt: 1.5, display: "flex", justifyContent: "flex-end" }}>
+                <Button variant="text" disabled={!profileCvText.trim()} onClick={() => navigator.clipboard.writeText(profileCvText)}>
+                  {t("profileCopyCvText")}
+                </Button>
+              </Box>
+            </AccordionDetails>
+          </Accordion>
          <Box sx={{ mt: 2, p: 1.5, borderRadius: 3, border: "1px solid", borderColor: "divider", backgroundColor: "background.paper" }}>
            <Box sx={{ display: "flex", justifyContent: "space-between", gap: 2, flexWrap: "wrap", alignItems: "center", mb: 1.5 }}>
              <Box>
@@ -147,10 +147,17 @@ test('profile page loads persisted structured cv and can re-parse it', async ()
  expect(screen.getByText(/extraction history/i)).toBeInTheDocument();
  expect(screen.getByText(/resume.pdf/i)).toBeInTheDocument();
  expect(screen.getByText(/current run/i)).toBeInTheDocument();
+  expect(screen.getAllByText(/original extraction/i).length).toBeGreaterThan(0);
+  const originalExtractionToggle = screen.getByRole('button', { name: /original extraction/i });
+  expect(originalExtractionToggle).toHaveAttribute('aria-expanded', 'false');
  expect(screen.getAllByText(/professional summary/i).length).toBeGreaterThan(0);
  expect(screen.getByLabelText(/full name/i)).toHaveValue('Demo User');
  expect(screen.getByText(/high 92%/i)).toBeInTheDocument();

+  fireEvent.click(originalExtractionToggle);
+  expect(originalExtractionToggle).toHaveAttribute('aria-expanded', 'true');
+  expect(await screen.findByLabelText(/profile cv \/ master resume text/i)).toHaveValue('Professional Summary\nBuilt backend systems');
+
  const analyzeButton = screen.getByRole('button', { name: /analyze sections/i });
  await waitFor(() => expect(analyzeButton).toBeEnabled());
  fireEvent.click(analyzeButton);
@@ -0,0 +1,79 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+cd "$(dirname "$0")/.."
+
+MODEL="${OLLAMA_MODEL:-qwen2.5:7b}"
+OLLAMA_WAIT_SECONDS="${OLLAMA_WAIT_SECONDS:-180}"
+PULL_WAIT_SECONDS="${OLLAMA_PULL_WAIT_SECONDS:-1800}"
+
+# Wrapper so every Docker Compose invocation in this script goes through one place.
+# Arguments are forwarded unchanged.
+compose() {
+  docker compose "$@"
+}
+
+wait_for_ollama() {
+  local deadline=$((SECONDS + OLLAMA_WAIT_SECONDS))
+  while [ "$SECONDS" -lt "$deadline" ]; do
+    if compose exec -T ollama ollama list >/dev/null 2>&1; then
+      return 0
+    fi
+    sleep 3
+  done
+  return 1
+}
+
+model_present() {
+  compose exec -T ollama ollama list 2>/dev/null | awk 'NR>1 {print $1}' | grep -Fx "$MODEL" >/dev/null 2>&1
+}
+
+wait_for_model() {
+  local deadline=$((SECONDS + PULL_WAIT_SECONDS))
+  while [ "$SECONDS" -lt "$deadline" ]; do
+    if model_present; then
+      return 0
+    fi
+    sleep 5
+  done
+  return 1
+}
+
+# Step 1: start (or create) the ollama container and wait for its CLI to respond.
+echo "Starting Ollama service..."
+compose up -d ollama
+
+if ! wait_for_ollama; then
+  echo "Ollama did not become ready within ${OLLAMA_WAIT_SECONDS}s."
+  compose logs --tail=200 ollama || true
+  exit 1
+fi
+
+echo "Ollama is responding."
+
+# Step 2: pull the model only when it is not already listed.
+if model_present; then
+  echo "Model already present: $MODEL"
+else
+  echo "Pulling Ollama model: $MODEL"
+  compose exec -T ollama ollama pull "$MODEL" || {
+    echo "Model pull command failed."
+    compose logs --tail=200 ollama || true
+    exit 1
+  }
+fi
+
+# Step 3: confirm the model is actually listed before handing over to the AI service.
+if ! wait_for_model; then
+  echo "Model ${MODEL} did not appear within ${PULL_WAIT_SECONDS}s."
+  compose exec -T ollama ollama list || true
+  exit 1
+fi
+
+echo "Ollama model ready: $MODEL"
+
+# Step 4: bring up the AI service.
+# NOTE(review): the message says "Restarting", but `up -d` presumably leaves an already
+# running, unchanged container as-is - confirm whether an explicit restart is required.
+echo "Restarting AI service so it can use the ready Ollama model."
+compose up -d ai-service
+
+# Step 5: fail the script when the AI service container is not in the "running" state.
+if ! compose ps ai-service --format '{{.State}}' 2>/dev/null | head -n 1 | tr '[:upper:]' '[:lower:]' | grep -qx 'running'; then
+  echo "AI service is not running after Ollama warmup."
+  compose logs --tail=200 ai-service || true
+  exit 1
+fi
+
+echo "Ollama warmup complete."
@@ -8,6 +8,7 @@ This service runs a local Hugging Face summarization model and also exposes docu
 - OCR fallback for scanned PDFs
 - OCR for image uploads (`png`, `jpg`, `jpeg`, `webp`)
 - DOCX / TXT / MD extraction
+- optional Ollama-backed CV block classification for harder sectioning

 ## Install

@@ -36,8 +37,30 @@ The Dockerfile installs Tesseract OCR so scanned PDFs and supported images can b
 - `GET /health` — health check and runtime capabilities
 - `POST /summarize` — JSON body `{ "text": "...", "max_length": 150, "min_length": 30 }`
 - `POST /extract-text` — multipart file upload, returns extracted text and OCR metadata
+- `POST /cv/classify-block` — JSON body `{ "block": "..." }`, uses Ollama when `OLLAMA_MODEL` is configured

-## Notes
- Model weights are downloaded on first run.
+## Ollama
+Set these before starting the service if you want the hybrid CV classifier enabled:
+
+```bash
+export OLLAMA_BASE_URL=http://ollama:11434
+export OLLAMA_MODEL=qwen2.5:7b
+```
+
+Choose the model by setting `OLLAMA_MODEL` and then warming it with the helper script:
+
+```bash
+OLLAMA_MODEL=qwen2.5:7b ./scripts/start-ollama-cv.sh
+```
+
+Equivalent manual flow:
+
+```bash
+docker compose up -d ollama
+docker compose exec ollama ollama pull qwen2.5:7b
+docker compose up -d ai-service
+```
+
+## Notes
+- Hugging Face model weights are downloaded on first run; Ollama model weights are downloaded on first pull.
 - OCR quality depends on scan quality and language support.
 - Default OCR language is English (`eng`).
@@ -8,9 +8,13 @@ from docx import Document
 import fitz
 import hashlib
 import io
+import json
+import os
 import re
 import torch
 import pytesseract
+from urllib import request as urllib_request
+from urllib.error import URLError, HTTPError

 app = FastAPI(title="Local AI Service")

@@ -20,6 +24,8 @@ MAX_CONTEXT_CHARS = 2200
 MAX_EXTRACT_FILE_BYTES = 8 * 1024 * 1024
 OCR_LANGUAGES = "eng"
 IMAGE_EXTENSIONS = {".png", ".jpg", ".jpeg", ".webp"}
+OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://127.0.0.1:11434").rstrip("/")
+OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "")


 def _load_runtime():
@@ -44,11 +50,47 @@ class SummarizeRequest(BaseModel):
    top_skills: int = Field(default=8, ge=3, le=12)


+class CvClassifyBlockRequest(BaseModel):
+    """Request body for ``POST /cv/classify-block``: one CV text block."""
+
+    # Raw text of the block to classify; must be 1 to 6000 characters.
+    block: str = Field(min_length=1, max_length=6000)
+
+
 def _key(text: str, max_length: int, min_length: int, top_skills: int) -> str:
+    """Return a key string: the SHA-256 hex digest of *text* plus the three settings."""
     h = hashlib.sha256(text.encode("utf-8")).hexdigest()
     return f"{h}:{max_length}:{min_length}:{top_skills}"


+def _ollama_status():
+    configured = bool(OLLAMA_MODEL)
+    if not configured:
+        return {
+            "ollama_configured": False,
+            "ollama_reachable": False,
+            "ollama_model": None,
+            "ollama_model_available": False,
+        }
+
+    req = urllib_request.Request(f"{OLLAMA_BASE_URL}/api/tags", method="GET")
+    try:
+        with urllib_request.urlopen(req, timeout=5) as response:
+            body = json.loads(response.read().decode("utf-8"))
+    except Exception:
+        return {
+            "ollama_configured": True,
+            "ollama_reachable": False,
+            "ollama_model": OLLAMA_MODEL,
+            "ollama_model_available": False,
+        }
+
+    models = body.get("models") or []
+    names = {item.get("name") for item in models if isinstance(item, dict)}
+    return {
+        "ollama_configured": True,
+        "ollama_reachable": True,
+        "ollama_model": OLLAMA_MODEL,
+        "ollama_model_available": OLLAMA_MODEL in names,
+    }
+
+
@app.get("/health")
 async def health():
    return {
@@ -59,6 +101,7 @@ async def health():
        "gpu_name": GPU_NAME,
        "ocr_available": True,
        "ocr_languages": OCR_LANGUAGES,
+        **_ollama_status(),
    }


@@ -272,6 +315,93 @@ def _model_summarize(text: str, max_length: int, min_length: int) -> str:
    return tokenizer.decode(outputs[0], skip_special_tokens=True).strip()


+def _ollama_generate_json(prompt: str):
+    if not OLLAMA_MODEL:
+        raise HTTPException(status_code=503, detail="OLLAMA_MODEL is not configured.")
+
+    payload = json.dumps({
+        "model": OLLAMA_MODEL,
+        "prompt": prompt,
+        "stream": False,
+        "format": "json",
+        "options": {"temperature": 0.1}
+    }).encode("utf-8")
+
+    req = urllib_request.Request(
+        f"{OLLAMA_BASE_URL}/api/generate",
+        data=payload,
+        headers={"Content-Type": "application/json"},
+        method="POST",
+    )
+
+    try:
+        with urllib_request.urlopen(req, timeout=30) as response:
+            body = json.loads(response.read().decode("utf-8"))
+    except HTTPError as ex:
+        raise HTTPException(status_code=502, detail=f"Ollama request failed with {ex.code}.")
+    except URLError as ex:
+        raise HTTPException(status_code=503, detail=f"Ollama is unreachable: {ex.reason}.")
+
+    raw = (body.get("response") or "").strip()
+    if not raw:
+        raise HTTPException(status_code=502, detail="Ollama returned an empty response.")
+
+    try:
+        return json.loads(raw)
+    except json.JSONDecodeError:
+        start = raw.find("{")
+        end = raw.rfind("}")
+        if start >= 0 and end > start:
+            return json.loads(raw[start:end + 1])
+        raise HTTPException(status_code=502, detail="Ollama did not return valid JSON.")
+
+
+@app.post("/cv/classify-block")
+async def classify_cv_block(req: CvClassifyBlockRequest):
+    prompt = f"""
+You classify one CV text block into structured JSON.
+Return ONLY valid JSON with this exact shape:
+{{
+  "section": "Contact|Professional Summary|Work Experience|Education|Skills|Languages|Interests|Other",
+  "confidence": 0.0,
+  "reason": "short reason",
+  "title": string|null,
+  "company": string|null,
+  "location": string|null,
+  "start": string|null,
+  "end": string|null,
+  "bullets": string[]
+}}
+
+Rules:
+- Preserve facts only.
+- section must be one of the listed values.
+- Use Work Experience only for job/employment blocks.
+- For Contact blocks, keep title/company/start/end null and bullets empty.
+- For non-work blocks, title/company/start/end should usually be null.
+- location must look like a place, not a sentence.
+- dates must be one of: year, month+year, dd/mm/yyyy, Present, Current.
+- bullets should only be job tasks/achievements, not titles, companies, dates, or headings.
+- If unsure, choose Other and keep fields null/empty.
+
+Block:
+{req.block.strip()}
+""".strip()
+
+    parsed = _ollama_generate_json(prompt)
+    return {
+        "section": parsed.get("section") or "Other",
+        "confidence": parsed.get("confidence"),
+        "reason": parsed.get("reason"),
+        "title": parsed.get("title"),
+        "company": parsed.get("company"),
+        "location": parsed.get("location"),
+        "start": parsed.get("start"),
+        "end": parsed.get("end"),
+        "bullets": parsed.get("bullets") or [],
+    }
+
+
@app.post("/summarize")
 async def summarize(req: SummarizeRequest):
    if req.min_length >= req.max_length: