Files
2026-03-21 11:55:27 +01:00

37 lines
1.3 KiB
C#

using System;
using System.Globalization;
namespace JobTrackerApi.Services.JobImport;
/// <summary>
/// Lightweight heuristic that classifies job-ad text as Norwegian (<c>"no"</c>) or
/// English (<c>"en"</c>). It is not a general-purpose language detector: anything that
/// does not look Norwegian is reported as English.
/// </summary>
public static class LanguageDetector
{
    /// <summary>Minimum accumulated hint score at which text is classified as Norwegian.</summary>
    private const int NorwegianScoreThreshold = 4;

    /// <summary>
    /// Weighted Norwegian hints. Each entry scores its weight at most once, when any of its
    /// phrases occurs in the normalized text. A leading space anchors the start of a word;
    /// a trailing space anchors the end (entries without one also match inflected forms).
    /// </summary>
    private static readonly (string[] Phrases, int Weight)[] NorwegianHints =
    {
        (new[] { " stilling ", " stillingen " }, 2),
        (new[] { " arbeidsoppgaver" }, 2),
        (new[] { " kvalifikasjoner" }, 2),
        (new[] { " vi tilbyr" }, 2),
        (new[] { " krav " }, 1),
        (new[] { " og " }, 1),
        (new[] { " ikke " }, 1),
        (new[] { " du ", " deg " }, 1),
    };

    /// <summary>
    /// Detects whether <paramref name="text"/> is Norwegian or English.
    /// </summary>
    /// <param name="text">Text to classify; may be <c>null</c>, empty, or whitespace.</param>
    /// <returns>
    /// <c>"no"</c> when the text contains æ, ø or å (in either case) or when its Norwegian
    /// hint score reaches <see cref="NorwegianScoreThreshold"/>; otherwise <c>"en"</c>,
    /// including for <c>null</c>, empty, or whitespace-only input.
    /// </returns>
    public static string Detect(string? text)
    {
        if (string.IsNullOrWhiteSpace(text))
        {
            return "en";
        }

        // Build a lowercased copy in which every run of non-alphanumeric characters
        // (spaces, newlines, tabs, punctuation) collapses to a single space, padded with
        // one space on each side. This lets the space-anchored hints match words at the
        // start or end of the text and words delimited by newlines or punctuation.
        // Capacity: one leading space + at most one char per input char + one trailing space.
        var buffer = new char[text.Length + 2];
        var length = 0;
        buffer[length++] = ' ';

        foreach (var raw in text)
        {
            var ch = char.ToLowerInvariant(raw);

            // Norwegian-specific letters are decisive on their own. This also covers words
            // such as "søker", so they need no separate keyword hint.
            if (ch is 'æ' or 'ø' or 'å')
            {
                return "no";
            }

            if (char.IsLetterOrDigit(ch))
            {
                buffer[length++] = ch;
            }
            else if (buffer[length - 1] != ' ')
            {
                buffer[length++] = ' ';
            }
        }

        if (buffer[length - 1] != ' ')
        {
            buffer[length++] = ' ';
        }

        var normalized = new string(buffer, 0, length);

        var score = 0;
        foreach (var (phrases, weight) in NorwegianHints)
        {
            foreach (var phrase in phrases)
            {
                if (normalized.Contains(phrase, StringComparison.Ordinal))
                {
                    score += weight;
                    break; // alternatives within one hint count only once
                }
            }
        }

        return score >= NorwegianScoreThreshold ? "no" : "en";
    }
}