37 lines
1.3 KiB
C#
37 lines
1.3 KiB
C#
using System;
|
|
using System.Globalization;
|
|
|
|
namespace JobTrackerApi.Services.JobImport;
|
|
|
|
/// <summary>
/// Lightweight heuristic language detector: good enough to distinguish
/// Norwegian from English for job ads.
/// </summary>
public static class LanguageDetector
{
    /// <summary>Minimum accumulated indicator weight for a text to be classified as Norwegian.</summary>
    private const int NorwegianThreshold = 4;

    // Weighted Norwegian indicators. Each entry contributes its weight at most once,
    // when any of its patterns occurs in the normalized text. Patterns are written
    // against the normalized form produced by Detect: lower-case, every
    // non-alphanumeric character replaced by a single space, and one padding space
    // at each end. A trailing space in a pattern demands a whole-word match; a
    // pattern without one is a word-prefix match (e.g. " arbeidsoppgaver").
    //
    // Note: "søker" is intentionally absent. Any text containing 'ø' is classified
    // as Norwegian by the character scan before keyword scoring is reached.
    private static readonly (string[] Patterns, int Weight)[] NorwegianIndicators =
    {
        (new[] { " stilling ", " stillingen " }, 2),
        (new[] { " arbeidsoppgaver" }, 2),
        (new[] { " kvalifikasjoner" }, 2),
        (new[] { " vi tilbyr" }, 2),
        (new[] { " krav " }, 1),
        (new[] { " og " }, 1),
        (new[] { " ikke " }, 1),
        (new[] { " du ", " deg " }, 1),
    };

    /// <summary>
    /// Guesses whether <paramref name="text"/> is Norwegian or English.
    /// </summary>
    /// <param name="text">The text to classify; may be <c>null</c>, empty or whitespace.</param>
    /// <returns>
    /// <c>"no"</c> when the text contains one of the letters æ/ø/å (in either case, or
    /// å written as 'a' followed by a combining ring) or when its Norwegian keyword
    /// score reaches the threshold; otherwise <c>"en"</c>. Null, empty and
    /// whitespace-only input yields <c>"en"</c>.
    /// </returns>
    public static string Detect(string? text)
    {
        if (string.IsNullOrWhiteSpace(text))
        {
            return "en";
        }

        // Single pass: look for Norwegian letters while building a normalized copy
        // of the text. Padding both ends with a space lets keywords at the very
        // start or end of the input match the space-delimited patterns.
        var normalized = new char[text.Length + 2];
        normalized[0] = ' ';
        normalized[normalized.Length - 1] = ' ';

        for (var i = 0; i < text.Length; i++)
        {
            var ch = char.ToLowerInvariant(text[i]);

            // Norwegian characters strongly indicate "no".
            if (ch is 'æ' or 'ø' or 'å')
            {
                return "no";
            }

            // Decomposed form of 'å': 'a' followed by U+030A COMBINING RING ABOVE.
            if (ch == '\u030A' && i > 0 && char.ToLowerInvariant(text[i - 1]) == 'a')
            {
                return "no";
            }

            // Newlines, tabs and punctuation all act as word boundaries, so
            // "Kvalifikasjoner:" or "krav," are matched just like "krav ".
            normalized[i + 1] = char.IsLetterOrDigit(ch) ? ch : ' ';
        }

        var haystack = new string(normalized);
        var score = 0;

        foreach (var (patterns, weight) in NorwegianIndicators)
        {
            foreach (var pattern in patterns)
            {
                if (haystack.Contains(pattern, StringComparison.Ordinal))
                {
                    score += weight;
                    break; // each indicator group counts at most once
                }
            }
        }

        return score >= NorwegianThreshold ? "no" : "en";
    }
}
|
|
|