First Commit
This commit is contained in:
@@ -0,0 +1,36 @@
|
||||
using System;
|
||||
using System.Globalization;
|
||||
|
||||
namespace JobTrackerApi.Services.JobImport;
|
||||
|
||||
/// <summary>
/// Lightweight heuristic language detector: good enough to distinguish
/// Norwegian ("no") from English ("en") in job-ad text.
/// </summary>
public static class LanguageDetector
{
    private const string Norwegian = "no";
    private const string English = "en";

    // Minimum accumulated weight required to classify a text as Norwegian.
    private const int NorwegianThreshold = 4;

    // Each entry contributes its weight at most once, when any of its phrases occurs.
    // Phrases are matched against normalized text (lowercase, single-space separated,
    // padded with one space on each side). A trailing space in a phrase demands a whole
    // word; no trailing space allows suffixes (e.g. " kvalifikasjoner" also matches
    // "kvalifikasjonene" only if it starts with the phrase).
    // NOTE: there is deliberately no "søker" entry - any text containing 'ø' is already
    // classified as Norwegian by the character scan, so such an entry could never fire.
    private static readonly (string[] Phrases, int Weight)[] Indicators =
    {
        (new[] { " stilling ", " stillingen " }, 2),
        (new[] { " arbeidsoppgaver" }, 2),
        (new[] { " kvalifikasjoner" }, 2),
        (new[] { " vi tilbyr" }, 2),
        (new[] { " krav " }, 1),
        (new[] { " og " }, 1),
        (new[] { " ikke " }, 1),
        (new[] { " du ", " deg " }, 1),
    };

    /// <summary>
    /// Guesses whether <paramref name="text"/> is Norwegian or English.
    /// </summary>
    /// <param name="text">The text to classify; may be <c>null</c>, empty or whitespace.</param>
    /// <returns>
    /// <c>"no"</c> when the text contains 'æ', 'ø' or 'å' (any case) or when the summed
    /// weight of matched indicator phrases reaches the threshold; otherwise <c>"en"</c>.
    /// Null, empty and whitespace-only input yields <c>"en"</c>.
    /// </returns>
    public static string Detect(string? text)
    {
        if (string.IsNullOrWhiteSpace(text))
        {
            return English;
        }

        // Norwegian characters strongly indicate "no".
        foreach (var raw in text)
        {
            if (char.ToLowerInvariant(raw) is 'æ' or 'ø' or 'å')
            {
                return Norwegian;
            }
        }

        // Normalizing first lets indicators match at the very start/end of the text,
        // across line breaks and next to punctuation ("Arbeidsoppgaver:", "stilling.").
        var normalized = NormalizeForMatching(text);

        var score = 0;
        foreach (var (phrases, weight) in Indicators)
        {
            foreach (var phrase in phrases)
            {
                if (normalized.Contains(phrase, StringComparison.Ordinal))
                {
                    score += weight;
                    break; // An indicator counts once, however many of its phrases match.
                }
            }
        }

        return score >= NorwegianThreshold ? Norwegian : English;
    }

    // Lowercases letters/digits (invariant culture), collapses every run of other
    // characters into a single space, and pads the result with one space on each side.
    private static string NormalizeForMatching(string text)
    {
        // Worst case: every input char is kept, plus the leading and trailing pad.
        var buffer = new char[text.Length + 2];
        var length = 0;
        buffer[length++] = ' ';

        foreach (var raw in text)
        {
            if (char.IsLetterOrDigit(raw))
            {
                buffer[length++] = char.ToLowerInvariant(raw);
            }
            else if (buffer[length - 1] != ' ')
            {
                buffer[length++] = ' ';
            }
        }

        if (buffer[length - 1] != ' ')
        {
            buffer[length++] = ' ';
        }

        return new string(buffer, 0, length);
    }
}
|
||||
|
||||
Reference in New Issue
Block a user