First Commit

This commit is contained in:
cesnimda
2026-03-21 11:55:27 +01:00
commit 2e8a29b4d0
1757 changed files with 166084 additions and 0 deletions
@@ -0,0 +1,48 @@
using System;
namespace JobTrackerApi.Services.JobImport.Plugins;
/// <summary>
/// Job-site plugin for FINN.no pages. Reads the page's meta tags and derives a cleaned
/// title plus a best-effort company name from the title text.
/// </summary>
public sealed class FinnPlugin : IJobSitePlugin
{
    /// <summary>
    /// Returns <c>true</c> when <paramref name="url"/> contains "finn.no" (case-insensitive).
    /// </summary>
    public bool CanHandle(string url) => url.Contains("finn.no", StringComparison.OrdinalIgnoreCase);

    /// <summary>
    /// Builds a <see cref="JobImportResult"/> from the meta tags found in <paramref name="html"/>.
    /// </summary>
    /// <param name="html">HTML document to extract meta tags (and, as a fallback, the title) from.</param>
    /// <param name="url">URL the document belongs to; copied to <c>SourceUrl</c> unchanged.</param>
    /// <returns>
    /// A result tagged with parser "finn". <c>Success</c> is <c>true</c> only when both the
    /// title and the "og:description" value are non-blank.
    /// </returns>
    public JobImportResult Parse(string html, string url)
    {
        var meta = HtmlExtract.ReadMeta(html);
        var rawTitle = meta.TryGetValue("og:title", out var t) ? t : HtmlExtract.ReadTitle(html);
        var desc = meta.TryGetValue("og:description", out var d) ? d : null;

        // Strip the site suffix BEFORE looking for the company: otherwise the last " - " in
        // "Role hos Acme - FINN.no" is the suffix separator and the company becomes "FINN.no".
        var title = CleanTitle(rawTitle);
        var company = ExtractCompanyFromTitle(title);

        return new JobImportResult
        {
            SourceUrl = url,
            Title = title,
            Company = company,
            Location = meta.TryGetValue("job:location", out var loc) ? loc : null,
            Description = HtmlExtract.ToPlainText(desc),
            Parser = "finn",
            Success = !string.IsNullOrWhiteSpace(rawTitle) && !string.IsNullOrWhiteSpace(desc),
        };
    }

    /// <summary>
    /// Removes the " - FINN.no" site suffix (case-insensitive) and surrounding whitespace.
    /// </summary>
    /// <returns>
    /// <c>null</c> for a null/blank title; the original title when stripping would leave
    /// nothing; otherwise the stripped title.
    /// </returns>
    private static string? CleanTitle(string? title)
    {
        if (string.IsNullOrWhiteSpace(title)) return null;

        // FINN often appends " - FINN.no" etc.
        var s = title.Replace(" - FINN.no", "", StringComparison.OrdinalIgnoreCase).Trim();
        return s.Length == 0 ? title : s;
    }

    /// <summary>
    /// Best-effort company extraction from a title of the form "Role - Company" or
    /// "Role hos Company". The " - " separator is tried first; the text after the LAST
    /// separator occurrence is returned, trimmed.
    /// </summary>
    /// <returns>The company part, or <c>null</c> when the title is blank or has no usable separator.</returns>
    private static string? ExtractCompanyFromTitle(string? title)
    {
        if (string.IsNullOrWhiteSpace(title)) return null;

        // Common pattern: "Role hos Company" / "Role - Company"
        var s = title;

        // idx > 0 requires text before the separator; the upper bound requires text after it.
        var idx = s.LastIndexOf(" - ", StringComparison.Ordinal);
        if (idx > 0 && idx < s.Length - 3) return s[(idx + 3)..].Trim();

        idx = s.LastIndexOf(" hos ", StringComparison.OrdinalIgnoreCase);
        if (idx > 0 && idx < s.Length - 5) return s[(idx + 5)..].Trim();

        return null;
    }
}
@@ -0,0 +1,25 @@
using System;
namespace JobTrackerApi.Services.JobImport.Plugins;
/// <summary>
/// Job-site plugin for jobbnorge.no pages. Fills title and description from the page's
/// meta tags; no company or location is extracted.
/// </summary>
public sealed class JobbnorgePlugin : IJobSitePlugin
{
    /// <summary>
    /// Returns <c>true</c> when <paramref name="url"/> contains "jobbnorge.no" (case-insensitive).
    /// </summary>
    public bool CanHandle(string url)
    {
        return url.IndexOf("jobbnorge.no", StringComparison.OrdinalIgnoreCase) >= 0;
    }

    /// <summary>
    /// Builds a <see cref="JobImportResult"/> tagged with parser "jobbnorge".
    /// </summary>
    /// <param name="html">HTML document to extract meta tags (and, as a fallback, the title) from.</param>
    /// <param name="url">URL the document belongs to; copied to <c>SourceUrl</c> unchanged.</param>
    /// <returns>
    /// The import result; <c>Success</c> is <c>true</c> only when both the title and the
    /// "og:description" value are non-blank.
    /// </returns>
    public JobImportResult Parse(string html, string url)
    {
        var tags = HtmlExtract.ReadMeta(html);

        // Prefer og:title; only consult the document title when that tag is absent.
        if (!tags.TryGetValue("og:title", out var heading))
        {
            heading = HtmlExtract.ReadTitle(html);
        }

        if (!tags.TryGetValue("og:description", out var summary))
        {
            summary = null;
        }

        var succeeded = !(string.IsNullOrWhiteSpace(heading) || string.IsNullOrWhiteSpace(summary));

        return new JobImportResult
        {
            SourceUrl = url,
            Title = heading,
            Description = HtmlExtract.ToPlainText(summary),
            Parser = "jobbnorge",
            Success = succeeded,
        };
    }
}
@@ -0,0 +1,27 @@
using System;
namespace JobTrackerApi.Services.JobImport.Plugins;
/// <summary>
/// Job-site plugin for LinkedIn job pages. Works purely from meta tags, because the
/// page content itself is largely rendered by JavaScript.
/// </summary>
public sealed class LinkedInPlugin : IJobSitePlugin
{
    /// <summary>
    /// Returns <c>true</c> when <paramref name="url"/> contains "linkedin.com/jobs" (case-insensitive).
    /// </summary>
    public bool CanHandle(string url)
    {
        return url.IndexOf("linkedin.com/jobs", StringComparison.OrdinalIgnoreCase) >= 0;
    }

    /// <summary>
    /// Builds a <see cref="JobImportResult"/> tagged with parser "linkedin".
    /// </summary>
    /// <param name="html">HTML document to extract meta tags (and, as a fallback, the title) from.</param>
    /// <param name="url">URL the document belongs to; copied to <c>SourceUrl</c> unchanged.</param>
    /// <returns>
    /// The import result; <c>Success</c> is <c>true</c> only when both the title and the
    /// "og:description" value are non-blank.
    /// </returns>
    public JobImportResult Parse(string html, string url)
    {
        // LinkedIn heavily relies on JS; meta tags are often the best available without a headless browser.
        var tags = HtmlExtract.ReadMeta(html);

        // Prefer og:title; only consult the document title when that tag is absent.
        if (!tags.TryGetValue("og:title", out var heading))
        {
            heading = HtmlExtract.ReadTitle(html);
        }

        if (!tags.TryGetValue("og:description", out var summary))
        {
            summary = null;
        }

        // NOTE(review): og:site_name presumably names the hosting site rather than the
        // employer — confirm this is the intended value for Company.
        if (!tags.TryGetValue("og:site_name", out var siteName))
        {
            siteName = null;
        }

        var succeeded = !(string.IsNullOrWhiteSpace(heading) || string.IsNullOrWhiteSpace(summary));

        return new JobImportResult
        {
            SourceUrl = url,
            Title = heading,
            Company = siteName,
            Description = HtmlExtract.ToPlainText(summary),
            Parser = "linkedin",
            Success = succeeded,
        };
    }
}
@@ -0,0 +1,29 @@
using System;
namespace JobTrackerApi.Services.JobImport.Plugins;
/// <summary>
/// Job-site plugin for nav.no pages (including arbeidsplassen.nav.no). Fills title,
/// description and a stand-in company value from the page's meta tags.
/// </summary>
public sealed class NavPlugin : IJobSitePlugin
{
    /// <summary>
    /// Decides whether <paramref name="url"/> points at nav.no or one of its subdomains.
    /// </summary>
    /// <param name="url">URL to test.</param>
    /// <returns>
    /// For absolute http/https URLs: <c>true</c> when the host is "nav.no" or ends with
    /// ".nav.no" (case-insensitive). For any other non-blank input: <c>true</c> when the
    /// text contains "nav.no". <c>false</c> for null or blank input.
    /// </returns>
    public bool CanHandle(string url)
    {
        if (string.IsNullOrWhiteSpace(url)) return false;

        // Match on the parsed host so unrelated URLs are not claimed: a plain substring test
        // also accepts hosts that merely end in "nav.no" and URLs that only mention nav.no
        // in their path or query string.
        if (Uri.TryCreate(url, UriKind.Absolute, out var uri) &&
            (uri.Scheme == Uri.UriSchemeHttp || uri.Scheme == Uri.UriSchemeHttps))
        {
            var host = uri.Host;
            return host.Equals("nav.no", StringComparison.OrdinalIgnoreCase) ||
                   host.EndsWith(".nav.no", StringComparison.OrdinalIgnoreCase);
        }

        // Not an absolute web URL (e.g. scheme-less "nav.no/..."): keep the lenient substring
        // match. "arbeidsplassen.nav.no" needs no separate check since it contains "nav.no".
        return url.Contains("nav.no", StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>
    /// Builds a <see cref="JobImportResult"/> tagged with parser "nav".
    /// </summary>
    /// <param name="html">HTML document to extract meta tags (and, as a fallback, the title) from.</param>
    /// <param name="url">URL the document belongs to; copied to <c>SourceUrl</c> unchanged.</param>
    /// <returns>
    /// The import result; <c>Success</c> is <c>true</c> only when both the title and the
    /// "og:description" value are non-blank.
    /// </returns>
    public JobImportResult Parse(string html, string url)
    {
        var meta = HtmlExtract.ReadMeta(html);
        var title = meta.TryGetValue("og:title", out var t) ? t : HtmlExtract.ReadTitle(html);
        var desc = meta.TryGetValue("og:description", out var d) ? d : null;
        var siteName = meta.TryGetValue("og:site_name", out var sn) ? sn : null;

        return new JobImportResult
        {
            SourceUrl = url,
            Title = title,
            Company = siteName, // better than nothing; universal parser often gets this anyway.
            Description = HtmlExtract.ToPlainText(desc),
            Parser = "nav",
            Success = !string.IsNullOrWhiteSpace(title) && !string.IsNullOrWhiteSpace(desc),
        };
    }
}