First Commit

This commit is contained in:
cesnimda
2026-03-21 11:55:27 +01:00
commit 2e8a29b4d0
1757 changed files with 166084 additions and 0 deletions
@@ -0,0 +1,8 @@
namespace JobTrackerApi.Services.JobImport;
/// <summary>
/// Site-specific HTML parser for job postings. <c>JobImportService</c> consults plugins
/// whose <see cref="CanHandle"/> returns true when the universal parser did not succeed.
/// </summary>
public interface IJobSitePlugin
{
/// <summary>Returns true when this plugin recognises <paramref name="url"/> as one of its site's pages.</summary>
bool CanHandle(string url);
/// <summary>
/// Extracts job fields from the page markup in <paramref name="html"/>.
/// Failure is reported through <c>JobImportResult.Success</c> / <c>Error</c> on the returned value.
/// </summary>
JobImportResult Parse(string html, string url);
}
@@ -0,0 +1,21 @@
using System;
namespace JobTrackerApi.Services.JobImport;
/// <summary>
/// Outcome of one job-import attempt: the extracted posting fields plus
/// success and diagnostic information. Immutable; derive variants with <c>with</c>.
/// </summary>
public sealed record JobImportResult
{
/// <summary>Job title, when one was extracted.</summary>
public string? Title { get; init; }
/// <summary>Hiring company name, when one was extracted.</summary>
public string? Company { get; init; }
/// <summary>Job location text, when one was extracted.</summary>
public string? Location { get; init; }
/// <summary>Job description text, when one was extracted.</summary>
public string? Description { get; init; }
/// <summary>English translation of <see cref="Description"/>; null when no translation was produced.</summary>
public string? TranslatedDescription { get; init; }
/// <summary>Detected language code of the description.</summary>
public string? Language { get; init; } // ISO-ish, e.g. "en", "no"
/// <summary>Skill tags detected in the description; defaults to an empty array, never null.</summary>
public string[] Tags { get; init; } = Array.Empty<string>();
/// <summary>URL the posting was imported from; defaults to the empty string.</summary>
public string SourceUrl { get; init; } = "";
/// <summary>Application deadline, when one was extracted.</summary>
public DateTime? Deadline { get; init; }
/// <summary>True when the import produced a usable posting.</summary>
public bool Success { get; init; }
/// <summary>Identifier of the parser (or pipeline stage) that produced this result.</summary>
public string? Parser { get; init; } // "universal", "finn", ...
/// <summary>Human-readable failure reason; expected to be null on success.</summary>
public string? Error { get; init; }
}
@@ -0,0 +1,185 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net;
using System.Net.Http;
using System.Threading;
using System.Threading.Tasks;
using JobTrackerApi.Services.JobImport.Translation;
namespace JobTrackerApi.Services.JobImport;
/// <summary>
/// Downloads a job-posting page, extracts structured fields from its HTML and enriches the
/// result with a detected language, skill tags and (for Norwegian text) an English translation.
/// </summary>
public sealed class JobImportService
{
    /// <summary>Largest response body, in bytes, that is downloaded and parsed.</summary>
    private const int MaxHtmlBytes = 4_000_000;

    private readonly IHttpClientFactory _httpClientFactory;
    private readonly UniversalJobParser _universal;
    private readonly IEnumerable<IJobSitePlugin> _plugins;
    private readonly ITranslationService _translation;

    /// <summary>Creates the service from its collaborators (supplied by dependency injection).</summary>
    public JobImportService(
        IHttpClientFactory httpClientFactory,
        UniversalJobParser universal,
        IEnumerable<IJobSitePlugin> plugins,
        ITranslationService translation)
    {
        _httpClientFactory = httpClientFactory;
        _universal = universal;
        _plugins = plugins;
        _translation = translation;
    }

    /// <summary>
    /// Validates <paramref name="url"/>, fetches the page and parses it into a job posting preview.
    /// </summary>
    /// <param name="url">Absolute http/https URL of the posting.</param>
    /// <param name="cancellationToken">Cancels the download and the translation call.</param>
    /// <returns>
    /// A result whose <c>Success</c> flag tells whether a posting was extracted. Validation, fetch
    /// and parse failures are reported through <c>Parser</c>/<c>Error</c> rather than thrown.
    /// </returns>
    public async Task<JobImportResult> PreviewAsync(string url, CancellationToken cancellationToken)
    {
        if (!TryValidateUrl(url, out var normalized, out var error))
        {
            return new JobImportResult
            {
                SourceUrl = url ?? "",
                Success = false,
                Parser = "none",
                Error = error
            };
        }

        var html = await FetchHtmlAsync(normalized, cancellationToken);
        if (html is null)
        {
            return new JobImportResult
            {
                SourceUrl = normalized,
                Success = false,
                Parser = "fetch",
                Error = "Failed to fetch HTML."
            };
        }

        // The schema.org based parser goes first; site plugins are only a fallback.
        var parsed = _universal.Parse(html, normalized);
        if (!parsed.Success)
        {
            foreach (var plugin in _plugins.Where(candidate => candidate.CanHandle(normalized)))
            {
                try
                {
                    var attempt = plugin.Parse(html, normalized);
                    parsed = attempt; // on failure this keeps the last failure for debugging
                    if (attempt.Success)
                    {
                        break;
                    }
                }
                catch (Exception ex)
                {
                    // A faulty plugin must not abort the import; record why and try the next one.
                    parsed = new JobImportResult
                    {
                        SourceUrl = normalized,
                        Success = false,
                        Parser = plugin.GetType().Name,
                        Error = ex.Message
                    };
                }
            }
        }

        if (!parsed.Success)
        {
            return parsed with { SourceUrl = normalized };
        }

        var lang = LanguageDetector.Detect(parsed.Description);
        var tags = SkillTagger.Detect(parsed.Description);

        // Only Norwegian descriptions are translated.
        string? translated = null;
        if (string.Equals(lang, "no", StringComparison.OrdinalIgnoreCase) && !string.IsNullOrWhiteSpace(parsed.Description))
        {
            translated = await _translation.TranslateToEnglishAsync(parsed.Description!, "no", cancellationToken);
        }

        return parsed with
        {
            SourceUrl = normalized,
            Language = lang,
            Tags = tags,
            TranslatedDescription = translated
        };
    }

    /// <summary>
    /// Downloads the page at <paramref name="url"/> using the "jobimport" named client.
    /// </summary>
    /// <returns>
    /// The decoded body, or null when the response is not 2xx or the body exceeds
    /// <see cref="MaxHtmlBytes"/>.
    /// </returns>
    private async Task<string?> FetchHtmlAsync(string url, CancellationToken cancellationToken)
    {
        using var client = _httpClientFactory.CreateClient("jobimport");
        using var req = new HttpRequestMessage(HttpMethod.Get, url);
        req.Headers.TryAddWithoutValidation("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) JobTracker/1.0");
        req.Headers.TryAddWithoutValidation("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
        req.Headers.TryAddWithoutValidation("Accept-Language", "en-US,en;q=0.8,no;q=0.6,nb;q=0.6");

        // ResponseHeadersRead lets us inspect headers and stream the body instead of buffering it.
        using var res = await client.SendAsync(req, HttpCompletionOption.ResponseHeadersRead, cancellationToken);

        // Anything that is not 2xx (including a 3xx the handler did not follow) is a fetch failure.
        if (!res.IsSuccessStatusCode)
        {
            return null;
        }

        // The content type is deliberately not enforced: many sites omit or mislabel it.

        // Reject early when the server announces an oversized body.
        if (res.Content.Headers.ContentLength is > MaxHtmlBytes)
        {
            return null;
        }

        // Stream with a hard cap so a missing or wrong Content-Length cannot make us buffer
        // an arbitrarily large download.
        await using var stream = await res.Content.ReadAsStreamAsync(cancellationToken);
        using var buffer = new System.IO.MemoryStream();
        var chunk = new byte[81920];
        int read;
        while ((read = await stream.ReadAsync(chunk.AsMemory(), cancellationToken)) > 0)
        {
            if (buffer.Length + read > MaxHtmlBytes)
            {
                return null;
            }
            buffer.Write(chunk, 0, read);
        }

        var encoding = ResolveEncoding(res.Content.Headers.ContentType?.CharSet);
        return encoding.GetString(buffer.GetBuffer(), 0, (int)buffer.Length);
    }

    /// <summary>
    /// Maps the charset announced by the server to an encoding, falling back to UTF-8 when the
    /// charset is absent or not available on this runtime.
    /// </summary>
    private static System.Text.Encoding ResolveEncoding(string? charset)
    {
        var name = charset?.Trim().Trim('"', '\'');
        if (string.IsNullOrEmpty(name))
        {
            return System.Text.Encoding.UTF8;
        }

        try
        {
            return System.Text.Encoding.GetEncoding(name);
        }
        catch (ArgumentException)
        {
            return System.Text.Encoding.UTF8;
        }
    }

    /// <summary>
    /// Checks that <paramref name="url"/> is an absolute http/https URL that does not point at the
    /// local machine or a literal private address.
    /// </summary>
    /// <param name="url">Raw user input; may be null or blank.</param>
    /// <param name="normalized">Canonical URL text on success, otherwise the empty string.</param>
    /// <param name="error">Human-readable rejection reason on failure, otherwise the empty string.</param>
    /// <returns>True when the URL may be fetched.</returns>
    private static bool TryValidateUrl(string? url, out string normalized, out string error)
    {
        normalized = "";
        error = "";

        if (string.IsNullOrWhiteSpace(url))
        {
            error = "URL is required.";
            return false;
        }

        if (!Uri.TryCreate(url.Trim(), UriKind.Absolute, out var uri))
        {
            error = "Invalid URL.";
            return false;
        }

        if (uri.Scheme is not ("http" or "https"))
        {
            error = "Only http/https URLs are supported.";
            return false;
        }

        if (uri.IsLoopback || string.Equals(uri.Host, "localhost", StringComparison.OrdinalIgnoreCase))
        {
            error = "Local URLs are not allowed.";
            return false;
        }

        // Block literal private IPs.
        // NOTE(review): host names are not resolved here, so a DNS name that points at an internal
        // address still passes; enforce that at the HTTP handler / network level if required.
        if (IPAddress.TryParse(uri.Host, out var ip) && IsPrivateIp(ip))
        {
            error = "Private IP URLs are not allowed.";
            return false;
        }

        normalized = uri.ToString();
        return true;
    }

    /// <summary>
    /// Returns true for addresses that must not be fetched on behalf of a user: loopback,
    /// unspecified, RFC 1918 private ranges, carrier-grade NAT, link-local, and the IPv6
    /// link-local / site-local / unique-local ranges.
    /// </summary>
    private static bool IsPrivateIp(IPAddress ip)
    {
        // "::ffff:10.0.0.1" addresses the same host as "10.0.0.1"; judge it by its IPv4 form.
        if (ip.IsIPv4MappedToIPv6)
        {
            ip = ip.MapToIPv4();
        }

        if (IPAddress.IsLoopback(ip))
        {
            return true;
        }

        if (ip.AddressFamily == System.Net.Sockets.AddressFamily.InterNetwork)
        {
            var b = ip.GetAddressBytes();
            return b[0] == 0 ||                                   // 0.0.0.0/8 ("this" network)
                   b[0] == 10 ||                                  // 10.0.0.0/8
                   b[0] == 127 ||                                 // 127.0.0.0/8 loopback
                   (b[0] == 100 && b[1] >= 64 && b[1] <= 127) ||  // 100.64.0.0/10 carrier-grade NAT
                   (b[0] == 172 && b[1] >= 16 && b[1] <= 31) ||   // 172.16.0.0/12
                   (b[0] == 192 && b[1] == 168) ||                // 192.168.0.0/16
                   (b[0] == 169 && b[1] == 254);                  // 169.254.0.0/16 link-local
        }

        if (ip.AddressFamily == System.Net.Sockets.AddressFamily.InterNetworkV6)
        {
            var b = ip.GetAddressBytes();
            var isUniqueLocal = (b[0] & 0xFE) == 0xFC;            // fc00::/7
            return isUniqueLocal ||
                   ip.IsIPv6LinkLocal ||
                   ip.IsIPv6SiteLocal ||
                   ip.Equals(IPAddress.IPv6Any);                  // "::"
        }

        return false;
    }
}
@@ -0,0 +1,36 @@
using System;
using System.Globalization;
namespace JobTrackerApi.Services.JobImport;
/// <summary>
/// Lightweight heuristic that distinguishes Norwegian from English job ads.
/// </summary>
public static class LanguageDetector
{
    /// <summary>
    /// Guesses the language of <paramref name="text"/>.
    /// </summary>
    /// <param name="text">Plain text to classify; null or blank text is reported as English.</param>
    /// <returns>"no" for Norwegian, otherwise "en".</returns>
    public static string Detect(string? text)
    {
        if (string.IsNullOrWhiteSpace(text))
        {
            return "en";
        }

        // Build a lower-cased copy in which every non-letter becomes a space and which is padded
        // with a space on both ends. That way the space-delimited probes below also match words at
        // the very start or end of the text and words followed by punctuation or line breaks.
        var normalized = new char[text.Length + 2];
        normalized[0] = ' ';
        normalized[^1] = ' ';
        for (var i = 0; i < text.Length; i++)
        {
            var ch = char.ToLowerInvariant(text[i]);

            // Norwegian characters strongly indicate "no".
            if (ch is 'æ' or 'ø' or 'å')
            {
                return "no";
            }

            normalized[i + 1] = char.IsLetter(ch) ? ch : ' ';
        }

        var lower = new string(normalized);

        // Weighted keyword probes: typical job-ad vocabulary counts double, common function words
        // count once. (A probe for "søker" would be unreachable: any 'ø' already returned above.)
        var hits = 0;
        if (Has(lower, " stilling ") || Has(lower, " stillingen ")) hits += 2;
        if (Has(lower, " arbeidsoppgaver")) hits += 2;
        if (Has(lower, " kvalifikasjoner")) hits += 2;
        if (Has(lower, " vi tilbyr")) hits += 2;
        if (Has(lower, " krav ")) hits += 1;
        if (Has(lower, " og ")) hits += 1;
        if (Has(lower, " ikke ")) hits += 1;
        if (Has(lower, " du ") || Has(lower, " deg ")) hits += 1;

        return hits >= 4 ? "no" : "en";
    }

    /// <summary>Ordinal substring test.</summary>
    private static bool Has(string haystack, string needle)
        => haystack.Contains(needle, StringComparison.Ordinal);
}
@@ -0,0 +1,48 @@
using System;
namespace JobTrackerApi.Services.JobImport.Plugins;
/// <summary>
/// Fallback parser for FINN.no job ads, based on the page's meta tags and title.
/// </summary>
public sealed class FinnPlugin : IJobSitePlugin
{
    /// <summary>
    /// Returns true for URLs on finn.no or one of its sub-domains.
    /// </summary>
    public bool CanHandle(string url)
    {
        // Decide on the parsed host so that "finn.no" inside a path, a query string or another
        // domain name (e.g. "notfinn.no") does not count.
        if (Uri.TryCreate(url, UriKind.Absolute, out var uri) && uri.Host.Length > 0)
        {
            return uri.Host.Equals("finn.no", StringComparison.OrdinalIgnoreCase)
                || uri.Host.EndsWith(".finn.no", StringComparison.OrdinalIgnoreCase);
        }

        // Not an absolute URL with a host: keep the lenient substring behaviour.
        return url.Contains("finn.no", StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>
    /// Builds a result from "og:title" (or the title element), "og:description" and "job:location".
    /// </summary>
    /// <returns>A result flagged successful only when both a title and a description were found.</returns>
    public JobImportResult Parse(string html, string url)
    {
        var meta = HtmlExtract.ReadMeta(html);
        var rawTitle = meta.TryGetValue("og:title", out var t) ? t : HtmlExtract.ReadTitle(html);
        var rawDescription = meta.TryGetValue("og:description", out var d) ? d : null;

        // Strip the site suffix first; otherwise "Role - Acme - FINN.no" yields company "FINN.no".
        var title = CleanTitle(rawTitle);
        var description = HtmlExtract.ToPlainText(rawDescription);

        return new JobImportResult
        {
            SourceUrl = url,
            Title = title,
            Company = ExtractCompanyFromTitle(title),
            Location = meta.TryGetValue("job:location", out var loc) ? loc : null,
            Description = description,
            Parser = "finn",
            // Judge success on the values actually returned, not on the raw markup.
            Success = !string.IsNullOrWhiteSpace(title) && !string.IsNullOrWhiteSpace(description),
        };
    }

    /// <summary>Removes the " - FINN.no" suffix; returns null for a blank title.</summary>
    private static string? CleanTitle(string? title)
    {
        if (string.IsNullOrWhiteSpace(title))
        {
            return null;
        }

        // FINN often appends " - FINN.no" etc.
        var cleaned = title.Replace(" - FINN.no", "", StringComparison.OrdinalIgnoreCase).Trim();
        return cleaned.Length == 0 ? title : cleaned;
    }

    /// <summary>
    /// Derives the company from titles shaped like "Role - Company" or "Role hos Company".
    /// </summary>
    /// <returns>The company part, or null when the title matches neither pattern.</returns>
    private static string? ExtractCompanyFromTitle(string? title)
    {
        if (string.IsNullOrWhiteSpace(title))
        {
            return null;
        }

        // Common pattern: "Role hos Company" / "Role - Company"
        var idx = title.LastIndexOf(" - ", StringComparison.Ordinal);
        if (idx > 0 && idx < title.Length - 3)
        {
            return title[(idx + 3)..].Trim();
        }

        idx = title.LastIndexOf(" hos ", StringComparison.OrdinalIgnoreCase);
        if (idx > 0 && idx < title.Length - 5)
        {
            return title[(idx + 5)..].Trim();
        }

        return null;
    }
}
@@ -0,0 +1,25 @@
using System;
namespace JobTrackerApi.Services.JobImport.Plugins;
/// <summary>
/// Fallback parser for jobbnorge.no job ads, based on the page's meta tags and title.
/// </summary>
public sealed class JobbnorgePlugin : IJobSitePlugin
{
    /// <summary>
    /// Returns true for URLs on jobbnorge.no or one of its sub-domains.
    /// </summary>
    public bool CanHandle(string url)
    {
        // Decide on the parsed host so that "jobbnorge.no" inside a path, a query string or
        // another domain name does not count.
        if (Uri.TryCreate(url, UriKind.Absolute, out var uri) && uri.Host.Length > 0)
        {
            return uri.Host.Equals("jobbnorge.no", StringComparison.OrdinalIgnoreCase)
                || uri.Host.EndsWith(".jobbnorge.no", StringComparison.OrdinalIgnoreCase);
        }

        // Not an absolute URL with a host: keep the lenient substring behaviour.
        return url.Contains("jobbnorge.no", StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>
    /// Builds a result from "og:title" (or the title element) and "og:description".
    /// </summary>
    /// <returns>A result flagged successful only when both a title and a description were found.</returns>
    public JobImportResult Parse(string html, string url)
    {
        var meta = HtmlExtract.ReadMeta(html);
        var title = meta.TryGetValue("og:title", out var t) ? t : HtmlExtract.ReadTitle(html);
        var description = HtmlExtract.ToPlainText(meta.TryGetValue("og:description", out var d) ? d : null);

        return new JobImportResult
        {
            SourceUrl = url,
            Title = title,
            Description = description,
            Parser = "jobbnorge",
            // Judge success on the description actually returned, not on the raw markup.
            Success = !string.IsNullOrWhiteSpace(title) && !string.IsNullOrWhiteSpace(description),
        };
    }
}
@@ -0,0 +1,27 @@
using System;
namespace JobTrackerApi.Services.JobImport.Plugins;
/// <summary>
/// Fallback parser for LinkedIn job pages, based on the page's meta tags and title.
/// </summary>
public sealed class LinkedInPlugin : IJobSitePlugin
{
    /// <summary>
    /// Returns true for URLs on linkedin.com (or a sub-domain) whose path starts with "/jobs".
    /// </summary>
    public bool CanHandle(string url)
    {
        // Decide on the parsed host and path so that "linkedin.com/jobs" inside a query string or
        // on another domain does not count.
        if (Uri.TryCreate(url, UriKind.Absolute, out var uri) && uri.Host.Length > 0)
        {
            var isLinkedInHost =
                uri.Host.Equals("linkedin.com", StringComparison.OrdinalIgnoreCase) ||
                uri.Host.EndsWith(".linkedin.com", StringComparison.OrdinalIgnoreCase);
            return isLinkedInHost && uri.AbsolutePath.StartsWith("/jobs", StringComparison.OrdinalIgnoreCase);
        }

        // Not an absolute URL with a host: keep the lenient substring behaviour.
        return url.Contains("linkedin.com/jobs", StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>
    /// Builds a result from "og:title" (or the title element), "og:description" and "og:site_name".
    /// </summary>
    /// <returns>A result flagged successful only when both a title and a description were found.</returns>
    public JobImportResult Parse(string html, string url)
    {
        // LinkedIn heavily relies on JS; meta tags are often the best available without a headless browser.
        var meta = HtmlExtract.ReadMeta(html);
        var title = meta.TryGetValue("og:title", out var t) ? t : HtmlExtract.ReadTitle(html);
        var description = HtmlExtract.ToPlainText(meta.TryGetValue("og:description", out var d) ? d : null);

        return new JobImportResult
        {
            SourceUrl = url,
            Title = title,
            Company = meta.TryGetValue("og:site_name", out var sn) ? sn : null,
            Description = description,
            Parser = "linkedin",
            // Judge success on the description actually returned, not on the raw markup.
            Success = !string.IsNullOrWhiteSpace(title) && !string.IsNullOrWhiteSpace(description),
        };
    }
}
@@ -0,0 +1,29 @@
using System;
namespace JobTrackerApi.Services.JobImport.Plugins;
/// <summary>
/// Fallback parser for NAV job ads (nav.no, including arbeidsplassen.nav.no), based on the
/// page's meta tags and title.
/// </summary>
public sealed class NavPlugin : IJobSitePlugin
{
    /// <summary>
    /// Returns true for URLs on nav.no or one of its sub-domains.
    /// </summary>
    public bool CanHandle(string url)
    {
        // Decide on the parsed host: a substring test for "nav.no" also matches unrelated hosts
        // such as "skandinav.no" and occurrences inside a path or query string.
        if (Uri.TryCreate(url, UriKind.Absolute, out var uri) && uri.Host.Length > 0)
        {
            return uri.Host.Equals("nav.no", StringComparison.OrdinalIgnoreCase)
                || uri.Host.EndsWith(".nav.no", StringComparison.OrdinalIgnoreCase);
        }

        // Not an absolute URL with a host: keep the lenient substring behaviour.
        return url.Contains("nav.no", StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>
    /// Builds a result from "og:title" (or the title element), "og:description" and "og:site_name".
    /// </summary>
    /// <returns>A result flagged successful only when both a title and a description were found.</returns>
    public JobImportResult Parse(string html, string url)
    {
        var meta = HtmlExtract.ReadMeta(html);
        var title = meta.TryGetValue("og:title", out var t) ? t : HtmlExtract.ReadTitle(html);
        var description = HtmlExtract.ToPlainText(meta.TryGetValue("og:description", out var d) ? d : null);
        var siteName = meta.TryGetValue("og:site_name", out var sn) ? sn : null;

        return new JobImportResult
        {
            SourceUrl = url,
            Title = title,
            Company = siteName, // better than nothing; universal parser often gets this anyway.
            Description = description,
            Parser = "nav",
            // Judge success on the description actually returned, not on the raw markup.
            Success = !string.IsNullOrWhiteSpace(title) && !string.IsNullOrWhiteSpace(description),
        };
    }
}
@@ -0,0 +1,35 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;
namespace JobTrackerApi.Services.JobImport;
/// <summary>
/// Detects well-known technology keywords in a job description.
/// </summary>
public static class SkillTagger
{
    private const RegexOptions Options = RegexOptions.IgnoreCase | RegexOptions.Compiled;

    // Each tag is reported at most once, in the order listed here.
    private static readonly (string Tag, Regex Pattern)[] Patterns =
    {
        // No trailing \b: '#' is not a word character, so "\bC#\b" could never match "C# " or "C#,".
        ("C#", new Regex(@"\bC#", Options)),
        // No leading \b: '.' is not a word character, so "\b\.NET" could never match " .NET" or a
        // leading ".NET". "ASP.NET" and friends are still covered by the first alternative.
        (".NET", new Regex(@"\.NET\b|\bDOTNET\b", Options)),
        ("Python", new Regex(@"\bPython\b", Options)),
        ("Docker", new Regex(@"\bDocker\b", Options)),
        ("Azure", new Regex(@"\bAzure\b", Options)),
        ("AWS", new Regex(@"\bAWS\b|\bAmazon Web Services\b", Options)),
        ("React", new Regex(@"\bReact\b|\bReact\.js\b", Options)),
        ("TypeScript", new Regex(@"\bTypeScript\b|\bTS\b", Options)),
        ("SQL", new Regex(@"\bSQL\b|\bPostgreSQL\b|\bMySQL\b|\bSQLite\b|\bMS\s*SQL\b|\bT-?SQL\b", Options)),
        ("Kubernetes", new Regex(@"\bKubernetes\b|\bK8s\b", Options)),
    };

    /// <summary>
    /// Returns the tags whose pattern occurs in <paramref name="description"/>.
    /// </summary>
    /// <param name="description">Plain-text job description; may be null or blank.</param>
    /// <returns>Matching tags in declaration order; an empty array when nothing matches.</returns>
    public static string[] Detect(string? description)
    {
        if (string.IsNullOrWhiteSpace(description))
        {
            return Array.Empty<string>();
        }

        var tags = new List<string>(Patterns.Length);
        foreach (var (tag, pattern) in Patterns)
        {
            if (pattern.IsMatch(description))
            {
                tags.Add(tag);
            }
        }

        // Tag names are unique in Patterns, so no de-duplication is needed.
        return tags.ToArray();
    }
}
@@ -0,0 +1,10 @@
using System.Threading;
using System.Threading.Tasks;
namespace JobTrackerApi.Services.JobImport.Translation;
/// <summary>
/// Translates job-description text to English.
/// </summary>
public interface ITranslationService
{
/// <summary>
/// Translates <paramref name="text"/> from <paramref name="sourceLanguage"/> to English.
/// </summary>
/// <param name="text">Text to translate.</param>
/// <param name="sourceLanguage">Language code of <paramref name="text"/> (the import service passes "no").</param>
/// <param name="cancellationToken">Cancels the operation.</param>
/// <returns>The English text, or null when no translation was produced.</returns>
Task<string?> TranslateToEnglishAsync(string text, string sourceLanguage, CancellationToken cancellationToken);
}
@@ -0,0 +1,49 @@
using System;
using System.Net.Http;
using System.Net.Http.Json;
using System.Threading;
using System.Threading.Tasks;
namespace JobTrackerApi.Services.JobImport.Translation;
/// <summary>
/// <see cref="ITranslationService"/> backed by a LibreTranslate-style HTTP endpoint
/// (POST to "{BaseUrl}/translate").
/// </summary>
public sealed class LibreTranslateService : ITranslationService
{
    private readonly IHttpClientFactory _httpClientFactory;
    private readonly string _baseUrl;
    private readonly string? _apiKey;

    /// <summary>
    /// Reads "Translation:LibreTranslate:BaseUrl" and "Translation:LibreTranslate:ApiKey" from
    /// configuration. A blank base URL disables translation; a blank key is treated as no key.
    /// </summary>
    public LibreTranslateService(IHttpClientFactory httpClientFactory, IConfiguration cfg)
    {
        _httpClientFactory = httpClientFactory;

        var configuredUrl = cfg["Translation:LibreTranslate:BaseUrl"] ?? "";
        _baseUrl = configuredUrl.Trim().TrimEnd('/');

        var configuredKey = cfg["Translation:LibreTranslate:ApiKey"];
        _apiKey = string.IsNullOrWhiteSpace(configuredKey) ? null : configuredKey.Trim();
    }

    /// <summary>
    /// Sends <paramref name="text"/> to the translate endpoint and returns the English result.
    /// </summary>
    /// <param name="text">Text to translate.</param>
    /// <param name="sourceLanguage">Language code sent as the request's "source" field.</param>
    /// <param name="cancellationToken">Cancels the HTTP call.</param>
    /// <returns>
    /// The translated text, or null when the input or base URL is blank, the endpoint answers
    /// with a non-success status, or the response carries no translation.
    /// </returns>
    public async Task<string?> TranslateToEnglishAsync(string text, string sourceLanguage, CancellationToken cancellationToken)
    {
        if (string.IsNullOrWhiteSpace(text) || string.IsNullOrWhiteSpace(_baseUrl))
        {
            return null;
        }

        var payload = new
        {
            q = text,
            source = sourceLanguage,
            target = "en",
            format = "text",
            api_key = _apiKey
        };

        using var client = _httpClientFactory.CreateClient();
        using var req = new HttpRequestMessage(HttpMethod.Post, $"{_baseUrl}/translate")
        {
            Content = JsonContent.Create(payload)
        };
        using var res = await client.SendAsync(req, cancellationToken);
        if (!res.IsSuccessStatusCode)
        {
            return null;
        }

        var body = await res.Content.ReadFromJsonAsync<LibreTranslateResponse>(cancellationToken: cancellationToken);
        var translated = body?.translatedText;
        return string.IsNullOrWhiteSpace(translated) ? null : translated;
    }

    /// <summary>Response shape of the translate endpoint; only the translated text is read.</summary>
    private sealed record LibreTranslateResponse(string? translatedText);
}
@@ -0,0 +1,11 @@
using System.Threading;
using System.Threading.Tasks;
namespace JobTrackerApi.Services.JobImport.Translation;
/// <summary>
/// <see cref="ITranslationService"/> that never translates.
/// </summary>
public sealed class NoOpTranslationService : ITranslationService
{
    /// <summary>Ignores all arguments and returns a completed task holding null.</summary>
    public Task<string?> TranslateToEnglishAsync(string text, string sourceLanguage, CancellationToken cancellationToken)
    {
        string? noTranslation = null;
        return Task.FromResult(noTranslation);
    }
}
@@ -0,0 +1,268 @@
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Text.Json;
using System.Text.RegularExpressions;
namespace JobTrackerApi.Services.JobImport;
/// <summary>
/// Site-independent parser that reads schema.org <c>JobPosting</c> data from the JSON-LD
/// script blocks embedded in a page.
/// </summary>
public sealed class UniversalJobParser
{
    private static readonly Regex JsonLdScriptRegex =
        new(@"<script[^>]+type\s*=\s*[""']application/ld\+json[""'][^>]*>(?<json>[\s\S]*?)</script>",
            RegexOptions.IgnoreCase | RegexOptions.Compiled);

    /// <summary>
    /// Looks for the first JSON-LD block that contains a JobPosting with a title and a description.
    /// </summary>
    /// <param name="html">Page markup.</param>
    /// <param name="url">Source URL, copied into the result.</param>
    /// <returns>A successful result, or a failed one whose <c>Error</c> explains why.</returns>
    public JobImportResult Parse(string html, string url)
    {
        if (string.IsNullOrWhiteSpace(html))
        {
            return new JobImportResult { SourceUrl = url, Success = false, Parser = "universal", Error = "Empty HTML." };
        }

        foreach (Match m in JsonLdScriptRegex.Matches(html))
        {
            var json = m.Groups["json"].Value.Trim();
            if (json.Length == 0)
            {
                continue;
            }

            // Some sites embed slightly malformed JSON; try best-effort variants.
            foreach (var candidate in SplitJsonLdPayload(json))
            {
                if (TryParseJobPosting(candidate, url, out var result))
                {
                    return result with { Parser = "universal", Success = true };
                }
            }
        }

        return new JobImportResult { SourceUrl = url, Success = false, Parser = "universal", Error = "No JobPosting schema found." };
    }

    /// <summary>
    /// Yields the payload as-is and, when that differs, a variant with trailing ';' removed.
    /// </summary>
    private static IEnumerable<string> SplitJsonLdPayload(string raw)
    {
        // Many pages have valid JSON; keep it simple. If parsing fails, try trimming common junk.
        yield return raw;

        var trimmed = raw.Trim().TrimEnd(';');
        if (!string.Equals(trimmed, raw, StringComparison.Ordinal))
        {
            // Skip the variant when nothing was trimmed: parsing the same text twice is pointless.
            yield return trimmed;
        }
    }

    /// <summary>
    /// Parses <paramref name="json"/> and maps the first JobPosting node to a result.
    /// </summary>
    /// <returns>True only when a posting with both a title and a description was found.</returns>
    private static bool TryParseJobPosting(string json, string url, out JobImportResult result)
    {
        result = new JobImportResult { SourceUrl = url, Parser = "universal", Success = false };
        try
        {
            using var doc = JsonDocument.Parse(json);
            var node = FindJobPostingNode(doc.RootElement);
            if (node is null)
            {
                return false;
            }

            var job = node.Value;
            var title = GetString(job, "title");
            var company = GetString(job, "hiringOrganization", "name")
                          ?? GetString(job, "hiringOrganization", "legalName")
                          ?? GetString(job, "hiringOrganization"); // organization given as plain text
            var location = ExtractLocation(job);
            var deadline = ParseDateTime(GetString(job, "validThrough"));
            var description = HtmlExtract.ToPlainText(GetString(job, "description"));

            result = new JobImportResult
            {
                SourceUrl = url,
                Title = title,
                Company = company,
                Location = location,
                Description = description,
                Deadline = deadline,
                Success = !string.IsNullOrWhiteSpace(title) && !string.IsNullOrWhiteSpace(description),
                Parser = "universal"
            };
            return result.Success;
        }
        catch (JsonException)
        {
            // Malformed JSON-LD is expected in the wild; treat it as "no posting here".
            return false;
        }
    }

    /// <summary>
    /// Depth-first search for the first object whose "@type" is (or includes) "JobPosting".
    /// </summary>
    private static JsonElement? FindJobPostingNode(JsonElement root)
    {
        // Accept: { "@type":"JobPosting", ... }
        if (IsJobPosting(root))
        {
            return root;
        }

        // Accept: { "@graph":[...]} or arrays.
        if (root.ValueKind == JsonValueKind.Object)
        {
            // Search "@graph" first, then the remaining properties.
            var graphSearched = false;
            if (root.TryGetProperty("@graph", out var graph) && graph.ValueKind == JsonValueKind.Array)
            {
                graphSearched = true;
                foreach (var el in graph.EnumerateArray())
                {
                    var found = FindJobPostingNode(el);
                    if (found is not null)
                    {
                        return found;
                    }
                }
            }

            foreach (var prop in root.EnumerateObject())
            {
                if (graphSearched && prop.NameEquals("@graph"))
                {
                    continue; // already searched above
                }

                var found = FindJobPostingNode(prop.Value);
                if (found is not null)
                {
                    return found;
                }
            }
        }

        if (root.ValueKind == JsonValueKind.Array)
        {
            foreach (var el in root.EnumerateArray())
            {
                var found = FindJobPostingNode(el);
                if (found is not null)
                {
                    return found;
                }
            }
        }

        return null;
    }

    /// <summary>
    /// True when <paramref name="el"/> is an object whose "@type" is "JobPosting" or an array
    /// containing it (case-insensitive).
    /// </summary>
    private static bool IsJobPosting(JsonElement el)
    {
        if (el.ValueKind != JsonValueKind.Object)
        {
            return false;
        }

        if (!el.TryGetProperty("@type", out var typeEl))
        {
            return false;
        }

        if (typeEl.ValueKind == JsonValueKind.String)
        {
            return string.Equals(typeEl.GetString(), "JobPosting", StringComparison.OrdinalIgnoreCase);
        }

        if (typeEl.ValueKind == JsonValueKind.Array)
        {
            foreach (var t in typeEl.EnumerateArray())
            {
                if (t.ValueKind == JsonValueKind.String &&
                    string.Equals(t.GetString(), "JobPosting", StringComparison.OrdinalIgnoreCase))
                {
                    return true;
                }
            }
        }

        return false;
    }

    /// <summary>
    /// Follows <paramref name="path"/> through nested objects and returns the value found there
    /// when it is a string or a number; otherwise null.
    /// </summary>
    private static string? GetString(JsonElement el, params string[] path)
    {
        var cur = el;
        foreach (var segment in path)
        {
            if (cur.ValueKind != JsonValueKind.Object)
            {
                return null;
            }

            if (!cur.TryGetProperty(segment, out var next))
            {
                return null;
            }

            cur = next;
        }

        return cur.ValueKind switch
        {
            JsonValueKind.String => cur.GetString(),
            JsonValueKind.Number => cur.ToString(),
            _ => null
        };
    }

    /// <summary>
    /// Builds "city, region, country" from the first usable address under "jobLocation".
    /// </summary>
    /// <returns>The location text, or null when no address information is present.</returns>
    private static string? ExtractLocation(JsonElement job)
    {
        // jobLocation can be object or array; address fields vary.
        if (!job.TryGetProperty("jobLocation", out var jl))
        {
            return null;
        }

        var addr = FindFirstAddress(jl);
        if (addr is null)
        {
            return null;
        }

        // The address may be given as plain text instead of a structured object.
        if (addr.Value.ValueKind == JsonValueKind.String)
        {
            var text = addr.Value.GetString();
            return string.IsNullOrWhiteSpace(text) ? null : text.Trim();
        }

        var city = GetString(addr.Value, "addressLocality");
        var region = GetString(addr.Value, "addressRegion");
        // addressCountry is either text or an object such as { "@type": "Country", "name": "NO" }.
        var country = GetString(addr.Value, "addressCountry")
                      ?? GetString(addr.Value, "addressCountry", "name");

        var parts = new[] { city, region, country }.Where(x => !string.IsNullOrWhiteSpace(x)).ToArray();
        return parts.Length == 0 ? null : string.Join(", ", parts);
    }

    /// <summary>
    /// Returns the first "address" value (object or string) of a jobLocation object, or of the
    /// first element carrying one when jobLocation is an array.
    /// </summary>
    private static JsonElement? FindFirstAddress(JsonElement jobLocation)
    {
        if (jobLocation.ValueKind == JsonValueKind.Object)
        {
            if (jobLocation.TryGetProperty("address", out var a) &&
                a.ValueKind is JsonValueKind.Object or JsonValueKind.String)
            {
                return a;
            }

            return null;
        }

        if (jobLocation.ValueKind == JsonValueKind.Array)
        {
            foreach (var el in jobLocation.EnumerateArray())
            {
                var addr = FindFirstAddress(el);
                if (addr is not null)
                {
                    return addr;
                }
            }
        }

        return null;
    }

    /// <summary>
    /// Parses a date/time with the invariant culture, converting offset-carrying values to UTC.
    /// </summary>
    /// <returns>The parsed value, or null when <paramref name="raw"/> is blank or not a date.</returns>
    private static DateTime? ParseDateTime(string? raw)
    {
        if (string.IsNullOrWhiteSpace(raw))
        {
            return null;
        }

        return DateTime.TryParse(raw, CultureInfo.InvariantCulture, DateTimeStyles.AdjustToUniversal, out var dt)
            ? dt
            : null;
    }
}
/// <summary>
/// Small regex-based helpers for pulling text out of HTML without a full parser.
/// </summary>
internal static class HtmlExtract
{
    private static readonly Regex TitleRegex =
        new(@"<title[^>]*>(?<t>[\s\S]*?)</title>", RegexOptions.IgnoreCase | RegexOptions.Compiled);

    private static readonly Regex MetaTagRegex =
        new(@"<meta\s+[^>]*>", RegexOptions.IgnoreCase | RegexOptions.Compiled);

    private static readonly Regex TagRegex =
        new(@"<[^>]+>", RegexOptions.Compiled);

    private static readonly Regex WsRegex =
        new(@"\s+", RegexOptions.Compiled);

    /// <summary>
    /// Returns the entity-decoded, trimmed content of the first title element, or null when the
    /// markup has none.
    /// </summary>
    public static string? ReadTitle(string html)
    {
        var m = TitleRegex.Match(html);
        return m.Success ? DecodeHtmlEntities(m.Groups["t"].Value).Trim() : null;
    }

    /// <summary>
    /// Collects meta tags into a case-insensitive dictionary keyed by their "property" attribute
    /// (or "name" when there is no "property"). The first occurrence of a key wins; tags without
    /// a key or without content are skipped.
    /// </summary>
    public static Dictionary<string, string> ReadMeta(string html)
    {
        // Very small meta extractor: picks up OpenGraph + standard meta tags.
        var dict = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);
        foreach (Match m in MetaTagRegex.Matches(html))
        {
            var tag = m.Value;
            var key = GetAttr(tag, "property") ?? GetAttr(tag, "name");
            var content = GetAttr(tag, "content");
            if (string.IsNullOrWhiteSpace(key) || string.IsNullOrWhiteSpace(content))
            {
                continue;
            }

            dict.TryAdd(key, DecodeHtmlEntities(content).Trim());
        }

        return dict;
    }

    /// <summary>
    /// Decodes entities, replaces tags with spaces and collapses whitespace.
    /// </summary>
    /// <returns>The plain text, or null when the input is blank or nothing remains.</returns>
    public static string? ToPlainText(string? htmlOrText)
    {
        if (string.IsNullOrWhiteSpace(htmlOrText))
        {
            return null;
        }

        // Decode first so entity-escaped markup ("&lt;p&gt;") is stripped as well.
        var s = DecodeHtmlEntities(htmlOrText);
        s = TagRegex.Replace(s, " ");
        s = WsRegex.Replace(s, " ").Trim();
        return s.Length == 0 ? null : s;
    }

    /// <summary>
    /// Reads the value of attribute <paramref name="attr"/> from a single tag. A quoted value
    /// anywhere in the tag is preferred over an unquoted one.
    /// </summary>
    /// <returns>The raw (still entity-encoded) value, or null when the attribute is absent.</returns>
    private static string? GetAttr(string tag, string attr)
    {
        // The look-behind stops "name" from matching inside "data-name" (or "og:name", "x.name").
        var name = @"(?<![\w:.-])" + Regex.Escape(attr);

        var m = Regex.Match(tag, name + @"\s*=\s*(?<q>[""'])(?<v>[\s\S]*?)\k<q>", RegexOptions.IgnoreCase | RegexOptions.Compiled);
        if (m.Success)
        {
            return m.Groups["v"].Value;
        }

        // Unquoted attribute values.
        m = Regex.Match(tag, name + @"\s*=\s*(?<v>[^\s>]+)", RegexOptions.IgnoreCase | RegexOptions.Compiled);
        return m.Success ? m.Groups["v"].Value : null;
    }

    private static string DecodeHtmlEntities(string s)
        => System.Net.WebUtility.HtmlDecode(s);
}