lychee_lib/utils/url.rs
1use std::sync::LazyLock;
2
3use linkify::LinkFinder;
4use url::Url;
5
6/// Returns whether the text represents a root-relative link. These look like
7/// `/this` and are resolved relative to a base URL's origin. This can also be called
8/// "domain-relative URL" (by [MDN]) and "path-absolute-URL string" (by [WHATWG]).
9/// From [MDN]:
10///
11/// > Domain-relative URL: `/en-US/docs/Learn_web_development` — the protocol and
12/// > the domain name are both missing. The browser will use the same protocol
13/// > and the same domain name as the one used to load the document hosting that URL.
14///
15/// [MDN]: https://developer.mozilla.org/en-US/docs/Learn_web_development/Howto/Web_mechanics/What_is_a_URL#absolute_urls_vs._relative_urls
16/// [WHATWG]: https://url.spec.whatwg.org/#path-absolute-url-string
17pub(crate) fn is_root_relative_link(text: &str) -> bool {
18 !is_scheme_relative_link(text) && text.trim_ascii_start().starts_with('/')
19}
20
/// Checks whether `text` is a scheme-relative link such as
/// `//example.com/subpath`. Leading ASCII whitespace is skipped before the
/// check, so only the first non-whitespace characters matter.
///
/// In the words of [MDN]:
///
/// > Scheme-relative URL: `//developer.mozilla.org/en-US/docs/Learn_web_development` —
/// > only the protocol is missing. The browser will use the same protocol as the one
/// > used to load the document hosting that URL.
///
/// [MDN]: https://developer.mozilla.org/en-US/docs/Learn_web_development/Howto/Web_mechanics/What_is_a_URL#absolute_urls_vs._relative_urls
pub(crate) fn is_scheme_relative_link(text: &str) -> bool {
    let trimmed = text.trim_ascii_start();
    trimmed.starts_with("//")
}
32
33/// Attempts to parse a string which might represent a URL or a filesystem path.
34/// Returns [`Ok`] if it is unambiguously a valid URL, otherwise returns [`Err`]
35/// with the original input.
36///
37/// On Windows, we take care to make sure absolute paths---which could also be
38/// parsed as URLs---are not parsed as URLs.
39///
40/// # Errors
41///
42/// Returns an [`Err`] if the given text is not a valid URL, or if the given text
43/// *could* be interpreted as a filesystem path. The string is returned within
44/// the error to allow for easier subsequent processing.
45pub(crate) fn parse_url_or_path(input: &str) -> Result<Url, &str> {
46 match Url::parse(input) {
47 Ok(url) if url.scheme().len() == 1 => Err(input),
48 Ok(url) => Ok(url),
49 _ => Err(input),
50 }
51}
52
53static LINK_FINDER: LazyLock<LinkFinder> = LazyLock::new(LinkFinder::new);
54
55// Use `LinkFinder` to offload the raw link searching in plaintext
56pub(crate) fn find_links(input: &str) -> impl Iterator<Item = linkify::Link<'_>> {
57 LINK_FINDER.links(input)
58}
59
#[cfg(test)]
mod tests {
    use super::*;
    use rstest::rstest;

    // Table-driven check of `parse_url_or_path`. An `Ok` expectation holds the
    // serialized form of the parsed URL (which may differ from the input, e.g.
    // by a trailing slash); an `Err` expectation holds the input handed back.
    #[rstest]
    // Accepted as URLs
    #[case("tel:1", Ok("tel:1"))]
    #[case("file:///a", Ok("file:///a"))]
    #[case("http://a.com", Ok("http://a.com/"))]
    // Rejected: not a valid URL, or possibly a filesystem path
    #[case("", Err(""))]
    #[case(".", Err("."))]
    #[case("C:", Err("C:"))]
    #[case("/unix", Err("/unix"))]
    #[case("C:/a", Err("C:/a"))]
    #[case(r"C:\a\b", Err(r"C:\a\b"))]
    #[case("**/*.md", Err("**/*.md"))]
    #[case("something", Err("something"))]
    fn test_parse_url_or_path(#[case] input: &str, #[case] expected: Result<&str, &str>) {
        let parsed = parse_url_or_path(input);

        // Reduce both outcomes to plain string slices so they can be compared
        // directly against the expectation.
        let actual: Result<&str, &str> = match &parsed {
            Ok(url) => Ok(url.as_str()),
            Err(rejected) => Err(*rejected),
        };

        assert_eq!(
            actual, expected,
            "input={input:?}, expected={expected:?}"
        );
    }
}