Skip to main content

lychee_lib/utils/
url.rs

1use std::sync::LazyLock;
2
3use linkify::LinkFinder;
4use url::Url;
5
6/// Returns whether the text represents a root-relative link. These look like
7/// `/this` and are resolved relative to a base URL's origin. This can also be called
8/// "domain-relative URL" (by [MDN]) and "path-absolute-URL string" (by [WHATWG]).
9/// From [MDN]:
10///
11/// > Domain-relative URL: `/en-US/docs/Learn_web_development` — the protocol and
12/// > the domain name are both missing. The browser will use the same protocol
13/// > and the same domain name as the one used to load the document hosting that URL.
14///
15/// [MDN]: https://developer.mozilla.org/en-US/docs/Learn_web_development/Howto/Web_mechanics/What_is_a_URL#absolute_urls_vs._relative_urls
16/// [WHATWG]: https://url.spec.whatwg.org/#path-absolute-url-string
17pub(crate) fn is_root_relative_link(text: &str) -> bool {
18    !is_scheme_relative_link(text) && text.trim_ascii_start().starts_with('/')
19}
20
/// Reports whether `text` is a scheme-relative link such as
/// `//example.com/subpath`. Quoting [MDN]:
///
/// > Scheme-relative URL: `//developer.mozilla.org/en-US/docs/Learn_web_development` —
/// > only the protocol is missing. The browser will use the same protocol as the one
/// > used to load the document hosting that URL.
///
/// Leading ASCII whitespace is ignored before the `//` prefix is checked.
///
/// [MDN]: https://developer.mozilla.org/en-US/docs/Learn_web_development/Howto/Web_mechanics/What_is_a_URL#absolute_urls_vs._relative_urls
pub(crate) fn is_scheme_relative_link(text: &str) -> bool {
    let trimmed = text.trim_ascii_start();
    // `/` is a single-byte ASCII character, so a byte-level prefix check is
    // equivalent to asking whether the string starts with "//".
    matches!(trimmed.as_bytes(), [b'/', b'/', ..])
}
32
33/// Attempts to parse a string which might represent a URL or a filesystem path.
34/// Returns [`Ok`] if it is unambiguously a valid URL, otherwise returns [`Err`]
35/// with the original input.
36///
37/// On Windows, we take care to make sure absolute paths---which could also be
38/// parsed as URLs---are not parsed as URLs.
39///
40/// # Errors
41///
42/// Returns an [`Err`] if the given text is not a valid URL, or if the given text
43/// *could* be interpreted as a filesystem path. The string is returned within
44/// the error to allow for easier subsequent processing.
45pub(crate) fn parse_url_or_path(input: &str) -> Result<Url, &str> {
46    match Url::parse(input) {
47        Ok(url) if url.scheme().len() == 1 => Err(input),
48        Ok(url) => Ok(url),
49        _ => Err(input),
50    }
51}
52
53static LINK_FINDER: LazyLock<LinkFinder> = LazyLock::new(LinkFinder::new);
54
55// Use `LinkFinder` to offload the raw link searching in plaintext
56pub(crate) fn find_links(input: &str) -> impl Iterator<Item = linkify::Link<'_>> {
57    LINK_FINDER.links(input)
58}
59
#[cfg(test)]
mod tests {
    use super::*;
    use rstest::rstest;

    /// Checks `parse_url_or_path` against inputs that must be accepted as URLs
    /// (compared via their serialised form) and inputs that must be handed
    /// back unchanged as errors.
    #[rstest]
    // Accepted: unambiguous URLs
    #[case("tel:1", Ok("tel:1"))]
    #[case("file:///a", Ok("file:///a"))]
    #[case("http://a.com", Ok("http://a.com/"))]
    // Rejected: not a URL, or could be a filesystem path
    #[case("", Err(""))]
    #[case(".", Err("."))]
    #[case("C:", Err("C:"))]
    #[case("/unix", Err("/unix"))]
    #[case("C:/a", Err("C:/a"))]
    #[case(r"C:\a\b", Err(r"C:\a\b"))]
    #[case("**/*.md", Err("**/*.md"))]
    #[case("something", Err("something"))]
    fn test_parse_url_or_path(#[case] input: &str, #[case] expected: Result<&str, &str>) {
        let parsed = parse_url_or_path(input);
        // Normalise both arms to `&str` so the outcome can be compared with
        // `expected` directly.
        let actual: Result<&str, &str> = match &parsed {
            Ok(url) => Ok(url.as_str()),
            Err(rejected) => Err(*rejected),
        };
        assert_eq!(actual, expected, "input={input:?}, expected={expected:?}");
    }
}