Skip to main content

lychee_lib/types/
base_info.rs

1//! Parses and resolves [`RawUri`] into fully-qualified [`Uri`] by
2//! applying base URL and root dir mappings.
3
4use reqwest::Url;
5use serde::Deserialize;
6use std::borrow::Cow;
7use std::path::{Path, PathBuf};
8use url::ParseError;
9
10use crate::ErrorKind;
11use crate::Uri;
12use crate::utils;
13use crate::utils::url::is_root_relative_link;
14
15/// Information used for resolving relative URLs within a particular
16/// input source. There should be a 1:1 correspondence between each
17/// `BaseInfo` and its originating `InputSource`. The main entry
18/// point for constructing is [`BaseInfo::from_source_url`].
19///
20/// Once constructed, [`BaseInfo::parse_url_text`] can be used to
21/// parse and resolve a (possibly relative) URL obtained from within
22/// the associated `InputSource`.
23///
24/// A `BaseInfo` may be built from input sources which cannot resolve
25/// relative links---for instance, stdin. It may also be built from input
26/// sources which can resolve *locally*-relative links, but not *root*-relative
27/// links.
28#[derive(Debug, PartialEq, Eq, Clone, Deserialize, Default)]
29#[serde(try_from = "String")]
30pub enum BaseInfo {
31    /// No base information is available. This is for sources with no base
32    /// information, such as [`ResolvedInputSource::Stdin`], and for URLs which
33    /// *cannot be a base*, such as `data:` and `tel:`. [`BaseInfo::None`]
34    /// can resolve no relative links; only fully-qualified links will be
35    /// parsed successfully.
36    #[default]
37    None,
38
39    /// A base which cannot resolve root-relative links. This is for
40    /// `file:` URLs where the root directory is not known. As such, you can
41    /// traverse relative to the current URL (by traversing the filesystem),
42    /// but you cannot jump to the "root".
43    NoRoot(Url),
44
45    /// A full base made up of `origin` and `path`. This can resolve
46    /// all kinds of relative links.
47    ///
48    /// All non-`file:` URLs which *can be a base* fall into this case. For these,
49    /// `origin` and `path` are obtained by dividing the source URL into its
50    /// origin and path. When joined, `${origin}/${path}` should be equivalent
51    /// to the source's original URL.
52    ///
53    /// This also represents `file:` URLs with a known root. The `origin` field
54    /// records the `file:` URL which will be used to resolve root-relative links.
55    /// The `path` field is the subpath to a particular input source within the
56    /// root. This is retained to resolve locally-relative links.
57    ///
58    /// In all cases, the fields should satisfy `origin.join(path) == input_source_url`
59    /// where `input_source_url` is the URL of the originating input source.
60    Full {
61        /// A `file:` or *can be a base* URL which acts as the origin. If this is
62        /// a `file:` URL, root-relative links will resolve to subpaths of this URL.
63        /// See the [`BaseInfo::Full`] for more information.
64        origin: Url,
65
66        /// The `path` field is conditionally joined with `origin` to resolve
67        /// links. This is a (possibly-empty) locally- or root-relative link
68        /// and should not be a full URL or a scheme-relative link.
69        path: String,
70    },
71}
72
73impl BaseInfo {
74    /// Constructs [`BaseInfo::None`].
75    #[must_use]
76    pub const fn none() -> Self {
77        Self::None
78    }
79
80    /// Constructs [`BaseInfo::Full`] with the given fields.
81    #[must_use]
82    pub const fn full(origin: Url, path: String) -> Self {
83        Self::Full { origin, path }
84    }
85
86    /// Constructs a [`BaseInfo`], with the variant being determined by the given URL.
87    ///
88    /// - A [`Url::cannot_be_a_base`] URL will yield [`BaseInfo::None`].
89    /// - A `file:` URL will yield [`BaseInfo::NoRoot`].
90    /// - For other URLs, a [`BaseInfo::Full`] will be constructed from the URL's
91    ///   origin and path.
92    ///
93    /// Compared to [`BaseInfo::from_base_url`], this function is more lenient in
94    /// what it accepts because this function should return *a* result for all
95    /// input source URLs.
96    #[must_use]
97    pub fn from_source_url(url: &Url) -> Self {
98        if url.scheme() == "file" {
99            Self::NoRoot(url.clone())
100        } else {
101            match Self::split_url_origin_and_path(url) {
102                Some((origin, path)) => Self::full(origin, path),
103                None => Self::none(),
104            }
105        }
106    }
107
108    /// Split URL into its origin and path, if possible. Will fail and return
109    /// `None` for URLs which *cannot be a base*.
110    fn split_url_origin_and_path(url: &Url) -> Option<(Url, String)> {
111        let origin = url.join("/").ok()?;
112        let subpath = origin.make_relative(url)?;
113        Some((origin, subpath))
114    }
115
116    /// Constructs a [`BaseInfo`] from the given URL, requiring that the given path be acceptable as a
117    /// base URL. That is, it cannot be a special scheme like `data:`.
118    ///
119    /// # Errors
120    ///
121    /// Errors if the given URL cannot be a base.
122    pub fn from_base_url(url: &Url) -> Result<BaseInfo, ErrorKind> {
123        if url.cannot_be_a_base() {
124            return Err(ErrorKind::InvalidBase(
125                url.to_string(),
126                "The given URL cannot be used as a base URL".to_string(),
127            ));
128        }
129
130        Ok(Self::from_source_url(url))
131    }
132
133    /// Constructs a [`BaseInfo`] from the given filesystem path, requiring that
134    /// the given path be absolute. Assumes that the given path represents a directory.
135    ///
136    /// This constructs a [`BaseInfo::Full`] where root-relative links will go to
137    /// the given path.
138    ///
139    /// # Errors
140    ///
141    /// Errors if the given path is not an absolute path.
142    pub fn from_path(path: &Path) -> Result<BaseInfo, ErrorKind> {
143        let Ok(url) = Url::from_directory_path(path) else {
144            return Err(ErrorKind::InvalidBase(
145                path.to_string_lossy().to_string(),
146                "Base must either be a full URL (with scheme) or an absolute local path"
147                    .to_string(),
148            ));
149        };
150
151        Self::from_base_url(&url).map(|x| x.use_fs_path_as_origin().into_owned())
152    }
153
154    /// If this is a [`BaseInfo::NoRoot`], promote it to a [`BaseInfo::Full`]
155    /// by using the filesystem root as the "origin" for root-relative links.
156    /// Root-relative links will go to the filesystem root.
157    ///
158    /// Generally, this function should be avoided in favour of a more explicit
159    /// user-provided root directory. The filesystem root is rarely a good place
160    /// to look for files.
161    ///
162    /// Makes no change to other [`BaseInfo`] variants.
163    ///
164    /// # Panics
165    ///
166    /// If unable to split a [`BaseInfo::NoRoot`] into origin and path.
167    #[must_use]
168    pub fn use_fs_root_as_origin(&self) -> Cow<'_, Self> {
169        let Self::NoRoot(url) = self else {
170            return Cow::Borrowed(self);
171        };
172
173        let (fs_root, subpath) = Self::split_url_origin_and_path(url)
174            .expect("splitting up a NoRoot file:// URL should work");
175
176        Cow::Owned(Self::full(fs_root, subpath))
177    }
178
179    /// If this is a [`BaseInfo::NoRoot`], promote it to a [`BaseInfo::Full`]
180    /// by using the entire filesystem path as the "origin" for root-relative links.
181    /// Root-relative links will go to the URL that was previously within `NoRoot`.
182    ///
183    /// Generally, this function should be avoided in favour of a more explicit
184    /// user-provided root directory.
185    ///
186    /// Makes no change to other [`BaseInfo`] variants.
187    #[must_use]
188    pub fn use_fs_path_as_origin(&self) -> Cow<'_, Self> {
189        let Self::NoRoot(url) = self else {
190            return Cow::Borrowed(self);
191        };
192
193        Cow::Owned(Self::full(url.clone(), String::new()))
194    }
195
196    /// Returns the URL for the current [`BaseInfo`], joining the origin and path
197    /// if needed.
198    #[must_use]
199    pub fn url(&self) -> Option<Url> {
200        match self {
201            Self::None => None,
202            Self::NoRoot(url) => Some(url.clone()),
203            Self::Full { origin, path } => origin.join(path).ok(),
204        }
205    }
206
207    /// Returns the filesystem path for the current [`BaseInfo`] if the underlying
208    /// URL is a `file:` URL.
209    #[must_use]
210    pub fn to_file_path(&self) -> Option<PathBuf> {
211        self.url()
212            .filter(|url| url.scheme() == "file")
213            .and_then(|x| x.to_file_path().ok())
214    }
215
216    /// Returns the scheme of the underlying URL.
217    #[must_use]
218    pub fn scheme(&self) -> Option<&str> {
219        match self {
220            Self::None => None,
221            Self::NoRoot(url) | Self::Full { origin: url, .. } => Some(url.scheme()),
222        }
223    }
224
225    /// Returns whether this value is [`BaseInfo::None`].
226    #[must_use]
227    pub const fn is_none(&self) -> bool {
228        matches!(self, Self::None)
229    }
230
231    /// Returns whether this [`BaseInfo`] variant supports resolving root-relative links.
232    ///
233    /// If true, implies [`BaseInfo::supports_locally_relative`].
234    #[must_use]
235    pub const fn supports_root_relative(&self) -> bool {
236        matches!(self, Self::Full { .. })
237    }
238
239    /// Returns whether this [`BaseInfo`] variant supports resolving locally-relative links.
240    #[must_use]
241    pub const fn supports_locally_relative(&self) -> bool {
242        !self.is_none()
243    }
244
245    /// Returns the [`BaseInfo`] which has _more information_
246    /// between `self` and the given `fallback`.
247    ///
248    /// [`BaseInfo::Full`] is preferred over [`BaseInfo::NoRoot`]
249    /// which is preferred over [`BaseInfo::None`]. If both `self`
250    /// and `fallback` are the same variant, then `self` will be preferred.
251    #[must_use]
252    #[allow(clippy::match_same_arms)]
253    pub const fn or_fallback<'a>(&'a self, fallback: &'a Self) -> &'a Self {
254        match (self, fallback) {
255            (x @ Self::Full { .. }, _) => x,
256            (_, x @ Self::Full { .. }) => x,
257            (x @ Self::NoRoot(_), _) => x,
258            (_, x @ Self::NoRoot(_)) => x,
259            (x @ Self::None, Self::None) => x,
260        }
261    }
262
263    /// Parses the given URL text into a fully-qualified URL, including
264    /// resolving relative links if supported by the current [`BaseInfo`].
265    ///
266    /// To parse and resolve relative links, this uses [`Url::join`] with
267    /// the current [`BaseInfo`]'s URL as a base, as applicable.
268    ///
269    /// # Errors
270    ///
271    /// Returns an error if the text is an invalid URL, or if the text is a
272    /// relative link and this [`BaseInfo`] variant cannot resolve
273    /// the relative link.
274    pub fn parse_url_text(&self, text: &str) -> Result<Url, ErrorKind> {
275        use ParseError::RelativeUrlWithoutBase;
276
277        match Uri::try_from(text) {
278            Ok(Uri { url }) => Ok(url),
279
280            Err(ErrorKind::ParseUrl(RelativeUrlWithoutBase, _))
281                if !self.supports_root_relative() && is_root_relative_link(text) =>
282            {
283                Err(ErrorKind::RootRelativeLinkWithoutRoot(text.to_string()))
284            }
285
286            Err(ErrorKind::ParseUrl(RelativeUrlWithoutBase, _)) => match self {
287                // Cannot resolve any relative links
288                Self::None => Err(RelativeUrlWithoutBase),
289
290                // Resolve locally-relative link using NoRoot
291                Self::NoRoot(base) => base.join(text),
292
293                // Resolve root-relative link with `file:` base by changing it to
294                // a subpath of the origin.
295                Self::Full { origin, .. }
296                    if is_root_relative_link(text) && origin.scheme() == "file" =>
297                {
298                    let locally_relative = format!(".{}", text.trim_ascii_start());
299                    origin.join(&locally_relative)
300                }
301
302                // Resolve all other relative links, including root-relative links
303                // of non-file bases.
304                Self::Full { origin, path } => origin.join(path).and_then(|x| x.join(text)),
305            }
306            .map_err(|e| ErrorKind::ParseUrl(e, text.to_string())),
307
308            Err(e) => Err(e),
309        }
310    }
311
312    /// Parses the given URL text into a fully-qualified URL, including
313    /// resolving relative links if supported by the current [`BaseInfo`]
314    /// and applying the given root-dir if necessary.
315    ///
316    /// The root-dir is applied if the current `BaseInfo` is [`BaseInfo::None`]
317    /// or has a `file:` URL and if the given text is a root-relative link.
318    /// In these cases, the given `root_dir` will *override* the original
319    /// `BaseInfo`.
320    ///
321    /// # Errors
322    ///
323    /// Propagates errors from [`BaseInfo::parse_url_text`].
324    pub fn parse_url_text_with_root_dir(
325        &self,
326        text: &str,
327        root_dir: Option<&Url>,
328    ) -> Result<Url, ErrorKind> {
329        // HACK: if root-dir is specified, apply it by fudging around with
330        // file:// URLs. eventually, someone up the stack should construct
331        // the BaseInfo::Full for root-dir and this function should be deleted.
332
333        // NOTE: also apply root-dir for BaseInfo::None :)
334        let fake_base_info = match (self.scheme(), root_dir) {
335            (Some("file") | None, Some(root_dir)) if is_root_relative_link(text) => {
336                Cow::Owned(Self::full(root_dir.clone(), String::new()))
337            }
338            _ => Cow::Borrowed(self),
339        };
340
341        fake_base_info.parse_url_text(text)
342    }
343}
344
345impl TryFrom<&str> for BaseInfo {
346    type Error = ErrorKind;
347
348    /// Attempts to parse a base from the given string which may be
349    /// a URL or a filesystem path. In both cases, the string must
350    /// represent a valid base (i.e., not resulting in [`BaseInfo::None`]).
351    /// Otherwise, an error will be returned.
352    ///
353    /// Note that this makes a distinction between filesystem paths as paths
354    /// and filesystem paths as URLs. When specified as a path, they will
355    /// become [`BaseInfo::Full`] but when specified as a URL, they will
356    /// become [`BaseInfo::NoRoot`].
357    ///
358    /// Additionally, the empty string is accepted and will be parsed to
359    /// [`BaseInfo::None`].
360    fn try_from(value: &str) -> Result<Self, ErrorKind> {
361        if value.is_empty() {
362            return Ok(BaseInfo::none());
363        }
364        match utils::url::parse_url_or_path(value) {
365            Ok(url) => BaseInfo::from_base_url(&url),
366            Err(path) => BaseInfo::from_path(&PathBuf::from(path)),
367        }
368    }
369}
370
371impl TryFrom<String> for BaseInfo {
372    type Error = ErrorKind;
373    fn try_from(value: String) -> Result<Self, ErrorKind> {
374        BaseInfo::try_from(value.as_ref())
375    }
376}
377
#[cfg(test)]
mod tests {
    use super::BaseInfo;
    use reqwest::Url;
    use rstest::rstest;
    use std::path::PathBuf;

    /// Parses a URL literal, panicking if it is invalid.
    fn parse(text: &str) -> Url {
        Url::parse(text).unwrap()
    }

    /// Builds a `BaseInfo::Full` from `origin` and `path`, resolves `text`
    /// against it, and asserts that the result renders as `expected`.
    fn assert_full_base_resolves(origin: &str, path: &str, text: &str, expected: &str) {
        let resolved = BaseInfo::full(parse(origin), path.to_string())
            .parse_url_text(text)
            .unwrap()
            .to_string();

        assert_eq!(
            resolved, expected,
            "origin={origin}, path={path:?}, text={text:?}, expected={expected}"
        );
    }

    #[test]
    fn test_base_info_construction() {
        assert_eq!(
            BaseInfo::try_from("https://a.com/b/?q#x").unwrap(),
            BaseInfo::full(parse("https://a.com"), "b/?q#x".to_string())
        );
        assert_eq!(
            BaseInfo::try_from("file:///file-path").unwrap(),
            BaseInfo::NoRoot(parse("file:///file-path"))
        );
        assert_eq!(
            BaseInfo::try_from("/file-path").unwrap(),
            BaseInfo::full(parse("file:///file-path/"), String::new())
        );

        let path_cases = [
            // symbols inside a path are encoded if needed and should *not* be decoded.
            ("/file path", "file:///file%20path/"),
            ("/file%20path", "file:///file%2520path/"),
            // query parameters are *not* interpreted from paths and are treated as literals
            ("/file?q=2", "file:///file%3Fq=2/"),
        ];
        for (path, expected_origin) in path_cases {
            assert_eq!(
                BaseInfo::from_path(&PathBuf::from(path)).unwrap(),
                BaseInfo::full(parse(expected_origin), String::new()),
                "path={path:?}"
            );
        }

        // symbols are encoded inside URLs if needed
        let source_url_cases = [
            ("http://a.com/x y/", "http://a.com/", "x%20y/"),
            ("http://a.com/x?q=x y", "http://a.com/", "x?q=x%20y"),
            (
                "http://a.com/Ω≈ç√∫˜µ≤≥÷/",
                "http://a.com/",
                "%CE%A9%E2%89%88%C3%A7%E2%88%9A%E2%88%AB%CB%9C%C2%B5%E2%89%A4%E2%89%A5%C3%B7/",
            ),
            ("http://みんな.com/x", "http://xn--q9jyb4c.com/", "x"),
            (
                "http://München-Ost.com/x",
                "http://xn--mnchen-ost-9db.com/",
                "x",
            ),
            ("http://😉.com/x", "http://xn--n28h.com/", "x"),
        ];
        for (source, expected_origin, expected_path) in source_url_cases {
            assert_eq!(
                BaseInfo::from_source_url(&parse(source)),
                BaseInfo::full(parse(expected_origin), expected_path.to_owned()),
                "source={source:?}"
            );
        }

        // .url() of base-info should return the original URL with no changes to encoding
        let round_trip_urls = [
            "https://a.com/b/?q#x",
            "file:///a.com/b/?q#x",
            "https://a.com/b%20a/?q#x",
        ];
        for url_str in round_trip_urls {
            let rebuilt = BaseInfo::try_from(url_str).unwrap().url();
            assert_eq!(rebuilt, Some(parse(url_str)));
        }
    }

    #[test]
    fn test_base_info_with_http_base() {
        let base = BaseInfo::try_from("https://a.com/c/u/").unwrap();
        let root_dir = parse("file:///root/");

        let cases = [
            // shouldn't trigger the root URL
            ("/a", "https://a.com/a"),
            ("..", "https://a.com/c/"),
        ];
        for (text, expected) in cases {
            assert_eq!(
                base.parse_url_text_with_root_dir(text, Some(&root_dir)),
                Ok(parse(expected)),
                "text={text:?}"
            );
        }
    }

    #[test]
    fn test_base_info_parse_with_root_dir() {
        let base = BaseInfo::try_from("/file-path").unwrap();
        let root_dir = parse("file:///root/");

        let cases = [
            // first, links which shouldn't trigger the root URL
            ("a", "file:///file-path/a"),
            ("./a", "file:///file-path/a"),
            ("///scheme-relative", "file:///scheme-relative"),
            ("https://a.com/b?q", "https://a.com/b?q"),
            ("file:///a/", "file:///a/"),
            // basic root dir use
            ("/a", "file:///root/a"),
            // root-dir can be traversed out of
            ("/../../", "file:///"),
        ];
        for (text, expected) in cases {
            assert_eq!(
                base.parse_url_text_with_root_dir(text, Some(&root_dir)),
                Ok(parse(expected)),
                "text={text:?}"
            );
        }
    }

    #[rstest]
    // normal HTTP traversal and parsing absolute links
    #[case("https://a.com/b", "x/", "d", "https://a.com/x/d")]
    #[case("https://a.com/b/", "x/", "d", "https://a.com/b/x/d")]
    #[case("https://a.com/b/", "", "https://new.com", "https://new.com/")]
    // parsing absolute file://
    #[case("https://a.com/b/", "", "file:///a", "file:///a")]
    #[case("https://a.com/b/", "", "file:///a/", "file:///a/")]
    #[case("https://a.com/b/", "", "file:///a/b/", "file:///a/b/")]
    // file traversal
    #[case("file:///a/b/", "", "/x/y", "file:///a/b/x/y")]
    #[case("file:///a/b/", "", "a/", "file:///a/b/a/")]
    #[case("file:///a/b/", "a/", "../..", "file:///a/")]
    #[case("file:///a/b/", "a/", "/", "file:///a/b/")]
    #[case("file:///a/b/", "", "/..", "file:///a/")]
    #[case("file:///a/b/", "", "/../../", "file:///")]
    #[case("file:///a/b/", "", "?", "file:///a/b/?")]
    #[case("file:///a/b/", ".", "?", "file:///a/b/?")]
    // HTTP relative links
    #[case("https://a.com/x", "", "#", "https://a.com/x#")]
    #[case("https://a.com/x", "", "../../..", "https://a.com/")]
    #[case("https://a.com/x", "?q", "#x", "https://a.com/x?q#x")]
    #[case("https://a.com/x", ".", "?a", "https://a.com/?a")]
    #[case("https://a.com/x/", "", "/", "https://a.com/")]
    #[case("https://a.com/x?q#anchor", "", "?q", "https://a.com/x?q")]
    #[case("https://a.com/x#anchor", "", "?x", "https://a.com/x?x")]
    // scheme relative link - can traverse outside of root
    #[case("file:///root/", "", "///new-root", "file:///new-root")]
    #[case("file:///root/", "", "//a.com/boop", "file://a.com/boop")]
    #[case("https://root/", "", "//a.com/boop", "https://a.com/boop")]
    fn test_parse_url_text(
        #[case] origin: &str,
        #[case] path: &str,
        #[case] text: &str,
        #[case] expected: &str,
    ) {
        assert_full_base_resolves(origin, path, text, expected);
    }

    #[rstest]
    // file URLs without trailing / are kinda weird.
    #[case("file:///a/b/c", "", "/../../x", "file:///x")]
    #[case("file:///a/b/c", "", "/", "file:///a/b/")]
    #[case("file:///a/b/c", "", ".?qq", "file:///a/b/?qq")]
    #[case("file:///a/b/c", "", "#x", "file:///a/b/c#x")]
    #[case("file:///a/b/c", "", "./", "file:///a/b/")]
    #[case("file:///a/b/c", "", "c", "file:///a/b/c")]
    // joining with d
    #[case("file:///a/b/c", "d", "/../../x", "file:///x")]
    #[case("file:///a/b/c", "d", "/", "file:///a/b/")]
    #[case("file:///a/b/c", "d", ".", "file:///a/b/")]
    #[case("file:///a/b/c", "d", "./", "file:///a/b/")]
    // joining with d/
    #[case("file:///a/b/c", "d/", "/", "file:///a/b/")]
    #[case("file:///a/b/c", "d/", ".", "file:///a/b/d/")]
    #[case("file:///a/b/c", "d/", "./", "file:///a/b/d/")]
    fn test_parse_url_text_with_trailing_filename(
        #[case] origin: &str,
        #[case] path: &str,
        #[case] text: &str,
        #[case] expected: &str,
    ) {
        assert_full_base_resolves(origin, path, text, expected);
    }

    #[test]
    fn test_none_rejects_relative_but_accepts_absolute() {
        // Ensures BaseInfo::None doesn't silently swallow relative links
        let base = BaseInfo::none();

        // Absolute URLs still work
        assert!(base.parse_url_text("https://a.com").is_ok());

        // Relative links fail
        for relative in ["relative", "/root-relative"] {
            assert!(base.parse_url_text(relative).is_err());
        }
    }

    #[test]
    fn test_no_root_rejects_root_relative() {
        // A file:// source without --root-dir can resolve siblings but not root-relative links
        let base = BaseInfo::try_from("file:///some/path/").unwrap();

        let sibling = base.parse_url_text("sibling.html").unwrap();
        assert_eq!(sibling, parse("file:///some/path/sibling.html"));

        assert!(base.parse_url_text("/root-relative").is_err());
    }

    #[test]
    fn test_or_fallback_prefers_more_capable_variant() {
        // Pins the fallback priority that drives base selection in the collector
        let none = BaseInfo::none();
        let no_root = BaseInfo::NoRoot(parse("file:///a/"));
        let full = BaseInfo::full(parse("https://a.com/"), String::new());

        let cases = [
            (&none, &full, &full),
            (&full, &none, &full),
            (&none, &no_root, &no_root),
            (&no_root, &full, &full),
            (&none, &none, &none),
        ];
        for (primary, fallback, expected) in cases {
            assert_eq!(primary.or_fallback(fallback), expected);
        }
    }

    #[test]
    fn test_try_from_rejects_invalid_bases() {
        // Prevent data: URLs and relative paths from silently becoming a base in the future
        for invalid in ["data:text/plain,hello", "relative/path"] {
            assert!(BaseInfo::try_from(invalid).is_err());
        }
        assert!(BaseInfo::from_path(&PathBuf::from("relative")).is_err());
    }
}