1use reqwest::Url;
5use serde::Deserialize;
6use std::borrow::Cow;
7use std::path::{Path, PathBuf};
8use url::ParseError;
9
10use crate::ErrorKind;
11use crate::Uri;
12use crate::utils;
13use crate::utils::url::is_root_relative_link;
14
15#[derive(Debug, PartialEq, Eq, Clone, Deserialize, Default)]
29#[serde(try_from = "String")]
30pub enum BaseInfo {
31 #[default]
37 None,
38
39 NoRoot(Url),
44
45 Full {
61 origin: Url,
65
66 path: String,
70 },
71}
72
73impl BaseInfo {
74 #[must_use]
76 pub const fn none() -> Self {
77 Self::None
78 }
79
80 #[must_use]
82 pub const fn full(origin: Url, path: String) -> Self {
83 Self::Full { origin, path }
84 }
85
86 #[must_use]
97 pub fn from_source_url(url: &Url) -> Self {
98 if url.scheme() == "file" {
99 Self::NoRoot(url.clone())
100 } else {
101 match Self::split_url_origin_and_path(url) {
102 Some((origin, path)) => Self::full(origin, path),
103 None => Self::none(),
104 }
105 }
106 }
107
108 fn split_url_origin_and_path(url: &Url) -> Option<(Url, String)> {
111 let origin = url.join("/").ok()?;
112 let subpath = origin.make_relative(url)?;
113 Some((origin, subpath))
114 }
115
116 pub fn from_base_url(url: &Url) -> Result<BaseInfo, ErrorKind> {
123 if url.cannot_be_a_base() {
124 return Err(ErrorKind::InvalidBase(
125 url.to_string(),
126 "The given URL cannot be used as a base URL".to_string(),
127 ));
128 }
129
130 Ok(Self::from_source_url(url))
131 }
132
133 pub fn from_path(path: &Path) -> Result<BaseInfo, ErrorKind> {
143 let Ok(url) = Url::from_directory_path(path) else {
144 return Err(ErrorKind::InvalidBase(
145 path.to_string_lossy().to_string(),
146 "Base must either be a full URL (with scheme) or an absolute local path"
147 .to_string(),
148 ));
149 };
150
151 Self::from_base_url(&url).map(|x| x.use_fs_path_as_origin().into_owned())
152 }
153
154 #[must_use]
168 pub fn use_fs_root_as_origin(&self) -> Cow<'_, Self> {
169 let Self::NoRoot(url) = self else {
170 return Cow::Borrowed(self);
171 };
172
173 let (fs_root, subpath) = Self::split_url_origin_and_path(url)
174 .expect("splitting up a NoRoot file:// URL should work");
175
176 Cow::Owned(Self::full(fs_root, subpath))
177 }
178
179 #[must_use]
188 pub fn use_fs_path_as_origin(&self) -> Cow<'_, Self> {
189 let Self::NoRoot(url) = self else {
190 return Cow::Borrowed(self);
191 };
192
193 Cow::Owned(Self::full(url.clone(), String::new()))
194 }
195
196 #[must_use]
199 pub fn url(&self) -> Option<Url> {
200 match self {
201 Self::None => None,
202 Self::NoRoot(url) => Some(url.clone()),
203 Self::Full { origin, path } => origin.join(path).ok(),
204 }
205 }
206
207 #[must_use]
210 pub fn to_file_path(&self) -> Option<PathBuf> {
211 self.url()
212 .filter(|url| url.scheme() == "file")
213 .and_then(|x| x.to_file_path().ok())
214 }
215
216 #[must_use]
218 pub fn scheme(&self) -> Option<&str> {
219 match self {
220 Self::None => None,
221 Self::NoRoot(url) | Self::Full { origin: url, .. } => Some(url.scheme()),
222 }
223 }
224
225 #[must_use]
227 pub const fn is_none(&self) -> bool {
228 matches!(self, Self::None)
229 }
230
231 #[must_use]
235 pub const fn supports_root_relative(&self) -> bool {
236 matches!(self, Self::Full { .. })
237 }
238
239 #[must_use]
241 pub const fn supports_locally_relative(&self) -> bool {
242 !self.is_none()
243 }
244
245 #[must_use]
252 #[allow(clippy::match_same_arms)]
253 pub const fn or_fallback<'a>(&'a self, fallback: &'a Self) -> &'a Self {
254 match (self, fallback) {
255 (x @ Self::Full { .. }, _) => x,
256 (_, x @ Self::Full { .. }) => x,
257 (x @ Self::NoRoot(_), _) => x,
258 (_, x @ Self::NoRoot(_)) => x,
259 (x @ Self::None, Self::None) => x,
260 }
261 }
262
263 pub fn parse_url_text(&self, text: &str) -> Result<Url, ErrorKind> {
275 use ParseError::RelativeUrlWithoutBase;
276
277 match Uri::try_from(text) {
278 Ok(Uri { url }) => Ok(url),
279
280 Err(ErrorKind::ParseUrl(RelativeUrlWithoutBase, _))
281 if !self.supports_root_relative() && is_root_relative_link(text) =>
282 {
283 Err(ErrorKind::RootRelativeLinkWithoutRoot(text.to_string()))
284 }
285
286 Err(ErrorKind::ParseUrl(RelativeUrlWithoutBase, _)) => match self {
287 Self::None => Err(RelativeUrlWithoutBase),
289
290 Self::NoRoot(base) => base.join(text),
292
293 Self::Full { origin, .. }
296 if is_root_relative_link(text) && origin.scheme() == "file" =>
297 {
298 let locally_relative = format!(".{}", text.trim_ascii_start());
299 origin.join(&locally_relative)
300 }
301
302 Self::Full { origin, path } => origin.join(path).and_then(|x| x.join(text)),
305 }
306 .map_err(|e| ErrorKind::ParseUrl(e, text.to_string())),
307
308 Err(e) => Err(e),
309 }
310 }
311
312 pub fn parse_url_text_with_root_dir(
325 &self,
326 text: &str,
327 root_dir: Option<&Url>,
328 ) -> Result<Url, ErrorKind> {
329 let fake_base_info = match (self.scheme(), root_dir) {
335 (Some("file") | None, Some(root_dir)) if is_root_relative_link(text) => {
336 Cow::Owned(Self::full(root_dir.clone(), String::new()))
337 }
338 _ => Cow::Borrowed(self),
339 };
340
341 fake_base_info.parse_url_text(text)
342 }
343}
344
345impl TryFrom<&str> for BaseInfo {
346 type Error = ErrorKind;
347
348 fn try_from(value: &str) -> Result<Self, ErrorKind> {
361 if value.is_empty() {
362 return Ok(BaseInfo::none());
363 }
364 match utils::url::parse_url_or_path(value) {
365 Ok(url) => BaseInfo::from_base_url(&url),
366 Err(path) => BaseInfo::from_path(&PathBuf::from(path)),
367 }
368 }
369}
370
371impl TryFrom<String> for BaseInfo {
372 type Error = ErrorKind;
373 fn try_from(value: String) -> Result<Self, ErrorKind> {
374 BaseInfo::try_from(value.as_ref())
375 }
376}
377
#[cfg(test)]
mod tests {
    use super::BaseInfo;
    use reqwest::Url;
    use rstest::rstest;
    use std::path::PathBuf;

    /// Parses a URL literal, panicking on malformed test input.
    fn parse(text: &str) -> Url {
        Url::parse(text).unwrap()
    }

    #[test]
    fn test_base_info_construction() {
        // Textual bases: remote URL, file URL, and bare absolute path.
        assert_eq!(
            BaseInfo::try_from("https://a.com/b/?q#x").unwrap(),
            BaseInfo::full(parse("https://a.com"), "b/?q#x".to_string())
        );
        assert_eq!(
            BaseInfo::try_from("file:///file-path").unwrap(),
            BaseInfo::NoRoot(parse("file:///file-path"))
        );
        assert_eq!(
            BaseInfo::try_from("/file-path").unwrap(),
            BaseInfo::full(parse("file:///file-path/"), String::new())
        );

        // Characters that are special in URLs get percent-encoded when they
        // appear in a filesystem path.
        let path_cases = [
            ("/file path", "file:///file%20path/"),
            ("/file%20path", "file:///file%2520path/"),
            ("/file?q=2", "file:///file%3Fq=2/"),
        ];
        for (fs_path, expected_origin) in path_cases {
            assert_eq!(
                BaseInfo::from_path(&PathBuf::from(fs_path)).unwrap(),
                BaseInfo::full(parse(expected_origin), String::new()),
                "fs_path={fs_path:?}"
            );
        }

        // Source URLs are split into an origin and an encoded remainder;
        // non-ASCII host names show up in their punycode form.
        let source_cases = [
            ("http://a.com/x y/", "http://a.com/", "x%20y/"),
            ("http://a.com/x?q=x y", "http://a.com/", "x?q=x%20y"),
            (
                "http://a.com/Ω≈ç√∫˜µ≤≥÷/",
                "http://a.com/",
                "%CE%A9%E2%89%88%C3%A7%E2%88%9A%E2%88%AB%CB%9C%C2%B5%E2%89%A4%E2%89%A5%C3%B7/",
            ),
            ("http://みんな.com/x", "http://xn--q9jyb4c.com/", "x"),
            (
                "http://München-Ost.com/x",
                "http://xn--mnchen-ost-9db.com/",
                "x",
            ),
            ("http://😉.com/x", "http://xn--n28h.com/", "x"),
        ];
        for (source, expected_origin, expected_path) in source_cases {
            assert_eq!(
                BaseInfo::from_source_url(&parse(source)),
                BaseInfo::full(parse(expected_origin), expected_path.to_owned()),
                "source={source:?}"
            );
        }

        // Converting to a base and back to a URL must round-trip.
        for literal in [
            "https://a.com/b/?q#x",
            "file:///a.com/b/?q#x",
            "https://a.com/b%20a/?q#x",
        ] {
            let expected = parse(literal);
            assert_eq!(BaseInfo::try_from(literal).unwrap().url(), Some(expected));
        }
    }

    #[test]
    fn test_base_info_with_http_base() {
        let base = BaseInfo::try_from("https://a.com/c/u/").unwrap();
        let root_dir = parse("file:///root/");

        // With a remote base the root directory is never consulted.
        for (text, expected) in [("/a", "https://a.com/a"), ("..", "https://a.com/c/")] {
            assert_eq!(
                base.parse_url_text_with_root_dir(text, Some(&root_dir)),
                Ok(parse(expected)),
                "text={text:?}"
            );
        }
    }

    #[test]
    fn test_base_info_parse_with_root_dir() {
        let base = BaseInfo::try_from("/file-path").unwrap();
        let root_dir = parse("file:///root/");

        let cases = [
            // Anything that is not root-relative ignores the root directory.
            ("a", "file:///file-path/a"),
            ("./a", "file:///file-path/a"),
            ("///scheme-relative", "file:///scheme-relative"),
            ("https://a.com/b?q", "https://a.com/b?q"),
            ("file:///a/", "file:///a/"),
            // Root-relative links are resolved beneath the root directory.
            ("/a", "file:///root/a"),
            // Parent segments may still climb out of the root directory.
            ("/../../", "file:///"),
        ];
        for (text, expected) in cases {
            assert_eq!(
                base.parse_url_text_with_root_dir(text, Some(&root_dir)),
                Ok(parse(expected)),
                "text={text:?}"
            );
        }
    }

    #[rstest]
    // Remote origins, with and without a trailing slash.
    #[case("https://a.com/b", "x/", "d", "https://a.com/x/d")]
    #[case("https://a.com/b/", "x/", "d", "https://a.com/b/x/d")]
    // Absolute links replace the base entirely.
    #[case("https://a.com/b/", "", "https://new.com", "https://new.com/")]
    #[case("https://a.com/b/", "", "file:///a", "file:///a")]
    #[case("https://a.com/b/", "", "file:///a/", "file:///a/")]
    #[case("https://a.com/b/", "", "file:///a/b/", "file:///a/b/")]
    // File origins: root-relative links resolve beneath the origin.
    #[case("file:///a/b/", "", "/x/y", "file:///a/b/x/y")]
    #[case("file:///a/b/", "", "a/", "file:///a/b/a/")]
    #[case("file:///a/b/", "a/", "../..", "file:///a/")]
    #[case("file:///a/b/", "a/", "/", "file:///a/b/")]
    #[case("file:///a/b/", "", "/..", "file:///a/")]
    #[case("file:///a/b/", "", "/../../", "file:///")]
    #[case("file:///a/b/", "", "?", "file:///a/b/?")]
    #[case("file:///a/b/", ".", "?", "file:///a/b/?")]
    // Queries, fragments, and parent segments on remote origins.
    #[case("https://a.com/x", "", "#", "https://a.com/x#")]
    #[case("https://a.com/x", "", "../../..", "https://a.com/")]
    #[case("https://a.com/x", "?q", "#x", "https://a.com/x?q#x")]
    #[case("https://a.com/x", ".", "?a", "https://a.com/?a")]
    #[case("https://a.com/x/", "", "/", "https://a.com/")]
    #[case("https://a.com/x?q#anchor", "", "?q", "https://a.com/x?q")]
    #[case("https://a.com/x#anchor", "", "?x", "https://a.com/x?x")]
    // Links starting with two or more slashes.
    #[case("file:///root/", "", "///new-root", "file:///new-root")]
    #[case("file:///root/", "", "//a.com/boop", "file://a.com/boop")]
    #[case("https://root/", "", "//a.com/boop", "https://a.com/boop")]
    fn test_parse_url_text(
        #[case] origin: &str,
        #[case] path: &str,
        #[case] text: &str,
        #[case] expected: &str,
    ) {
        let base = BaseInfo::full(Url::parse(origin).unwrap(), path.to_string());
        let resolved = base.parse_url_text(text).unwrap();

        assert_eq!(
            resolved.to_string(),
            expected,
            "origin={origin}, path={path:?}, text={text:?}, expected={expected}"
        );
    }

    #[rstest]
    // Origin ends in a file name and the path is empty.
    #[case("file:///a/b/c", "", "/../../x", "file:///x")]
    #[case("file:///a/b/c", "", "/", "file:///a/b/")]
    #[case("file:///a/b/c", "", ".?qq", "file:///a/b/?qq")]
    #[case("file:///a/b/c", "", "#x", "file:///a/b/c#x")]
    #[case("file:///a/b/c", "", "./", "file:///a/b/")]
    #[case("file:///a/b/c", "", "c", "file:///a/b/c")]
    // The path names a file next to the origin's file name.
    #[case("file:///a/b/c", "d", "/../../x", "file:///x")]
    #[case("file:///a/b/c", "d", "/", "file:///a/b/")]
    #[case("file:///a/b/c", "d", ".", "file:///a/b/")]
    #[case("file:///a/b/c", "d", "./", "file:///a/b/")]
    // The path names a directory next to the origin's file name.
    #[case("file:///a/b/c", "d/", "/", "file:///a/b/")]
    #[case("file:///a/b/c", "d/", ".", "file:///a/b/d/")]
    #[case("file:///a/b/c", "d/", "./", "file:///a/b/d/")]
    fn test_parse_url_text_with_trailing_filename(
        #[case] origin: &str,
        #[case] path: &str,
        #[case] text: &str,
        #[case] expected: &str,
    ) {
        let base = BaseInfo::full(Url::parse(origin).unwrap(), path.to_string());
        let resolved = base.parse_url_text(text).unwrap();

        assert_eq!(
            resolved.to_string(),
            expected,
            "origin={origin}, path={path:?}, text={text:?}, expected={expected}"
        );
    }

    #[test]
    fn test_none_rejects_relative_but_accepts_absolute() {
        let base = BaseInfo::none();

        // Absolute links need no base at all.
        assert!(base.parse_url_text("https://a.com").is_ok());
        // Locally-relative links cannot be resolved without a base.
        assert!(base.parse_url_text("relative").is_err());
        // Neither can root-relative ones.
        assert!(base.parse_url_text("/root-relative").is_err());
    }

    #[test]
    fn test_no_root_rejects_root_relative() {
        let base = BaseInfo::try_from("file:///some/path/").unwrap();

        // Locally-relative links are joined onto the base URL.
        let sibling = base.parse_url_text("sibling.html").unwrap();
        assert_eq!(sibling, parse("file:///some/path/sibling.html"));

        assert!(base.parse_url_text("/root-relative").is_err());
    }

    #[test]
    fn test_or_fallback_prefers_more_capable_variant() {
        let nothing = BaseInfo::none();
        let rootless = BaseInfo::NoRoot(parse("file:///a/"));
        let complete = BaseInfo::full(parse("https://a.com/"), String::new());

        assert_eq!(nothing.or_fallback(&complete), &complete);
        assert_eq!(complete.or_fallback(&nothing), &complete);
        assert_eq!(nothing.or_fallback(&rootless), &rootless);
        assert_eq!(rootless.or_fallback(&complete), &complete);
        assert_eq!(nothing.or_fallback(&nothing), &nothing);
    }

    #[test]
    fn test_try_from_rejects_invalid_bases() {
        // A `data:` URL cannot be a base.
        assert!(BaseInfo::try_from("data:text/plain,hello").is_err());
        // Relative paths are rejected, both as text and as a `Path`.
        assert!(BaseInfo::try_from("relative/path").is_err());
        assert!(BaseInfo::from_path(&PathBuf::from("relative")).is_err());
    }
}