1use log::warn;
2use reqwest::Url;
3use std::path::Path;
4use url::PathSegmentsMut;
5
6use crate::{
7 BaseInfo, BasicAuthCredentials, LycheeResult, Request, RequestError, Uri,
8 basic_auth::BasicAuthExtractor,
9 types::{ResolvedInputSource, uri::raw::RawUri},
10};
11
12pub(crate) fn extract_credentials(
14 extractor: Option<&BasicAuthExtractor>,
15 uri: &Uri,
16) -> Option<BasicAuthCredentials> {
17 extractor.as_ref().and_then(|ext| ext.matches(uri))
18}
19
20fn create_request(
22 raw_uri: &RawUri,
23 source: &ResolvedInputSource,
24 root_dir: Option<&Path>,
25 base: &BaseInfo,
26 extractor: Option<&BasicAuthExtractor>,
27) -> LycheeResult<Request> {
28 let uri = try_parse_into_uri(raw_uri, root_dir, base)?;
29 let source = source.clone();
30 let element = raw_uri.element.clone();
31 let attribute = raw_uri.attribute.clone();
32 let span = Some(raw_uri.span);
33 let credentials = extract_credentials(extractor, &uri);
34
35 Ok(Request {
36 uri,
37 source,
38 element,
39 attribute,
40 span,
41 credentials,
42 })
43}
44
45fn try_parse_into_uri(
57 raw_uri: &RawUri,
58 root_dir: Option<&Path>,
59 base: &BaseInfo,
60) -> LycheeResult<Uri> {
61 let root_dir = root_dir.and_then(|x| Url::from_directory_path(x).ok());
63
64 let mut url = base.parse_url_text_with_root_dir(&raw_uri.text, root_dir.as_ref())?;
65
66 if url.scheme() == "file" {
69 if url.path() != "/" && url.path().ends_with('/') {
70 warn!(
71 "Removing trailing slash from file URL: {url}. {} {}",
72 "This lets the URL match both files and folders.",
73 "In future, a file URL ending in / might fail link checking if it points to a file."
74 );
75 }
76 let _ = url
77 .path_segments_mut()
78 .as_mut()
79 .map(PathSegmentsMut::pop_if_empty);
80 }
81
82 Ok(url.into())
83}
84
85pub(crate) fn create(
92 uris: Vec<RawUri>,
93 source: &ResolvedInputSource,
94 root_dir: Option<&Path>,
95 fallback_base: &BaseInfo,
96 extractor: Option<&BasicAuthExtractor>,
97) -> Vec<Result<Request, RequestError>> {
98 let source_base = match source.to_base_info() {
99 Ok(base) => base,
100 Err(e) => {
101 return vec![Err(RequestError::InputSourceError(
104 source.clone().into(),
105 e,
106 ))];
107 }
108 };
109
110 let fallback_base = fallback_base.use_fs_root_as_origin();
114 let base = source_base.or_fallback(&fallback_base);
115
116 let mut vec = vec![];
117
118 for raw_uri in uris {
119 let result = create_request(&raw_uri, source, root_dir, base, extractor);
120 match result {
121 Ok(request) => {
122 vec.push(Ok(request));
123 }
124 Err(e) => vec.push(Err(RequestError::CreateRequestItem(
125 raw_uri.clone(),
126 source.clone(),
127 e,
128 ))),
129 }
130 }
131
132 vec
133}
134
#[cfg(test)]
mod tests {
    use std::borrow::Cow;
    use std::num::NonZeroUsize;
    use std::path::PathBuf;

    use crate::Request;
    use crate::types::uri::raw::{RawUri, RawUriSpan};

    use super::*;

    /// Span attached to every raw URI built by these tests: the smallest
    /// representable line and column.
    const SPAN: RawUriSpan = RawUriSpan {
        line: NonZeroUsize::MIN,
        column: Some(NonZeroUsize::MIN),
    };

    /// Remote base URL shared by the tests that resolve against a base.
    const BASE_URL: &str = "https://example.com/path/page.html";

    /// Base info parsed from [`BASE_URL`].
    fn https_base() -> BaseInfo {
        BaseInfo::try_from(BASE_URL).unwrap()
    }

    /// Input source consisting of an empty in-memory string.
    fn empty_string_source() -> ResolvedInputSource {
        ResolvedInputSource::String(Cow::Borrowed(""))
    }

    /// Input source pointing at the local file `/some/page.html`.
    fn fs_page_source() -> ResolvedInputSource {
        ResolvedInputSource::FsPath(PathBuf::from("/some/page.html"))
    }

    /// Root directory used by the `*_from_root_dir*` tests.
    fn lychee_root() -> PathBuf {
        PathBuf::from("/tmp/lychee")
    }

    /// Runs [`create`] and keeps only the successfully created requests.
    fn create_ok_only(
        uris: Vec<RawUri>,
        source: &ResolvedInputSource,
        root_dir: Option<&Path>,
        base: &BaseInfo,
        extractor: Option<&BasicAuthExtractor>,
    ) -> Vec<Request> {
        create(uris, source, root_dir, base, extractor)
            .into_iter()
            .flatten()
            .collect()
    }

    /// Builds a raw URI with the given text, no element/attribute and [`SPAN`].
    fn raw_uri(text: &'static str) -> RawUri {
        RawUri {
            text: text.to_owned(),
            element: None,
            attribute: None,
            span: SPAN,
        }
    }

    /// Asserts that exactly one request was produced and that it targets `expected`.
    #[track_caller]
    fn assert_single_url(requests: &[Request], expected: &str) {
        assert_eq!(requests.len(), 1);
        assert!(
            requests
                .iter()
                .any(|request| request.uri.url.as_str() == expected)
        );
    }

    // --- Resolution against a remote base URL, no root dir ---

    #[test]
    fn test_relative_url_resolution() {
        let requests = create_ok_only(
            vec![raw_uri("relative.html")],
            &empty_string_source(),
            None,
            &https_base(),
            None,
        );

        assert_single_url(&requests, "https://example.com/path/relative.html");
    }

    #[test]
    fn test_absolute_url_resolution() {
        let requests = create_ok_only(
            vec![raw_uri("https://another.com/page")],
            &empty_string_source(),
            None,
            &https_base(),
            None,
        );

        assert_single_url(&requests, "https://another.com/page");
    }

    #[test]
    fn test_root_relative_url_resolution() {
        let requests = create_ok_only(
            vec![raw_uri("/root-relative")],
            &empty_string_source(),
            None,
            &https_base(),
            None,
        );

        assert_single_url(&requests, "https://example.com/root-relative");
    }

    #[test]
    fn test_parent_directory_url_resolution() {
        let requests = create_ok_only(
            vec![raw_uri("../parent")],
            &empty_string_source(),
            None,
            &https_base(),
            None,
        );

        assert_single_url(&requests, "https://example.com/parent");
    }

    #[test]
    fn test_fragment_url_resolution() {
        let requests = create_ok_only(
            vec![raw_uri("#fragment")],
            &empty_string_source(),
            None,
            &https_base(),
            None,
        );

        assert_single_url(&requests, "https://example.com/path/page.html#fragment");
    }

    // --- Local file source with a root dir, no base ---

    #[test]
    fn test_relative_url_resolution_from_root_dir() {
        let root_dir = lychee_root();
        let requests = create_ok_only(
            vec![raw_uri("relative.html")],
            &fs_page_source(),
            Some(root_dir.as_path()),
            &BaseInfo::none(),
            None,
        );

        assert_single_url(&requests, "file:///some/relative.html");
    }

    #[test]
    fn test_absolute_url_resolution_from_root_dir() {
        let root_dir = lychee_root();
        let requests = create_ok_only(
            vec![raw_uri("https://another.com/page")],
            &fs_page_source(),
            Some(root_dir.as_path()),
            &BaseInfo::none(),
            None,
        );

        assert_single_url(&requests, "https://another.com/page");
    }

    #[test]
    fn test_root_relative_url_resolution_from_root_dir() {
        let root_dir = lychee_root();
        let requests = create_ok_only(
            vec![raw_uri("/root-relative")],
            &fs_page_source(),
            Some(root_dir.as_path()),
            &BaseInfo::none(),
            None,
        );

        assert_single_url(&requests, "file:///tmp/lychee/root-relative");
    }

    #[test]
    fn test_parent_directory_url_resolution_from_root_dir() {
        let root_dir = lychee_root();
        let requests = create_ok_only(
            vec![raw_uri("../parent")],
            &fs_page_source(),
            Some(root_dir.as_path()),
            &BaseInfo::none(),
            None,
        );

        assert_single_url(&requests, "file:///parent");
    }

    #[test]
    fn test_fragment_url_resolution_from_root_dir() {
        let root_dir = lychee_root();
        let requests = create_ok_only(
            vec![raw_uri("#fragment")],
            &fs_page_source(),
            Some(root_dir.as_path()),
            &BaseInfo::none(),
            None,
        );

        assert_single_url(&requests, "file:///some/page.html#fragment");
    }

    // --- Local file source with both a root dir and a remote base URL ---

    #[test]
    fn test_relative_url_resolution_from_root_dir_and_base_url() {
        let root_dir = lychee_root();
        let requests = create_ok_only(
            vec![raw_uri("relative.html")],
            &fs_page_source(),
            Some(root_dir.as_path()),
            &https_base(),
            None,
        );

        assert_single_url(&requests, "https://example.com/path/relative.html");
    }

    #[test]
    fn test_absolute_url_resolution_from_root_dir_and_base_url() {
        let root_dir = lychee_root();
        let requests = create_ok_only(
            vec![raw_uri("https://another.com/page")],
            &fs_page_source(),
            Some(root_dir.as_path()),
            &https_base(),
            None,
        );

        assert_single_url(&requests, "https://another.com/page");
    }

    #[test]
    fn test_root_relative_url_resolution_from_root_dir_and_base_url() {
        let root_dir = lychee_root();
        let requests = create_ok_only(
            vec![raw_uri("/root-relative")],
            &fs_page_source(),
            Some(root_dir.as_path()),
            &https_base(),
            None,
        );

        assert_single_url(&requests, "https://example.com/root-relative");
    }

    #[test]
    fn test_parent_directory_url_resolution_from_root_dir_and_base_url() {
        let root_dir = lychee_root();
        let requests = create_ok_only(
            vec![raw_uri("../parent")],
            &fs_page_source(),
            Some(root_dir.as_path()),
            &https_base(),
            None,
        );

        assert_single_url(&requests, "https://example.com/parent");
    }

    #[test]
    fn test_fragment_url_resolution_from_root_dir_and_base_url() {
        let root_dir = lychee_root();
        let requests = create_ok_only(
            vec![raw_uri("#fragment")],
            &fs_page_source(),
            Some(root_dir.as_path()),
            &https_base(),
            None,
        );

        assert_single_url(&requests, "https://example.com/path/page.html#fragment");
    }

    // --- No base at all ---

    #[test]
    fn test_no_base_url_resolution() {
        let requests = create_ok_only(
            vec![raw_uri("https://example.com/page")],
            &empty_string_source(),
            None,
            &BaseInfo::none(),
            None,
        );

        assert_single_url(&requests, "https://example.com/page");
    }

    // --- Direct calls to `create_request` ---

    #[test]
    fn test_create_request_from_relative_file_path() {
        let base = BaseInfo::from_path(&PathBuf::from("/tmp/lychee")).unwrap();
        let input_source = ResolvedInputSource::FsPath(PathBuf::from("page.html"));

        let actual =
            create_request(&raw_uri("file.html"), &input_source, None, &base, None).unwrap();

        let expected_uri = Uri {
            url: Url::from_file_path("/tmp/lychee/file.html").unwrap(),
        };
        let expected = Request::new(expected_uri, input_source).with_span(SPAN);

        assert_eq!(actual, expected);
    }

    #[test]
    fn test_create_request_from_relative_file_path_errors() {
        // A relative link read from stdin, with no base, must be rejected.
        let from_stdin = create_request(
            &raw_uri("file.html"),
            &ResolvedInputSource::Stdin,
            None,
            &BaseInfo::none(),
            None,
        );
        assert!(from_stdin.is_err());

        // A root-relative link from a relative file path, with neither a root
        // dir nor a base, must be rejected as well.
        let root_relative = create_request(
            &raw_uri("/file.html"),
            &ResolvedInputSource::FsPath(PathBuf::from("page.html")),
            None,
            &BaseInfo::none(),
            None,
        );
        assert!(root_relative.is_err());
    }

    #[test]
    fn test_create_request_from_absolute_file_path() {
        let base = BaseInfo::from_path(&PathBuf::from("/tmp/lychee")).unwrap();
        let input_source = ResolvedInputSource::FsPath(PathBuf::from("/tmp/lychee/page.html"));

        let actual = create_request(
            &raw_uri("/usr/local/share/doc/example.html"),
            &input_source,
            None,
            &base,
            None,
        )
        .unwrap();

        let expected_uri = Uri {
            url: Url::from_file_path("/tmp/lychee/usr/local/share/doc/example.html").unwrap(),
        };
        let expected = Request::new(expected_uri, input_source).with_span(SPAN);

        assert_eq!(actual, expected);
    }

    // --- Direct calls to `try_parse_into_uri` ---

    #[test]
    fn test_parse_relative_path_into_uri() {
        let base = BaseInfo::from_path(&PathBuf::from("/tmp/lychee")).unwrap();

        let uri = try_parse_into_uri(&raw_uri("relative.html"), None, &base).unwrap();

        assert_eq!(uri.url.as_str(), "file:///tmp/lychee/relative.html");
    }

    #[test]
    fn test_parse_absolute_path_into_uri() {
        let base = BaseInfo::from_path(&PathBuf::from("/tmp/lychee")).unwrap();

        let uri = try_parse_into_uri(&raw_uri("absolute.html"), None, &base).unwrap();

        assert_eq!(uri.url.as_str(), "file:///tmp/lychee/absolute.html");
    }
}