Skip to main content

lychee_lib/
client.rs

1//! Handler of link checking operations.
2//!
3//! This module defines two structs, [`Client`] and [`ClientBuilder`].
4//! `Client` handles incoming requests and returns responses.
5//! `ClientBuilder` exposes a finer level of granularity for building
6//! a `Client`.
7//!
8//! For convenience, a free function [`check`] is provided for ad-hoc
9//! link checks.
10#![allow(
11    clippy::module_name_repetitions,
12    clippy::struct_excessive_bools,
13    clippy::default_trait_access,
14    clippy::used_underscore_binding
15)]
16use std::{collections::HashSet, sync::Arc, time::Duration};
17
18use http::{
19    StatusCode,
20    header::{HeaderMap, HeaderValue},
21};
22use log::debug;
23use octocrab::Octocrab;
24use regex::RegexSet;
25use reqwest::{header, redirect, tls};
26use reqwest_cookie_store::CookieStoreMutex;
27use secrecy::{ExposeSecret, SecretString};
28use typed_builder::TypedBuilder;
29
30use crate::{
31    BaseInfo, BasicAuthCredentials, ErrorKind, Request, Response, Result, Status, Uri,
32    chain::RequestChain,
33    checker::{file::FileChecker, mail::MailChecker, website::WebsiteChecker},
34    filter::Filter,
35    ratelimit::{ClientMap, HostConfigs, HostKey, HostPool, RateLimitConfig},
36    remap::Remaps,
37    types::{DEFAULT_ACCEPTED_STATUS_CODES, redirect_history::RedirectHistory},
38};
39
/// Default number of redirects before a request is deemed as failed, 5.
pub const DEFAULT_MAX_REDIRECTS: usize = 5;
/// Default number of retries before a request is deemed as failed, 3.
pub const DEFAULT_MAX_RETRIES: u64 = 3;
/// Default wait time in seconds between retries, 1.
pub const DEFAULT_RETRY_WAIT_TIME_SECS: u64 = 1;
/// Default timeout in seconds before a request is deemed as failed, 20.
pub const DEFAULT_TIMEOUT_SECS: u64 = 20;
/// Default user agent, `lychee/<PKG_VERSION>`.
///
/// The version is taken from `CARGO_PKG_VERSION` at compile time.
pub const DEFAULT_USER_AGENT: &str = concat!("lychee/", env!("CARGO_PKG_VERSION"));

// Constants currently not configurable by the user.
/// A timeout for only the connect phase of a [`Client`], 10 seconds.
const CONNECT_TIMEOUT: Duration = Duration::from_secs(10);
/// TCP keepalive, 60 seconds.
///
/// See <https://tldp.org/HOWTO/TCP-Keepalive-HOWTO/overview.html> for more
/// information.
const TCP_KEEPALIVE: Duration = Duration::from_secs(60);
59
60/// Builder for [`Client`].
61///
62/// See crate-level documentation for usage example.
63#[derive(TypedBuilder, Debug, Clone)]
64#[builder(field_defaults(default, setter(into)))]
65pub struct ClientBuilder {
66    /// Optional GitHub token used for GitHub links.
67    ///
68    /// This allows much more request before getting rate-limited.
69    ///
70    /// # Rate-limiting Defaults
71    ///
72    /// As of Feb 2022, it's 60 per hour without GitHub token v.s.
73    /// 5000 per hour with token.
74    github_token: Option<SecretString>,
75
76    /// Remap URIs matching a pattern to a different URI.
77    ///
78    /// This makes it possible to remap any HTTP/HTTPS endpoint to a different
79    /// HTTP/HTTPS one. This feature could also be used to proxy
80    /// certain requests.
81    ///
82    /// # Usage Notes
83    ///
84    /// Use with caution because a large set of remapping rules may cause
85    /// performance issues.
86    ///
87    /// Furthermore rules are executed sequentially and multiple mappings for
88    /// the same URI are allowed, so it is up to the library user's discretion to
89    /// make sure rules don't conflict with each other.
90    remaps: Option<Remaps>,
91
92    /// Automatically append file extensions to `file://` URIs as needed
93    ///
94    /// This option takes effect on `file://` URIs which do not exist.
95    fallback_extensions: Vec<String>,
96
97    /// Index file names to use when resolving `file://` URIs which point to
98    /// directories.
99    ///
100    /// For local directory links, if this is non-`None`, then at least one
101    /// index file from this list must exist in order for the link to be
102    /// considered valid. Index files names are required to match regular
103    /// files, aside from the special `.` name which will match the
104    /// directory itself.
105    ///
106    /// If `None`, index file checking is disabled and directory links are valid
107    /// as long as the directory exists on disk.
108    ///
109    /// In the [`ClientBuilder`], this defaults to `None`.
110    #[builder(default = None)]
111    index_files: Option<Vec<String>>,
112
113    /// Links matching this set of regular expressions are **always** checked.
114    ///
115    /// This has higher precedence over [`ClientBuilder::excludes`], **but**
116    /// has lower precedence compared to any other `exclude_` fields or
117    /// [`ClientBuilder::schemes`] below.
118    includes: Option<RegexSet>,
119
120    /// Links matching this set of regular expressions are ignored, **except**
121    /// when a link also matches against [`ClientBuilder::includes`].
122    excludes: Option<RegexSet>,
123
124    /// When `true`, exclude all private network addresses.
125    ///
126    /// This effectively turns on the following fields:
127    /// - [`ClientBuilder::exclude_private_ips`]
128    /// - [`ClientBuilder::exclude_link_local_ips`]
129    /// - [`ClientBuilder::exclude_loopback_ips`]
130    exclude_all_private: bool,
131
132    /// When `true`, exclude private IP addresses.
133    ///
134    /// # IPv4
135    ///
136    /// The private address ranges are defined in [IETF RFC 1918] and include:
137    ///
138    ///  - `10.0.0.0/8`
139    ///  - `172.16.0.0/12`
140    ///  - `192.168.0.0/16`
141    ///
142    /// # IPv6
143    ///
144    /// The address is a unique local address (`fc00::/7`).
145    ///
146    /// This property is defined in [IETF RFC 4193].
147    ///
148    /// # Note
149    ///
150    /// Unicast site-local network was defined in [IETF RFC 4291], but was fully
151    /// deprecated in [IETF RFC 3879]. So it is **NOT** considered as private on
152    /// this purpose.
153    ///
154    /// [IETF RFC 1918]: https://tools.ietf.org/html/rfc1918
155    /// [IETF RFC 4193]: https://tools.ietf.org/html/rfc4193
156    /// [IETF RFC 4291]: https://tools.ietf.org/html/rfc4291
157    /// [IETF RFC 3879]: https://tools.ietf.org/html/rfc3879
158    exclude_private_ips: bool,
159
160    /// When `true`, exclude link-local IPs.
161    ///
162    /// # IPv4
163    ///
164    /// The address is `169.254.0.0/16`.
165    ///
166    /// This property is defined by [IETF RFC 3927].
167    ///
168    /// # IPv6
169    ///
170    /// The address is a unicast address with link-local scope,  as defined in
171    /// [RFC 4291].
172    ///
173    /// A unicast address has link-local scope if it has the prefix `fe80::/10`,
174    /// as per [RFC 4291 section 2.4].
175    ///
176    /// [IETF RFC 3927]: https://tools.ietf.org/html/rfc3927
177    /// [RFC 4291]: https://tools.ietf.org/html/rfc4291
178    /// [RFC 4291 section 2.4]: https://tools.ietf.org/html/rfc4291#section-2.4
179    exclude_link_local_ips: bool,
180
181    /// When `true`, exclude loopback IP addresses.
182    ///
183    /// # IPv4
184    ///
185    /// This is a loopback address (`127.0.0.0/8`).
186    ///
187    /// This property is defined by [IETF RFC 1122].
188    ///
189    /// # IPv6
190    ///
191    /// This is the loopback address (`::1`), as defined in
192    /// [IETF RFC 4291 section 2.5.3].
193    ///
194    /// [IETF RFC 1122]: https://tools.ietf.org/html/rfc1122
195    /// [IETF RFC 4291 section 2.5.3]: https://tools.ietf.org/html/rfc4291#section-2.5.3
196    exclude_loopback_ips: bool,
197
198    /// When `true`, check mail addresses.
199    include_mail: bool,
200
201    /// Maximum number of redirects per request before returning an error.
202    ///
203    /// Defaults to [`DEFAULT_MAX_REDIRECTS`].
204    #[builder(default = DEFAULT_MAX_REDIRECTS)]
205    max_redirects: usize,
206
207    /// Maximum number of retries per request before returning an error.
208    ///
209    /// Defaults to [`DEFAULT_MAX_RETRIES`].
210    #[builder(default = DEFAULT_MAX_RETRIES)]
211    max_retries: u64,
212
213    /// Minimum accepted TLS version.
214    min_tls_version: Option<tls::Version>,
215
216    /// User-agent used for checking links.
217    ///
218    /// Defaults to [`DEFAULT_USER_AGENT`].
219    ///
220    /// # Notes
221    ///
222    /// This may be helpful for bypassing certain firewalls.
223    // Faking the user agent is necessary for some websites, unfortunately.
224    // Otherwise we get a 403 from the firewall (e.g. Sucuri/Cloudproxy on ldra.com).
225    #[builder(default_code = "String::from(DEFAULT_USER_AGENT)")]
226    user_agent: String,
227
228    /// When `true`, accept invalid SSL certificates.
229    ///
230    /// # Warning
231    ///
232    /// You should think very carefully before allowing invalid SSL
233    /// certificates. It will accept any certificate for any site to be trusted
234    /// including expired certificates. This introduces significant
235    /// vulnerabilities, and should only be used as a last resort.
236    // TODO: We should add a warning message in CLI. (Lucius, Jan 2023)
237    allow_insecure: bool,
238
239    /// Set of accepted URL schemes.
240    ///
241    /// Only links with matched URI schemes are checked. This has no effect when
242    /// it's empty.
243    schemes: HashSet<String>,
244
245    /// Default [headers] for every request.
246    ///
247    /// This allows working around validation issues on some websites. See also
248    /// [here] for usage examples.
249    ///
250    /// [headers]: https://docs.rs/http/latest/http/header/struct.HeaderName.html
251    /// [here]: https://docs.rs/reqwest/latest/reqwest/struct.ClientBuilder.html#method.default_headers
252    custom_headers: HeaderMap,
253
254    /// HTTP method used for requests, e.g. `GET` or `HEAD`.
255    #[builder(default = reqwest::Method::GET)]
256    method: reqwest::Method,
257
258    /// Set of accepted return codes / status codes.
259    ///
260    /// Unmatched return codes/ status codes are deemed as errors.
261    #[builder(default = DEFAULT_ACCEPTED_STATUS_CODES.clone())]
262    accepted: HashSet<StatusCode>,
263
264    /// Response timeout per request.
265    timeout: Option<Duration>,
266
267    /// Base for resolving paths.
268    ///
269    /// E.g. if the base is `/home/user/` and the path is `file.txt`, the
270    /// resolved path would be `/home/user/file.txt`.
271    base: BaseInfo,
272
273    /// Initial time between retries of failed requests.
274    ///
275    /// Defaults to [`DEFAULT_RETRY_WAIT_TIME_SECS`].
276    ///
277    /// # Notes
278    ///
279    /// For each request, the wait time increases using an exponential backoff
280    /// mechanism. For example, if the value is 1 second, then it waits for
281    /// 2 ^ (N-1) seconds before the N-th retry.
282    ///
283    /// This prevents spending too much system resources on slow responders and
284    /// prioritizes other requests.
285    #[builder(default_code = "Duration::from_secs(DEFAULT_RETRY_WAIT_TIME_SECS as u64)")]
286    retry_wait_time: Duration,
287
288    /// When `true`, requires using HTTPS when it's available.
289    ///
290    /// This would treat unencrypted links as errors when HTTPS is available.
291    /// It has no effect on non-HTTP schemes or if the URL doesn't support
292    /// HTTPS.
293    require_https: bool,
294
295    /// Cookie store used for requests.
296    ///
297    /// See <https://docs.rs/reqwest/latest/reqwest/struct.ClientBuilder.html#method.cookie_store>
298    cookie_jar: Option<Arc<CookieStoreMutex>>,
299
300    /// Enable the checking of fragments in links.
301    include_fragments: bool,
302
303    /// Enable the checking of wikilinks in markdown files.
304    /// Note that base must not be `None` if you set this `true`.
305    include_wikilinks: bool,
306
307    /// Requests run through this chain where each item in the chain
308    /// can modify the request. A chained item can also decide to exit
309    /// early and return a status, so that subsequent chain items are
310    /// skipped and the lychee-internal request chain is not activated.
311    plugin_request_chain: RequestChain,
312
313    /// Global rate limiting configuration that applies as defaults to all hosts
314    rate_limit_config: RateLimitConfig,
315
316    /// Per-host configuration overrides
317    hosts: HostConfigs,
318}
319
320impl Default for ClientBuilder {
321    #[inline]
322    fn default() -> Self {
323        Self::builder().build()
324    }
325}
326
327impl ClientBuilder {
328    /// Instantiates a [`Client`].
329    ///
330    /// # Errors
331    ///
332    /// Returns an `Err` if:
333    /// - The user-agent contains characters other than ASCII 32-127.
334    /// - The reqwest client cannot be instantiated. This occurs if a TLS
335    ///   backend cannot be initialized or the resolver fails to load the system
336    ///   configuration. See [here].
337    /// - The GitHub client cannot be created. Since the implementation also
338    ///   uses reqwest under the hood, this errors in the same circumstances as
339    ///   the last one.
340    ///
341    /// [here]: https://docs.rs/reqwest/latest/reqwest/struct.ClientBuilder.html#errors
342    pub fn client(self) -> Result<Client> {
343        let redirect_history = RedirectHistory::new();
344        let reqwest_client = self
345            .build_client(&redirect_history)?
346            .build()
347            .map_err(ErrorKind::BuildRequestClient)?;
348
349        let client_map = self.build_host_clients(&redirect_history)?;
350
351        let host_pool = HostPool::new(
352            self.rate_limit_config,
353            self.hosts,
354            reqwest_client,
355            client_map,
356        );
357
358        let github_client = match self.github_token.as_ref().map(ExposeSecret::expose_secret) {
359            Some(token) if !token.is_empty() => Some(
360                Octocrab::builder()
361                    .personal_token(token.to_string())
362                    .build()
363                    // this is essentially the same `reqwest::ClientBuilder::build` error
364                    // see https://docs.rs/octocrab/0.18.1/src/octocrab/lib.rs.html#360-364
365                    .map_err(|e: octocrab::Error| ErrorKind::BuildGithubClient(Box::new(e)))?,
366            ),
367            _ => None,
368        };
369
370        let filter = Filter {
371            includes: self.includes.map(Into::into),
372            excludes: self.excludes.map(Into::into),
373            schemes: self.schemes,
374            // exclude_all_private option turns on all "private" excludes,
375            // including private IPs, link-local IPs and loopback IPs
376            exclude_private_ips: self.exclude_all_private || self.exclude_private_ips,
377            exclude_link_local_ips: self.exclude_all_private || self.exclude_link_local_ips,
378            exclude_loopback_ips: self.exclude_all_private || self.exclude_loopback_ips,
379            include_mail: self.include_mail,
380        };
381
382        let website_checker = WebsiteChecker::new(
383            self.method,
384            self.retry_wait_time,
385            redirect_history.clone(),
386            self.max_retries,
387            self.accepted,
388            github_client,
389            self.require_https,
390            self.plugin_request_chain,
391            self.include_fragments,
392            Arc::new(host_pool),
393        );
394
395        Ok(Client {
396            remaps: self.remaps,
397            filter,
398            email_checker: MailChecker::new(self.timeout),
399            website_checker,
400            file_checker: FileChecker::new(
401                &self.base,
402                self.fallback_extensions,
403                self.index_files,
404                self.include_fragments,
405                self.include_wikilinks,
406            )?,
407        })
408    }
409
410    /// Build the host-specific clients with their host-specific headers
411    fn build_host_clients(&self, redirect_history: &RedirectHistory) -> Result<ClientMap> {
412        self.hosts
413            .iter()
414            .map(|(host, config)| {
415                let mut headers = self.default_headers()?;
416                headers.extend(config.headers.clone());
417                let client = self
418                    .build_client(redirect_history)?
419                    .default_headers(headers)
420                    .build()
421                    .map_err(ErrorKind::BuildRequestClient)?;
422                Ok((HostKey::from(host.as_str()), client))
423            })
424            .collect()
425    }
426
427    /// Create a [`reqwest::ClientBuilder`] based on various fields
428    fn build_client(&self, redirect_history: &RedirectHistory) -> Result<reqwest::ClientBuilder> {
429        let mut builder = reqwest::ClientBuilder::new()
430            .gzip(true)
431            .default_headers(self.default_headers()?)
432            .danger_accept_invalid_certs(self.allow_insecure)
433            .connect_timeout(CONNECT_TIMEOUT)
434            .tcp_keepalive(TCP_KEEPALIVE)
435            .redirect(redirect_policy(
436                redirect_history.clone(),
437                self.max_redirects,
438            ));
439
440        if let Some(cookie_jar) = self.cookie_jar.clone() {
441            builder = builder.cookie_provider(cookie_jar);
442        }
443
444        if let Some(min_tls) = self.min_tls_version {
445            builder = builder.min_tls_version(min_tls);
446        }
447
448        if let Some(timeout) = self.timeout {
449            builder = builder.timeout(timeout);
450        }
451
452        Ok(builder)
453    }
454
455    fn default_headers(&self) -> Result<HeaderMap> {
456        let user_agent = self.user_agent.clone();
457        let mut headers = self.custom_headers.clone();
458
459        if let Some(prev_user_agent) =
460            headers.insert(header::USER_AGENT, HeaderValue::try_from(&user_agent)?)
461        {
462            debug!(
463                "Found user-agent in headers: {}. Overriding it with {user_agent}.",
464                prev_user_agent.to_str().unwrap_or("�"),
465            );
466        }
467
468        headers.insert(
469            header::TRANSFER_ENCODING,
470            HeaderValue::from_static("chunked"),
471        );
472
473        Ok(headers)
474    }
475}
476
477/// Create our custom [`redirect::Policy`] in order to stop following redirects
478/// once `max_redirects` is reached and to record redirections for reporting.
479fn redirect_policy(redirect_history: RedirectHistory, max_redirects: usize) -> redirect::Policy {
480    redirect::Policy::custom(move |attempt| {
481        if attempt.previous().len() > max_redirects {
482            attempt.stop()
483        } else {
484            redirect_history.record_redirects(&attempt);
485            debug!("Following redirect to {}", attempt.url());
486            attempt.follow()
487        }
488    })
489}
490
/// Handles incoming requests and returns responses.
///
/// A `Client` is created through [`ClientBuilder::client`]. It optionally
/// remaps a URI, decides whether the URI is excluded, and otherwise hands it
/// to the file, mail or website checker (see [`Client::check`]).
///
/// See [`ClientBuilder`] which contains sane defaults for all configuration
/// options.
#[derive(Debug, Clone)]
pub struct Client {
    /// Optional remapping rules for URIs matching pattern.
    ///
    /// When `None`, URIs are checked as given.
    remaps: Option<Remaps>,

    /// Rules to decide whether a given link should be checked or ignored.
    filter: Filter,

    /// A checker for website URLs.
    website_checker: WebsiteChecker,

    /// A checker for file URLs.
    file_checker: FileChecker,

    /// A checker for email URLs.
    email_checker: MailChecker,
}
512
513impl Client {
    /// Returns the [`HostPool`] held by this client's website checker.
    #[must_use]
    pub fn host_pool(&self) -> Arc<HostPool> {
        self.website_checker.host_pool()
    }
519
520    /// Check a single request.
521    ///
522    /// `request` can be either a [`Request`] or a type that can be converted
523    /// into it. In any case, it must represent a valid URI.
524    ///
525    /// # Errors
526    ///
527    /// Returns an `Err` if:
528    /// - `request` does not represent a valid URI.
529    /// - Encrypted connection for a HTTP URL is available but unused. (Only
530    ///   checked when `Client::require_https` is `true`.)
531    #[allow(clippy::missing_panics_doc)]
532    pub async fn check<T, E>(&self, request: T) -> Result<Response>
533    where
534        Request: TryFrom<T, Error = E>,
535        ErrorKind: From<E>,
536    {
537        let Request {
538            ref mut uri,
539            credentials,
540            source,
541            span,
542            ..
543        } = request.try_into()?;
544
545        self.remap(uri)?;
546
547        if self.is_excluded(uri) {
548            return Ok(Response::new(
549                uri.clone(),
550                Status::Excluded,
551                source.into(),
552                span,
553                None,
554            ));
555        }
556
557        let start = std::time::Instant::now(); // Measure check time
558
559        let status = match uri.scheme() {
560            _ if uri.is_tel() => Status::Excluded, // We don't check tel: URIs
561            _ if uri.is_file() => self.check_file(uri).await,
562            _ if uri.is_mail() => self.check_mail(uri).await,
563            _ => self.check_website(uri, credentials).await?,
564        };
565
566        Ok(Response::new(
567            uri.clone(),
568            status,
569            source.into(),
570            span,
571            Some(start.elapsed()),
572        ))
573    }
574
    /// Check a single file using the file checker.
    ///
    /// This delegates directly to the file checker; no remapping or
    /// exclusion filtering is applied here.
    pub async fn check_file(&self, uri: &Uri) -> Status {
        self.file_checker.check(uri).await
    }
579
580    /// Remap `uri` using the client-defined remapping rules.
581    ///
582    /// # Errors
583    ///
584    /// Returns an `Err` if the final, remapped `uri` is not a valid URI.
585    pub fn remap(&self, uri: &mut Uri) -> Result<()> {
586        if let Some(ref remaps) = self.remaps {
587            uri.url = remaps.remap(&uri.url)?;
588        }
589        Ok(())
590    }
591
    /// Returns whether the given `uri` should be ignored from checking.
    ///
    /// The decision is made entirely by the client's filter. The `uri` is
    /// evaluated as given; remapping is not applied here.
    #[must_use]
    pub fn is_excluded(&self, uri: &Uri) -> bool {
        self.filter.is_excluded(uri)
    }
597
    /// Checks the given URI of a website.
    ///
    /// This delegates directly to the website checker, passing along the
    /// optional basic-auth `credentials`.
    ///
    /// # Errors
    ///
    /// This returns an `Err` if
    /// - The URI is invalid.
    /// - The request failed.
    /// - The response status code is not accepted.
    /// - The URI cannot be converted to HTTPS.
    pub async fn check_website(
        &self,
        uri: &Uri,
        credentials: Option<BasicAuthCredentials>,
    ) -> Result<Status> {
        self.website_checker.check_website(uri, credentials).await
    }
614
    /// Checks a `mailto` URI.
    ///
    /// This delegates directly to the mail checker.
    pub async fn check_mail(&self, uri: &Uri) -> Status {
        self.email_checker.check_mail(uri).await
    }
619}
620
621/// A shorthand function to check a single URI.
622///
623/// This provides the simplest link check utility without having to create a
624/// [`Client`]. For more complex scenarios, see documentation of
625/// [`ClientBuilder`] instead.
626///
627/// # Errors
628///
629/// Returns an `Err` if:
630/// - The request client cannot be built (see [`ClientBuilder::client`] for
631///   failure cases).
632/// - The request cannot be checked (see [`Client::check`] for failure cases).
633pub async fn check<T, E>(request: T) -> Result<Response>
634where
635    Request: TryFrom<T, Error = E>,
636    ErrorKind: From<E>,
637{
638    let client = ClientBuilder::builder().build().client()?;
639    client.check(request).await
640}
641
642#[cfg(test)]
643mod tests {
644    use std::{
645        fs::File,
646        time::{Duration, Instant},
647    };
648
649    use async_trait::async_trait;
650    use http::{StatusCode, header::HeaderMap};
651    use reqwest::header;
652    use tempfile::tempdir;
653    use test_utils::get_mock_client_response;
654    use test_utils::mock_server;
655    use test_utils::redirecting_mock_server;
656    use wiremock::{
657        Mock,
658        matchers::{method, path},
659    };
660
661    use super::ClientBuilder;
662    use crate::{
663        ErrorKind, Redirect, Redirects, Request, Status, Uri,
664        chain::{ChainResult, Handler, RequestChain},
665    };
666
667    #[tokio::test]
668    async fn test_nonexistent() {
669        let mock_server = mock_server!(StatusCode::NOT_FOUND);
670        let res = get_mock_client_response!(mock_server.uri()).await;
671
672        assert!(res.status().is_error());
673    }
674
675    #[tokio::test]
676    async fn test_nonexistent_with_path() {
677        let res = get_mock_client_response!("http://127.0.0.1/invalid").await;
678        assert!(res.status().is_error());
679    }
680
681    #[tokio::test]
682    async fn test_github() {
683        let res = get_mock_client_response!("https://github.com/lycheeverse/lychee").await;
684        assert!(res.status().is_success());
685    }
686
687    #[tokio::test]
688    async fn test_github_nonexistent_repo() {
689        let res = get_mock_client_response!("https://github.com/lycheeverse/not-lychee").await;
690        assert!(res.status().is_error());
691    }
692
693    #[tokio::test]
694    async fn test_github_nonexistent_file() {
695        let res = get_mock_client_response!(
696            "https://github.com/lycheeverse/lychee/blob/master/NON_EXISTENT_FILE.md",
697        )
698        .await;
699        assert!(res.status().is_error());
700    }
701
702    #[tokio::test]
703    async fn test_youtube() {
704        // This is applying a quirk. See the quirks module.
705        let res = get_mock_client_response!("https://www.youtube.com/watch?v=NlKuICiT470&list=PLbWDhxwM_45mPVToqaIZNbZeIzFchsKKQ&index=7").await;
706        assert!(res.status().is_success());
707
708        let res = get_mock_client_response!("https://www.youtube.com/watch?v=invalidNlKuICiT470&list=PLbWDhxwM_45mPVToqaIZNbZeIzFchsKKQ&index=7").await;
709        assert!(res.status().is_error());
710    }
711
712    #[tokio::test]
713    async fn test_basic_auth() {
714        let mut r: Request = "https://authenticationtest.com/HTTPAuth/"
715            .try_into()
716            .unwrap();
717
718        let res = get_mock_client_response!(r.clone()).await;
719        assert_eq!(res.status().code(), Some(401.try_into().unwrap()));
720
721        r.credentials = Some(crate::BasicAuthCredentials {
722            username: "user".into(),
723            password: "pass".into(),
724        });
725
726        let res = get_mock_client_response!(r).await;
727        assert!(matches!(
728            res.status(),
729            Status::Redirected(StatusCode::OK, _)
730        ));
731    }
732
733    #[tokio::test]
734    async fn test_non_github() {
735        let mock_server = mock_server!(StatusCode::OK);
736        let res = get_mock_client_response!(mock_server.uri()).await;
737
738        assert!(res.status().is_success());
739    }
740
741    #[tokio::test]
742    async fn test_invalid_ssl() {
743        let res = get_mock_client_response!("https://expired.badssl.com/").await;
744
745        assert!(res.status().is_error());
746
747        // Same, but ignore certificate error
748        let res = ClientBuilder::builder()
749            .allow_insecure(true)
750            .build()
751            .client()
752            .unwrap()
753            .check("https://expired.badssl.com/")
754            .await
755            .unwrap();
756        assert!(res.status().is_success());
757    }
758
759    #[tokio::test]
760    async fn test_file() {
761        let dir = tempdir().unwrap();
762        let file = dir.path().join("temp");
763        File::create(file).unwrap();
764        let uri = format!("file://{}", dir.path().join("temp").to_str().unwrap());
765
766        let res = get_mock_client_response!(uri).await;
767        assert!(res.status().is_success());
768    }
769
770    #[tokio::test]
771    async fn test_custom_headers() {
772        // See https://github.com/rust-lang/crates.io/issues/788
773        let mut custom = HeaderMap::new();
774        custom.insert(header::ACCEPT, "text/html".parse().unwrap());
775        let res = ClientBuilder::builder()
776            .custom_headers(custom)
777            .build()
778            .client()
779            .unwrap()
780            .check("https://crates.io/crates/lychee")
781            .await
782            .unwrap();
783        assert!(res.status().is_success());
784    }
785
786    #[tokio::test]
787    async fn test_exclude_mail_by_default() {
788        let client = ClientBuilder::builder()
789            .exclude_all_private(true)
790            .build()
791            .client()
792            .unwrap();
793        assert!(client.is_excluded(&Uri {
794            url: "mailto://mail@example.com".try_into().unwrap()
795        }));
796    }
797
798    #[tokio::test]
799    async fn test_include_mail() {
800        let client = ClientBuilder::builder()
801            .include_mail(false)
802            .exclude_all_private(true)
803            .build()
804            .client()
805            .unwrap();
806        assert!(client.is_excluded(&Uri {
807            url: "mailto://mail@example.com".try_into().unwrap()
808        }));
809
810        let client = ClientBuilder::builder()
811            .include_mail(true)
812            .exclude_all_private(true)
813            .build()
814            .client()
815            .unwrap();
816        assert!(!client.is_excluded(&Uri {
817            url: "mailto://mail@example.com".try_into().unwrap()
818        }));
819    }
820
821    #[tokio::test]
822    async fn test_include_tel() {
823        let client = ClientBuilder::builder().build().client().unwrap();
824        assert!(client.is_excluded(&Uri {
825            url: "tel:1234567890".try_into().unwrap()
826        }));
827    }
828
829    #[tokio::test]
830    async fn test_require_https() {
831        let client = ClientBuilder::builder().build().client().unwrap();
832        let res = client.check("http://example.com").await.unwrap();
833        assert!(res.status().is_success());
834
835        // Same request will fail if HTTPS is required
836        let client = ClientBuilder::builder()
837            .require_https(true)
838            .build()
839            .client()
840            .unwrap();
841        let res = client.check("http://example.com").await.unwrap();
842        assert!(res.status().is_error());
843    }
844
845    #[tokio::test]
846    async fn test_timeout() {
847        // Note: this checks response timeout, not connect timeout.
848        // To check connect timeout, we'd have to do something more involved,
849        // see: https://github.com/LukeMathWalker/wiremock-rs/issues/19
850        let mock_delay = Duration::from_millis(20);
851        let checker_timeout = Duration::from_millis(10);
852        assert!(mock_delay > checker_timeout);
853
854        let mock_server = mock_server!(StatusCode::OK, set_delay(mock_delay));
855
856        let client = ClientBuilder::builder()
857            .timeout(checker_timeout)
858            .max_retries(0u64)
859            .build()
860            .client()
861            .unwrap();
862
863        let res = client.check(mock_server.uri()).await.unwrap();
864        assert!(res.status().is_timeout());
865    }
866
    /// Checks that retries are spaced out by the configured back-off: a
    /// request that always times out must take roughly as long as the sum of
    /// the back-off waits.
    #[tokio::test]
    async fn test_exponential_backoff() {
        // The mock answers slower than the checker is willing to wait, so
        // every attempt made with `checker_timeout` times out.
        let mock_delay = Duration::from_millis(20);
        let checker_timeout = Duration::from_millis(10);
        assert!(mock_delay > checker_timeout);

        let mock_server = mock_server!(StatusCode::OK, set_delay(mock_delay));

        // Perform a warm-up request to ensure the lazy regexes
        // in lychee-lib/src/quirks/mod.rs are compiled.
        // On some platforms, this can take some time (approx. 110ms),
        // which should not be counted in the test.
        let warm_up_client = ClientBuilder::builder()
            .max_retries(0_u64)
            .build()
            .client()
            .unwrap();
        let _res = warm_up_client.check(mock_server.uri()).await.unwrap();

        let client = ClientBuilder::builder()
            .timeout(checker_timeout)
            .max_retries(3_u64)
            .retry_wait_time(Duration::from_millis(50))
            .build()
            .client()
            .unwrap();

        // Expected timeline (each attempt times out after about 10ms):
        // 1. First request fails with timeout
        // 2. Wait 50ms, then retry
        // 3. Second request fails with timeout
        // 4. Wait 100ms, then retry
        // 5. Third request fails with timeout
        // 6. Wait 200ms, then retry
        // Back-off waits alone: 50 + 100 + 200 = 350ms; the per-attempt
        // timeouts come on top of that.

        let start = Instant::now();
        let res = client.check(mock_server.uri()).await.unwrap();
        let end = start.elapsed();

        assert!(res.status().is_error());

        // On slow machines this can take longer than the nominal back-off
        // total, so up to 550ms is accepted. Less than 350ms would mean the
        // back-off waits were skipped.
        assert!((350..=550).contains(&end.as_millis()));
    }
913
914    #[tokio::test]
915    async fn test_avoid_reqwest_panic() {
916        let client = ClientBuilder::builder().build().client().unwrap();
917        // This request will result in an Unsupported status, but it won't panic
918        let res = client.check("http://\"").await.unwrap();
919
920        assert!(matches!(
921            res.status(),
922            Status::Unsupported(ErrorKind::BuildRequestClient(_))
923        ));
924        assert!(res.status().is_unsupported());
925    }
926
927    #[tokio::test]
928    async fn test_max_redirects() {
929        let mock_server = wiremock::MockServer::start().await;
930
931        let redirect_uri = format!("{}/redirect", &mock_server.uri());
932        let redirect = wiremock::ResponseTemplate::new(StatusCode::PERMANENT_REDIRECT)
933            .insert_header("Location", redirect_uri.as_str());
934
935        let redirect_count = 15usize;
936        let initial_invocation = 1;
937
938        // Set up infinite redirect loop
939        Mock::given(method("GET"))
940            .and(path("/redirect"))
941            .respond_with(move |_: &_| redirect.clone())
942            .expect(initial_invocation + redirect_count as u64)
943            .mount(&mock_server)
944            .await;
945
946        let res = ClientBuilder::builder()
947            .max_redirects(redirect_count)
948            .build()
949            .client()
950            .unwrap()
951            .check(redirect_uri.clone())
952            .await
953            .unwrap();
954
955        assert_eq!(
956            res.status(),
957            &Status::Error(ErrorKind::RejectedStatusCode(
958                StatusCode::PERMANENT_REDIRECT
959            ))
960        );
961    }
962
963    #[tokio::test]
964    async fn test_redirects() {
965        redirecting_mock_server!(async |redirect_url: Url, ok_url| {
966            let res = ClientBuilder::builder()
967                .max_redirects(1_usize)
968                .build()
969                .client()
970                .unwrap()
971                .check(Uri::from((redirect_url).clone()))
972                .await
973                .unwrap();
974
975            let mut redirects = Redirects::new(redirect_url);
976            redirects.push(Redirect {
977                url: ok_url,
978                code: StatusCode::PERMANENT_REDIRECT,
979            });
980            assert_eq!(res.status(), &Status::Redirected(StatusCode::OK, redirects));
981        })
982        .await;
983    }
984
985    #[tokio::test]
986    async fn test_unsupported_scheme() {
987        let examples = vec![
988            "ftp://example.com",
989            "gopher://example.com",
990            "slack://example.com",
991        ];
992
993        for example in examples {
994            let client = ClientBuilder::builder().build().client().unwrap();
995            let res = client.check(example).await.unwrap();
996            assert!(res.status().is_unsupported());
997        }
998    }
999
1000    #[tokio::test]
1001    async fn test_chain() {
1002        use reqwest::Request;
1003
1004        #[derive(Debug)]
1005        struct ExampleHandler();
1006
1007        #[async_trait]
1008        impl Handler<Request, Status> for ExampleHandler {
1009            async fn handle(&mut self, _: Request) -> ChainResult<Request, Status> {
1010                ChainResult::Done(Status::Excluded)
1011            }
1012        }
1013
1014        let chain = RequestChain::new(vec![Box::new(ExampleHandler {})]);
1015
1016        let client = ClientBuilder::builder()
1017            .plugin_request_chain(chain)
1018            .build()
1019            .client()
1020            .unwrap();
1021
1022        let result = client.check("http://example.com");
1023        let res = result.await.unwrap();
1024        assert_eq!(res.status(), &Status::Excluded);
1025    }
1026}