// lychee_lib/types/uri/raw.rs

1use std::{fmt::Display, num::NonZeroUsize};
2
3use serde::Serialize;
4
5/// A raw URI that got extracted from a document with a fuzzy parser.
6/// Note that this can still be invalid according to stricter URI standards
7#[derive(Clone, Debug, PartialEq, Eq, Hash)]
8pub struct RawUri {
9    /// Unparsed URI represented as a `String`. There is no guarantee that it
10    /// can be parsed into a URI object
11    pub text: String,
12    /// Name of the element that contained the URI (e.g. `a` for the <a> tag).
13    /// This is a way to classify links to make it easier to offer fine control
14    /// over the links that will be checked e.g. by trying to filter out links
15    /// that were found in unwanted tags like `<pre>` or `<code>`.
16    pub element: Option<String>,
17    /// Name of the attribute that contained the URI (e.g. `src`). This is a way
18    /// to classify links to make it easier to offer fine control over the links
19    /// that will be checked e.g. by trying to filter out links that were found
20    /// in unwanted attributes like `srcset` or `manifest`.
21    pub attribute: Option<String>,
22    /// The position of the URI in the document.
23    pub span: RawUriSpan,
24}
25
26impl Display for RawUri {
27    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
28        write!(f, "{:?} (Attribute: {:?})", self.text, self.attribute)
29    }
30}
31
/// Test-only conversion: builds a [`RawUri`] from its text and span, leaving
/// `element` and `attribute` unset.
#[cfg(test)]
impl From<(&str, RawUriSpan)> for RawUri {
    fn from(pair: (&str, RawUriSpan)) -> Self {
        let (text, span) = pair;
        Self {
            text: String::from(text),
            element: None,
            attribute: None,
            span,
        }
    }
}
43
44/// A span of a [`RawUri`] in the document.
45///
46/// The span can be used to give more precise error messages.
47#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize)]
48pub struct RawUriSpan {
49    /// The line of the URI.
50    ///
51    /// The line is 1-based.
52    pub line: NonZeroUsize,
53    /// The column of the URI if computable.
54    ///
55    /// The column is 1-based.
56    /// This is `None`, if the column can't be computed exactly,
57    /// e.g. when it comes from the `html5ever` parser.
58    pub column: Option<NonZeroUsize>,
59}
60
61impl Display for RawUriSpan {
62    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
63        if let Some(column) = self.column {
64            write!(f, "{}:{}", self.line, column)
65        } else {
66            write!(f, "{}", self.line)
67        }
68    }
69}
70
/// Test helper that builds a [`RawUriSpan`] from a 1-based line and column.
///
/// # Panics
///
/// Panics if `line` or `column` is `0`.
#[cfg(test)]
pub(crate) const fn span(line: usize, column: usize) -> RawUriSpan {
    let line = NonZeroUsize::new(line).unwrap();
    let column = NonZeroUsize::new(column).unwrap();
    RawUriSpan {
        line,
        column: Some(column),
    }
}
79
/// Test helper that builds a [`RawUriSpan`] from a 1-based line only; the column stays unset.
///
/// # Panics
///
/// Panics if `line` is `0`.
#[cfg(test)]
pub(crate) const fn span_line(line: usize) -> RawUriSpan {
    let line = NonZeroUsize::new(line).unwrap();
    RawUriSpan { line, column: None }
}
88
89/// A trait for calculating a [`RawUriSpan`] at a given byte offset in the document.
90///
91/// If you have a document and want spans with absolute positions, use [`SourceSpanProvider`].
92/// If you start inside a document at a given offset, use [`OffsetSpanProvider`].
93pub(crate) trait SpanProvider {
94    /// Compute the [`RawUriSpan`] at a given byte offset in the document.
95    fn span(&self, offset: usize) -> RawUriSpan;
96}
97
/// A [`SpanProvider`] that derives spans from the lines of an input document.
///
/// The start offset of every line is computed once up front, which makes building
/// [`RawUriSpan`]s faster afterwards. Consider [`OffsetSpanProvider`] if your offsets start
/// somewhere inside a document.
#[derive(Debug, Clone)]
pub(crate) struct SourceSpanProvider<'a> {
    /// Byte offset at which each line starts, indexed by (0-based) line number.
    line_starts: Vec<usize>,
    /// The document the offsets refer to.
    ///
    /// Kept around to compute columns: a character may be more than one byte long, so a
    /// column can't be derived from byte offsets alone.
    input: &'a str,
}
112
113impl<'a> SourceSpanProvider<'a> {
114    /// Create a [`SpanProvider`] from the given document.
115    ///
116    /// If the input is part of a larger document, consider using [`OffsetSpanProvider`] instead.
117    ///
118    /// This function isn't just a simple constructor but does some work, so call this only if you
119    /// want to use it.
120    pub(crate) fn from_input(input: &'a str) -> Self {
121        // FIXME: Consider making this lazy?
122        let line_starts: Vec<_> = core::iter::once(0)
123            .chain(input.match_indices('\n').map(|(i, _)| i + 1))
124            .collect();
125        Self { line_starts, input }
126    }
127}
128
129impl SpanProvider for SourceSpanProvider<'_> {
130    fn span(&self, offset: usize) -> RawUriSpan {
131        const ONE: NonZeroUsize = NonZeroUsize::MIN;
132        let line = match self.line_starts.binary_search(&offset) {
133            Ok(i) => i,
134            Err(i) => i - 1,
135        };
136        // Since we get the index by the binary_search above and subtract `1` if it would be larger
137        // than the length of the document, this shouldn't panic.
138        let line_offset = self.line_starts[line];
139        let column = self
140            .input
141            .get(line_offset..offset)
142            .or_else(|| self.input.get(line_offset..))
143            // columns are 1-based
144            .map(|v| ONE.saturating_add(v.chars().count()));
145
146        RawUriSpan {
147            // lines are 1-based
148            line: ONE.saturating_add(line),
149            column,
150        }
151    }
152}
153
154/// A [`SpanProvider`] which starts at a given offset in the document.
155///
156/// All given offsets are changed by the given amount before computing the
157/// resulting [`RawUriSpan`] with the inner [`SpanProvider`].
158#[derive(Clone, Debug)]
159pub(crate) struct OffsetSpanProvider<'a, T: SpanProvider = SourceSpanProvider<'a>> {
160    /// The byte offset in the document by which all given offsets are changed before computing the
161    /// resulting [`RawUriSpan`] with the inner [`SpanProvider`].
162    pub(crate) offset: usize,
163    /// The inner [`SpanProvider`] which will be used to determine the spans.
164    pub(crate) inner: &'a T,
165}
166
167impl<T: SpanProvider> SpanProvider for OffsetSpanProvider<'_, T> {
168    fn span(&self, offset: usize) -> RawUriSpan {
169        self.inner.span(self.offset + offset)
170    }
171}