lychee_lib/types/uri/raw.rs
1use std::{fmt::Display, num::NonZeroUsize};
2
3use serde::Serialize;
4
5/// A raw URI that got extracted from a document with a fuzzy parser.
6/// Note that this can still be invalid according to stricter URI standards
7#[derive(Clone, Debug, PartialEq, Eq, Hash)]
8pub struct RawUri {
9 /// Unparsed URI represented as a `String`. There is no guarantee that it
10 /// can be parsed into a URI object
11 pub text: String,
12 /// Name of the element that contained the URI (e.g. `a` for the <a> tag).
13 /// This is a way to classify links to make it easier to offer fine control
14 /// over the links that will be checked e.g. by trying to filter out links
15 /// that were found in unwanted tags like `<pre>` or `<code>`.
16 pub element: Option<String>,
17 /// Name of the attribute that contained the URI (e.g. `src`). This is a way
18 /// to classify links to make it easier to offer fine control over the links
19 /// that will be checked e.g. by trying to filter out links that were found
20 /// in unwanted attributes like `srcset` or `manifest`.
21 pub attribute: Option<String>,
22 /// The position of the URI in the document.
23 pub span: RawUriSpan,
24}
25
26impl Display for RawUri {
27 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
28 write!(f, "{:?} (Attribute: {:?})", self.text, self.attribute)
29 }
30}
31
/// Test-only shortcut: turns a `(text, span)` pair into a [`RawUri`] that has
/// neither an element nor an attribute.
#[cfg(test)]
impl From<(&str, RawUriSpan)> for RawUri {
    fn from(pair: (&str, RawUriSpan)) -> Self {
        let (raw, span) = pair;
        Self {
            text: String::from(raw),
            element: None,
            attribute: None,
            span,
        }
    }
}
43
44/// A span of a [`RawUri`] in the document.
45///
46/// The span can be used to give more precise error messages.
47#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize)]
48pub struct RawUriSpan {
49 /// The line of the URI.
50 ///
51 /// The line is 1-based.
52 pub line: NonZeroUsize,
53 /// The column of the URI if computable.
54 ///
55 /// The column is 1-based.
56 /// This is `None`, if the column can't be computed exactly,
57 /// e.g. when it comes from the `html5ever` parser.
58 pub column: Option<NonZeroUsize>,
59}
60
61impl Display for RawUriSpan {
62 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
63 if let Some(column) = self.column {
64 write!(f, "{}:{}", self.line, column)
65 } else {
66 write!(f, "{}", self.line)
67 }
68 }
69}
70
/// Test helper that builds a [`RawUriSpan`] carrying both a line and a column.
///
/// # Panics
///
/// Panics if `line` or `column` is `0`.
#[cfg(test)]
pub(crate) const fn span(line: usize, column: usize) -> RawUriSpan {
    let line = NonZeroUsize::new(line).unwrap();
    let column = NonZeroUsize::new(column).unwrap();
    RawUriSpan {
        line,
        column: Some(column),
    }
}
79
/// Test helper that builds a [`RawUriSpan`] from a line only; the column stays `None`.
///
/// # Panics
///
/// Panics if `line` is `0`.
#[cfg(test)]
pub(crate) const fn span_line(line: usize) -> RawUriSpan {
    let line = NonZeroUsize::new(line).unwrap();
    RawUriSpan { line, column: None }
}
88
89/// A trait for calculating a [`RawUriSpan`] at a given byte offset in the document.
90///
91/// If you have a document and want spans with absolute positions, use [`SourceSpanProvider`].
92/// If you start inside a document at a given offset, use [`OffsetSpanProvider`].
93pub(crate) trait SpanProvider {
94 /// Compute the [`RawUriSpan`] at a given byte offset in the document.
95 fn span(&self, offset: usize) -> RawUriSpan;
96}
97
/// A [`SpanProvider`] that derives spans from the lines of an input document.
///
/// The byte offset at which each line starts is computed up front, which makes
/// constructing [`RawUriSpan`]s faster.
/// If you start inside a document at a given offset, consider using [`OffsetSpanProvider`].
#[derive(Clone, Debug)]
pub(crate) struct SourceSpanProvider<'a> {
    /// Byte offset in the document at which each line starts, indexed by
    /// zero-based line number.
    line_starts: Vec<usize>,
    /// The document the offsets refer to.
    ///
    /// Columns are computed from it, because a character is not necessarily a
    /// single byte long and a byte distance is therefore not a column.
    input: &'a str,
}
112
113impl<'a> SourceSpanProvider<'a> {
114 /// Create a [`SpanProvider`] from the given document.
115 ///
116 /// If the input is part of a larger document, consider using [`OffsetSpanProvider`] instead.
117 ///
118 /// This function isn't just a simple constructor but does some work, so call this only if you
119 /// want to use it.
120 pub(crate) fn from_input(input: &'a str) -> Self {
121 // FIXME: Consider making this lazy?
122 let line_starts: Vec<_> = core::iter::once(0)
123 .chain(input.match_indices('\n').map(|(i, _)| i + 1))
124 .collect();
125 Self { line_starts, input }
126 }
127}
128
129impl SpanProvider for SourceSpanProvider<'_> {
130 fn span(&self, offset: usize) -> RawUriSpan {
131 const ONE: NonZeroUsize = NonZeroUsize::MIN;
132 let line = match self.line_starts.binary_search(&offset) {
133 Ok(i) => i,
134 Err(i) => i - 1,
135 };
136 // Since we get the index by the binary_search above and subtract `1` if it would be larger
137 // than the length of the document, this shouldn't panic.
138 let line_offset = self.line_starts[line];
139 let column = self
140 .input
141 .get(line_offset..offset)
142 .or_else(|| self.input.get(line_offset..))
143 // columns are 1-based
144 .map(|v| ONE.saturating_add(v.chars().count()));
145
146 RawUriSpan {
147 // lines are 1-based
148 line: ONE.saturating_add(line),
149 column,
150 }
151 }
152}
153
154/// A [`SpanProvider`] which starts at a given offset in the document.
155///
156/// All given offsets are changed by the given amount before computing the
157/// resulting [`RawUriSpan`] with the inner [`SpanProvider`].
158#[derive(Clone, Debug)]
159pub(crate) struct OffsetSpanProvider<'a, T: SpanProvider = SourceSpanProvider<'a>> {
160 /// The byte offset in the document by which all given offsets are changed before computing the
161 /// resulting [`RawUriSpan`] with the inner [`SpanProvider`].
162 pub(crate) offset: usize,
163 /// The inner [`SpanProvider`] which will be used to determine the spans.
164 pub(crate) inner: &'a T,
165}
166
167impl<T: SpanProvider> SpanProvider for OffsetSpanProvider<'_, T> {
168 fn span(&self, offset: usize) -> RawUriSpan {
169 self.inner.span(self.offset + offset)
170 }
171}