Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions src/parser/lexer/lexeme/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@ impl<'i, T> Lexeme<'i, T> {
&self.input
}

/// Returns the value of `previously_consumed_byte_count` for this lexeme.
///
/// NOTE(review): presumably this is the number of document bytes consumed before
/// the current input chunk, i.e. the absolute offset of the start of `input` —
/// confirm against the code that populates the field.
#[inline]
pub const fn input_byte_offset(&self) -> usize {
self.previously_consumed_byte_count
}

#[inline]
pub const fn token_outline(&self) -> &T {
&self.token_outline
Expand Down
36 changes: 35 additions & 1 deletion src/rewritable_units/tokens/attributes.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
use crate::base::{Bytes, BytesCow, eq_case_insensitive};
use crate::base::{Bytes, BytesCow, SourceLocation, eq_case_insensitive};
use crate::errors::RewritingError;
use crate::html::escape_double_quotes_only;
use crate::parser::AttributeBuffer;
use crate::rewritable_units::Serialize;
use encoding_rs::Encoding;
use std::cell::OnceCell;
use std::fmt::{self, Debug};
use std::num::NonZero;
use thiserror::Error;

/// An error that occurs when invalid value is provided for the attribute name.
Expand Down Expand Up @@ -41,6 +42,8 @@ pub struct Attribute<'i> {
value: BytesCow<'i>,
raw: Option<Bytes<'i>>,
encoding: &'static Encoding,
/// Absolute byte offsets in the source document at which the attribute name and the attribute value start, in that order.
name_value_start: Option<(usize, NonZero<usize>)>,
}

impl<'i> Attribute<'i> {
Expand All @@ -51,12 +54,14 @@ impl<'i> Attribute<'i> {
value: BytesCow<'i>,
raw: Bytes<'i>,
encoding: &'static Encoding,
name_value_start: Option<(usize, NonZero<usize>)>,
) -> Self {
Attribute {
name,
value,
raw: Some(raw),
encoding,
name_value_start,
}
}

Expand Down Expand Up @@ -104,10 +109,33 @@ impl<'i> Attribute<'i> {
self.value.as_string(self.encoding)
}

/// Source location of this attribute's name within the original document.
///
/// The location starts at the recorded name offset and spans `self.name.len()`.
///
/// Yields `None` when no source position is recorded, which is the case for
/// attributes that were added or modified.
#[inline]
#[must_use]
pub fn name_source_location(&self) -> Option<SourceLocation> {
    // The tuple is `(name start, value start)`; only the former is needed here.
    let (name_start, _) = self.name_value_start?;
    Some(SourceLocation::from_start_len(name_start, self.name.len()))
}

/// Source location of this attribute's value within the original document.
///
/// Only the value itself is covered; surrounding quotes and the `=` sign are
/// not part of the range.
///
/// Yields `None` when no source position is recorded, which is the case for
/// attributes that were added or modified.
#[inline]
#[must_use]
pub fn value_source_location(&self) -> Option<SourceLocation> {
    // The tuple is `(name start, value start)`; only the latter is needed here.
    let (_, value_start) = self.name_value_start?;
    Some(SourceLocation::from_start_len(
        value_start.get(),
        self.value.len(),
    ))
}

/// Replaces the attribute value with an owned copy of `value`, built with the
/// attribute's encoding.
///
/// The cached raw bytes and the recorded source offsets are cleared as well, so
/// the source-location accessors return `None` afterwards.
#[inline]
fn set_value(&mut self, value: &str) {
    let replacement = BytesCow::owned_from_str(value, self.encoding);

    // Neither the raw slice nor the recorded offsets describe the new value.
    self.name_value_start = None;
    self.raw = None;
    self.value = replacement;
}
}

Expand Down Expand Up @@ -141,6 +169,7 @@ pub(crate) struct Attributes<'i> {
attribute_buffer: &'i AttributeBuffer,
items: OnceCell<Vec<Attribute<'i>>>,
pub(crate) encoding: &'static Encoding,
source_byte_offset: usize,
}

impl<'i> Attributes<'i> {
Expand All @@ -150,12 +179,14 @@ impl<'i> Attributes<'i> {
input: &'i Bytes<'i>,
attribute_buffer: &'i AttributeBuffer,
encoding: &'static Encoding,
source_byte_offset: usize,
) -> Self {
Attributes {
input,
attribute_buffer,
items: OnceCell::default(),
encoding,
source_byte_offset,
}
}

Expand Down Expand Up @@ -210,6 +241,7 @@ impl<'i> Attributes<'i> {
value: BytesCow::owned_from_str(value, encoding),
raw: None,
encoding,
name_value_start: None,
});
}
}
Expand Down Expand Up @@ -240,6 +272,7 @@ impl<'i> Attributes<'i> {
debug_assert!(false);
Bytes::default()
};
let base = self.source_byte_offset;
self.attribute_buffer.iter().map(move |a| {
Attribute::new(
self.input
Expand All @@ -254,6 +287,7 @@ impl<'i> Attributes<'i> {
.opt_slice(Some(a.raw_range))
.unwrap_or_else(cant_fail),
self.encoding,
NonZero::new(base + a.value.start).map(|val| (base + a.name.start, val)),
)
})
}
Expand Down
2 changes: 1 addition & 1 deletion src/rewritable_units/tokens/capturer/to_token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ impl ToToken for TagLexeme<'_> {
capture_flags.remove(TokenCaptureFlags::NEXT_START_TAG);
ToTokenResult::Token(StartTag::new_token(
self.part(name),
Attributes::new(self.input(), attributes, encoding),
Attributes::new(self.input(), attributes, encoding, self.input_byte_offset()),
ns,
self_closing,
self.spanned().into(),
Expand Down
74 changes: 74 additions & 0 deletions src/rewriter/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1105,5 +1105,79 @@ mod tests {
"Error in element text handler",
);
}

/// Checks that the name and value source locations reported for parsed attributes
/// index back into the original markup, for double-quoted, single-quoted and
/// unquoted attribute values.
#[test]
fn attribute_source_locations() {
    let html = r#"<div class="foo" id='bar' data-x=baz>"#;
    let recorded = Arc::new(Mutex::new(Vec::new()));
    let sink = Arc::clone(&recorded);

    rewrite_str::<LocalHandlerTypes>(
        html,
        RewriteStrSettings {
            element_content_handlers: vec![element!("div", move |el| {
                for attr in el.attributes() {
                    let name_range = attr.name_source_location().map(|loc| loc.bytes());
                    let value_range = attr.value_source_location().map(|loc| loc.bytes());
                    let entry = (attr.name(), attr.value(), name_range, value_range);
                    sink.lock().unwrap().push(entry);
                }
                Ok(())
            })],
            ..RewriteStrSettings::new()
        },
    )
    .unwrap();

    let entries = recorded.lock().unwrap();

    // (name, value) pairs in document order: double-quoted, single-quoted, unquoted.
    let expected = [("class", "foo"), ("id", "bar"), ("data-x", "baz")];

    for (idx, &(name, value)) in expected.iter().enumerate() {
        let entry = &entries[idx];

        // The decoded name and value match the markup...
        assert_eq!(entry.0, name);
        assert_eq!(entry.1, value);

        // ...and the reported byte ranges slice the same text out of the source.
        assert_eq!(&html[entry.2.clone().unwrap()], name);
        assert_eq!(&html[entry.3.clone().unwrap()], value);
    }
}

/// Checks that an attribute added through `set_attribute` reports no source
/// location for either its name or its value.
///
/// A shared flag records that the `added` attribute was actually inspected, so
/// the test cannot pass vacuously if the handler never runs or the attribute is
/// never yielded by `attributes()`.
#[test]
fn attribute_source_locations_none_for_programmatic_attributes() {
    let inspected = Arc::new(Mutex::new(false));
    let inspected_clone = Arc::clone(&inspected);

    rewrite_str::<LocalHandlerTypes>(
        "<div></div>",
        RewriteStrSettings {
            element_content_handlers: vec![element!("div", move |el| {
                el.set_attribute("added", "val").unwrap();
                for attr in el.attributes() {
                    if attr.name() == "added" {
                        assert!(
                            attr.name_source_location().is_none(),
                            "programmatic attribute should have no name source location",
                        );
                        assert!(
                            attr.value_source_location().is_none(),
                            "programmatic attribute should have no value source location",
                        );
                        *inspected_clone.lock().unwrap() = true;
                    }
                }
                Ok(())
            })],
            ..RewriteStrSettings::new()
        },
    )
    .unwrap();

    // Without this check the assertions above could be skipped entirely.
    assert!(
        *inspected.lock().unwrap(),
        "the programmatically added attribute was never inspected",
    );
}
}
}
Loading