Install the main crate:
cargo add microdom

Basic usage:
use microdom::parse_html;
fn main() {
// Parse a small fragment, then pull out elements with XPath expressions.
let markup = r#"<h1>Fast API <b>parallel</b> calls</h1>"#;
let dom = parse_html(markup);
// Each selection result is rendered to a string for printing.
let heading = dom.select("//h1").unwrap().to_string();
let emphasis = dom.select("//h1/b").unwrap().to_string();
println!("title: {}", heading); // <h1>Fast API <b>parallel</b> calls</h1>
println!("bold: {}", emphasis); // <b>parallel</b>
}

XPath selection for HTML in Rust. Alpha stage; expect breaking changes.
Warning: The HTML parser attempts to follow the HTML5 specification and to repair broken HTML, but it currently does so imperfectly. It should nevertheless be entirely sufficient for most parsing needs.
- microdom (Changelog): The main crate. It provides HTML parsing and XPath selection by orchestrating the other two crates.
- coredom (Changelog): The core DOM representation and basic tree operations.
- microdom-xpath-engine (Changelog): The standalone XPath 3.1 evaluation engine.
- 2x faster than html5ever (see benchmarks)
- Zero dependencies
- XPath 3.1 support out of the box
- Custom function extensions
| Library | HTML+CSS+JS ~10KB | Clean HTML ~20KB | Micro | Avg speed | Notes |
|---|---|---|---|---|---|
| microdom | 75 MiB/s | 81 MiB/s | 3.1 MiB/s | ~53 MiB/s | Full DOM tree ¹ |
| html5ever | 26 MiB/s | 23 MiB/s | 7.8 MiB/s | ~19 MiB/s | Full DOM tree; faster on tiny docs |
| tl | 190 MiB/s | 129 MiB/s | 37 MiB/s | ~119 MiB/s | |
¹ microdom does not fully follow the HTML standard — on severely malformed markup, content may leak into attribute values. However, it never panics or fails: parsing is always attempted. Sufficient for most data extraction use cases.
use microdom::{parse_html, SelectResult, CustomEvaluator, XPathValue};
fn main() {
let html = r#"
<html>
<body>
<div id="answer-1" class="answer">Answer 1</div>
<div id="answer-2" class="answer">Answer 2</div>
<div id="answer-3" class="answer new-answer">Answer 3</div>
</body>
</html>
"#;
// A reusable evaluator holding "library" functions.
let mut lib_evaluator = CustomEvaluator::new();
// `lib:hello` takes no arguments, so the parameter is deliberately ignored.
lib_evaluator.register_native("lib:hello", |_args| {
    Ok(XPathValue::String("Hello, lib!".into()))
});
let mut custom_evaluator = CustomEvaluator::new();
// Bring in the functions registered on `lib_evaluator`; this example later
// calls `lib:hello()` through `custom_evaluator`.
custom_evaluator.update(&lib_evaluator);
custom_evaluator.register_native("userpath:hello", |args| {
    // Use `get` instead of `args[0]` so that calling `userpath:hello()` with
    // no arguments falls back to an empty value rather than panicking.
    let first = args.get(0).map(|v| v.to_str()).unwrap_or_default();
    Ok(XPathValue::String(format!("Hello, World! {:?}", first))) // Hello, World! "<some_argument>"
});
// `html` is already a `&str`, so it is passed directly (no extra borrow).
let dom = parse_html(html);
let xpath = r#"//*[@id="answer-*" and not(contains(@class, "new"))]"#;
// Selecting through the evaluator makes the registered custom functions
// available to the expression. An expression that uses no custom functions
// can also be run with `dom.select(xpath)`.
let result = custom_evaluator.select(&dom, xpath).unwrap();
// Expected matches (the loop below prints one `Node:` line per match):
//   <div id="answer-1" class="answer">Answer 1</div>
//   <div id="answer-2" class="answer">Answer 2</div>
match result {
    SelectResult::List(nodes) => {
        for node in nodes {
            println!("\n\n\nNode: {}", node);
        }
    }
    SelectResult::Text(s) => {
        println!("XPath '{}' returned string: {}", xpath, s);
    }
    // Any remaining `SelectResult` variant is reported generically.
    _ => {
        println!("XPath did not return a node set.");
    }
}
// Custom function usage: `lib:hello` is called through `custom_evaluator`.
let xpath = r#"lib:hello()"#;
let result = custom_evaluator.select(&dom, xpath).unwrap();
println!("XPath '{}' returned: {}", xpath, result);
}

- ✅ Node selection (//, /, [@attr])
- ✅ Wildcards (*) in node names and attributes ([@id="answer-*"])
- ✅ Predicates ([position()], [@id="x"])
- ✅ Union (|)
- ✅ count(), string(), text()
- ✅ contains(), starts-with(), normalize-space()
- ✅ substring-before/after()
- ✅ if/then/else
- ✅ every $score in //li[@class='comment']/@data satisfies $score >= 0
- ✅ for $i in //li return $i/a/@href
- ✅ let $f := function($a) { $a > 5 } return filter(//li/@data, $f)
- ✅ Logical operators (and, or, not, =, !=, <, >, <=, >=)
- ✅ User-defined functions (e.g. me_feature:hello(), me_feature:save("data"), etc.)
- ✅ Mathematical functions (sum, avg, max, min, median, round, floor, ceiling, abs, math:pi(), math:sqrt(), math:pow(), math:cos()....)
- ⚠️ In development: regex matching (matches(), replace())
- ⚠️ Returned data type: SelectResult (Text, Int, List)
basic data:
use microdom::{parse_html, SelectResult, CustomEvaluator, XPathValue};
let html = r#"
<html>
<body>
<div class="products">
<div class="product" data-price="120" data-stock="5" data-rating="4.8">
<h2>Mechanical Keyboard</h2>
<span class="tag">electronics</span>
<span class="tag">office</span>
</div>
<div class="product" data-price="35" data-stock="0" data-rating="3.2">
<h2>USB Hub</h2>
<span class="tag">electronics</span>
</div>
<div class="product" data-price="899" data-stock="2" data-rating="4.9">
<h2>Monitor 4K</h2>
<span class="tag">electronics</span>
<span class="tag">display</span>
</div>
<div class="product" data-price="19" data-stock="100" data-rating="4.1">
<h2>Mouse Pad XL</h2>
<span class="tag">office</span>
</div>
</div>
</body>
</html>
"#;
let dom = parse_html(html);

short example:
let xpath = r#"//*[@id="answer-*" and not(contains(@class, "new"))]"#;
let result = dom.select(xpath).unwrap();
println!("XPath '{}' returned: {}", xpath, result);

custom function example:
let mut example_evaluator = CustomEvaluator::new();
// shop:score($price, $stock, $rating) → numeric score used to filter products
example_evaluator.register("shop:score", |args| {
    // Reads the argument at `index` as an f64; a missing or non-numeric
    // argument counts as 0.0.
    let number_at = |index: usize| -> f64 {
        args.get(index)
            .and_then(|value| value.to_str().parse::<f64>().ok())
            .unwrap_or(0.0)
    };
    let (price, stock, rating) = (number_at(0), number_at(1), number_at(2));
    // Products that are not in stock lose 50 points.
    let stock_adjustment = if stock > 0.0 { 0.0 } else { -50.0 };
    // One point off per 100 units of price, capped at 10 points.
    let price_adjustment = -(price / 100.0).min(10.0);
    Ok(XPathValue::Number(rating * 20.0 + stock_adjustment + price_adjustment))
});
let result = example_evaluator
    .select(
        &dom,
        r#"//div[@class="product"][shop:score(@data-price, @data-stock, @data-rating) > 60]/h2"#,
    )
    .unwrap();
// SelectResult::List(["Mechanical Keyboard", "Monitor 4K", "Mouse Pad XL"])
println!("{result:?}");

let mut example_evaluator = CustomEvaluator::new();
// str:slug($text): "Mechanical Keyboard" → "mechanical-keyboard"
example_evaluator.register("str:slug", |args| {
    let text = args.get(0).map(|value| value.to_str()).unwrap_or_default();
    // Lower-case the text and turn every non-alphanumeric character into '-'.
    let dashed: String = text
        .to_lowercase()
        .chars()
        .map(|ch| if ch.is_alphanumeric() { ch } else { '-' })
        .collect();
    // Dropping empty pieces collapses runs of '-' and trims them from both ends.
    let words: Vec<&str> = dashed
        .split('-')
        .filter(|piece| !piece.is_empty())
        .collect();
    Ok(XPathValue::String(words.join("-")))
});
let result = example_evaluator
    .select(
        &dom,
        r#"//div[@class="product"][@data-stock > 0]/str:slug(h2)"#,
    )
    .unwrap();
// SelectResult::List(["mechanical-keyboard", "monitor-4k", "mouse-pad-xl"])
println!("{result:?}");

Licensed under either of
- Apache License, Version 2.0 LICENSE-APACHE
- MIT license LICENSE-MIT
at your option.