From dc1a3b5f422696e0b59e9949cfd50dae9dae27cb Mon Sep 17 00:00:00 2001 From: Karuro Chari Date: Fri, 4 Jul 2025 18:58:23 +0100 Subject: [PATCH 01/12] Better messages for missing libraries, improving dependencies and fixing TreeRaw iterator. Linking regression emerged, to be investigated and fixed. --- include/vs-xml/fwd/format.hpp | 38 +++++++++++++++++-------------- include/vs-xml/fwd/print.hpp | 38 +++++++++++++++++-------------- include/vs-xml/private/impl.hpp | 13 +++++------ include/vs-xml/tree.hpp | 40 +++++++++++++++++++++++++++++++++ test/src/tree-iterator.cpp | 4 ++++ 5 files changed, 91 insertions(+), 42 deletions(-) diff --git a/include/vs-xml/fwd/format.hpp b/include/vs-xml/fwd/format.hpp index 0535ceff..9e8c5f90 100644 --- a/include/vs-xml/fwd/format.hpp +++ b/include/vs-xml/fwd/format.hpp @@ -11,20 +11,24 @@ */ - #if VS_XML_USE_FMT == true && __has_include() - - #include - - namespace xml{ - using fmt::format; - } - - #else - - #include - - namespace xml{ - using std::format; - } - - #endif \ No newline at end of file +#if VS_XML_USE_FMT == true && __has_include() + +#include + +namespace xml{ + using fmt::format; +} + +#else + +#if VS_XML_USE_FMT == true && !__has_include() + #warning "Unable to use fmt, header missing" +#endif + +#include + +namespace xml{ + using std::format; +} + +#endif \ No newline at end of file diff --git a/include/vs-xml/fwd/print.hpp b/include/vs-xml/fwd/print.hpp index 2b6326b1..281de798 100644 --- a/include/vs-xml/fwd/print.hpp +++ b/include/vs-xml/fwd/print.hpp @@ -11,20 +11,24 @@ */ - #if VS_XML_USE_FMT == true && __has_include() - - #include - - namespace xml{ - using fmt::print; - } - - #else - - #include - - namespace xml{ - using std::print; - } - - #endif \ No newline at end of file +#if VS_XML_USE_FMT == true && __has_include() + +#include + +namespace xml{ + using fmt::print; +} + +#else + +#if VS_XML_USE_FMT == true && !__has_include() + #warning "Unable to use fmt, header missing" +#endif + +#include + 
+namespace xml{ + using std::print; +} + +#endif \ No newline at end of file diff --git a/include/vs-xml/private/impl.hpp b/include/vs-xml/private/impl.hpp index 4a3d37aa..04c747f0 100644 --- a/include/vs-xml/private/impl.hpp +++ b/include/vs-xml/private/impl.hpp @@ -10,7 +10,6 @@ * */ -#include "vs-xml/filters.hpp" #include #include #include @@ -18,7 +17,6 @@ #include #include -#include #include #include @@ -27,7 +25,7 @@ #include #include #include -#include +//#include namespace VS_XML_NS{ @@ -357,7 +355,7 @@ else if (type() == type_t::PROC) return ((const proc_t*)this)-> X;\ else if (type() == type_t::CDATA) return ((const cdata_t*)this)-> X;\ else if (type() == type_t::MARKER) return ((const marker_t*)this)-> X;\ else{\ - xml_assert(false,std::format("Invalid XML thing type {}",(int)type()).c_str());\ + xml_assert(false,xml::format("Invalid XML thing type {}",(int)type()).c_str());\ std::unreachable();\ } @@ -533,12 +531,11 @@ inline auto base_t::attrs() const{ template inline auto base_t::visitor() const{ - struct self{ - visitor_iterator begin() const {return visitor_iterator(base);} - visitor_iterator end() const {return visitor_iterator(base->has_parent()?base->parent():nullptr);} + visitor_iterator begin() const {return visitor_iterator((const unknown_t*)base);} + visitor_iterator end() const {return visitor_iterator(base->has_parent()?(const unknown_t*)base->parent():nullptr);} - self(const TreeRaw& b):base(&b){} + self(const base_t& b):base(&b){} private: const base_t* base; diff --git a/include/vs-xml/tree.hpp b/include/vs-xml/tree.hpp index 201864ff..afdeb84c 100644 --- a/include/vs-xml/tree.hpp +++ b/include/vs-xml/tree.hpp @@ -156,6 +156,8 @@ struct TreeRaw{ return std::string_view(s.base+(char*)symbols.data(),s.base+(char*)symbols.data()+s.length); } + void visit_t(const unknown_t* node, bool(*test)(const unknown_t*), void(*before)(const unknown_t*), void(*after)(const unknown_t*)); + /** * @brief Visit all nodes starting from node. 
Evaluate if children should be considered by evaluating fn * @@ -292,4 +294,42 @@ namespace stored{ using Tree = Stored; } +} + +#include + +namespace VS_XML_NS{ + + inline void TreeRaw::visit_t(const unknown_t* node, bool(*test)(const unknown_t*), void(*before)(const unknown_t*), void(*after)(const unknown_t*)){ + while(true){ + if(node==nullptr)break; + if(before!=nullptr)before(node); + + bool children_visited = !test(node); + for(;;){ + if(node->has_children() && !children_visited){ + auto [l,r] =*node->children_range(); + node=l; + children_visited = false; + break; + } + if(node->has_next()){ + if(after!=nullptr)after(node); + node=node->next(); + children_visited = false; + break; + } + if(node->has_parent()){ + if(after!=nullptr)after(node); + node = (const unknown_t*)node->parent(); + children_visited = true; + } + else{ + if(after!=nullptr)after(node); + node = nullptr; + break; + } + } + } + } } \ No newline at end of file diff --git a/test/src/tree-iterator.cpp b/test/src/tree-iterator.cpp index 4db0b535..f3d30526 100644 --- a/test/src/tree-iterator.cpp +++ b/test/src/tree-iterator.cpp @@ -61,6 +61,10 @@ int main(){ std::print(">{}\n",(int)it.type()); } + for(auto it: tree.downgrade().root().visitor()){ + std::print(">{}\n",(int)it.type()); + } + std::print("\n\n"); xml::TreeRaw::visit(&tree.downgrade().root(),+[](const xml::unknown_t*){return true;}); From 1a8210d5c21e35c3bbb0e03b45182130e1931f6a Mon Sep 17 00:00:00 2001 From: Karuro Chari Date: Fri, 4 Jul 2025 19:34:39 +0100 Subject: [PATCH 02/12] More fixes of namespace usage. And I found out the issue. When using fmt, since that library is not linked in the final application, some symbols being generated by libvs-xml inline in the headers are not part of the final optimized blob, and code will complain it cannot be found. 
--- benchmark/src/serialize-big.cpp | 6 +++--- include/vs-xml/private/impl.hpp | 1 + include/vs-xml/private/wrp-impl.hpp | 14 +++++++------- include/vs-xml/tree-builder.hpp | 2 +- utils/decode.cpp | 2 +- utils/encode.cpp | 8 ++++---- 6 files changed, 17 insertions(+), 16 deletions(-) diff --git a/benchmark/src/serialize-big.cpp b/benchmark/src/serialize-big.cpp index 7b71bc5d..804c952e 100644 --- a/benchmark/src/serialize-big.cpp +++ b/benchmark/src/serialize-big.cpp @@ -16,8 +16,8 @@ int test_vs(std::string_view xmlInput){ try{ - xml::DocumentBuilder<{.symbols=xml::builder_config_t::EXTERN_REL,.raw_strings=true}> bld(xmlInput); - xml::Parser parser(xmlInput, bld); + VS_XML_NS::DocumentBuilder<{.symbols=VS_XML_NS::builder_config_t::EXTERN_REL,.raw_strings=true}> bld(xmlInput); + VS_XML_NS::Parser parser(xmlInput, bld); std::ignore = parser.parse(); auto tree = bld.close(); @@ -45,7 +45,7 @@ int test_vs2(std::string_view binInput){ std::span binInput((const uint8_t*)mmap.data(),mmap.size()); - auto tree =xml::Document::from_binary(binInput); + auto tree =VS_XML_NS::Document::from_binary(binInput); std::string str; std::stringstream file(str); diff --git a/include/vs-xml/private/impl.hpp b/include/vs-xml/private/impl.hpp index 04c747f0..b7853c9b 100644 --- a/include/vs-xml/private/impl.hpp +++ b/include/vs-xml/private/impl.hpp @@ -24,6 +24,7 @@ #include #include + #include //#include diff --git a/include/vs-xml/private/wrp-impl.hpp b/include/vs-xml/private/wrp-impl.hpp index 000b9f23..2b986790 100644 --- a/include/vs-xml/private/wrp-impl.hpp +++ b/include/vs-xml/private/wrp-impl.hpp @@ -44,21 +44,21 @@ struct sv { else if(r.tree==nullptr) return std::string_view(r) == l; if(l.tree->configs.raw_strings && r.tree->configs.raw_strings){ - auto ll = xml::serialize::unescaped_view((std::string_view)l); - auto rr = xml::serialize::unescaped_view((std::string_view)r); + auto ll = VS_XML_NS::serialize::unescaped_view((std::string_view)l); + auto rr = 
VS_XML_NS::serialize::unescaped_view((std::string_view)r); return std::equal(ll.begin(),ll.end(),rr.begin(),rr.end()); } else if(!l.tree->configs.raw_strings && !r.tree->configs.raw_strings){ return (std::string_view)l == (std::string_view)r; } if(l.tree->configs.raw_strings){ - auto ll = xml::serialize::unescaped_view((std::string_view)l); + auto ll = VS_XML_NS::serialize::unescaped_view((std::string_view)l); auto rr = (std::string_view)r; return std::equal(ll.begin(),ll.end(),rr.begin(),rr.end()); } else{ auto ll = (std::string_view)l; - auto rr = xml::serialize::unescaped_view((std::string_view)r); + auto rr = VS_XML_NS::serialize::unescaped_view((std::string_view)r); return std::equal(ll.begin(),ll.end(),rr.begin(),rr.end()); } } @@ -66,7 +66,7 @@ struct sv { friend inline bool operator==(const sv& l, std::string_view r){ if(l.tree!=nullptr && l.tree->configs.raw_strings){ - auto ll = xml::serialize::unescaped_view((std::string_view)l); + auto ll = VS_XML_NS::serialize::unescaped_view((std::string_view)l); auto rr= r; return std::equal(ll.begin(),ll.end(),rr.begin(),rr.end()); } @@ -145,8 +145,8 @@ struct base_t{ /* template - inline xml::query::generator> is(const xml::query::query_t& query) { - return xml::query::is(*(wrp::base_t*)this, query.tokens.begin(), query.tokens.end()); + inline VS_XML_NS::query::generator> is(const VS_XML_NS::query::query_t& query) { + return VS_XML_NS::query::is(*(wrp::base_t*)this, query.tokens.begin(), query.tokens.end()); } */ }; diff --git a/include/vs-xml/tree-builder.hpp b/include/vs-xml/tree-builder.hpp index c781c9c7..bdb820b6 100644 --- a/include/vs-xml/tree-builder.hpp +++ b/include/vs-xml/tree-builder.hpp @@ -76,7 +76,7 @@ namespace details{ template <> struct Symbols : Symbols{ - xml::unordered_set,std::function> idx; + VS_XML_NS::unordered_set,std::function> idx; sv label(std::string_view s); inline sv symbol(std::string_view s){return label(s);} diff --git a/utils/decode.cpp b/utils/decode.cpp index 4048d4a3..e4ca7da4 
100644 --- a/utils/decode.cpp +++ b/utils/decode.cpp @@ -15,7 +15,7 @@ int decode(std::filesystem::path input, std::filesystem::path output){ try{ mio::mmap_source mmap(input.c_str()); std::span binaryInput((uint8_t*)mmap.data(),mmap.size()); - auto tree = xml::Document::from_binary(binaryInput); + auto tree = VS_XML_NS::Document::from_binary(binaryInput); if(!tree.has_value())throw std::runtime_error(std::string(tree.error().msg())); std::ofstream file(output,std::ios::binary|std::ios::out); diff --git a/utils/encode.cpp b/utils/encode.cpp index 78e1342b..0a8f32a3 100644 --- a/utils/encode.cpp +++ b/utils/encode.cpp @@ -11,14 +11,14 @@ #include -template +template int encode(std::filesystem::path input, std::filesystem::path output){ try{ mio::mmap_source mmap(input.c_str()); std::string_view xmlInput(mmap.data(),mmap.size()); - xml::DocumentBuilder bld; - xml::Parser parser(xmlInput, bld); + VS_XML_NS::DocumentBuilder bld; + VS_XML_NS::Parser parser(xmlInput, bld); if(auto ret = parser.parse(); !ret.has_value())throw std::runtime_error(std::string(ret.error().msg())); auto tree = bld.close(); @@ -49,5 +49,5 @@ int encode(std::filesystem::path input, std::filesystem::path output){ int main(int argc, const char* argv[]) { if(argc<3){std::cerr<<"Wrong usage, pass input file and output file as args.";return 1;} - return encode<{.symbols=xml::builder_config_t::COMPRESS_ALL,.raw_strings=true}>(argv[1],argv[2]); + return encode<{.symbols=VS_XML_NS::builder_config_t::COMPRESS_ALL,.raw_strings=true}>(argv[1],argv[2]); } \ No newline at end of file From cba65b420a33937b5e13da668aa90da6b0507141 Mon Sep 17 00:00:00 2001 From: Karuro Chari Date: Fri, 4 Jul 2025 19:53:01 +0100 Subject: [PATCH 03/12] Removed old `path` functions which are to be implemented externally to keep the basic interface more embedded-friendly. Also it was recursive which is not good. 
--- include/vs-xml/commons.hpp | 3 --- include/vs-xml/private/impl.hpp | 28 +--------------------------- 2 files changed, 1 insertion(+), 30 deletions(-) diff --git a/include/vs-xml/commons.hpp b/include/vs-xml/commons.hpp index 361c2439..521e1194 100644 --- a/include/vs-xml/commons.hpp +++ b/include/vs-xml/commons.hpp @@ -21,7 +21,6 @@ #include -#include #include #include @@ -234,8 +233,6 @@ concept thing_i = requires(T self){ {self.has_parent()} -> std::same_as; {self.has_prev()} -> std::same_as; {self.has_next()} -> std::same_as; - - {self.path()} -> std::same_as; }; //TODO: specialization of Builder_t or just remove it? diff --git a/include/vs-xml/private/impl.hpp b/include/vs-xml/private/impl.hpp index b7853c9b..7383cab3 100644 --- a/include/vs-xml/private/impl.hpp +++ b/include/vs-xml/private/impl.hpp @@ -17,7 +17,6 @@ #include #include -#include #include #include @@ -58,10 +57,6 @@ struct base_t{ bool has_prev() const {return static_cast(this)->has_prev();} bool has_next() const {return static_cast(this)->has_next();} - std::string path() const { - return VS_XML_NS::format("{}/{}",parent()!=nullptr?parent()->path():"",static_cast(this)->path_h()); - } - auto children() const; auto attrs() const; auto visitor() const; @@ -166,11 +161,6 @@ struct element_t : base_t{ inline bool has_prev() const {return _prev!=0;} inline bool has_next() const {return _next!=0;} - /* - inline std::string path_h() const { - return std::format("{}{}{}", _ns, _ns==""?"":":", _name); - } - */ template friend struct TreeBuilder; friend struct details::BuilderBase; @@ -214,12 +204,6 @@ struct root_t : base_t{ inline bool has_parent() const {return false;} inline bool has_prev() const {return false;} inline bool has_next() const {return false;} - - /* - inline std::string path_h() const { - return std::format("{}{}{}", _ns, _ns==""?"":":", _name); - } - */ template friend struct TreeBuilder; @@ -280,8 +264,6 @@ struct comment_t : leaf_t{ comment_t(const void* offset, element_t* parent, 
std::string_view value):leaf_t(offset, parent, value){} static inline type_t deftype() {return type_t::COMMENT;}; - inline std::string path_h() const { return VS_XML_NS::format("#comment"); } - template friend struct TreeBuilder; friend struct details::BuilderBase; @@ -292,8 +274,6 @@ struct cdata_t : leaf_t{ cdata_t(const void* offset, element_t* parent, std::string_view value):leaf_t(offset, parent, value){} static inline type_t deftype() {return type_t::CDATA;}; - inline std::string path_h() const { return VS_XML_NS::format("#cdata"); } - template friend struct TreeBuilder; friend struct details::BuilderBase; @@ -303,8 +283,6 @@ struct cdata_t : leaf_t{ struct text_t : leaf_t{ text_t(const void* offset, element_t* parent, std::string_view value):leaf_t(offset, parent, value){} static inline type_t deftype() {return type_t::TEXT;}; - - inline std::string path_h() const { return VS_XML_NS::format("#text"); } template friend struct TreeBuilder; @@ -316,8 +294,6 @@ struct proc_t : leaf_t{ proc_t(const void* offset, element_t* parent, std::string_view value):leaf_t(offset, parent, value){} static inline type_t deftype() {return type_t::PROC;}; - inline std::string path_h() const { return VS_XML_NS::format("#proc"); } - template friend struct TreeBuilder; friend struct details::BuilderBase; @@ -328,8 +304,6 @@ struct marker_t : leaf_t{ marker_t(const void* offset, element_t* parent, std::string_view value):leaf_t(offset, parent, value){} static inline type_t deftype() {return type_t::MARKER;}; - inline std::string path_h() const { return VS_XML_NS::format("#leaf"); } - template friend struct TreeBuilder; friend struct details::BuilderBase; @@ -356,7 +330,7 @@ else if (type() == type_t::PROC) return ((const proc_t*)this)-> X;\ else if (type() == type_t::CDATA) return ((const cdata_t*)this)-> X;\ else if (type() == type_t::MARKER) return ((const marker_t*)this)-> X;\ else{\ - xml_assert(false,xml::format("Invalid XML thing type {}",(int)type()).c_str());\ + 
xml_assert(false,VS_XML_NS::format("Invalid XML thing type {}",(int)type()).c_str());\ std::unreachable();\ } From c9b706829b9a20395e97fe95461b63cb5a4ea801 Mon Sep 17 00:00:00 2001 From: Karuro Chari Date: Fri, 4 Jul 2025 21:26:42 +0100 Subject: [PATCH 04/12] Visitor patterns will be moved to their own header and cpp file. This will also allow a template version to be exposed (likely speedup to implement serialization and no memory allocation due to lambda context) --- RELEASE.md | 3 ++- TODO.md | 1 + include/vs-xml/module.modulemap | 1 + include/vs-xml/tree.hpp | 38 --------------------------------- include/vs-xml/visit.hpp | 1 + lib/visit.cpp | 0 6 files changed, 5 insertions(+), 39 deletions(-) create mode 100644 include/vs-xml/visit.hpp create mode 100644 lib/visit.cpp diff --git a/RELEASE.md b/RELEASE.md index 1d2e667b..f6469059 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,9 +1,10 @@ ## Breaking - `DocBuilder` renamed as `DocumentBuilder` to be more consistent in naming. +- Removed `path` functions from XML entities, leftovers from the very early versions; this functionality can now be trivially replaced by user-defined functions as the rest of the interface is complete. ## Features -Introduced `Tree::visit` and `TreeRaw::visit` to implement a slightly different visitor pattern compared to the recently added iterators. +Introduced `Tree::visit` and `TreeRaw::visit` to implement a slightly different and more flexible visitor pattern compared to the one recently added as iterator. Introduced a new `print2` function for trees and derived siblings, to provide a not recursive variant of `print` which does not grow on stack based on the depth of the tree. Not tested yet, but it will deprecate `print`. \ No newline at end of file diff --git a/TODO.md b/TODO.md index 50ee031f..f90720a3 100644 --- a/TODO.md +++ b/TODO.md @@ -1,6 +1,7 @@ - [ ] Deprecate this file plz. - [ ] Random access to attributes for the iterator. 
- [ ] Tree builder method to use injection maps when generating the tree. +- [ ] External visitor interface to Tree/TreeRaw. This allows for more flexibility and templating. ## Query redesign diff --git a/include/vs-xml/module.modulemap b/include/vs-xml/module.modulemap index d87676cd..99264002 100644 --- a/include/vs-xml/module.modulemap +++ b/include/vs-xml/module.modulemap @@ -9,5 +9,6 @@ module xml { header "document.hpp" header "archive.hpp" header "filters.hpp" + header "visit.hpp" export * } \ No newline at end of file diff --git a/include/vs-xml/tree.hpp b/include/vs-xml/tree.hpp index afdeb84c..bfe94695 100644 --- a/include/vs-xml/tree.hpp +++ b/include/vs-xml/tree.hpp @@ -295,41 +295,3 @@ namespace stored{ } } - -#include - -namespace VS_XML_NS{ - - inline void TreeRaw::visit_t(const unknown_t* node, bool(*test)(const unknown_t*), void(*before)(const unknown_t*), void(*after)(const unknown_t*)){ - while(true){ - if(node==nullptr)break; - if(before!=nullptr)before(node); - - bool children_visited = !test(node); - for(;;){ - if(node->has_children() && !children_visited){ - auto [l,r] =*node->children_range(); - node=l; - children_visited = false; - break; - } - if(node->has_next()){ - if(after!=nullptr)after(node); - node=node->next(); - children_visited = false; - break; - } - if(node->has_parent()){ - if(after!=nullptr)after(node); - node = (const unknown_t*)node->parent(); - children_visited = true; - } - else{ - if(after!=nullptr)after(node); - node = nullptr; - break; - } - } - } - } -} \ No newline at end of file diff --git a/include/vs-xml/visit.hpp b/include/vs-xml/visit.hpp new file mode 100644 index 00000000..7b9637ef --- /dev/null +++ b/include/vs-xml/visit.hpp @@ -0,0 +1 @@ +#pragma once \ No newline at end of file diff --git a/lib/visit.cpp b/lib/visit.cpp new file mode 100644 index 00000000..e69de29b From e6329181e9a05decf9de8f7430c7907d6c44a149 Mon Sep 17 00:00:00 2001 From: Karuro Chari Date: Sat, 5 Jul 2025 18:33:54 +0100 Subject: [PATCH 
05/12] New `visit` pattern reworked. Refactored older implementation to use the new one. New print functions to match added capabilities. --- RELEASE.md | 2 + include/vs-xml/module.modulemap | 1 - include/vs-xml/private/visit.hpp | 64 ++++++++++ include/vs-xml/private/wrp-impl.hpp | 7 ++ include/vs-xml/private/wrp-visit.hpp | 67 +++++++++++ include/vs-xml/tree.hpp | 18 +-- include/vs-xml/visit.hpp | 1 - lib/tree.cpp | 173 +++++++-------------------- lib/visit.cpp | 0 test/src/base-checks.cpp | 2 + 10 files changed, 191 insertions(+), 144 deletions(-) create mode 100644 include/vs-xml/private/visit.hpp create mode 100644 include/vs-xml/private/wrp-visit.hpp delete mode 100644 include/vs-xml/visit.hpp delete mode 100644 lib/visit.cpp diff --git a/RELEASE.md b/RELEASE.md index f6469059..d3b17597 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -6,5 +6,7 @@ ## Features Introduced `Tree::visit` and `TreeRaw::visit` to implement a slightly different and more flexible visitor pattern compared to the one recently added as iterator. +They are both based on `private/(wrp-)visit.hpp`, which is not publicly exposed but if one cares about getting maximum performance out of a visitor they are to be used. + Introduced a new `print2` function for trees and derived siblings, to provide a not recursive variant of `print` which does not grow on stack based on the depth of the tree. Not tested yet, but it will deprecate `print`. 
\ No newline at end of file diff --git a/include/vs-xml/module.modulemap b/include/vs-xml/module.modulemap index 99264002..d87676cd 100644 --- a/include/vs-xml/module.modulemap +++ b/include/vs-xml/module.modulemap @@ -9,6 +9,5 @@ module xml { header "document.hpp" header "archive.hpp" header "filters.hpp" - header "visit.hpp" export * } \ No newline at end of file diff --git a/include/vs-xml/private/visit.hpp b/include/vs-xml/private/visit.hpp new file mode 100644 index 00000000..6b459a1e --- /dev/null +++ b/include/vs-xml/private/visit.hpp @@ -0,0 +1,64 @@ +#pragma once +/** + * @file visit.hpp + * @author karurochari + * @brief Visitor patterns for trees and derived + * @date 2025-07-05 + * + * @copyright Copyright (c) 2025 + * + */ + +#include +#include + +namespace VS_XML_NS { + +/** + * @brief Visit all nodes starting from node. Evaluate if children should be + * considered by evaluating fn + * + * @tparam T1 implicit type for test function + * @tparam T2 implicit type for before/after + * @param node the starting node + * @param test the function checking if children should be explored for this + * node (can have side-effects). + * @param before the function with side-effects run when entering a node. + * @param after the function with side-effects when exiting a node. + * @param args additional args if wanted + */ +template +void visit(const unknown_t *node, T1&& test={}, T2&& before = {}, T2&& after = {}, auto&&... 
args){ + while(true){ + if(node==nullptr)break; + if(before!=nullptr)before(node, std::forward(args)...); + + bool children_visited = test?!test(node, std::forward(args)...):true; + for(;;){ + if(node->has_children() && !children_visited){ + auto [l,r] =*node->children_range(); + node=l; + children_visited = false; + break; + } + if(node->has_next()){ + if(after!=nullptr)after(node,std::forward(args)...); + node=node->next(); + children_visited = false; + break; + } + if(node->has_parent()){ + if(after!=nullptr)after(node,std::forward(args)...); + node = (const unknown_t*)node->parent(); + children_visited = true; + } + else{ + if(after!=nullptr)after(node,std::forward(args)...); + node = nullptr; + break; + } + } + } +} + +} // namespace VS_XML_NS \ No newline at end of file diff --git a/include/vs-xml/private/wrp-impl.hpp b/include/vs-xml/private/wrp-impl.hpp index 2b986790..8787cde7 100644 --- a/include/vs-xml/private/wrp-impl.hpp +++ b/include/vs-xml/private/wrp-impl.hpp @@ -13,6 +13,10 @@ namespace VS_XML_NS{ namespace wrp{ +//TODO: forced forward declaration here to make it friend with base_t. This must be relocated at some point. +template +void visit(wrp::base_t node, T1&& test={}, T2&& before = {}, T2&& after = {}, auto&&... args); + struct node_iterator; struct attr_iterator; @@ -106,6 +110,9 @@ struct base_t{ friend struct node_iterator; friend struct attr_iterator; friend struct visitor_iterator; + + template + friend void visit(wrp::base_t node, T1&& test, T2&& before, T2&& after, auto&&... 
args); public: base_t(const base_t& ) = default; diff --git a/include/vs-xml/private/wrp-visit.hpp b/include/vs-xml/private/wrp-visit.hpp new file mode 100644 index 00000000..bf76f80f --- /dev/null +++ b/include/vs-xml/private/wrp-visit.hpp @@ -0,0 +1,67 @@ +#pragma once +/** + * @file visit.hpp + * @author karurochari + * @brief Visitor patterns for trees and derived + * @date 2025-07-05 + * + * @copyright Copyright (c) 2025 + * + */ + +#include +#include + +namespace VS_XML_NS { +namespace wrp{ + +/** + * @brief Visit all nodes starting from node. Evaluate if children should be + * considered by evaluating fn + * + * @tparam T1 implicit type for test function + * @tparam T2 implicit type for before/after + * @param node the starting node + * @param test the function checking if children should be explored for this + * node (can have side-effects). + * @param before the function with side-effects run when entering a node. + * @param after the function with side-effects when exiting a node. + * @param args additional args if wanted + */ +template +void visit(wrp::base_t node, T1&& test, T2&& before, T2&& after, auto&&... 
args){ + while(true){ + if(node.ptr==nullptr)break; + + bool children_visited = test?!test(node, std::forward(args)...):true; + if(before!=nullptr)before(node,std::forward(args)...); + + for(;;){ + if(node.ptr->has_children() && !children_visited){ + auto [l,r] =*node.children_range(); + node=l; + children_visited = false; + break; + } + if(node.ptr->has_next()){ + if(after!=nullptr)after(node,std::forward(args)...); + node=node.next(); + children_visited = false; + break; + } + if(node.ptr->has_parent()){ + if(after!=nullptr)after(node,std::forward(args)...); + node.ptr = (const unknown_t*) node.parent().ptr; + children_visited = true; + } + else{ + if(after!=nullptr)after(node,std::forward(args)...); + node.ptr = nullptr; + break; + } + } + } +} + +} +} // namespace VS_XML_NS \ No newline at end of file diff --git a/include/vs-xml/tree.hpp b/include/vs-xml/tree.hpp index bfe94695..b9dcad5b 100644 --- a/include/vs-xml/tree.hpp +++ b/include/vs-xml/tree.hpp @@ -137,14 +137,8 @@ struct TreeRaw{ return print_h(out, cfg, (const unknown_t*)&root()); } - inline bool print2(std::ostream& out, const print_cfg_t& cfg = {})const{ - auto node = (const unknown_t*)&root(); - auto test = +[](const unknown_t* n){return true;}; - auto before = [&out, &cfg, this](const unknown_t* n){print_h_before(out,cfg,n);}; - auto after = [&out, &cfg, this](const unknown_t* n){print_h_after(out,cfg,n);}; - visit(node,test,before,after); - return true; - } + bool print2(std::ostream& out, const print_cfg_t& cfg = {})const; + bool print3(std::ostream& out, const print_cfg_t& cfg = {})const; bool save_binary(std::ostream& out)const; @@ -156,8 +150,6 @@ struct TreeRaw{ return std::string_view(s.base+(char*)symbols.data(),s.base+(char*)symbols.data()+s.length); } - void visit_t(const unknown_t* node, bool(*test)(const unknown_t*), void(*before)(const unknown_t*), void(*after)(const unknown_t*)); - /** * @brief Visit all nodes starting from node. 
Evaluate if children should be considered by evaluating fn * @@ -167,7 +159,7 @@ struct TreeRaw{ * @param after the function with side-effects when exiting a node. */ - static void visit(const unknown_t* node, bool(*test)(const unknown_t*), void(*before)(const unknown_t*)={}, void(*after)(const unknown_t*)={}); + static void visit(const unknown_t* node, bool(*test)(const unknown_t*, void* ctx), void(*before)(const unknown_t*, void* ctx)={}, void(*after)(const unknown_t*, void* ctx)={}, void* ctx=nullptr); /** * @brief Visit all nodes starting from node. Evaluate if children should be considered by evaluating fn @@ -249,7 +241,7 @@ struct Tree : TreeRaw{ * @param before the function with side-effects run when entering a node. * @param after the function with side-effects when exiting a node. */ - static void visit(wrp::base_t node, bool(*test)(wrp::base_t), void(*before)(wrp::base_t)={}, void(*after)(wrp::base_t)={}); + static void visit(wrp::base_t node, bool(*test)(wrp::base_t, void* ctx), void(*before)(wrp::base_t, void* ctx)={}, void(*after)(wrp::base_t, void* ctx)={}, void* ctx=nullptr); /** * @brief Visit all nodes starting from node. Evaluate if children should be considered by evaluating fn @@ -259,7 +251,7 @@ struct Tree : TreeRaw{ * @param before the function with side-effects run when entering a node. * @param after the function with side-effects when exiting a node. 
*/ - static void visit(wrp::base_t node, std::function)>&& test={}, std::function)>&& before={},std::function)>&& after={}); + static void visit(wrp::base_t node, std::function)>&& test, std::function)>&& before={},std::function)>&& after={}); }; diff --git a/include/vs-xml/visit.hpp b/include/vs-xml/visit.hpp deleted file mode 100644 index 7b9637ef..00000000 --- a/include/vs-xml/visit.hpp +++ /dev/null @@ -1 +0,0 @@ -#pragma once \ No newline at end of file diff --git a/lib/tree.cpp b/lib/tree.cpp index 66a57788..f1f32a39 100644 --- a/lib/tree.cpp +++ b/lib/tree.cpp @@ -9,6 +9,8 @@ #include #include +#include +#include namespace VS_XML_NS{ @@ -411,143 +413,56 @@ std::string_view TreeRaw::from_binary_error_t::msg() { wrp::base_t Tree::root() const{return {*this, &TreeRaw::root()};} -//TODO: these four functions and the two for iterators in *-impl.cpp are pretty much the same. I hate I have to copy/paste so much code around. -//But they are not 100% the same, so, short of removing the ones without std::function, I don't really have much of an option. 
- -void TreeRaw::visit(const unknown_t* node, bool(*test)(const unknown_t*), void(*before)(const unknown_t*), void(*after)(const unknown_t*)){ - while(true){ - if(node==nullptr)break; - if(before!=nullptr)before(node); - - bool children_visited = !test(node); - for(;;){ - if(node->has_children() && !children_visited){ - auto [l,r] =*node->children_range(); - node=l; - children_visited = false; - break; - } - if(node->has_next()){ - if(after!=nullptr)after(node); - node=node->next(); - children_visited = false; - break; - } - if(node->has_parent()){ - if(after!=nullptr)after(node); - node = (const unknown_t*)node->parent(); - children_visited = true; - } - else{ - if(after!=nullptr)after(node); - node = nullptr; - break; - } - } - } +void TreeRaw::visit(const unknown_t* node, bool(*test)(const unknown_t*, void* ctx), void(*before)(const unknown_t*, void* ctx), void(*after)(const unknown_t*, void* ctx), void* ctx){ + VS_XML_NS::visit<>(node,test,before,after,ctx); } void TreeRaw::visit(const unknown_t* node, std::function&& test, std::function&& before, std::function&& after){ - while(true){ - if(node==nullptr)break; - - bool children_visited = !test(node); - if(before!=nullptr)before(node); - - for(;;){ - if(node->has_children() && !children_visited){ - auto [l,r] =*node->children_range(); - node=l; - children_visited = false; - break; - } - if(node->has_next()){ - if(after!=nullptr)after(node); - node=node->next(); - children_visited = false; - break; - } - if(node->has_parent()){ - if(after!=nullptr)after(node); - node = (const unknown_t*)node->parent(); - children_visited = true; - } - else{ - if(after!=nullptr)after(node); - node = nullptr; - break; - } - } - } + VS_XML_NS::visit<>(node,test,before,after); } - -void Tree::visit(wrp::base_t node, bool(*test)(wrp::base_t), void(*before)(wrp::base_t), void(*after)(wrp::base_t)){ - while(true){ - if(node.ptr==nullptr)break; - - bool children_visited = !test(node); - if(before!=nullptr)before(node); - - for(;;){ - 
if(node.ptr->has_children() && !children_visited){ - auto [l,r] =*node.children_range(); - node=l; - children_visited = false; - break; - } - if(node.ptr->has_next()){ - if(after!=nullptr)after(node); - node=node.next(); - children_visited = false; - break; - } - if(node.ptr->has_parent()){ - if(after!=nullptr)after(node); - node.ptr = (const unknown_t*) node.parent().ptr; - children_visited = true; - } - else{ - if(after!=nullptr)after(node); - node.ptr = nullptr; - break; - } - } - } +void Tree::visit(wrp::base_t node, bool(*test)(wrp::base_t, void* ctx), void(*before)(wrp::base_t, void* ctx), void(*after)(wrp::base_t, void* ctx), void* ctx){ + VS_XML_NS::wrp::visit<>(node,test,before,after,ctx); } void Tree::visit(wrp::base_t node, std::function)>&& test, std::function)>&& before, std::function)>&& after){ - while(true){ - if(node.ptr==nullptr)break; - - bool children_visited = !test(node); - if(before!=nullptr)before(node); - - for(;;){ - if(node.ptr->has_children() && !children_visited){ - auto [l,r] =*node.children_range(); - node=l; - children_visited = false; - break; - } - if(node.ptr->has_next()){ - if(after!=nullptr)after(node); - node=node.next(); - children_visited = false; - break; - } - if(node.ptr->has_parent()){ - if(after!=nullptr)after(node); - node.ptr = (const unknown_t*) node.parent().ptr; - children_visited = true; - } - else{ - if(after!=nullptr)after(node); - node.ptr = nullptr; - break; - } - } - } + VS_XML_NS::wrp::visit<>(node,test,before,after); +} + +bool TreeRaw::print2(std::ostream& out, const print_cfg_t& cfg)const{ + auto node = (const unknown_t*)&root(); + auto test = +[](const unknown_t* n, void* _ctx)static{return true;}; + struct ctx_t{ + const TreeRaw* that; + const print_cfg_t cfg; + std::ostream& out; + }; + ctx_t ctx{this,cfg,out}; + auto before = +[](const unknown_t* n, void* _ctx)static{ + ctx_t& ctx = *(ctx_t*)_ctx; + ctx.that->print_h_before(ctx.out,ctx.cfg,n); + }; + auto after = +[](const unknown_t* n, void* 
_ctx)static{ + ctx_t& ctx = *(ctx_t*)_ctx; + ctx.that->print_h_after(ctx.out,ctx.cfg,n); + }; + VS_XML_NS::visit<>(node,test,before,after,(void*)&ctx); + return true; +} + +bool TreeRaw::print3(std::ostream& out, const print_cfg_t& cfg)const{ + auto node = (const unknown_t*)&root(); + auto test = +[](const unknown_t* n, std::ostream& out, const print_cfg_t& cfg, const TreeRaw* that)static{return true;}; + + auto before = +[](const unknown_t* n, std::ostream& out, const print_cfg_t& cfg, const TreeRaw* that)static{ + that->print_h_before(out,cfg,n); + }; + auto after = +[](const unknown_t* n, std::ostream& out, const print_cfg_t& cfg, const TreeRaw* that)static{ + that->print_h_after(out,cfg,n); + }; + VS_XML_NS::visit<>(node,test,before,after,out,cfg,this); + return true; } + } \ No newline at end of file diff --git a/lib/visit.cpp b/lib/visit.cpp deleted file mode 100644 index e69de29b..00000000 diff --git a/test/src/base-checks.cpp b/test/src/base-checks.cpp index 474497a6..967d0d7b 100644 --- a/test/src/base-checks.cpp +++ b/test/src/base-checks.cpp @@ -63,6 +63,8 @@ auto test(){ std::print("\n---\n"); tree.print2(std::cout,{}); std::print("\n---\n"); + tree.print3(std::cout,{}); + std::print("\n---\n"); tree.reorder(); tree.print(std::cout,{}); std::print("\n---\n"); From c0ff5ae018641d6546f6441ee296ad7fc506f9ae Mon Sep 17 00:00:00 2001 From: Karuro Chari Date: Sat, 5 Jul 2025 18:53:41 +0100 Subject: [PATCH 06/12] Removed legacy print functionality, replaced with the one based on visitor patterns. Added `[[nodiscard]]` around to fix interfaces. 
--- RELEASE.md | 3 +- include/vs-xml/document.hpp | 4 +- include/vs-xml/tree.hpp | 29 +++++----- lib/tree.cpp | 109 ++---------------------------------- test/src/base-checks.cpp | 2 - 5 files changed, 24 insertions(+), 123 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index d3b17597..25a0b3da 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -8,5 +8,4 @@ Introduced `Tree::visit` and `TreeRaw::visit` to implement a slightly different and more flexible visitor pattern compared to the one recently added as iterator. They are both based on `private/(wrp-)visit.hpp`, which is not publicly exposed but if one cares about getting maximum performance out of a visitor they are to be used. -Introduced a new `print2` function for trees and derived siblings, to provide a not recursive variant of `print` which does not grow on stack based on the depth of the tree. -Not tested yet, but it will deprecate `print`. \ No newline at end of file +Introduced a new `print` functions for trees and derived siblings, to provide a not recursive implementation. The public interface has not been affected. 
\ No newline at end of file diff --git a/include/vs-xml/document.hpp b/include/vs-xml/document.hpp index df4d7280..ea156f88 100644 --- a/include/vs-xml/document.hpp +++ b/include/vs-xml/document.hpp @@ -33,7 +33,7 @@ struct DocumentRaw : TreeRaw { inline bool print(std::ostream& out, const print_cfg_t& cfg = {})const{ for(auto& it: TreeRaw::root().children()){ - if(!print_h(out, cfg, &it))return false; + if(!TreeRaw::print(out, cfg, &it))return false; } return true; } @@ -43,7 +43,7 @@ struct DocumentRaw : TreeRaw { * * @return std::optional */ - inline std::optional tree_root() const{ + [[nodiscard]] inline std::optional tree_root() const{ auto c = TreeRaw::root().children(); auto it = std::ranges::find_if(c,[](auto e)static{return e.type()==type_t::ELEMENT;}); if(it!=c.end()) return it; diff --git a/include/vs-xml/tree.hpp b/include/vs-xml/tree.hpp index b9dcad5b..3c3d93c0 100644 --- a/include/vs-xml/tree.hpp +++ b/include/vs-xml/tree.hpp @@ -131,15 +131,19 @@ struct TreeRaw{ }; - inline const unknown_t& root() const {return *(const unknown_t*)buffer.data();} + [[nodiscard]] inline const unknown_t& root() const {return *(const unknown_t*)buffer.data();} - inline bool print(std::ostream& out, const print_cfg_t& cfg = {})const{ - return print_h(out, cfg, (const unknown_t*)&root()); - } - - bool print2(std::ostream& out, const print_cfg_t& cfg = {})const; - bool print3(std::ostream& out, const print_cfg_t& cfg = {})const; + /** + * @brief Stream the serialized version of the document onto an output stream. + * + * @param out Output stream. + * @param cfg Configuration for the formatting. 
+ * @return true if no error was met + * @return false else + */ + bool print(std::ostream& out, const print_cfg_t& cfg = {}, const unknown_t* node = nullptr)const; + bool print2(std::ostream& out, const print_cfg_t& cfg = {}, const unknown_t* node = nullptr)const; bool save_binary(std::ostream& out)const; @@ -181,7 +185,6 @@ struct TreeRaw{ protected: - bool print_h(std::ostream& out, const print_cfg_t& cfg = {}, const unknown_t* ptr=nullptr) const; bool print_h_before(std::ostream& out, const print_cfg_t& cfg = {}, const unknown_t* ptr=nullptr) const; bool print_h_after(std::ostream& out, const print_cfg_t& cfg = {}, const unknown_t* ptr=nullptr) const; @@ -222,16 +225,16 @@ struct Tree : TreeRaw{ inline Tree(TreeRaw&& ref):TreeRaw(std::move(ref)){} inline Tree(const TreeRaw&& ref):TreeRaw(std::move(ref)){} - inline const Tree slice(const element_t* ref=nullptr) const{return TreeRaw::slice(ref);} - inline Tree clone(const element_t* ref=nullptr, bool reduce=true) const{return TreeRaw::clone(ref,reduce);} + [[nodiscard]] inline const Tree slice(const element_t* ref=nullptr) const{return TreeRaw::slice(ref);} + [[nodiscard]] inline Tree clone(const element_t* ref=nullptr, bool reduce=true) const{return TreeRaw::clone(ref,reduce);} - wrp::base_t root() const; + [[nodiscard]] wrp::base_t root() const; ///Cast this tree as a raw tree - inline TreeRaw& downgrade(){return *this;} + [[nodiscard]] inline TreeRaw& downgrade(){return *this;} ///Cast this const tree as a const raw tree - inline const TreeRaw& downgrade() const{return *this;} + [[nodiscard]] inline const TreeRaw& downgrade() const{return *this;} /** * @brief Visit all nodes starting from node. 
Evaluate if children should be considered by evaluating fn diff --git a/lib/tree.cpp b/lib/tree.cpp index f1f32a39..6b9ea4cd 100644 --- a/lib/tree.cpp +++ b/lib/tree.cpp @@ -186,105 +186,6 @@ bool TreeRaw::print_h_after(std::ostream& out, const print_cfg_t& cfg, const unk return true; }; - - - -bool TreeRaw::print_h(std::ostream& out, const print_cfg_t& cfg, const unknown_t* ptr) const{ - //TODO: at some point, convert it not to be recursive. - if(ptr->type()==type_t::ELEMENT){ - if(ptr->children_range()->first==ptr->children_range()->second){ - VS_XML_NS::print(out,"<{}{}{}", rsv(*ptr->ns()), rsv(*ptr->ns())==""?"":":", rsv(*ptr->name())); - for(auto& i : ptr->attrs()){ - if(!configs.raw_strings){ - auto t = serialize::to_xml_attr_2(rsv(*i.value())); - if(!t.has_value()){/*TODO: Error*/} - auto tt = t.value_or(std::string_view("")); - std::string_view sv = std::holds_alternative(tt)?std::get(tt):std::get(tt); - VS_XML_NS::print(out," {}{}{}=\"{}\"", rsv(*i.ns()), rsv(*i.ns())==""?"":":", rsv(*i.name()), sv); - } - else{ - VS_XML_NS::print(out," {}{}{}=\"{}\"", rsv(*i.ns()), rsv(*i.ns())==""?"":":", rsv(*i.name()), rsv(*i.value())); - } - } - out << "/>"; - } - else{ - VS_XML_NS::print(out,"<{}{}{}", rsv(*ptr->ns()), rsv(*ptr->ns())==""?"":":", rsv(*ptr->name())); - for(auto& i : ptr->attrs()){ - if(!configs.raw_strings){ - auto t = serialize::to_xml_attr_2(rsv(*i.value())); - if(!t.has_value()){/*TODO: Error*/} - auto tt = t.value_or(std::string_view("")); - std::string_view sv = std::holds_alternative(tt)?std::get(tt):std::get(tt); - VS_XML_NS::print(out," {}{}{}=\"{}\"", rsv(*i.ns()), rsv(*i.ns())==""?"":":", rsv(*i.name()), sv); - } - else{ - VS_XML_NS::print(out," {}{}{}=\"{}\"", rsv(*i.ns()), rsv(*i.ns())==""?"":":", rsv(*i.name()), rsv(*i.value())); - } - } - out << ">"; - for(auto& i : ptr->children()){ - print_h(out,cfg,&i); - } - VS_XML_NS::print(out,"", rsv(*ptr->ns()), rsv(*ptr->ns())==""?"":":", rsv(*ptr->name())); - } - } - else 
if(ptr->type()==type_t::CDATA){ - if(!configs.raw_strings){ - auto t = serialize::to_xml_cdata(rsv(*ptr->value())); - if(!t.has_value()){/*TODO: Error*/} - auto tt = t.value_or(std::string_view("")); - std::string_view sv = std::holds_alternative(tt)?std::get(tt):std::get(tt); - VS_XML_NS::print(out,"",sv); - } - else{ - VS_XML_NS::print(out,"",rsv(*ptr->value())); - } - } - else if(ptr->type()==type_t::COMMENT){ - if(!configs.raw_strings){ - auto t = serialize::to_xml_comment(rsv(*ptr->value())); - if(!t.has_value()){/*TODO: Error*/} - auto tt = t.value_or(std::string_view("")); - std::string_view sv = std::holds_alternative(tt)?std::get(tt):std::get(tt); - VS_XML_NS::print(out,"",sv); - } - else{ - VS_XML_NS::print(out,"",rsv(*ptr->value())); - } - } - else if(ptr->type()==type_t::TEXT){ - if(!configs.raw_strings){ - auto t = serialize::to_xml_text(rsv(*ptr->value())); - if(!t.has_value()){/*TODO: Error*/} - auto tt = t.value_or(std::string_view("")); - std::string_view sv = std::holds_alternative(tt)?std::get(tt):std::get(tt); - VS_XML_NS::print(out,"{}",sv); - } - else{ - VS_XML_NS::print(out,"{}",rsv(*ptr->value())); - } - } - else if(ptr->type()==type_t::PROC){ - if(!configs.raw_strings){ - auto t = serialize::to_xml_proc(rsv(*ptr->value())); - if(!t.has_value()){/*TODO: Error*/} - auto tt = t.value_or(std::string_view("")); - std::string_view sv = std::holds_alternative(tt)?std::get(tt):std::get(tt); - VS_XML_NS::print(out,"",sv); - } - else{ - VS_XML_NS::print(out,"",rsv(*ptr->value())); - } - } - else if(ptr->type()==type_t::MARKER){ - //Skip, marker points are not XML, they are only internally used. - //or emit something in a special namespace? 
not sure - } - else{return false;} - return true; -}; - const TreeRaw TreeRaw::slice(const element_t* ref) const{ //TODO: Move to std::expected xml_assert((uint8_t*)ref>=(uint8_t*)buffer.data() && (uint8_t*)ref<(uint8_t*)buffer.data()+buffer.size(), "out of bounds node pointer"); @@ -411,7 +312,7 @@ std::string_view TreeRaw::from_binary_error_t::msg() { } } -wrp::base_t Tree::root() const{return {*this, &TreeRaw::root()};} +[[nodiscard]] wrp::base_t Tree::root() const{return {*this, &TreeRaw::root()};} void TreeRaw::visit(const unknown_t* node, bool(*test)(const unknown_t*, void* ctx), void(*before)(const unknown_t*, void* ctx), void(*after)(const unknown_t*, void* ctx), void* ctx){ VS_XML_NS::visit<>(node,test,before,after,ctx); @@ -429,8 +330,8 @@ void Tree::visit(wrp::base_t node, std::function(node,test,before,after); } -bool TreeRaw::print2(std::ostream& out, const print_cfg_t& cfg)const{ - auto node = (const unknown_t*)&root(); +bool TreeRaw::print2(std::ostream& out, const print_cfg_t& cfg, const unknown_t* node)const{ + if(node==nullptr)node = (const unknown_t*)&root(); auto test = +[](const unknown_t* n, void* _ctx)static{return true;}; struct ctx_t{ const TreeRaw* that; @@ -450,8 +351,8 @@ bool TreeRaw::print2(std::ostream& out, const print_cfg_t& cfg)const{ return true; } -bool TreeRaw::print3(std::ostream& out, const print_cfg_t& cfg)const{ - auto node = (const unknown_t*)&root(); +bool TreeRaw::print(std::ostream& out, const print_cfg_t& cfg, const unknown_t* node)const{ + if(node==nullptr)node = (const unknown_t*)&root(); auto test = +[](const unknown_t* n, std::ostream& out, const print_cfg_t& cfg, const TreeRaw* that)static{return true;}; auto before = +[](const unknown_t* n, std::ostream& out, const print_cfg_t& cfg, const TreeRaw* that)static{ diff --git a/test/src/base-checks.cpp b/test/src/base-checks.cpp index 967d0d7b..474497a6 100644 --- a/test/src/base-checks.cpp +++ b/test/src/base-checks.cpp @@ -63,8 +63,6 @@ auto test(){ 
std::print("\n---\n"); tree.print2(std::cout,{}); std::print("\n---\n"); - tree.print3(std::cout,{}); - std::print("\n---\n"); tree.reorder(); tree.print(std::cout,{}); std::print("\n---\n"); From 2d79b7eece9e8f84ee31299adff87b99ec7b02ed Mon Sep 17 00:00:00 2001 From: Karuro Chari Date: Sun, 6 Jul 2025 00:01:58 +0100 Subject: [PATCH 07/12] The library moved away from its half-attempt of being header only which inherited from an early version. Dependencies have now been better isolated in compile units to avoid leaking to downstream code. The interface is basically the same as the earlier commit, but it fixes all issues with linking fmt to downstream. --- CMakeLists.txt | 5 +- include/vs-xml/commons.hpp | 10 +++ include/vs-xml/document.hpp | 56 +++++-------- include/vs-xml/fwd/README.md | 1 + include/vs-xml/fwd/format.hpp | 1 - include/vs-xml/meson.build | 6 -- include/vs-xml/module.modulemap | 2 + include/vs-xml/{private/impl.hpp => node.hpp} | 63 +++++---------- include/vs-xml/private/README.md | 1 + include/vs-xml/private/visit.hpp | 2 +- include/vs-xml/private/wrp-visit.hpp | 4 +- include/vs-xml/query-builder.hpp | 2 +- include/vs-xml/query.hpp | 2 +- include/vs-xml/tree-builder.hpp | 7 +- .../{private/wrp-impl.hpp => wrp-node.hpp} | 2 +- lib/document.cpp | 55 +++++++++++++ lib/node.cpp | 79 +++++++++++++++++++ lib/private/impl.cpp | 31 -------- lib/tree-builder.cpp | 2 +- lib/tree.cpp | 4 +- lib/{private/wrp-impl.cpp => wrp-node.cpp} | 2 +- meson.build | 22 ++++-- test/meson.build | 2 +- test/src/base-checks.cpp | 1 - test/src/query.cpp | 1 - test/src/tree-iterator.cpp | 1 - 26 files changed, 221 insertions(+), 143 deletions(-) create mode 100644 include/vs-xml/fwd/README.md rename include/vs-xml/{private/impl.hpp => node.hpp} (88%) create mode 100644 include/vs-xml/private/README.md rename include/vs-xml/{private/wrp-impl.hpp => wrp-node.hpp} (99%) create mode 100644 lib/document.cpp create mode 100644 lib/node.cpp delete mode 100644 lib/private/impl.cpp 
rename lib/{private/wrp-impl.cpp => wrp-node.cpp} (95%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5caaffaf..5e174c2b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -78,15 +78,16 @@ include_directories( # Library target # set(VS_XML_SOURCES - lib/private/impl.cpp - lib/private/wrp-impl.cpp lib/archive.cpp lib/parser.cpp lib/serializer.cpp lib/tree.cpp + lib/document.cpp lib/tree-builder.cpp lib/query.cpp lib/query-builder.cpp + lib/node.cpp + lib/wrp-node.cpp ) add_library(vs-xml diff --git a/include/vs-xml/commons.hpp b/include/vs-xml/commons.hpp index 521e1194..3ad33898 100644 --- a/include/vs-xml/commons.hpp +++ b/include/vs-xml/commons.hpp @@ -174,10 +174,20 @@ struct marker_t; struct root_t; struct unknown_t; +struct node_iterator; +struct attr_iterator; +struct text_iterator; +struct visitor_iterator; + namespace wrp{ template struct base_t; struct sv; + + struct node_iterator; + struct attr_iterator; + struct text_iterator; + struct visitor_iterator; } namespace details{ diff --git a/include/vs-xml/document.hpp b/include/vs-xml/document.hpp index ea156f88..408d1d83 100644 --- a/include/vs-xml/document.hpp +++ b/include/vs-xml/document.hpp @@ -10,10 +10,12 @@ * */ -#include -#include -#include -#include +#include + +#include +#include +#include +#include namespace VS_XML_NS{ @@ -31,42 +33,24 @@ struct DocumentBuilder; struct DocumentRaw : TreeRaw { using TreeRaw::TreeRaw; - inline bool print(std::ostream& out, const print_cfg_t& cfg = {})const{ - for(auto& it: TreeRaw::root().children()){ - if(!TreeRaw::print(out, cfg, &it))return false; - } - return true; - } + bool print(std::ostream& out, const print_cfg_t& cfg = {})const; /** * @brief Return the root of the proper tree inside the document (if present) * * @return std::optional */ - [[nodiscard]] inline std::optional tree_root() const{ - auto c = TreeRaw::root().children(); - auto it = std::ranges::find_if(c,[](auto e)static{return e.type()==type_t::ELEMENT;}); - if(it!=c.end()) return it; - 
return {}; - } - - [[nodiscard]] static inline std::expected from_binary(std::span region){ - std::expected t = TreeRaw::from_binary(region); - if(!t.has_value())return std::unexpected(t.error()); - else return DocumentRaw(std::move(*t)); - } - [[nodiscard]] static inline const std::expected from_binary(std::span region){ - std::expected t = TreeRaw::from_binary(region); - if(!t.has_value())return std::unexpected(t.error()); - else return DocumentRaw(std::move(*t)); - } + [[nodiscard]] std::optional tree_root() const; + + [[nodiscard]] static std::expected from_binary(std::span region); + [[nodiscard]] static const std::expected from_binary(std::span region); template friend struct DocumentBuilder; //TODO: Replace with proper prototypes, and incapsulate the mv mechanism away as it is an implementation detail, not semantically correct. - DocumentRaw(TreeRaw&& src):TreeRaw(src){} - DocumentRaw(const TreeRaw&& src):TreeRaw(src){} + DocumentRaw(TreeRaw&& src); + DocumentRaw(const TreeRaw&& src); }; @@ -82,19 +66,19 @@ struct Document : DocumentRaw { public: - inline Document(DocumentRaw&& ref):DocumentRaw(std::move(ref)){} - inline Document(const DocumentRaw&& ref):DocumentRaw(std::move(ref)){} + Document(DocumentRaw&& ref); + Document(const DocumentRaw&& ref); - inline const Tree slice(const element_t* ref=nullptr) const{return DocumentRaw::slice(ref);} - inline Tree clone(const element_t* ref=nullptr, bool reduce=true) const{return DocumentRaw::clone(ref,reduce);} + const Tree slice(const element_t* ref=nullptr) const; + Tree clone(const element_t* ref=nullptr, bool reduce=true) const; - inline wrp::base_t root() {return wrp::base_t{*(const TreeRaw*)this, &TreeRaw::root()};} + wrp::base_t root(); ///Cast this document as a raw document - inline DocumentRaw& downgrade(){return *this;} + DocumentRaw& downgrade(); ///Cast this const document as a const raw tree - inline const DocumentRaw& downgrade() const{return *this;} + const DocumentRaw& downgrade() const; }; diff 
--git a/include/vs-xml/fwd/README.md b/include/vs-xml/fwd/README.md new file mode 100644 index 00000000..4697d009 --- /dev/null +++ b/include/vs-xml/fwd/README.md @@ -0,0 +1 @@ +Forwards to decide which libraries are used internally by `vs.xml`. Not to be used externally, unless the respective libraries are also linked. \ No newline at end of file diff --git a/include/vs-xml/fwd/format.hpp b/include/vs-xml/fwd/format.hpp index 9e8c5f90..0c092f55 100644 --- a/include/vs-xml/fwd/format.hpp +++ b/include/vs-xml/fwd/format.hpp @@ -10,7 +10,6 @@ * */ - #if VS_XML_USE_FMT == true && __has_include() #include diff --git a/include/vs-xml/meson.build b/include/vs-xml/meson.build index 3a4eaeaf..7fffb1f4 100644 --- a/include/vs-xml/meson.build +++ b/include/vs-xml/meson.build @@ -19,17 +19,11 @@ conf.set('VS_XML_NS', get_option('ns')) conf.set('VS_XML_LAYOUT', get_option('binlayout')) if get_option('use_fmt') - fmt_dep = dependency('fmt') conf.set('VS_XML_USE_FMT','true') -else - fmt_dep = [] endif if get_option('use_gtl') - gtl_dep = dependency('gtl') conf.set('VS_XML_USE_GTL','true') -else - gtl_dep = [] endif cfgfile = configure_file(output : 'config.hpp', diff --git a/include/vs-xml/module.modulemap b/include/vs-xml/module.modulemap index d87676cd..1aa149e7 100644 --- a/include/vs-xml/module.modulemap +++ b/include/vs-xml/module.modulemap @@ -1,5 +1,7 @@ module xml { header "commons.hpp" + header "node.hpp" + header "wrp-node.hpp" header "tree-builder.hpp" header "document-builder.hpp" header "archive-builder.hpp" diff --git a/include/vs-xml/private/impl.hpp b/include/vs-xml/node.hpp similarity index 88% rename from include/vs-xml/private/impl.hpp rename to include/vs-xml/node.hpp index 7383cab3..0f8e8018 100644 --- a/include/vs-xml/private/impl.hpp +++ b/include/vs-xml/node.hpp @@ -1,7 +1,7 @@ #pragma once /** - * @file impl.hpp + * @file node.hpp * @author karurochari * @brief Implementation of the node logic * @date 2025-05-01 @@ -24,8 +24,6 @@ #include #include 
-#include -//#include namespace VS_XML_NS{ @@ -310,57 +308,33 @@ struct marker_t : leaf_t{ friend struct TreeRaw; }; -#define DISPATCH(X) \ -if (type() == type_t::ELEMENT) return ((element_t*)this) -> X;\ -else if (type() == type_t::TEXT) return ((text_t*)this)-> X;\ -else if (type() == type_t::COMMENT) return ((comment_t*)this)-> X;\ -else if (type() == type_t::PROC) return ((proc_t*)this)-> X;\ -else if (type() == type_t::CDATA) return ((cdata_t*)this)-> X;\ -else if (type() == type_t::MARKER) return ((marker_t*)this)-> X;\ -else{\ - xml_assert(false,"Invalid XML thing type");\ - std::unreachable();\ -} - -#define CDISPATCH(X) \ -if (type() == type_t::ELEMENT) return ((const element_t*)this) -> X;\ -else if (type() == type_t::TEXT) return ((const text_t*)this)-> X;\ -else if (type() == type_t::COMMENT) return ((const comment_t*)this)-> X;\ -else if (type() == type_t::PROC) return ((const proc_t*)this)-> X;\ -else if (type() == type_t::CDATA) return ((const cdata_t*)this)-> X;\ -else if (type() == type_t::MARKER) return ((const marker_t*)this)-> X;\ -else{\ - xml_assert(false,VS_XML_NS::format("Invalid XML thing type {}",(int)type()).c_str());\ - std::unreachable();\ -} - struct unknown_t : base_t{ private: - void set_parent(element_t* parent){DISPATCH(set_parent(parent));} - void set_prev(unknown_t* prev){DISPATCH(set_prev(prev));} - void set_next(unknown_t* next){DISPATCH(set_next(next));} + void set_parent(element_t* parent); + void set_prev(unknown_t* prev); + void set_next(unknown_t* next); public: static inline type_t deftype() {return type_t::UNKNOWN;}; - std::expected ns() const {CDISPATCH(ns());} - std::expected name() const {CDISPATCH(name());} - std::expected value() const {CDISPATCH(value());} - std::expected text_range() const {CDISPATCH(text_range());} + std::expected ns() const; + std::expected name() const; + std::expected value() const; + std::expected text_range() const; - std::expected,feature_t> children_range() const 
{CDISPATCH(children_range());} - std::expected,feature_t> attrs_range() const {CDISPATCH(attrs_range());} + std::expected,feature_t> children_range() const; + std::expected,feature_t> attrs_range() const; - const element_t* parent() const {CDISPATCH(parent());} - const unknown_t* prev() const {CDISPATCH(prev());} - const unknown_t* next() const {CDISPATCH(next());} + const element_t* parent() const; + const unknown_t* prev() const; + const unknown_t* next() const; - inline bool has_children() const {CDISPATCH(has_children());} - inline bool has_parent() const {CDISPATCH(has_parent());} - inline bool has_prev() const {CDISPATCH(has_prev());} - inline bool has_next() const {CDISPATCH(has_next());} + bool has_children() const; + bool has_parent() const; + bool has_prev() const; + bool has_next() const; template friend struct TreeBuilder; @@ -368,8 +342,7 @@ struct unknown_t : base_t{ friend struct TreeRaw; }; -#undef DISPATCH -#undef CDISPATCH + static_assert(thing_i); static_assert(thing_i); diff --git a/include/vs-xml/private/README.md b/include/vs-xml/private/README.md new file mode 100644 index 00000000..4b5609cd --- /dev/null +++ b/include/vs-xml/private/README.md @@ -0,0 +1 @@ +These files are not meant to be accessed from outside the vs-xml package. They are only to appear in its c/cpp files and leveraged as part of the library. 
\ No newline at end of file diff --git a/include/vs-xml/private/visit.hpp b/include/vs-xml/private/visit.hpp index 6b459a1e..98014d2d 100644 --- a/include/vs-xml/private/visit.hpp +++ b/include/vs-xml/private/visit.hpp @@ -10,7 +10,7 @@ */ #include -#include +#include namespace VS_XML_NS { diff --git a/include/vs-xml/private/wrp-visit.hpp b/include/vs-xml/private/wrp-visit.hpp index bf76f80f..5ba71b75 100644 --- a/include/vs-xml/private/wrp-visit.hpp +++ b/include/vs-xml/private/wrp-visit.hpp @@ -1,6 +1,6 @@ #pragma once /** - * @file visit.hpp + * @file wrp-visit.hpp * @author karurochari * @brief Visitor patterns for trees and derived * @date 2025-07-05 @@ -10,7 +10,7 @@ */ #include -#include +#include namespace VS_XML_NS { namespace wrp{ diff --git a/include/vs-xml/query-builder.hpp b/include/vs-xml/query-builder.hpp index 3e1da55d..a5a40187 100644 --- a/include/vs-xml/query-builder.hpp +++ b/include/vs-xml/query-builder.hpp @@ -12,7 +12,7 @@ #include #include -#include +#include namespace VS_XML_NS{ namespace query{ diff --git a/include/vs-xml/query.hpp b/include/vs-xml/query.hpp index 237f2c2a..58b107ae 100644 --- a/include/vs-xml/query.hpp +++ b/include/vs-xml/query.hpp @@ -18,7 +18,7 @@ #include #include #include -#include +#include namespace VS_XML_NS{ namespace query{ diff --git a/include/vs-xml/tree-builder.hpp b/include/vs-xml/tree-builder.hpp index bdb820b6..b2667488 100644 --- a/include/vs-xml/tree-builder.hpp +++ b/include/vs-xml/tree-builder.hpp @@ -16,15 +16,16 @@ #include #include - +#include #include #include #include #include -#include -#include +#include +#include +#include namespace VS_XML_NS{ diff --git a/include/vs-xml/private/wrp-impl.hpp b/include/vs-xml/wrp-node.hpp similarity index 99% rename from include/vs-xml/private/wrp-impl.hpp rename to include/vs-xml/wrp-node.hpp index 8787cde7..7840342d 100644 --- a/include/vs-xml/private/wrp-impl.hpp +++ b/include/vs-xml/wrp-node.hpp @@ -6,7 +6,7 @@ #include #include -#include +#include #include 
namespace VS_XML_NS{ diff --git a/lib/document.cpp b/lib/document.cpp new file mode 100644 index 00000000..7c3142a7 --- /dev/null +++ b/lib/document.cpp @@ -0,0 +1,55 @@ +#include + +namespace VS_XML_NS{ + +bool DocumentRaw::print(std::ostream& out, const print_cfg_t& cfg)const{ + for(auto& it: TreeRaw::root().children()){ + if(!TreeRaw::print(out, cfg, &it))return false; + } + return true; +} + +/** + * @brief Return the root of the proper tree inside the document (if present) + * + * @return std::optional + */ +[[nodiscard]] std::optional DocumentRaw::tree_root() const{ + auto c = TreeRaw::root().children(); + auto it = std::ranges::find_if(c,[](auto e)static{return e.type()==type_t::ELEMENT;}); + if(it!=c.end()) return it; + return {}; +} + +[[nodiscard]] std::expected DocumentRaw::from_binary(std::span region){ + std::expected t = TreeRaw::from_binary(region); + if(!t.has_value())return std::unexpected(t.error()); + else return DocumentRaw(std::move(*t)); +} +[[nodiscard]] const std::expected DocumentRaw::from_binary(std::span region){ + std::expected t = TreeRaw::from_binary(region); + if(!t.has_value())return std::unexpected(t.error()); + else return DocumentRaw(std::move(*t)); +} + + +//TODO: Replace with proper prototypes, and incapsulate the mv mechanism away as it is an implementation detail, not semantically correct. 
+DocumentRaw::DocumentRaw(TreeRaw&& src):TreeRaw(src){} +DocumentRaw::DocumentRaw(const TreeRaw&& src):TreeRaw(src){} + + +Document::Document(DocumentRaw&& ref):DocumentRaw(std::move(ref)){} +Document::Document(const DocumentRaw&& ref):DocumentRaw(std::move(ref)){} + +const Tree Document::slice(const element_t* ref) const{return DocumentRaw::slice(ref);} +Tree Document::clone(const element_t* ref, bool reduce) const{return DocumentRaw::clone(ref,reduce);} + +wrp::base_t Document::root() {return wrp::base_t{*(const TreeRaw*)this, &TreeRaw::root()};} + +///Cast this document as a raw document +DocumentRaw& Document::downgrade(){return *this;} + +///Cast this const document as a const raw tree +const DocumentRaw& Document::downgrade() const{return *this;} + +} \ No newline at end of file diff --git a/lib/node.cpp b/lib/node.cpp new file mode 100644 index 00000000..f3434977 --- /dev/null +++ b/lib/node.cpp @@ -0,0 +1,79 @@ +#include +#include + +#define DISPATCH(X) \ +if (type() == type_t::ELEMENT) return ((element_t*)this) -> X;\ +else if (type() == type_t::TEXT) return ((text_t*)this)-> X;\ +else if (type() == type_t::COMMENT) return ((comment_t*)this)-> X;\ +else if (type() == type_t::PROC) return ((proc_t*)this)-> X;\ +else if (type() == type_t::CDATA) return ((cdata_t*)this)-> X;\ +else if (type() == type_t::MARKER) return ((marker_t*)this)-> X;\ +else{\ + xml_assert(false,"Invalid XML thing type");\ + std::unreachable();\ +} + +#define CDISPATCH(X) \ +if (type() == type_t::ELEMENT) return ((const element_t*)this) -> X;\ +else if (type() == type_t::TEXT) return ((const text_t*)this)-> X;\ +else if (type() == type_t::COMMENT) return ((const comment_t*)this)-> X;\ +else if (type() == type_t::PROC) return ((const proc_t*)this)-> X;\ +else if (type() == type_t::CDATA) return ((const cdata_t*)this)-> X;\ +else if (type() == type_t::MARKER) return ((const marker_t*)this)-> X;\ +else{\ + xml_assert(false,VS_XML_NS::format("Invalid XML thing type 
{}",(int)type()).c_str());\ + std::unreachable();\ +} + +namespace VS_XML_NS { + void unknown_t::set_parent(element_t* parent){DISPATCH(set_parent(parent));} + void unknown_t::set_prev(unknown_t* prev){DISPATCH(set_prev(prev));} + void unknown_t::set_next(unknown_t* next){DISPATCH(set_next(next));} + + std::expected unknown_t::ns() const {CDISPATCH(ns());} + std::expected unknown_t::name() const {CDISPATCH(name());} + std::expected unknown_t::value() const {CDISPATCH(value());} + std::expected unknown_t::text_range() const {CDISPATCH(text_range());} + + std::expected,feature_t> unknown_t::children_range() const {CDISPATCH(children_range());} + std::expected,feature_t> unknown_t::attrs_range() const {CDISPATCH(attrs_range());} + + const element_t* unknown_t::parent() const {CDISPATCH(parent());} + const unknown_t* unknown_t::prev() const {CDISPATCH(prev());} + const unknown_t* unknown_t::next() const {CDISPATCH(next());} + + bool unknown_t::has_children() const {CDISPATCH(has_children());} + bool unknown_t::has_parent() const {CDISPATCH(has_parent());} + bool unknown_t::has_prev() const {CDISPATCH(has_prev());} + bool unknown_t::has_next() const {CDISPATCH(has_next());} + + visitor_iterator& visitor_iterator::operator++(){ + xml_assert(node!=nullptr, "Reached end of tree"); + bool children_visited = false; + for(;;){ + if(node->has_children() && !children_visited){ + auto [l,r] =*node->children_range(); + node=l; + children_visited = false; + return *this; + } + if(node->has_next()){ + node=node->next(); + children_visited = false; + return *this; + } + if(node->has_parent()){ + node = (const unknown_t*)node->parent(); + children_visited = true; + } + else{ + node = nullptr; + return *this; + } + } + return *this; + } +} + +#undef DISPATCH +#undef CDISPATCH \ No newline at end of file diff --git a/lib/private/impl.cpp b/lib/private/impl.cpp deleted file mode 100644 index 587887cb..00000000 --- a/lib/private/impl.cpp +++ /dev/null @@ -1,31 +0,0 @@ -#include - 
-namespace VS_XML_NS{ -visitor_iterator& visitor_iterator::operator++(){ - xml_assert(node!=nullptr, "Reached end of tree"); - bool children_visited = false; - for(;;){ - if(node->has_children() && !children_visited){ - auto [l,r] =*node->children_range(); - node=l; - children_visited = false; - return *this; - } - if(node->has_next()){ - node=node->next(); - children_visited = false; - return *this; - } - if(node->has_parent()){ - node = (const unknown_t*)node->parent(); - children_visited = true; - } - else{ - node = nullptr; - return *this; - } - } - return *this; -} - -} diff --git a/lib/tree-builder.cpp b/lib/tree-builder.cpp index 635c0060..059bdffe 100644 --- a/lib/tree-builder.cpp +++ b/lib/tree-builder.cpp @@ -1,7 +1,7 @@ #include #include #include -#include +#include namespace VS_XML_NS{ diff --git a/lib/tree.cpp b/lib/tree.cpp index 6b9ea4cd..cc5bda19 100644 --- a/lib/tree.cpp +++ b/lib/tree.cpp @@ -4,8 +4,8 @@ #include #include -#include -#include +#include +#include #include #include diff --git a/lib/private/wrp-impl.cpp b/lib/wrp-node.cpp similarity index 95% rename from lib/private/wrp-impl.cpp rename to lib/wrp-node.cpp index 6e46f98c..b6e6d8ca 100644 --- a/lib/private/wrp-impl.cpp +++ b/lib/wrp-node.cpp @@ -1,4 +1,4 @@ -#include +#include namespace VS_XML_NS{ namespace wrp{ diff --git a/meson.build b/meson.build index 3f03724c..4879da11 100644 --- a/meson.build +++ b/meson.build @@ -6,8 +6,6 @@ project( default_options: ['cpp_std=c++23'], ) -include_dirs = include_directories('include') - add_project_arguments(['-Wno-c23-extensions'], language: 'cpp') conf = configuration_data() @@ -18,23 +16,37 @@ rev_version = version_components[2] subdir('./include/vs-xml') +if get_option('use_fmt') + fmt_dep = dependency('fmt') #default_options : ['header-only=true'] +else + fmt_dep = [] +endif + +if get_option('use_gtl') + gtl_dep = dependency('gtl') +else + gtl_dep = [] +endif + + vs_xml_lib = library( 'vs-xml', [ - 'lib/private/impl.cpp', - 
'lib/private/wrp-impl.cpp', 'lib/parser.cpp', 'lib/serializer.cpp', 'lib/archive.cpp', 'lib/tree.cpp', + 'lib/document.cpp', 'lib/tree-builder.cpp', 'lib/query.cpp', 'lib/query-builder.cpp', + 'lib/node.cpp', + 'lib/wrp-node.cpp', ], cpp_args: [], install: true, dependencies: [fmt_dep, gtl_dep], - include_directories: include_dirs, + include_directories: ['include'], ) vs_xml_dep = declare_dependency( diff --git a/test/meson.build b/test/meson.build index 4c17574b..c3890f56 100644 --- a/test/meson.build +++ b/test/meson.build @@ -10,7 +10,7 @@ if get_option('noexcept') == false cpp_args: [], link_args: [], dependencies: [ - vs_xml_dep + vs_xml_dep, ], )) diff --git a/test/src/base-checks.cpp b/test/src/base-checks.cpp index 474497a6..17085783 100644 --- a/test/src/base-checks.cpp +++ b/test/src/base-checks.cpp @@ -4,7 +4,6 @@ #include #include -#include #include diff --git a/test/src/query.cpp b/test/src/query.cpp index c25198e9..c9e4eb64 100644 --- a/test/src/query.cpp +++ b/test/src/query.cpp @@ -1,4 +1,3 @@ -#include "vs-xml/private/impl.hpp" #include #include #include diff --git a/test/src/tree-iterator.cpp b/test/src/tree-iterator.cpp index f3d30526..22c2c957 100644 --- a/test/src/tree-iterator.cpp +++ b/test/src/tree-iterator.cpp @@ -4,7 +4,6 @@ #include #include -#include #include From 85fd338589eb846802a4af3732b57635406c7927 Mon Sep 17 00:00:00 2001 From: Karuro Chari Date: Sun, 6 Jul 2025 00:15:25 +0100 Subject: [PATCH 08/12] Improvements and fixes to the build script --- include/vs-xml/meson.build | 8 +++++++- meson.build | 13 ++++--------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/include/vs-xml/meson.build b/include/vs-xml/meson.build index 7fffb1f4..4a22b0a4 100644 --- a/include/vs-xml/meson.build +++ b/include/vs-xml/meson.build @@ -1,3 +1,8 @@ +version_components = meson.project_version().split('.') +major_version = version_components[0] +minor_version = version_components[1] +rev_version = version_components[2] + conf = 
configuration_data() conf.set('VS_XML_VERSION_MAJOR', major_version) conf.set('VS_XML_VERSION_MINOR', minor_version) @@ -31,4 +36,5 @@ cfgfile = configure_file(output : 'config.hpp', ) install_headers(cfgfile, subdir:'vs-xml') -install_subdir('.', install_dir : 'include/vs-xml', strip_directory: false, follow_symlinks: true, exclude_files: ['meson.build','config.hpp.in'] ) +#TODO: Exclude private subdir +install_subdir('.', install_dir : get_option('includedir')+'/vs-xml', strip_directory: false, follow_symlinks: true, exclude_files: ['meson.build','config.hpp.in'] ) diff --git a/meson.build b/meson.build index 4879da11..32eff5a0 100644 --- a/meson.build +++ b/meson.build @@ -8,16 +8,10 @@ project( add_project_arguments(['-Wno-c23-extensions'], language: 'cpp') -conf = configuration_data() -version_components = meson.project_version().split('.') -major_version = version_components[0] -minor_version = version_components[1] -rev_version = version_components[2] - subdir('./include/vs-xml') if get_option('use_fmt') - fmt_dep = dependency('fmt') #default_options : ['header-only=true'] + fmt_dep = dependency('fmt') else fmt_dep = [] endif @@ -28,6 +22,7 @@ else gtl_dep = [] endif +incdir = [include_directories('include')] vs_xml_lib = library( 'vs-xml', @@ -46,12 +41,12 @@ vs_xml_lib = library( cpp_args: [], install: true, dependencies: [fmt_dep, gtl_dep], - include_directories: ['include'], + include_directories: incdir, ) vs_xml_dep = declare_dependency( link_with: vs_xml_lib, - include_directories: ['include'], + include_directories: incdir, ) # Ignore these targets if not the root project From 525b37ea6b67e0b1bf563352ff184502f3e3238a Mon Sep 17 00:00:00 2001 From: Karuro Chari Date: Sun, 6 Jul 2025 10:07:34 +0100 Subject: [PATCH 09/12] Optimizing print functions for trees, now they are much better for embedded. 
--- RELEASE.md | 9 +- VERSION | 2 +- benchmark/src/serialize-big.cpp | 1 - docs/embedded.md | 4 +- include/vs-xml/private/query-impl.hpp | 127 -------------------------- include/vs-xml/query.hpp | 123 ++++++++++++++++++++++++- lib/tree.cpp | 28 +++--- 7 files changed, 144 insertions(+), 150 deletions(-) delete mode 100644 include/vs-xml/private/query-impl.hpp diff --git a/RELEASE.md b/RELEASE.md index 25a0b3da..ae8ad707 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,11 +1,12 @@ ## Breaking - `DocBuilder` renamed as `DocumentBuilder` to be more consistent in naming. -- Removed `path` functions from XML entities, leftovers from the very early versions; this functionality can now be trivially replaced by user-defined functions as the rest of the interface is complete. +- Removed `path` functions from XML entities, leftovers from the very early versions; + this functionality can now be trivially replaced by user-defined functions, since the rest of the interface is complete. ## Features -Introduced `Tree::visit` and `TreeRaw::visit` to implement a slightly different and more flexible visitor pattern compared to the one recently added as iterator. -They are both based on `private/(wrp-)visit.hpp`, which is not publicly exposed but if one cares about getting maximum performance out of a visitor they are to be used. +Introduced `Tree::visit` and `TreeRaw::visit` to implement a slightly different and more flexible visitor pattern when compared to the one recently added iterator. +They are both based on `private/(wrp-)visit.hpp`, which is not publicly exposed (for now). -Introduced a new `print` functions for trees and derived siblings, to provide a not recursive implementation. The public interface has not been affected. \ No newline at end of file +New `print` functions have been introduced for trees et similia. It no longer uses `std::print` due to the awful overhead and additional memory allocations. `fmt` had no such issue to be honest. 
\ No newline at end of file diff --git a/VERSION b/VERSION index 25cd22b6..f21e5a1f 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.2.12 \ No newline at end of file +0.2.13 \ No newline at end of file diff --git a/benchmark/src/serialize-big.cpp b/benchmark/src/serialize-big.cpp index 804c952e..903d4703 100644 --- a/benchmark/src/serialize-big.cpp +++ b/benchmark/src/serialize-big.cpp @@ -92,7 +92,6 @@ int main(int argc, const char* argv[]) { mio::mmap_source mmap3("./assets/nasa_10_f_bs.xml.bin"); std::string_view binInput(mmap3.data(),mmap3.size()); - for(int i = 0; i<3; i++){ std::vector ticks; ticks.push_back(std::chrono::system_clock::now()); diff --git a/docs/embedded.md b/docs/embedded.md index 9cdbebb2..62a84da8 100644 --- a/docs/embedded.md +++ b/docs/embedded.md @@ -22,13 +22,13 @@ It is important to enable the `noexcept` flag and disable `utils` alongside any - `TreeRaw`/`Tree` general usage - `DocumentRaw`/`Document` general usage - `ArchiveRaw`/`Archive` general usage -- The XML parser when `.raw_strings=true`, however wraps builders which are not fully optimized yet. +- The XML parser when `.raw_strings=true`, however it wraps builders which are not fully optimized yet. +- The XML serializer when `.raw_strings=true`. - Memos/notes/indices can all be implemented externally, as long as you have a proper library for containers `vs.xml` will not get in your way. ### 🟠 Features planned for embedded - `TreeBuilder`, `DocumentBuilder`, `ArchiveBuilder` & `QueryBuilder`. Right now they encapsulate their own storage, unable to just work on externally defined containers, so we cannot externally handle memory allocations. It is possible to reserve space and so limiting the number of allocations, but they cannot be fully removed as it is. -- The XML serializer when `.raw_strings=true`, it is still using functions which are not optimized, but their replacement has been implemented already. It also assumes to operate on a stream which is not great. - Queries. 
Right now they are not good due to the high number of dynamic allocations needed. They could be trivially removed for the most part, but the whole system is being refactored to be stack-based and consume less memory overall. ### 🔴 Features not planned for embedded diff --git a/include/vs-xml/private/query-impl.hpp b/include/vs-xml/private/query-impl.hpp deleted file mode 100644 index f776acdb..00000000 --- a/include/vs-xml/private/query-impl.hpp +++ /dev/null @@ -1,127 +0,0 @@ -#include -#include -#include -#include -#include - -namespace VS_XML_NS{ -namespace query{ - -static inline bool expr_helper(const auto& pattern, const auto& check){ - if(std::holds_alternative(pattern)){ - if(check.has_value() && *check==std::get(pattern))return true; - } - else if(std::holds_alternative>(pattern)){ - if(!check.has_value() && std::get>(pattern)(*check))return true; - } - else return true; - return false; -} - -template -result_t is(wrp::base_t root, typename query_t::container_type::const_iterator begin, typename query_t::container_type::const_iterator end) { - for(auto current = begin;current!=end;current++){ - //Accept the current element - if (std::holds_alternative>(current->args)) { - co_yield root; - co_return; - } - //Continue on children if current is element - else if (std::holds_alternative>(current->args)) { - if(root.type()==type_t::ELEMENT) for (auto& child : root.children()) { - for (auto n : is(child, current+1, end)) { - co_yield n; - } - } - co_return; - } - //Continue from here on, FORK will just be consumed on the current branch AND on children if current is element, - else if (std::holds_alternative>(current->args)) { - if(root.type()==type_t::ELEMENT){ - for (auto& child : root.children()) { - for (auto n : is(child, current, end)) { - co_yield n; - } - } - } - else co_return; - } - //Filter based on type - else if (std::holds_alternative>(current->args)) { - auto type = std::get>(current->args); - bool match = false; - switch(root.type()){ - case 
type_t::ELEMENT: - if(type.is_element)match=true; - break; - case type_t::TEXT: - if(type.is_text)match=true; - break; - case type_t::CDATA: - if(type.is_cdata)match=true; - break; - case type_t::COMMENT: - if(type.is_comment)match=true; - break; - case type_t::PROC: - if(type.is_proc)match=true; - break; - case type_t::MARKER: - if(type.is_marker)match=true; - break; - default: - break; - } - if(!match) co_return; //All matches failing. Fail branch. - } - //Match NS - else if ( std::holds_alternative>(current->args) ){ - if(!expr_helper(std::get>(current->args),root.ns())) co_return; - } - //Match name - else if ( std::holds_alternative>(current->args) ){ - if(!expr_helper(std::get>(current->args),root.name())) co_return; - } - //Match value - else if ( std::holds_alternative>(current->args) ){ - if(!expr_helper(std::get>(current->args),root.value())) co_return; - } - //Match text, not implemented as .text() is missing upstream. - /* - else if ( std::holds_alternative>(current->args) ){ - auto pattern = std::get>(current->args); - auto check = root.text(); - if(std::holds_alternative(pattern)){ - if(check==std::get(pattern)); - else co_return; - } - else if(std::holds_alternative>(pattern)){ - if(std::get>(pattern)(check)); - else co_return; - } - else co_return; - } - */ - //TODO: right now attribute matching has k*O(n) complexity if k attributes must be tested. - //By looking ahead it is possible to check if there are more attributes to be tested, and perform the operation in just O(n) - //Match attribute - else if (std::holds_alternative>(current->args)) { - auto pattern = std::get>(current->args); - bool found = false; - if(root.type()!=type_t::ELEMENT)co_return; - for(auto& attr: root.attrs()){ - if(expr_helper(pattern.ns,attr.ns()) && expr_helper(pattern.name,attr.name()) && expr_helper(pattern.value,attr.value())){ - found=true;break; - } - } - if(!found)co_return; - } - else{ - //Failed commands will prevent propagation. 
- co_return; - } - } -} - -} -} \ No newline at end of file diff --git a/include/vs-xml/query.hpp b/include/vs-xml/query.hpp index 58b107ae..227154cd 100644 --- a/include/vs-xml/query.hpp +++ b/include/vs-xml/query.hpp @@ -379,4 +379,125 @@ template<> inline constexpr bool std::ranges::enable_borrowed_range = true; -#include \ No newline at end of file + +namespace VS_XML_NS{ + namespace query{ + + static inline bool expr_helper(const auto& pattern, const auto& check){ + if(std::holds_alternative(pattern)){ + if(check.has_value() && *check==std::get(pattern))return true; + } + else if(std::holds_alternative>(pattern)){ + if(!check.has_value() && std::get>(pattern)(*check))return true; + } + else return true; + return false; + } + + template + result_t is(wrp::base_t root, typename query_t::container_type::const_iterator begin, typename query_t::container_type::const_iterator end) { + for(auto current = begin;current!=end;current++){ + //Accept the current element + if (std::holds_alternative>(current->args)) { + co_yield root; + co_return; + } + //Continue on children if current is element + else if (std::holds_alternative>(current->args)) { + if(root.type()==type_t::ELEMENT) for (auto& child : root.children()) { + for (auto n : is(child, current+1, end)) { + co_yield n; + } + } + co_return; + } + //Continue from here on, FORK will just be consumed on the current branch AND on children if current is element, + else if (std::holds_alternative>(current->args)) { + if(root.type()==type_t::ELEMENT){ + for (auto& child : root.children()) { + for (auto n : is(child, current, end)) { + co_yield n; + } + } + } + else co_return; + } + //Filter based on type + else if (std::holds_alternative>(current->args)) { + auto type = std::get>(current->args); + bool match = false; + switch(root.type()){ + case type_t::ELEMENT: + if(type.is_element)match=true; + break; + case type_t::TEXT: + if(type.is_text)match=true; + break; + case type_t::CDATA: + if(type.is_cdata)match=true; + 
break; + case type_t::COMMENT: + if(type.is_comment)match=true; + break; + case type_t::PROC: + if(type.is_proc)match=true; + break; + case type_t::MARKER: + if(type.is_marker)match=true; + break; + default: + break; + } + if(!match) co_return; //All matches failing. Fail branch. + } + //Match NS + else if ( std::holds_alternative>(current->args) ){ + if(!expr_helper(std::get>(current->args),root.ns())) co_return; + } + //Match name + else if ( std::holds_alternative>(current->args) ){ + if(!expr_helper(std::get>(current->args),root.name())) co_return; + } + //Match value + else if ( std::holds_alternative>(current->args) ){ + if(!expr_helper(std::get>(current->args),root.value())) co_return; + } + //Match text, not implemented as .text() is missing upstream. + /* + else if ( std::holds_alternative>(current->args) ){ + auto pattern = std::get>(current->args); + auto check = root.text(); + if(std::holds_alternative(pattern)){ + if(check==std::get(pattern)); + else co_return; + } + else if(std::holds_alternative>(pattern)){ + if(std::get>(pattern)(check)); + else co_return; + } + else co_return; + } + */ + //TODO: right now attribute matching has k*O(n) complexity if k attributes must be tested. + //By looking ahead it is possible to check if there are more attributes to be tested, and perform the operation in just O(n) + //Match attribute + else if (std::holds_alternative>(current->args)) { + auto pattern = std::get>(current->args); + bool found = false; + if(root.type()!=type_t::ELEMENT)co_return; + for(auto& attr: root.attrs()){ + if(expr_helper(pattern.ns,attr.ns()) && expr_helper(pattern.name,attr.name()) && expr_helper(pattern.value,attr.value())){ + found=true;break; + } + } + if(!found)co_return; + } + else{ + //Failed commands will prevent propagation. 
+ co_return; + } + } + } + +} +} \ No newline at end of file diff --git a/lib/tree.cpp b/lib/tree.cpp index cc5bda19..a47a7c68 100644 --- a/lib/tree.cpp +++ b/lib/tree.cpp @@ -72,17 +72,17 @@ bool TreeRaw::print_h_before(std::ostream& out, const print_cfg_t& cfg, const un //TODO: at some point, convert it not to be recursive. if(ptr->type()==type_t::ELEMENT){ if(ptr->children_range()->first==ptr->children_range()->second){ - VS_XML_NS::print(out,"<{}{}{}", rsv(*ptr->ns()), rsv(*ptr->ns())==""?"":":", rsv(*ptr->name())); + out<<"<"<ns())<<(rsv(*ptr->ns())==""?"":":")<name()); for(auto& i : ptr->attrs()){ if(!configs.raw_strings){ auto t = serialize::to_xml_attr_2(rsv(*i.value())); if(!t.has_value()){/*TODO: Error*/} auto tt = t.value_or(std::string_view("")); std::string_view sv = std::holds_alternative(tt)?std::get(tt):std::get(tt); - VS_XML_NS::print(out," {}{}{}=\"{}\"", rsv(*i.ns()), rsv(*i.ns())==""?"":":", rsv(*i.name()), sv); + out<<" "<"; @@ -95,10 +95,10 @@ bool TreeRaw::print_h_before(std::ostream& out, const print_cfg_t& cfg, const un if(!t.has_value()){/*TODO: Error*/} auto tt = t.value_or(std::string_view("")); std::string_view sv = std::holds_alternative(tt)?std::get(tt):std::get(tt); - VS_XML_NS::print(out," {}{}{}=\"{}\"", rsv(*i.ns()), rsv(*i.ns())==""?"":":", rsv(*i.name()), sv); + out<<" "<"; @@ -110,10 +110,10 @@ bool TreeRaw::print_h_before(std::ostream& out, const print_cfg_t& cfg, const un if(!t.has_value()){/*TODO: Error*/} auto tt = t.value_or(std::string_view("")); std::string_view sv = std::holds_alternative(tt)?std::get(tt):std::get(tt); - VS_XML_NS::print(out,"",sv); + out<<""; } else{ - VS_XML_NS::print(out,"",rsv(*ptr->value())); + out<<"value())<<"]]>"; } } else if(ptr->type()==type_t::COMMENT){ @@ -122,10 +122,10 @@ bool TreeRaw::print_h_before(std::ostream& out, const print_cfg_t& cfg, const un if(!t.has_value()){/*TODO: Error*/} auto tt = t.value_or(std::string_view("")); std::string_view sv = 
std::holds_alternative(tt)?std::get(tt):std::get(tt); - VS_XML_NS::print(out,"",sv); + out<<""; } else{ - VS_XML_NS::print(out,"",rsv(*ptr->value())); + out<<""; } } else if(ptr->type()==type_t::TEXT){ @@ -134,10 +134,10 @@ bool TreeRaw::print_h_before(std::ostream& out, const print_cfg_t& cfg, const un if(!t.has_value()){/*TODO: Error*/} auto tt = t.value_or(std::string_view("")); std::string_view sv = std::holds_alternative(tt)?std::get(tt):std::get(tt); - VS_XML_NS::print(out,"{}",sv); + out<value())); + out<value()); } } else if(ptr->type()==type_t::PROC){ @@ -146,10 +146,10 @@ bool TreeRaw::print_h_before(std::ostream& out, const print_cfg_t& cfg, const un if(!t.has_value()){/*TODO: Error*/} auto tt = t.value_or(std::string_view("")); std::string_view sv = std::holds_alternative(tt)?std::get(tt):std::get(tt); - VS_XML_NS::print(out,"",sv); + out<<""; } else{ - VS_XML_NS::print(out,"",rsv(*ptr->value())); + out<<"value())<<"?>"; } } else if(ptr->type()==type_t::MARKER){ @@ -167,7 +167,7 @@ bool TreeRaw::print_h_after(std::ostream& out, const print_cfg_t& cfg, const unk if(ptr->children_range()->first==ptr->children_range()->second){ } else{ - VS_XML_NS::print(out,"", rsv(*ptr->ns()), rsv(*ptr->ns())==""?"":":", rsv(*ptr->name())); + out<<"ns())<<(rsv(*ptr->ns())==""?"":":")<name())<<">"; } } else if(ptr->type()==type_t::CDATA){ From 082ac7d9d857c5a7a1ebafc95b146ab79e5a4a46 Mon Sep 17 00:00:00 2001 From: Karuro Chari Date: Sun, 6 Jul 2025 22:31:53 +0100 Subject: [PATCH 10/12] Restored the original `print` as `print_fast`. This will need to be documented. 
--- benchmark/src/serialize-big.cpp | 4 +- include/vs-xml/document.hpp | 1 + include/vs-xml/tree.hpp | 3 +- lib/document.cpp | 7 ++ lib/tree.cpp | 119 ++++++++++++++++++++++++++++++-- test/src/base-checks.cpp | 2 +- 6 files changed, 127 insertions(+), 9 deletions(-) diff --git a/benchmark/src/serialize-big.cpp b/benchmark/src/serialize-big.cpp index 903d4703..2e73afbc 100644 --- a/benchmark/src/serialize-big.cpp +++ b/benchmark/src/serialize-big.cpp @@ -29,7 +29,7 @@ int test_vs(std::string_view xmlInput){ std::string str; std::stringstream file(str); - tree->print(file); + tree->print_fast(file); }catch (const std::exception &ex) { std::cerr << "Error while testing: " << ex.what() << "\n"; @@ -51,7 +51,7 @@ int test_vs2(std::string_view binInput){ std::stringstream file(str); //tree.print(file); - tree->save_binary(file); + tree->print_fast(file); }catch (const std::exception &ex) { std::cerr << "Error while testing: " << ex.what() << "\n"; diff --git a/include/vs-xml/document.hpp b/include/vs-xml/document.hpp index 408d1d83..043c8959 100644 --- a/include/vs-xml/document.hpp +++ b/include/vs-xml/document.hpp @@ -34,6 +34,7 @@ struct DocumentRaw : TreeRaw { using TreeRaw::TreeRaw; bool print(std::ostream& out, const print_cfg_t& cfg = {})const; + bool print_fast(std::ostream& out, const print_cfg_t& cfg = {})const; /** * @brief Return the root of the proper tree inside the document (if present) diff --git a/include/vs-xml/tree.hpp b/include/vs-xml/tree.hpp index 3c3d93c0..447c51a2 100644 --- a/include/vs-xml/tree.hpp +++ b/include/vs-xml/tree.hpp @@ -143,7 +143,7 @@ struct TreeRaw{ */ bool print(std::ostream& out, const print_cfg_t& cfg = {}, const unknown_t* node = nullptr)const; - bool print2(std::ostream& out, const print_cfg_t& cfg = {}, const unknown_t* node = nullptr)const; + bool print_fast(std::ostream& out, const print_cfg_t& cfg = {}, const unknown_t* node = nullptr)const; bool save_binary(std::ostream& out)const; @@ -185,6 +185,7 @@ struct TreeRaw{ 
protected: + bool print_h(std::ostream& out, const print_cfg_t& cfg = {}, const unknown_t* ptr=nullptr) const; bool print_h_before(std::ostream& out, const print_cfg_t& cfg = {}, const unknown_t* ptr=nullptr) const; bool print_h_after(std::ostream& out, const print_cfg_t& cfg = {}, const unknown_t* ptr=nullptr) const; diff --git a/lib/document.cpp b/lib/document.cpp index 7c3142a7..fee2c849 100644 --- a/lib/document.cpp +++ b/lib/document.cpp @@ -9,6 +9,13 @@ bool DocumentRaw::print(std::ostream& out, const print_cfg_t& cfg)const{ return true; } +bool DocumentRaw::print_fast(std::ostream& out, const print_cfg_t& cfg)const{ + for(auto& it: TreeRaw::root().children()){ + if(!TreeRaw::print_fast(out, cfg, &it))return false; + } + return true; +} + /** * @brief Return the root of the proper tree inside the document (if present) * diff --git a/lib/tree.cpp b/lib/tree.cpp index a47a7c68..cd45350b 100644 --- a/lib/tree.cpp +++ b/lib/tree.cpp @@ -186,6 +186,105 @@ bool TreeRaw::print_h_after(std::ostream& out, const print_cfg_t& cfg, const unk return true; }; +bool TreeRaw::print_h(std::ostream& out, const print_cfg_t& cfg, const unknown_t* ptr) const{ + //TODO: at some point, convert it not to be recursive. 
+ if(ptr->type()==type_t::ELEMENT){ + if(ptr->children_range()->first==ptr->children_range()->second){ + out<<"<"<ns())<<(rsv(*ptr->ns())==""?"":":")<name()); + for(auto& i : ptr->attrs()){ + if(!configs.raw_strings){ + auto t = serialize::to_xml_attr_2(rsv(*i.value())); + if(!t.has_value()){/*TODO: Error*/} + auto tt = t.value_or(std::string_view("")); + std::string_view sv = std::holds_alternative(tt)?std::get(tt):std::get(tt); + out<<" "<"; + } + else{ + VS_XML_NS::print(out,"<{}{}{}", rsv(*ptr->ns()), rsv(*ptr->ns())==""?"":":", rsv(*ptr->name())); + for(auto& i : ptr->attrs()){ + if(!configs.raw_strings){ + auto t = serialize::to_xml_attr_2(rsv(*i.value())); + if(!t.has_value()){/*TODO: Error*/} + auto tt = t.value_or(std::string_view("")); + std::string_view sv = std::holds_alternative(tt)?std::get(tt):std::get(tt); + out<<" "<"; + for(auto& i : ptr->children()){ + print_h(out,cfg,&i); + } + out<<"ns())<<(rsv(*ptr->ns())==""?"":":")<name())<<">"; + + } + } + else if(ptr->type()==type_t::CDATA){ + if(!configs.raw_strings){ + auto t = serialize::to_xml_cdata(rsv(*ptr->value())); + if(!t.has_value()){/*TODO: Error*/} + auto tt = t.value_or(std::string_view("")); + std::string_view sv = std::holds_alternative(tt)?std::get(tt):std::get(tt); + out<<""; + } + else{ + out<<"value())<<"]]>"; + } + } + else if(ptr->type()==type_t::COMMENT){ + if(!configs.raw_strings){ + auto t = serialize::to_xml_comment(rsv(*ptr->value())); + if(!t.has_value()){/*TODO: Error*/} + auto tt = t.value_or(std::string_view("")); + std::string_view sv = std::holds_alternative(tt)?std::get(tt):std::get(tt); + out<<""; + } + else{ + out<<""; + } + } + else if(ptr->type()==type_t::TEXT){ + if(!configs.raw_strings){ + auto t = serialize::to_xml_text(rsv(*ptr->value())); + if(!t.has_value()){/*TODO: Error*/} + auto tt = t.value_or(std::string_view("")); + std::string_view sv = std::holds_alternative(tt)?std::get(tt):std::get(tt); + out<value()); + } + } + else if(ptr->type()==type_t::PROC){ + 
if(!configs.raw_strings){ + auto t = serialize::to_xml_proc(rsv(*ptr->value())); + if(!t.has_value()){/*TODO: Error*/} + auto tt = t.value_or(std::string_view("")); + std::string_view sv = std::holds_alternative(tt)?std::get(tt):std::get(tt); + out<<""; + } + else{ + out<<"value())<<"?>"; + + } + } + else if(ptr->type()==type_t::MARKER){ + //Skip, marker points are not XML, they are only internally used. + //or emit something in a special namespace? not sure + } + else{return false;} + return true; +}; + + const TreeRaw TreeRaw::slice(const element_t* ref) const{ //TODO: Move to std::expected xml_assert((uint8_t*)ref>=(uint8_t*)buffer.data() && (uint8_t*)ref<(uint8_t*)buffer.data()+buffer.size(), "out of bounds node pointer"); @@ -330,20 +429,22 @@ void Tree::visit(wrp::base_t node, std::function(node,test,before,after); } -bool TreeRaw::print2(std::ostream& out, const print_cfg_t& cfg, const unknown_t* node)const{ +bool TreeRaw::print(std::ostream& out, const print_cfg_t& cfg, const unknown_t* node)const{ if(node==nullptr)node = (const unknown_t*)&root(); - auto test = +[](const unknown_t* n, void* _ctx)static{return true;}; struct ctx_t{ const TreeRaw* that; const print_cfg_t cfg; std::ostream& out; }; ctx_t ctx{this,cfg,out}; - auto before = +[](const unknown_t* n, void* _ctx)static{ + static constexpr auto test = +[](const unknown_t* n, void* _ctx)static{ + return true; + }; + static constexpr auto before = +[](const unknown_t* n, void* _ctx)static{ ctx_t& ctx = *(ctx_t*)_ctx; ctx.that->print_h_before(ctx.out,ctx.cfg,n); }; - auto after = +[](const unknown_t* n, void* _ctx)static{ + static constexpr auto after = +[](const unknown_t* n, void* _ctx)static{ ctx_t& ctx = *(ctx_t*)_ctx; ctx.that->print_h_after(ctx.out,ctx.cfg,n); }; @@ -351,7 +452,8 @@ bool TreeRaw::print2(std::ostream& out, const print_cfg_t& cfg, const unknown_t* return true; } -bool TreeRaw::print(std::ostream& out, const print_cfg_t& cfg, const unknown_t* node)const{ +/* +bool 
TreeRaw::print2(std::ostream& out, const print_cfg_t& cfg, const unknown_t* node)const{ if(node==nullptr)node = (const unknown_t*)&root(); auto test = +[](const unknown_t* n, std::ostream& out, const print_cfg_t& cfg, const TreeRaw* that)static{return true;}; @@ -364,6 +466,13 @@ bool TreeRaw::print(std::ostream& out, const print_cfg_t& cfg, const unknown_t* VS_XML_NS::visit<>(node,test,before,after,out,cfg,this); return true; } +*/ + +bool TreeRaw::print_fast(std::ostream& out, const print_cfg_t& cfg, const unknown_t* node)const{ + if(node==nullptr)node = (const unknown_t*)&root(); + return print_h(out,cfg,node); +} + } \ No newline at end of file diff --git a/test/src/base-checks.cpp b/test/src/base-checks.cpp index 17085783..ccf6347e 100644 --- a/test/src/base-checks.cpp +++ b/test/src/base-checks.cpp @@ -60,7 +60,7 @@ auto test(){ auto tree = *mk_tree(); tree.print(std::cout,{}); std::print("\n---\n"); - tree.print2(std::cout,{}); + tree.print_fast(std::cout,{}); std::print("\n---\n"); tree.reorder(); tree.print(std::cout,{}); From c6e0e15fb80a2a09256d77b15eecd0a4b64cde46 Mon Sep 17 00:00:00 2001 From: Karuro Chari Date: Mon, 7 Jul 2025 05:24:10 +0100 Subject: [PATCH 11/12] Optimized node by removing explicit `_size`. Updated release notes. --- RELEASE.md | 7 +++++-- include/vs-xml/node.hpp | 21 +++++++++++++-------- lib/tree-builder.cpp | 2 +- lib/tree.cpp | 2 +- 4 files changed, 20 insertions(+), 12 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index ae8ad707..ad031300 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -3,10 +3,13 @@ - `DocBuilder` renamed as `DocumentBuilder` to be more consistent in naming. - Removed `path` functions from XML entities, leftovers from the very early versions; this functionality can now be trivially replaced by user-defined functions, since the rest of the interface is complete. +- The binary interface of the tree changed. Again. But it is for good reasons! 
We optimized away one of the biggest fields in nodes, saving a significant amount of memory. + Technically this change prevents out of order nodes in the memory layout, but this was just a side-effect extra, not something intended. ## Features -Introduced `Tree::visit` and `TreeRaw::visit` to implement a slightly different and more flexible visitor pattern when compared to the one recently added iterator. +Introduced `Tree::visit` and `TreeRaw::visit` to implement a more flexible visitor pattern when compared to the one recently added iterator-based approach. They are both based on `private/(wrp-)visit.hpp`, which is not publicly exposed (for now). -New `print` functions have been introduced for trees et similia. It no longer uses `std::print` due to the awful overhead and additional memory allocations. `fmt` had no such issue to be honest. \ No newline at end of file +New `print` functions have been introduced for trees, based on the visitor pattern. It no longer uses `std::print` due to the awful overhead and additional memory allocations. `fmt` had no such issue to be honest. +The legacy version has been optimized as well: it is now called `print_fast` and still uses simple recursion to get a significant edge on performance; however, be mindful of stack overflows if working with stack-intensive applications or deeply nested trees.
\ No newline at end of file diff --git a/include/vs-xml/node.hpp b/include/vs-xml/node.hpp index 0f8e8018..a7eacf21 100644 --- a/include/vs-xml/node.hpp +++ b/include/vs-xml/node.hpp @@ -31,7 +31,11 @@ namespace VS_XML_NS{ template struct base_t{ protected: - type_t _type = T::deftype(); + type_t _type : 4 = T::deftype(); + uint8_t _bit0 : 1; + uint8_t _bit1 : 1; + uint8_t _bit2 : 1; + uint8_t _bit3 : 1; public: typedef T base; @@ -93,7 +97,8 @@ struct element_t : base_t{ delta_ptr_t _prev; delta_ptr_t _next; - xml_size_t _size; + //bool _has_next; + //_has_next=_bit0; xml_count_t attrs_count; @@ -107,13 +112,13 @@ struct element_t : base_t{ _name(offset,serialize::validate_xml_label(_name)) { set_parent(_parent); - _size=0; + _bit0=false; attrs_count=0; } inline void set_parent(element_t* parent){auto tmp=(uint8_t*)parent-(uint8_t*)this;_parent=tmp;xml_assert((std::ptrdiff_t)_parent==tmp, "Loss of precision");} inline void set_prev(unknown_t* prev){auto tmp=(uint8_t*)prev-(uint8_t*)this;_prev=tmp;xml_assert((std::ptrdiff_t)_prev==tmp, "Loss of precision");} - inline void set_next(unknown_t* next){auto tmp=(uint8_t*)next-(uint8_t*)this;_next=tmp;xml_assert((std::ptrdiff_t)_next==tmp, "Loss of precision");} + inline void set_next(unknown_t* next){auto tmp=(uint8_t*)next-(uint8_t*)this;_next=tmp;_bit0=true;xml_assert((std::ptrdiff_t)_next==tmp, "Loss of precision");} //Unsafe, not boundary checked. 
inline attr_t& get_attr(xml_count_t a) const{return (attr_t&)_attrs[a];} @@ -131,7 +136,7 @@ struct element_t : base_t{ inline std::expected,feature_t> children_range() const { return std::pair{ (const unknown_t*)((const uint8_t*)this+sizeof(element_t)+sizeof(attr_t)*attrs_count), - (const unknown_t*)((const uint8_t*)this+_size) + (const unknown_t*)((const uint8_t*)this+_next) }; } inline std::expected,feature_t> attrs_range() const { @@ -150,14 +155,14 @@ struct element_t : base_t{ return (const unknown_t*)((const uint8_t*)this+_prev); } inline const unknown_t* next() const { - if(_next==0)return (const unknown_t*) (parent()->_size+_parent+(const uint8_t*)this); + if(_next==0)return nullptr; return (const unknown_t*)((const uint8_t*)this+_next); } - inline bool has_children() const {return (const unknown_t*)((const uint8_t*)this+sizeof(element_t)+sizeof(attr_t)*attrs_count)!=(const unknown_t*)((const uint8_t*)this+_size);} + inline bool has_children() const {return (const unknown_t*)((const uint8_t*)this+sizeof(element_t)+sizeof(attr_t)*attrs_count)!=(const unknown_t*)((const uint8_t*)this+_next);} inline bool has_parent() const {return _parent!=0;} inline bool has_prev() const {return _prev!=0;} - inline bool has_next() const {return _next!=0;} + inline bool has_next() const {return _bit0;} template friend struct TreeBuilder; diff --git a/lib/tree-builder.cpp b/lib/tree-builder.cpp index 059bdffe..7c764d8b 100644 --- a/lib/tree-builder.cpp +++ b/lib/tree-builder.cpp @@ -74,7 +74,7 @@ BuilderBase::error_t BuilderBase::end(){ auto& ctx = stack.back(); element_t* parent = (element_t*)(buffer.data()+ctx.first); - parent->_size=buffer.size()-ctx.first; + parent->_next=buffer.size()-ctx.first; stack.pop_back(); diff --git a/lib/tree.cpp b/lib/tree.cpp index cd45350b..585b010d 100644 --- a/lib/tree.cpp +++ b/lib/tree.cpp @@ -295,7 +295,7 @@ const TreeRaw TreeRaw::slice(const element_t* ref) const{ ref=(const element_t*)&root(); } - std::span tmp = {( 
uint8_t*)ref,ref->_size}; + std::span tmp = {( uint8_t*)ref,(size_t)ref->_next}; return TreeRaw(configs,tmp,this->symbols); }; From 9fca11b2bba1103624d90f8fdb73b7e9643b5ee8 Mon Sep 17 00:00:00 2001 From: Karuro Chari Date: Mon, 7 Jul 2025 05:34:23 +0100 Subject: [PATCH 12/12] Preparing release --- RELEASE.md | 9 ++++++--- docs/releases/v0.2.13.md | 15 +++++++++++++++ 2 files changed, 21 insertions(+), 3 deletions(-) create mode 100644 docs/releases/v0.2.13.md diff --git a/RELEASE.md b/RELEASE.md index ad031300..584696b3 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,14 +1,17 @@ +New release, fresh out of the oven. Many of the improvements are not reported in here, as they are mostly infrastructural and will not impact the end user; please check commits if you want more details. +The main focus was to extend the current interface with useful utilities, remove even more dynamic allocations when not needed, and fix some downstream issues when linking this library. + ## Breaking - `DocBuilder` renamed as `DocumentBuilder` to be more consistent in naming. - Removed `path` functions from XML entities, leftovers from the very early versions; this functionality can now be trivially replaced by user-defined functions, since the rest of the interface is complete. -- The binary interface of the tree changed. Again. But it is for good reasons! We optimized away one of the biggest fields in nodes, saving a significant amount of memory. +- The binary interface for trees and derived friends changed. Again. But it is for good reasons! We optimized away one of the biggest fields in nodes, saving a significant amount of memory. Technically this change prevents out of order nodes in the memory layout, but this was just a side-effect extra, not something intended. -## Features +## New features -Introduced `Tree::visit` and `TreeRaw::visit` to implement a more flexible visitor pattern when compared to the one recently added iterator-based approach. 
+Introduced `TreeRaw::visit` and `Tree::visit` to implement a more flexible visitor pattern when compared to the one recently added iterator-based approach. They are both based on `private/(wrp-)visit.hpp`, which is not publicly exposed (for now). New `print` functions have been introduced for trees, based on the visitor pattern. It no longer uses `std::print` due to the awful overhead and additional memory allocations. `fmt` had no such issue to be honest. diff --git a/docs/releases/v0.2.13.md b/docs/releases/v0.2.13.md new file mode 100644 index 00000000..7f46917d --- /dev/null +++ b/docs/releases/v0.2.13.md @@ -0,0 +1,15 @@ +## Breaking + +- `DocBuilder` renamed as `DocumentBuilder` to be more consistent in naming. +- Removed `path` functions from XML entities, leftovers from the very early versions; + this functionality can now be trivially replaced by user-defined functions, since the rest of the interface is complete. +- The binary interface of the tree changed. Again. But it is for good reasons! We optimized away one of the biggest fields in nodes, saving a significant amount of memory. + Technically this change prevents out of order nodes in the memory layout, but this was just a side-effect extra, not something intended. + +## Features + +Introduced `TreeRaw::visit` and `Tree::visit` to implement a more flexible visitor pattern when compared to the one recently added iterator-based approach. +They are both based on `private/(wrp-)visit.hpp`, which is not publicly exposed (for now). + +New `print` functions have been introduced for trees, based on the visitor pattern. It no longer uses `std::print` due to the awful overhead and additional memory allocations. `fmt` had no such issue to be honest. +The legacy version has been optimized as well: it is now called `print_fast` and still uses simple recursion to get a significant edge on performance; however, be mindful of stack overflows if working with stack-intensive applications or badly nested trees.
\ No newline at end of file