diff --git a/be/src/core/block/block.cpp b/be/src/core/block/block.cpp index 2bb156325443e3..4d0b67b93052e9 100644 --- a/be/src/core/block/block.cpp +++ b/be/src/core/block/block.cpp @@ -1081,15 +1081,6 @@ std::unique_ptr Block::create_same_struct_block(size_t size, bool is_rese return temp_block; } -void Block::shrink_char_type_column_suffix_zero(const std::vector& char_type_idx) { - for (auto idx : char_type_idx) { - if (idx < data.size()) { - auto& col_and_name = this->get_by_position(idx); - col_and_name.column->assume_mutable()->shrink_padding_chars(); - } - } -} - size_t MutableBlock::allocated_bytes() const { size_t res = 0; for (const auto& col : _columns) { diff --git a/be/src/core/block/block.h b/be/src/core/block/block.h index 62186b36cced7e..c7dced80c60224 100644 --- a/be/src/core/block/block.h +++ b/be/src/core/block/block.h @@ -346,9 +346,6 @@ class Block { return res; } - // for String type or Array type - void shrink_char_type_column_suffix_zero(const std::vector& char_type_idx); - void clear_column_mem_not_keep(const std::vector& column_keep_flags, bool need_keep_first); diff --git a/be/src/core/column/column.h b/be/src/core/column/column.h index d20ecc9d820846..8521ec15029038 100644 --- a/be/src/core/column/column.h +++ b/be/src/core/column/column.h @@ -113,10 +113,6 @@ class IColumn : public COW { return nullptr; } - // shrink the end zeros for ColumnStr(also for who has it nested). so nest column will call it for all nested. - // for non-str col, will reach here(do nothing). only ColumnStr will really shrink itself. - virtual void shrink_padding_chars() {} - // Only used in ColumnVariant to handle lifecycle of variant. Other columns would do nothing. virtual void finalize() {} diff --git a/be/src/core/column/column_array.cpp b/be/src/core/column/column_array.cpp index 6de4d96cc326f7..35e56b2d900302 100644 --- a/be/src/core/column/column_array.cpp +++ b/be/src/core/column/column_array.cpp @@ -98,10 +98,6 @@ ColumnArray::ColumnArray(MutableColumnPtr&& nested_column) : data(std::move(nest offsets = ColumnOffsets::create(); } -void ColumnArray::shrink_padding_chars() { - data->shrink_padding_chars(); -} - std::string ColumnArray::get_name() const { return "Array(" + get_data().get_name() + ")"; } diff --git a/be/src/core/column/column_array.h b/be/src/core/column/column_array.h index c11547bdbf5e2d..5c5518d42fa1e4 100644 --- a/be/src/core/column/column_array.h +++ b/be/src/core/column/column_array.h @@ -117,8 +117,6 @@ class ColumnArray final : public COWHelper { offsets->sanity_check(); } - void shrink_padding_chars() override; - /** On the index i there is an offset to the beginning of the i + 1 -th element. */ using ColumnOffsets = ColumnOffset64; diff --git a/be/src/core/column/column_dictionary.h b/be/src/core/column/column_dictionary.h index a1835b817a7233..7ac780e2bde4d8 100644 --- a/be/src/core/column/column_dictionary.h +++ b/be/src/core/column/column_dictionary.h @@ -231,7 +231,7 @@ class ColumnDictI32 final : public COWHelper { _dict.initialize_hash_values_for_runtime_filter(); } - uint32_t get_hash_value(uint32_t idx) const { return _dict.get_hash_value(_codes[idx], _type); } + uint32_t get_hash_value(uint32_t idx) const { return _dict.get_hash_value(_codes[idx]); } template void find_codes(const HybridSetType* values, std::vector& selected) const { @@ -278,14 +278,6 @@ class ColumnDictI32 final : public COWHelper { inline const StringRef& get_value(value_type code) const { return _dict.get_value(code); } - inline StringRef get_shrink_value(value_type code) const { - StringRef result = _dict.get_value(code); - if (_type == FieldType::OLAP_FIELD_TYPE_CHAR) { - result.size = strnlen(result.data, result.size); - } - return result; - } - size_t dict_size() const { return _dict.size(); } std::string dict_debug_string() const { return _dict.debug_string(); } @@ -326,26 +318,13 @@ class ColumnDictI32 final : public COWHelper { } } - inline uint32_t get_hash_value(Int32 code, FieldType type) const { + inline uint32_t get_hash_value(Int32 code) const { if (_compute_hash_value_flags[code]) { return _hash_values[code]; } else { auto& sv = (*_dict_data)[code]; - // The char data is stored in the disk with the schema length, - // and zeros are filled if the length is insufficient - - // When reading data, use shrink_char_type_column_suffix_zero(_char_type_idx) - // Remove the suffix 0 - // When writing data, use the CharField::consume function to fill in the trailing 0. - - // For dictionary data of char type, sv.size is the schema length, - // so use strnlen to remove the 0 at the end to get the actual length. - size_t len = sv.size; - if (type == FieldType::OLAP_FIELD_TYPE_CHAR) { - len = strnlen(sv.data, sv.size); - } uint32_t hash_val = - crc32c::Extend(0, (const uint8_t*)sv.data, static_cast(len)); + crc32c::Extend(0, (const uint8_t*)sv.data, static_cast(sv.size)); _hash_values[code] = hash_val; _compute_hash_value_flags[code] = 1; return _hash_values[code]; diff --git a/be/src/core/column/column_map.cpp b/be/src/core/column/column_map.cpp index 48db377d888b75..bf7406c87af05e 100644 --- a/be/src/core/column/column_map.cpp +++ b/be/src/core/column/column_map.cpp @@ -643,11 +643,6 @@ Status ColumnMap::deduplicate_keys(bool recursive) { return Status::OK(); } -void ColumnMap::shrink_padding_chars() { - keys_column->shrink_padding_chars(); - values_column->shrink_padding_chars(); -} - void ColumnMap::reserve(size_t n) { get_offsets().reserve(n); keys_column->reserve(n); diff --git a/be/src/core/column/column_map.h b/be/src/core/column/column_map.h index 12f8fe4f8184ab..b1fb4bb31764a0 100644 --- a/be/src/core/column/column_map.h +++ b/be/src/core/column/column_map.h @@ -115,7 +115,6 @@ class ColumnMap final : public COWHelper { const char* deserialize_and_insert_from_arena(const char* pos) override; void update_hash_with_value(size_t n, SipHash& hash) const override; - void shrink_padding_chars() override; ColumnPtr filter(const Filter& filt, ssize_t result_size_hint) const override; size_t filter(const Filter& filter) override; MutableColumnPtr permute(const Permutation& perm, size_t limit) const override; diff --git a/be/src/core/column/column_nullable.cpp b/be/src/core/column/column_nullable.cpp index 95b186fe894b69..3965332e8ef939 100644 --- a/be/src/core/column/column_nullable.cpp +++ b/be/src/core/column/column_nullable.cpp @@ -47,10 +47,6 @@ ColumnNullable::ColumnNullable(MutableColumnPtr&& nested_column_, MutableColumnP } } -void ColumnNullable::shrink_padding_chars() { - get_nested_column_ptr()->shrink_padding_chars(); -} - void ColumnNullable::update_xxHash_with_value(size_t start, size_t end, uint64_t& hash, const uint8_t* __restrict null_data) const { if (!has_null(start, end)) { diff --git a/be/src/core/column/column_nullable.h b/be/src/core/column/column_nullable.h index a31df0937d2b61..183f730de9d717 100644 --- a/be/src/core/column/column_nullable.h +++ b/be/src/core/column/column_nullable.h @@ -84,8 +84,6 @@ class ColumnNullable final : public COWHelper { _nested_column->sanity_check(); } - void shrink_padding_chars() override; - bool is_variable_length() const override { return _nested_column->is_variable_length(); } std::string get_name() const override { return "Nullable(" + _nested_column->get_name() + ")"; } diff --git a/be/src/core/column/column_string.cpp b/be/src/core/column/column_string.cpp index e21e2bb83cbabd..399c0952b892da 100644 --- a/be/src/core/column/column_string.cpp +++ b/be/src/core/column/column_string.cpp @@ -85,29 +85,6 @@ MutableColumnPtr ColumnStr::clone_resized(size_t to_size) const { return res; } -template -void ColumnStr::shrink_padding_chars() { - if (size() == 0) { - return; - } - char* data = reinterpret_cast(chars.data()); - auto* offset = offsets.data(); - size_t size = offsets.size(); - - // deal the 0-th element. no need to move. - auto next_start = offset[0]; - offset[0] = static_cast(strnlen(data, size_at(0))); - for (size_t i = 1; i < size; i++) { - // get the i-th length and whole move it to cover the last's trailing void - auto length = strnlen(data + next_start, offset[i] - next_start); - memmove(data + offset[i - 1], data + next_start, length); - // offset i will be changed. so save the old value for (i+1)-th to get its length. - next_start = offset[i]; - offset[i] = offset[i - 1] + static_cast(length); - } - chars.resize_fill(offsets.back()); // just call it to shrink memory here. no possible to expand. -} - // This method is only called by MutableBlock::merge_ignore_overflow // by hash join operator to collect build data to avoid // the total string length of a ColumnStr column exceeds the 4G limit. diff --git a/be/src/core/column/column_string.h b/be/src/core/column/column_string.h index 5a1537ff2c04eb..7bae712f849721 100644 --- a/be/src/core/column/column_string.h +++ b/be/src/core/column/column_string.h @@ -145,8 +145,6 @@ class ColumnStr final : public COWHelper> { MutableColumnPtr clone_resized(size_t to_size) const override; - void shrink_padding_chars() override; - Field operator[](size_t n) const override; void get(size_t n, Field& res) const override; diff --git a/be/src/core/column/column_struct.cpp b/be/src/core/column/column_struct.cpp index 5cf8f390634627..57463f6c435717 100644 --- a/be/src/core/column/column_struct.cpp +++ b/be/src/core/column/column_struct.cpp @@ -338,12 +338,6 @@ MutableColumnPtr ColumnStruct::permute(const Permutation& perm, size_t limit) co return ColumnStruct::create(new_columns); } -void ColumnStruct::shrink_padding_chars() { - for (auto& column : columns) { - column->shrink_padding_chars(); - } -} - void ColumnStruct::reserve(size_t n) { const size_t tuple_size = columns.size(); for (size_t i = 0; i < tuple_size; ++i) { diff --git a/be/src/core/column/column_struct.h b/be/src/core/column/column_struct.h index ca77868d056125..b602376f561590 100644 --- a/be/src/core/column/column_struct.h +++ b/be/src/core/column/column_struct.h @@ -150,8 +150,6 @@ class ColumnStruct final : public COWHelper { int compare_at(size_t n, size_t m, const IColumn& rhs_, int nan_direction_hint) const override; - void shrink_padding_chars() override; - void reserve(size_t n) override; void resize(size_t n) override; size_t byte_size() const override; diff --git a/be/src/core/column/predicate_column.h b/be/src/core/column/predicate_column.h index d98743500dbb1e..4eaeb78dbfeec2 100644 --- a/be/src/core/column/predicate_column.h +++ b/be/src/core/column/predicate_column.h @@ -104,11 +104,7 @@ class PredicateColumnType final : public COWHelper) { - auto res = reinterpret_cast(data[n]); - if constexpr (Type == TYPE_CHAR) { - res.size = strnlen(res.data, res.size); - } - return res; + return reinterpret_cast(data[n]); } else { throw doris::Exception( ErrorCode::INTERNAL_ERROR, diff --git a/be/src/core/data_type_serde/data_type_string_serde.cpp b/be/src/core/data_type_serde/data_type_string_serde.cpp index b7a59b3c07e42a..08d87cc38cac92 100644 --- a/be/src/core/data_type_serde/data_type_string_serde.cpp +++ b/be/src/core/data_type_serde/data_type_string_serde.cpp @@ -462,25 +462,16 @@ Status DataTypeStringSerDeBase::from_string(StringRef& str, IColumn& // Deserializes a STRING/VARCHAR/CHAR value from its OLAP string representation // (e.g. from ZoneMap protobuf). This is the inverse of to_olap_string(). -// -// For CHAR type: if the string is shorter than the declared column length (_len), -// pads with '\0' bytes to reach _len. This preserves CHAR's fixed-length semantics. -// For STRING/VARCHAR: stores the string as-is. -// -// Examples: -// CHAR(10), str="hello" => field = "hello\0\0\0\0\0" (10 bytes) -// VARCHAR, str="hello" => field = "hello" (5 bytes) template Status DataTypeStringSerDeBase::from_olap_string(const std::string& str, Field& field, const FormatOptions& options) const { - if (cast_set(str.size()) < _len) { - DCHECK_EQ(_type, TYPE_CHAR); - std::string tmp(_len, '\0'); - memcpy(tmp.data(), str.data(), str.size()); - field = Field::create_field(std::move(tmp)); - } else { - field = Field::create_field(str); - } + // CHAR(N) writes through OlapColumnDataConvertorChar are zero-padded to + // the declared schema length, so the serialized OLAP string carries + // trailing '\0' bytes. strnlen() drops that padding to surface the + // logical character content in the Field. VARCHAR / STRING never write + // trailing '\0' through this path, so strnlen is a no-op for them. + size_t len = strnlen(str.data(), str.size()); + field = Field::create_field(std::string(str.data(), len)); return Status::OK(); } diff --git a/be/src/exec/rowid_fetcher.cpp b/be/src/exec/rowid_fetcher.cpp index 27c66197541f5e..69804323fde92d 100644 --- a/be/src/exec/rowid_fetcher.cpp +++ b/be/src/exec/rowid_fetcher.cpp @@ -206,30 +206,6 @@ Status RowIDFetcher::_merge_rpc_results(const PMultiGetRequest& request, return Status::OK(); } -bool _has_char_type(const DataTypePtr& type) { - switch (type->get_primitive_type()) { - case TYPE_CHAR: { - return true; - } - case TYPE_ARRAY: { - const auto* arr_type = assert_cast(remove_nullable(type).get()); - return _has_char_type(arr_type->get_nested_type()); - } - case TYPE_MAP: { - const auto* map_type = assert_cast(remove_nullable(type).get()); - return _has_char_type(map_type->get_key_type()) || - _has_char_type(map_type->get_value_type()); - } - case TYPE_STRUCT: { - const auto* struct_type = assert_cast(remove_nullable(type).get()); - return std::any_of(struct_type->get_elements().begin(), struct_type->get_elements().end(), - [&](const DataTypePtr& dt) -> bool { return _has_char_type(dt); }); - } - default: - return false; - } -} - Status RowIDFetcher::fetch(const ColumnPtr& column_row_ids, Block* res_block) { CHECK(!_stubs.empty()); PMultiGetRequest mget_req = _init_fetch_request( @@ -279,16 +255,6 @@ Status RowIDFetcher::fetch(const ColumnPtr& column_row_ids, Block* res_block) { } // Check row consistency RETURN_IF_CATCH_EXCEPTION(res_block->check_number_of_rows()); - // shrink for char type - std::vector char_type_idx; - for (size_t i = 0; i < _fetch_option.desc->slots().size(); i++) { - const auto& column_desc = _fetch_option.desc->slots()[i]; - const auto type = column_desc->type(); - if (_has_char_type(type)) { - char_type_idx.push_back(i); - } - } - res_block->shrink_char_type_column_suffix_zero(char_type_idx); VLOG_DEBUG << "dump block:" << res_block->dump_data(0, 10); return Status::OK(); } @@ -592,15 +558,6 @@ Status RowIdStorageReader::read_by_rowids(const PMultiGetRequestV2& request, for (const auto& pslot : request_block_desc.slots()) { slots.push_back(SlotDescriptor(pslot)); } - // prepare block char vector shrink for char type - std::vector char_type_idx; - for (int j = 0; j < slots.size(); ++j) { - auto slot = slots[j]; - if (_has_char_type(slot.type())) { - char_type_idx.push_back(j); - } - } - try { if (first_file_mapping->type == FileMappingType::INTERNAL) { RETURN_IF_ERROR(read_batch_doris_format_row( @@ -618,9 +575,6 @@ Status RowIdStorageReader::read_by_rowids(const PMultiGetRequestV2& request, return Status::Error(e.code(), "Row id fetch failed because {}", e.what()); } - - // after read the block, shrink char type block - result_blocks[i].shrink_char_type_column_suffix_zero(char_type_idx); } [[maybe_unused]] size_t compressed_size = 0; diff --git a/be/src/exprs/bloom_filter_func_adaptor.h b/be/src/exprs/bloom_filter_func_adaptor.h index d41a12ff64832f..5cc5d33215d359 100644 --- a/be/src/exprs/bloom_filter_func_adaptor.h +++ b/be/src/exprs/bloom_filter_func_adaptor.h @@ -243,18 +243,6 @@ struct StringFindOp : CommonFindOp { } }; -// We do not need to judge whether data is empty, because null will not appear -// when filer used by the storage engine -template -struct FixedStringFindOp : public StringFindOp { - static uint16_t find_batch_olap_engine(const BloomFilterAdaptor& bloom_filter, const char* data, - const uint8_t* nullmap, uint16_t* offsets, int number, - const bool is_parse_column) { - return find_batch_olap( - bloom_filter, data, nullmap, offsets, number, is_parse_column); - } -}; - template struct BloomFilterTypeTraits { using T = typename PrimitiveTypeTraits::CppType; @@ -263,7 +251,7 @@ struct BloomFilterTypeTraits { template struct BloomFilterTypeTraits { - using FindOp = FixedStringFindOp; + using FindOp = StringFindOp; }; template diff --git a/be/src/exprs/bloom_filter_func_impl.h b/be/src/exprs/bloom_filter_func_impl.h index fe47ec18b3bcbc..4ecb39cb267b1e 100644 --- a/be/src/exprs/bloom_filter_func_impl.h +++ b/be/src/exprs/bloom_filter_func_impl.h @@ -54,23 +54,12 @@ struct fixed_len_to_uint32_v2 { } }; -template +template uint16_t find_batch_olap(const BloomFilterAdaptor& bloom_filter, const char* data, const uint8_t* nullmap, uint16_t* offsets, int number, const bool is_parse_column) { auto get_element = [](const char* input_data, int idx) { - if constexpr (std::is_same_v && need_trim) { - const auto value = ((const StringRef*)(input_data))[idx]; - int64_t size = value.size; - const char* data = value.data; - // CHAR type may pad the tail with \0, need to trim - while (size > 0 && data[size - 1] == '\0') { - size--; - } - return StringRef(value.data, size); - } else { - return ((const T*)(input_data))[idx]; - } + return ((const T*)(input_data))[idx]; }; uint16_t new_size = 0; diff --git a/be/src/service/point_query_executor.cpp b/be/src/service/point_query_executor.cpp index af34a3fe1d4cfc..6d1058bb438de4 100644 --- a/be/src/service/point_query_executor.cpp +++ b/be/src/service/point_query_executor.cpp @@ -568,11 +568,6 @@ Status PointQueryExecutor::_lookup_row_data() { RETURN_IF_ERROR(segment->seek_and_read_by_rowid(*_tablet->tablet_schema(), slot, row_ids, column, storage_read_options, iter)); - if (_tablet->tablet_schema() - ->column_by_uid(slot->col_unique_id()) - .has_char_type()) { - column->shrink_padding_chars(); - } } } } diff --git a/be/src/storage/delete/delete_handler.cpp b/be/src/storage/delete/delete_handler.cpp index 8aab3c422966fd..1781c7734de7d0 100644 --- a/be/src/storage/delete/delete_handler.cpp +++ b/be/src/storage/delete/delete_handler.cpp @@ -112,9 +112,6 @@ Status convert(const DataTypePtr& data_type, const std::list& str, // Parses a single condition value string into a Field and creates a comparison predicate. // Uses serde->from_fe_string to do the parsing, which handles all type-specific // conversions (including decimal scale, etc.). -// For CHAR type, the value is padded with '\0' to the declared column length, consistent -// with the IN list path in convert() above. -// For VARCHAR/STRING, the Field is created directly from the raw string. Status parse_to_predicate(const uint32_t index, const std::string col_name, const DataTypePtr& type, DeleteHandler::ConditionParseResult& res, Arena& arena, std::shared_ptr& predicate) { @@ -128,22 +125,7 @@ Status parse_to_predicate(const uint32_t index, const std::string col_name, cons } Field v; - if (type->get_primitive_type() == TYPE_CHAR) { - // CHAR type: create Field and pad with '\0' to the declared column length, - // consistent with IN list path (convert() above) and create_comparison_predicate. - const auto& str = res.value_str.front(); - auto char_len = cast_set( - assert_cast(remove_nullable(type).get())->len()); - auto target = std::max(char_len, str.size()); - if (target > str.size()) { - std::string padded(target, '\0'); - memcpy(padded.data(), str.data(), str.size()); - v = Field::create_field(std::move(padded)); - } else { - v = Field::create_field(str); - } - } else if (is_string_type(type->get_primitive_type())) { - // VARCHAR/STRING: create Field directly from the raw string, no padding needed. + if (is_string_type(type->get_primitive_type())) { v = Field::create_field(res.value_str.front()); } else { auto serde = type->get_serde(); diff --git a/be/src/storage/index/indexed_column_reader.cpp b/be/src/storage/index/indexed_column_reader.cpp index b8fe9a57541a2e..17e42c00bef212 100644 --- a/be/src/storage/index/indexed_column_reader.cpp +++ b/be/src/storage/index/indexed_column_reader.cpp @@ -194,8 +194,8 @@ Status IndexedColumnIterator::seek_to_ordinal(ordinal_t idx) { // need to read the data page containing row at idx if (_reader->_has_index_page) { std::string key; - KeyCoderTraits::full_encode_ascending(&idx, - &key); + KeyCoderTraits::full_encode_ascending( + &idx, &key, /*char_len=*/0); RETURN_IF_ERROR(_ordinal_iter.seek_at_or_before(key)); RETURN_IF_ERROR(_read_data_page(_ordinal_iter.current_page_pointer())); _current_iter = &_ordinal_iter; diff --git a/be/src/storage/index/indexed_column_writer.cpp b/be/src/storage/index/indexed_column_writer.cpp index 87a055792959cc..3902ae7e19f190 100644 --- a/be/src/storage/index/indexed_column_writer.cpp +++ b/be/src/storage/index/indexed_column_writer.cpp @@ -137,7 +137,7 @@ Status IndexedColumnWriter::_finish_current_data_page(size_t& num_val) { if (_options.write_ordinal_index) { std::string key; KeyCoderTraits::full_encode_ascending( - &first_ordinal, &key); + &first_ordinal, &key, /*char_len=*/0); _ordinal_index_builder->add(key, _last_data_page); } diff --git a/be/src/storage/index/ordinal_page_index.cpp b/be/src/storage/index/ordinal_page_index.cpp index 054f30f67f2445..855dbcd7da2b89 100644 --- a/be/src/storage/index/ordinal_page_index.cpp +++ b/be/src/storage/index/ordinal_page_index.cpp @@ -39,8 +39,8 @@ namespace segment_v2 { void OrdinalIndexWriter::append_entry(ordinal_t ordinal, const PagePointer& data_pp) { std::string key; - KeyCoderTraits::full_encode_ascending(&ordinal, - &key); + KeyCoderTraits::full_encode_ascending( + &ordinal, &key, /*char_len=*/0); _page_builder->add(key, data_pp); _last_pp = data_pp; } diff --git a/be/src/storage/key_coder.h b/be/src/storage/key_coder.h index 0c4bcf08d171e5..782f5c9ea1c527 100644 --- a/be/src/storage/key_coder.h +++ b/be/src/storage/key_coder.h @@ -41,7 +41,11 @@ namespace doris { -using FullEncodeAscendingFunc = void (*)(const void* value, std::string* buf); +// `char_len` is only used by CHAR — the declared CHAR(N) column length. +// CHAR full-encoding zero-pads the slice up to char_len so the encoded +// bytes are the fixed-width canonical form used by PK and segment min/max +// indexes. All other types ignore this argument. +using FullEncodeAscendingFunc = void (*)(const void* value, std::string* buf, size_t char_len); using EncodeAscendingFunc = void (*)(const void* value, size_t index_size, std::string* buf); using DecodeAscendingFunc = Status (*)(Slice* encoded_key, size_t index_size, uint8_t* cell_ptr); @@ -55,8 +59,8 @@ class KeyCoder { KeyCoder(TraitsType traits); // encode the provided `value` into `buf`. - void full_encode_ascending(const void* value, std::string* buf) const { - _full_encode_ascending(value, buf); + void full_encode_ascending(const void* value, std::string* buf, size_t char_len = 0) const { + _full_encode_ascending(value, buf, char_len); } // similar to `full_encode_ascending`, but only encode part (the first `index_size` bytes) of the value. @@ -90,7 +94,7 @@ class KeyCoderTraits::CppType; using UnsignedCppType = typename CppTypeTraits::UnsignedCppType; - static void full_encode_ascending(const void* value, std::string* buf) { + static void full_encode_ascending(const void* value, std::string* buf, size_t /*char_len*/) { UnsignedCppType unsigned_val; memcpy(&unsigned_val, value, sizeof(unsigned_val)); // swap MSB to encode integer @@ -105,7 +109,7 @@ class KeyCoderTraits { typename CppTypeTraits::UnsignedCppType; public: - static void full_encode_ascending(const void* value, std::string* buf) { + static void full_encode_ascending(const void* value, std::string* buf, size_t /*char_len*/) { UnsignedCppType unsigned_val; memcpy(&unsigned_val, value, sizeof(unsigned_val)); // make it bigendian @@ -145,7 +149,7 @@ class KeyCoderTraits { } static void encode_ascending(const void* value, size_t index_size, std::string* buf) { - full_encode_ascending(value, buf); + full_encode_ascending(value, buf, /*char_len=*/0); } static Status decode_ascending(Slice* encoded_key, size_t index_size, uint8_t* cell_ptr) { @@ -170,7 +174,7 @@ class KeyCoderTraits { typename CppTypeTraits::UnsignedCppType; public: - static void full_encode_ascending(const void* value, std::string* buf) { + static void full_encode_ascending(const void* value, std::string* buf, size_t /*char_len*/) { UnsignedCppType unsigned_val; memcpy(&unsigned_val, value, sizeof(unsigned_val)); // make it bigendian @@ -179,7 +183,7 @@ class KeyCoderTraits { } static void encode_ascending(const void* value, size_t index_size, std::string* buf) { - full_encode_ascending(value, buf); + full_encode_ascending(value, buf, /*char_len=*/0); } static Status decode_ascending(Slice* encoded_key, size_t index_size, uint8_t* cell_ptr) { @@ -205,7 +209,7 @@ class KeyCoderTraits { typename CppTypeTraits::UnsignedCppType; public: - static void full_encode_ascending(const void* value, std::string* buf) { + static void full_encode_ascending(const void* value, std::string* buf, size_t /*char_len*/) { UnsignedCppType unsigned_val; memcpy(&unsigned_val, value, sizeof(unsigned_val)); // make it bigendian @@ -214,7 +218,7 @@ class KeyCoderTraits { } static void encode_ascending(const void* value, size_t index_size, std::string* buf) { - full_encode_ascending(value, buf); + full_encode_ascending(value, buf, /*char_len=*/0); } static Status decode_ascending(Slice* encoded_key, size_t index_size, uint8_t* cell_ptr) { @@ -240,7 +244,7 @@ class KeyCoderTraits { typename CppTypeTraits::UnsignedCppType; public: - static void full_encode_ascending(const void* value, std::string* buf) { + static void full_encode_ascending(const void* value, std::string* buf, size_t /*char_len*/) { UnsignedCppType unsigned_val; memcpy(&unsigned_val, value, sizeof(unsigned_val)); // make it bigendian @@ -249,7 +253,7 @@ class KeyCoderTraits { } static void encode_ascending(const void* value, size_t index_size, std::string* buf) { - full_encode_ascending(value, buf); + full_encode_ascending(value, buf, /*char_len=*/0); } static Status decode_ascending(Slice* encoded_key, size_t index_size, uint8_t* cell_ptr) { @@ -270,17 +274,17 @@ class KeyCoderTraits { template <> class KeyCoderTraits { public: - static void full_encode_ascending(const void* value, std::string* buf) { + static void full_encode_ascending(const void* value, std::string* buf, size_t /*char_len*/) { decimal12_t decimal_val; memcpy(&decimal_val, value, sizeof(decimal12_t)); KeyCoderTraits::full_encode_ascending( - &decimal_val.integer, buf); + &decimal_val.integer, buf, /*char_len=*/0); KeyCoderTraits::full_encode_ascending(&decimal_val.fraction, - buf); + buf, /*char_len=*/0); } // namespace doris static void encode_ascending(const void* value, size_t index_size, std::string* buf) { - full_encode_ascending(value, buf); + full_encode_ascending(value, buf, /*char_len=*/0); } static Status decode_ascending(Slice* encoded_key, size_t index_size, uint8_t* cell_ptr) { @@ -297,18 +301,24 @@ class KeyCoderTraits { template <> class KeyCoderTraits { public: - NO_SANITIZE_UNDEFINED static void full_encode_ascending(const void* value, std::string* buf) { - auto slice = reinterpret_cast(value); - buf->append(slice->get_data(), slice->get_size()); + // Pad to char_len so the encoded bytes are the canonical fixed-width + // form used by short-key / PK / segment min-max indexes. + NO_SANITIZE_UNDEFINED static void full_encode_ascending(const void* value, std::string* buf, + size_t char_len) { + const Slice* slice = reinterpret_cast(value); + size_t copy_size = std::min(slice->size, char_len); + buf->append(slice->data, copy_size); + if (copy_size < char_len) { + buf->append(char_len - copy_size, '\0'); + } } + // For CHAR, FE guarantees column.length() == column.index_length(), so + // the short-key encoding is just the full encoding with the same pad + // length. Forward to keep the pad logic in one place. NO_SANITIZE_UNDEFINED static void encode_ascending(const void* value, size_t index_size, std::string* buf) { - const Slice* slice = (const Slice*)value; - CHECK(index_size <= slice->size) - << "index size is larger than char size, index=" << index_size - << ", char=" << slice->size; - buf->append(slice->data, index_size); + full_encode_ascending(value, buf, index_size); } static Status decode_ascending(Slice* encoded_key, size_t index_size, uint8_t* cell_ptr) { @@ -319,7 +329,8 @@ class KeyCoderTraits { template <> class KeyCoderTraits { public: - NO_SANITIZE_UNDEFINED static void full_encode_ascending(const void* value, std::string* buf) { + NO_SANITIZE_UNDEFINED static void full_encode_ascending(const void* value, std::string* buf, + size_t /*char_len*/) { auto slice = reinterpret_cast(value); buf->append(slice->get_data(), slice->get_size()); } @@ -339,7 +350,8 @@ class KeyCoderTraits { template <> class KeyCoderTraits { public: - NO_SANITIZE_UNDEFINED static void full_encode_ascending(const void* value, std::string* buf) { + NO_SANITIZE_UNDEFINED static void full_encode_ascending(const void* value, std::string* buf, + size_t /*char_len*/) { auto slice = reinterpret_cast(value); buf->append(slice->get_data(), slice->get_size()); } @@ -371,7 +383,7 @@ class KeyCoderTraitsForFloat { } // -infinity < -100.0 < -1.0 < -0.0 < 0.0 < 1.0 < 100.0 < infinity < NaN - static void full_encode_ascending(const void* value, std::string* buf) { + static void full_encode_ascending(const void* value, std::string* buf, size_t /*char_len*/) { CppType val; std::memcpy(&val, value, sizeof(CppType)); UnsignedCppType sortable_val = encode_float(val); @@ -383,7 +395,7 @@ class KeyCoderTraitsForFloat { } static void encode_ascending(const void* value, size_t index_size, std::string* buf) { - full_encode_ascending(value, buf); + full_encode_ascending(value, buf, /*char_len=*/0); } static Status decode_ascending(Slice* encoded_key, size_t index_size, uint8_t* cell_ptr) { diff --git a/be/src/storage/predicate/comparison_predicate.h b/be/src/storage/predicate/comparison_predicate.h index e1ebae39f8f9d4..192188cc7dc8e9 100644 --- a/be/src/storage/predicate/comparison_predicate.h +++ b/be/src/storage/predicate/comparison_predicate.h @@ -280,7 +280,12 @@ class ComparisonPredicateBase final : public ColumnPredicate { if (bf->is_ngram_bf()) { return true; } - if constexpr (is_string_type(Type)) { + if constexpr (Type == TYPE_CHAR) { + // CHAR BFs hash zero-padded bytes while the predicate value is + // unpadded, so probing the BF would always miss. Skip BF + // pruning for CHAR entirely and let the scan filter the rows. + return true; + } else if constexpr (is_string_type(Type)) { return bf->test_bytes(_value.data(), _value.size()); } else { // DecimalV2 using decimal12_t in bloom filter, should convert value to decimal12_t diff --git a/be/src/storage/predicate/in_list_predicate.h b/be/src/storage/predicate/in_list_predicate.h index 6c92290e5be009..468760ac7ab0b6 100644 --- a/be/src/storage/predicate/in_list_predicate.h +++ b/be/src/storage/predicate/in_list_predicate.h @@ -68,18 +68,16 @@ class InListPredicateBase final : public ColumnPredicate { ENABLE_FACTORY_CREATOR(InListPredicateBase); using T = typename PrimitiveTypeTraits::CppType; InListPredicateBase(uint32_t column_id, std::string col_name, - const std::shared_ptr& hybrid_set, bool is_opposite, - size_t char_length = 0) + const std::shared_ptr& hybrid_set, bool is_opposite) : ColumnPredicate(column_id, col_name, Type, is_opposite), _min_value(type_limit::max()), _max_value(type_limit::min()) { CHECK(hybrid_set != nullptr); // String types need a copy because: - // 1. The caller's set is StringSet>, but here we want - // StringSet> for small-set optimization — different - // C++ types, cannot share the pointer. - // 2. CHAR type additionally needs padding to char_length. + // The caller's set is StringSet>, but here we want + // StringSet> for small-set optimization — different + // C++ types, cannot share the pointer. // // Date/DECIMALV2 types do NOT need a copy: their ElementType (CppType) is identical // between the caller's HybridSet and InListPredicateBase's, and InListPredicateBase @@ -93,13 +91,7 @@ class InListPredicateBase final : public ColumnPredicate { HybridSetBase::IteratorBase* iter = hybrid_set->begin(); while (iter->has_next()) { const auto* value = (const StringRef*)(iter->get_value()); - if constexpr (Type == TYPE_CHAR) { - std::string padded(std::max(char_length, value->size), '\0'); - memcpy(padded.data(), value->data, value->size); - _values->insert((void*)padded.data(), padded.length()); - } else { - _values->insert((void*)value->data, value->size); - } + _values->insert((void*)value->data, value->size); iter->next(); } } else { @@ -350,6 +342,12 @@ class InListPredicateBase final : public ColumnPredicate { if (bf->is_ngram_bf()) { return true; } + if constexpr (Type == TYPE_CHAR) { + // CHAR BFs hash zero-padded bytes while the predicate value is + // unpadded, so probing the BF would always miss. Skip BF + // pruning for CHAR entirely. + return true; + } HybridSetBase::IteratorBase* iter = _values->begin(); while (iter->has_next()) { if constexpr (is_string_type(Type)) { diff --git a/be/src/storage/predicate/like_column_predicate.h b/be/src/storage/predicate/like_column_predicate.h index 24f129b0fb7dc6..152518450517b9 100644 --- a/be/src/storage/predicate/like_column_predicate.h +++ b/be/src/storage/predicate/like_column_predicate.h @@ -155,7 +155,7 @@ class LikeColumnPredicate final : public ColumnPredicate { std::vector tmp_res(column.dict_size(), false); for (int i = 0; i < column.dict_size(); i++) { - StringRef cell_value = column.get_shrink_value(i); + StringRef cell_value = column.get_value(i); unsigned char flag = 0; THROW_IF_ERROR((_state->scalar_function)( &_like_state, StringRef(cell_value.data, cell_value.size), pattern, &flag)); diff --git a/be/src/storage/predicate/predicate_creator_comparison.cpp b/be/src/storage/predicate/predicate_creator_comparison.cpp index bfec1262cfc1a5..b10a175b016592 100644 --- a/be/src/storage/predicate/predicate_creator_comparison.cpp +++ b/be/src/storage/predicate/predicate_creator_comparison.cpp @@ -77,21 +77,9 @@ std::shared_ptr create_comparison_predicate(const uint32_t cid, opposite); } case TYPE_CHAR: { - auto target = std::max(cast_set(assert_cast( - remove_nullable(data_type).get()) - ->len()), - value.template get().size()); - if (target > value.template get().size()) { - std::string tmp(target, '\0'); - memcpy(tmp.data(), value.template get().data(), - value.template get().size()); - return ComparisonPredicateBase::create_shared( - cid, col_name, Field::create_field(std::move(tmp)), opposite); - } else { - return ComparisonPredicateBase::create_shared( - cid, col_name, Field::create_field(value.template get()), - opposite); - } + return ComparisonPredicateBase::create_shared( + cid, col_name, Field::create_field(value.template get()), + opposite); } case TYPE_VARCHAR: case TYPE_STRING: { diff --git a/be/src/storage/predicate/predicate_creator_in_list_in.cpp b/be/src/storage/predicate/predicate_creator_in_list_in.cpp index e720da632a8ad4..f1ad365b4f523e 100644 --- a/be/src/storage/predicate/predicate_creator_in_list_in.cpp +++ b/be/src/storage/predicate/predicate_creator_in_list_in.cpp @@ -26,42 +26,34 @@ namespace doris { template static std::shared_ptr create_in_list_predicate_impl( const uint32_t cid, const std::string col_name, const std::shared_ptr& set, - bool is_opposite, size_t char_length = 0) { + bool is_opposite) { // Only string types construct their own HybridSetType in the constructor (to convert // from DynamicContainer to FixedContainer), so N dispatch is only needed // for them. All other types directly share the caller's hybrid_set. if constexpr (!is_string_type(TYPE)) { return InListPredicateBase::create_shared( - cid, col_name, set, is_opposite, char_length); + cid, col_name, set, is_opposite); } else { auto set_size = set->size(); if (set_size == 1) { - return InListPredicateBase::create_shared(cid, col_name, set, is_opposite, - char_length); + return InListPredicateBase::create_shared(cid, col_name, set, is_opposite); } else if (set_size == 2) { - return InListPredicateBase::create_shared(cid, col_name, set, is_opposite, - char_length); + return InListPredicateBase::create_shared(cid, col_name, set, is_opposite); } else if (set_size == 3) { - return InListPredicateBase::create_shared(cid, col_name, set, is_opposite, - char_length); + return InListPredicateBase::create_shared(cid, col_name, set, is_opposite); } else if (set_size == 4) { - return InListPredicateBase::create_shared(cid, col_name, set, is_opposite, - char_length); + return InListPredicateBase::create_shared(cid, col_name, set, is_opposite); } else if (set_size == 5) { - return InListPredicateBase::create_shared(cid, col_name, set, is_opposite, - char_length); + return InListPredicateBase::create_shared(cid, col_name, set, is_opposite); } else if (set_size == 6) { - return InListPredicateBase::create_shared(cid, col_name, set, is_opposite, - char_length); + return InListPredicateBase::create_shared(cid, col_name, set, is_opposite); } else if (set_size == 7) { - return InListPredicateBase::create_shared(cid, col_name, set, is_opposite, - char_length); + return InListPredicateBase::create_shared(cid, col_name, set, is_opposite); } else if (set_size == FIXED_CONTAINER_MAX_SIZE) { - return InListPredicateBase::create_shared(cid, col_name, set, is_opposite, - char_length); + return InListPredicateBase::create_shared(cid, col_name, set, is_opposite); } else { return InListPredicateBase::create_shared( - cid, col_name, set, is_opposite, char_length); + cid, col_name, set, is_opposite); } } } @@ -120,9 +112,8 @@ std::shared_ptr create_in_list_predicate( - cid, col_name, set, is_opposite, - assert_cast(remove_nullable(data_type).get())->len()); + return create_in_list_predicate_impl(cid, col_name, set, + is_opposite); } case TYPE_VARCHAR: { return create_in_list_predicate_impl( diff --git a/be/src/storage/predicate/predicate_creator_in_list_not_in.cpp b/be/src/storage/predicate/predicate_creator_in_list_not_in.cpp index 63e8eb37b186a3..e4cb4731a57095 100644 --- a/be/src/storage/predicate/predicate_creator_in_list_not_in.cpp +++ b/be/src/storage/predicate/predicate_creator_in_list_not_in.cpp @@ -26,42 +26,34 @@ namespace doris { template static std::shared_ptr create_in_list_predicate_impl( const uint32_t cid, const std::string col_name, const std::shared_ptr& set, - bool is_opposite, size_t char_length = 0) { + bool is_opposite) { // Only string types construct their own HybridSetType in the constructor (to convert // from DynamicContainer to FixedContainer), so N dispatch is only needed // for them. All other types directly share the caller's hybrid_set. if constexpr (!is_string_type(TYPE)) { return InListPredicateBase::create_shared( - cid, col_name, set, is_opposite, char_length); + cid, col_name, set, is_opposite); } else { auto set_size = set->size(); if (set_size == 1) { - return InListPredicateBase::create_shared(cid, col_name, set, is_opposite, - char_length); + return InListPredicateBase::create_shared(cid, col_name, set, is_opposite); } else if (set_size == 2) { - return InListPredicateBase::create_shared(cid, col_name, set, is_opposite, - char_length); + return InListPredicateBase::create_shared(cid, col_name, set, is_opposite); } else if (set_size == 3) { - return InListPredicateBase::create_shared(cid, col_name, set, is_opposite, - char_length); + return InListPredicateBase::create_shared(cid, col_name, set, is_opposite); } else if (set_size == 4) { - return InListPredicateBase::create_shared(cid, col_name, set, is_opposite, - char_length); + return InListPredicateBase::create_shared(cid, col_name, set, is_opposite); } else if (set_size == 5) { - return InListPredicateBase::create_shared(cid, col_name, set, is_opposite, - char_length); + return InListPredicateBase::create_shared(cid, col_name, set, is_opposite); } else if (set_size == 6) { - return InListPredicateBase::create_shared(cid, col_name, set, is_opposite, - char_length); + return InListPredicateBase::create_shared(cid, col_name, set, is_opposite); } else if (set_size == 7) { - return InListPredicateBase::create_shared(cid, col_name, set, is_opposite, - char_length); + return InListPredicateBase::create_shared(cid, col_name, set, is_opposite); } else if (set_size == FIXED_CONTAINER_MAX_SIZE) { - return InListPredicateBase::create_shared(cid, col_name, set, is_opposite, - char_length); + return InListPredicateBase::create_shared(cid, col_name, set, is_opposite); } else { return InListPredicateBase::create_shared( - cid, col_name, set, is_opposite, char_length); + cid, col_name, set, is_opposite); } } } @@ -121,8 +113,7 @@ std::shared_ptr create_in_list_predicate( - cid, col_name, set, is_opposite, - assert_cast(remove_nullable(data_type).get())->len()); + cid, col_name, set, is_opposite); } case TYPE_VARCHAR: { return create_in_list_predicate_impl( diff --git a/be/src/storage/row_cursor.cpp b/be/src/storage/row_cursor.cpp index e0fb9ade80f434..b013669f146bf8 100644 --- a/be/src/storage/row_cursor.cpp +++ b/be/src/storage/row_cursor.cpp @@ -123,17 +123,6 @@ RowCursor RowCursor::clone() const { return result; } -void RowCursor::pad_char_fields() { - for (size_t i = 0; i < _fields.size(); ++i) { - const TabletColumn* col = _schema->column(cast_set(i)); - if (col->type() == FieldType::OLAP_FIELD_TYPE_CHAR && !_fields[i].is_null()) { - String padded = _fields[i].get(); - padded.resize(col->length(), '\0'); - _fields[i] = Field::create_field(std::move(padded)); - } - } -} - std::string RowCursor::to_string() const { std::string result; for (size_t i = 0; i < _fields.size(); ++i) { @@ -157,29 +146,13 @@ void RowCursor::_encode_column_value(const TabletColumn* column, const Field& va const KeyCoder* coder = get_key_coder(ft); if (field_is_slice_type(ft)) { - // String types: CHAR, VARCHAR, STRING — all stored as String in Field. const String& str = value.get(); - - if (ft == FieldType::OLAP_FIELD_TYPE_CHAR) { - // CHAR type: must pad with \0 to the declared column length - size_t col_len = column->length(); - String padded(col_len, '\0'); - memcpy(padded.data(), str.data(), std::min(str.size(), col_len)); - - Slice slice(padded.data(), col_len); - if (full_encode) { - coder->full_encode_ascending(&slice, buf); - } else { - coder->encode_ascending(&slice, column->index_length(), buf); - } + Slice slice(str.data(), str.size()); + if (full_encode) { + // CHAR's KeyCoder pads to char_len internally; non-CHAR ignores it. + coder->full_encode_ascending(&slice, buf, column->length()); } else { - // VARCHAR / STRING: use actual length - Slice slice(str.data(), str.size()); - if (full_encode) { - coder->full_encode_ascending(&slice, buf); - } else { - coder->encode_ascending(&slice, column->index_length(), buf); - } + coder->encode_ascending(&slice, column->index_length(), buf); } return; } diff --git a/be/src/storage/row_cursor.h b/be/src/storage/row_cursor.h index bc40439b19aa7d..19850f9604bceb 100644 --- a/be/src/storage/row_cursor.h +++ b/be/src/storage/row_cursor.h @@ -70,11 +70,6 @@ class RowCursor { // Returns a deep copy of this RowCursor with the same schema and field values. RowCursor clone() const; - // Pad all CHAR-type fields in-place to their declared column length using '\0'. - // RowCursor holds CHAR values in compute format (unpadded). Call this before - // comparing against storage-format data (e.g. _seek_block) where CHAR is padded. - void pad_char_fields(); - // Output row cursor content in string format std::string to_string() const; diff --git a/be/src/storage/segment/binary_dict_page_pre_decoder.h b/be/src/storage/segment/binary_dict_page_pre_decoder.h index b488e83f402fcf..31ace1bcdffb3c 100644 --- a/be/src/storage/segment/binary_dict_page_pre_decoder.h +++ b/be/src/storage/segment/binary_dict_page_pre_decoder.h @@ -19,6 +19,8 @@ #include "storage/cache/page_cache.h" #include "storage/segment/binary_dict_page.h" +#include "storage/segment/binary_plain_page_char_strip_pre_decoder.h" +#include "storage/segment/binary_plain_page_v2_char_strip_pre_decoder.h" #include "storage/segment/binary_plain_page_v2_pre_decoder.h" #include "storage/segment/bitshuffle_page_pre_decoder.h" #include "storage/segment/encoding_info.h" @@ -33,12 +35,18 @@ namespace segment_v2 { * BinaryDictPage data pages can have different encoding types: * 1. DICT_ENCODING: header(4 bytes) + bitshuffle encoded codeword page * 2. PLAIN_ENCODING_V2: header(4 bytes) + BinaryPlainPageV2 encoded data - * 3. PLAIN_ENCODING: header(4 bytes) + BinaryPlainPage encoded data (no pre-decode needed) + * 3. PLAIN_ENCODING: header(4 bytes) + BinaryPlainPage encoded data (no pre-decode needed + * for non-CHAR; CHAR pages get their trailing '\0' padding stripped inline) * * This pre-decoder reads the encoding type from the first 4 bytes, strips the header, * dispatches to the appropriate pre-decoder (BitShufflePagePreDecoder or * BinaryPlainPageV2PreDecoder), and then restores the header. + * + * When IS_CHAR is true the inline-binary paths (PLAIN_ENCODING / PLAIN_ENCODING_V2) + * use the CHAR-strip variants so the dict-fallback data pages are also unpadded + * once at page load time. */ +template struct BinaryDictPagePreDecoder : public DataPagePreDecoder { /** * @brief Decode BinaryDictPage data page @@ -72,8 +80,9 @@ struct BinaryDictPagePreDecoder : public DataPagePreDecoder { "PLAIN_ENCODING_V2, PLAIN_ENCODING>", encoding_type, file_path); } - // For PLAIN_ENCODING, no pre-decoding needed - if (encoding_type == PLAIN_ENCODING) { + // For PLAIN_ENCODING, non-CHAR pages can be used as-is; CHAR pages + // are routed through the CHAR-strip pre-decoder below. + if (encoding_type == PLAIN_ENCODING && !IS_CHAR) { return Status::OK(); } @@ -102,9 +111,21 @@ struct BinaryDictPagePreDecoder : public DataPagePreDecoder { break; } case PLAIN_ENCODING_V2: { - // Use BinaryPlainPageV2PreDecoder with total_prefix to reserve space - BinaryPlainPageV2PreDecoder v2_decoder; - status = v2_decoder.decode(&decoded_page, &data_without_header, size_of_tail, + if constexpr (IS_CHAR) { + BinaryPlainPageV2CharStripPreDecoder v2_decoder; + status = v2_decoder.decode(&decoded_page, &data_without_header, size_of_tail, + _use_cache, page_type, file_path, total_prefix); + } else { + BinaryPlainPageV2PreDecoder v2_decoder; + status = v2_decoder.decode(&decoded_page, &data_without_header, size_of_tail, + _use_cache, page_type, file_path, total_prefix); + } + break; + } + case PLAIN_ENCODING: { + // Only reachable when IS_CHAR (non-CHAR returned earlier). + BinaryPlainPageCharStripPreDecoder v1_decoder; + status = v1_decoder.decode(&decoded_page, &data_without_header, size_of_tail, _use_cache, page_type, file_path, total_prefix); break; } diff --git a/be/src/storage/segment/binary_plain_page_char_strip_pre_decoder.h b/be/src/storage/segment/binary_plain_page_char_strip_pre_decoder.h new file mode 100644 index 00000000000000..11947c5abcbb73 --- /dev/null +++ b/be/src/storage/segment/binary_plain_page_char_strip_pre_decoder.h @@ -0,0 +1,98 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include + +#include "storage/cache/page_cache.h" +#include "storage/segment/binary_plain_page_v2_pre_decoder.h" // BinaryPlainV1Entry, write_binary_plain_v1_output +#include "storage/segment/encoding_info.h" +#include "util/coding.h" + +namespace doris::segment_v2 { + +// Pre-decoder for BinaryPlainPage (V1) data pages of CHAR columns. +// +// Segments store CHAR(N) zero-padded to N bytes (the on-disk format). The +// pre-decoder strnlens each slice once at page load time, then rewrites the +// page as a tight V1 layout (no trailing '\0' bytes, adjusted offsets) before +// it is placed in the page cache, so the compute layer reads unpadded CHAR. +// +// Both input and output layout are V1: +// Data: |binary1|binary2|...|binaryN| +// Trailer: |offset1|offset2|...|offsetN| num_elems (32-bit) +// +// Reuses the same writer as the V2 pre-decoders (write_binary_plain_v1_output) +// for steps 3-7; only the input scan differs (offsets array instead of varint +// lengths). +struct BinaryPlainPageCharStripPreDecoder : public DataPagePreDecoder { + Status decode(std::unique_ptr* page, Slice* page_slice, size_t size_of_tail, + bool _use_cache, segment_v2::PageTypePB page_type, const std::string& file_path, + size_t size_of_prefix = 0) override { + // Step 1: validate and locate the V1 trailer. + if (page_slice->size < size_of_tail + sizeof(uint32_t)) { + return Status::Corruption( + "Invalid CHAR plain page size: {}, expected at least {} in file: {}", + page_slice->size, size_of_tail + sizeof(uint32_t), file_path); + } + Slice data(page_slice->data, page_slice->size - size_of_tail); + uint32_t num_elems = decode_fixed32_le( + reinterpret_cast(&data[data.size - sizeof(uint32_t)])); + + // Always rewrite the page even for num_elems==0 — callers (e.g. + // BinaryDictPagePreDecoder) may pass a non-zero `size_of_prefix` + // expecting a fresh output buffer with that prefix area reserved. + size_t offsets_pos = data.size - (num_elems + 1) * sizeof(uint32_t); + if (num_elems > 0 && offsets_pos > data.size - sizeof(uint32_t)) { + return Status::Corruption( + "CHAR plain page corruption: offsets pos beyond data, size={}, num_elems={}, " + "offsets_pos={} in file: {}", + data.size, num_elems, offsets_pos, file_path); + } + const auto* offsets_in = reinterpret_cast(&data[offsets_pos]); + + // Step 2: scan entries, strnlen-ing each to drop trailing '\0' padding. + std::vector entries; + entries.reserve(num_elems); + uint32_t total_out_len = 0; + for (uint32_t i = 0; i < num_elems; ++i) { + uint32_t start = decode_fixed32_le(offsets_in + i * sizeof(uint32_t)); + uint32_t end = (i + 1 < num_elems) + ? decode_fixed32_le(offsets_in + (i + 1) * sizeof(uint32_t)) + : static_cast(offsets_pos); + if (end < start || end > offsets_pos) { + return Status::Corruption( + "CHAR plain page corruption: bad offset at {}, start={}, end={}, " + "offsets_pos={} in file: {}", + i, start, end, offsets_pos, file_path); + } + uint32_t raw_size = end - start; + uint32_t out_len = static_cast(strnlen(data.data + start, raw_size)); + entries.push_back({reinterpret_cast(data.data + start), out_len}); + total_out_len += out_len; + } + + // Steps 3-7 (shared with V2 pre-decoders). + return write_binary_plain_v1_output(entries, num_elems, total_out_len, *page_slice, + size_of_tail, size_of_prefix, _use_cache, page_type, + page, page_slice); + } +}; + +} // namespace doris::segment_v2 diff --git a/be/src/storage/segment/binary_plain_page_v2_char_strip_pre_decoder.h b/be/src/storage/segment/binary_plain_page_v2_char_strip_pre_decoder.h new file mode 100644 index 00000000000000..356e8149ab8351 --- /dev/null +++ b/be/src/storage/segment/binary_plain_page_v2_char_strip_pre_decoder.h @@ -0,0 +1,68 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include + +#include "storage/segment/binary_plain_page_v2_pre_decoder.h" + +namespace doris::segment_v2 { + +// CHAR-specific pre-decoder for BinaryPlainPageV2 data pages. +// +// Inherits the static helpers (parse_header, write_v1_output) from +// BinaryPlainPageV2PreDecoder and writes its own decode() so the entry scan +// loop knows at compile time that every entry must be strnlen-ed. The cached +// page emitted into the page cache contains unpadded CHAR slices, and the +// downstream decoder pays no per-call strnlen cost. +struct BinaryPlainPageV2CharStripPreDecoder : public BinaryPlainPageV2PreDecoder { + Status decode(std::unique_ptr* page, Slice* page_slice, size_t size_of_tail, + bool _use_cache, segment_v2::PageTypePB page_type, const std::string& file_path, + size_t size_of_prefix = 0) override { + // Step 1. + Slice data; + uint32_t num_elems = 0; + const uint8_t* ptr = nullptr; + const uint8_t* limit = nullptr; + RETURN_IF_ERROR(parse_header(*page_slice, size_of_tail, file_path, &data, &num_elems, &ptr, + &limit)); + + // Step 2: out_len = strnlen(data_start, raw_len) — strip CHAR padding. + std::vector entries; + entries.reserve(num_elems); + uint32_t total_out_len = 0; + for (uint32_t i = 0; i < num_elems; ++i) { + uint32_t raw_len = 0; + const uint8_t* data_start = nullptr; + RETURN_IF_ERROR(decode_one(ptr, limit, file_path, i, &data_start, &raw_len)); + uint32_t out_len = static_cast( + strnlen(reinterpret_cast(data_start), raw_len)); + entries.push_back({data_start, out_len}); + total_out_len += out_len; + ptr = data_start + raw_len; + } + + // Steps 3-7 (shared with non-CHAR). + return write_binary_plain_v1_output(entries, num_elems, total_out_len, *page_slice, + size_of_tail, size_of_prefix, _use_cache, page_type, + page, page_slice); + } +}; + +} // namespace doris::segment_v2 diff --git a/be/src/storage/segment/binary_plain_page_v2_pre_decoder.h b/be/src/storage/segment/binary_plain_page_v2_pre_decoder.h index bea58094c8a578..936cb78c965a37 100644 --- a/be/src/storage/segment/binary_plain_page_v2_pre_decoder.h +++ b/be/src/storage/segment/binary_plain_page_v2_pre_decoder.h @@ -17,6 +17,9 @@ #pragma once +#include +#include + #include "storage/cache/page_cache.h" #include "storage/segment/encoding_info.h" #include "util/coding.h" @@ -24,6 +27,63 @@ namespace doris { namespace segment_v2 { +// One source entry feeding the V1 output writer. Variants differ only in how +// `out_len` is derived from the raw input length (raw, strnlen'd, etc.). +struct BinaryPlainV1Entry { + const uint8_t* start; + uint32_t out_len; +}; + +// Allocate a V1 BinaryPlainPage layout output buffer and write +// binary -> offsets -> num_elems -> tail. Shared by V2 pre-decoders (after +// they iterate varint lengths) and the V1 CHAR-strip pre-decoder (after it +// iterates the V1 offsets array). `size_of_prefix` reserves room before the +// V1 data for callers that wrap the page (e.g. BinaryDictPagePreDecoder +// prepending the dict-page header). +inline Status write_binary_plain_v1_output(const std::vector& entries, + uint32_t num_elems, uint32_t total_out_len, + const Slice& source_page_slice, size_t size_of_tail, + size_t size_of_prefix, bool use_cache, + segment_v2::PageTypePB page_type, + std::unique_ptr* out_page, + Slice* out_page_slice) { + size_t offsets_size = num_elems * sizeof(uint32_t); + size_t v1_data_size = total_out_len + offsets_size + sizeof(uint32_t); + size_t total_size = size_of_prefix + v1_data_size + size_of_tail; + + std::unique_ptr decoded_page = + std::make_unique(total_size, use_cache, page_type); + Slice decoded_slice(decoded_page->data(), total_size); + char* output = decoded_slice.data + size_of_prefix; + + // Binary payload. + for (const auto& e : entries) { + memcpy(output, e.start, e.out_len); + output += e.out_len; + } + + // Offsets array (running cursor). + uint32_t running = 0; + for (const auto& e : entries) { + encode_fixed32_le(reinterpret_cast(output), running); + output += sizeof(uint32_t); + running += e.out_len; + } + + // num_elems trailer. + encode_fixed32_le(reinterpret_cast(output), num_elems); + output += sizeof(uint32_t); + + // Tail (footer + null map). + if (size_of_tail > 0) { + memcpy(output, source_page_slice.data + source_page_slice.size - size_of_tail, + size_of_tail); + } + *out_page_slice = decoded_slice; + *out_page = std::move(decoded_page); + return Status::OK(); +} + /** * @brief Pre-decoder for BinaryPlainPageV2 * @@ -37,127 +97,95 @@ namespace segment_v2 { * V1 format (output): * Data: |binary1|binary2|... * Trailer: |offset1(32-bit)|offset2(32-bit)|...| num_elems (32-bit) + * + * The decode pipeline is 7 steps: + * 1. parse header (validate sizes + extract num_elems + iteration bounds) + * 2. scan entries: record (data_start, out_len) per entry, sum total out_len + * 3. allocate the V1 output page (size_of_prefix + binary + offsets + trailer + tail) + * 4. write binary payload + * 5. write offsets array (running cursor over out_len) + * 6. write num_elems trailer + * 7. copy tail (footer + null map) and publish output params + * + * Steps 1, 3, 5, 6, 7 are identical for every variant and live in static + * helpers (parse_header, write_v1_output). Step 2 (the entry scan loop) + * differs across variants only in how it derives `out_len` from the raw + * V2 length — each subclass writes its own decode() so the scan loop has + * no virtual dispatch. */ struct BinaryPlainPageV2PreDecoder : public DataPagePreDecoder { - /** - * @brief Decode BinaryPlainPageV2 data to BinaryPlainPage format - * - * @param page unique_ptr to hold page data, will be replaced by decoded data - * @param page_slice data to decode, will be updated to point to decoded data - * @param size_of_tail including size of footer and null map - * @param _use_cache whether to use page cache - * @param page_type the type of page - * @param file_path file path for error reporting - * @param size_of_prefix size of prefix space to reserve (for dict page header) - * @return Status - */ Status decode(std::unique_ptr* page, Slice* page_slice, size_t size_of_tail, bool _use_cache, segment_v2::PageTypePB page_type, const std::string& file_path, size_t size_of_prefix = 0) override { - // Validate input - if (page_slice->size < sizeof(uint32_t) + size_of_tail) { - return Status::Corruption("Invalid page size: {}, expected at least {} in file: {}", - page_slice->size, sizeof(uint32_t) + size_of_tail, file_path); + // Step 1. + Slice data; + uint32_t num_elems = 0; + const uint8_t* ptr = nullptr; + const uint8_t* limit = nullptr; + RETURN_IF_ERROR(parse_header(*page_slice, size_of_tail, file_path, &data, &num_elems, &ptr, + &limit)); + + // Step 2: out_len == raw_len (no transform). + std::vector entries; + entries.reserve(num_elems); + uint32_t total_out_len = 0; + for (uint32_t i = 0; i < num_elems; ++i) { + uint32_t raw_len = 0; + const uint8_t* data_start = nullptr; + RETURN_IF_ERROR(decode_one(ptr, limit, file_path, i, &data_start, &raw_len)); + entries.push_back({data_start, raw_len}); + total_out_len += raw_len; + ptr = data_start + raw_len; } - // Calculate data portion (excluding tail) - Slice data(page_slice->data, page_slice->size - size_of_tail); + // Steps 3-7. + return write_binary_plain_v1_output(entries, num_elems, total_out_len, *page_slice, + size_of_tail, size_of_prefix, _use_cache, page_type, + page, page_slice); + } - // Read num_elems from the last 4 bytes of data portion - if (data.size < sizeof(uint32_t)) { +protected: + // Step 1: validate the V2 page and extract iteration bounds. + static inline Status parse_header(const Slice& page_slice, size_t size_of_tail, + const std::string& file_path, Slice* out_data, + uint32_t* out_num_elems, const uint8_t** out_ptr, + const uint8_t** out_limit) { + if (page_slice.size < sizeof(uint32_t) + size_of_tail) { + return Status::Corruption("Invalid page size: {}, expected at least {} in file: {}", + page_slice.size, sizeof(uint32_t) + size_of_tail, file_path); + } + *out_data = Slice(page_slice.data, page_slice.size - size_of_tail); + if (out_data->size < sizeof(uint32_t)) { return Status::Corruption("Data too small to contain num_elems in file: {}", file_path); } + *out_num_elems = decode_fixed32_le( + reinterpret_cast(&(*out_data)[out_data->size - sizeof(uint32_t)])); + *out_ptr = reinterpret_cast(out_data->data); + *out_limit = *out_ptr + out_data->size - sizeof(uint32_t); + return Status::OK(); + } - uint32_t num_elems = decode_fixed32_le( - reinterpret_cast(&data[data.size - sizeof(uint32_t)])); - - // Calculate required size for V1 format - // V1 format: binary_data + offsets_array + num_elems + tail - // We need to parse V2 to calculate the total binary data size - const auto* ptr = reinterpret_cast(data.data); - const uint8_t* limit = ptr + data.size - sizeof(uint32_t); - - std::vector offsets; - offsets.reserve(num_elems); - - uint32_t current_offset = 0; - for (uint32_t i = 0; i < num_elems; i++) { - if (ptr >= limit) { - return Status::Corruption( - "Unexpected end of data while parsing element {} in file: {}", i, - file_path); - } - - // Decode varuint length - uint32_t length; - const uint8_t* data_start = decode_varint32_ptr(ptr, limit, &length); - if (data_start == nullptr) { - return Status::Corruption("Failed to decode varuint for element {} in file: {}", i, - file_path); - } - - // Store offset for this element - offsets.push_back(current_offset); - current_offset += length; - - // Move to next entry - ptr = data_start + length; - - if (ptr > limit) { - return Status::Corruption("Data extends beyond page for element {} in file: {}", i, - file_path); - } + // Step 2 helper: decode one varint length and validate the entry bounds. + // The scan loop in decode() / overrides walks the input with this helper + // and decides what `out_len` to record (raw_len here, strnlen'd in the + // CHAR variant). + static inline Status decode_one(const uint8_t* ptr, const uint8_t* limit, + const std::string& file_path, uint32_t i, + const uint8_t** out_data_start, uint32_t* out_raw_len) { + if (ptr >= limit) { + return Status::Corruption("Unexpected end of data while parsing element {} in file: {}", + i, file_path); } - - // Calculate size for V1 format - size_t binary_data_size = current_offset; - size_t offsets_size = num_elems * sizeof(uint32_t); - size_t v1_data_size = binary_data_size + offsets_size + sizeof(uint32_t); - size_t total_size = size_of_prefix + v1_data_size + size_of_tail; - - // Allocate new page - Slice decoded_slice; - decoded_slice.size = total_size; - std::unique_ptr decoded_page = - std::make_unique(decoded_slice.size, _use_cache, page_type); - decoded_slice.data = decoded_page->data(); - - // Write V1 format data after the prefix - char* output = decoded_slice.data + size_of_prefix; - - // Step 1: Write binary data (without varint prefixes) - ptr = reinterpret_cast(data.data); - for (uint32_t i = 0; i < num_elems; i++) { - uint32_t length; - const uint8_t* data_start = decode_varint32_ptr(ptr, limit, &length); - - // Copy binary data - memcpy(output, data_start, length); - output += length; - - // Move to next entry - ptr = data_start + length; + const uint8_t* data_start = decode_varint32_ptr(ptr, limit, out_raw_len); + if (data_start == nullptr) { + return Status::Corruption("Failed to decode varuint for element {} in file: {}", i, + file_path); } - - // Step 2: Write offsets array - for (uint32_t offset : offsets) { - encode_fixed32_le(reinterpret_cast(output), offset); - output += sizeof(uint32_t); + if (data_start + *out_raw_len > limit) { + return Status::Corruption("Data extends beyond page for element {} in file: {}", i, + file_path); } - - // Step 3: Write num_elems - encode_fixed32_le(reinterpret_cast(output), num_elems); - output += sizeof(uint32_t); - - // Step 4: Copy tail (footer and null map) - if (size_of_tail > 0) { - memcpy(output, page_slice->data + page_slice->size - size_of_tail, size_of_tail); - } - - // Update output parameters - *page_slice = decoded_slice; - *page = std::move(decoded_page); - + *out_data_start = data_start; return Status::OK(); } }; diff --git a/be/src/storage/segment/binary_prefix_page.h b/be/src/storage/segment/binary_prefix_page.h index ce2b363e934635..3e7da14b476d9e 100644 --- a/be/src/storage/segment/binary_prefix_page.h +++ b/be/src/storage/segment/binary_prefix_page.h @@ -94,7 +94,7 @@ class BinaryPrefixPageBuilder : public PageBuilderHelperread_page(_opts, _reader->get_dict_page_pointer(), &_dict_page_handle, &dict_data, &dict_footer, _compress_codec, true)); const EncodingInfo* encoding_info; - RETURN_IF_ERROR(EncodingInfo::get(FieldType::OLAP_FIELD_TYPE_VARCHAR, + // The dict pool stores strings of the outer column's type. Using the + // outer type (CHAR vs VARCHAR/STRING) lets the EncodingInfo pick a + // CHAR-strip pre-decoder so the cached dict page is already unpadded. + RETURN_IF_ERROR(EncodingInfo::get(_reader->get_meta_type(), dict_footer.dict_page_footer().encoding(), {}, &encoding_info)); RETURN_IF_ERROR(encoding_info->create_page_decoder(dict_data, {}, _dict_decoder)); diff --git a/be/src/storage/segment/encoding_info.cpp b/be/src/storage/segment/encoding_info.cpp index 9fdad8504063aa..0e92b842267dc1 100644 --- a/be/src/storage/segment/encoding_info.cpp +++ b/be/src/storage/segment/encoding_info.cpp @@ -32,7 +32,9 @@ #include "storage/segment/binary_dict_page.h" #include "storage/segment/binary_dict_page_pre_decoder.h" #include "storage/segment/binary_plain_page.h" +#include "storage/segment/binary_plain_page_char_strip_pre_decoder.h" #include "storage/segment/binary_plain_page_v2.h" +#include "storage/segment/binary_plain_page_v2_char_strip_pre_decoder.h" #include "storage/segment/binary_plain_page_v2_pre_decoder.h" #include "storage/segment/binary_prefix_page.h" #include "storage/segment/bitshuffle_page.h" @@ -444,11 +446,23 @@ EncodingInfo::EncodingInfo(TraitsClass traits) if (_encoding == BIT_SHUFFLE) { _data_page_pre_decoder = std::make_unique(); } else if (_encoding == DICT_ENCODING) { - _data_page_pre_decoder = std::make_unique(); + if constexpr (TraitsClass::type == FieldType::OLAP_FIELD_TYPE_CHAR) { + _data_page_pre_decoder = std::make_unique>(); + } else { + _data_page_pre_decoder = std::make_unique>(); + } + } else if (_encoding == PLAIN_ENCODING) { + // CHAR plain pages may contain trailing '\0' padding written by older + // BEs; strip it once at page load so the cached page is unpadded. + if constexpr (TraitsClass::type == FieldType::OLAP_FIELD_TYPE_CHAR) { + _data_page_pre_decoder = std::make_unique(); + } } else if (_encoding == PLAIN_ENCODING_V2) { - // Only binary types (Slice) need the predecoder for PLAIN_ENCODING_V2 - // to convert varint-encoded lengths to offset array format - if constexpr (std::is_same_v) { + if constexpr (TraitsClass::type == FieldType::OLAP_FIELD_TYPE_CHAR) { + _data_page_pre_decoder = std::make_unique(); + } else if constexpr (std::is_same_v) { + // Only binary types (Slice) need the predecoder for PLAIN_ENCODING_V2 + // to convert varint-encoded lengths to offset array format _data_page_pre_decoder = std::make_unique(); } } diff --git a/be/src/storage/segment/page_io.cpp b/be/src/storage/segment/page_io.cpp index 2324f29efc9d7e..6d34508fce0c41 100644 --- a/be/src/storage/segment/page_io.cpp +++ b/be/src/storage/segment/page_io.cpp @@ -232,11 +232,15 @@ Status PageIO::read_and_decompress_page_(const PageReadOptions& opts, PageHandle if (opts.pre_decode) { const auto* encoding_info = opts.encoding_info; if (opts.is_dict_page) { - // for dict page, we need to use encoding_info based on footer->dict_page_footer().encoding() - // to get its pre_decoder - RETURN_IF_ERROR(EncodingInfo::get(FieldType::OLAP_FIELD_TYPE_VARCHAR, - footer->dict_page_footer().encoding(), {}, - &encoding_info)); + // Look up the dict page's encoding_info using the outer column's + // field type (not a hardcoded VARCHAR), so CHAR columns reach the + // CharStrip pre-decoder for the dict pool too. Falls back to + // VARCHAR only when the caller didn't set opts.encoding_info. + FieldType dict_field_type = opts.encoding_info != nullptr + ? opts.encoding_info->type() + : FieldType::OLAP_FIELD_TYPE_VARCHAR; + RETURN_IF_ERROR(EncodingInfo::get( + dict_field_type, footer->dict_page_footer().encoding(), {}, &encoding_info)); } if (encoding_info) { auto* pre_decoder = encoding_info->get_data_page_pre_decoder(); diff --git a/be/src/storage/segment/segment_iterator.cpp b/be/src/storage/segment/segment_iterator.cpp index 8ac492769ea53a..4d017c97ca908e 100644 --- a/be/src/storage/segment/segment_iterator.cpp +++ b/be/src/storage/segment/segment_iterator.cpp @@ -658,7 +658,7 @@ Status SegmentIterator::_lazy_init(Block* block) { } _current_return_columns.resize(_schema->columns().size()); - _vec_init_char_column_id(block); + _vec_init_char_column_id(); for (size_t i = 0; i < _schema->column_ids().size(); i++) { ColumnId cid = _schema->column_ids()[i]; const auto* column_desc = _schema->column(cid); @@ -1803,13 +1803,6 @@ Status SegmentIterator::_lookup_ordinal_from_sk_index(const RowCursor& key, bool const auto& key_col_ids = key.schema()->column_ids(); - // Clone the key once and pad CHAR fields to storage format before the binary search. - // _seek_block holds storage-format data where CHAR is zero-padded to column length, - // while RowCursor holds CHAR in compute format (unpadded). Padding once here avoids - // repeated allocation inside the comparison loop. - RowCursor padded_key = key.clone(); - padded_key.pad_char_fields(); - ssize_t start_block_id = 0; auto start_iter = sk_index_decoder->lower_bound(index_key); if (start_iter.valid()) { @@ -1837,7 +1830,7 @@ Status SegmentIterator::_lookup_ordinal_from_sk_index(const RowCursor& key, bool while (start < end) { rowid_t mid = (start + end) / 2; RETURN_IF_ERROR(_seek_and_peek(mid)); - int cmp = _compare_short_key_with_seek_block(padded_key, key_col_ids); + int cmp = _compare_short_key_with_seek_block(key, key_col_ids); if (cmp > 0) { start = mid + 1; } else if (cmp == 0) { @@ -2193,29 +2186,8 @@ bool SegmentIterator::_can_evaluated_by_vectorized(std::shared_ptrcolumns().size(), false); @@ -2223,14 +2195,6 @@ void SegmentIterator::_vec_init_char_column_id(Block* block) { auto cid = _schema->column_id(i); const TabletColumn* column_desc = _schema->column(cid); - // The additional deleted filter condition will be in the materialized column at the end of the block. - // After _output_column_by_sel_idx, it will be erased, so we do not need to shrink it. - if (i < block->columns()) { - if (_has_char_type(*column_desc)) { - _char_type_idx.emplace_back(i); - } - } - if (column_desc->type() == FieldType::OLAP_FIELD_TYPE_CHAR) { _is_char_type[cid] = true; } @@ -3112,8 +3076,6 @@ Status SegmentIterator::_next_batch_internal(Block* block) { _output_index_result_column(vir_ctxs, sel_rowid_idx, _selected_size, block); } RETURN_IF_ERROR(_materialization_of_virtual_column(block)); - // shrink char_type suffix zero data - block->shrink_char_type_column_suffix_zero(_char_type_idx); if (_opts.read_limit > 0) { _rows_returned += block->rows(); } @@ -3223,7 +3185,6 @@ Status SegmentIterator::_process_common_expr(uint16_t* sel_rowid_idx, uint16_t& common_ctxs.push_back(ctx.get()); } _output_index_result_column(common_ctxs, _sel_rowid_idx.data(), _selected_size, block); - block->shrink_char_type_column_suffix_zero(_char_type_idx); RETURN_IF_ERROR(_execute_common_expr(_sel_rowid_idx.data(), _selected_size, block)); if (need_mock_col) { @@ -3626,7 +3587,6 @@ Status SegmentIterator::_materialization_of_virtual_column(Block* block) { idx_in_block, block->columns(), _vir_cid_to_idx_in_block.size(), column_expr->root()->debug_string()); } - block->shrink_char_type_column_suffix_zero(_char_type_idx); if (check_and_get_column( block->get_by_position(idx_in_block).column.get())) { VLOG_DEBUG << fmt::format("Virtual column is doing materialization, cid {}, col idx {}", diff --git a/be/src/storage/segment/segment_iterator.h b/be/src/storage/segment/segment_iterator.h index 5105d050da1872..999a5c7dd65c5a 100644 --- a/be/src/storage/segment/segment_iterator.h +++ b/be/src/storage/segment/segment_iterator.h @@ -204,11 +204,7 @@ class SegmentIterator : public RowwiseIterator { bool _is_literal_node(const TExprNodeType::type& node_type); Status _vec_init_lazy_materialization(); - // TODO: Fix Me - // CHAR type in storage layer padding the 0 in length. But query engine need ignore the padding 0. - // so segment iterator need to shrink char column before output it. only use in vec query engine. - void _vec_init_char_column_id(Block* block); - bool _has_char_type(const TabletColumn& column_desc); + void _vec_init_char_column_id(); uint32_t segment_id() const { return _segment->id(); } uint32_t num_rows() const { return _segment->num_rows(); } @@ -433,8 +429,6 @@ class SegmentIterator : public RowwiseIterator { io::FileReaderSPtr _file_reader; - // char_type or array type columns cid - std::vector _char_type_idx; std::vector _is_char_type; // used for compaction, record selectd rowids of current batch diff --git a/be/src/storage/segment/segment_writer.cpp b/be/src/storage/segment/segment_writer.cpp index edf3ebc81dd0b7..68f3c7deb2b1fa 100644 --- a/be/src/storage/segment/segment_writer.cpp +++ b/be/src/storage/segment/segment_writer.cpp @@ -128,6 +128,7 @@ SegmentWriter::SegmentWriter(io::FileWriter* file_writer, uint32_t segment_id, _rowid_coder = get_key_coder(FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT); // primary keys _primary_key_coders.swap(_key_coders); + _primary_key_index_size = _key_index_size; // cluster keys _key_coders.clear(); _key_index_size.clear(); @@ -833,13 +834,15 @@ std::string SegmentWriter::_full_encode_keys( assert(_key_index_size.size() == _num_sort_key_columns); assert(key_columns.size() == _num_sort_key_columns && _key_coders.size() == _num_sort_key_columns); - return _full_encode_keys(_key_coders, key_columns, pos, null_first); + return _full_encode_keys(_key_coders, _key_index_size, key_columns, pos, null_first); } std::string SegmentWriter::_full_encode_keys( const std::vector& key_coders, + const std::vector& key_index_sizes, const std::vector& key_columns, size_t pos, bool null_first) { assert(key_columns.size() == key_coders.size()); + assert(key_columns.size() == key_index_sizes.size()); std::string encoded_keys; size_t cid = 0; @@ -856,7 +859,7 @@ std::string SegmentWriter::_full_encode_keys( } encoded_keys.push_back(KEY_NORMAL_MARKER); DCHECK(key_coders[cid] != nullptr); - key_coders[cid]->full_encode_ascending(field, &encoded_keys); + key_coders[cid]->full_encode_ascending(field, &encoded_keys, key_index_sizes[cid]); ++cid; } return encoded_keys; @@ -1217,7 +1220,8 @@ Status SegmentWriter::_generate_primary_key_index( } else { // mow table with cluster key // generate primary keys in memory for (uint32_t pos = 0; pos < num_rows; pos++) { - std::string key = _full_encode_keys(primary_key_coders, primary_key_columns, pos); + std::string key = _full_encode_keys(primary_key_coders, _primary_key_index_size, + primary_key_columns, pos); _maybe_invalid_row_cache(key); if (_tablet_schema->has_sequence_col()) { _encode_seq_column(seq_column, pos, &key); diff --git a/be/src/storage/segment/segment_writer.h b/be/src/storage/segment/segment_writer.h index 9b6b8b55c3aea1..9af9e901f7dae2 100644 --- a/be/src/storage/segment/segment_writer.h +++ b/be/src/storage/segment/segment_writer.h @@ -172,6 +172,7 @@ class SegmentWriter { size_t pos, bool null_first = true); static std::string _full_encode_keys(const std::vector& key_coders, + const std::vector& key_index_sizes, const std::vector& key_columns, size_t pos, bool null_first = true); @@ -227,7 +228,12 @@ class SegmentWriter { std::vector _primary_key_coders; const KeyCoder* _seq_coder = nullptr; const KeyCoder* _rowid_coder = nullptr; + // Per-column index length, parallel to _key_coders. Forwarded as + // char_len to KeyCoder::full_encode_ascending. std::vector _key_index_size; + // Mirror of _key_index_size captured before the coder swap in mow-with- + // cluster-key tables, used by PK encoding. + std::vector _primary_key_index_size; size_t _short_key_row_pos = 0; std::vector _column_ids; diff --git a/be/src/storage/segment/vertical_segment_writer.cpp b/be/src/storage/segment/vertical_segment_writer.cpp index 56d99d7249efa2..52e57baf07f53a 100644 --- a/be/src/storage/segment/vertical_segment_writer.cpp +++ b/be/src/storage/segment/vertical_segment_writer.cpp @@ -134,6 +134,7 @@ VerticalSegmentWriter::VerticalSegmentWriter(io::FileWriter* file_writer, uint32 _rowid_coder = get_key_coder(FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT); // primary keys _primary_key_coders.swap(_key_coders); + _primary_key_index_size = _key_index_size; // cluster keys _key_coders.clear(); _key_index_size.clear(); @@ -1135,7 +1136,8 @@ Status VerticalSegmentWriter::_generate_primary_key_index( // 1. generate primary keys in memory std::vector primary_keys; for (uint32_t pos = 0; pos < num_rows; pos++) { - std::string key = _full_encode_keys(primary_key_coders, primary_key_columns, pos); + std::string key = _full_encode_keys(primary_key_coders, _primary_key_index_size, + primary_key_columns, pos); _maybe_invalid_row_cache(key); if (_tablet_schema->has_sequence_col()) { _encode_seq_column(seq_column, pos, &key); @@ -1195,13 +1197,15 @@ std::string VerticalSegmentWriter::_full_encode_keys( } assert(key_columns.size() == _num_sort_key_columns && _key_coders.size() == _num_sort_key_columns); - return _full_encode_keys(_key_coders, key_columns, pos); + return _full_encode_keys(_key_coders, _key_index_size, key_columns, pos); } std::string VerticalSegmentWriter::_full_encode_keys( const std::vector& key_coders, + const std::vector& key_index_sizes, const std::vector& key_columns, size_t pos) { assert(key_columns.size() == key_coders.size()); + assert(key_columns.size() == key_index_sizes.size()); std::string encoded_keys; size_t cid = 0; @@ -1214,7 +1218,7 @@ std::string VerticalSegmentWriter::_full_encode_keys( } encoded_keys.push_back(KEY_NORMAL_MARKER); DCHECK(key_coders[cid] != nullptr); - key_coders[cid]->full_encode_ascending(field, &encoded_keys); + key_coders[cid]->full_encode_ascending(field, &encoded_keys, key_index_sizes[cid]); ++cid; } return encoded_keys; diff --git a/be/src/storage/segment/vertical_segment_writer.h b/be/src/storage/segment/vertical_segment_writer.h index 5c0ec0930e522d..2e4d1d3cde6e4c 100644 --- a/be/src/storage/segment/vertical_segment_writer.h +++ b/be/src/storage/segment/vertical_segment_writer.h @@ -148,6 +148,7 @@ class VerticalSegmentWriter { std::string _full_encode_keys(const std::vector& key_columns, size_t pos); std::string _full_encode_keys(const std::vector& key_coders, + const std::vector& key_index_sizes, const std::vector& key_columns, size_t pos); // used for unique-key with merge on write @@ -231,6 +232,9 @@ class VerticalSegmentWriter { const KeyCoder* _seq_coder = nullptr; const KeyCoder* _rowid_coder = nullptr; std::vector _key_index_size; + // Mirror of _key_index_size captured before the coder swap in mow-with- + // cluster-key tables, used by PK encoding. + std::vector _primary_key_index_size; size_t _short_key_row_pos = 0; // _num_rows_written means row count already written in this current column group diff --git a/be/test/core/block/block_test.cpp b/be/test/core/block/block_test.cpp index ff80cc4c425de9..89f019ff06fa73 100644 --- a/be/test/core/block/block_test.cpp +++ b/be/test/core/block/block_test.cpp @@ -1243,8 +1243,6 @@ TEST(BlockTest, others) { auto block = ColumnHelper::create_block({1, 2, 3}); block.insert(ColumnHelper::create_column_with_name({"abc", "efg", "hij"})); - block.shrink_char_type_column_suffix_zero({1, 2}); - SipHash hash; block.update_hash(hash); diff --git a/be/test/core/column/column_array_test.cpp b/be/test/core/column/column_array_test.cpp index 7b2ec0a2544ff2..43601a954aa955 100644 --- a/be/test/core/column/column_array_test.cpp +++ b/be/test/core/column/column_array_test.cpp @@ -605,11 +605,6 @@ TEST_F(ColumnArrayTest, ColumnStringFuncsTest) { assert_column_string_funcs(array_columns); } -// test shrink_padding_chars_callback -TEST_F(ColumnArrayTest, ShrinkPaddingCharsTest) { - shrink_padding_chars_callback(array_columns, serdes); -} - //////////////////////// special function from column_array.h //////////////////////// TEST_F(ColumnArrayTest, CreateArrayTest) { // Test ColumnArray constructor constraints: nested_column and offsets_column must not be ColumnConst. diff --git a/be/test/core/column/column_string_test.cpp b/be/test/core/column/column_string_test.cpp index e57c02c3155a88..6d29b555367b1f 100644 --- a/be/test/core/column/column_string_test.cpp +++ b/be/test/core/column/column_string_test.cpp @@ -1326,28 +1326,6 @@ TEST_F(ColumnStringTest, TestStringInsert) { } } } -TEST_F(ColumnStringTest, shrink_padding_chars) { - ColumnString::MutablePtr col = ColumnString::create(); - col->shrink_padding_chars(); - - col->insert_data("123\0 ", 7); - col->insert_data("456\0xx", 6); - col->insert_data("78", 2); - col->shrink_padding_chars(); - - EXPECT_EQ(col->size(), 3); - EXPECT_EQ(col->get_data_at(0), StringRef("123")); - EXPECT_EQ(col->get_data_at(0).size, 3); - EXPECT_EQ(col->get_data_at(1), StringRef("456")); - EXPECT_EQ(col->get_data_at(1).size, 3); - EXPECT_EQ(col->get_data_at(2), StringRef("78")); - EXPECT_EQ(col->get_data_at(2).size, 2); - - col->insert_data("xyz", 2); // only xy - - EXPECT_EQ(col->size(), 4); - EXPECT_EQ(col->get_data_at(3), StringRef("xy")); -} TEST_F(ColumnStringTest, sort_column) { column_string_common_test(assert_sort_column_callback, false); } diff --git a/be/test/core/column/common_column_test.h b/be/test/core/column/common_column_test.h index ac4ed5eff76582..6172a4bc3eae05 100644 --- a/be/test/core/column/common_column_test.h +++ b/be/test/core/column/common_column_test.h @@ -2364,43 +2364,6 @@ class CommonColumnTest : public ::testing::Test { } } - // get_shrinked_column should only happened in char-type column or nested char-type column, - // other column just return the origin column without any data changed, so check file content should be the same as the origin column - // just shrink the end zeros for char-type column which happened in segmentIterator - // eg. column_desc: char(6), insert into char(3), the char(3) will padding the 3 zeros at the end for writing to disk. - // but we select should just print the char(3) without the padding zeros - // limit and topN operation will trigger this function call - void shrink_padding_chars_callback(MutableColumns& load_cols, DataTypeSerDeSPtrs serders) { - auto option = DataTypeSerDe::FormatOptions(); - std::vector> res; - for (size_t i = 0; i < load_cols.size(); i++) { - auto& source_column = load_cols[i]; - LOG(INFO) << "now we are in shrink_padding_chars column : " << load_cols[i]->get_name() - << " for column size : " << source_column->size(); - source_column->shrink_padding_chars(); - // check after get_shrinked_column: 1 in selector present the load cols data is selected and data should be default value - auto ser_col = ColumnString::create(); - ser_col->reserve(source_column->size()); - VectorBufferWriter buffer_writer(*ser_col.get()); - std::vector data; - data.push_back("column: " + source_column->get_name() + - " with shrinked column size: " + std::to_string(source_column->size())); - for (size_t j = 0; j < source_column->size(); ++j) { - if (auto st = serders[i]->serialize_one_cell_to_json(*source_column, j, - buffer_writer, option); - !st) { - LOG(ERROR) << "Failed to serialize column " << i << " at row " << j; - break; - } - buffer_writer.commit(); - std::string actual_str_value = ser_col->get_data_at(j).to_string(); - data.push_back(actual_str_value); - } - res.push_back(data); - } - check_res_file("shrink_padding_chars", res); - } - void assert_size_eq(MutableColumnPtr col, size_t expect_size) { EXPECT_EQ(col->size(), expect_size); } diff --git a/be/test/storage/key_coder_test.cpp b/be/test/storage/key_coder_test.cpp index 5362645078677c..6eb0dabe7df816 100644 --- a/be/test/storage/key_coder_test.cpp +++ b/be/test/storage/key_coder_test.cpp @@ -123,7 +123,7 @@ typename CppTypeTraits::CppType decode_float(const std::string& enco template std::string encode_float(typename CppTypeTraits::CppType value) { std::string buf; - KeyCoderTraits::full_encode_ascending(&value, &buf); + KeyCoderTraits::full_encode_ascending(&value, &buf, /*char_len=*/0); return buf; } diff --git a/be/test/storage/olap_type_test.cpp b/be/test/storage/olap_type_test.cpp index 05775b693e4430..741f75bf47b9ab 100644 --- a/be/test/storage/olap_type_test.cpp +++ b/be/test/storage/olap_type_test.cpp @@ -2083,4 +2083,52 @@ TEST_F(OlapTypeTest, timestamptz_type) { << "serde mismatch for TIMESTAMPTZ expected=" << tc.expected; } } + +// from_olap_string for string types (CHAR / VARCHAR / STRING) strnlens the +// input so any trailing '\0' bytes that came from a fixed-width CHAR write +// are dropped before the value lands in the Field. VARCHAR / STRING ZoneMap +// values do not normally carry trailing '\0' (the writers store the natural +// byte length), so strnlen is a no-op for them. +TEST_F(OlapTypeTest, from_olap_string_strings) { + struct Case { + PrimitiveType type; + std::string input; + std::string expected; + }; + std::vector cases = { + // CHAR(N) ZoneMap min/max from the convertor is padded with '\0' + // — strnlen recovers the logical content. + {TYPE_CHAR, std::string("abc", 3) + std::string(7, '\0'), "abc"}, + {TYPE_CHAR, std::string(10, '\0'), ""}, + {TYPE_CHAR, "alpha", "alpha"}, + // VARCHAR / STRING never carry trailing '\0' in their ZoneMap + // representation, so the helper is a transparent pass-through + // for the typical case. + {TYPE_VARCHAR, "hello", "hello"}, + {TYPE_STRING, "world\nline2", "world\nline2"}, + {TYPE_STRING, "", ""}, + }; + + for (const auto& tc : cases) { + auto data_type = DataTypeFactory::instance().create_data_type( + tc.type, /*is_nullable=*/false, /*precision=*/0, /*scale=*/0, + /*length=*/static_cast(tc.input.size())); + expect_from_storage_string_paths(data_type, tc.input, [&](const Field& field) { + EXPECT_EQ(field.get(), tc.expected) + << "type=" << static_cast(tc.type) << " input.size=" << tc.input.size(); + }); + } +} + +// VARCHAR / STRING values containing an embedded '\0' are truncated at the +// first '\0' — the same strnlen behaviour applies to all string types. This +// is acceptable in practice because Doris string columns do not store +// embedded NULs in their ZoneMap representation; the test pins the contract. +TEST_F(OlapTypeTest, from_olap_string_strings_embedded_null_truncates) { + auto data_type = DataTypeFactory::instance().create_data_type( + TYPE_VARCHAR, /*is_nullable=*/false, 0, 0, /*length=*/32); + expect_from_storage_string_paths(data_type, std::string("ab\0cd", 5), [](const Field& field) { + EXPECT_EQ(field.get(), "ab"); + }); +} } // namespace doris diff --git a/be/test/storage/segment/binary_dict_page_test.cpp b/be/test/storage/segment/binary_dict_page_test.cpp index 70f3b1ada154f3..6e4daac77715df 100644 --- a/be/test/storage/segment/binary_dict_page_test.cpp +++ b/be/test/storage/segment/binary_dict_page_test.cpp @@ -107,7 +107,7 @@ class BinaryDictPageTest : public testing::Test { // Apply pre-decode for BinaryDictPage data pages // This method handles all encoding types (bitshuffle, plain V1, plain V2) Status apply_pre_decode(Slice& page_slice, std::unique_ptr& decoded_page) { - BinaryDictPagePreDecoder pre_decoder; + BinaryDictPagePreDecoder pre_decoder; return pre_decoder.decode(&decoded_page, &page_slice, 0, false, PageTypePB::DATA_PAGE, ""); } diff --git a/be/test/storage/segment/binary_plain_page_v2_test.cpp b/be/test/storage/segment/binary_plain_page_v2_test.cpp index 29b74961427aaa..6579564acd0d80 100644 --- a/be/test/storage/segment/binary_plain_page_v2_test.cpp +++ b/be/test/storage/segment/binary_plain_page_v2_test.cpp @@ -27,6 +27,9 @@ #include "core/column/column_string.h" #include "storage/cache/page_cache.h" #include "storage/olap_common.h" +#include "storage/segment/binary_plain_page.h" +#include "storage/segment/binary_plain_page_char_strip_pre_decoder.h" +#include "storage/segment/binary_plain_page_v2_char_strip_pre_decoder.h" #include "storage/segment/binary_plain_page_v2_pre_decoder.h" #include "storage/segment/page_builder.h" #include "storage/segment/page_decoder.h" @@ -553,5 +556,146 @@ TEST_F(BinaryPlainPageV2Test, TestSeekAndRead) { EXPECT_EQ("e", string_column->get_data_at(2).to_string()); } +// CHAR-specific roundtrip: write padded slices (the on-disk format produced +// by OlapColumnDataConvertorChar) through both CHAR-strip pre-decoders and +// confirm the post-decode column surfaces the unpadded logical content. + +namespace { + +// Build padded slices of fixed width `pad_len`: each input `s` is copied into +// a buffer of size pad_len with trailing '\0' fill. +// Build a vector of zero-padded buffers of fixed width `pad_len`. The caller +// builds Slices into the returned buffers AFTER the vector is settled — keep +// Slice creation outside this helper so the buffers' addresses don't move +// underneath the Slices. +std::vector make_padded_buffers(const std::vector& logical, + size_t pad_len) { + std::vector buffers; + buffers.reserve(logical.size()); + for (const auto& s : logical) { + EXPECT_LE(s.size(), pad_len); + std::string buf = s; + buf.resize(pad_len, '\0'); + buffers.push_back(std::move(buf)); + } + return buffers; +} + +std::vector slices_from(const std::vector& buffers, size_t pad_len) { + std::vector slices; + slices.reserve(buffers.size()); + for (const auto& b : buffers) { + slices.emplace_back(b.data(), pad_len); + } + return slices; +} + +void verify_unpadded_column(MutableColumnPtr& column, const std::vector& expected) { + auto* str_col = assert_cast(column.get()); + ASSERT_EQ(expected.size(), str_col->size()); + for (size_t i = 0; i < expected.size(); ++i) { + auto got = str_col->get_data_at(i).to_string(); + EXPECT_EQ(expected[i], got) << "row " << i; + } +} + +} // namespace + +// V2 plain page with padded CHAR slices → BinaryPlainPageV2CharStripPreDecoder +// → BinaryPlainPageDecoder → strings come out unpadded. +TEST_F(BinaryPlainPageV2Test, CharStripPreDecoder_V2_RoundtripPaddedSlices) { + constexpr size_t pad_len = 8; + std::vector logical = {"a", "bc", "", "alpha", "alpaca12"}; + auto buffers = make_padded_buffers(logical, pad_len); + auto slices = slices_from(buffers, pad_len); + + PageBuilderOptions builder_options; + builder_options.data_page_size = 256 * 1024; + PageBuilder* builder_ptr = nullptr; + ASSERT_TRUE(BinaryPlainPageV2Builder::create(&builder_ptr, + builder_options) + .ok()); + std::unique_ptr wrapper(builder_ptr); + auto* page_builder = + static_cast*>(builder_ptr); + size_t count = slices.size(); + ASSERT_TRUE(page_builder->add(reinterpret_cast(slices.data()), &count).ok()); + ASSERT_EQ(slices.size(), count); + + OwnedSlice owned_slice; + ASSERT_TRUE(page_builder->finish(&owned_slice).ok()); + + // Run the CHAR-strip pre-decoder (the one EncodingInfo would pick for a + // CHAR PLAIN_ENCODING_V2 page). + Slice page_slice = owned_slice.slice(); + std::unique_ptr decoded_page; + BinaryPlainPageV2CharStripPreDecoder pre_decoder; + ASSERT_TRUE(pre_decoder + .decode(&decoded_page, &page_slice, /*size_of_tail=*/0, + /*use_cache=*/false, PageTypePB::DATA_PAGE, "") + .ok()); + + // Decode the resulting V1 layout and verify rows are unpadded. + PageDecoderOptions decoder_options; + BinaryPlainPageDecoder page_decoder(page_slice, + decoder_options); + ASSERT_TRUE(page_decoder.init().ok()); + ASSERT_EQ(logical.size(), page_decoder.count()); + + MutableColumnPtr column = ColumnString::create(); + size_t num_to_read = logical.size(); + ASSERT_TRUE(page_decoder.next_batch(&num_to_read, column).ok()); + ASSERT_EQ(logical.size(), num_to_read); + verify_unpadded_column(column, logical); +} + +// V1 plain page with padded CHAR slices → BinaryPlainPageCharStripPreDecoder +// → BinaryPlainPageDecoder → strings come out unpadded. This mirrors the +// V2 case above for the PLAIN_ENCODING path. +TEST_F(BinaryPlainPageV2Test, CharStripPreDecoder_V1_RoundtripPaddedSlices) { + constexpr size_t pad_len = 6; + std::vector logical = {"x", "abcd", "", "zzzzzz", "qw"}; + auto buffers = make_padded_buffers(logical, pad_len); + auto slices = slices_from(buffers, pad_len); + + // Build a V1 plain page (no varint length prefixes — data + offsets + + // num_elems trailer). + PageBuilderOptions builder_options; + builder_options.data_page_size = 256 * 1024; + PageBuilder* builder_ptr = nullptr; + ASSERT_TRUE(BinaryPlainPageBuilder::create(&builder_ptr, + builder_options) + .ok()); + std::unique_ptr wrapper(builder_ptr); + auto* page_builder = + static_cast*>(builder_ptr); + size_t count = slices.size(); + ASSERT_TRUE(page_builder->add(reinterpret_cast(slices.data()), &count).ok()); + ASSERT_EQ(slices.size(), count); + + OwnedSlice owned_slice; + ASSERT_TRUE(page_builder->finish(&owned_slice).ok()); + + Slice page_slice = owned_slice.slice(); + std::unique_ptr decoded_page; + BinaryPlainPageCharStripPreDecoder pre_decoder; + ASSERT_TRUE(pre_decoder + .decode(&decoded_page, &page_slice, /*size_of_tail=*/0, + /*use_cache=*/false, PageTypePB::DATA_PAGE, "") + .ok()); + + PageDecoderOptions decoder_options; + BinaryPlainPageDecoder page_decoder(page_slice, + decoder_options); + ASSERT_TRUE(page_decoder.init().ok()); + ASSERT_EQ(logical.size(), page_decoder.count()); + + MutableColumnPtr column = ColumnString::create(); + size_t num_to_read = logical.size(); + ASSERT_TRUE(page_decoder.next_batch(&num_to_read, column).ok()); + ASSERT_EQ(logical.size(), num_to_read); + verify_unpadded_column(column, logical); +} + } // namespace segment_v2 } // namespace doris diff --git a/be/test/storage/segment/encoding_info_test.cpp b/be/test/storage/segment/encoding_info_test.cpp index 666363c9566c83..f851e30242df76 100644 --- a/be/test/storage/segment/encoding_info_test.cpp +++ b/be/test/storage/segment/encoding_info_test.cpp @@ -28,6 +28,8 @@ #include "runtime/exec_env.h" #include "storage/olap_common.h" #include "storage/segment/binary_dict_page_pre_decoder.h" +#include "storage/segment/binary_plain_page_char_strip_pre_decoder.h" +#include "storage/segment/binary_plain_page_v2_char_strip_pre_decoder.h" #include "storage/segment/binary_plain_page_v2_pre_decoder.h" #include "storage/segment/bitshuffle_page_pre_decoder.h" #include "storage/types.h" @@ -192,10 +194,15 @@ TEST_F(EncodingInfoTest, test_all_pre_decoders) { auto* pre_decoder = encoding_info->get_data_page_pre_decoder(); ASSERT_NE(nullptr, pre_decoder) << "Type " << static_cast(type) << " with DICT_ENCODING should have pre_decoder"; - auto* dict_decoder = dynamic_cast(pre_decoder); - EXPECT_NE(nullptr, dict_decoder) - << "Type " << static_cast(type) - << " with DICT_ENCODING should have BinaryDictPagePreDecoder"; + // CHAR uses the IS_CHAR=true specialization so it strips trailing '\0' + // padding from inline-binary dict fallbacks; other string types use + // the regular non-CHAR specialization. + bool is_dict_decoder = + (type == FieldType::OLAP_FIELD_TYPE_CHAR) + ? dynamic_cast*>(pre_decoder) != nullptr + : dynamic_cast*>(pre_decoder) != nullptr; + EXPECT_TRUE(is_dict_decoder) << "Type " << static_cast(type) + << " with DICT_ENCODING should have BinaryDictPagePreDecoder"; } // Test PLAIN_ENCODING_V2 with Slice types - should have BinaryPlainPageV2PreDecoder @@ -217,10 +224,16 @@ TEST_F(EncodingInfoTest, test_all_pre_decoders) { auto* pre_decoder = encoding_info->get_data_page_pre_decoder(); ASSERT_NE(nullptr, pre_decoder) << "Type " << static_cast(type) << " with PLAIN_ENCODING_V2 should have pre_decoder"; - auto* v2_decoder = dynamic_cast(pre_decoder); - EXPECT_NE(nullptr, v2_decoder) << "Type " << static_cast(type) - << " with PLAIN_ENCODING_V2 should have " - "BinaryPlainPageV2PreDecoder"; + // CHAR PLAIN_ENCODING_V2 is wired to BinaryPlainPageV2CharStripPreDecoder + // so the trailing '\0' padding written by OlapColumnDataConvertorChar + // is stripped at page load time; other binary types use the regular + // V2 pre-decoder. + bool ok = (type == FieldType::OLAP_FIELD_TYPE_CHAR) + ? dynamic_cast(pre_decoder) != + nullptr + : dynamic_cast(pre_decoder) != nullptr; + EXPECT_TRUE(ok) << "Type " << static_cast(type) + << " with PLAIN_ENCODING_V2 should have V2 pre-decoder"; } // Test PLAIN_ENCODING - should NOT have pre_decoder @@ -260,8 +273,19 @@ TEST_F(EncodingInfoTest, test_all_pre_decoders) { auto status = EncodingInfo::get(type, PLAIN_ENCODING, encoding_preference, &encoding_info); if (status.ok() && encoding_info != nullptr) { auto* pre_decoder = encoding_info->get_data_page_pre_decoder(); - EXPECT_EQ(nullptr, pre_decoder) << "Type " << static_cast(type) - << " with PLAIN_ENCODING should NOT have pre_decoder"; + if (type == FieldType::OLAP_FIELD_TYPE_CHAR) { + // CHAR PLAIN_ENCODING has a CHAR-strip pre-decoder that strips + // the trailing '\0' padding written by the convertor. + EXPECT_NE(nullptr, pre_decoder) + << "CHAR with PLAIN_ENCODING should have a pre_decoder"; + EXPECT_NE(nullptr, dynamic_cast(pre_decoder)) + << "CHAR PLAIN_ENCODING pre-decoder should be " + "BinaryPlainPageCharStripPreDecoder"; + } else { + EXPECT_EQ(nullptr, pre_decoder) + << "Type " << static_cast(type) + << " with PLAIN_ENCODING should NOT have pre_decoder"; + } } } diff --git a/be/test/storage/segment/segment_writer_full_encode_keys_test.cpp b/be/test/storage/segment/segment_writer_full_encode_keys_test.cpp index c2748a43fb9eac..8848bed4e44012 100644 --- a/be/test/storage/segment/segment_writer_full_encode_keys_test.cpp +++ b/be/test/storage/segment/segment_writer_full_encode_keys_test.cpp @@ -87,9 +87,14 @@ TEST(SegmentWriterFullEncodeKeysTest, TestSegmentWriterKeyEncoding) { auto int_coder = get_key_coder(FieldType::OLAP_FIELD_TYPE_INT); auto str_coder = get_key_coder(FieldType::OLAP_FIELD_TYPE_VARCHAR); std::vector key_coders = {int_coder, str_coder, str_coder}; + // Per-column index sizes. Only CHAR's KeyCoder consults this; the columns + // here are INT and VARCHAR, which ignore it — zeros are fine. + std::vector key_index_sizes = {0, 0, 0}; //////////////////////////////////////////////////////////////////////////// - std::string encoded0 = SegmentWriter::_full_encode_keys(key_coders, key_columns, 0); - std::string encoded1 = SegmentWriter::_full_encode_keys(key_coders, key_columns, 1); + std::string encoded0 = + SegmentWriter::_full_encode_keys(key_coders, key_index_sizes, key_columns, 0); + std::string encoded1 = + SegmentWriter::_full_encode_keys(key_coders, key_index_sizes, key_columns, 1); //////////////////////////////////////////////////////////////////////////// std::cout << StringView(encoded0).dump_hex() << std::endl; // X'02850505050261026262' std::cout << StringView(encoded1).dump_hex() << std::endl; // X'0285050505026101026363' diff --git a/be/test/storage/segment/zone_map_index_test.cpp b/be/test/storage/segment/zone_map_index_test.cpp index 4ec894feac6827..51f8c068947c44 100644 --- a/be/test/storage/segment/zone_map_index_test.cpp +++ b/be/test/storage/segment/zone_map_index_test.cpp @@ -249,16 +249,16 @@ class ColumnZoneMapTest : public testing::Test { DataTypeFactory::instance().create_data_type(TYPE_CHAR, true, 0, 0, length); auto tab_col = create_char_key(0, true, length); const TabletColumn* field = tab_col.get(); - std::string s_less_than_schema_length1(length - 1, 'a'); - std::string s_less_than_schema_length1_expect(length, 'a'); - s_less_than_schema_length1_expect[length - 1] = '\0'; - std::string s_less_than_schema_length2(length - 2, 'b'); - std::string s_less_than_schema_length2_expect(length, 'b'); - s_less_than_schema_length2_expect[length - 1] = '\0'; - s_less_than_schema_length2_expect[length - 2] = '\0'; + // ZoneMap writer stores whatever slice bytes it receives. In production + // OlapColumnDataConvertorChar pads CHAR slices to the declared length + // before they reach the writer; from_olap_string strnlens at read time + // so the materialized Field is always unpadded. This test passes raw + // shorter slices directly to the writer to exercise the strnlen path. + std::string s_less_than_char_len1(length - 1, 'a'); + std::string s_less_than_char_len2(length - 2, 'b'); std::unique_ptr writer; ASSERT_TRUE(ZoneMapIndexWriter::create(data_type, field, writer).ok()); - Slice slices[] = {Slice(s_less_than_schema_length1), Slice(s_less_than_schema_length2)}; + Slice slices[] = {Slice(s_less_than_char_len1), Slice(s_less_than_char_len2)}; writer->add_values(&slices, 2); if (pass_all) { writer->invalid_page_zone_map(); @@ -274,13 +274,13 @@ class ColumnZoneMapTest : public testing::Test { ASSERT_TRUE(file_writer->close().ok()); const auto& seg_zm = index_meta.zone_map_index().segment_zone_map(); - // Min/Max should be truncated to MAX_ZONE_MAP_INDEX_SIZE and last byte of Max is incremented - EXPECT_EQ(seg_zm.min().size(), s_less_than_schema_length1.size()); - EXPECT_EQ(seg_zm.min(), s_less_than_schema_length1); - EXPECT_EQ(seg_zm.max().size(), s_less_than_schema_length2.size()); - EXPECT_EQ(seg_zm.max(), s_less_than_schema_length2); + // On-disk min/max preserve the raw (unpadded) bytes. + EXPECT_EQ(seg_zm.min().size(), s_less_than_char_len1.size()); + EXPECT_EQ(seg_zm.min(), s_less_than_char_len1); + EXPECT_EQ(seg_zm.max().size(), s_less_than_char_len2.size()); + EXPECT_EQ(seg_zm.max(), s_less_than_char_len2); - // Verify ZoneMap::from_proto can correctly parse the truncated zone map + // Verify ZoneMap::from_proto materializes the unpadded Field. ZoneMap seg_zone_map; ASSERT_TRUE(ZoneMap::from_proto(seg_zm, data_type, seg_zone_map).ok()); EXPECT_EQ(seg_zone_map.has_null, false); @@ -289,10 +289,10 @@ class ColumnZoneMapTest : public testing::Test { EXPECT_EQ(seg_zone_map.has_positive_inf, false); EXPECT_EQ(seg_zone_map.has_negative_inf, false); EXPECT_EQ(seg_zone_map.has_nan, false); - EXPECT_EQ(seg_zone_map.min_value.get().size(), length); - EXPECT_EQ(seg_zone_map.min_value.get(), s_less_than_schema_length1_expect); - EXPECT_EQ(seg_zone_map.max_value.get().size(), length); - EXPECT_EQ(seg_zone_map.max_value.get(), s_less_than_schema_length2_expect); + EXPECT_EQ(seg_zone_map.min_value.get().size(), s_less_than_char_len1.size()); + EXPECT_EQ(seg_zone_map.min_value.get(), s_less_than_char_len1); + EXPECT_EQ(seg_zone_map.max_value.get().size(), s_less_than_char_len2.size()); + EXPECT_EQ(seg_zone_map.max_value.get(), s_less_than_char_len2); io::FileReaderSPtr file_reader; EXPECT_TRUE(_fs->open_file(file_path, &file_reader).ok()); @@ -315,12 +315,12 @@ class ColumnZoneMapTest : public testing::Test { EXPECT_EQ(page_zone_map.has_negative_inf, false); EXPECT_EQ(page_zone_map.has_nan, false); if (!pass_all) { - EXPECT_EQ(page_zone_map.min_value.get().size(), length); - EXPECT_EQ(page_zone_map.min_value.get(), - s_less_than_schema_length1_expect); - EXPECT_EQ(page_zone_map.max_value.get().size(), length); - EXPECT_EQ(page_zone_map.max_value.get(), - s_less_than_schema_length2_expect); + EXPECT_EQ(page_zone_map.min_value.get().size(), + s_less_than_char_len1.size()); + EXPECT_EQ(page_zone_map.min_value.get(), s_less_than_char_len1); + EXPECT_EQ(page_zone_map.max_value.get().size(), + s_less_than_char_len2.size()); + EXPECT_EQ(page_zone_map.max_value.get(), s_less_than_char_len2); } } }