diff --git a/phpstan-baseline-7.4.neon b/phpstan-baseline-7.4.neon index d91c9ee853..c970170901 100644 --- a/phpstan-baseline-7.4.neon +++ b/phpstan-baseline-7.4.neon @@ -150,12 +150,6 @@ parameters: count: 1 path: src/lib/FieldType/Selection/SearchField.php - - - message: '#^Parameter \#1 \$str of function mb_substr expects string, string\|false given\.$#' - identifier: argument.type - count: 1 - path: src/lib/FieldType/TextBlock/SearchField.php - - message: '#^Parameter \#1 \$str of function mb_substr expects string, string\|false given\.$#' identifier: argument.type diff --git a/phpstan-baseline-gte-8.0.neon b/phpstan-baseline-gte-8.0.neon index 92badbf76c..874cf8f42c 100644 --- a/phpstan-baseline-gte-8.0.neon +++ b/phpstan-baseline-gte-8.0.neon @@ -162,12 +162,6 @@ parameters: count: 1 path: src/lib/FieldType/Selection/SearchField.php - - - message: '#^Parameter \#1 \$string of function mb_substr expects string, string\|false given\.$#' - identifier: argument.type - count: 1 - path: src/lib/FieldType/TextBlock/SearchField.php - - message: '#^Parameter \#1 \$string of function mb_substr expects string, string\|false given\.$#' identifier: argument.type diff --git a/phpstan-baseline.neon b/phpstan-baseline.neon index 9bee1e5f4b..7e90edb8c2 100644 --- a/phpstan-baseline.neon +++ b/phpstan-baseline.neon @@ -11946,12 +11946,6 @@ parameters: count: 1 path: src/lib/FieldType/StorageGateway.php - - - message: '#^Parameter \#1 \$string of method Ibexa\\Core\\FieldType\\TextBlock\\SearchField\:\:extractShortText\(\) expects string, array\|bool\|float\|int\|string\|null given\.$#' - identifier: argument.type - count: 1 - path: src/lib/FieldType/TextBlock/SearchField.php - - message: '#^Access to an undefined property Ibexa\\Contracts\\Core\\FieldType\\Value\:\:\$text\.$#' identifier: property.notFound @@ -15828,12 +15822,6 @@ parameters: count: 1 path: src/lib/MVC/Symfony/Translation/ExceptionMessageTemplateFileVisitor.php - - - message: '#^Access to an undefined property PhpParser\\Node\\Arg\|PhpParser\\Node\\VariadicPlaceholder\:\:\$value\.$#' - identifier: property.notFound - count: 1 - path: src/lib/MVC/Symfony/Translation/ExceptionMessageTemplateFileVisitor.php - - message: '#^Cannot call method error\(\) on Psr\\Log\\LoggerInterface\|null\.$#' identifier: method.nonObject @@ -18084,7 +18072,6 @@ parameters: count: 1 path: src/lib/Persistence/Legacy/Content/FieldValue/Converter/DateAndTimeConverter.php - - message: '#^Method Ibexa\\Core\\Persistence\\Legacy\\Content\\FieldValue\\Converter\\DateAndTimeConverter\:\:toFieldDefinition\(\) has no return type specified\.$#' identifier: missingType.return @@ -26173,12 +26160,6 @@ parameters: count: 1 path: src/lib/Search/Common/FieldValueMapper/MultipleStringMapper.php - - - message: '#^Method Ibexa\\Core\\Search\\Common\\FieldValueMapper\\StringMapper\:\:convert\(\) should return string but returns string\|null\.$#' - identifier: return.type - count: 1 - path: src/lib/Search/Common/FieldValueMapper/StringMapper.php - - message: '#^Method Ibexa\\Core\\Search\\Common\\IncrementalIndexer\:\:createSearchIndex\(\) has no return type specified\.$#' identifier: missingType.return @@ -32305,7 +32286,6 @@ parameters: count: 1 path: tests/integration/Core/Repository/Common/SlugConverter.php - - message: '#^Cannot call method getValue\(\) on Ibexa\\Contracts\\Core\\Repository\\Values\\Content\\Field\|null\.$#' identifier: method.nonObject @@ -41930,7 +41910,6 @@ parameters: count: 1 path: tests/integration/Core/Repository/ObjectStateServiceTest.php - - message: '#^Method Ibexa\\Tests\\Integration\\Core\\Repository\\ObjectStateServiceTest\:\:assertObjectsLoadedByIdentifiers\(\) has no return type specified\.$#' identifier: missingType.return @@ -41961,7 +41940,6 @@ parameters: count: 1 path: tests/integration/Core/Repository/ObjectStateServiceTest.php - - message: '#^Method Ibexa\\Tests\\Integration\\Core\\Repository\\ObjectStateServiceTest\:\:deleteExistingObjectStateGroups\(\) has no return type specified\.$#' identifier: missingType.return diff --git a/src/lib/FieldType/TextBlock/SearchField.php b/src/lib/FieldType/TextBlock/SearchField.php index 2bfde7dc89..501bb3699f 100644 --- a/src/lib/FieldType/TextBlock/SearchField.php +++ b/src/lib/FieldType/TextBlock/SearchField.php @@ -21,7 +21,7 @@ public function getIndexData(Field $field, FieldDefinition $fieldDefinition) return [ new Search\Field( 'value', - $this->extractShortText($field->value->data), + $this->extractText($field->value->data), new Search\FieldType\StringField() ), new Search\Field( @@ -33,15 +33,17 @@ public function getIndexData(Field $field, FieldDefinition $fieldDefinition) } /** - * Extracts short snippet of the given $string. - * - * @param string $string - * - * @return string + * @param mixed $string */ - private function extractShortText($string) + private function extractText($string): string { - return mb_substr(strtok(trim((string)$string), "\r\n"), 0, 255); + if (!is_string($string)) { + return ''; + } + + $lines = explode("\n", str_replace(["\r\n", "\r"], "\n", $string)); + + return implode(' ', array_map('trim', $lines)); } public function getIndexDefinition() diff --git a/src/lib/MVC/Symfony/Translation/ExceptionMessageTemplateFileVisitor.php b/src/lib/MVC/Symfony/Translation/ExceptionMessageTemplateFileVisitor.php index c975d605b3..56df97034c 100644 --- a/src/lib/MVC/Symfony/Translation/ExceptionMessageTemplateFileVisitor.php +++ b/src/lib/MVC/Symfony/Translation/ExceptionMessageTemplateFileVisitor.php @@ -65,12 +65,21 @@ public function enterNode(Node $node): void $ignore = $this->isIgnore($node); + if (!$node->args[0] instanceof Node\Arg) { + return; + } + if (!$node->args[0]->value instanceof String_) { if ($ignore) { return; } - $message = sprintf('Can only extract the translation id from a scalar string, but got "%s". Please refactor your code to make it extractable, or add the doc comment /** @Ignore */ to this code element (in %s on line %d).', get_class($node->args[0]->value), $this->file, $node->args[0]->value->getLine()); + $message = sprintf( + 'Can only extract the translation id from a scalar string, but got "%s". Please refactor your code to make it extractable, or add the doc comment /** @Ignore */ to this code element (in %s on line %d).', + get_class($node->args[0]->value), + $this->file, + $node->args[0]->value->getLine() + ); $this->logger->error($message); diff --git a/src/lib/Resources/settings/search_engines/field_value_mappers.yml b/src/lib/Resources/settings/search_engines/field_value_mappers.yml index eb80c572db..219bf682a4 100644 --- a/src/lib/Resources/settings/search_engines/field_value_mappers.yml +++ b/src/lib/Resources/settings/search_engines/field_value_mappers.yml @@ -62,6 +62,7 @@ services: - { name: ibexa.search.common.field_value.mapper, maps: Ibexa\Contracts\Core\Search\FieldType\PriceField } Ibexa\Core\Search\Common\FieldValueMapper\StringMapper: + autoconfigure: true class: Ibexa\Core\Search\Common\FieldValueMapper\StringMapper tags: - { name: ibexa.search.common.field_value.mapper, maps: Ibexa\Contracts\Core\Search\FieldType\StringField } diff --git a/src/lib/Search/Common/FieldValueMapper/StringMapper.php b/src/lib/Search/Common/FieldValueMapper/StringMapper.php index 1050b068ed..94482065b7 100644 --- a/src/lib/Search/Common/FieldValueMapper/StringMapper.php +++ b/src/lib/Search/Common/FieldValueMapper/StringMapper.php @@ -9,14 +9,27 @@ use Ibexa\Contracts\Core\Search\Field; use Ibexa\Contracts\Core\Search\FieldType; use Ibexa\Core\Search\Common\FieldValueMapper; +use Psr\Log\LoggerAwareInterface; +use Psr\Log\LoggerAwareTrait; +use Psr\Log\LoggerInterface; +use Psr\Log\NullLogger; /** * Common string field value mapper implementation. */ -class StringMapper extends FieldValueMapper +class StringMapper extends FieldValueMapper implements LoggerAwareInterface { + use LoggerAwareTrait; + + public function __construct( + ?LoggerInterface $logger = null + ) { + $this->logger = $logger ?? new NullLogger(); + } + public const REPLACE_WITH_SPACE_PATTERN = '([\x09\x0B\x0C]+)'; public const REMOVE_PATTERN = '([\x00-\x08\x0E-\x1F]+)'; + public const MAX_TERM_LENGTH = 32766; public function canMap(Field $field): bool { @@ -43,11 +56,28 @@ protected function convert($value): string ); // Remove non-printable characters. - return preg_replace( + $value = preg_replace( self::REMOVE_PATTERN, '', (string)$value ); + + // Enforce Lucene's bytes MAX_TERM_LENGTH to avoid silent indexing failures + $value = (string)$value; + $truncated = mb_strcut($value, 0, self::MAX_TERM_LENGTH); + + if (strlen($truncated) < strlen($value)) { + $this->logger->warning( + sprintf( + 'String field value was truncated from %d to %d bytes (max term length: %d).', + strlen($value), + strlen($truncated), + self::MAX_TERM_LENGTH + ) + ); + } + + return $truncated; } } diff --git a/tests/integration/Core/Repository/FieldType/TextBlockIntegrationTest.php b/tests/integration/Core/Repository/FieldType/TextBlockIntegrationTest.php index 5c26890faf..ed6d12892f 100644 --- a/tests/integration/Core/Repository/FieldType/TextBlockIntegrationTest.php +++ b/tests/integration/Core/Repository/FieldType/TextBlockIntegrationTest.php @@ -287,24 +287,24 @@ public function providerForTestIsNotEmptyValue() protected function getValidSearchValueOne() { - return 'caution is the " path to mediocrity' . PHP_EOL . 'something completely different'; + return 'caution is the " path to mediocrity something completely different'; } protected function getSearchTargetValueOne() { // ensure case-insensitivity - return strtoupper('caution is the " path to mediocrity'); + return strtoupper('caution is the " path to mediocrity something completely different'); } protected function getValidSearchValueTwo() { - return "truth suffers from ' too much analysis\n hello and goodbye"; + return "truth suffers from ' too much analysis hello and goodbye"; } protected function getSearchTargetValueTwo() { // ensure case-insensitivity - return strtoupper("truth suffers from ' too much analysis"); + return strtoupper("truth suffers from ' too much analysis hello and goodbye"); } protected function getFullTextIndexedFieldData() diff --git a/tests/lib/Search/Common/FieldValueMapper/RemoteIdentifierMapperTest.php b/tests/lib/Search/Common/FieldValueMapper/RemoteIdentifierMapperTest.php index 2d09349b83..da2a37357e 100644 --- a/tests/lib/Search/Common/FieldValueMapper/RemoteIdentifierMapperTest.php +++ b/tests/lib/Search/Common/FieldValueMapper/RemoteIdentifierMapperTest.php @@ -15,6 +15,7 @@ use Ibexa\Contracts\Core\Search\FieldType\StringField; use Ibexa\Core\Search\Common\FieldValueMapper\RemoteIdentifierMapper; use Ibexa\Tests\Core\Search\TestCase; +use Psr\Log\LoggerInterface; /** * @covers \Ibexa\Core\Search\Common\FieldValueMapper\RemoteIdentifierMapper @@ -26,7 +27,9 @@ final class RemoteIdentifierMapperTest extends TestCase protected function setUp(): void { - $this->mapper = new RemoteIdentifierMapper(); + $this->mapper = new RemoteIdentifierMapper( + $this->createMock(LoggerInterface::class) + ); } /** diff --git a/tests/lib/Search/Common/FieldValueMapper/StringMapperTest.php b/tests/lib/Search/Common/FieldValueMapper/StringMapperTest.php new file mode 100644 index 0000000000..eddd165ddc --- /dev/null +++ b/tests/lib/Search/Common/FieldValueMapper/StringMapperTest.php @@ -0,0 +1,105 @@ +mapper = new StringMapper(); + } + + public function testCanMap(): void + { + $field = $this->createFieldWithValue('hello', new StringField()); + + self::assertTrue($this->mapper->canMap($field)); + } + + public function testMapsPlainString(): void + { + $field = $this->createFieldWithValue('hello world', new StringField()); + + self::assertSame('hello world', $this->mapper->map($field)); + } + + public function testStripsNonPrintableCharacters(): void + { + $field = $this->createFieldWithValue("hello\x01\x02world", new StringField()); + + self::assertSame('helloworld', $this->mapper->map($field)); + } + + public function testReplacesTabAndVerticalWhitespaceWithSpace(): void + { + $field = $this->createFieldWithValue("hello\x09world\x0Bfoo", new StringField()); + + self::assertSame('hello world foo', $this->mapper->map($field)); + } + + public function testTruncatesToMaxTermLength(): void + { + $longValue = str_repeat('a', StringMapper::MAX_TERM_LENGTH + 100); + $field = $this->createFieldWithValue($longValue, new StringField()); + $result = $this->mapper->map($field); + + self::assertSame(StringMapper::MAX_TERM_LENGTH, strlen($result)); + } + + public function testTruncatesMultibyteStringAtCharacterBoundary(): void + { + // Each UTF-8 character here is 3 bytes (€ = E2 82 AC). + // Fill up to just past the limit so truncation must happen on a char boundary. + $char = '€'; + $charBytes = strlen($char); + + self::assertSame(3, $charBytes); + + $count = (int) ceil((StringMapper::MAX_TERM_LENGTH + $charBytes) / $charBytes); + $longValue = str_repeat($char, $count); + + $field = $this->createFieldWithValue($longValue, new StringField()); + $result = $this->mapper->map($field); + + self::assertSame(StringMapper::MAX_TERM_LENGTH, strlen($result)); + // Result must be valid UTF-8 (no split mid-character). + self::assertSame(1, preg_match('//u', $result)); + } + + public function testValueWithinLimitIsNotTruncated(): void + { + $value = str_repeat('a', StringMapper::MAX_TERM_LENGTH); + $field = $this->createFieldWithValue($value, new StringField()); + + self::assertSame($value, $this->mapper->map($field)); + } + + private function createFieldWithValue(string $value, StringField $type): Field + { + $field = $this->createMock(Field::class); + $field + ->method('getValue') + ->willReturn($value); + $field + ->method('getType') + ->willReturn($type); + + return $field; + } +}