Skip to content

Commit d14dbbc

Browse files
haberman and copybara-github
authored and committed
Breaking Change: Validate UTF-8 in string setters, [as previously announced](https://protobuf.dev/news/2023-12-27/#php-breaking-changes).
The pure-PHP implementation was already validating UTF-8; this change makes the C extension validate it as well. PiperOrigin-RevId: 597695655
1 parent d1444e2 commit d14dbbc

File tree

2 files changed

+29
-15
lines changed

2 files changed

+29
-15
lines changed

php/ext/google/protobuf/convert.c

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -370,21 +370,23 @@ bool Convert_PhpToUpb(zval* php_val, upb_MessageValue* upb_val, TypeInfo type,
370370
return to_bool(php_val, &upb_val->bool_val);
371371
case kUpb_CType_String:
372372
case kUpb_CType_Bytes: {
373-
char* ptr;
374-
size_t size;
375-
376373
if (!to_string(php_val)) return false;
377374

378-
size = Z_STRLEN_P(php_val);
375+
char* ptr = Z_STRVAL_P(php_val);
376+
size_t size = Z_STRLEN_P(php_val);
377+
378+
if (type.type == kUpb_CType_String && !utf8_range_IsValid(ptr, size)) {
379+
zend_throw_exception_ex(NULL, 0, "Invalid UTF-8 in string data");
380+
return false;
381+
}
379382

380383
// If arena is NULL we reference the input zval.
381384
// The resulting upb_StringView will only be valid while the zval is
382385
// alive.
383386
if (arena) {
384-
ptr = upb_Arena_Malloc(arena, size);
385-
memcpy(ptr, Z_STRVAL_P(php_val), size);
386-
} else {
387-
ptr = Z_STRVAL_P(php_val);
387+
char* copy = upb_Arena_Malloc(arena, size);
388+
memcpy(copy, ptr, size);
389+
ptr = copy;
388390
}
389391

390392
upb_val->str_val = upb_StringView_FromDataAndSize(ptr, size);

php/tests/GeneratedClassTest.php

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -449,6 +449,19 @@ public function testStringField()
449449
$this->assertSame('1', $m->getOptionalString());
450450
}
451451

452+
#########################################################
453+
# Test invalid UTF-8
454+
#########################################################
455+
456+
public function testInvalidUtf8StringFails()
457+
{
458+
$m = new TestMessage();
459+
460+
// Invalid UTF-8 is rejected.
461+
$this->expectException(Exception::class);
462+
$m->setOptionalString("\xff");
463+
}
464+
452465
#########################################################
453466
# Test bytes field.
454467
#########################################################
@@ -474,13 +487,12 @@ public function testBytesField()
474487
$this->assertSame('1', $m->getOptionalBytes());
475488
}
476489

477-
public function testBytesFieldInvalidUTF8Success()
478-
{
479-
$m = new TestMessage();
480-
$hex = hex2bin("ff");
481-
$m->setOptionalBytes($hex);
482-
$this->assertTrue(true);
483-
}
490+
public function testBytesFieldInvalidUTF8Success()
491+
{
492+
$m = new TestMessage();
493+
$m->setOptionalBytes("\xff");
494+
$this->assertSame("\xff", $m->getOptionalBytes());
495+
}
484496

485497
#########################################################
486498
# Test message field.

0 commit comments

Comments
 (0)