// Decode a single UTF-8 encoded code point starting at bytes[0].
//
// bytes            - at least 4 readable bytes (the longest sequence this
//                    decoder accepts); only that many are ever examined.
// out_num_consumed - receives the number of bytes the caller should skip
//                    before decoding again; always at least 1.
//
// Returns the decoded code point. On malformed input returns 0 and sets
// *out_num_consumed as follows:
//   - invalid lead byte (0x80..0xBF stray continuation, or 0xF8..0xFF):
//     1 byte consumed;
//   - invalid continuation byte at index i: i bytes consumed, so the
//     offending byte is re-examined as a potential lead byte next time.
//
// Limitations: 0 is also the valid decoding of a NUL byte, so the return
// value alone does not distinguish "U+0000" from "malformed". Overlong
// encodings, surrogate code points and values above U+10FFFF are not
// rejected here.
static UnicodeCodepoint utf8_decode(u8 const bytes[static 4], u8 *out_num_consumed) {
    u8 const flipped = (u8)~bytes[0];
    if (flipped == 0) {
        // Because __builtin_clz is UB for value 0.
        // When this happens (lead byte 0xFF), the UTF-8 is malformed.
        *out_num_consumed = 1;
        return 0;
    }

    // Count the leading one bits of the lead byte. __builtin_clz works on
    // unsigned int, so the promoted byte adds (width - 8) extra leading
    // zeros; with the usual int widths that is a multiple of 8 and the
    // & 0x07 strips it off again.
    u8 const num_ones = (u8)(__builtin_clz(flipped) & 0x07);

    // One leading 1 bit is a continuation byte appearing where a lead byte
    // is expected. More than four would describe a 5..7 byte sequence, which
    // is not valid UTF-8 and would make the loop below read beyond the 4
    // bytes the caller guarantees.
    if (num_ones == 1 || num_ones > 4) {
        *out_num_consumed = 1;
        return 0;
    }

    // Zero leading ones means a plain single-byte (ASCII) sequence.
    u8 const num_bytes_total = num_ones == 0 ? 1 : num_ones;

    // Drop the length prefix (the leading ones plus the terminating zero bit)
    // to keep only the payload bits of the lead byte.
    u8 const main_byte_shift = (u8)(num_ones + 1);
    UnicodeCodepoint value = (UnicodeCodepoint)(bytes[0] & (0xFF >> main_byte_shift));

    for (u8 i = 1; i < num_bytes_total; ++i) {
        if (bytes[i] >> 6 != 2) {
            // Not a valid continuation byte (must look like 10xxxxxx).
            *out_num_consumed = i;
            return 0;
        }
        // Each continuation byte contributes its low 6 bits.
        value = (value << 6) | (UnicodeCodepoint)(bytes[i] & 0x3F);
    }

    *out_num_consumed = num_bytes_total;
    return value;
}