mirror of
https://github.com/asterisk/asterisk.git
synced 2025-09-02 19:16:15 +00:00
res_pjsip: Replace invalid UTF-8 sequences in callerid name
* Added a new function ast_utf8_replace_invalid_chars() to utf8.c that copies a string, replacing any invalid UTF-8 sequences with the Unicode-specified U+FFFD replacement character. For example: "abc\xffdef" becomes "abc\uFFFDdef". Any UTF-8 compliant implementation will show that character as a � character.
* Updated res_pjsip:set_id_from_hdr() to use ast_utf8_replace_invalid_chars() and print a warning if any invalid sequences were found during the copy.
* Updated stasis_channels:ast_channel_publish_varset() to use ast_utf8_replace_invalid_chars() and print a warning if any invalid sequences were found during the copy.
ASTERISK-27830
Change-Id: I4ffbdb19c80bf0efc675d40078a3ca4f85c567d8
This commit is contained in:
committed by
George Joseph
parent
e5c5cd6e25
commit
ceda5a9859
@@ -67,6 +67,59 @@ int ast_utf8_is_validn(const char *str, size_t size);
|
||||
*/
|
||||
void ast_utf8_copy_string(char *dst, const char *src, size_t size);
|
||||
|
||||
/*! \brief Result codes returned by ast_utf8_replace_invalid_chars() */
enum ast_utf8_replace_result {
	/*! \brief Source contained fully valid UTF-8
	 *
	 * The entire string was valid UTF-8 and no replacement
	 * was required.
	 */
	AST_UTF8_REPLACE_VALID,

	/*! \brief Source contained at least 1 invalid UTF-8 sequence
	 *
	 * Parts of the string contained invalid UTF-8 sequences
	 * but those were successfully replaced with the U+FFFD
	 * replacement sequence.
	 */
	AST_UTF8_REPLACE_INVALID,

	/*! \brief Not enough space to copy entire source
	 *
	 * The destination buffer wasn't large enough to hold
	 * all of the source characters.  As many source characters
	 * as would fit were copied/replaced and a final NUL
	 * terminator was added.
	 */
	AST_UTF8_REPLACE_OVERRUN,
};
|
||||
|
||||
/*!
 * \brief Copy a string safely replacing any invalid UTF-8 sequences
 *
 * This is similar to \ref ast_copy_string, but it will only copy valid UTF-8
 * sequences from the source string into the destination buffer.
 * If an invalid sequence is encountered, it's replaced with the U+FFFD
 * replacement character, which is the valid UTF-8 sequence that represents
 * an unknown, unrecognized, or unrepresentable character.  Since U+FFFD is
 * actually a 3 byte sequence, the destination buffer will need to be larger
 * than the corresponding source string if it contains invalid sequences.
 * You can pass NULL as the destination buffer pointer to get the actual
 * size required, then call the function again with the properly sized
 * buffer.
 *
 * \param dst      Pointer to the destination buffer. If NULL,
 *                 dst_size will be set to the size of the
 *                 buffer required to fully process the
 *                 source string.
 * \param dst_size A pointer to the size of the dst buffer.
 *                 When dst is NULL, the required buffer size
 *                 is stored here instead.
 * \param src      The source string
 * \param src_len  The number of bytes to copy
 *
 * \return \ref ast_utf8_replace_result
 * \retval AST_UTF8_REPLACE_VALID   The source was entirely valid UTF-8.
 * \retval AST_UTF8_REPLACE_INVALID At least one invalid sequence was replaced.
 * \retval AST_UTF8_REPLACE_OVERRUN The destination buffer was too small to
 *                                  hold the entire processed source.
 */
enum ast_utf8_replace_result ast_utf8_replace_invalid_chars(char *dst,
	size_t *dst_size, const char *src, size_t src_len);
|
||||
|
||||
enum ast_utf8_validation_result {
|
||||
/*! \brief The consumed sequence is valid UTF-8
|
||||
*
|
||||
|
Reference in New Issue
Block a user