res_pjsip: Replace invalid UTF-8 sequences in callerid name

* Added a new function, ast_utf8_replace_invalid_chars(), to
  utf8.c that copies a string, replacing any invalid UTF-8
  sequences with the Unicode-specified U+FFFD replacement
  character. For example: "abc\xffdef" becomes "abc\uFFFDdef".
  Any UTF-8 compliant implementation will display that character
  as a � character.

* Updated res_pjsip:set_id_from_hdr() to use
  ast_utf8_replace_invalid_chars() and print a warning if any
  invalid sequences were found during the copy.

* Updated stasis_channels:ast_channel_publish_varset() to use
  ast_utf8_replace_invalid_chars() and print a warning if any
  invalid sequences were found during the copy.

ASTERISK-27830

Change-Id: I4ffbdb19c80bf0efc675d40078a3ca4f85c567d8
This commit is contained in:
George Joseph
2023-02-16 09:05:30 -07:00
committed by George Joseph
parent e5c5cd6e25
commit ceda5a9859
4 changed files with 647 additions and 3 deletions

View File

@@ -39,6 +39,7 @@
#include "asterisk/stasis_channels.h"
#include "asterisk/dial.h"
#include "asterisk/linkedlists.h"
#include "asterisk/utf8.h"
/*** DOCUMENTATION
<managerEvent language="en_US" name="VarSet">
@@ -1154,13 +1155,43 @@ void ast_channel_publish_blob(struct ast_channel *chan, struct stasis_message_ty
void ast_channel_publish_varset(struct ast_channel *chan, const char *name, const char *value)
{
struct ast_json *blob;
enum ast_utf8_replace_result result;
char *new_value = NULL;
size_t new_value_size = 0;
ast_assert(name != NULL);
ast_assert(value != NULL);
/*
* Call with new_value == NULL to just check for invalid UTF-8
* sequences and get size of buffer needed.
*/
result = ast_utf8_replace_invalid_chars(new_value, &new_value_size,
value, strlen(value));
if (result == AST_UTF8_REPLACE_VALID) {
/*
* If there were no invalid sequences, we can use
* the value directly.
*/
new_value = (char *)value;
} else {
/*
* If there were invalid sequences, we need to replace
* them with the UTF-8 U+FFFD replacement character.
*/
new_value = ast_alloca(new_value_size);
result = ast_utf8_replace_invalid_chars(new_value, &new_value_size,
value, strlen(value));
ast_log(LOG_WARNING, "%s: The contents of variable '%s' had invalid UTF-8 sequences which were replaced",
ast_channel_name(chan), name);
}
blob = ast_json_pack("{s: s, s: s}",
"variable", name,
"value", value);
"value", new_value);
if (!blob) {
ast_log(LOG_ERROR, "Error creating message\n");
return;