res_pjsip: Replace invalid UTF-8 sequences in callerid name

* Added a new function ast_utf8_replace_invalid_chars() to
  utf8.c that copies a string, replacing any invalid UTF-8
  sequences with the Unicode-specified U+FFFD replacement
  character.  For example:  "abc\xffdef" becomes "abc\uFFFDdef".
  Any UTF-8 compliant implementation will show that character
  as a � character.

* Updated res_pjsip:set_id_from_hdr() to use
  ast_utf8_replace_invalid_chars() and print a warning if any
  invalid sequences were found during the copy.

* Updated stasis_channels:ast_channel_publish_varset() to use
  ast_utf8_replace_invalid_chars() and print a warning if any
  invalid sequences were found during the copy.

ASTERISK-27830

Change-Id: I4ffbdb19c80bf0efc675d40078a3ca4f85c567d8
This commit is contained in:
George Joseph
2023-02-16 09:05:30 -07:00
committed by George Joseph
parent e5c5cd6e25
commit ceda5a9859
4 changed files with 647 additions and 3 deletions

View File

@@ -47,6 +47,7 @@
#include "asterisk/test.h"
#include "asterisk/res_pjsip_presence_xml.h"
#include "asterisk/res_pjproject.h"
#include "asterisk/utf8.h"
/*** MODULEINFO
<depend>pjproject</depend>
@@ -2461,12 +2462,12 @@ static void set_id_from_hdr(pjsip_fromto_hdr *hdr, struct ast_party_id *id)
{
char cid_name[AST_CHANNEL_NAME];
char cid_num[AST_CHANNEL_NAME];
size_t cid_name_size = AST_CHANNEL_NAME;
pjsip_name_addr *id_name_addr = (pjsip_name_addr *) hdr->uri;
char *semi;
enum ast_utf8_replace_result result;
ast_copy_pj_str(cid_name, &id_name_addr->display, sizeof(cid_name));
ast_copy_pj_str(cid_num, ast_sip_pjsip_uri_get_username(hdr->uri), sizeof(cid_num));
/* Always truncate caller-id number at a semicolon. */
semi = strchr(cid_num, ';');
if (semi) {
@@ -2484,6 +2485,21 @@ static void set_id_from_hdr(pjsip_fromto_hdr *hdr, struct ast_party_id *id)
*semi = '\0';
}
/*
* It's safe to pass a NULL or empty string as the source.
* The result will be an empty string assuming the destination
* size was at least 1.
*/
result = ast_utf8_replace_invalid_chars(cid_name, &cid_name_size,
id_name_addr->display.ptr, id_name_addr->display.slen);
if (result != AST_UTF8_REPLACE_VALID) {
ast_log(LOG_WARNING, "CallerID Name '" PJSTR_PRINTF_SPEC
"' for number '%s' has invalid UTF-8 characters which "
"were replaced",
PJSTR_PRINTF_VAR(id_name_addr->display), cid_num);
}
ast_free(id->name.str);
id->name.str = ast_strdup(cid_name);
if (!ast_strlen_zero(cid_name)) {