audiohook.c: Improve frame pairing logic to avoid MixMonitor breakage with mixed codecs

This patch adjusts the read/write synchronization logic in audiohook_read_frame_both()
to better handle calls where participants use different codecs or sample sizes
(e.g., alaw vs G.722). The previous hard threshold of 2 * samples caused MixMonitor
recordings to break or stutter when frames were not aligned between both directions.

The new logic uses more tolerant limits: the wait window is doubled from (samples/8)*2
to (samples/8)*4 ms, and the buffered-sample cap is raised from 2 * samples to 4 * samples.
This prevents audio tearing without causing excessive buffer overruns. This fix specifically
addresses issues with MixMonitor when recording directly on a channel in a bridge using
mixed codecs.

Reported-by: Michal Hajek <michal.hajek@daktela.com>

Resolves: #1276
Resolves: #1279
This commit is contained in:
Michal Hajek
2025-05-21 10:28:20 +02:00
parent 7d5280c16e
commit a2ae527d60

View File

@@ -308,14 +308,23 @@ static struct ast_frame *audiohook_read_frame_both(struct ast_audiohook *audioho
* 2 * samples) according to actual needs, for example, setting it to (ast_tvdiff_ms(ast_tvnow(), * 2 * samples) according to actual needs, for example, setting it to (ast_tvdiff_ms(ast_tvnow(),
* audiohook->write_time) < (samples/8)*4) && (ast_slinfactory_available(&audiohook->read_factory) * audiohook->write_time) < (samples/8)*4) && (ast_slinfactory_available(&audiohook->read_factory)
* < 4 * samples). * < 4 * samples).
*
* Update:
* Increased time and sample thresholds allow for better handling of asymmetric streams
* (e.g., mixed codecs like alaw and G.722) and high RTT conditions.
* This avoids premature frame reads when one direction is delayed, which can cause
* audio tearing or broken recordings.
* Specifically addresses issues with MixMonitor when recording directly on a channel
* that is part of a bridge with different sample rates or codecs.
* A slight overrun in recording duration is acceptable in exchange for audio stability.
*/ */
if (usable_read && !usable_write && (ast_tvdiff_ms(ast_tvnow(), audiohook->write_time) < (samples/8)*2) && (ast_slinfactory_available(&audiohook->read_factory) < 2 * samples)) { if (usable_read && !usable_write && (ast_tvdiff_ms(ast_tvnow(), audiohook->write_time) < (samples/8)*4) && (ast_slinfactory_available(&audiohook->read_factory) < 4 * samples)) {
ast_debug(3, "Write factory %p was pretty quick last time, waiting for them.\n", &audiohook->write_factory); ast_debug(3, "Write factory %p was pretty quick last time, waiting for them.\n", &audiohook->write_factory);
return NULL; return NULL;
} }
/* As shown in the above comment. */ /* As shown in the above comment. */
if (usable_write && !usable_read && (ast_tvdiff_ms(ast_tvnow(), audiohook->read_time) < (samples/8)*2) && (ast_slinfactory_available(&audiohook->write_factory) < 2 * samples)) { if (usable_write && !usable_read && (ast_tvdiff_ms(ast_tvnow(), audiohook->read_time) < (samples/8)*4) && (ast_slinfactory_available(&audiohook->write_factory) < 4 * samples)) {
ast_debug(3, "Read factory %p was pretty quick last time, waiting for them.\n", &audiohook->read_factory); ast_debug(3, "Read factory %p was pretty quick last time, waiting for them.\n", &audiohook->read_factory);
return NULL; return NULL;
} }