audiohook.c: Improve frame pairing logic to avoid MixMonitor breakage with mixed codecs

This patch adjusts the read/write synchronization logic in audiohook_read_frame_both() to better handle calls where participants use different codecs or sample sizes (e.g., alaw vs G.722). The previous hard threshold of 2 * samples caused MixMonitor recordings to break or stutter when frames were not aligned between both directions. The new logic uses more tolerant limits: the wait window is raised from (samples/8)*2 ms to (samples/8)*4 ms, and the buffered-sample limit from 2 * samples to 4 * samples, which prevents audio tearing without causing excessive buffer overruns. This fix specifically addresses issues with MixMonitor when recording directly on a channel in a bridge using mixed codecs. Reported-by: Michal Hajek <michal.hajek@daktela.com> Resolves: #1276 Resolves: #1279
2025-09-02 19:16:15 +00:00 · 2025-05-21 10:28:20 +02:00
parent 7d5280c16e
commit a2ae527d60
1 changed files with 11 additions and 2 deletions
--- a/main/audiohook.c
+++ b/main/audiohook.c
@@ -308,14 +308,23 @@ static struct ast_frame *audiohook_read_frame_both(struct ast_audiohook *audioho
 	 *    2 * samples) according to actual needs, for example, setting it to (ast_tvdiff_ms(ast_tvnow(),
 	 *    audiohook->write_time) < (samples/8)*4) && (ast_slinfactory_available(&audiohook->read_factory)
 	 *    < 4 * samples).
 	 *
 	 *    Update:
 	 *       Increased time and sample thresholds allow for better handling of asymmetric streams
 	 *       (e.g., mixed codecs like alaw and G.722) and high RTT conditions.
 	 *       This avoids premature frame reads when one direction is delayed, which can cause
 	 *       audio tearing or broken recordings.
 	 *       Specifically addresses issues with MixMonitor when recording directly on a channel
 	 *       that is part of a bridge with different sample rates or codecs.
 	 *       A slight overrun in recording duration is acceptable in exchange for audio stability.
 	 */
-	if (usable_read && !usable_write && (ast_tvdiff_ms(ast_tvnow(), audiohook->write_time) < (samples/8)*2) && (ast_slinfactory_available(&audiohook->read_factory) < 2 * samples)) {
+	if (usable_read && !usable_write && (ast_tvdiff_ms(ast_tvnow(), audiohook->write_time) < (samples/8)*4) && (ast_slinfactory_available(&audiohook->read_factory) < 4 * samples)) {
 		ast_debug(3, "Write factory %p was pretty quick last time, waiting for them.\n", &audiohook->write_factory);
 		return NULL;
 	}
 	/* As shown in the above comment. */
-	if (usable_write && !usable_read && (ast_tvdiff_ms(ast_tvnow(), audiohook->read_time) < (samples/8)*2) && (ast_slinfactory_available(&audiohook->write_factory) < 2 * samples)) {
+	if (usable_write && !usable_read && (ast_tvdiff_ms(ast_tvnow(), audiohook->read_time) < (samples/8)*4) && (ast_slinfactory_available(&audiohook->write_factory) < 4 * samples)) {
 		ast_debug(3, "Read factory %p was pretty quick last time, waiting for them.\n", &audiohook->read_factory);
 		return NULL;
 	}