1. 前言
MRCP(Media Resource Control Protocol, 媒体资源控制协议)
是一种通讯协议,用于语音服务器向客户端提供各种语音服务,例如 语音识别(ASR)和语音合成(TTS)。FreeSWITCH 中的 unimrcp模块 就是对接 MRCP 协议栈的中间层,提供了集成使用 ASR、TTS 的能力。下图是 FreeSWITCH 中 unimrcp模块 的源码时序,下文将对源码进行分析
2. 源码分析
2.1 unimrcp 模块的加载
-
在 FreeSWITCH 1.10 源码阅读(1)-服务启动及 Event Socket 模块工作原理 中笔者分析了 FreeSWITCH 加载模块的主流程,unimrcp 模块被加载时将触发
mod_unimrcp.c#SWITCH_MODULE_LOAD_FUNCTION(mod_unimrcp_load)
执行。这个函数比较简练,大致有以下几个关键点:- 调用
mod_unimrcp.c#mod_unimrcp_do_config()
函数获取 XML 配置中指定的unimrcp.conf
名称下的配置内容,这部分不做赘述 - 调用
mod_unimrcp.c#mod_unimrcp_client_create()
函数创建 FreeSWITCH 本地的 MRCP 客户端,用于后续与 MRCP 服务器交互 - 调用
mod_unimrcp.c#synth_load()
函数加载创建 TTS 应用 - 调用
mod_unimrcp.c#recog_load()
函数加载创建 ASR 应用,与 TTS 应用加载类似,不做赘述 - 调用库函数
mrcp_client.c#mrcp_client_start()
新开线程启动 MRCP 客户端,涉及库函数不做赘述
SWITCH_MODULE_LOAD_FUNCTION(mod_unimrcp_load) { if (switch_event_reserve_subclass(MY_EVENT_PROFILE_CREATE) != SWITCH_STATUS_SUCCESS) { switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Couldn't register subclass %s!\n", MY_EVENT_PROFILE_CREATE); return SWITCH_STATUS_TERM; } if (switch_event_reserve_subclass(MY_EVENT_PROFILE_CLOSE) != SWITCH_STATUS_SUCCESS) { switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Couldn't register subclass %s!\n", MY_EVENT_PROFILE_CLOSE); return SWITCH_STATUS_TERM; } if (switch_event_reserve_subclass(MY_EVENT_PROFILE_OPEN) != SWITCH_STATUS_SUCCESS) { switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Couldn't register subclass %s!\n", MY_EVENT_PROFILE_OPEN); return SWITCH_STATUS_TERM; } /* connect my internal structure to the blank pointer passed to me */ *module_interface = switch_loadable_module_create_module_interface(pool, modname); memset(&globals, 0, sizeof(globals)); switch_mutex_init(&globals.mutex, SWITCH_MUTEX_UNNESTED, pool); globals.speech_channel_number = 0; switch_core_hash_init_nocase(&globals.profiles); /* get MRCP module configuration */ mod_unimrcp_do_config(); if (zstr(globals.unimrcp_default_synth_profile)) { switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Missing default-tts-profile\n"); return SWITCH_STATUS_FALSE; } if (zstr(globals.unimrcp_default_recog_profile)) { switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Missing default-asr-profile\n"); return SWITCH_STATUS_FALSE; } /* link UniMRCP logs to FreeSWITCH */ switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "UniMRCP log level = %s\n", globals.unimrcp_log_level); if (apt_log_instance_create(APT_LOG_OUTPUT_NONE, str_to_log_level(globals.unimrcp_log_level), pool) == FALSE) { /* already created */ apt_log_priority_set(str_to_log_level(globals.unimrcp_log_level)); } apt_log_ext_handler_set(unimrcp_log); /* Create the MRCP client */ if ((globals.mrcp_client = mod_unimrcp_client_create(pool)) == NULL) { 
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Failed to create mrcp client\n"); return SWITCH_STATUS_FALSE; } /* Create the synthesizer interface */ if (synth_load(*module_interface, pool) != SWITCH_STATUS_SUCCESS) { return SWITCH_STATUS_FALSE; } /* Create the recognizer interface */ if (recog_load(*module_interface, pool) != SWITCH_STATUS_SUCCESS) { return SWITCH_STATUS_FALSE; } /* Start the client stack */ mrcp_client_start(globals.mrcp_client); /* indicate that the module should continue to be loaded */ return SWITCH_STATUS_SUCCESS; }
- 调用
-
mod_unimrcp.c#mod_unimrcp_client_create()
函数的关键点在于与底层 mrcp 库的交互,由于底层库已经不属于 FreeSWITCH 源码,本文不会再深入:- 调用库函数
mrcp_client.c#mrcp_client_create()
创建 FreeSWITCH 作为客户端连接 MRCP 服务器的 mrcp_client_t 对象,并设置该对象上回调函数表中处理消息的函数为mrcp_client.c#mrcp_client_msg_process()
- 调用库函数
mrcp_client_connection.c#mrcp_client_connection_agent_create()
创建 MRCP 连接端点对象 mrcp_connection_agent_t,用于管理底层 socket 数据读写 - 调用
mrcp_client.c#mrcp_client_connection_agent_register()
将 MRCP 连接端点注册到 FreeSWITCH 客户端对象中,并设置底层连接收到数据时的回调函数表为 mrcp_client.connection_method_vtable - 解析 unimrcp 配置文件属性,创建对应的 profile,据此可以将多个 MRCP 服务器的连接信息隔离。如果是 v2 版本的 MRCP 协议,在 FreeSWITCH 和 MRCP 服务器之间还需要 SIP 信令交互,所以也会调用
mrcp_sofiasip_client_agent.c#mrcp_sofiasip_client_agent_create()
函数创建一个 SIP 交互的端点对象
/**
 * Create and configure the UniMRCP client used by this module.
 *
 * Builds the client object, loads the "speechsynth" and "speechrecog"
 * resources, registers a codec manager, one connection agent and one media
 * engine (both shared by every profile), then walks the <profiles> section
 * of the module configuration and registers one MRCP profile per <profile>
 * element. Version "1" profiles get an RTSP signaling agent, version "2"
 * profiles get a SofiaSIP signaling agent plus the shared connection agent.
 *
 * @param mod_pool module memory pool; used for the directory layout and for
 *                 the module-level profile objects
 * @return the configured client, or NULL when any step fails
 *
 * NOTE(review): on failure the partially built client is not destroyed and
 * profiles already inserted into globals.profiles stay there; this matches
 * the previous behavior and is left for a separate change.
 */
static mrcp_client_t *mod_unimrcp_client_create(switch_memory_pool_t *mod_pool)
{
	switch_xml_t cfg = NULL, xml = NULL, profiles = NULL, profile = NULL;
	mrcp_client_t *client = NULL;
	apr_pool_t *pool = NULL;
	mrcp_resource_loader_t *resource_loader = NULL;
	mrcp_resource_factory_t *resource_factory = NULL;
	mpf_codec_manager_t *codec_manager = NULL;
	apr_size_t max_connection_count = 0;
	apt_bool_t offer_new_connection = FALSE;
	mrcp_connection_agent_t *connection_agent;
	mpf_engine_t *media_engine;
	apt_dir_layout_t *dir_layout;
	apt_str_t synth_resource;
	apt_str_t recog_resource;

	/* create the client */
	if ((dir_layout = apt_default_dir_layout_create("../", mod_pool)) == NULL) {
		goto fail;
	}
	client = mrcp_client_create(dir_layout);
	if (!client) {
		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "Failed to create MRCP client\n");
		goto fail;
	}

	pool = mrcp_client_memory_pool_get(client);
	if (!pool) {
		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "Failed to get MRCP client memory pool\n");
		goto fail;
	}

	/* load the synthesizer and recognizer resources */
	resource_loader = mrcp_resource_loader_create(FALSE, pool);
	if (!resource_loader) {
		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "Failed to create MRCP resource loader\n");
		goto fail;
	}
	apt_string_set(&synth_resource, "speechsynth");
	mrcp_resource_load(resource_loader, &synth_resource);
	apt_string_set(&recog_resource, "speechrecog");
	mrcp_resource_load(resource_loader, &recog_resource);
	resource_factory = mrcp_resource_factory_get(resource_loader);
	mrcp_client_resource_factory_register(client, resource_factory);

	codec_manager = mpf_engine_codec_manager_create(pool);
	if (!codec_manager) {
		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "Failed to create MPF codec manager\n");
		goto fail;
	}
	if (!mrcp_client_codec_manager_register(client, codec_manager)) {
		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "Failed to create register MRCP client codec manager\n");
		goto fail;
	}

	/* set up MRCP connection agent that will be shared with all profiles */
	if (!zstr(globals.unimrcp_max_connection_count)) {
		/* parse as signed first: a negative value must not wrap into a huge unsigned count */
		int configured_count = atoi(globals.unimrcp_max_connection_count);
		if (configured_count > 0) {
			max_connection_count = (apr_size_t) configured_count;
		}
	}
	if (max_connection_count == 0) {
		max_connection_count = 100;
	}
	if (!zstr(globals.unimrcp_offer_new_connection)) {
		/* strcasecmp() returns 0 on a match, so "true" has to map to TRUE explicitly */
		offer_new_connection = (strcasecmp("true", globals.unimrcp_offer_new_connection) == 0) ? TRUE : FALSE;
	}
	connection_agent = mrcp_client_connection_agent_create("MRCPv2ConnectionAgent", max_connection_count, offer_new_connection, pool);
	if (!connection_agent) {
		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "Failed to create MRCP connection agent\n");
		goto fail;
	}
	if (!zstr(globals.unimrcp_rx_buffer_size)) {
		apr_size_t rx_buffer_size = (apr_size_t)atol(globals.unimrcp_rx_buffer_size);
		if (rx_buffer_size > 0) {
			mrcp_client_connection_rx_size_set(connection_agent, rx_buffer_size);
		}
	}
	if (!zstr(globals.unimrcp_tx_buffer_size)) {
		apr_size_t tx_buffer_size = (apr_size_t)atol(globals.unimrcp_tx_buffer_size);
		if (tx_buffer_size > 0) {
			mrcp_client_connection_tx_size_set(connection_agent, tx_buffer_size);
		}
	}
	if (!zstr(globals.unimrcp_request_timeout)) {
		apr_size_t request_timeout = (apr_size_t)atol(globals.unimrcp_request_timeout);
		if (request_timeout > 0) {
			mrcp_client_connection_timeout_set(connection_agent, request_timeout);
		}
	}
	if (!mrcp_client_connection_agent_register(client, connection_agent)) {
		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "Failed to create register MRCP connection agent\n");
		goto fail;
	}

	/* Set up the media engine that will be shared with all profiles */
	media_engine = mpf_engine_create("MediaEngine", pool);
	if (!media_engine) {
		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "Failed to create MPF media engine\n");
		goto fail;
	}
	if (!mpf_engine_scheduler_rate_set(media_engine, 1)) {
		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "Failed to set MPF engine scheduler rate\n");
		goto fail;
	}
	if (!mrcp_client_media_engine_register(client, media_engine)) {
		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "Failed to register MPF media engine\n");
		goto fail;
	}

	/* configure the client profiles */
	if (!(xml = switch_xml_open_cfg(CONFIG_FILE, &cfg, NULL))) {
		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "Could not open %s\n", CONFIG_FILE);
		goto fail;
	}
	if ((profiles = switch_xml_child(cfg, "profiles"))) {
		for (profile = switch_xml_child(profiles, "profile"); profile; profile = switch_xml_next(profile)) {
			/* a profile is a signaling agent + termination factory + media engine + connection agent (MRCPv2 only) */
			mrcp_sig_agent_t *agent = NULL;
			mpf_termination_factory_t *termination_factory = NULL;
			mrcp_profile_t *mprofile = NULL;
			mpf_rtp_config_t *rtp_config = NULL;
			mpf_rtp_settings_t *rtp_settings = mpf_rtp_settings_alloc(pool);
			mrcp_sig_settings_t *sig_settings = mrcp_signaling_settings_alloc(pool);
			profile_t *mod_profile = NULL;
			switch_xml_t default_params = NULL;
			mrcp_connection_agent_t *v2_profile_connection_agent = NULL;

			/* get profile attributes */
			const char *name = apr_pstrdup(pool, switch_xml_attr(profile, "name"));
			const char *version = switch_xml_attr(profile, "version");
			if (zstr(name) || zstr(version)) {
				switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "<profile> missing name or version attribute\n");
				goto fail;
			}

			/* prepare mod_unimrcp's profile for configuration */
			profile_create(&mod_profile, name, mod_pool);
			if (!mod_profile) {
				goto fail;
			}
			switch_core_hash_insert(globals.profiles, mod_profile->name, mod_profile);

			/* pull in any default SPEAK params */
			default_params = switch_xml_child(profile, "synthparams");
			if (default_params) {
				switch_xml_t param = NULL;
				switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Loading SPEAK params\n");
				for (param = switch_xml_child(default_params, "param"); param; param = switch_xml_next(param)) {
					const char *param_name = switch_xml_attr(param, "name");
					const char *param_value = switch_xml_attr(param, "value");
					if (zstr(param_name)) {
						switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Missing SPEAK param name\n");
						goto fail;
					}
					if (zstr(param_value)) {
						switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Missing SPEAK param value\n");
						goto fail;
					}
					switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Loading SPEAK Param %s:%s\n", param_name, param_value);
					switch_core_hash_insert(mod_profile->default_synth_params, switch_core_strdup(pool, param_name), switch_core_strdup(pool, param_value));
				}
			}

			/* pull in any default RECOGNIZE params */
			default_params = switch_xml_child(profile, "recogparams");
			if (default_params) {
				switch_xml_t param = NULL;
				switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Loading RECOGNIZE params\n");
				for (param = switch_xml_child(default_params, "param"); param; param = switch_xml_next(param)) {
					const char *param_name = switch_xml_attr(param, "name");
					const char *param_value = switch_xml_attr(param, "value");
					if (zstr(param_name)) {
						switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Missing RECOGNIZE param name\n");
						goto fail;
					}
					if (zstr(param_value)) {
						switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Missing RECOGNIZE param value\n");
						goto fail;
					}
					switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Loading RECOGNIZE Param %s:%s\n", param_name, param_value);
					switch_core_hash_insert(mod_profile->default_recog_params, switch_core_strdup(pool, param_name), switch_core_strdup(pool, param_value));
				}
			}

			/* create RTP config, common to MRCPv1 and MRCPv2 */
			rtp_config = mpf_rtp_config_alloc(pool);
			rtp_config->rtp_port_min = DEFAULT_RTP_PORT_MIN;
			rtp_config->rtp_port_max = DEFAULT_RTP_PORT_MAX;
			apt_string_set(&rtp_config->ip, DEFAULT_LOCAL_IP_ADDRESS);

			if (strcmp("1", version) == 0) {
				/* MRCPv1 configuration */
				switch_xml_t param = NULL;
				rtsp_client_config_t *config = mrcp_unirtsp_client_config_alloc(pool);
				config->origin = DEFAULT_SDP_ORIGIN;
				sig_settings->resource_location = DEFAULT_RESOURCE_LOCATION;
				v2_profile_connection_agent = NULL;
				if (!zstr(globals.unimrcp_request_timeout)) {
					apr_size_t request_timeout = (apr_size_t)atol(globals.unimrcp_request_timeout);
					if (request_timeout > 0) {
						config->request_timeout = request_timeout;
					}
				}
				switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Loading MRCPv1 profile: %s\n", name);
				for (param = switch_xml_child(profile, "param"); param; param = switch_xml_next(param)) {
					const char *param_name = switch_xml_attr(param, "name");
					const char *param_value = switch_xml_attr(param, "value");
					if (zstr(param_name)) {
						switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Missing param name\n");
						goto fail;
					}
					switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Loading Param %s:%s\n", param_name, param_value);
					/* each handler is tried in turn; the first one that accepts the param wins */
					if (!process_mrcpv1_config(config, sig_settings, param_name, param_value, pool) &&
						!process_rtp_config(client, rtp_config, rtp_settings, param_name, param_value, pool) &&
						!process_profile_config(mod_profile, param_name, param_value, mod_pool)) {
						switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "Ignoring unknown param %s\n", param_name);
					}
				}
				agent = mrcp_unirtsp_client_agent_create(name, config, pool);
				if (!agent) {
					switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Failed to create MRCP RTSP client agent\n");
					goto fail;
				}
			} else if (strcmp("2", version) == 0) {
				/* MRCPv2 configuration */
				mrcp_sofia_client_config_t *config = mrcp_sofiasip_client_config_alloc(pool);
				switch_xml_t param = NULL;
				config->local_ip = DEFAULT_LOCAL_IP_ADDRESS;
				config->local_port = DEFAULT_SIP_LOCAL_PORT;
				sig_settings->server_ip = DEFAULT_REMOTE_IP_ADDRESS;
				sig_settings->server_port = DEFAULT_SIP_REMOTE_PORT;
				config->ext_ip = NULL;
				config->user_agent_name = DEFAULT_SOFIASIP_UA_NAME;
				config->origin = DEFAULT_SDP_ORIGIN;
				v2_profile_connection_agent = connection_agent;
				switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Loading MRCPv2 profile: %s\n", name);
				for (param = switch_xml_child(profile, "param"); param; param = switch_xml_next(param)) {
					const char *param_name = switch_xml_attr(param, "name");
					const char *param_value = switch_xml_attr(param, "value");
					if (zstr(param_name)) {
						switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Missing param name\n");
						goto fail;
					}
					switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Loading Param %s:%s\n", param_name, param_value);
					/* each handler is tried in turn; the first one that accepts the param wins */
					if (!process_mrcpv2_config(config, sig_settings, param_name, param_value, pool) &&
						!process_rtp_config(client, rtp_config, rtp_settings, param_name, param_value, pool) &&
						!process_profile_config(mod_profile, param_name, param_value, mod_pool)) {
						switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "Ignoring unknown param %s\n", param_name);
					}
				}
				agent = mrcp_sofiasip_client_agent_create(name, config, pool);
				if (!agent) {
					switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Failed to create MRCP SIP client agent\n");
					goto fail;
				}
			} else {
				switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "version must be either \"1\" or \"2\"\n");
				goto fail;
			}

			termination_factory = mpf_rtp_termination_factory_create(rtp_config, pool);
			if (!termination_factory) {
				switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Failed to create RTP termination factory\n");
				goto fail;
			}
			mrcp_client_rtp_factory_register(client, termination_factory, name);
			mrcp_client_rtp_settings_register(client, rtp_settings, "RTP-Settings");
			mrcp_client_signaling_settings_register(client, sig_settings, "Signaling-Settings");
			mrcp_client_signaling_agent_register(client, agent);

			/* create the profile and register it */
			mprofile = mrcp_client_profile_create(NULL, agent, v2_profile_connection_agent, media_engine, termination_factory, rtp_settings, sig_settings, pool);
			if (!mprofile) {
				switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Failed to create MRCP client profile\n");
				goto fail;
			}
			mrcp_client_profile_register(client, mprofile, name);
		}
	}

	goto done;

 fail:
	/* every failure path reports "no client" to the caller */
	client = NULL;

 done:
	if (xml) {
		switch_xml_free(xml);
	}

	return client;
}
- 调用库函数
-
mod_unimrcp.c#synth_load()
函数加载创建 TTS 功能应用的处理主要分为两个部分,- 创建
SWITCH_SPEECH_INTERFACE
接口,将 TTS 相关功能封装到 FreeSWITCH 标准模块结构中,供上层使用 - 调用库函数
mrcp_application.c#mrcp_application_create()
创建 unimrcp 模块的 TTS 应用,这个部分主要是将 unimrcp 模块的处理逻辑嵌入到底层 MRCP 客户端,供底层回调通知上层
/**
 * Register the TTS side of the module.
 *
 * Creates the FreeSWITCH speech interface and points its callbacks at the
 * synth_speech_* functions, creates the UniMRCP synthesizer application and
 * registers it with the MRCP client under the name "synth", then fills the
 * two lookup tables used when setting parameters: FreeSWITCH param name to
 * MRCP header name, and MRCP header name to UniMRCP header id.
 *
 * @param module_interface module interface the speech interface is added to
 * @param pool             pool used for the application and the param ids
 * @return SWITCH_STATUS_SUCCESS, or SWITCH_STATUS_FALSE when the speech
 *         interface or the MRCP application cannot be created
 */
static switch_status_t synth_load(switch_loadable_module_interface_t *module_interface, switch_memory_pool_t *pool)
{
	/* MRCP synthesizer header names and their UniMRCP ids, in insertion order */
	static const struct {
		const char *header_name;
		int header_id;
	} synth_headers[] = {
		{ "jump-size", SYNTHESIZER_HEADER_JUMP_SIZE },
		{ "kill-on-barge-in", SYNTHESIZER_HEADER_KILL_ON_BARGE_IN },
		{ "speaker-profile", SYNTHESIZER_HEADER_SPEAKER_PROFILE },
		{ "completion-cause", SYNTHESIZER_HEADER_COMPLETION_CAUSE },
		{ "completion-reason", SYNTHESIZER_HEADER_COMPLETION_REASON },
		{ "voice-gender", SYNTHESIZER_HEADER_VOICE_GENDER },
		{ "voice-age", SYNTHESIZER_HEADER_VOICE_AGE },
		{ "voice-variant", SYNTHESIZER_HEADER_VOICE_VARIANT },
		{ "voice-name", SYNTHESIZER_HEADER_VOICE_NAME },
		{ "prosody-volume", SYNTHESIZER_HEADER_PROSODY_VOLUME },
		{ "prosody-rate", SYNTHESIZER_HEADER_PROSODY_RATE },
		{ "speech-marker", SYNTHESIZER_HEADER_SPEECH_MARKER },
		{ "speech-language", SYNTHESIZER_HEADER_SPEECH_LANGUAGE },
		{ "fetch-hint", SYNTHESIZER_HEADER_FETCH_HINT },
		{ "audio-fetch-hint", SYNTHESIZER_HEADER_AUDIO_FETCH_HINT },
		{ "failed-uri", SYNTHESIZER_HEADER_FAILED_URI },
		{ "failed-uri-cause", SYNTHESIZER_HEADER_FAILED_URI_CAUSE },
		{ "speak-restart", SYNTHESIZER_HEADER_SPEAK_RESTART },
		{ "speak-length", SYNTHESIZER_HEADER_SPEAK_LENGTH },
		{ "load-lexicon", SYNTHESIZER_HEADER_LOAD_LEXICON },
		{ "lexicon-search-order", SYNTHESIZER_HEADER_LEXICON_SEARCH_ORDER }
	};
	switch_speech_interface_t *tts_iface;
	size_t i;

	/* link to FreeSWITCH ASR / TTS callbacks */
	tts_iface = (switch_speech_interface_t *) switch_loadable_module_create_interface(module_interface, SWITCH_SPEECH_INTERFACE);
	if (tts_iface == NULL) {
		return SWITCH_STATUS_FALSE;
	}
	tts_iface->interface_name = MOD_UNIMRCP;
	tts_iface->speech_open = synth_speech_open;
	tts_iface->speech_close = synth_speech_close;
	tts_iface->speech_feed_tts = synth_speech_feed_tts;
	tts_iface->speech_read_tts = synth_speech_read_tts;
	tts_iface->speech_flush_tts = synth_speech_flush_tts;
	tts_iface->speech_text_param_tts = synth_speech_text_param_tts;
	tts_iface->speech_numeric_param_tts = synth_speech_numeric_param_tts;
	tts_iface->speech_float_param_tts = synth_speech_float_param_tts;

	/* Create the synthesizer application and link its callbacks to UniMRCP */
	globals.synth.app = mrcp_application_create(synth_message_handler, NULL, pool);
	if (globals.synth.app == NULL) {
		return SWITCH_STATUS_FALSE;
	}

	/* session / channel event dispatch */
	globals.synth.dispatcher.on_session_update = NULL;
	globals.synth.dispatcher.on_session_terminate = speech_on_session_terminate;
	globals.synth.dispatcher.on_channel_add = speech_on_channel_add;
	globals.synth.dispatcher.on_channel_remove = speech_on_channel_remove;
	globals.synth.dispatcher.on_message_receive = synth_on_message_receive;

	/* audio stream: only write_frame is hooked, every other slot stays empty */
	globals.synth.audio_stream_vtable.destroy = NULL;
	globals.synth.audio_stream_vtable.open_rx = NULL;
	globals.synth.audio_stream_vtable.close_rx = NULL;
	globals.synth.audio_stream_vtable.read_frame = NULL;
	globals.synth.audio_stream_vtable.open_tx = NULL;
	globals.synth.audio_stream_vtable.close_tx = NULL;
	globals.synth.audio_stream_vtable.write_frame = synth_stream_write;

	mrcp_client_application_register(globals.mrcp_client, globals.synth.app, "synth");

	/* map FreeSWITCH params to MRCP param */
	switch_core_hash_init_nocase(&globals.synth.fs_param_map);
	switch_core_hash_insert(globals.synth.fs_param_map, "voice", "voice-name");

	/* map MRCP params to UniMRCP ID */
	switch_core_hash_init_nocase(&globals.synth.param_id_map);
	for (i = 0; i < sizeof(synth_headers) / sizeof(synth_headers[0]); i++) {
		switch_core_hash_insert(globals.synth.param_id_map, synth_headers[i].header_name, unimrcp_param_id_create(synth_headers[i].header_id, pool));
	}

	return SWITCH_STATUS_SUCCESS;
}
- 创建
2.2 tts 功能的实现
-
以 speak 放音 APP 为例,当上层执行这个 APP 时实际调用到
mod_dptools.c#speak_function()
函数,可以看到该函数主要处理是校验参数合法性,然后调用switch_ivr_play_say.c#switch_ivr_speak_text()
函数SWITCH_STANDARD_APP(speak_function) { switch_channel_t *channel = switch_core_session_get_channel(session); char buf[10]; char *argv[3] = { 0 }; int argc; const char *engine = NULL; const char *voice = NULL; char *text = NULL; char *mydata = NULL; switch_input_args_t args = { 0 }; if (zstr(data) || !(mydata = switch_core_session_strdup(session, data))) { switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "Invalid Params!\n"); return; } argc = switch_separate_string(mydata, '|', argv, sizeof(argv) / sizeof(argv[0])); if (argc == 0) { switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "Invalid Params!\n"); return; } else if (argc == 1) { text = switch_core_session_strdup(session, data); /* unstripped text */ } else if (argc == 2) { voice = argv[0]; text = switch_core_session_strdup(session, data + (argv[1] - argv[0])); /* unstripped text */ } else { engine = argv[0]; voice = argv[1]; text = switch_core_session_strdup(session, data + (argv[2] - argv[0])); /* unstripped text */ } if (!engine) { engine = switch_channel_get_variable(channel, "tts_engine"); } if (!voice) { voice = switch_channel_get_variable(channel, "tts_voice"); } if (!(engine && voice && text)) { if (!engine) { engine = "NULL"; } if (!voice) { voice = "NULL"; } if (!text) { text = "NULL"; } switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "Invalid Params! [%s][%s][%s]\n", engine, voice, text); switch_channel_hangup(channel, SWITCH_CAUSE_DESTINATION_OUT_OF_ORDER); } args.input_callback = on_dtmf; args.buf = buf; args.buflen = sizeof(buf); switch_channel_set_variable(channel, SWITCH_PLAYBACK_TERMINATOR_USED, ""); switch_ivr_speak_text(session, engine, voice, text, &args); }
-
switch_ivr_play_say.c#switch_ivr_speak_text()
函数核心处理为以下几步:- 调用函数
switch_core_soeech.c#switch_core_speech_open()
使用本地 MRCP 客户端请求 MRCP 服务器新建会话 - 调用函数
switch_ivr_play_say.c#switch_ivr_speak_text_handle()
处理语音合成
SWITCH_DECLARE(switch_status_t) switch_ivr_speak_text(switch_core_session_t *session, const char *tts_name, const char *voice_name, const char *text, switch_input_args_t *args) { switch_channel_t *channel = switch_core_session_get_channel(session); uint32_t rate = 0; int interval = 0; uint32_t channels; switch_frame_t write_frame = { 0 }; switch_timer_t ltimer, *timer; switch_codec_t lcodec, *codec; switch_memory_pool_t *pool = switch_core_session_get_pool(session); char *codec_name; switch_status_t status = SWITCH_STATUS_SUCCESS; switch_speech_handle_t lsh, *sh; switch_speech_flag_t flags = SWITCH_SPEECH_FLAG_NONE; const char *timer_name, *var; cached_speech_handle_t *cache_obj = NULL; int need_create = 1, need_alloc = 1; switch_codec_implementation_t read_impl = { 0 }; switch_core_session_get_read_impl(session, &read_impl); if (switch_channel_pre_answer(channel) != SWITCH_STATUS_SUCCESS) { return SWITCH_STATUS_FALSE; } arg_recursion_check_start(args); sh = ↰ codec = &lcodec; timer = <imer; if ((var = switch_channel_get_variable(channel, SWITCH_CACHE_SPEECH_HANDLES_VARIABLE)) && switch_true(var)) { switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "cache enabled"); if ((cache_obj = (cached_speech_handle_t *) switch_channel_get_private(channel, SWITCH_CACHE_SPEECH_HANDLES_OBJ_NAME))) { need_create = 0; if (!strcasecmp(cache_obj->tts_name, tts_name)) { need_alloc = 0; } else { switch_ivr_clear_speech_cache(session); } } if (!cache_obj) { cache_obj = (cached_speech_handle_t *) switch_core_session_alloc(session, sizeof(*cache_obj)); } if (need_alloc) { switch_copy_string(cache_obj->tts_name, tts_name, sizeof(cache_obj->tts_name)); switch_copy_string(cache_obj->voice_name, voice_name, sizeof(cache_obj->voice_name)); switch_channel_set_private(channel, SWITCH_CACHE_SPEECH_HANDLES_OBJ_NAME, cache_obj); } sh = &cache_obj->sh; codec = &cache_obj->codec; timer = &cache_obj->timer; } timer_name = switch_channel_get_variable(channel, "timer_name"); 
switch_core_session_reset(session, SWITCH_FALSE, SWITCH_FALSE); rate = read_impl.actual_samples_per_second; interval = read_impl.microseconds_per_packet / 1000; channels = read_impl.number_of_channels; if (need_create) { memset(sh, 0, sizeof(*sh)); if ((status = switch_core_speech_open(sh, tts_name, voice_name, (uint32_t) rate, interval, read_impl.number_of_channels, &flags, NULL)) != SWITCH_STATUS_SUCCESS) { switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "Invalid TTS module %s[%s]!\n", tts_name, voice_name); switch_core_session_reset(session, SWITCH_TRUE, SWITCH_TRUE); switch_ivr_clear_speech_cache(session); arg_recursion_check_stop(args); return status; } } else if (cache_obj && strcasecmp(cache_obj->voice_name, voice_name)) { switch_copy_string(cache_obj->voice_name, voice_name, sizeof(cache_obj->voice_name)); switch_core_speech_text_param_tts(sh, "voice", voice_name); } if (switch_channel_pre_answer(channel) != SWITCH_STATUS_SUCCESS) { flags = 0; switch_core_speech_close(sh, &flags); arg_recursion_check_stop(args); return SWITCH_STATUS_FALSE; } switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "OPEN TTS %s\n", tts_name); codec_name = "L16"; if (need_create) { if (switch_core_codec_init(codec, codec_name, NULL, NULL, (int) rate, interval, channels, SWITCH_CODEC_FLAG_ENCODE | SWITCH_CODEC_FLAG_DECODE, NULL, pool) == SWITCH_STATUS_SUCCESS) { switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "Raw Codec Activated\n"); } else { switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "Raw Codec Activation Failed %s@%uhz 1 channel %dms\n", codec_name, rate, interval); flags = 0; switch_core_speech_close(sh, &flags); switch_core_session_reset(session, SWITCH_TRUE, SWITCH_TRUE); switch_ivr_clear_speech_cache(session); arg_recursion_check_stop(args); return SWITCH_STATUS_GENERR; } } write_frame.codec = codec; if (timer_name) { if (need_create) { if (switch_core_timer_init(timer, 
timer_name, interval, (int) sh->samples, pool) != SWITCH_STATUS_SUCCESS) { switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "Setup timer failed!\n"); switch_core_codec_destroy(write_frame.codec); flags = 0; switch_core_speech_close(sh, &flags); switch_core_session_reset(session, SWITCH_TRUE, SWITCH_TRUE); switch_ivr_clear_speech_cache(session); arg_recursion_check_stop(args); return SWITCH_STATUS_GENERR; } switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "Setup timer success %u bytes per %d ms!\n", sh->samples * 2, interval); } switch_core_timer_sync(timer); // Sync timer /* start a thread to absorb incoming audio */ switch_core_service_session(session); } status = switch_ivr_speak_text_handle(session, sh, write_frame.codec, timer_name ? timer : NULL, text, args); flags = 0; if (!cache_obj) { switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "no cache_obj"); switch_core_speech_close(sh, &flags); switch_core_codec_destroy(codec); } if (timer_name) { /* End the audio absorbing thread */ switch_core_thread_session_end(session); if (!cache_obj) { switch_core_timer_destroy(timer); } } switch_core_session_reset(session, SWITCH_FALSE, SWITCH_TRUE); arg_recursion_check_stop(args); return status; }
- 调用函数
-
switch_core_soeech.c#switch_core_speech_open()
函数实际只是通过核心注册的接口调用到 unimrcp 模块的mod_unimrcp.c#synth_speech_open()
函数SWITCH_DECLARE(switch_status_t) switch_core_speech_open(switch_speech_handle_t *sh, const char *module_name, const char *voice_name, unsigned int rate, unsigned int interval, unsigned int channels, switch_speech_flag_t *flags, switch_memory_pool_t *pool) { switch_status_t status; char buf[256] = ""; char *param = NULL; if (!sh || !flags || zstr(module_name)) { return SWITCH_STATUS_FALSE; } if (strchr(module_name, ':')) { switch_set_string(buf, module_name); if ((param = strchr(buf, ':'))) { *param++ = '\0'; module_name = buf; } } if ((sh->speech_interface = switch_loadable_module_get_speech_interface(module_name)) == 0) { switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Invalid speech module [%s]!\n", module_name); return SWITCH_STATUS_GENERR; } sh->flags = *flags; if (pool) { sh->memory_pool = pool; } else { if ((status = switch_core_new_memory_pool(&sh->memory_pool)) != SWITCH_STATUS_SUCCESS) { UNPROTECT_INTERFACE(sh->speech_interface); return status; } switch_set_flag(sh, SWITCH_SPEECH_FLAG_FREE_POOL); } sh->engine = switch_core_strdup(sh->memory_pool, module_name); if (param) { sh->param = switch_core_strdup(sh->memory_pool, param); } sh->rate = rate; sh->name = switch_core_strdup(sh->memory_pool, module_name); sh->samples = switch_samples_per_packet(rate, interval); sh->samplerate = rate; sh->native_rate = rate; sh->channels = channels; sh->real_channels = 1; if ((status = sh->speech_interface->speech_open(sh, voice_name, rate, channels, flags)) == SWITCH_STATUS_SUCCESS) { switch_set_flag(sh, SWITCH_SPEECH_FLAG_OPEN); } else { UNPROTECT_INTERFACE(sh->speech_interface); } return status; }
-
mod_unimrcp.c#synth_speech_open()
函数的核心处理是创建一个 FreeSWITCH 层面的 speech_channel_t 对象,并调用mod_unimrcp.c#speech_channel_open()
函数通过底层 MRCP 客户端建立与远程 MRCP 服务端之间的连接
/**
 * speech_open callback of the TTS interface.
 *
 * Creates a synthesizer speech channel named "TTS-<n>", looks up the MRCP
 * profile (taken from sh->param, or the module default when that is empty),
 * opens the channel against that profile and then applies the requested
 * voice followed by the profile's default synthesizer params.
 *
 * sh->param may be "profile" or "profile:session-uuid"; in the second form
 * the uuid is split off and used for logging. Otherwise the uuid is taken
 * from a session stored on the handle's memory pool under "__session", if
 * there is one.
 *
 * @return SWITCH_STATUS_SUCCESS, SWITCH_STATUS_FALSE when the channel cannot
 *         be created or the profile is unknown, or the failure status of
 *         speech_channel_open()
 */
static switch_status_t synth_speech_open(switch_speech_handle_t *sh, const char *voice_name, int rate, int channels, switch_speech_flag_t *flags)
{
	switch_status_t open_status;
	speech_channel_t *schannel = NULL;
	const char *profile_name = sh->param;
	profile_t *profile = NULL;
	int speech_channel_number = get_next_speech_channel_number();
	char *name = NULL;
	char *session_uuid = NULL;
	switch_hash_index_t *hi = NULL;

	/* Name the channel */
	if (profile_name && strchr(profile_name, ':')) {
		/* Profile has session name appended to it. Pick it out of a private copy */
		char *profile_copy = switch_core_strdup(sh->memory_pool, profile_name);
		char *separator = strchr(profile_copy, ':');

		*separator = '\0';
		profile_name = profile_copy;
		session_uuid = switch_core_strdup(sh->memory_pool, separator + 1);
	} else {
		/* check if session is associated w/ this memory pool */
		switch_core_session_t *session = switch_core_memory_pool_get_data(sh->memory_pool, "__session");

		if (session) {
			session_uuid = switch_core_session_get_uuid(session);
		}
	}
	name = switch_core_sprintf(sh->memory_pool, "TTS-%d", speech_channel_number);

	switch_log_printf(SWITCH_CHANNEL_UUID_LOG(session_uuid), SWITCH_LOG_INFO,
					  "speech_handle: name = %s, rate = %d, speed = %d, samples = %d, voice = %s, engine = %s, param = %s\n", sh->name, sh->rate,
					  sh->speed, sh->samples, sh->voice, sh->engine, sh->param);
	switch_log_printf(SWITCH_CHANNEL_UUID_LOG(session_uuid), SWITCH_LOG_INFO, "voice = %s, rate = %d\n", voice_name, rate);

	/* Allocate the channel */
	if (speech_channel_create(&schannel, name, session_uuid, SPEECH_CHANNEL_SYNTHESIZER, &globals.synth, (uint16_t) rate, sh->memory_pool) != SWITCH_STATUS_SUCCESS) {
		return SWITCH_STATUS_FALSE;
	}
	sh->private_info = schannel;
	schannel->fsh = sh;

	/* Open the channel, falling back to the default TTS profile */
	if (zstr(profile_name)) {
		profile_name = globals.unimrcp_default_synth_profile;
	}
	profile = (profile_t *) switch_core_hash_find(globals.profiles, profile_name);
	if (!profile) {
		switch_log_printf(SWITCH_CHANNEL_UUID_LOG(session_uuid), SWITCH_LOG_ERROR, "(%s) Can't find profile, %s\n", name, profile_name);
		return SWITCH_STATUS_FALSE;
	}

	open_status = speech_channel_open(schannel, profile);
	if (open_status != SWITCH_STATUS_SUCCESS) {
		return open_status;
	}

	/* Set session TTS params */
	if (!zstr(voice_name)) {
		speech_channel_set_param(schannel, "Voice-Name", voice_name);
	}

	/* Set default TTS params */
	for (hi = switch_core_hash_first(profile->default_synth_params); hi; hi = switch_core_hash_next(&hi)) {
		const void *key;
		void *val;

		switch_core_hash_this(hi, &key, NULL, &val);
		speech_channel_set_param(schannel, (char *) key, (char *) val);
	}

	return open_status;
}
-
mod_unimrcp.c#speech_channel_open()
函数主要逻辑是调用底层库函数创建 MRCP 会话,并建立连接- 调用库函数
mrcp_application.c#mrcp_application_session_create()
创建 MRCP 会话 - 调用库函数
mrcp_application.c#mrcp_application_channel_create()
创建 MRCP 会话下的 channel - 调用库函数
mrcp_application.c#mrcp_application_channel_add()
请求远程 MRCP 服务器创建新会话
/**
 * Open a speech channel against an MRCP profile.
 *
 * Runs with schannel->mutex held for its whole duration.  It creates the
 * MRCP session, an audio termination and an MRCP channel (synthesizer or
 * recognizer depending on schannel->type), adds the channel to the session,
 * and then waits on schannel->cond until the channel state is no longer
 * SPEECH_CHANNEL_CLOSED.
 *
 * @param schannel the channel to open; must currently be SPEECH_CHANNEL_CLOSED
 * @param profile  the profile to open against; stored in schannel->profile
 * @return SWITCH_STATUS_SUCCESS when the channel left the CLOSED state without
 *         ending up in ERROR;
 *         SWITCH_STATUS_RESTART when the session could not be created, or when
 *         the channel went to ERROR and returned to CLOSED after termination
 *         (the caller may retry);
 *         SWITCH_STATUS_FALSE for every other failure (no retry)
 */
static switch_status_t speech_channel_open(speech_channel_t *schannel, profile_t *profile)
{
    switch_status_t status = SWITCH_STATUS_SUCCESS;
    mpf_termination_t *termination = NULL;
    mrcp_resource_type_e resource_type;
    int warned = 0;

    switch_mutex_lock(schannel->mutex);

    /* make sure we can open channel */
    if (schannel->state != SPEECH_CHANNEL_CLOSED) {
        status = SWITCH_STATUS_FALSE;
        goto done;
    }

    schannel->profile = profile;

    /* create MRCP session */
    if ((schannel->unimrcp_session = mrcp_application_session_create(schannel->application->app, profile->name, schannel)) == NULL) {
        /* profile doesn't exist? */
        switch_log_printf(SWITCH_CHANNEL_UUID_LOG(schannel->session_uuid), SWITCH_LOG_ERROR, "(%s) Unable to create session with %s\n", schannel->name, profile->name);
        status = SWITCH_STATUS_RESTART;
        goto done;
    }
    mrcp_application_session_name_set(schannel->unimrcp_session, schannel->name);

    /* create audio termination and add to channel */
    /* NOTE(review): on each failure path below the session is destroyed but
     * schannel->unimrcp_session keeps its old value - confirm no later code
     * dereferences it. */
    if ((termination = speech_channel_create_mpf_termination(schannel)) == NULL) {
        switch_log_printf(SWITCH_CHANNEL_UUID_LOG(schannel->session_uuid), SWITCH_LOG_ERROR, "(%s) Unable to create termination with %s\n", schannel->name, profile->name);
        mrcp_application_session_destroy(schannel->unimrcp_session);
        status = SWITCH_STATUS_FALSE;
        goto done;
    }
    /* pick the MRCP resource matching the channel type */
    if (schannel->type == SPEECH_CHANNEL_SYNTHESIZER) {
        resource_type = MRCP_SYNTHESIZER_RESOURCE;
    } else {
        resource_type = MRCP_RECOGNIZER_RESOURCE;
    }
    if ((schannel->unimrcp_channel = mrcp_application_channel_create(schannel->unimrcp_session, resource_type, termination, NULL, schannel)) == NULL) {
        switch_log_printf(SWITCH_CHANNEL_UUID_LOG(schannel->session_uuid), SWITCH_LOG_ERROR, "(%s) Unable to create channel with %s\n", schannel->name, profile->name);
        mrcp_application_session_destroy(schannel->unimrcp_session);
        status = SWITCH_STATUS_FALSE;
        goto done;
    }

    /* add channel to session... this establishes the connection to the MRCP server */
    if (mrcp_application_channel_add(schannel->unimrcp_session, schannel->unimrcp_channel) != TRUE) {
        switch_log_printf(SWITCH_CHANNEL_UUID_LOG(schannel->session_uuid), SWITCH_LOG_ERROR, "(%s) Unable to add channel to session with %s\n", schannel->name, profile->name);
        mrcp_application_session_destroy(schannel->unimrcp_session);
        status = SWITCH_STATUS_FALSE;
        goto done;
    }

    /* wait for channel to be ready */
    /* The loop has no upper bound: a timeout only triggers a one-time warning,
     * after which waiting continues until the state changes. */
    warned = 0;
    while (schannel->state == SPEECH_CHANNEL_CLOSED) {
        if (switch_thread_cond_timedwait(schannel->cond, schannel->mutex, SPEECH_CHANNEL_TIMEOUT_USEC) == SWITCH_STATUS_TIMEOUT && !warned) {
            warned = 1;
            switch_log_printf(SWITCH_CHANNEL_UUID_LOG(schannel->session_uuid), SWITCH_LOG_WARNING, "(%s) MRCP session has not opened after %d ms\n", schannel->name, SPEECH_CHANNEL_TIMEOUT_USEC / (1000));
        }
    }
    if (schannel->state == SPEECH_CHANNEL_READY) {
        switch_log_printf(SWITCH_CHANNEL_UUID_LOG(schannel->session_uuid), SWITCH_LOG_DEBUG, "(%s) channel is ready\n", schannel->name);
    } else if (schannel->state == SPEECH_CHANNEL_CLOSED) {
        /* NOTE(review): the wait loop above exits only once state is no longer
         * CLOSED, so this branch looks unreachable unless the state is written
         * without holding the mutex - confirm. */
        switch_log_printf(SWITCH_CHANNEL_UUID_LOG(schannel->session_uuid), SWITCH_LOG_ERROR, "(%s) Timed out waiting for channel to be ready\n", schannel->name);
        /* can't retry */
        status = SWITCH_STATUS_FALSE;
    } else if (schannel->state == SPEECH_CHANNEL_ERROR) {
        switch_log_printf(SWITCH_CHANNEL_UUID_LOG(schannel->session_uuid), SWITCH_LOG_DEBUG, "(%s) Terminating MRCP session\n", schannel->name);
        if (!mrcp_application_session_terminate(schannel->unimrcp_session)) {
            switch_log_printf(SWITCH_CHANNEL_UUID_LOG(schannel->session_uuid), SWITCH_LOG_WARNING, "(%s) Unable to terminate application session\n", schannel->name);
            status = SWITCH_STATUS_FALSE;
            goto done;
        }

        /* Wait for session to be cleaned up */
        /* Same pattern as above: warn once on timeout, keep waiting until the
         * state leaves ERROR. */
        warned = 0;
        while (schannel->state == SPEECH_CHANNEL_ERROR) {
            if (switch_thread_cond_timedwait(schannel->cond, schannel->mutex, SPEECH_CHANNEL_TIMEOUT_USEC) == SWITCH_STATUS_TIMEOUT && !warned) {
                warned = 1;
                switch_log_printf(SWITCH_CHANNEL_UUID_LOG(schannel->session_uuid), SWITCH_LOG_WARNING, "(%s) MRCP session has not cleaned up after %d ms\n", schannel->name, SPEECH_CHANNEL_TIMEOUT_USEC / (1000));
            }
        }
        if (schannel->state != SPEECH_CHANNEL_CLOSED) {
            /* major issue... can't retry */
            status = SWITCH_STATUS_FALSE;
        } else {
            /* failed to open profile, retry is allowed */
            status = SWITCH_STATUS_RESTART;
        }
    }

  done:

    /* single exit point so the mutex is released on every path */
    switch_mutex_unlock(schannel->mutex);
    return status;
}
- 调用库函数
-
此时回到本节步骤2第2步,
switch_ivr_play_say.c#switch_ivr_speak_text_handle()
函数是 tts 处理的功能主体,关键处理如下:
- 通过核心函数 switch_core.c#switch_core_speech_feed_tts() 调用到 mod_unimrcp.c#synth_speech_feed_tts() 函数,发起 MRCP 语音合成请求
- 在 for(;;) 无限循环中不断执行核心函数 switch_core.c#switch_core_speech_read_tts(),调用到 mod_unimrcp.c#synth_speech_read_tts() 函数尝试获取合成的语音
- 通过核心函数 switch_core.c#switch_core_session_write_frame() 将 MRCP 服务器返回的语音流写到当前会话,通过 RTP 传输到 SIP 终端播放
/**
 * Speak text on a session through an already-open speech (TTS) handle.
 *
 * Every '*' and '#' in the text is replaced by the value of the channel
 * variables "star_replace" / "pound_replace" (defaults "star" / "pound").
 * The text is then fed to the speech engine, and synthesized audio is read
 * and written to the session frame by frame until the engine reports it is
 * finished, the channel stops being ready, a break is requested, or an input
 * handler in args asks to stop.
 *
 * @param session the session to play audio on
 * @param sh      open speech handle; NULL yields SWITCH_STATUS_FALSE
 * @param codec   codec for the written frames; must be ready
 * @param timer   optional timer used for pacing; when NULL the loop paces
 *                itself by reading frames from the session
 * @param text    text to speak
 * @param args    optional input arguments (DTMF buffer, callbacks, dmachine)
 * @return SWITCH_STATUS_SUCCESS when speaking finished (a BREAK from the
 *         engine read is mapped to SUCCESS); SWITCH_STATUS_MEMERR if the
 *         expansion buffer cannot be allocated; otherwise the status that
 *         ended the loop
 */
SWITCH_DECLARE(switch_status_t) switch_ivr_speak_text_handle(switch_core_session_t *session,
                                                             switch_speech_handle_t *sh,
                                                             switch_codec_t *codec, switch_timer_t *timer, const char *text, switch_input_args_t *args)
{
    switch_channel_t *channel = switch_core_session_get_channel(session);
    short abuf[SWITCH_RECOMMENDED_BUFFER_SIZE];
    switch_dtmf_t dtmf = { 0 };
    uint32_t len = 0;
    switch_size_t ilen = 0;
    switch_frame_t write_frame = { 0 };
    switch_status_t status = SWITCH_STATUS_SUCCESS;
    switch_speech_flag_t flags = SWITCH_SPEECH_FLAG_NONE;
    switch_size_t extra = 0;
    char *tmp = NULL;
    const char *star, *pound, *p;
    switch_size_t starlen, poundlen;

    if (!sh) {
        return SWITCH_STATUS_FALSE;
    }

    if (switch_channel_pre_answer(channel) != SWITCH_STATUS_SUCCESS) {
        return SWITCH_STATUS_FALSE;
    }

    if (!switch_core_codec_ready(codec)) {
        return SWITCH_STATUS_FALSE;
    }

    arg_recursion_check_start(args);

    write_frame.data = abuf;
    write_frame.buflen = sizeof(abuf);

    /* bytes per frame: 2 bytes per sample, per channel */
    len = sh->samples * 2 * sh->channels;

    flags = 0;

    if (!(star = switch_channel_get_variable(channel, "star_replace"))) {
        star = "star";
    }
    if (!(pound = switch_channel_get_variable(channel, "pound_replace"))) {
        pound = "pound";
    }
    starlen = strlen(star);
    poundlen = strlen(pound);

    /* count how much room the '*' / '#' replacements need */
    for (p = text; p && *p; p++) {
        if (*p == '*') {
            extra += starlen;
        } else if (*p == '#') {
            extra += poundlen;
        }
    }

    if (extra) {
        char *tp;
        switch_size_t mylen = strlen(text) + extra + 1;

        tmp = malloc(mylen);
        if (!tmp) {
            arg_recursion_check_stop(args);
            return SWITCH_STATUS_MEMERR;
        }
        memset(tmp, 0, mylen);
        tp = tmp;
        for (p = text; p && *p; p++) {
            /*
             * tp always points at the first unwritten (zeroed) byte of tmp.
             * Bound each write by the space really left in tmp.  sizeof(tp)
             * would be the size of a pointer, which truncates replacement
             * words that do not fit in it and leaves an embedded NUL that
             * cuts the spoken text short.
             */
            if (*p == '*') {
                snprintf(tp, mylen - (switch_size_t) (tp - tmp), "%s", star);
                tp += starlen;
            } else if (*p == '#') {
                snprintf(tp, mylen - (switch_size_t) (tp - tmp), "%s", pound);
                tp += poundlen;
            } else {
                *tp++ = *p;
            }
        }

        text = tmp;
    }

    switch_core_speech_feed_tts(sh, text, &flags);
    switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "Speaking text: %s\n", text);
    /* the expanded copy is not used after this point */
    switch_safe_free(tmp);
    text = NULL;

    write_frame.rate = sh->rate;
    memset(write_frame.data, 0, len);
    write_frame.datalen = len;
    write_frame.samples = len / 2;
    write_frame.codec = codec;

    switch_assert(codec->implementation != NULL);

    switch_channel_audio_sync(channel);

    for (;;) {
        switch_event_t *event;

        ilen = len;

        if (!switch_channel_ready(channel)) {
            status = SWITCH_STATUS_FALSE;
            break;
        }

        if (switch_channel_test_flag(channel, CF_BREAK)) {
            switch_channel_clear_flag(channel, CF_BREAK);
            status = SWITCH_STATUS_BREAK;
            break;
        }

        switch_ivr_parse_all_events(session);

        if (args) {
            /* dtmf handler function you can hook up to be executed when a digit is dialed during playback
             * if you return anything but SWITCH_STATUS_SUCCESS the playback will stop.
             */
            if (switch_channel_has_dtmf(channel)) {
                if (!args->input_callback && !args->buf && !args->dmachine) {
                    status = SWITCH_STATUS_BREAK;
                    break;
                }
                if (args->buf && !strcasecmp(args->buf, "_break_")) {
                    status = SWITCH_STATUS_BREAK;
                } else {
                    switch_channel_dequeue_dtmf(channel, &dtmf);

                    if (args->dmachine) {
                        char ds[2] = { dtmf.digit, '\0'};
                        if ((status = switch_ivr_dmachine_feed(args->dmachine, ds, NULL)) != SWITCH_STATUS_SUCCESS) {
                            break;
                        }
                    }

                    if (args->input_callback) {
                        status = args->input_callback(session, (void *) &dtmf, SWITCH_INPUT_TYPE_DTMF, args->buf, args->buflen);
                    } else if (args->buf) {
                        *((char *) args->buf) = dtmf.digit;
                        status = SWITCH_STATUS_BREAK;
                    }
                }
            }

            if (args->input_callback) {
                if (switch_core_session_dequeue_event(session, &event, SWITCH_FALSE) == SWITCH_STATUS_SUCCESS) {
                    switch_status_t ostatus = args->input_callback(session, event, SWITCH_INPUT_TYPE_EVENT, args->buf, args->buflen);
                    if (ostatus != SWITCH_STATUS_SUCCESS) {
                        status = ostatus;
                    }
                    switch_event_destroy(&event);
                }
            }

            if (status != SWITCH_STATUS_SUCCESS) {
                break;
            }
        }

        /* while paused: keep pacing, but do not read or write any TTS audio */
        if (switch_test_flag(sh, SWITCH_SPEECH_FLAG_PAUSE)) {
            if (timer) {
                if (switch_core_timer_next(timer) != SWITCH_STATUS_SUCCESS) {
                    break;
                }
            } else {
                switch_frame_t *read_frame;
                switch_status_t tstatus = switch_core_session_read_frame(session, &read_frame, SWITCH_IO_FLAG_NONE, 0);

                while (switch_channel_ready(channel) && switch_channel_test_flag(channel, CF_HOLD)) {
                    switch_ivr_parse_all_messages(session);
                    switch_yield(10000);
                }

                if (!SWITCH_READ_ACCEPTABLE(tstatus)) {
                    break;
                }

                if (args && args->dmachine) {
                    if ((status = switch_ivr_dmachine_ping(args->dmachine, NULL)) != SWITCH_STATUS_SUCCESS) {
                        goto done;
                    }
                }

                if (args && (args->read_frame_callback)) {
                    if ((status = args->read_frame_callback(session, read_frame, args->user_data)) != SWITCH_STATUS_SUCCESS) {
                        goto done;
                    }
                }
            }
            continue;
        }

        flags = SWITCH_SPEECH_FLAG_BLOCKING;
        status = switch_core_speech_read_tts(sh, abuf, &ilen, &flags);

        if (status != SWITCH_STATUS_SUCCESS) {
            /* BREAK from the engine is mapped to SUCCESS before leaving the loop */
            if (status == SWITCH_STATUS_BREAK) {
                status = SWITCH_STATUS_SUCCESS;
            }
            break;
        }

        write_frame.datalen = (uint32_t) ilen;
        write_frame.samples = (uint32_t) (ilen / 2 / sh->channels);
        if (timer) {
            write_frame.timestamp = timer->samplecount;
        }
        if (switch_core_session_write_frame(session, &write_frame, SWITCH_IO_FLAG_NONE, 0) != SWITCH_STATUS_SUCCESS) {
            break;
        }

        if (timer) {
            if (switch_core_timer_next(timer) != SWITCH_STATUS_SUCCESS) {
                break;
            }
        } else {    /* time off the channel (if you must) */
            switch_frame_t *read_frame;
            switch_status_t tstatus = switch_core_session_read_frame(session, &read_frame, SWITCH_IO_FLAG_NONE, 0);

            while (switch_channel_ready(channel) && switch_channel_test_flag(channel, CF_HOLD)) {
                switch_ivr_parse_all_messages(session);
                switch_yield(10000);
            }

            if (!SWITCH_READ_ACCEPTABLE(tstatus)) {
                break;
            }

            if (args && args->dmachine) {
                if ((status = switch_ivr_dmachine_ping(args->dmachine, NULL)) != SWITCH_STATUS_SUCCESS) {
                    goto done;
                }
            }

            if (args && (args->read_frame_callback)) {
                if ((status = args->read_frame_callback(session, read_frame, args->user_data)) != SWITCH_STATUS_SUCCESS) {
                    goto done;
                }
            }
        }
    }

  done:

    switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "done speaking text\n");
    flags = 0;
    switch_core_speech_flush_tts(sh);

    arg_recursion_check_stop(args);
    return status;
}
- 通过核心函数
-
mod_unimrcp.c#synth_speech_feed_tts()
函数的核心其实是执行 mod_unimrcp.c#synth_channel_speak() 函数,该函数的核心处理如下:
- 调用底层库函数 mrcp_application.c#mrcp_application_message_create() 创建 SYNTHESIZER_SPEAK tts 请求的消息结构
- 调用底层库函数 mrcp_application.c#mrcp_application_message_send() 向 MRCP 服务器发送语音合成请求
- 等待 MRCP 服务器返回,将当前 tts 的 channel 状态流转为 SPEECH_CHANNEL_PROCESSING。这个部分主要依靠 unimrcp 模块加载时嵌入到底层 MRCP 客户端的回调 mod_unimrcp.c#synth_on_message_receive() 函数完成
/**
 * Speech-interface entry point that submits text for synthesis.
 *
 * @param sh    speech handle whose private_info holds the speech channel
 * @param text  text (plain or SSML) to synthesize
 * @param flags not used by this function
 * @return SWITCH_STATUS_FALSE when text is empty, otherwise the result of
 *         synth_channel_speak()
 */
static switch_status_t synth_speech_feed_tts(switch_speech_handle_t *sh, char *text, switch_speech_flag_t *flags)
{
    switch_status_t status = SWITCH_STATUS_SUCCESS;
    speech_channel_t *schannel = (speech_channel_t *) sh->private_info;

    if (zstr(text)) {
        status = SWITCH_STATUS_FALSE;
    } else {
        status = synth_channel_speak(schannel, text);
    }
    return status;
}

/**
 * Send an MRCP SPEAK request for the given text and wait for it to start.
 *
 * Runs with schannel->mutex held.  The channel must be in
 * SPEECH_CHANNEL_READY.  After sending, the function waits on schannel->cond
 * until the state is no longer READY, and succeeds only if the new state is
 * SPEECH_CHANNEL_PROCESSING.
 *
 * @param schannel the synthesizer channel
 * @param text     text to speak; treated as SSML when it starts with XML_ID
 *                 or SSML_ID, as plain text otherwise
 * @return SWITCH_STATUS_SUCCESS once the channel is PROCESSING,
 *         SWITCH_STATUS_FALSE on any failure
 */
static switch_status_t synth_channel_speak(speech_channel_t *schannel, const char *text)
{
    switch_status_t status = SWITCH_STATUS_SUCCESS;
    mrcp_message_t *mrcp_message = NULL;
    mrcp_generic_header_t *generic_header = NULL;
    mrcp_synth_header_t *synth_header = NULL;
    int warned = 0;

    switch_mutex_lock(schannel->mutex);
    /* a SPEAK can only be started from the READY state */
    if (schannel->state != SPEECH_CHANNEL_READY) {
        status = SWITCH_STATUS_FALSE;
        goto done;
    }

    mrcp_message = mrcp_application_message_create(schannel->unimrcp_session, schannel->unimrcp_channel, SYNTHESIZER_SPEAK);
    if (mrcp_message == NULL) {
        switch_log_printf(SWITCH_CHANNEL_UUID_LOG(schannel->session_uuid), SWITCH_LOG_ERROR, "(%s) Failed to create SPEAK message\n", schannel->name);
        status = SWITCH_STATUS_FALSE;
        goto done;
    }

    /* set generic header fields (content-type) */
    if ((generic_header = (mrcp_generic_header_t *) mrcp_generic_header_prepare(mrcp_message)) == NULL) {
        status = SWITCH_STATUS_FALSE;
        goto done;
    }

    /* good enough way of determining SSML or plain text body */
    if (text_starts_with(text, XML_ID) || text_starts_with(text, SSML_ID)) {
        apt_string_assign(&generic_header->content_type, schannel->profile->ssml_mime_type, mrcp_message->pool);
    } else {
        apt_string_assign(&generic_header->content_type, MIME_TYPE_PLAIN_TEXT, mrcp_message->pool);
    }
    mrcp_generic_header_property_add(mrcp_message, GENERIC_HEADER_CONTENT_TYPE);

    /* set synthesizer header fields (voice, rate, etc.) */
    if ((synth_header = (mrcp_synth_header_t *) mrcp_resource_header_prepare(mrcp_message)) == NULL) {
        status = SWITCH_STATUS_FALSE;
        goto done;
    }

    /* add params to MRCP message */
    synth_channel_set_params(schannel, mrcp_message, generic_header, synth_header);

    /* set body (plain text or SSML) */
    /* NOTE(review): the body is assigned from schannel->memory_pool while the
     * headers above use mrcp_message->pool - confirm this is intended. */
    apt_string_assign(&mrcp_message->body, text, schannel->memory_pool);

    /* Empty audio queue and send SPEAK to MRCP server */
    audio_queue_clear(schannel->audio_queue);
    if (mrcp_application_message_send(schannel->unimrcp_session, schannel->unimrcp_channel, mrcp_message) == FALSE) {
        status = SWITCH_STATUS_FALSE;
        goto done;
    }
    /* wait for IN-PROGRESS */
    /* No upper bound: a timeout logs one warning, then waiting continues until
     * the state leaves READY (presumably updated by the MRCP message callback
     * - TODO confirm). */
    while (schannel->state == SPEECH_CHANNEL_READY) {
        if (switch_thread_cond_timedwait(schannel->cond, schannel->mutex, SPEECH_CHANNEL_TIMEOUT_USEC) == SWITCH_STATUS_TIMEOUT && !warned) {
            warned = 1;
            switch_log_printf(SWITCH_CHANNEL_UUID_LOG(schannel->session_uuid), SWITCH_LOG_WARNING, "(%s) SPEAK IN-PROGRESS not received after %d ms\n", schannel->name, SPEECH_CHANNEL_TIMEOUT_USEC / (1000));
        }
    }
    /* any state other than PROCESSING means the SPEAK did not start */
    if (schannel->state != SPEECH_CHANNEL_PROCESSING) {
        status = SWITCH_STATUS_FALSE;
        goto done;
    }

  done:

    /* single exit point so the mutex is released on every path */
    switch_mutex_unlock(schannel->mutex);
    return status;
}
- 调用底层库函数
-
mod_unimrcp.c#synth_speech_read_tts()
函数的核心是执行mod_unimrcp.c#speech_channel_read()
,mod_unimrcp.c#speech_channel_read()
函数的关键则是检查 tts 的 channel 状态,当其状态符合要求的时候从 channel 的语音流缓冲队列中读取数据。此时回到本节步骤6第3步,switch_core.c#switch_core_session_write_frame()
函数会将从 MRCP 服务器传输过来到语音流数据写入到当前会话缓冲,经过编码转化,最终将通过 RTP 发送到终端播放,至此 tts 语音合成处理流程基本结束static switch_status_t synth_speech_read_tts(switch_speech_handle_t *sh, void *data, switch_size_t *datalen, switch_speech_flag_t *flags) { switch_status_t status = SWITCH_STATUS_SUCCESS; switch_size_t bytes_read; speech_channel_t *schannel = (speech_channel_t *) sh->private_info; bytes_read = *datalen; if (speech_channel_read(schannel, data, &bytes_read, (*flags & SWITCH_SPEECH_FLAG_BLOCKING)) == SWITCH_STATUS_SUCCESS) { /* pad data, if not enough read */ if (bytes_read < *datalen) { #ifdef MOD_UNIMRCP_DEBUG_AUDIO_QUEUE switch_log_printf(SWITCH_CHANNEL_UUID_LOG(schannel->session_uuid), SWITCH_LOG_DEBUG, "(%s) adding %ld bytes of padding\n", schannel->name, *datalen - bytes_read); #endif memset((uint8_t *) data + bytes_read, schannel->silence, *datalen - bytes_read); } } else { /* ready for next speak request */ speech_channel_set_state(schannel, SPEECH_CHANNEL_READY); *datalen = 0; status = SWITCH_STATUS_BREAK; } /* report negotiated sample rate back to FreeSWITCH */ sh->native_rate = schannel->rate; return status; } static switch_status_t speech_channel_read(speech_channel_t *schannel, void *data, switch_size_t *len, int block) { switch_status_t status = SWITCH_STATUS_SUCCESS; if (!schannel || !schannel->mutex || !schannel->audio_queue) { return SWITCH_STATUS_FALSE; } switch (schannel->state) { case SPEECH_CHANNEL_DONE: /* pull any remaining audio - never blocking */ if (audio_queue_read(schannel->audio_queue, data, len, 0) == SWITCH_STATUS_FALSE) { /* all frames read */ status = SWITCH_STATUS_BREAK; } break; case SPEECH_CHANNEL_PROCESSING: /* IN-PROGRESS */ audio_queue_read(schannel->audio_queue, data, len, block); break; default: status = SWITCH_STATUS_BREAK; } return status; }