一、获取License
1.首先前往 https://ai.baidu.com 百度语音官网注册账号并且申请成为开发者。
2.创建应用,并且填写相应的应用描述
3.获取 appId,apiKey,secretKey
二、书写代码
1.新建一个百度语音的基类,命名 SpeechBase(代码如下)
using UnityEngine;
namespace BaiduSpeech
{
    /// <summary>
    /// Base class for the Baidu speech components. It stores the application credentials
    /// and forwards Unity's lifecycle messages to overridable hooks.
    /// </summary>
    public abstract class SpeechBase : MonoBehaviour
    {
        /// <summary>App id of the Baidu application (not read by this class itself).</summary>
        public string appId { get; set; }

        /// <summary>API key of the Baidu application (not read by this class itself).</summary>
        public string apiKey { get; set; }

        /// <summary>Secret key of the Baidu application (not read by this class itself).</summary>
        public string secretKey { get; set; }

        //---------------------------------------- Overridable hooks ----------------------------------------

        /// <summary>Initialization hook; invoked first from Unity's Awake message.</summary>
        public virtual void OnAwake()
        {
        }

        /// <summary>Platform initialization hook; invoked from Awake right after <see cref="OnAwake"/>.</summary>
        public virtual void OnInitPlatform()
        {
        }

        /// <summary>Hook invoked from Unity's Start message.</summary>
        public virtual void OnStart()
        {
        }

        /// <summary>Hook invoked from Unity's Update message.</summary>
        public virtual void OnUpdate()
        {
        }

        /// <summary>Release hook; invoked from Unity's OnDestroy message.</summary>
        public virtual void OnDispose()
        {
        }

        //---------------------------------------- Unity messages ----------------------------------------

        private void Awake()
        {
            // Order matters to subclasses: general setup first, platform setup second.
            OnAwake();
            OnInitPlatform();
        }

        private void Start()
        {
            OnStart();
        }

        private void Update()
        {
            OnUpdate();
        }

        private void OnDestroy()
        {
            OnDispose();
        }
    }
}
2.新建一个语音转文字基类命名 AsrBase (代码如下)
namespace BaiduSpeech
{
    /// <summary>
    /// Base class for speech-to-text implementations. Every member is an empty virtual
    /// method, so a subclass overrides only the operations it supports.
    /// </summary>
    public abstract class AsrBase : SpeechBase
    {
        /// <summary>Initializes speech recognition.</summary>
        public virtual void AsrInit()
        {
        }

        /// <summary>Starts recording.</summary>
        /// <param name="json">Recognition parameters passed through to the implementation.</param>
        public virtual void VoiceStart(string json)
        {
        }

        /// <summary>
        /// Cancels the current recognition: it stops immediately and no recognition
        /// result is returned.
        /// </summary>
        public virtual void VoiceCancel()
        {
        }

        /// <summary>Stops recording.</summary>
        public virtual void VoiceStop()
        {
        }
    }
}
3.新建一个Json解析的类 命名 WebAsrParams (代码如下)
/// <summary>
/// Target type for deserializing the recognition response with JsonUtility.
/// The members must stay public fields with exactly these names, because they are
/// matched against the keys of the response JSON.
/// </summary>
// The attribute is fully qualified so this listing compiles without a `using System;` directive.
[System.Serializable]
public class WebAsrParams
{
    /// <summary>Error code of the response (presumably 0 on success; confirm against the Baidu API docs).</summary>
    public int err_no;

    /// <summary>Human-readable message that accompanies <see cref="err_no"/>.</summary>
    public string err_msg;

    /// <summary>
    /// Value of the "corpus_no" key.
    /// NOTE(review): Baidu's sample response appears to quote this value as a string;
    /// confirm that JsonUtility populates a long from it.
    /// </summary>
    public long corpus_no;

    /// <summary>Value of the "sn" key (presumably a per-request serial number; confirm).</summary>
    public string sn;

    /// <summary>Recognized text candidates.</summary>
    public string[] result;
}
4.新建一个类 命名 AsrForWeb (代码如下)
using System;
using System.Collections;
using System.Text.RegularExpressions;
using UnityEngine;
using UnityEngine.Networking;
namespace BaiduSpeech
{
    /// <summary>
    /// Speech-to-text through Baidu's HTTP API. Records from the first microphone,
    /// converts the recording to 16-bit PCM and posts it to the recognition endpoint.
    /// The raw response is written to the Unity log.
    /// </summary>
    public class AsrForWeb : AsrBase
    {
        /// <summary>Cached access token; empty until a token request succeeds.</summary>
        private string accessToken = string.Empty;
        /// <summary>Baidu OAuth token endpoint (the query string is appended per request).</summary>
        private const string ACCESS_TOKEN_API_URL = "https://openapi.baidu.com/oauth/2.0/token";
        /// <summary>Baidu speech recognition endpoint.</summary>
        private const string ASR_API_URL = "https://vop.baidu.com/server_api";
        /// <summary>True when at least one microphone was found during platform init.</summary>
        private bool isHaveMic = false;
        /// <summary>Name of the recording device in use.</summary>
        private string currentDeviceName = string.Empty;
        /// <summary>Recording sample rate in Hz; controls recording quality (8000 or 16000).</summary>
        private int recordFrequency = 8000;
        /// <summary>Millisecond timestamp taken when the current recording started.</summary>
        private double lastPressTimestamp = 0;
        /// <summary>Maximum recording length in seconds.</summary>
        private int recordMaxLength = 10;
        /// <summary>Length of the last recording in whole seconds, rounded up.</summary>
        private int trueLength = 0;
        /// <summary>Number of sample frames captured by the last recording.</summary>
        private int recordedSamples = 0;
        /// <summary>Whether the microphone wraps around when the clip is full.</summary>
        private bool isLoop = false;
        /// <summary>True between a successful VoiceStart and the matching VoiceStop/VoiceCancel.</summary>
        private bool isCapturing = false;
        /// <summary>Clip that receives the microphone data.</summary>
        private AudioClip saveAudioClip;

        /// <summary>Platform init: picks the first microphone, if there is one.</summary>
        public override void OnInitPlatform()
        {
            string[] devices = Microphone.devices;
            if (devices.Length > 0)
            {
                isHaveMic = true;
                currentDeviceName = devices[0];
            }
        }

        /// <summary>Stops a recording that is still running when the component is destroyed.</summary>
        public override void OnDispose()
        {
            isCapturing = false;
            if (isHaveMic && Microphone.IsRecording(currentDeviceName))
            {
                Microphone.End(currentDeviceName);
            }
        }

        /// <summary>Initializes speech recognition; logs a warning when the microphone is missing or busy.</summary>
        public override void AsrInit()
        {
            if (isHaveMic == false)
            {
                Debug.LogWarning(GetType() + "/AsrInit()/当前设备没有麦克风!");
                return;
            }
            if (Microphone.IsRecording(currentDeviceName))
            {
                // A busy device is a different condition from a missing one; report it as such.
                Debug.LogWarning(GetType() + "/AsrInit()/麦克风正在录音中!");
                return;
            }
            // Speech initialized successfully.
        }

        /// <summary>
        /// Starts recording. Does nothing when there is no microphone or it is already recording.
        /// </summary>
        /// <param name="json">
        /// Recognition parameters, see https://ai.baidu.com/ai-doc/SPEECH/9k38lxfnk .
        /// Currently not read by this implementation.
        /// </param>
        public override void VoiceStart(string json)
        {
            if (isHaveMic == false || Microphone.IsRecording(currentDeviceName))
            {
                return;
            }

            AudioClip clip = Microphone.Start(currentDeviceName, isLoop, recordMaxLength, recordFrequency);
            if (clip == null)
            {
                Debug.LogWarning(GetType() + "/VoiceStart()/启动录音失败!");
                return;
            }

            saveAudioClip = clip;
            lastPressTimestamp = GetTimestampOfNowWithMillisecond();
            isCapturing = true;
        }

        /// <summary>Cancels the current recognition: recording stops and nothing is sent.</summary>
        public override void VoiceCancel()
        {
            if (isHaveMic == false)
            {
                return;
            }

            isCapturing = false;
            if (Microphone.IsRecording(currentDeviceName))
            {
                Microphone.End(currentDeviceName);
            }
        }

        /// <summary>
        /// Stops recording and, when the recording is longer than one second, starts recognition.
        /// </summary>
        public override void VoiceStop()
        {
            if (isHaveMic == false || isCapturing == false)
            {
                return;
            }
            isCapturing = false;

            // A non-looping recording stops by itself once the clip is full, so the device may
            // already be idle here; that recording must still be recognized, not dropped.
            int position = 0;
            if (Microphone.IsRecording(currentDeviceName))
            {
                // Must be read before End(), which resets the device.
                position = Microphone.GetPosition(currentDeviceName);
                Microphone.End(currentDeviceName);
            }

            trueLength = Mathf.CeilToInt((float)(GetTimestampOfNowWithMillisecond() - lastPressTimestamp) / 1000f);

            // Prefer the exact frame count; fall back to the elapsed time, capped at the clip length.
            recordedSamples = position > 0
                ? position
                : recordFrequency * Mathf.Min(trueLength, recordMaxLength);

            if (trueLength > 1)
            {
                StartCoroutine(StartAsr());
            }
            else
            {
                Debug.LogWarning(GetType() + "/VoiceStop()/录音时长过短!");
            }
        }

        /// <summary>Returns a millisecond timestamp (Unix epoch based) used to measure the recording length.</summary>
        private double GetTimestampOfNowWithMillisecond()
        {
            // 621355968000000000 is the tick count of 1970-01-01T00:00:00Z.
            return (DateTime.UtcNow.Ticks - 621355968000000000) / TimeSpan.TicksPerMillisecond;
        }

        /// <summary>
        /// Requests an access token with <c>apiKey</c>/<c>secretKey</c> and stores it in
        /// <c>accessToken</c>. On any failure a warning is logged and the token stays empty.
        /// </summary>
        private IEnumerator GetAccessToken()
        {
            if (string.IsNullOrEmpty(apiKey) || string.IsNullOrEmpty(secretKey))
            {
                Debug.LogWarning(GetType() + "/GetAccessToken()/验证错误,获取AccessToken失败!");
                yield break;
            }

            string uri = string.Format(
                "{0}?grant_type=client_credentials&client_id={1}&client_secret={2}",
                ACCESS_TOKEN_API_URL,
                UnityWebRequest.EscapeURL(apiKey),
                UnityWebRequest.EscapeURL(secretKey));

            using (UnityWebRequest request = UnityWebRequest.Get(uri))
            {
                yield return request.SendWebRequest();

                // isDone is always true once the yield returns; the error string is what
                // distinguishes a failed request from a successful one.
                if (!string.IsNullOrEmpty(request.error))
                {
                    Debug.LogWarning(GetType() + "/GetAccessToken()/验证错误,获取AccessToken失败!" + request.error);
                    yield break;
                }

                // Match the quoted value itself so the position of the key in the JSON does not matter.
                Match match = Regex.Match(request.downloadHandler.text, "\"access_token\"\\s*:\\s*\"([^\"]+)\"");
                if (match.Success)
                {
                    accessToken = match.Groups[1].Value;
                }
                else
                {
                    Debug.LogWarning(GetType() + "/GetAccessToken()/验证错误,获取AccessToken失败!");
                }
            }
        }

        /// <summary>Sends the last recording to the recognition endpoint and logs the response.</summary>
        private IEnumerator StartAsr()
        {
            // Snapshot the audio before the first yield: a new recording may replace the clip
            // while this coroutine is waiting on the network.
            byte[] datas = ExtractPcm16(saveAudioClip, recordedSamples);
            if (datas.Length == 0)
            {
                Debug.LogWarning(GetType() + "/StartAsr()/语音识别失败!");
                yield break;
            }

            if (string.IsNullOrEmpty(accessToken))
            {
                yield return GetAccessToken();
            }
            if (string.IsNullOrEmpty(accessToken))
            {
                // GetAccessToken has already logged the reason.
                yield break;
            }

            string url = string.Format(
                "{0}?cuid={1}&token={2}",
                ASR_API_URL,
                UnityWebRequest.EscapeURL(SystemInfo.deviceUniqueIdentifier),
                UnityWebRequest.EscapeURL(accessToken));

            // The body must be the bare PCM bytes: a WWWForm would wrap them in a
            // multipart envelope that contradicts the audio/pcm content type.
            using (UnityWebRequest request = new UnityWebRequest(url, UnityWebRequest.kHttpVerbPOST))
            {
                request.uploadHandler = new UploadHandlerRaw(datas);
                request.downloadHandler = new DownloadHandlerBuffer();
                request.SetRequestHeader("Content-Type", "audio/pcm;rate=" + recordFrequency);

                yield return request.SendWebRequest();

                if (string.IsNullOrEmpty(request.error))
                {
                    HandleAsrResponse(request.downloadHandler.text);
                }
                else
                {
                    Debug.LogWarning(GetType() + "/StartAsr()/语音识别失败!" + request.error);
                }
            }
        }

        /// <summary>Logs the raw response and warns when it cannot be parsed or reports an error.</summary>
        private void HandleAsrResponse(string asrResult)
        {
            Debug.Log(asrResult);

            WebAsrParams webAsrParams = null;
            try
            {
                webAsrParams = JsonUtility.FromJson<WebAsrParams>(asrResult);
            }
            catch (ArgumentException exception)
            {
                // JsonUtility rejects malformed JSON with an ArgumentException.
                Debug.LogWarning(GetType() + "/StartAsr()/语音识别失败!" + exception.Message);
                return;
            }

            if (webAsrParams == null)
            {
                Debug.LogWarning(GetType() + "/StartAsr()/语音识别失败!");
                return;
            }

            // Per Baidu's API documentation, err_no 0 means success.
            if (webAsrParams.err_no != 0)
            {
                Debug.LogWarning(GetType() + "/StartAsr()/语音识别失败!err_no=" + webAsrParams.err_no + ", err_msg=" + webAsrParams.err_msg);
            }
        }

        /// <summary>
        /// Converts the first <paramref name="frameCount"/> sample frames of
        /// <paramref name="clip"/> to 16-bit PCM bytes.
        /// </summary>
        /// <returns>The PCM bytes, or an empty array when there is nothing to convert.</returns>
        private byte[] ExtractPcm16(AudioClip clip, int frameCount)
        {
            if (clip == null)
            {
                return new byte[0];
            }

            // Never request more than the clip holds; GetData would wrap around to the start.
            int frames = Mathf.Clamp(frameCount, 0, clip.samples);
            if (frames == 0)
            {
                return new byte[0];
            }

            float[] samples = new float[frames * clip.channels];
            clip.GetData(samples, 0);

            short[] samplesShort = new short[samples.Length];
            for (int index = 0; index < samples.Length; index++)
            {
                // Clamp first so an out-of-range sample cannot overflow the short cast.
                samplesShort[index] = (short)(Mathf.Clamp(samples[index], -1f, 1f) * short.MaxValue);
            }

            byte[] bytes = new byte[samplesShort.Length * sizeof(short)];
            Buffer.BlockCopy(samplesShort, 0, bytes, 0, bytes.Length);
            return bytes;
        }
    }
}
Unity源码:https://github.com/yongliangchen/BaiduSpeechForUnity.git
Android源码:https://github.com/yongliangchen/BaiduSpeechForAndroid.git
QQ交流群:947618353