主要功能介绍
假如你在阅读过程中发现代码有问题,或者我的代码风格习惯不好,请直接指出,我会细心改正,谢谢!
左上角是聊天框。
左下角的播放按钮是用于语音识别,×是让模型停止当前动作与音乐。
右下角的播放按钮是进行语音识别。
右上角是用户当前情绪的分析。
这是本项目的流程图。
语音识别的流程是:用户先说一段话,百度语音开始识别,同时将语音转换成文字,然后再发送给图灵机器人,等待图灵机器人的返回结果,然后再将获取的文字结果通过百度语音转成语音播放出来。
情绪识别的流程是:摄像头保持开启,并且每时每刻都在分析用户当前的情绪,当用户按下情绪识别按钮时,就从那一刻的情绪列表中分析用户当前情绪,并且根据情绪模型做出相应的动作与播放音乐。
语音识别功能
下面贴上代码。(代码引用自作者云图,见原文链接;这里只贴出针对本项目调整后的部分,例如改为手动获取token。)
#region 录制声音转化为文字
private string token; // Baidu access_token sent with every recognition request
private string cuid = "hmy"; // user identifier sent with every recognition request
private string format = "pcm"; // audio format of the uploaded speech
private int rate = 8000; // sampling rate in Hz; also used as the recording frequency
private int channel = 1; // number of audio channels
private string speech; // audio data, base64-encoded
private int len; // length in bytes of the raw (pre-base64) audio
private string lan = "zh"; // language code
private string grant_Type = "client_credentials"; // OAuth grant type (original note: "dpi id10266074" — meaning not shown here, TODO confirm)
private string client_ID = "填入你的百度appkey"; // Baidu app key: replace with your own
private string client_Secret = "填入你的百度Secret"; // Baidu secret key: replace with your own
private string baiduAPI = "http://vop.baidu.com/server_api";
// Token endpoint URL. It is assembled from grant_Type / client_ID / client_Secret
// instead of embedding a real key pair in the source, so the credentials live in
// exactly one place and are never duplicated inside a literal URL.
private string getTokenAPIPath
{
    get
    {
        return "https://aip.baidubce.com/oauth/2.0/token"
            + "?grant_type=" + Uri.EscapeDataString(grant_Type)
            + "&client_id=" + Uri.EscapeDataString(client_ID)
            + "&client_secret=" + Uri.EscapeDataString(client_Secret);
    }
}
private Byte[] clipByte;
/// <summary>
/// Text produced by the most recent successful recognition.
/// </summary>
public static string audioToString;
private AudioSource aud;
private int audioLength; // length of the recording, in whole seconds
/// <summary>
/// Starts a new recording: logs and displays the "start speaking" prompt, then,
/// if at least one microphone is present, stops any recording in progress on the
/// device selected by a null name and records a fresh 10-second, non-looping clip
/// at <c>rate</c> Hz into <c>aud.clip</c>.
/// </summary>
public void StartRecord()
{
    Debug.Log("开始说话");
    emotionsTestToSee1.text = "开始说话";

    bool hasMicrophone = Microphone.devices.Length != 0;
    if (hasMicrophone)
    {
        // End first so a previous, still-running recording does not keep the device.
        Microphone.End(null);
        AudioClip recording = Microphone.Start(null, false, 10, rate);
        aud.clip = recording;
    }
}
/// <summary>
/// Stops the recording, converts the clip to base64-encoded PCM and starts the
/// token request, which in turn starts the recognition request once it succeeds.
/// </summary>
/// <remarks>
/// <c>audioLength</c> is set to the whole seconds recorded so far (position / rate)
/// when the microphone is still recording, otherwise to 10, the clip length that
/// <c>StartRecord</c> asks for.
/// </remarks>
public void EndRecord()
{
    Debug.Log("结束说话");
    emotionsTestToSee1.text = ("结束说话,正在识别...");

    // The position must be read before Microphone.End resets it.
    int lastPos = Microphone.GetPosition(null);
    if (Microphone.IsRecording(null))
    {
        audioLength = lastPos / rate;
    }
    else
    {
        audioLength = 10;
    }
    Microphone.End(null);

    clipByte = GetClipData();
    if (clipByte == null)
    {
        // GetClipData has already logged the reason; there is nothing to upload.
        return;
    }
    len = clipByte.Length;
    speech = Convert.ToBase64String(clipByte);

    // Only the token request is started here. GetToken launches GetAudioString
    // itself on success, so starting GetAudioString here as well would upload the
    // same audio twice (and the first time possibly before any token exists).
    StartCoroutine(GetToken(getTokenAPIPath));
}
/// <summary>
/// Converts the recorded clip held by <c>aud</c> into 16-bit PCM bytes.
/// </summary>
/// <returns>
/// Two bytes per sample, low byte first; or null (after logging an error) when
/// there is no audio source, no clip, or the clip contains no samples.
/// </returns>
public Byte[] GetClipData()
{
    if (aud == null || aud.clip == null)
    {
        Debug.LogError("录音数据为空");
        return null;
    }

    float[] samples = new float[aud.clip.samples];
    aud.clip.GetData(samples, 0);
    if (samples.Length == 0)
    {
        Debug.LogError("录音数据为空");
        return null;
    }

    return ConvertSamplesToPcm16(samples);
}

/// <summary>
/// Scales float samples to signed 16-bit integers and writes them low byte first.
/// </summary>
/// <param name="samples">Samples to convert; values outside [-1, 1] are clamped.</param>
/// <returns>A byte array twice as long as <paramref name="samples"/>.</returns>
private static Byte[] ConvertSamplesToPcm16(float[] samples)
{
    const int rescaleFactor = 32767; // maps 1.0f onto Int16.MaxValue
    Byte[] outData = new Byte[samples.Length * 2];

    for (int i = 0; i < samples.Length; i++)
    {
        float sample = samples[i];
        // Clamp first: an out-of-range sample would otherwise wrap around when
        // narrowed to Int16 and turn a loud peak into a click of opposite sign.
        if (sample > 1f)
        {
            sample = 1f;
        }
        else if (sample < -1f)
        {
            sample = -1f;
        }

        short value = (short)(sample * rescaleFactor);
        // Explicit shifts keep the byte order fixed regardless of the host CPU and
        // avoid allocating a temporary two-byte array for every sample.
        outData[i * 2] = (byte)(value & 0xFF);
        outData[i * 2 + 1] = (byte)((value >> 8) & 0xFF);
    }

    return outData;
}
/// <summary>
/// Requests a Baidu access token and, once one has been obtained, starts the
/// speech recognition request.
/// </summary>
/// <param name="url">URL of the token endpoint, including the query string.</param>
/// <returns>Coroutine enumerator.</returns>
private IEnumerator GetToken(string url)
{
    WWW getTW = new WWW(url);
    yield return getTW;

    // The request has finished once the yield returns, so only the error needs checking.
    // WWW.error may be null or empty when nothing went wrong.
    if (!string.IsNullOrEmpty(getTW.error))
    {
        Debug.LogError("获取令牌出错" + getTW.error);
        yield break;
    }

    // The response body is a JSON document; the token is its "access_token"
    // member. Using the whole body as the token (or a token pasted into the
    // source, which expires) makes the recognition request fail authentication.
    string parsedToken = ParseAccessToken(getTW.text);
    if (string.IsNullOrEmpty(parsedToken))
    {
        Debug.LogError("获取令牌出错" + getTW.text);
        yield break;
    }

    token = parsedToken;
    StartCoroutine(GetAudioString(baiduAPI));
}

/// <summary>
/// Extracts the "access_token" member from a token endpoint response.
/// </summary>
/// <param name="responseText">Raw response body.</param>
/// <returns>The token, or null when the body is empty, is not a JSON object, or has no such member.</returns>
private static string ParseAccessToken(string responseText)
{
    if (string.IsNullOrEmpty(responseText))
    {
        return null;
    }

    JsonData response;
    try
    {
        response = JsonMapper.ToObject(responseText);
    }
    catch (JsonException)
    {
        return null;
    }

    if (response == null || !response.IsObject)
    {
        return null;
    }

    // Check for the key first: indexing a missing member throws.
    IDictionary members = (IDictionary)response;
    if (!members.Contains("access_token"))
    {
        return null;
    }

    return response["access_token"].ToString();
}
/// <summary>
/// Uploads the recorded speech for recognition and, on success, stores the
/// recognised text in <c>audioToString</c> and passes it to <c>GetAnswer</c>.
/// </summary>
/// <param name="url">URL of the recognition endpoint.</param>
/// <returns>Coroutine enumerator.</returns>
private IEnumerator GetAudioString(string url)
{
    if (string.IsNullOrEmpty(token))
    {
        // Without a token the request can only be rejected; GetToken starts this
        // coroutine again as soon as it has obtained one.
        Debug.LogWarning("没有成功:token为空");
        yield break;
    }

    JsonWriter jw = new JsonWriter();
    jw.WriteObjectStart();
    jw.WritePropertyName("format");
    jw.Write(format);
    jw.WritePropertyName("rate");
    jw.Write(rate);
    jw.WritePropertyName("channel");
    jw.Write(channel);
    jw.WritePropertyName("token");
    jw.Write(token);
    jw.WritePropertyName("cuid");
    jw.Write(cuid);
    jw.WritePropertyName("len");
    jw.Write(len);
    jw.WritePropertyName("speech");
    jw.Write(speech);
    jw.WriteObjectEnd();

    // JSON is sent as UTF-8; Encoding.Default depends on the machine's code page.
    WWW getASW = new WWW(url, Encoding.UTF8.GetBytes(jw.ToString()));
    yield return getASW;

    if (!string.IsNullOrEmpty(getASW.error))
    {
        Debug.LogError(getASW.error);
        yield break;
    }

    JsonData getASWJson = JsonMapper.ToObject(getASW.text);
    Debug.Log(getASWJson.ToString());

    string errMsg = getASWJson["err_msg"].ToString();
    if (errMsg != "success.")
    {
        Debug.LogWarning("没有成功:" + errMsg);
        yield break;
    }

    JsonData results = getASWJson["result"];
    if (results == null || !results.IsArray || results.Count == 0)
    {
        Debug.LogWarning("没有成功:识别结果为空");
        yield break;
    }

    string recognized = results[0].ToString();
    // Drop a single trailing full-width comma. EndsWith is safe on an empty
    // string, whereas Substring(Length - 1) throws.
    if (recognized.EndsWith(",", StringComparison.Ordinal))
    {
        recognized = recognized.Substring(0, recognized.Length - 1);
    }

    audioToString = recognized;
    Debug.Log("说话的问题是:" + audioToString);
    GetAnswer(audioToString);
}
#endregion
这里要说几个问题:
自动获取token似乎总会报这几个错误
token error是没有成功获取到token
第二个是身份验证失败
到百度语音的官方论坛查询后,发现很多人都遇到过同样的问题;我也尝试了各种方法,最后无奈之下只能选择最简单也是最笨的办法——写死token(无奈)。
情绪识别功能
下面的代码主要就是情绪识别功能。当时在怎么用Affdex的SDK这一块卡了很久,自己是超级小白,连工程都不会建的那种,多亏组内同学给了我很多帮助,前前后后也给了很多建议,总算把这一功能弄出来了。
Affdex的SDK很强大,它能判断出人脸上面的每一个部位比如眉毛嘴角眼睛的变化,从而分析出用户的表情。但是我只用到情绪分析部分,所以就把它截取了。
分析代码得知,Affdex把分析结果存储在faces这个字典中。我们用ToString().Split('\n')把其中的Face对象拆分成逐行文本,并把各项情绪的数值存进一个数组,然后通过比较找出数组中的最大值来得到用户最可能的情绪,最后由模型做出相应的反馈。
获取模型然后根据情绪来做出动作这些都不难,我用的是Switch语句来实现。
using Affdex;
using System;
using System.Collections.Generic;
using UnityEngine;
using UnityEngine.UI;
/// <summary>
/// Receives Affdex face-analysis callbacks, shows the current readings in
/// <see cref="textArea"/>, keeps the latest emotion scores in
/// <see cref="emotionsArray"/>, and, when the button handler runs, makes the model
/// react to the strongest score.
/// </summary>
public class Listener : ImageResultsListener
{
    // Animator bool parameters that are cleared before a new reaction is chosen.
    private static readonly string[] ReactionFlags =
    {
        "happy", "Contempt", "Sadness", "shiluo", "happy2", "Sadness2", "surprise"
    };

    // Zero-based line numbers of Face.ToString() output that are displayed.
    // Line 22 is deliberately not part of the list.
    private static readonly int[] DisplayedLineIndices = { 18, 19, 20, 21, 23, 24, 25, 26, 27 };

    private GameObject emotionsTestToSee;
    // Message text shared with other scripts.
    public static Text emotionsTestToSee1;
    // True between onFaceFound and onFaceLost.
    public static bool foundFace = false;
    // Animator found on the object tagged "model".
    private Animator modelAnimator;
    private GameObject model;
    // Looked up lazily and reused instead of searching the scene on every result.
    private DebugFeatureViewer featureViewer;
    // Text most recently written to textArea.
    public string i;
    public Dictionary<Emotions, float> Emotions { get; private set; }
    public Text textArea;
    // Scores by slot: 0 anger, 1 contempt, 2 disgust, 3 fear, 4 joy,
    // 5 negative valence (used as the "sad / negative mood" score), 6 surprise.
    public float[] emotionsArray = new float[7];
    public Dictionary<int, Face> emotion;
    // Slot of the strongest score found by max1, or -1 when no score is above zero.
    public static int max = -1;

    /// <summary>
    /// Looks up the model's Animator (tag "model") and the message Text below the
    /// object tagged "ET", and sets that text's font size to 300.
    /// </summary>
    private void Awake()
    {
        model = GameObject.FindGameObjectWithTag("model");
        if (model != null)
        {
            modelAnimator = model.GetComponent<Animator>();
        }
        else
        {
            Debug.LogError("Listener: no GameObject tagged \"model\" was found");
        }

        emotionsTestToSee = GameObject.FindGameObjectWithTag("ET");
        if (emotionsTestToSee != null)
        {
            emotionsTestToSee1 = emotionsTestToSee.GetComponentInChildren<Text>();
        }

        if (emotionsTestToSee1 != null)
        {
            emotionsTestToSee1.fontSize = 300;
        }
        else
        {
            Debug.LogError("Listener: no Text was found under a GameObject tagged \"ET\"");
        }
    }

    /// <summary>
    /// Loads an AudioClip from Resources and plays it at this object's position.
    /// </summary>
    /// <param name="str">Resources path of the clip.</param>
    public void Play(string str)
    {
        AudioClip clip = (AudioClip)Resources.Load(str, typeof(AudioClip));
        if (clip == null)
        {
            Debug.LogWarning("Listener: audio clip not found in Resources: " + str);
            return;
        }

        AudioSource.PlayClipAtPoint(clip, transform.position);
    }

    /// <summary>Marks a face as present.</summary>
    public override void onFaceFound(float timestamp, int faceId)
    {
        foundFace = true;
        Debug.Log("Found the face");
    }

    /// <summary>Marks the face as gone.</summary>
    public override void onFaceLost(float timestamp, int faceId)
    {
        foundFace = false;
        Debug.Log("Lost the face");
    }

    /// <summary>
    /// Displays the readings of one detected face and stores its emotion scores;
    /// tints the text reddish when no face was reported.
    /// </summary>
    /// <param name="faces">Detected faces keyed by an int id.</param>
    public override void onImageResults(Dictionary<int, Face> faces)
    {
        Face face;
        if (!TryPickFace(faces, out face))
        {
            textArea.CrossFadeColor(new Color(1, 0.7f, 0.7f), 0.2f, true, false);
            return;
        }

        if (featureViewer == null)
        {
            featureViewer = GameObject.FindObjectOfType<DebugFeatureViewer>();
        }
        if (featureViewer != null)
        {
            featureViewer.ShowFace(face);
        }

        // Adjust font size to fit the selected platform.
        bool isMobile = Application.platform == RuntimePlatform.IPhonePlayer
            || Application.platform == RuntimePlatform.Android;
        textArea.fontSize = isMobile ? 36 : 12;

        i = BuildDisplayText(face.ToString());
        textArea.text = i;

        emotionsArray[0] = face.Emotions[Affdex.Emotions.Anger];
        emotionsArray[1] = face.Emotions[Affdex.Emotions.Contempt];
        emotionsArray[2] = face.Emotions[Affdex.Emotions.Disgust];
        emotionsArray[3] = face.Emotions[Affdex.Emotions.Fear];
        emotionsArray[4] = face.Emotions[Affdex.Emotions.Joy];
        // Only the negative side of valence counts towards the negative-mood
        // score. Taking the absolute value would let a clearly positive reading
        // win this slot and trigger the "sad" reaction for a happy user.
        emotionsArray[5] = Math.Max(0f, -face.Emotions[Affdex.Emotions.Valence]);
        emotionsArray[6] = face.Emotions[Affdex.Emotions.Surprise];

        textArea.CrossFadeColor(Color.white, 0.2f, true, false);
    }

    /// <summary>
    /// Finds the slot holding the largest score that is greater than zero and
    /// stores it in <see cref="max"/>; stores -1 when no score is above zero.
    /// </summary>
    /// <param name="j">Scores to scan; <see cref="emotionsArray"/> is used when null.</param>
    public void max1(float[] j)
    {
        float[] scores = j ?? emotionsArray;

        // Reset first so that a result from an earlier call cannot survive a
        // scan in which nothing is above zero.
        max = -1;
        float comp = 0;
        for (int index = 0; index < scores.Length; index++)
        {
            if (scores[index] > comp)
            {
                comp = scores[index];
                max = index;
            }
        }
    }

    /// <summary>
    /// Button handler: reports when no face is visible; otherwise clears every
    /// reaction flag on the animator and triggers the reaction that matches the
    /// strongest score. Slots 2 and 3, and "no score above zero", have no reaction.
    /// </summary>
    public void OnButtonclick()
    {
        int rand = UnityEngine.Random.Range(0, 2);
        if (foundFace == false)
        {
            emotionsTestToSee1.text = "摄像头没有识别到脸部";
            return;
        }

        max1(emotionsArray);
        Debug.Log(max);

        if (modelAnimator != null)
        {
            foreach (string flag in ReactionFlags)
            {
                modelAnimator.SetBool(flag, false);
            }
        }

        switch (max)
        {
            case 0: // anger slot
                React("shiluo", "感觉你有点低落,o(>﹏<)o");
                break;
            case 1: // contempt
                React("Contempt", "我怎么觉得你在蔑视我!哼,生气了!");
                break;
            case 4: // joy: one of two dances, picked at random
                if (rand == 0)
                {
                    emotionsTestToSee1.fontSize = 40;
                    React("happy", "happy!!跳支舞给你看吧");
                }
                else
                {
                    React("happy2", "我觉得你在笑!!跳支舞给你看吧");
                }
                break;
            case 5: // negative mood: one of two reactions, picked at random
                if (rand == 0)
                {
                    React("Sadness", "感觉你情绪消极,Take it easy!");
                }
                else
                {
                    React("Sadness2", "你是不是压力有点大?要记得放松哦!~");
                }
                break;
            case 6: // surprise
                React("surprise", "我觉得你有点惊讶!哈哈哈,我是不是很准啊~");
                break;
        }
    }

    // Turns on one animator flag (when an animator was found) and shows the message.
    private void React(string animatorFlag, string message)
    {
        if (modelAnimator != null)
        {
            modelAnimator.SetBool(animatorFlag, true);
        }
        emotionsTestToSee1.text = message;
    }

    // Picks the face stored under key 0 when present, otherwise any reported face.
    // Returns false when the dictionary is null or empty.
    private static bool TryPickFace(Dictionary<int, Face> faces, out Face face)
    {
        face = default(Face);
        if (faces == null || faces.Count == 0)
        {
            return false;
        }
        if (faces.TryGetValue(0, out face))
        {
            return true;
        }
        foreach (KeyValuePair<int, Face> entry in faces)
        {
            face = entry.Value;
            return true;
        }
        return false;
    }

    // Joins the displayed lines of a face description with '\n'. The text is
    // split once, and indices beyond the end are skipped instead of throwing.
    private static string BuildDisplayText(string faceDescription)
    {
        string[] lines = faceDescription.Split('\n');
        List<string> shown = new List<string>(DisplayedLineIndices.Length);
        foreach (int index in DisplayedLineIndices)
        {
            if (index < lines.Length)
            {
                shown.Add(lines[index]);
            }
        }
        return string.Join("\n", shown.ToArray());
    }
}
以上就是我的主要代码分析,下一篇博客会详细地分析代码,并总结自己的经验。