1,首先打开百度语音识别官网,注册一个账户成为开发者,接着创建一个应用,下载百度提供源代码 。
下载地址: http://yuyin.baidu.com/sdk/
官方文档地址:http://yuyin.baidu.com/docs/asr/54
2,打开源代码,进行配置参数或修改要识别的语言文件
demo1
<?php
define('AUDIO_FILE', "./text2audio_1.wav"); //语音文件地址,值支持本地
$url = "http://vop.baidu.com/server_api";
//put your params here
$cuid = "";
$apiKey = "";
$secretKey = "";
$auth_url = "https://openapi.baidu.com/oauth/2.0/token?grant_type=client_credentials&client_id=".$apiKey."&client_secret=".$secretKey;
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $auth_url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5);
$response = curl_exec($ch);
if(curl_errno($ch))
{
print curl_error($ch);
}
curl_close($ch);
$response = json_decode($response, true);
$token = $response['access_token'];
$audio = file_get_contents(AUDIO_FILE);
$base_data = base64_encode($audio);
$array = array(
"format" => "wav",
"rate" => 8000,
"channel" => 1,
// "lan" => "zh",
"token" => $token,
"cuid"=> $cuid,
//"url" => "http://www.xxx.com/sample.pcm",
//"callback" => "http://www.xxx.com/audio/callback",
"len" => filesize(AUDIO_FILE),
"speech" => $base_data,
);
$json_array = json_encode($array);
$content_len = "Content-Length: ".strlen($json_array);
$header = array ($content_len, 'Content-Type: application/json; charset=utf-8');
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 30);
curl_setopt($ch, CURLOPT_TIMEOUT, 30);
curl_setopt($ch, CURLOPT_POSTFIELDS, $json_array);
$response = curl_exec($ch);
if(curl_errno($ch))
{
print curl_error($ch);
}
curl_close($ch);
echo $response;
$response = json_decode($response, true);
var_dump($response);
demo2
define('AUDIO_FILE', "./text.wav");
//put your params here
$cuid = "";
$apiKey = "";
$secretKey = "";
$auth_url = "https://openapi.baidu.com/oauth/2.0/token?grant_type=client_credentials&client_id=".$apiKey."&client_secret=".$secretKey;
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $auth_url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5);
$response = curl_exec($ch);
if(curl_errno($ch))
{
print curl_error($ch);
}
curl_close($ch);
$response = json_decode($response, true);
$token = $response['access_token'];
$url = "http://vop.baidu.com/server_api?cuid=".$cuid."&token=".$token;
$url = $url."&lan=zh";
$audio = file_get_contents(AUDIO_FILE);
$content_len = "Content-Length: ".strlen($audio);
$header = array ($content_len,'Content-Type: audio/pcm; rate=8000',);
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 30);
curl_setopt($ch, CURLOPT_TIMEOUT, 30);
curl_setopt($ch, CURLOPT_POSTFIELDS, $audio);
$response = curl_exec($ch);
if(curl_errno($ch))
{
print curl_error($ch);
}
curl_close($ch);
echo $response;
$response = json_decode($response, true);
echo '<pre>';
var_dump($response);
3、运行文件识别,运行后发现调取接口成功,但是识别出来的正确率百分之一都不到,于是查看原因发现,百度语音识别只支持 8k/16k 采样率 16bit 位深的单声道语音,进行音频格式转换后正确率可达成80% 。
注意事项:
语音识别接口支持 POST 方式
目前 API 仅支持整段语音识别的模式,即需要上传整段语音进行识别
语音数据上传方式有两种:隐示发送和显示发送
原始语音的录音格式目前只支持评测 8k/16k 采样率 16bit 位深的单声道语音
压缩格式支持:pcm(不压缩)、wav、opus、speex、amr、x-flac
系统支持语言种类:中文(zh)、粤语(ct)、英文(en)
4、因为是调用接口测试所以在转换音频格式使用的是格式工厂,如果项目需要可使用代码自动转换(自行百度)
然后进行确认转换得出来的格式再进行测试即可,谢谢~