.Net 讯飞语音识别Demo

讯飞语音识别官方号称具有以下六个优势:

  1. 超过95%的准确率
  2. 支持多种语种和方言
  3. 方便快捷的信息沟通
  4. 个性的语音识别
  5. 中文标点智能预测
  6. 支持垂直领域和应用级听写

获得APPID和调用Dll

在讯飞开放平台的控制台新建一个应用,平台选择Windows,新建完成后为应用添加语音听写服务。

下载SDK

解压下载好的 SDK 压缩包,取出其中 bin 目录下的 msc.dll 备用。

注意:下面步骤里的Dll必须使用自行下载的版本,此Dll并不通用

Coding

识别文件实现

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
/// <summary>
/// Starts speech recognition on a thread-pool thread and returns immediately.
/// The work itself is done by <see cref="Audio2Txt"/>.
/// </summary>
/// <param name="inFile">Audio file to recognize (per the original note: headerless PCM, 16 kHz sample rate, 16-bit samples, mono).</param>
/// <param name="outFile">Optional path of a file that receives the recognition result.</param>
public void Audio2TxtAsync(string inFile, string outFile = null)
{
    // Delegate.BeginInvoke throws PlatformNotSupportedException on .NET Core / .NET 5+
    // and, on .NET Framework, must be paired with EndInvoke. Task.Run works on both
    // and needs no cleanup call. The task is intentionally not awaited (fire-and-forget).
    System.Threading.Tasks.Task.Run(() => Audio2Txt(inFile, outFile));
}

/// <summary>
/// Streams an audio file to the recognition service in chunks and collects the recognized text.
/// </summary>
/// <remarks>
/// Every failure inside the method is caught and its message written to the console; nothing is
/// rethrown. Whatever happens, the session is ended, MSPLogout is called and IsrEnd is raised.
/// </remarks>
/// <param name="inFile">Audio file whose name ends in "wav" or "pcm" (per the original note: 16 kHz sample rate, 16-bit samples, mono).</param>
/// <param name="outFile">Optional path of a file that receives the recognition result; skipped when null.</param>
public void Audio2Txt(string inFile, string outFile = null)
{
    var ret = 0;
    var result = new StringBuilder();
    // Unmanaged staging buffer; released in the finally block so it cannot leak on errors.
    var bp = IntPtr.Zero;
    try
    {
        // Simulate recording: the audio is read from a file.
        if (!File.Exists(inFile)) throw new FileNotFoundException("文件" + inFile + "不存在!", inFile);

        // Ordinal, case-insensitive suffix check: safe for names shorter than three characters
        // and independent of the current culture.
        var isWav = inFile.EndsWith("WAV", StringComparison.OrdinalIgnoreCase);
        var isPcm = inFile.EndsWith("PCM", StringComparison.OrdinalIgnoreCase);
        if (!isWav && !isPcm) throw new NotSupportedException("音频文件格式不对!");

        var status = AudioStatus.IsrAudioSampleContinue;
        // epStatus:   state of the end-point detector
        // recStatus:  state of the recognizer
        // rsltStatus: state reported together with each fetched result
        var epStatus = EpStatus.IsrEpNull;
        var recStatus = RecogStatus.IsrRecNull;
        var rsltStatus = RecogStatus.IsrRecNull;

        // Read-only access is enough and also works for read-only files.
        using (var fp = new FileStream(inFile, FileMode.Open, FileAccess.Read))
        {
            // Skip the first 44 bytes of a WAV file (presumably the RIFF header — assumes a canonical header).
            if (isWav) fp.Position = 44;

            var buff = new byte[BufferNum];
            bp = Marshal.AllocHGlobal(BufferNum);

            int len;
            // Loop on the byte count actually read: this terminates even when the file is
            // shorter than the skipped header, where Position never equals Length.
            while ((len = fp.Read(buff, 0, BufferNum)) > 0)
            {
                // Copy only the bytes read so a short final chunk carries no stale data.
                Marshal.Copy(buff, 0, bp, len);

                // Send this chunk of audio to the server.
                ret = AsrDll.QISRAudioWrite(_sessID, bp, (uint)len, status, ref epStatus, ref recStatus);
                if (ret != 0)
                {
                    throw new InvalidOperationException("QISRAudioWrite err,errCode=" + ((ErrorCode)ret).ToString("G"));
                }

                // The server already has a partial result available.
                if (recStatus == RecogStatus.IsrRecStatusSuccess)
                {
                    var p = AsrDll.QISRGetResult(_sessID, ref rsltStatus, WaitTime, ref ret);
                    if (p != IntPtr.Zero)
                    {
                        var tmp = FlyTts.Ptr2Str(p);
                        DataArrived?.Invoke(this, new DataArrivedEventArgs(tmp));
                        result.Append(tmp);
                        Console.WriteLine(@"返回部分结果!:" + tmp);
                    }
                }
                Thread.Sleep(500);
            }
        }

        // Tell the service that the audio stream is finished.
        status = AudioStatus.IsrAudioSampleLast;

        ret = AsrDll.QISRAudioWrite(_sessID, bp, 1, status, ref epStatus, ref recStatus);
        if (ret != 0) throw new InvalidOperationException("QISRAudioWrite write last audio err,errCode=" + ((ErrorCode)ret).ToString("G"));

        var loopCount = 0;
        // After the last chunk, keep polling the server for results.
        // The number of polls is capped in case the network is slow.
        do
        {
            var p = AsrDll.QISRGetResult(_sessID, ref rsltStatus, WaitTime, ref ret);
            if (p != IntPtr.Zero)
            {
                var tmp = FlyTts.Ptr2Str(p);
                DataArrived?.Invoke(this, new DataArrivedEventArgs(tmp)); // raise the "recognition data arrived" event
                result.Append(tmp);
                Console.WriteLine(@"传完音频后返回结果!:" + tmp);
            }
            if (ret != 0) throw new InvalidOperationException("QISRGetResult err,errCode=" + ((ErrorCode)ret).ToString("G"));
            Thread.Sleep(200);
        } while (rsltStatus != RecogStatus.IsrRecStatusSpeechComplete && loopCount++ < 30);

        if (outFile != null)
        {
            // WriteAllBytes creates or truncates the target, so a previous, longer result
            // cannot leave stale bytes behind (FileMode.OpenOrCreate did not truncate).
            File.WriteAllBytes(outFile, Encoding.Default.GetBytes(result.ToString()));
        }
    }
    catch (Exception ex)
    {
        // Best-effort demo behaviour: report the failure and fall through to cleanup.
        Console.WriteLine(ex.Message);
    }
    finally
    {
        if (bp != IntPtr.Zero) Marshal.FreeHGlobal(bp);
        AsrDll.QISRSessionEnd(_sessID, string.Empty);
        AsrDll.MSPLogout();
        IsrEnd?.Invoke(this, new EventArgs()); // notify listeners that recognition has ended
    }
}

录音实现

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
/// <summary>
/// Starts capturing audio (16000 Hz, 16-bit, 1 channel) into a newly named WAV file
/// inside the "temp" directory, creating that directory when it does not exist.
/// </summary>
public void StartRecoding()
{
    const string tempDirectory = "temp";

    // Configure the recorder before publishing it through WaveMonitor.
    var recorder = new WaveInEvent();
    recorder.WaveFormat = new WaveFormat(16000, 16, 1);
    WaveMonitor = recorder;

    if (!Directory.Exists(tempDirectory))
    {
        Directory.CreateDirectory(tempDirectory);
    }

    // A GUID-based name keeps each recording in its own file.
    var wavName = Guid.NewGuid().ToString() + ".wav";
    _fileName = Path.Combine(tempDirectory, wavName);

    Writer = new WaveFileWriter(_fileName, WaveMonitor.WaveFormat);

    // Append every captured buffer to the WAV file.
    WaveMonitor.DataAvailable += (sender, e) =>
    {
        Writer.Write(e.Buffer, 0, e.BytesRecorded);
    };

    // Release the writer and the recorder once recording has stopped.
    WaveMonitor.RecordingStopped += (sender, e) =>
    {
        Writer?.Dispose();
        WaveMonitor?.Dispose();
    };

    WaveMonitor.StartRecording();
}

/// <summary>
/// Stops the recorder, closes the WAV writer if there is one, and then runs
/// recognition on the recorded file on the calling thread.
/// </summary>
public void StopRecoding()
{
    WaveMonitor.StopRecording();

    // Close the writer so the file is complete before it is read back.
    var writer = Writer;
    if (writer != null)
    {
        writer.Close();
    }

    Audio2Txt(_fileName);
}

完整源码

托管在GitHub

结论

实测下来,讯飞语音识别的识别率其实并没有比百度好多少。对服务提供商来说,准确率自然是越高越好,但在实际应用中,90% 和 95% 的差距并不大。因此,虽然讯飞在数据上看起来更好一些,但它的 API 易用性实在比较差,还是更推荐百度一些。

欢迎关注我的其它发布渠道