最近在項目開發中,需要實現將語音識別並轉換成文本的功能。研究了一下科大訊飛的SDK,附上Demo分享給大家。
研發前先得做一些準備。
1、註冊科大訊飛開發者帳號(http://www.xfyun.cn)
2、下載對應開發平台(iOS、Android或其他)所需要的SDK(SDK包含:說明文檔、SDK即iflyMSC.framework、Demo)
3、項目中添加SDK(添加時,先將SDK複製粘貼到項目文件夾,再按下面的添加方法加入到項目引用),以及相關聯的framework
添加方法:TARGETS-Build Phases-Link Binary With Libraries-"+"-Choose frameworks and libraries to add-add other,或選擇對應的framework-add
4、使用時要添加對應的頭文件
特別說明:
1、APPID與下載的SDK綁定,可在下載的Demo中找到;如果替換了SDK,APPID也必須跟著一起替換。
2、在使用前,務必在AppDelegate的方法"- (BOOL)application:(UIApplication *)application didFinishLaunchingWithOptions:(NSDictionary *)launchOptions {}"中進行初始化操作。
3、需要有網絡的情況下才能使用。
如圖
下載的科大訊飛SDK文件
Demo中的APPID
添加SDK
添加關聯framework
語音轉文本實現代碼
.h文件
#import <Foundation/Foundation.h>

// iFlytek SDK headers
#import "iflyMSC.framework/Headers/IFlyMSC.h"
#import "iflyMSC.framework/Headers/IFlySpeechUtility.h"
#import "iflyMSC/IFlySpeechConstant.h"

#pragma mark - Recognition configuration

/**************************************************************************/

/// Shared holder for the recognition parameters that VoiceConversion applies
/// to the speech recognizer before each listening session.
@interface IATConfig : NSObject

/// Process-wide configuration instance.
+ (IATConfig *)sharedInstance;

/// Accent identifiers.
+ (NSString *)mandarin;
+ (NSString *)cantonese;
+ (NSString *)henanese;

/// Language identifiers.
+ (NSString *)chinese;
+ (NSString *)english;

/// Sample-rate values, as strings.
+ (NSString *)lowSampleRate;
+ (NSString *)highSampleRate;

/// Punctuation switch values, as strings.
+ (NSString *)isDot;
+ (NSString *)noDot;

/** The following values need to be applied through IFlySpeechRecognizer. */
// `copy` so that a caller passing a mutable string cannot change the value afterwards.
@property (nonatomic, copy) NSString *speechTimeout;
@property (nonatomic, copy) NSString *vadEos;
@property (nonatomic, copy) NSString *vadBos;
@property (nonatomic, copy) NSString *language;
@property (nonatomic, copy) NSString *accent;
@property (nonatomic, copy) NSString *dot;
@property (nonatomic, copy) NSString *sampleRate;

/** The following values do not need to be set by callers. */
@property (nonatomic, assign) BOOL haveView;
@property (nonatomic, copy) NSArray *accentIdentifer;
@property (nonatomic, copy) NSArray *accentNickName;

@end

/**************************************************************************/

#pragma mark - Speech dictation

/// Block-based wrapper around the iFlytek speech recognizer.
@interface VoiceConversion : NSObject

/// Initializes the speech SDK. Call once at application launch.
+ (void)VoiceInitialize;

/// Starts recording and recognizing.
/// @param startListening Called with whether the recognizer could be started.
/// @param begin Called when the recognizer reports the beginning of speech.
/// @param end Called when the recognizer reports the end of speech.
/// @param error Called when the session finishes; isSuccess is YES when the error code is 0.
/// @param result Called with the text recognized so far in this session.
/// @param volume Called with the current input volume.
- (void)voiceStart:(void (^)(BOOL isStart))startListening
       speechBegin:(void (^)(void))begin
         speechEnd:(void (^)(void))end
       speechError:(void (^)(BOOL isSuccess))error
      speechResult:(void (^)(NSString *text))result
      speechVolume:(void (^)(int volume))volume;

/// Cancels the current recognition session.
- (void)voiceCancel;

/// Stops recording.
- (void)voiceStop;

@end
.m文件
#import "VoiceConversion.h"

#pragma mark - Recognition configuration

/**************************************************************************/

static NSString *const PUTONGHUA = @"mandarin";
static NSString *const YUEYU = @"cantonese";
static NSString *const HENANHUA = @"henanese";
static NSString *const ENGLISH = @"en_us";
static NSString *const CHINESE = @"zh_cn";

@implementation IATConfig

/// Creates a configuration populated with the default settings.
- (instancetype)init {
    self = [super init];
    if (self) {
        [self defaultSetting];
    }
    return self;
}

/// Returns the process-wide configuration, created on first use.
+ (IATConfig *)sharedInstance {
    static IATConfig *instance = nil;
    static dispatch_once_t onceToken;
    dispatch_once(&onceToken, ^{
        instance = [[IATConfig alloc] init];
    });
    return instance;
}

/// Resets every setting to its default value.
- (void)defaultSetting {
    _speechTimeout = @"30000";
    _vadEos = @"3000";
    _vadBos = @"3000";
    _dot = @"1";
    _sampleRate = @"16000";
    _language = CHINESE;
    _accent = PUTONGHUA;
    _haveView = NO; // no built-in recognition view by default
    _accentNickName = @[@"粵語", @"普通話", @"河南話", @"英文"];
}

+ (NSString *)mandarin {
    return PUTONGHUA;
}

+ (NSString *)cantonese {
    return YUEYU;
}

+ (NSString *)henanese {
    return HENANHUA;
}

+ (NSString *)chinese {
    return CHINESE;
}

+ (NSString *)english {
    return ENGLISH;
}

+ (NSString *)lowSampleRate {
    return @"8000";
}

+ (NSString *)highSampleRate {
    return @"16000";
}

+ (NSString *)isDot {
    return @"1";
}

+ (NSString *)noDot {
    return @"0";
}

@end

/**************************************************************************/

#pragma mark - Speech dictation

// NOTE(review): the APPID is tied to the downloaded SDK build; replace both together.
static NSString *const VoiceAPPID = @"572016e4";
static NSString *const VoiceTimeOut = @"20000";

// The extension adopts the recognizer delegate protocol because this object
// installs itself as the recognizer's delegate in -initializeVoice.
@interface VoiceConversion () <IFlySpeechRecognizerDelegate>

@property (nonatomic, strong) NSMutableString *resultText;
@property (nonatomic, strong) IFlySpeechRecognizer *iFlySpeechRecognizer;
@property (nonatomic, copy) void (^beginSpeech)(void);
@property (nonatomic, copy) void (^endSpeech)(void);
@property (nonatomic, copy) void (^errorSpeech)(BOOL isSuccess);
@property (nonatomic, copy) void (^resultSpeech)(NSString *text);
@property (nonatomic, copy) void (^volumeSpeech)(int volume);

@end

@implementation VoiceConversion

#pragma mark Initialization------------

/// Initializes the speech SDK: logging, working directory and application id.
+ (void)VoiceInitialize {
    // SDK log level; the log is saved in the working path set below.
    [IFlySetting setLogFile:LVL_ALL];

    // Also print the SDK log to the console.
    [IFlySetting showLogcat:YES];

    // SDK working path: the caches directory.
    NSArray *paths = NSSearchPathForDirectoriesInDomains(NSCachesDirectory, NSUserDomainMask, YES);
    NSString *cachePath = [paths objectAtIndex:0];
    [IFlySetting setLogFilePath:cachePath];

    // The appid identifies the application and must be passed at initialization.
    // Per the original author's note, initialization is asynchronous, should run at
    // app launch (see MSCAppDelegate.m in the SDK demo), and using the service
    // before initialization usually yields error code 10111.
    NSString *initString = [[NSString alloc] initWithFormat:@"appid=%@", VoiceAPPID];
    [IFlySpeechUtility createUtility:initString];
}

#pragma mark Instance------------

- (void)dealloc {
    // Use the ivar rather than the lazy getter so that teardown never creates or
    // reconfigures the shared recognizer.
    [_iFlySpeechRecognizer cancel];
    // NOTE(review): the SDK delegate is presumably a non-zeroing reference, so it is
    // cleared here to avoid callbacks into a deallocated object — confirm.
    if (_iFlySpeechRecognizer.delegate == self) {
        _iFlySpeechRecognizer.delegate = nil;
    }
}

/// Accumulated recognized text for the current session, created on first use.
- (NSMutableString *)resultText {
    if (!_resultText) {
        _resultText = [[NSMutableString alloc] init];
    }
    return _resultText;
}

/// The shared recognizer, reset and switched to dictation ("iat") on first use.
- (IFlySpeechRecognizer *)iFlySpeechRecognizer {
    if (_iFlySpeechRecognizer == nil) {
        _iFlySpeechRecognizer = [IFlySpeechRecognizer sharedInstance];
        [_iFlySpeechRecognizer setParameter:@"" forKey:[IFlySpeechConstant PARAMS]];

        // Dictation mode.
        [_iFlySpeechRecognizer setParameter:@"iat" forKey:[IFlySpeechConstant IFLY_DOMAIN]];
    }
    return _iFlySpeechRecognizer;
}

/// Installs self as the recognizer delegate and applies the shared IATConfig values.
- (void)initializeVoice {
    IFlySpeechRecognizer *recognizer = self.iFlySpeechRecognizer;
    recognizer.delegate = self;

    IATConfig *config = [IATConfig sharedInstance];

    // Maximum recording time.
    [recognizer setParameter:config.speechTimeout forKey:[IFlySpeechConstant SPEECH_TIMEOUT]];
    // Back end point.
    [recognizer setParameter:config.vadEos forKey:[IFlySpeechConstant VAD_EOS]];
    // Front end point.
    [recognizer setParameter:config.vadBos forKey:[IFlySpeechConstant VAD_BOS]];
    // Network wait time.
    [recognizer setParameter:VoiceTimeOut forKey:[IFlySpeechConstant NET_TIMEOUT]];
    // Sample rate; 16K is recommended.
    [recognizer setParameter:config.sampleRate forKey:[IFlySpeechConstant SAMPLE_RATE]];

    BOOL isChinese = [config.language isEqualToString:[IATConfig chinese]];
    BOOL isEnglish = [config.language isEqualToString:[IATConfig english]];
    if (isChinese || isEnglish) {
        // Language.
        [recognizer setParameter:config.language forKey:[IFlySpeechConstant LANGUAGE]];
    }
    if (isChinese) {
        // The accent (dialect) is only applied for Chinese.
        [recognizer setParameter:config.accent forKey:[IFlySpeechConstant ACCENT]];
    }

    // Whether punctuation is returned.
    [recognizer setParameter:config.dot forKey:[IFlySpeechConstant ASR_PTT]];
}

#pragma mark Dictation------------

/// Starts recording and recognizing.
/// @param startListening Called with the result of starting the recognizer. Per the
///        original author's note, a failure may mean the previous request has not
///        finished; concurrent sessions are not supported.
/// @param begin Called from -onBeginOfSpeech.
/// @param end Called from -onEndOfSpeech.
/// @param error Called from -onError: with YES when the error code is 0.
/// @param result Called from -onResults:isLast: with the text accumulated so far.
/// @param volume Called from -onVolumeChanged:.
- (void)voiceStart:(void (^)(BOOL isStart))startListening
       speechBegin:(void (^)(void))begin
         speechEnd:(void (^)(void))end
       speechError:(void (^)(BOOL isSuccess))error
      speechResult:(void (^)(NSString *text))result
      speechVolume:(void (^)(int volume))volume {
    [self.resultText setString:@""];

    // Store the callbacks; the properties are declared `copy`.
    self.beginSpeech = begin;
    self.endSpeech = end;
    self.errorSpeech = error;
    self.resultSpeech = result;
    self.volumeSpeech = volume;

    // Apply the configuration, then cancel whatever session is still active.
    [self initializeVoice];
    [self.iFlySpeechRecognizer cancel];

    // Audio source: microphone.
    [self.iFlySpeechRecognizer setParameter:IFLY_AUDIO_SOURCE_MIC forKey:@"audio_source"];

    // Result format: json.
    [self.iFlySpeechRecognizer setParameter:@"json" forKey:[IFlySpeechConstant RESULT_TYPE]];

    // Save the recording in the SDK working path (library/cache when none was set,
    // per the original author's note).
    [self.iFlySpeechRecognizer setParameter:@"asr.pcm" forKey:[IFlySpeechConstant ASR_AUDIO_PATH]];

    BOOL isStart = [self.iFlySpeechRecognizer startListening];
    if (startListening) {
        startListening(isStart);
    }
}

/// Cancels the current recognition session.
- (void)voiceCancel {
    [self.iFlySpeechRecognizer cancel];
}

/// Stops recording.
- (void)voiceStop {
    [self.iFlySpeechRecognizer stopListening];
}

#pragma mark IFlySpeechRecognizerDelegate------------

/**
 Recognition result callback.
 @param results Recognition results; the keys of the first element are JSON fragments.
 @param isLast Whether this is the last result.
 */
- (void)onResults:(NSArray *)results isLast:(BOOL)isLast {
    // firstObject is nil for an empty array, where results[0] would throw.
    NSDictionary *resultDictionary = results.firstObject;

    NSMutableString *jsonString = [[NSMutableString alloc] init];
    for (NSString *key in resultDictionary) {
        [jsonString appendFormat:@"%@", key];
    }

    NSString *recognizedText = [[self class] stringFromJson:jsonString];
    if (recognizedText.length > 0) {
        [self.resultText appendString:recognizedText];
    }

    if (self.resultSpeech) {
        // Hand out an immutable snapshot so later results cannot mutate a string
        // the callback kept.
        self.resultSpeech([self.resultText copy]);
    }
}

/**
 Session finished callback.
 @param error error.errorCode == 0 means the session ended normally; any other value is a failure.
 */
- (void)onError:(IFlySpeechError *)error {
    if (self.errorSpeech) {
        BOOL isSuccess = (0 == error.errorCode);
        self.errorSpeech(isSuccess);
    }
}

/** Recording stopped callback. */
- (void)onEndOfSpeech {
    if (self.endSpeech) {
        self.endSpeech();
    }
}

/** Recognition started callback. */
- (void)onBeginOfSpeech {
    if (self.beginSpeech) {
        self.beginSpeech();
    }
}

/** Volume callback (0-30 per the original author's note). */
- (void)onVolumeChanged:(int)volume {
    if (self.volumeSpeech) {
        self.volumeSpeech(volume);
    }
}

#pragma mark Parsing------------

/**************************************************************************/

/**
 Parses a command-word recognition result.
 Each line that contains "confidence=", " grammar=" and "input=" contributes
 "<input> 置信度<confidence>\n" to the returned string. A line whose id is
 "nomatch" makes the whole result an empty string. Lines whose markers are
 missing or out of order are skipped instead of raising a range exception.
 */
+ (NSString *)stringFromAsr:(NSString *)params {
    NSMutableString *resultString = [[NSMutableString alloc] init];

    for (NSString *line in [params componentsSeparatedByString:@"\n"]) {
        NSRange idRange = [line rangeOfString:@"id="];
        NSRange nameRange = [line rangeOfString:@"name="];
        NSRange confidenceRange = [line rangeOfString:@"confidence="];
        NSRange grammarRange = [line rangeOfString:@" grammar="];
        NSRange inputRange = [line rangeOfString:@"input="];

        if (confidenceRange.length == 0 || grammarRange.length == 0 || inputRange.length == 0) {
            continue;
        }

        // Check for nomatch: the id value lies between "id=" and "name=".
        if (idRange.length != 0 && nameRange.location != NSNotFound) {
            NSUInteger idStart = NSMaxRange(idRange);
            if (nameRange.location >= idStart) {
                NSString *rawId = [line substringWithRange:NSMakeRange(idStart, nameRange.location - idStart)];
                NSString *idValue = [rawId stringByTrimmingCharactersInSet:[NSCharacterSet whitespaceAndNewlineCharacterSet]];
                if ([idValue isEqualToString:@"nomatch"]) {
                    return @"";
                }
            }
        }

        // The confidence value lies between "confidence=" and " grammar=".
        NSUInteger confidenceStart = NSMaxRange(confidenceRange);
        if (grammarRange.location < confidenceStart) {
            continue; // markers out of order; the length would underflow
        }
        NSString *score = [line substringWithRange:NSMakeRange(confidenceStart, grammarRange.location - confidenceStart)];

        // The input text runs from "input=" to the end of the line.
        NSString *inputString = [line substringFromIndex:NSMaxRange(inputRange)];

        [resultString appendFormat:@"%@ 置信度%@\n", inputString, score];
    }

    return resultString;
}

/**
 Collects every candidate-word dictionary (the elements of each "cw" array
 inside the top-level "ws" array) from a recognition JSON string, in order.
 Returns an empty array when the string is not valid JSON of that shape.
 */
+ (NSArray *)candidateWordsFromJson:(NSString *)params {
    NSMutableArray *candidates = [[NSMutableArray alloc] init];

    // The text has to be UTF-8; a nil data object would make NSJSONSerialization throw.
    NSData *jsonData = [params dataUsingEncoding:NSUTF8StringEncoding];
    if (jsonData == nil) {
        return candidates;
    }

    id root = [NSJSONSerialization JSONObjectWithData:jsonData options:kNilOptions error:nil];
    if (![root isKindOfClass:[NSDictionary class]]) {
        return candidates;
    }

    NSArray *words = [root objectForKey:@"ws"];
    if (![words isKindOfClass:[NSArray class]]) {
        return candidates;
    }

    for (id word in words) {
        if (![word isKindOfClass:[NSDictionary class]]) {
            continue;
        }
        NSArray *candidateList = [word objectForKey:@"cw"];
        if (![candidateList isKindOfClass:[NSArray class]]) {
            continue;
        }
        for (id candidate in candidateList) {
            if ([candidate isKindOfClass:[NSDictionary class]]) {
                [candidates addObject:candidate];
            }
        }
    }

    return candidates;
}

/**
 Parses dictation data in json format and returns the concatenated words.
 Example params:
 {"sn":1,"ls":true,"bg":0,"ed":0,"ws":[{"bg":0,"cw":[{"w":"白日","sc":0}]},{"bg":0,"cw":[{"w":"依山","sc":0}]},{"bg":0,"cw":[{"w":"盡","sc":0}]},{"bg":0,"cw":[{"w":"黃河入海流","sc":0}]},{"bg":0,"cw":[{"w":"。","sc":0}]}]}
 Returns nil when params is nil, and an empty string when params cannot be parsed.
 */
+ (NSString *)stringFromJson:(NSString *)params {
    if (params == nil) {
        return nil;
    }

    NSMutableString *text = [[NSMutableString alloc] init];
    for (NSDictionary *candidate in [self candidateWordsFromJson:params]) {
        NSString *word = [candidate objectForKey:@"w"];
        // appendString: throws on nil, so entries without a string "w" are skipped.
        if ([word isKindOfClass:[NSString class]]) {
            [text appendString:word];
        }
    }
    return text;
}

/**
 Parses a grammar-recognition result and returns one
 "<word> 置信度:<score>\n" line per candidate word.
 Returns nil when params is nil, and an empty string when params cannot be parsed.
 */
+ (NSString *)stringFromABNFJson:(NSString *)params {
    if (params == nil) {
        return nil;
    }

    NSMutableString *text = [[NSMutableString alloc] init];
    for (NSDictionary *candidate in [self candidateWordsFromJson:params]) {
        NSString *word = [candidate objectForKey:@"w"];
        if (![word isKindOfClass:[NSString class]]) {
            continue;
        }
        // "sc" is formatted with %@, so any JSON value type is acceptable here.
        id score = [candidate objectForKey:@"sc"];
        [text appendFormat:@"%@ 置信度:%@\n", word, score];
    }
    return text;
}

/**************************************************************************/

@end
使用
初始化方法
/// Initializes the speech SDK: logging, working directory and application id.
+ (void)VoiceInitialize {
    // SDK log level; the log is saved in the working path set below.
    [IFlySetting setLogFile:LVL_ALL];

    // Also print the SDK log to the console.
    [IFlySetting showLogcat:YES];

    // SDK working path: the first caches directory of the user domain.
    NSString *cachesDirectory =
        NSSearchPathForDirectoriesInDomains(NSCachesDirectory, NSUserDomainMask, YES)[0];
    [IFlySetting setLogFilePath:cachesDirectory];

    // The appid identifies the application and must be passed at initialization.
    // Per the original author's note, initialization is asynchronous, should run at
    // app launch (see MSCAppDelegate.m in the SDK demo), and using the service
    // before initialization usually yields error code 10111.
    NSString *utilityParameters = [NSString stringWithFormat:@"appid=%@", VoiceAPPID];
    [IFlySpeechUtility createUtility:utilityParameters];
}
初始化調用
/// Application launch hook: initializes the speech SDK before anything uses it.
- (BOOL)application:(UIApplication *)application
    didFinishLaunchingWithOptions:(NSDictionary *)launchOptions {
    // Override point for customization after application launch.
    [VoiceConversion VoiceInitialize];

    return YES;
}
#import "VoiceConversion.h"

@interface ViewController ()

/// Speech wrapper, created on first access.
@property (nonatomic, strong) VoiceConversion *voiceConversion;
/// Label showing the recognition status, volume and recognized text.
@property (nonatomic, strong) UILabel *messageLabel;

@end

@implementation ViewController

- (void)viewDidLoad {
    [super viewDidLoad];
    // Do any additional setup after loading the view, typically from a nib.

    // Three navigation bar buttons drive the recognizer: start, stop and cancel.
    UIBarButtonItem *startButton = [self barButtonItemWithTitle:@"start"
                                                         action:@selector(startItemClick:)];
    UIBarButtonItem *stopButton = [self barButtonItemWithTitle:@"stop"
                                                        action:@selector(stopItemClick:)];
    UIBarButtonItem *cancelButton = [self barButtonItemWithTitle:@"cancel"
                                                          action:@selector(cancelItemClick:)];
    self.navigationItem.rightBarButtonItems = @[startButton, stopButton, cancelButton];

    self.title = @"科大訊飛語音";

    [self setUI];
}

- (void)didReceiveMemoryWarning {
    [super didReceiveMemoryWarning];
    // Dispose of any resources that can be recreated.
}

#pragma mark - Views

/// Builds a "done"-style bar button that sends `action` to this controller.
- (UIBarButtonItem *)barButtonItemWithTitle:(NSString *)title action:(SEL)action {
    return [[UIBarButtonItem alloc] initWithTitle:title
                                            style:UIBarButtonItemStyleDone
                                           target:self
                                           action:action];
}

/// Creates the message label and adds it to the view.
- (void)setUI {
    if ([self respondsToSelector:@selector(setEdgesForExtendedLayout:)]) {
        [self setEdgesForExtendedLayout:UIRectEdgeNone];
    }

    CGFloat margin = 10.0;
    CGRect labelFrame = CGRectMake(margin,
                                   margin,
                                   CGRectGetWidth(self.view.bounds) - margin * 2,
                                   40.0);
    UILabel *label = [[UILabel alloc] initWithFrame:labelFrame];
    label.backgroundColor = [UIColor colorWithWhite:0.5 alpha:0.3];
    label.textAlignment = NSTextAlignmentCenter;

    self.messageLabel = label;
    [self.view addSubview:label];
}

#pragma mark - Actions

/// Starts a recognition session and mirrors its progress in the message label.
- (void)startItemClick:(UIBarButtonItem *)item {
    // The callbacks are stored by voiceConversion, which this controller owns,
    // so they reference the controller weakly.
    __weak typeof(self) weakSelf = self;

    [self.voiceConversion voiceStart:^(BOOL isStart) {
        NSLog(@"1 start");
        weakSelf.messageLabel.text = isStart ? @"正在錄音" : @"啟動識別服務失敗,請稍後重試";
    } speechBegin:^{
        NSLog(@"2 begin");
    } speechEnd:^{
        NSLog(@"3 end");
    } speechError:^(BOOL isSuccess) {
        NSLog(@"4 error");
    } speechResult:^(NSString *text) {
        NSLog(@"5 result");
        weakSelf.messageLabel.text = text;
    } speechVolume:^(int volume) {
        NSLog(@"6 volume");
        weakSelf.messageLabel.text = [NSString stringWithFormat:@"音量:%d", volume];
    }];
}

/// Stops recording.
- (void)stopItemClick:(UIBarButtonItem *)item {
    [self.voiceConversion voiceStop];

    self.messageLabel.text = @"停止錄音";
}

/// Cancels the current recognition session.
- (void)cancelItemClick:(UIBarButtonItem *)item {
    [self.voiceConversion voiceCancel];

    self.messageLabel.text = @"取消識別";
}

#pragma mark - getter

- (VoiceConversion *)voiceConversion {
    if (_voiceConversion == nil) {
        _voiceConversion = [[VoiceConversion alloc] init];
    }
    return _voiceConversion;
}

@end