python调用百度语音识别实现大音频文件语音识别功能


Posted in Python onAugust 30, 2018

本文为大家分享了python实现大音频文件语音识别功能的具体代码,供大家参考,具体内容如下

实现思路:先用ffmpeg将其他非wav格式的音频转换为wav格式,并转换音频的声道(百度支持声道为1),采样率(值为8000),格式转换完成后,再用ffmpeg将音频切成百度。

支持的时长(30秒和60秒2种,本程序用的是30秒)。

# coding: utf-8
import json
import time
import base64
from inc import rtysdb
import urllib2
import requests
import os
import uuid
from inc import db_config
 
 
class BaiduRest:
  def __init__(self, cu_id, api_key, api_secert):
    self.token_url = "https://openapi.baidu.com/oauth/2.0/token?grant_type=client_credentials&client_id=%s&client_secret=%s"
    self.getvoice_url = "http://tsn.baidu.com/text2audio?tex=%s&lan=zh&cuid=%s&ctp=1&tok=%s"
    self.upvoice_url = 'http://vop.baidu.com/server_api'
 
    self.cu_id = cu_id
    self.get_token(api_key, api_secert)
    return
 
  def get_token(self, api_key, api_secert):
    token_url = self.token_url % (api_key, api_secert)
    r_str = urllib2.urlopen(token_url).read()
    token_data = json.loads(r_str)
    self.token_str = token_data['access_token']
    return True
 
  # 语音合成
  def text2audio(self, text, filename):
    get_url = self.getvoice_url % (urllib2.quote(text), self.cu_id, self.token_str)
    voice_data = urllib2.urlopen(get_url).read()
    voice_fp = open(filename, 'wb+')
    voice_fp.write(voice_data)
    voice_fp.close()
    return True
 
  ##语音识别
  def audio2text(self, filename):
    data = {}
    data['format'] = 'wav'
    data['rate'] = 8000
    data['channel'] = 1
    data['cuid'] = self.cu_id
    data['token'] = self.token_str
 
    wav_fp = open(filename, 'rb')
    voice_data = wav_fp.read()
    data['len'] = len(voice_data)
    # data['speech'] = base64.b64encode(voice_data).decode('utf-8')
    data['speech'] = base64.b64encode(voice_data).replace('\n', '')
    # post_data = json.dumps(data)
    result = requests.post(self.upvoice_url, json=data, headers={'Content-Type': 'application/json'})
    data_result = result.json()
    if(data_result['err_msg'] == 'success.'):
      return data_result['result'][0]
    else:
      return False
 
 
 
def test_voice(voice_file):
  api_key = "vossGHIgEETS6IMRxBDeahv8"
  api_secert = "3c1fe6a6312f41fa21fa2c394dad5510"
  bdr = BaiduRest("0-57-7B-9F-1F-A1", api_key, api_secert)
 
  # 生成
  #start = time.time()
  #bdr.text2audio("你好啊", "out.wav")
  #using = time.time() - start
  #print using
 
  # 识别
  #start = time.time()
  result = bdr.audio2text(voice_file)
  # result = bdr.audio2text("weather.pcm")
  #using = time.time() - start
  return result
 
def get_master_audio(check_status='cut_status'):
  if check_status == 'cut_status':
    sql = "SELECT id,url, time_long,sharps FROM ocenter_recognition WHERE status=0"
  elif check_status == 'finished_status':
    sql = "SELECT id,url, time_long,sharps FROM ocenter_recognition WHERE finished_status=0"
  else:
    return False
  data = rtysdb.select_data(sql,'more')
  if data:
    return data
  else:
    return False
 
 
def go_recognize(master_id):
  section_path = db_config.SYS_PATH
  sql = "SELECT id,rid,url,status FROM ocenter_section WHERE rid=%d AND status=0 order by id asc limit 10" % (master_id)
  #print sql
  record = rtysdb.select_data(sql,'more')
  #print record
  if not record:
    return False
  for rec in record:
    #print section_path+'/'+rec[1]
    voice_file = section_path+'/'+rec[2]
    if not os.path.exists(voice_file):
      continue
    result = test_voice(voice_file)
    print result
    exit(0)
    if result:
      #rtysdb.update_by_pk('ocenter_section',rec[0],{'content':result,'status':1})
      sql = "update ocenter_section set content='%s', status='%d' where id=%d" % (result,1,rec[0])      #print sql
      rtysdb.do_exec_sql(sql)
      parent_content = rtysdb.select_data("SELECT id,content FROM ocenter_recognition WHERE id=%d" % (rec[1]))
      #print parent_content
      if parent_content:
        new_content = parent_content[1]+result
        update_content_sql = "update ocenter_recognition set content='%s' where id=%d" % (new_content,rec[1])
        rtysdb.do_exec_sql(update_content_sql)
    else:
      rtysdb.do_exec_sql("update ocenter_section set status='%d' where id=%d" % (result,1,rec[0]))
    time.sleep(5)
  else:
    rtysdb.do_exec_sql("UPDATE ocenter_recognition SET finished_status=1 WHERE id=%d" % (master_id))
#对百度语音识别不了的音频文件进行转换
def ffmpeg_convert():
  section_path = db_config.SYS_PATH
  #print section_path
  used_audio = get_master_audio('cut_status')
  #print used_audio
  if used_audio:
    for audio in used_audio:
      audio_path = section_path+'/'+audio[1]
      new_audio = uuid.uuid1()
      command_line = "ffmpeg -i "+audio_path +" -ar 8000 -ac 1 -f wav "+section_path+"/Uploads/Convert/convert_" + str(new_audio) +".wav";
      #print command_line
      os.popen(command_line)
      if os.path.exists(section_path+"/Uploads/Convert/convert_" + str(new_audio) +".wav"):
        convert_name = "Uploads/Convert/convert_" + str(new_audio) +".wav"
        ffmpeg_cut(convert_name,audio[3],audio[0])
        sql = "UPDATE ocenter_recognition SET status=1,convert_name='%s' where id=%d" % (convert_name,audio[0])
        rtysdb.do_exec_sql(sql)
#将大音频文件切成碎片
def ffmpeg_cut(convert_name,sharps,master_id):
  section_path = db_config.SYS_PATH
  if sharps>0:
    for i in range(0,sharps):
      timeArray = time.localtime(i*30)
      h = time.strftime("%H", timeArray)
      h = int(h) - 8
      h = "0" + str(h)
      ms = time.strftime("%M:%S",timeArray)
      start_time = h+':'+str(ms)
      cut_name = section_path+'/'+convert_name
      db_store_name = "Uploads/Section/"+str(uuid.uuid1())+'-'+str(i+1)+".wav"
      section_name = section_path+"/"+db_store_name
      command_line = "ffmpeg.exe -i "+cut_name+" -vn -acodec copy -ss "+start_time+" -t 00:00:30 "+section_name
      #print command_line
      os.popen(command_line)
      data = {}
      data['rid'] = master_id
      data['url'] = db_store_name
      data['create_time'] = int(time.time())
      data['status'] = 0
      rtysdb.insert_one('ocenter_section',data)
 
if __name__ == "__main__":
  ffmpeg_convert()
  audio = get_master_audio('finished_status')
  if audio:
     for ad in audio:
      go_recognize(ad[0])

以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持三水点靠木。

Python 相关文章推荐
python 简单的绘图工具turtle使用详解
Jun 21 Python
在numpy矩阵中令小于0的元素改为0的实例
Jan 26 Python
python实战串口助手_解决8串口多个发送的问题
Jun 12 Python
对Python中一维向量和一维向量转置相乘的方法详解
Aug 26 Python
使用Python合成图片的实现代码(图片添加个性化文本,图片上叠加其他图片)
Apr 30 Python
详解python中groupby函数通俗易懂
May 14 Python
opencv 实现特定颜色线条提取与定位操作
Jun 02 Python
详解pytorch中squeeze()和unsqueeze()函数介绍
Sep 03 Python
python,Java,JavaScript实现indexOf
Sep 09 Python
matplotlib 使用 plt.savefig() 输出图片去除旁边的空白区域
Jan 05 Python
Python绘制K线图之可视化神器pyecharts的使用
Mar 02 Python
python数据库批量插入数据的实现(executemany的使用)
Apr 30 Python
python的中异常处理机制
Aug 30 #Python
python调用百度REST API实现语音识别
Aug 30 #Python
python调用百度语音REST API
Aug 30 #Python
python调用百度语音识别api
Aug 30 #Python
python实现ID3决策树算法
Aug 29 #Python
python实现C4.5决策树算法
Aug 29 #Python
python机器学习之KNN分类算法
Aug 29 #Python
You might like
PHP 配置open_basedir 让各虚拟站点独立运行
2009/11/12 PHP
php截取中文字符串函数实例
2015/02/23 PHP
跟着Jquery API学Jquery之一 选择器
2010/04/07 Javascript
10个基于Jquery的幻灯片插件教程
2010/10/29 Javascript
用JQuery调用Session的实现代码
2010/10/29 Javascript
jQuery EasyUI API 中文文档 - ComboBox组合框
2011/10/07 Javascript
原生js编写设为首页兼容ie、火狐和谷歌
2014/06/05 Javascript
JS数字抽奖游戏实现方法
2015/05/04 Javascript
jquery分页插件jquery.pagination.js使用方法解析
2016/04/01 Javascript
JavaScript实现Base64编码转换
2016/04/23 Javascript
jQuery实现链接的title快速出现的方法
2017/02/20 Javascript
BootStrap入门学习第一篇
2017/08/28 Javascript
Vue 应用中结合vux使用微信 jssdk的方法
2018/08/28 Javascript
Vue中Table组件Select的勾选和取消勾选事件详解
2019/03/19 Javascript
基于vue实现滚动条滚动到指定位置对应位置数字进行tween特效
2019/04/18 Javascript
基于vue-cli 路由 实现类似tab切换效果(vue 2.0)
2019/05/08 Javascript
JavaScript使用百度ECharts插件绘制饼图操作示例
2019/11/26 Javascript
javascript局部自定义鼠标右键菜单
2020/12/08 Javascript
[51:07]VGJ.S vs Pain 2018国际邀请赛小组赛BO2 第一场 8.17
2018/08/20 DOTA
[35:39]完美世界DOTA2联赛PWL S2 FTD.C vs Rebirth 第二场 11.22
2020/11/24 DOTA
Python中实现对Timestamp和Datetime及UTC时间之间的转换
2015/04/08 Python
python互斥锁、加锁、同步机制、异步通信知识总结
2018/02/11 Python
Python 3.6打包成EXE可执行程序的实现
2019/10/18 Python
Python如何截图保存的三种方法(小结)
2020/09/01 Python
HTML实现代码雨源码及效果示例
2020/02/25 HTML / CSS
美国隐形眼镜网:Major Lens
2018/02/09 全球购物
社团活动策划书范文
2014/01/09 职场文书
学校联谊活动方案
2014/02/15 职场文书
初三开学计划书
2014/04/27 职场文书
2014年挂职干部工作总结
2014/12/06 职场文书
2014年医院个人工作总结
2014/12/09 职场文书
公司总经理岗位职责
2015/04/01 职场文书
解决jupyter notebook启动后没有token的坑
2021/04/24 Python
基于Python实现将列表数据生成折线图
2022/03/23 Python
什么是Python装饰器?如何定义和使用?
2022/04/11 Python
SqlServer常用函数及时间处理小结
2023/05/08 SQL Server