python调用百度语音识别实现大音频文件语音识别功能


Posted in Python onAugust 30, 2018

本文为大家分享了python实现大音频文件语音识别功能的具体代码,供大家参考,具体内容如下

实现思路:先用ffmpeg将其他非wav格式的音频转换为wav格式,并转换音频的声道(百度支持声道为1),采样率(值为8000),格式转换完成后,再用ffmpeg将音频切成百度。

支持的时长(30秒和60秒2种,本程序用的是30秒)。

# coding: utf-8
import json
import time
import base64
from inc import rtysdb
import urllib2
import requests
import os
import uuid
from inc import db_config
 
 
class BaiduRest:
  def __init__(self, cu_id, api_key, api_secert):
    self.token_url = "https://openapi.baidu.com/oauth/2.0/token?grant_type=client_credentials&client_id=%s&client_secret=%s"
    self.getvoice_url = "http://tsn.baidu.com/text2audio?tex=%s&lan=zh&cuid=%s&ctp=1&tok=%s"
    self.upvoice_url = 'http://vop.baidu.com/server_api'
 
    self.cu_id = cu_id
    self.get_token(api_key, api_secert)
    return
 
  def get_token(self, api_key, api_secert):
    token_url = self.token_url % (api_key, api_secert)
    r_str = urllib2.urlopen(token_url).read()
    token_data = json.loads(r_str)
    self.token_str = token_data['access_token']
    return True
 
  # 语音合成
  def text2audio(self, text, filename):
    get_url = self.getvoice_url % (urllib2.quote(text), self.cu_id, self.token_str)
    voice_data = urllib2.urlopen(get_url).read()
    voice_fp = open(filename, 'wb+')
    voice_fp.write(voice_data)
    voice_fp.close()
    return True
 
  ##语音识别
  def audio2text(self, filename):
    data = {}
    data['format'] = 'wav'
    data['rate'] = 8000
    data['channel'] = 1
    data['cuid'] = self.cu_id
    data['token'] = self.token_str
 
    wav_fp = open(filename, 'rb')
    voice_data = wav_fp.read()
    data['len'] = len(voice_data)
    # data['speech'] = base64.b64encode(voice_data).decode('utf-8')
    data['speech'] = base64.b64encode(voice_data).replace('\n', '')
    # post_data = json.dumps(data)
    result = requests.post(self.upvoice_url, json=data, headers={'Content-Type': 'application/json'})
    data_result = result.json()
    if(data_result['err_msg'] == 'success.'):
      return data_result['result'][0]
    else:
      return False
 
 
 
def test_voice(voice_file):
  api_key = "vossGHIgEETS6IMRxBDeahv8"
  api_secert = "3c1fe6a6312f41fa21fa2c394dad5510"
  bdr = BaiduRest("0-57-7B-9F-1F-A1", api_key, api_secert)
 
  # 生成
  #start = time.time()
  #bdr.text2audio("你好啊", "out.wav")
  #using = time.time() - start
  #print using
 
  # 识别
  #start = time.time()
  result = bdr.audio2text(voice_file)
  # result = bdr.audio2text("weather.pcm")
  #using = time.time() - start
  return result
 
def get_master_audio(check_status='cut_status'):
  if check_status == 'cut_status':
    sql = "SELECT id,url, time_long,sharps FROM ocenter_recognition WHERE status=0"
  elif check_status == 'finished_status':
    sql = "SELECT id,url, time_long,sharps FROM ocenter_recognition WHERE finished_status=0"
  else:
    return False
  data = rtysdb.select_data(sql,'more')
  if data:
    return data
  else:
    return False
 
 
def go_recognize(master_id):
  section_path = db_config.SYS_PATH
  sql = "SELECT id,rid,url,status FROM ocenter_section WHERE rid=%d AND status=0 order by id asc limit 10" % (master_id)
  #print sql
  record = rtysdb.select_data(sql,'more')
  #print record
  if not record:
    return False
  for rec in record:
    #print section_path+'/'+rec[1]
    voice_file = section_path+'/'+rec[2]
    if not os.path.exists(voice_file):
      continue
    result = test_voice(voice_file)
    print result
    exit(0)
    if result:
      #rtysdb.update_by_pk('ocenter_section',rec[0],{'content':result,'status':1})
      sql = "update ocenter_section set content='%s', status='%d' where id=%d" % (result,1,rec[0])      #print sql
      rtysdb.do_exec_sql(sql)
      parent_content = rtysdb.select_data("SELECT id,content FROM ocenter_recognition WHERE id=%d" % (rec[1]))
      #print parent_content
      if parent_content:
        new_content = parent_content[1]+result
        update_content_sql = "update ocenter_recognition set content='%s' where id=%d" % (new_content,rec[1])
        rtysdb.do_exec_sql(update_content_sql)
    else:
      rtysdb.do_exec_sql("update ocenter_section set status='%d' where id=%d" % (result,1,rec[0]))
    time.sleep(5)
  else:
    rtysdb.do_exec_sql("UPDATE ocenter_recognition SET finished_status=1 WHERE id=%d" % (master_id))
#对百度语音识别不了的音频文件进行转换
def ffmpeg_convert():
  section_path = db_config.SYS_PATH
  #print section_path
  used_audio = get_master_audio('cut_status')
  #print used_audio
  if used_audio:
    for audio in used_audio:
      audio_path = section_path+'/'+audio[1]
      new_audio = uuid.uuid1()
      command_line = "ffmpeg -i "+audio_path +" -ar 8000 -ac 1 -f wav "+section_path+"/Uploads/Convert/convert_" + str(new_audio) +".wav";
      #print command_line
      os.popen(command_line)
      if os.path.exists(section_path+"/Uploads/Convert/convert_" + str(new_audio) +".wav"):
        convert_name = "Uploads/Convert/convert_" + str(new_audio) +".wav"
        ffmpeg_cut(convert_name,audio[3],audio[0])
        sql = "UPDATE ocenter_recognition SET status=1,convert_name='%s' where id=%d" % (convert_name,audio[0])
        rtysdb.do_exec_sql(sql)
#将大音频文件切成碎片
def ffmpeg_cut(convert_name,sharps,master_id):
  section_path = db_config.SYS_PATH
  if sharps>0:
    for i in range(0,sharps):
      timeArray = time.localtime(i*30)
      h = time.strftime("%H", timeArray)
      h = int(h) - 8
      h = "0" + str(h)
      ms = time.strftime("%M:%S",timeArray)
      start_time = h+':'+str(ms)
      cut_name = section_path+'/'+convert_name
      db_store_name = "Uploads/Section/"+str(uuid.uuid1())+'-'+str(i+1)+".wav"
      section_name = section_path+"/"+db_store_name
      command_line = "ffmpeg.exe -i "+cut_name+" -vn -acodec copy -ss "+start_time+" -t 00:00:30 "+section_name
      #print command_line
      os.popen(command_line)
      data = {}
      data['rid'] = master_id
      data['url'] = db_store_name
      data['create_time'] = int(time.time())
      data['status'] = 0
      rtysdb.insert_one('ocenter_section',data)
 
if __name__ == "__main__":
  ffmpeg_convert()
  audio = get_master_audio('finished_status')
  if audio:
     for ad in audio:
      go_recognize(ad[0])

以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持三水点靠木。

Python 相关文章推荐
基于python编写的微博应用
Oct 17 Python
使用Python下的XSLT API进行web开发的简单教程
Apr 15 Python
Python实现PS滤镜特效之扇形变换效果示例
Jan 26 Python
python matlibplot绘制多条曲线图
Feb 19 Python
Python的argparse库使用详解
Oct 09 Python
使用Python实现微信提醒备忘录功能
Dec 04 Python
python numpy 按行归一化的实例
Jan 21 Python
详解Python 定时框架 Apscheduler原理及安装过程
Jun 14 Python
利用matplotlib实现根据实时数据动态更新图形
Dec 13 Python
Python RabbitMQ实现简单的进程间通信示例
Jul 02 Python
OpenCV绘制圆端矩形的示例代码
Aug 30 Python
python​格式化字符串
Apr 20 Python
python的中异常处理机制
Aug 30 #Python
python调用百度REST API实现语音识别
Aug 30 #Python
python调用百度语音REST API
Aug 30 #Python
python调用百度语音识别api
Aug 30 #Python
python实现ID3决策树算法
Aug 29 #Python
python实现C4.5决策树算法
Aug 29 #Python
python机器学习之KNN分类算法
Aug 29 #Python
You might like
PHP 和 MySQL 基础教程(三)
2006/10/09 PHP
简单的cookie计数器实现源码
2013/06/07 PHP
基于flush()不能按顺序输出时的解决办法
2013/06/29 PHP
php第一次无法获取cookie问题处理
2014/12/15 PHP
PHP中的session安全吗?
2016/01/22 PHP
游戏人文件夹程序 ver 3.0
2006/07/14 Javascript
javascript事件冒泡详解和捕获、阻止方法
2014/04/12 Javascript
jquery动态改变div宽度和高度
2015/02/09 Javascript
JS中字符串trim()使用示例
2015/05/26 Javascript
详解JavaScript 中的 replace 方法
2016/01/01 Javascript
javascript高级选择器querySelector和querySelectorAll全面解析
2016/04/07 Javascript
D3.js封装文本实现自动换行和旋转平移等功能
2016/10/14 Javascript
js控制台输出的方法(详解)
2016/11/26 Javascript
微信小程序文章详情页面实现代码
2018/09/10 Javascript
angularjs获取到My97DatePicker选中的值方法
2018/10/02 Javascript
vue中$nextTick的用法讲解
2019/01/17 Javascript
基于Webpack4和React hooks搭建项目的方法
2019/02/05 Javascript
Vue.js递归组件实现组织架构树和选人功能案例分析
2019/07/03 Javascript
Vue中消息横向滚动时setInterval清不掉的问题及解决方法
2019/08/23 Javascript
关于layui时间回显问题的解决方法
2019/09/24 Javascript
Nuxt v-bind绑定img src不显示的解决
2019/12/05 Javascript
vue 输入电话号码自动按3-4-4分割功能的实现代码
2020/04/30 Javascript
[02:32]DOTA2英雄基础教程 美杜莎
2014/01/07 DOTA
在Python中使用pngquant压缩png图片的教程
2015/04/09 Python
详细解析Python中的变量的数据类型
2015/05/13 Python
Python模拟鼠标点击实现方法(将通过实例自动化模拟在360浏览器中自动搜索python)
2017/08/23 Python
Django使用HttpResponse返回图片并显示的方法
2018/05/22 Python
python实现整数的二进制循环移位
2019/03/08 Python
python 字段拆分详解
2019/12/17 Python
python requests模拟登陆github的实现方法
2019/12/26 Python
关于圣诞节的广播稿
2014/01/26 职场文书
教学实验楼管理制度
2014/02/01 职场文书
剪彩仪式主持词
2014/03/19 职场文书
老公给老婆的检讨书(精华篇)
2014/10/18 职场文书
2014年图书馆个人工作总结
2014/12/18 职场文书
SQL Server远程连接的设置步骤(图文)
2022/03/23 SQL Server