编程 Python

python读取word文档,插入mysql数据库的示例代码

Posted in Python on November 07, 2018

表格内容如下：

1、实现批量导入word文档，取文档标题中的数字作为编号

2、除上面打钩的内容需要先匹配出来再入库外，其他内容全部直接入库mysql

# wuyanfeng
# -*- coding:utf-8 -*-
# 读取docx中的文本代码示例
import docx
import pymysql
import re
import os

# Create the database connection.
# Every setting can be overridden through a PAYS_DB_* environment variable so
# that credentials do not have to be edited into this file. The literal
# fallbacks are the values the script originally used, so it behaves the same
# when no variable is set.
# NOTE(review): a real password is still present below as a fallback --
# rotate it and remove the default once the environment variables are in use.
conn = pymysql.connect(
    host=os.environ.get('PAYS_DB_HOST', 'rm-bp1vu5d84dg12c6d59o.mysql.rds.aliyuncs.com'),
    port=int(os.environ.get('PAYS_DB_PORT', '3306')),
    user=os.environ.get('PAYS_DB_USER', 'root'),
    passwd=os.environ.get('PAYS_DB_PASSWORD', 'wYf092415*'),
    db=os.environ.get('PAYS_DB_NAME', 'pays'),
    charset='utf8',
)
# Create the module-level cursor used to run the INSERT statements.
cursor = conn.cursor()

#切片函数
def section(info, key, len11):
    """Extract the text that follows every occurrence of *key* in *info*.

    Used to pull the labels of ticked options out of a line such as
    ``"☑法定职责 □工作职责"``: for each position where *key* occurs, the
    ``len11`` characters immediately after it are collected.

    Args:
        info: The text to scan (a ``str``; a sequence of single characters
            is also accepted and is joined before slicing).
        key: The marker to look for, e.g. ``"√"`` or ``"☑"``. Exactly one
            character is skipped after each match position.
        len11: Number of characters to take after each marker.

    Returns:
        A comma-joined ``str`` of the extracted pieces when at least one
        marker is found, otherwise an empty ``list`` (callers pass the
        result to ``"".join``, which accepts both).
    """
    positions = []
    start = 0
    # Collect every match position, resuming the search just past each hit.
    while start < len(info):
        try:
            found = info.index(key, start)
        except ValueError:
            break
        positions.append(found)
        start = found + 1

    # No marker at all (this also covers empty input).
    if not positions:
        return []

    text = ''.join(info)
    return ",".join(text[pos + 1:pos + 1 + len11] for pos in positions)


def _checked_options(text):
    """Return the ticked option labels found in *text*, or *text* itself if none are ticked."""
    if not text:
        return text
    # "√" is tried first, then "☑"; the first marker that yields labels wins.
    for mark in ("√", "☑"):
        picked = "".join(section(text, mark, 4))
        if picked.strip():
            return picked
    return text


def insettable(file):
    """Read one knowledge-base Word document and insert it into `t_knowledge_base`.

    The record number is the run of digits that directly follows the
    ``知识库`` directory in the path. Field values come from the first table
    of the document and from its paragraphs:

    * table cell (0, 1) -> ``task_name``
    * table cell (0, 3) -> ``task_specification``
    * table cell (1, 3) -> ``task_type`` (reduced to the ticked options)
    * table cell (2, 1) -> ``task_desc``
    * paragraph 1 -> ``task_class`` (reduced to the ticked options)
    * paragraph 2 -> ``preparer`` (the text after ``填表单位：``)
    * paragraphs 4 up to, but not including, the last -> ``key_steps``

    ``task_basis`` is always stored as NULL. Missing paragraphs leave the
    corresponding column as an empty string.

    Args:
        file: Path of the ``.docx`` file to import.

    Raises:
        IndexError: If the path contains no ``知识库<separator><digits>``
            part, or the document has no table.
    """
    # Raw string avoids invalid escape sequences; both path separators are accepted.
    number = int(re.findall(r"知识库[\\/](\d+)", file)[0])

    document = docx.Document(file)
    table = document.tables[0]
    task_name = table.cell(0, 1).text
    task_specification = table.cell(0, 3).text
    task_type = _checked_options(table.cell(1, 3).text)
    task_desc = table.cell(2, 1).text

    texts = [paragraph.text for paragraph in document.paragraphs]
    task_class = _checked_options(texts[1]) if len(texts) > 1 else ""
    preparer = ""
    if len(texts) > 2:
        preparer = "".join(re.findall('填表单位：(.*?)$', texts[2]))
    # Paragraphs 0 and 3 and the final paragraph are not stored.
    key_steps = "\n".join(str(text).strip('\xa0') for text in texts[4:-1])

    # Values are passed as query parameters so that quotes in the document
    # text cannot break or alter the statement.
    sql = (
        "insert into `t_knowledge_base` (`no`, `preparer`, `task_class`, `task_name`, "
        "`task_specification`, `task_type`, `task_desc`, `task_basis`, `key_steps`) "
        "values (%s, %s, %s, %s, %s, %s, %s, NULL, %s)"
    )
    cursor.execute(
        sql,
        (number, preparer, task_class, task_name, task_specification,
         task_type, task_desc, key_steps),
    )
    conn.commit()


def traverse(f):
    """Walk directory *f* recursively and hand every non-directory entry to ``insettable``.

    Entries are visited in the order ``os.listdir`` returns them;
    sub-directories are descended into as soon as they are met.
    """
    for entry in os.listdir(f):
        full_path = os.path.join(f, entry)
        if os.path.isdir(full_path):
            print('文件夹：%s' % full_path)
            traverse(full_path)
            continue
        print('文件: %s' % full_path)
        insettable(full_path)


# Root directory holding the Word documents to import. Every backslash is
# escaped so the literal contains no invalid escape sequence.
path = 'D:\\配置库\\公案APP\\1.2 系统规格\\知识库'

# Single-file debugging:
# path = 'D:\\配置库\\公案APP\\1.2 系统规格\\知识库\\14人员死亡先期处置.docx'
# insettable(path)

try:
    traverse(path)
finally:
    # Close the cursor, then the connection, even if an import fails part-way.
    cursor.close()
    conn.close()

以上这篇python读取word文档,插入mysql数据库的示例代码就是小编分享给大家的全部内容了，希望能给大家一个参考，也希望大家多多支持三水点靠木。

python读取word文档,插入mysql数据库的示例代码

- Author -

无所住心

声明：登载此文出于传递更多信息之目的，并不意味着赞同其观点或证实其描述。

Python 相关文章推荐

Python中使用wxPython开发的一个简易笔记本程序实例

Feb 08 Python

在Heroku云平台上部署Python的Django框架的教程

Apr 20 Python

Python脚本处理空格的方法

Aug 08 Python

浅谈python和C语言混编的几种方式(推荐)

Sep 27 Python

Python数据结构之栈、队列的实现代码分享

Dec 04 Python

python 动态加载的实现方法

Dec 22 Python

python开发游戏的前期准备

May 05 Python

python异步实现定时任务和周期任务的方法

Jun 29 Python

利用Python复制文件的9种方法总结

Sep 02 Python

python+selenium定时爬取丁香园的新型冠状病毒数据并制作出类似的地图（部署到云服务器）

Feb 09 Python

Django REST Swagger实现指定api参数

Jul 07 Python

Python开发入门——迭代的基本使用

Sep 03 Python

pandas.DataFrame删除/选取含有特定数值的行或列实例

Nov 07 #Python

python 返回列表中某个值的索引方法

Nov 07 #Python

pandas 根据列的值选取所有行的示例

Nov 07 #Python

Pandas过滤dataframe中包含特定字符串的数据方法

Nov 07 #Python

pandas筛选某列出现编码错误的解决方法

Nov 07 #Python

python绘制中国大陆人口热力图

Nov 07 #Python

利用Python将数值型特征进行离散化操作的方法

Nov 06 #Python

You might like

windows中为php安装mongodb与memcache

2015/01/06 PHP

PHP根据图片色界在不同位置加水印的方法

2015/07/01 PHP

Zend Framework入门教程之Zend_Registry组件用法详解

2016/12/09 PHP

PHP实现图的邻接矩阵表示及几种简单遍历算法分析

2017/11/24 PHP

php中Swoole的热更新实现代码实例

2021/03/04 PHP

artdialog的图片/标题以及关闭按钮不显示的解决方法

2013/06/27 Javascript

jQuery制作的别致导航有阴影背景高亮模式窗口

2014/04/15 Javascript

JS辨别访问浏览器判断是android还是ios系统

2014/08/19 Javascript

分享一些常用的jQuery动画事件和动画函数

2015/11/27 Javascript

微信小程序限制1M的瘦身技巧与方法详解

2017/01/06 Javascript

JavaScript获取中英文混合字符串长度的方法示例

2017/02/04 Javascript

Angular 1.x个人使用的经验小结

2017/07/19 Javascript

js仿微信抢红包功能

2020/09/25 Javascript

微信小程序实现的一键复制功能示例

2019/04/24 Javascript

vue实现跨域的方法分析

2019/05/21 Javascript

Node.js API详解之 querystring用法实例分析

2020/04/29 Javascript

详谈Vue.js框架下main.js,App.vue,page/index.vue之间的区别

2020/08/12 Javascript

python二叉树遍历的实现方法

2013/11/21 Python

Python的Django框架中模板碎片缓存简介

2015/07/24 Python

Python中内置数据类型list,tuple,dict,set的区别和用法

2015/12/14 Python

Ruby使用eventmachine为HTTP服务器添加文件下载功能

2016/04/20 Python

python SSH模块登录，远程机执行shell命令实例解析

2018/01/12 Python

快速了解Python开发中的cookie及简单代码示例

2018/01/17 Python

Python实现批量修改图片格式和大小的方法【opencv库与PIL库】

2018/12/03 Python

新手入门学习python Numpy基础操作

2020/03/02 Python

在keras中实现查看其训练loss值

2020/06/16 Python

python怎么判断模块安装完成

2020/06/19 Python

纯CSS3实现图片无间断轮播效果

2016/08/25 HTML / CSS

德国婴儿服装和婴儿用品购买网站：Baby Sweets

2019/12/08 全球购物

中级会计大学生职业生涯规划书

2014/09/16 职场文书

一年级下册数学教学反思

2016/02/16 职场文书

2019通用版新员工入职培训方案！

2019/07/11 职场文书

选对餐饮营销策略，营业额才会上涨

2019/08/27 职场文书

golang中的struct操作

2021/11/11 Golang

2022年四月新番

2022/03/15 日漫

如何利用python创作字符画

2022/06/25 Python