编程 Python

python多进程读图提取特征存npy

Posted in Python on May 21, 2019

本文分享了使用 Python 多进程读取图像、提取特征并保存为 npy 文件的具体代码，供大家参考，具体内容如下：

import multiprocessing
import os, time, random
import numpy as np
import cv2
import os
import sys
from time import ctime
import tensorflow as tf
 
# ---- Script configuration -------------------------------------------------
image_dir = r"D:/sxl/处理图片/汉字分类/train10/"  # root image folder; its entries are the class names
data_type = 'test'
save_path = r'E:/sxl_Programs/Python/CNN/npy/'  # output folder for the .npy files
data_name = 'Img10'        # prefix of the generated .npy file names

# The position of a name in char_set is the numeric label of that class.
# os.listdir() returns entries in arbitrary order, and with the "spawn" start
# method this module is executed again in every child process, so the names
# are sorted to give every process (and every run) the same name -> label table.
char_set = np.array(sorted(os.listdir(image_dir)))
np.save(save_path+'ImgShuZi10.npy',char_set)   # persist the label -> class-name table
char_set_n = len(char_set)       # number of classes

read_process_n = 1 # number of feature-extraction (consumer) processes
repate_n = 4   # number of random shifts per image (only referenced by disabled code)
data_size = 1000000 # number of samples that triggers writing one batch of .npy files

shuffled = True  # whether to shuffle (NOTE(review): not read by the functions visible in this file)
 
# Read an image whose path may contain Chinese (non-ASCII) characters.
def cv_imread(file_path,type=0):
    """Load the image stored at ``file_path`` as a numpy array.

    The raw bytes are read with ``np.fromfile`` and decoded with
    ``cv2.imdecode`` (flag ``-1``: keep the file's own channel layout), which,
    per the original author's note, lets paths with Chinese characters be read.

    Args:
        file_path: Path of the image file.
        type: ``0`` (default) converts a 3-dimensional (multi-channel) result
            to grayscale; any other value returns the decoded image as is.

    Returns:
        The decoded image array.

    Raises:
        ValueError: If the file content cannot be decoded as an image.
    """
    cv_img = cv2.imdecode(np.fromfile(file_path, dtype=np.uint8), -1)

    # imdecode reports an undecodable buffer by returning None; fail here with
    # the offending path instead of an AttributeError on `.shape` below.
    if cv_img is None:
        raise ValueError('cv_imread: cannot decode image file: %s' % file_path)

    if type == 0 and len(cv_img.shape) == 3:
        cv_img = cv2.cvtColor(cv_img, cv2.COLOR_BGR2GRAY)
    return cv_img
 
# Shuffle several arrays with one shared permutation.
def ShuffledData(features,labels):
    """Shuffle ``features`` and ``labels`` while keeping them paired.

    One random permutation of the first axis is drawn with
    ``np.random.permutation`` and applied to both inputs, so row ``i`` of the
    shuffled features still belongs to element ``i`` of the shuffled labels.

    Args:
        features: Array-like whose first axis indexes the samples. Any number
            of dimensions >= 1 is accepted, including an empty 1-D array.
        labels: Array-like with the same length along the first axis.

    Returns:
        Tuple ``(shuffled_features, shuffled_labels)`` of numpy arrays.

    Raises:
        ValueError: If the two inputs have a different number of samples.
    """
    features = np.asarray(features)
    labels = np.asarray(labels)
    if features.shape[0] != labels.shape[0]:
        raise ValueError(
            'features and labels must have the same length, got %d and %d'
            % (features.shape[0], labels.shape[0])
        )

    permutation = np.random.permutation(features.shape[0])
    # Indexing only the first axis works for every rank; the former
    # `features[permutation,:]` raised IndexError for 1-D / empty input.
    return features[permutation], labels[permutation]
 
# Feature: simple grid.
# Requirements: 1. works for any input image size; 2. the input image is
# assumed to be grayscale; 3. the input image is the only parameter.
# Returns: a 1 x (64*64) feature vector.
def GetFeature(image):
    """Return the pixels of ``image``, resized to 64x64, as a flat int16 vector.

    Args:
        image: Grayscale (2-D) image array of any size.

    Returns:
        1-D ``np.int16`` array of length 64*64 holding the resized pixels in
        row-major order.

    Raises:
        ValueError: If the resized image is not 2-D (e.g. a colour image).
    """
    # Normalise the image size.
    image = cv2.resize(image, (64, 64))
    if image.ndim != 2:
        raise ValueError(
            'GetFeature expects a grayscale (2-D) image, got %d dimensions'
            % image.ndim
        )

    # Row-major flattening. The former per-pixel loop computed the index as
    # h*img_h+w, which is only correct because the resized image is square.
    return image.reshape(-1).astype(np.int16)
 
# Code executed by the process that writes data into the queue.
def read_image_to_queue(queue):
    """Read every .jpg below each class folder and put it on ``queue``.

    For each name in ``char_set`` the folder ``image_dir/<name>`` is walked;
    each image is loaded with ``cv_imread`` (grayscale mode) and queued as an
    ``(image, label)`` tuple, where ``label`` is the index of the folder name
    in ``char_set``. Afterwards ``read_process_n`` end markers ``(None, -1)``
    are queued, one per consumer process.

    Args:
        queue: Queue shared with the feature-extraction processes.

    Returns:
        True once all images and the end markers have been queued.
    """
    print('Process to write: %s' % os.getpid())
    try:
        for j, dirname in enumerate(char_set):  # dirname is a class folder name
            label = np.where(char_set==dirname)[0][0]  # index of the folder name in char_set
            print('序号：'+str(j),'读 '+dirname+' 文件夹...时间：',ctime() )
            for parent, _, filenames in os.walk(os.path.join(image_dir, dirname)):
                for filename in filenames:
                    # Case-insensitive so that e.g. "x.JPG" is not silently skipped.
                    if not filename.lower().endswith('.jpg'):
                        continue
                    image = cv_imread(os.path.join(parent, filename), 0)
                    queue.put((image, label))
    finally:
        # Always send the end markers, even when reading failed part-way:
        # otherwise every consumer would block forever on queue.get().
        for _ in range(read_process_n):
            queue.put((None, -1))

    print('读图结束!')
    return True
  
# Code executed by the processes that read data from the queue.
def _save_feature_batch(features, labels, lock, count):
    """Shuffle one batch, split it 80/20 into train/test and save four .npy files."""
    array_features, array_labels = ShuffledData(np.array(features), np.array(labels))

    # First 80% of the shuffled samples -> training set, the rest -> test set.
    split_x = int(array_features.shape[0] * 0.8)
    train_data, test_data = np.split(array_features, [split_x], axis=0)
    train_labels, test_labels = np.split(array_labels, [split_x], axis=0)

    # The shared counter supplies the file index; the lock keeps the
    # increment-and-read atomic and is released even if an error occurs.
    with lock:
        count.value += 1
        suffix = str(count.value) + '.npy'

    outputs = [
        (data_name+'_features_train_'+suffix, train_data),
        (data_name+'_labels_train_'+suffix, train_labels),
        (data_name+'_features_test_'+suffix, test_data),
        (data_name+'_labels_test_'+suffix, test_labels),
    ]
    for file_name, array in outputs:
        np.save(save_path+file_name, array)
    for file_name, _ in outputs:
        print(os.getpid(),'save:',file_name)


def extract_feature(queue,lock,count):
    """Take images from ``queue``, extract features and save them in batches.

    Items are ``(image, label)`` tuples; a label of ``-1`` is the end marker.
    Features from ``GetFeature`` are buffered, and the buffer is written out
    (shuffled, split 80/20, four .npy files) whenever it holds ``data_size``
    samples and once more when the end marker arrives. An empty buffer is
    never written.

    Args:
        queue: First-in-first-out queue filled by the image-reading process.
        lock: Lock held while the shared file counter is updated.
        count: Shared integer counter used to number the output files.
    """
    print('Process %s start reading...' % os.getpid())

    features = []  # extracted feature vectors of the current batch
    labels = []    # labels matching `features`
    while True:
        image, label = queue.get()
        finished = label == -1

        # Skip the save when nothing is buffered: the former code tried to
        # shuffle and save an empty batch when the end marker arrived right
        # after a flush (or when no image was read at all).
        if features and (finished or len(features) >= data_size):
            _save_feature_batch(features, labels, lock, count)
            features.clear()
            labels.clear()

        if finished:
            break

        # Feature extraction expects the grayscale image.
        features.append(GetFeature(image))
        labels.append(label)
        # NOTE: the original kept a disabled augmentation step here (repate_n
        # random shifts per image); it used helpers not defined in this file.

    print('Process %s is done!' % os.getpid())
 
if __name__=='__main__':
    # Script entry point: one producer process reads images into a bounded
    # queue, read_process_n consumer processes extract and save features.
    started_at = time.time()  # start of the wall-clock measurement

    # Objects created by the parent and handed to the child processes.
    image_queue = multiprocessing.Queue(maxsize=1000)  # image queue
    lock = multiprocessing.Lock()                      # guards the file counter
    count = multiprocessing.Value('i',0)               # shared file counter

    # Producer: puts images on the queue.
    producer = multiprocessing.Process(target=read_image_to_queue, args=(image_queue,))

    # Consumers: take images from the queue.
    consumers = [
        multiprocessing.Process(target=extract_feature, args=(image_queue,lock,count))
        for _ in range(read_process_n)
    ]

    # Start the producer first, then every consumer.
    producer.start()
    for worker in consumers:
        worker.start()

    # Wait until all child processes have finished.
    producer.join()
    for worker in consumers:
        worker.join()

    elapsed_hours = (time.time() - started_at) / 3600
    print('用时：%.6f 小时'% elapsed_hours)
    print ("读图提取特征存npy,运行结束！")

以上就是本文的全部内容，希望对大家的学习有所帮助，也希望大家多多支持三水点靠木。

python多进程读图提取特征存npy

- Author -

业余狙击手19

声明：登载此文出于传递更多信息之目的，并不意味着赞同其观点或证实其描述。

Python 相关文章推荐

Python中列表(list)操作方法汇总

Aug 18 Python

python实现合并两个数组的方法

May 16 Python

python中从str中提取元素到list以及将list转换为str的方法

Jun 26 Python

python用post访问restful服务接口的方法

Dec 07 Python

使用Django简单编写一个XSS平台的方法步骤

Mar 25 Python

Python学习笔记之抓取某只基金历史净值数据实战案例

Jun 03 Python

python字符串分割及字符串的一些常规方法

Jul 24 Python

Python numpy.zero() 初始化矩阵实例

Nov 27 Python

在tensorflow下利用plt画论文中loss,acc等曲线图实例

Jun 15 Python

解决Keras使用GPU资源耗尽的问题

Jun 22 Python

基于Python的接口自动化读写excel文件的方法

Jan 15 Python

Python机器学习之底层实现KNN

Jun 20 Python

Python中使用pypdf2合并、分割、加密pdf文件的代码详解

May 21 #Python

python+selenium实现简历自动刷新的示例代码

May 20 #Python

图文详解python安装Scrapy框架步骤

May 20 #Python

Python配置虚拟环境图文步骤

May 20 #Python

Python检测数据类型的方法总结

May 20 #Python

Python中的引用知识点总结

May 20 #Python

Python函数和模块的使用总结

May 20 #Python

You might like

?生?D片??C字串

2006/12/06 PHP

PHP缓存技术的使用说明

2011/08/06 PHP

使用php发送有附件的电子邮件-(PHPMailer使用的实例分析)

2013/04/26 PHP

codeigniter中view通过循环显示数组数据的方法

2015/03/20 PHP

PHP实现生成唯一会员卡号

2015/08/24 PHP

PHP抽象类和接口用法实例详解

2019/07/20 PHP

php高性能日志系统 seaslog 的安装与使用方法分析

2020/02/29 PHP

jQuery实现表单input中提示文字value随鼠标焦点移进移出而显示或隐藏的代码

2010/03/21 Javascript

javascript+mapbar实现地图定位

2010/04/09 Javascript

基于jquery的Repeater实现代码

2010/07/17 Javascript

一个分享按钮的插件使用介绍（可扩展，内附开发制作流程）

2011/09/19 Javascript

Javascript 按位左移运算符使用介绍(<<)

2014/02/04 Javascript

javascript实现简单的二级联动

2015/03/19 Javascript

node.js入门实例helloworld详解

2015/12/23 Javascript

JavaScript优化以及前端开发小技巧

2017/02/02 Javascript

nodejs开发——express路由与中间件

2017/03/24 NodeJs

Layui给switch添加响应事件的例子

2019/09/03 Javascript

JS highcharts实现动态曲线代码示例

2020/10/16 Javascript

python实现在sqlite动态创建表的方法

2015/05/08 Python

Pytorch之Variable的用法

2019/12/31 Python

TensorFlow实现checkpoint文件转换为pb文件

2020/02/10 Python

python GUI库图形界面开发之PyQt5复选框控件QCheckBox详细使用方法与实例

2020/02/28 Python

Python 没有main函数的原因

2020/07/10 Python

python和node.js生成当前时间戳的示例

2020/09/29 Python

linux mint中搜狗输入法导致pycharm卡死的问题

2020/10/28 Python

CSS3制作圆角图片和椭圆形图片

2016/07/08 HTML / CSS

送货司机岗位职责

2013/12/11 职场文书

KTV的创业计划书范文

2014/02/02 职场文书

七年级上册语文教学计划

2015/01/22 职场文书

招商银行工作证明

2015/06/17 职场文书

领导新年致辞2016

2015/07/29 职场文书

2015年党风廉政建设个人总结

2015/08/18 职场文书

体育委员竞选稿

2015/11/21 职场文书

教师个人教学反思

2016/02/23 职场文书

python opencv常用图形绘制方法(线段、矩形、圆形、椭圆、文本)

2021/04/12 Python

Win11 KB5015814遇安装失败影响开始菜单性能解决方法

2022/07/15 数码科技