當前位置:成語大全網 - 古籍修復 - Python如何調取OCR識別模塊識別發票並輸出到Excel?

Python如何調取OCR識別模塊識別發票並輸出到Excel?

import qqai

from os import path

from win32com.client import Dispatch

import os

from datetime import datetime

def file_path():
    """Initialise the module-level paths used by the rest of the script.

    Sets three globals, all derived from the current working directory:

    * ``path_this_file`` -- absolute working directory with a trailing
      backslash appended.
    * ``path_excel`` -- ``信息導出.xlsx`` inside that directory.
    * ``path_pic_file`` -- the ``照片`` entry inside that directory.

    Returns:
        None. The results are published through the globals above.
    """
    global path_this_file, path_excel, path_pic_file

    # NOTE: despite the name, this is the current working directory
    # (abspath('.')), not the directory that contains this source file.
    path_this_file = path.abspath('.') + "\\"
    path_excel = path_this_file + '信息導出.xlsx'
    path_pic_file = path_this_file + '照片'

def get_pic_name():
    """Return the path of every entry in the picture directory.

    Reads the module-level ``path_pic_file`` (set by ``file_path()``) and
    lists its contents with ``os.listdir``.

    Returns:
        A list of strings, each ``path_pic_file + '\\' + entry_name``.
        Entries are sorted by name so that processing order (and therefore
        the order of rows written later) is deterministic; ``os.listdir``
        itself returns names in arbitrary order.
    """
    return [
        path_pic_file + '\\' + pic
        for pic in sorted(os.listdir(path_pic_file))
    ]

def HandwritingOCRImage(filename):
    """Run handwriting OCR on one image and return the recognised strings.

    Uses the module-level ``app_id`` and ``app_key`` to build a
    ``qqai.vision.ocr.HandwritingOCR`` client.

    Args:
        filename: Path of the image file. It is opened in binary mode and
            the open file object is handed to the client's ``run`` method.

    Returns:
        A list holding the ``'itemstring'`` value of every entry in
        ``result['data']['item_list']``, in response order, with any string
        found in ``useless_list`` (currently only ``'登記表'``) left out.

    Raises:
        OSError: If ``filename`` cannot be opened.
        KeyError: If the response lacks ``'data'``, ``'item_list'`` or
            ``'itemstring'``.
    """
    robot = qqai.vision.ocr.HandwritingOCR(app_id, app_key)
    # Recognised strings that carry no information and are dropped.
    useless_list = ['登記表']

    # Only the OCR call needs the file handle; release it right after.
    with open(filename, 'rb') as image_file:
        result = robot.run(image_file)

    return [
        item['itemstring']
        for item in result['data']['item_list']
        if item['itemstring'] not in useless_list
    ]

def get_useful_list(value_list):
    """Extract the value that follows each known field label.

    Walks ``value_list`` in order. Whenever an element is one of the labels
    in ``key_list``, the element immediately after it is taken as that
    field's value.

    Args:
        value_list: Sequence of recognised strings (labels and values
            interleaved), e.g. the output of ``HandwritingOCRImage``.

    Returns:
        A list with one entry per label occurrence, in order of appearance:

        * ``''`` when the label is directly followed by another label, or
          is the last element (no value available);
        * ``"'" + str(value)`` for the ``'證件號碼'`` label;
        * the following element unchanged otherwise.
    """
    key_list = ('姓名', '性別', '出生日期', '國家/地區', '民族', '職業',
                '手機號碼', '固定電話', '證件類型', '證件有效期限',
                '證件號碼', '通訊地址', '郵編')

    useful_list = []
    total = len(value_list)

    # enumerate() gives the position of *this* occurrence; list.index()
    # would always return the first one and mis-read repeated labels.
    for position, words in enumerate(value_list):
        if words not in key_list:
            continue

        next_index = position + 1

        # A label at the very end has nothing after it: treat as empty
        # instead of raising IndexError.
        if next_index >= total or value_list[next_index] in key_list:
            useful_list.append('')
        elif words == '證件號碼':
            # Leading apostrophe makes Excel keep the ID number as text
            # instead of displaying it in scientific notation.
            useful_list.append("'" + str(value_list[next_index]))
        else:
            useful_list.append(value_list[next_index])

    return useful_list

def put_into_excel(useful_list):
    """Append ``useful_list`` as one new row in ``Sheet1`` of the workbook.

    Starts Excel through COM, opens the workbook at the module-level
    ``path_excel``, writes each element (converted with ``str``) into
    consecutive columns starting at column 1, saves, and shuts Excel down.

    Args:
        useful_list: Sequence of cell values for the new row.

    Returns:
        None.

    The workbook is always closed and Excel always quit, even when an error
    occurs part-way, so no hidden Excel process is left behind.
    """
    xl = Dispatch("Excel.Application")
    try:
        xl.Visible = False  # True shows the Excel window, False hides it.
        xl.DisplayAlerts = 0

        excel_input = xl.Workbooks.Open(path_excel)
        try:
            sheet = excel_input.Sheets('Sheet1')

            # NOTE(review): UsedRange.Rows.Count equals the last used row
            # only when the used range starts at row 1 -- confirm that the
            # sheet always has content (e.g. a header) in row 1.
            target_row = sheet.UsedRange.Rows.Count + 1

            for column, value in enumerate(useful_list, start=1):
                sheet.Cells(target_row, column).Value = str(value)

            excel_input.Save()
        finally:
            # Changes were already saved above on success; on failure the
            # half-written row is discarded rather than silently kept.
            excel_input.Close(False)
    finally:
        # Canonical capitalisation: early-bound COM wrappers are
        # case-sensitive, so ``quit`` may not resolve there.
        xl.Quit()

# Wall-clock start, used for the elapsed-time report at the end.
starttime = datetime.now()

# Tencent AI Open Platform -- image recognition credentials.
# app_id / app_key can be obtained for free by registering on the
# Tencent AI Open Platform.
# NOTE(review): credentials are hard-coded in source; consider loading them
# from the environment or a config file instead.
app_id = '2110179251'
app_key = '******'

file_path()
pic_list = get_pic_name()

# OCR each picture, pick out the field values, and append them as one row.
for filename in pic_list:
    value_list = HandwritingOCRImage(filename)
    useful_list = get_useful_list(value_list)
    put_into_excel(useful_list)

endtime = datetime.now()

# total_seconds() covers the whole interval; ``.seconds`` alone drops the
# ``days`` component of the timedelta.
total_time = int((endtime - starttime).total_seconds())

print(">>>成功錄入信息{}條,總***耗時{}秒!".format(len(pic_list),total_time))