调用百度文字识别接口实现文字识别

234 阅读 0 评论 155 点赞

我是靠谱客的博主粗暴滑板，这篇文章主要介绍调用百度文字识别接口实现文字识别，现在分享给大家，希望可以做个参考。

目标：

1.学习使用百度AI开放平台进行文字识别（OCR）

百度AI有两种开发方式：REST API 和 SDK；本文使用 Python SDK 进行开发

文档地址：https://ai.baidu.com/docs#/OCR-Python-SDK/53932383

2.将学到的内容：多线程，字符串转变为字典的方法

3.生成exe格式可执行文件

pyinstaller -F xxx.py 参考文档 https://blog.csdn.net/qq_35203425/article/details/78568141

4.python-Tkinter图形界面开发

1.单选按钮控件 Radiobutton 2.多选框按钮Checkbutton

问题点：

1.界面上的模块不会随窗口放大产生变化

2.未添加异常捕获

界面：

代码：

# 导包
from aip import AipOcr       #百度文字识别api
from tkinter import *         #python界面开发
from tkinter import ttk
from tkinter.filedialog import askopenfilename,askdirectory
import time
import json

class Text_Recognition():
    """Tkinter demo window that sends a local image to Baidu OCR endpoints.

    The class owns one ``AipOcr`` SDK client and one Tk root window. Every
    recognition method is wired to a button created in :meth:`interface`: it
    clears the text widget of its panel, sends the image bytes loaded by
    :meth:`image_open` to one SDK endpoint and writes the formatted answer
    (or an error message) back into that widget.
    """

    def __init__(self, app_id=None, api_key=None, secret_key=None):
        """Create the SDK client, the Tk root window and the UI state variables.

        :param app_id: Baidu APP ID; falls back to the built-in demo value.
        :param api_key: Baidu API key; falls back to the built-in demo value.
        :param secret_key: Baidu secret key; falls back to the built-in demo value.
        """
        # NOTE(security): the fallback credentials below are committed in
        # source. Prefer passing your own values through the constructor.
        self.APP_ID = app_id or '17574626'
        self.API_KEY = api_key or 'nITPuSyAT6fwoRs029RR7zMl'
        self.SECRET_KEY = secret_key or 'BjknUD067cBoEmv30GE7TXrFpdceHLgP'
        self.client = AipOcr(self.APP_ID, self.API_KEY, self.SECRET_KEY)
        self.myWindow = Tk()
        self.name = ''                   # path of the currently loaded image
        self.image = None                # raw bytes of the currently loaded image
        self.image_path = StringVar()    # image path shown in the entry box
        self.var_1 = StringVar()         # precision: '0' normal, '1' high
        self.var_2 = StringVar()         # 'true'/'false': request vertex locations
        self.var_3 = StringVar()         # 'true'/'false': detect image direction
        self.var_4 = StringVar()         # index into card_list of the selected card type
        self.card_list = ['身份证正面', '身份证背面', '银行卡', '营业执照', '驾驶证', '行驶证', '车牌']

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _api_error(result):
        """Return a printable message if *result* is an error payload, else ''."""
        if isinstance(result, dict) and 'error_code' in result:
            return '识别失败({}):{}'.format(result['error_code'], result.get('error_msg', ''))
        return ''

    def _require_image(self, widget):
        """Return True when an image is loaded; otherwise write a hint into *widget*."""
        if getattr(self, 'image', None) is not None:
            return True
        widget.insert('insert', '请先打开图片')
        return False

    def _show(self, widget, result, render):
        """Insert ``render(result)`` into *widget*, or the error message on failure."""
        error = self._api_error(result)
        widget.insert('insert', error if error else render(result))

    @staticmethod
    def _format_fields(result):
        """Render a ``{field: {'words': text}}`` result as one ``field:text`` per line."""
        return ''.join(
            '{}:{}\n'.format(field, value['words'])
            for field, value in result["words_result"].items()
        )

    @staticmethod
    def _format_bankcard(result):
        """Render the card number, expiry date and bank name of a bank-card result."""
        card = result['result']
        return '银行卡卡号：{}\n有效期：{}\n银行名称{}'.format(
            card['bank_card_number'], card['valid_date'], card['bank_name'])

    @staticmethod
    def _format_plate(result):
        """Render colour and number of the first plate in a licence-plate result."""
        plate = result["words_result"][0]
        return '车牌颜色:{}\n车牌号:{}'.format(plate["color"], plate["number"])

    # ------------------------------------------------------------------
    # Result formatting
    # ------------------------------------------------------------------
    def format(self, result, direction='false'):
        """Turn a ``words_result`` list into a multi-line report.

        :param result: SDK response holding ``words_result`` (a list of items
            with ``words`` and, normally, ``location``) and optionally
            ``direction``.
        :param direction: ``'true'`` to include the detected direction line.
        :return: header, separator, optional direction line and one line per
            recognised item, each terminated by a newline.
        """
        lines = ['文字识别结果', '-' * 25]
        if direction == 'true' and 'direction' in result:
            lines.append('方向:{}'.format(result['direction']))
        for item in result['words_result']:
            box = item.get('location')
            if box:
                lines.append('(L:{},T:{},W:{},H:{}):{}'.format(
                    box["left"], box["top"], box["width"], box["height"], item['words']))
            else:
                # Some endpoints omit the bounding box; keep the text anyway.
                lines.append('{}'.format(item['words']))
        return '\n'.join(lines) + '\n'

    # ------------------------------------------------------------------
    # Button callbacks
    # ------------------------------------------------------------------
    def image_open(self):
        """Ask the user for an image file and load its bytes into ``self.image``.

        Cancelling the dialog leaves the previously loaded image untouched.
        """
        chosen = askopenfilename()
        if not chosen:
            # The dialog returns an empty value on cancel; open('') would raise.
            return
        with open(chosen, 'rb') as fp:
            data = fp.read()
        # Update state only after the read succeeded.
        self.name = chosen
        self.image = data
        self.image_path.set(chosen)

    def basicgeneral(self):
        """Run general text recognition at the precision selected in the UI."""
        self.text_21.delete(1.0, 'end')
        if not self._require_image(self.text_21):
            return
        direction = str(self.var_3.get())
        options = {
            "detect_direction": direction,                    # detect image orientation
            "vertexes_location": str(self.var_2.get()),       # return polygon vertices
        }
        precision = self.var_1.get()
        if precision == '0':
            recognise = self.client.general      # normal precision, with locations
        elif precision == '1':
            recognise = self.client.accurate     # high precision, with locations
        else:
            return
        result = recognise(self.image, options)
        self._show(self.text_21, result, lambda r: self.format(r, direction))

    def webimage(self):
        """Run web-image text recognition and show the raw response."""
        self.text_31.delete(1.0, 'end')
        if not self._require_image(self.text_31):
            return
        options = {"detect_direction": "true", "detect_language": "true"}
        result = self.client.webImage(self.image, options)
        self.text_31.insert('insert', str(result))

    def card_identification(self):
        """Recognise the card/certificate type selected by ``var_4``.

        The selected index follows ``card_list``: ID card front/back, bank
        card, business licence, driving licence, vehicle licence, plate.
        """
        self.text_41.delete(1.0, 'end')
        if not self._require_image(self.text_41):
            return
        ident_id = int(self.var_4.get())  # the Tk variable stores a string
        # index -> (SDK call, renderer for a successful response)
        dispatch = {
            0: (lambda: self.client.idcard(self.image, "front"), self._format_fields),
            1: (lambda: self.client.idcard(self.image, "back"), self._format_fields),
            2: (lambda: self.client.bankcard(self.image), self._format_bankcard),
            3: (lambda: self.client.businessLicense(self.image), self._format_fields),
            4: (lambda: self.client.drivingLicense(
                self.image, {"detect_direction": "true"}), self._format_fields),
            5: (lambda: self.client.vehicleLicense(
                self.image, {"detect_direction": "true", "accuracy": "normal"}),
                self._format_fields),
            6: (lambda: self.client.licensePlate(
                self.image, {"multi_detect": "true"}), self._format_plate),
        }
        entry = dispatch.get(ident_id)
        if entry is None:
            return
        call, render = entry
        self._show(self.text_41, call(), render)

    def table_identification(self, poll_interval=10, max_polls=30):
        """Submit an asynchronous table recognition job and show its result.

        :param poll_interval: seconds to wait between two status queries.
        :param max_polls: maximum number of waits before giving up, so the
            window cannot hang forever on a job that never completes.
        """
        self.text_51.delete(1.0, 'end')
        if not self._require_image(self.text_51):
            return
        submitted = self.client.tableRecognitionAsync(self.image)
        error = self._api_error(submitted)
        if error:
            self.text_51.insert('insert', error)
            return
        request_id = submitted['result'][0]['request_id']
        options = {"result_type": "json"}

        for attempt in range(max_polls + 1):
            status = self.client.getTableRecognitionResult(request_id, options)
            error = self._api_error(status)
            if error:
                self.text_51.insert('insert', error)
                return
            if status['result']['ret_code'] == 3:  # 3 is the code the job reports when finished
                self.text_51.insert('insert', status['result']['result_data'])
                return
            if attempt < max_polls:
                time.sleep(poll_interval)
        self.text_51.insert('insert', '表格识别超时，请稍后重试')

    def ticket_identification(self):
        """Run general receipt recognition and show the formatted result."""
        self.text_61.delete(1.0, 'end')
        if not self._require_image(self.text_61):
            return
        options = {
            "recognize_granularity": "big",
            "probability": "true",
            "accuracy": "normal",
            "detect_direction": "true",
        }
        result = self.client.receipt(self.image, options)
        self._show(self.text_61, result, lambda r: self.format(r, "true"))

    # ------------------------------------------------------------------
    # Window layout
    # ------------------------------------------------------------------
    def interface(self):
        """Build all widgets on the root window and enter the Tk main loop."""
        root = self.myWindow
        root.title('文字识别演示程序')
        root.geometry("800x600+150+100")  # width x height + x offset + y offset

        # Image chooser row.
        Entry(root, textvariable=self.image_path, width=40).grid(
            row=0, column=0, sticky='SEW', pady=5, padx=5)
        Button(root, text='打开图片', command=self.image_open, width=10, height=1).grid(
            row=0, column=1, sticky='SEW', pady=5, padx=5)

        # One labelled frame per recognition feature.
        frm_2 = LabelFrame(root, text='通用文字识别', width=380, height=400)
        frm_2.grid(row=1, column=0, ipadx=5, ipady=5, padx=10, pady=5,
                   rowspan=6, columnspan=2, sticky='SNEW')
        frm_3 = LabelFrame(root, text='网络图片文字识别', width=380, height=100)
        frm_3.grid(row=7, column=0, ipadx=5, ipady=5, padx=10, pady=5,
                   rowspan=2, columnspan=2, sticky='SNEW')
        frm_4 = LabelFrame(root, text='卡证识别', width=380, height=180)
        frm_4.grid(row=0, column=2, ipadx=5, ipady=5, padx=10, pady=5,
                   rowspan=5, sticky='SNEW')
        frm_5 = LabelFrame(root, text='表格文字识别', width=380, height=105)
        frm_5.grid(row=5, column=2, ipadx=5, ipady=5, padx=10, pady=5,
                   rowspan=2, sticky='SNEW')
        frm_6 = LabelFrame(root, text='通用票据识别', width=380, height=105)
        frm_6.grid(row=7, column=2, ipadx=5, ipady=5, padx=10, pady=5,
                   rowspan=2, sticky='SNEW')

        # General recognition panel: precision, options, button and output.
        Radiobutton(frm_2, text='普通精度', variable=self.var_1, value='0').grid(
            row=0, column=0, sticky='EW', pady=5, padx=5)
        Radiobutton(frm_2, text='高精度', variable=self.var_1, value='1').grid(
            row=0, column=1, sticky='EW', pady=5, padx=5)
        self.var_1.set(0)  # preselect normal precision
        Checkbutton(frm_2, text='含位置信息', variable=self.var_2,
                    onvalue='true', offvalue='false').grid(
            row=1, column=0, sticky='EW', pady=5, padx=5)
        self.var_2.set('false')
        Checkbutton(frm_2, text='检查方向', variable=self.var_3,
                    onvalue='true', offvalue='false').grid(
            row=1, column=1, sticky='EW', pady=5, padx=5)
        self.var_3.set('false')
        Button(frm_2, text='识别图片', command=self.basicgeneral, width=10, height=1).grid(
            row=1, column=2, sticky='EW', pady=5, padx=5)
        self.text_21 = Text(frm_2, width=50, height=15)
        self.text_21.grid(row=2, column=0, sticky='EW', pady=5, padx=5, columnspan=3)

        # Web image panel.
        Button(frm_3, text='识别图片', command=self.webimage, width=4, height=1).grid(
            row=0, column=3, sticky='EW', pady=5, padx=5)
        self.text_31 = Text(frm_3, width=50, height=8)
        self.text_31.grid(row=1, column=0, sticky='EW', pady=5, padx=5, columnspan=4)

        # Card panel: radio buttons laid out four per row.
        for index, label in enumerate(self.card_list):
            grid_row, grid_col = divmod(index, 4)
            Radiobutton(frm_4, text=label, variable=self.var_4, value=index).grid(
                row=grid_row, column=grid_col, sticky='EW', pady=5, padx=5)
        self.var_4.set(1)
        Button(frm_4, text='识别图片', command=self.card_identification, width=6, height=1).grid(
            row=1, column=3, sticky='EW', pady=5, padx=5)
        self.text_41 = Text(frm_4, width=30, height=6)
        self.text_41.grid(row=2, column=0, sticky='EW', pady=5, padx=5, columnspan=4)

        # Table panel.
        Button(frm_5, text='识别图片', command=self.table_identification, width=4, height=1).grid(
            row=0, column=3, sticky='EW', pady=5, padx=5)
        self.text_51 = Text(frm_5, width=50, height=6)
        self.text_51.grid(row=1, column=0, sticky='EW', pady=5, padx=5, columnspan=4)

        # Receipt panel.
        Button(frm_6, text='识别图片', command=self.ticket_identification, width=4, height=1).grid(
            row=0, column=3, sticky='EW', pady=5, padx=5)
        self.text_61 = Text(frm_6, width=50, height=8)
        self.text_61.grid(row=1, column=0, sticky='EW', pady=5, padx=5, columnspan=4)

        # Hand control to Tk until the window is closed.
        self.myWindow.mainloop()

if __name__ == '__main__':
    # Script entry point: build the demo window and block in its main loop.
    Text_Recognition().interface()