Commit 513df6d1 by 赵增煜

增加文心一言采集

parent 1b0cfeb8
......@@ -3,10 +3,11 @@ from flask.cli import AppGroup
from applications.extensions import db
from applications.models import AiFileParser
from applications.common.utils.kimi_ai import KimiAi
from applications.common.utils.yiyan_ai import YiyanAi
parser_cli = AppGroup("parser")
@parser_cli.command("init")
@parser_cli.command("kimi_init")
def init():
def kimi_init():
# 常驻脚本扫描数据库中是否存在未解析文件
window = KimiAi()
window.open_url('https://kimi.moonshot.cn')
......@@ -18,7 +19,7 @@ def init():
# path = os.path.join(*path_parts)
# print(path)
db.session.commit()
file_parser_list = AiFileParser.query.filter(AiFileParser.status==0).all()
file_parser_list = AiFileParser.query.filter(AiFileParser.status==0,AiFileParser.parser_engine=="kimi").all()
print(len(file_parser_list),file_parser_list)
if len(file_parser_list) > 0 :
for file_parser in file_parser_list:
......@@ -31,7 +32,7 @@ def init():
# print(path)
# exit()
# prompt = f"现在需要你帮我把编号为{file_parser.file_id}的pdf中表格数据识别解析出来,表格数据存在键值对对应关系,需要包含表头信息(表头信息中包含pdf编号)和表体信息信息(商品list格式),以markdown的json格式输出,只输出解析后的json数据"
prompt = f"pdf编号:{file_parser.file_id},解析pdf,识别出来的内容以json格式返回,json中需包含pdf编号,商品可能存在多条,货物(商品)信息为list格式可包含多商品,表头信息包含如下字段发货人、发货人编号、统一社会信用码、收货人、离境口岸、运抵国、指运港、件数、总毛重、总净重,表体信息包含如下字段商品编号、商品名称、规格型号、数量、单位、单价、总价、原产国、目的国、境内货源地、包装种类、数量、毛重、净重,识别不到的信息为空值,并且只有一个表头,如果pdf中存在多个报关单则把信息拆分成多个json返回"
prompt = f"pdf编号:{file_parser.file_id},解析阅读pdf,识别出来的内容以list[json]格式返回,json中需包含pdf编号,商品可能存在多条,货物(商品)信息为list格式可包含多商品,表头信息包含如下字段发货人、发货人编号、统一社会信用码、收货人、离境口岸、运抵国、指运港、件数、毛重(单位千克,同一框中数字)、净重(单位千克,同一框中数字),表体信息包含如下字段项号、商品编号、商品名称、规格型号、数量、单位、单价、总价、原产国、目的国、境内货源地、包装种类、数量、毛重、净重,识别不到的信息为空值,并且只有一个表头"
res = window.parser(path, prompt)
AiFileParser.query.filter_by(id=file_parser.id).update({ "status": 2,"kimi_ai_parser": res.strip()},synchronize_session=False)
db.session.commit()
......@@ -42,8 +43,45 @@ def init():
time.sleep(10)
continue
@parser_cli.command("yiyan_init")
def yiyan_init():
    """Resident worker that feeds unparsed "yiyan" files to the Yiyan web chat.

    Opens the Yiyan site, pauses so an operator can log in, then loops
    forever: every pass it loads the AiFileParser rows with status 0 and
    parser_engine "yiyan", sends each file through YiyanAi.parser and stores
    the stripped answer with status 2.
    """
    window = YiyanAi()
    window.open_url('https://yiyan.baidu.com/')
    # Fixed pause before polling starts; the operator logs in during it.
    time.sleep(40)
    while True:
        # NOTE(review): presumably ends the session's open transaction so
        # rows inserted by the web app become visible — confirm.
        db.session.commit()
        pending = AiFileParser.query.filter(
            AiFileParser.status == 0,
            AiFileParser.parser_engine == "yiyan",
        ).all()
        print(len(pending), pending)
        if len(pending) == 0:
            print("没有未解析文件")
            time.sleep(10)
            continue
        for record in pending:
            print(record.id, record.file_path)
            # file_path is stored as a JSON list of path segments.
            segments = json.loads(record.file_path)
            drive = segments[0]
            # A bare drive such as "D:" needs a trailing separator, otherwise
            # os.path.join yields a drive-relative path.
            if drive.endswith(':') and not drive.endswith(os.sep):
                segments[0] = drive + os.sep
            pdf_path = os.path.join(*segments)
            print(segments)
            prompt = f"pdf编号:{record.file_id},解析阅读pdf理解pdf中报关单表格信息,识别出来的内容以list[json]格式返回,json中需包含pdf编号,报关单商品详情可能存在多条,报关单商品详情为list格式可包含多商品,报关单头部信息包含如下字段 :发货人、统一社会信用码(由18位数字或大写拉丁字母组成)、收货人、件数(纯数字)、毛重(千克)、净重(千克),报关单商品详情包含如下字段:项号、商品编号、商品名称、规格型号、数量(纯数字包括浮点型)、单位(千克、kg、pcs、个)、单价(纯数字包含浮点型)、总价(纯数字包含浮点型)、货币(USD、RMB等)、原产国、毛重、净重......,识别不到的信息为空值。"
            answer = window.parser(pdf_path, prompt)
            # The Yiyan answer is written to the kimi_ai_parser column, the
            # same column the Kimi worker uses.
            AiFileParser.query.filter_by(id=record.id).update(
                {"status": 2, "kimi_ai_parser": answer.strip()},
                synchronize_session=False,
            )
            db.session.commit()
            time.sleep(10)
@parser_cli.command("")
def test():
......
import base64
from datetime import datetime
from io import BytesIO
from random import choice
import time, os, sys, re, random, json
import requests
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import InvalidSelectorException
from urllib.parse import urlparse
#from applications.models import User, Role, Dept, Power
import pandas as pd
from bs4 import BeautifulSoup
from PIL import Image
from selenium.webdriver import ActionChains
import numpy as np
import base64
import io,pyautogui,pyperclip
from pathlib import Path
from pywinauto import Desktop
from pywinauto.application import Application
from pywinauto.keyboard import send_keys
from db import dbModel
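# Notes:
# 1. Kimi QR-code login: after the page opens there is a 15 s wait for the scan. TODO: detect the logged-in state instead of sleeping.
# 2. The chromedriver build must match the Chrome version installed on Windows.
# 3. Upload paths must be absolute and must use backslashes; with forward slashes the file dialog's input box does not recognise the path.
# 4. Upload parsing and answer completion are detected by polling. TODO: optimise.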
def chrome_public_sec(driver_path=r"./chromedriver/chromedriver127.exe"):
    """Start a maximized Chrome WebDriver session with automation hints reduced.

    Args:
        driver_path: Path to the chromedriver executable. The default is the
            chromedriver 127 build under ./chromedriver, resolved against the
            current working directory. The driver build has to match the
            Chrome version installed on the machine.

    Returns:
        The started ``webdriver.Chrome`` instance.
    """
    service = Service(executable_path=driver_path)
    browser_opt = webdriver.ChromeOptions()
    # Turn off the Blink "AutomationControlled" feature.
    browser_opt.add_argument("--disable-blink-features=AutomationControlled")
    browser_opt.add_argument("--start-maximized")
    # Removes the "browser is controlled by automated software" banner on
    # recent driver versions.
    browser_opt.add_experimental_option('excludeSwitches', ['enable-automation'])
    # Options that were tried and left disabled in the original: headless
    # mode, a custom user-agent, --disable-gpu, disabling extensions, images,
    # cache and popup blocking, ignoring certificate errors, attaching through
    # debuggerAddress, and a custom Chrome binary_location.
    return webdriver.Chrome(service=service, options=browser_opt)
class YiyanAi(object):
    """Browser-automation client for the Wenxin Yiyan web chat.

    Drives Chrome through Selenium, fills the native Windows "打开" (Open)
    file dialog through pywinauto, and moves the prompt and the answer through
    the system clipboard (pyperclip / pyautogui).
    """

    def __init__(self):
        # Default page for open_url(None). This used to be
        # https://kimi.moonshot.cn, apparently a leftover from the Kimi
        # driver, which made open_url(None) open the wrong site.
        self.index_url = 'https://yiyan.baidu.com/'
        self.browser = chrome_public_sec()
        self.browser.get('chrome://settings/clearBrowserData')
        # Registered to run on every new document: navigator.webdriver then
        # reads as undefined.
        self.browser.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
            "source": """
            Object.defineProperty(navigator, 'webdriver', {
            get: () => undefined,
            configurable: true
            });
            """
        })

    def quit(self):
        """Close the browser and end the WebDriver session."""
        self.browser.quit()

    def open_url(self, url):
        """Navigate to url, or to self.index_url when url is None.

        If navigation raises, the page is refreshed instead of failing.
        """
        target = self.index_url if url is None else url
        try:
            self.browser.get(target)
        except Exception:
            # Was a bare ``except:``, which also swallowed KeyboardInterrupt
            # and SystemExit.
            self.browser.refresh()

    def search(self, dict, key):
        """Placeholder; not implemented."""
        pass

    def _hover_and_click(self, xpath, announce=None):
        """Hover over the first element matching xpath, then click it.

        Best effort: any failure is printed and swallowed, as in the
        original inline code.
        """
        try:
            element = self.browser.find_element(By.XPATH, xpath)
            ActionChains(self.browser).move_to_element(element).perform()
            time.sleep(1)
            if announce is not None:
                print(announce)
            element.click()
        except Exception as e:
            print(e)

    def _submit_open_dialog(self, pdf_path, announce_click=False):
        """Type pdf_path into the Windows "打开" dialog and press its open button."""
        app = Application().connect(title='打开')
        # Bound outside the try so the visibility check below can never hit
        # an unbound name.
        file_dialog = app.window(title='打开')
        try:
            file_dialog.wait('visible', timeout=10)
            file_dialog.child_window(class_name="Edit").set_text(pdf_path)
        except Exception as e:
            print(f"无法定位文件对话框: {e}")
        time.sleep(3)
        if file_dialog.is_visible():
            button_new = file_dialog.child_window(title="打开(&O)", class_name="Button")
            button_new.wait('ready', timeout=10)
            if button_new.is_enabled():
                button_new.double_click()
                if announce_click:
                    print("double_click")
            else:
                print("打开按钮被禁用了")

    def _wait_until_text_gone(self, texts, waiting_msg, done_msg=None):
        """Poll once per second until no element contains any of texts.

        There is no timeout: this blocks for as long as a marker text stays
        on the page.
        """
        while any(
            self.browser.find_elements(By.XPATH, f"//*[contains(text(), '{text}')]")
            for text in texts
        ):
            print(waiting_msg)
            time.sleep(1)
        if done_msg is not None:
            print(done_msg)

    def test(self, pdf_path, prompt: str):
        """Kimi-page flow: type the prompt, upload pdf_path, send, copy the answer.

        Uses Kimi selectors and messages. Returns the clipboard text read
        after the copy click.
        """
        # Prompt input box (class prefix editorContentEditable_).
        try:
            textarea_ele = self.browser.find_element(By.CSS_SELECTOR, "[class^='editorContentEditable_']")
            textarea_ele.send_keys(prompt)
        except Exception:
            print("文本输入框不见了")
        time.sleep(3)
        # Upload button.
        try:
            uploaded_ele = self.browser.find_element(By.CSS_SELECTOR, "[data-testid='msh-chatinput-upload-button']")
            uploaded_ele.click()
        except Exception:
            print("上传按钮不见了")
        time.sleep(3)
        self._submit_open_dialog(pdf_path, announce_click=True)
        # Wait while the page still shows the "parsing" marker.
        try:
            time.sleep(3)
            self._wait_until_text_gone(('解析中...',), "正在等待kimi解析", "kimi解析成功")
        except Exception as e:
            print(e)
        # Send the message.
        self.browser.find_element(By.ID, "send-button").click()
        time.sleep(2)
        # Wait while the "stop output" control is still shown.
        try:
            self._wait_until_text_gone(('停止输出',), "正在等待kimi输出", "kimi输出成功")
        except Exception:
            print("kimi,停止输出异常")
        time.sleep(1)
        # Click the copy control.
        self._hover_and_click("//*[contains(text(), '复制')]")
        time.sleep(3)
        json_text = pyperclip.paste()
        print(json_text)
        return json_text

    def parser(self, pdf_path: str, prompt: str):
        """Upload pdf_path to Yiyan, send prompt and return the copied answer.

        Args:
            pdf_path: Path typed into the native file dialog.
            prompt: Text pasted into the chat input through the clipboard.

        Returns:
            The clipboard text after clicking the last "复制代码" button.

        Raises:
            IndexError: If no "复制代码" button is on the page once the
                output has finished.
        """
        # Open the file panel, then the upload drop zone.
        self._hover_and_click("//*[contains(text(), '文件')]", "开始点击文件")
        time.sleep(2)
        self._hover_and_click("//*[contains(text(), '点击上传或拖入文档')]", "开始点击上传")
        time.sleep(3)
        self._submit_open_dialog(pdf_path)
        # Wait while the page still shows the uploading or reading markers.
        try:
            time.sleep(3)
            self._wait_until_text_gone(('上传中', '阅读中'), "正在等待yiyan解析")
        except Exception as e:
            print(e)
        # The prompt goes through the clipboard and a Ctrl+V keystroke.
        pyperclip.copy(prompt)
        input_box = self.browser.find_element(By.CLASS_NAME, 'yc-editor-wrapper')
        print(input_box)
        ActionChains(self.browser).move_to_element(input_box).perform()
        time.sleep(1)
        input_box.click()
        time.sleep(1)
        pyautogui.hotkey('ctrl', 'v')
        time.sleep(1)
        # Enter sends the message.
        pyautogui.press('enter')
        # Wait while the "stop generating" control is still shown.
        try:
            time.sleep(3)
            self._wait_until_text_gone(('停止生成',), "正在等待yiyan输出", "yiyan输出成功")
        except Exception:
            print("yiyan,停止输出异常")
        # Copy the answer through the last "copy code" button on the page.
        copy_eles = self.browser.find_elements(By.XPATH, "//*[contains(text(), '复制代码')]")
        if not copy_eles:
            # Same exception type as the original copy_eles[-1], with a
            # message that names the cause.
            raise IndexError("no '复制代码' button found on the page; nothing to copy")
        copy_ele = copy_eles[-1]
        self.browser.execute_script("arguments[0].scrollIntoView();", copy_ele)
        time.sleep(3)
        copy_ele.click()
        time.sleep(1)
        json_data = pyperclip.paste()
        print(json_data)
        return json_data
if __name__ == '__main__':
    # Manual smoke test: open Yiyan, wait for a manual login, parse one
    # local PDF, then keep the browser open for inspection.
    window = YiyanAi()
    window.open_url('https://yiyan.baidu.com/')
    # The message now matches the actual 30 s pause (it used to say 20s).
    print("等待30s,先扫码登录!")
    time.sleep(30)
    prompt = "pdf编号:12313123213123123,解析阅读pdf理解pdf中报关单表格信息,识别出来的内容以list[json]格式返回,json中需包含pdf编号,报关单商品详情可能存在多条,报关单商品详情为list格式可包含多商品,报关单头部信息包含如下字段 :发货人、统一社会信用码(由18位数字或大写拉丁字母组成)、收货人、件数(纯数字)、毛重(千克)、净重(千克),报关单商品详情包含如下字段:项号、商品编号、商品名称、规格型号、数量(纯数字包括浮点型)、单位(千克、kg、pcs、个)、单价(纯数字包含浮点型)、总价(纯数字包含浮点型)、货币(USD、RMB等)、原产国、毛重、净重......,识别不到的信息为空值。"
    file_path = r"D:\www\ImageAiParser\tmp\pdf\3.pdf"
    window.parser(file_path, prompt)
    # Keep the process and its browser alive. Sleeping replaces the original
    # ``while True: pass``, which spun a CPU core.
    while True:
        time.sleep(1)
\ No newline at end of file
......@@ -9,6 +9,7 @@ class AiFileParser(db.Model):
gpt_ai_parser = db.Column(db.Text, nullable=False)
kimi_ai_parser = db.Column(db.Text, nullable=False)
file_id = db.Column(db.String(255), nullable=False)
parser_engine = db.Column(db.String(255), nullable=False)
prompt_id = db.Column(db.Integer, nullable=False)
status = db.Column(db.Integer, nullable=False)
create_date = db.Column(db.DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now, comment='更新时间')
......
......@@ -16,7 +16,7 @@ def test():
# script_path = Path(__file__).resolve()
# script_dir = script_path.parent
# script_dir = script_path.parent
@bp.get("/ai_index_page")
def ai_index_page():
......@@ -104,7 +104,8 @@ def upload():
return fail_api(msg=f"文件上传异常: {e}")
file_code = request.form.get('file_code')
print(file_code)
parser_engine = request.form.get('parser_engine')
print(file_code,parser_engine)
project_root = current_app.config.get('PROJECT_ROOT')
full_directory = project_root + upload_pdf_directory
now = get_current_datetime()
......@@ -120,6 +121,7 @@ def upload():
new_ai_file_parser = AiFileParser(
file_path=json.dumps(path_list),
file_id = file_code,
parser_engine = parser_engine,
status = 0,
)
db.session.add(new_ai_file_parser)
......@@ -134,7 +136,8 @@ def base64_upload():
data = request.json
base64_pdf = data.get('pdf_base64')
file_code = data.get('file_code')
print(file_code)
parser_engine = data.get('parser_engine')
print(file_code,parser_engine)
if base64_pdf is None or file_code is None:
return jsonify({'error': 'Missing PDF or file_code'}), 400
......@@ -162,6 +165,7 @@ def base64_upload():
new_ai_file_parser = AiFileParser(
file_path=json.dumps(path_list),
file_id = file_code,
parser_engine = parser_engine,
status = 0,
)
db.session.add(new_ai_file_parser)
......
......@@ -30,7 +30,9 @@
<tr>
<td>
<div class="layui-form-item">
<pre>
{{data.kimi_ai_parser}}
</pre>
</div>
</td>
</tr>
......
......@@ -17,9 +17,20 @@
</div>
<div class="main-container">
<div class="layui-btn-container">
<label class="layui-form-label">F</label>
<label class="layui-form-label">FILE_CODE</label>
<div class="layui-input-block">
<input type="text" name="file_code" lay-verify="file_code" autocomplete="off" placeholder="请输入策略名称" id="layui-input-file-code" class="layui-input">
<input type="text" name="file_code" lay-verify="file_code" autocomplete="off" placeholder="file_code" id="layui-input-file-code" class="layui-input">
</div>
</div>
</div>
<div class="main-container">
<div class="layui-btn-container">
<label class="layui-form-label">解析AI</label>
<div class="layui-input-block">
<select name="parser_engine" lay-verify="required">
<option value="kimi" selected>kimi</option>
<option value="yiyan">文心一言</option>
</select>
</div>
</div>
</div>
......@@ -62,6 +73,7 @@
,before: function(obj){ //obj参数包含的信息,跟 choose回调完全一致,可参见上文。
this.data={
file_code:$("#layui-input-file-code").val(),
parser_engine:$('select[name="parser_engine"]').val()
}
},
done: function(res){
......
import base64
from datetime import datetime
from io import BytesIO
from random import choice
import time, os, sys, re, random, json
import requests
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import InvalidSelectorException
from urllib.parse import urlparse
#from applications.models import User, Role, Dept, Power
import pandas as pd
from bs4 import BeautifulSoup
from PIL import Image
from selenium.webdriver import ActionChains
import numpy as np
import base64
import io,pyautogui,pyperclip
from pathlib import Path
from pywinauto import Desktop
from pywinauto.application import Application
from pywinauto.keyboard import send_keys
from db import dbModel
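# Notes:
# 1. Kimi QR-code login: after the page opens there is a 15 s wait for the scan. TODO: detect the logged-in state instead of sleeping.
# 2. The chromedriver build must match the Chrome version installed on Windows.
# 3. Upload paths must be absolute and must use backslashes; with forward slashes the file dialog's input box does not recognise the path.
# 4. Upload parsing and answer completion are detected by polling. TODO: optimise.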
def chrome_public_sec(driver_path=r"./chromedriver/chromedriver127.exe"):
    """Start a maximized Chrome WebDriver session with automation hints reduced.

    Args:
        driver_path: Path to the chromedriver executable. The default is the
            chromedriver 127 build under ./chromedriver, resolved against the
            current working directory. The driver build has to match the
            Chrome version installed on the machine.

    Returns:
        The started ``webdriver.Chrome`` instance.
    """
    service = Service(executable_path=driver_path)
    browser_opt = webdriver.ChromeOptions()
    # Turn off the Blink "AutomationControlled" feature.
    browser_opt.add_argument("--disable-blink-features=AutomationControlled")
    browser_opt.add_argument("--start-maximized")
    # Removes the "browser is controlled by automated software" banner on
    # recent driver versions.
    browser_opt.add_experimental_option('excludeSwitches', ['enable-automation'])
    # Options that were tried and left disabled in the original: headless
    # mode, a custom user-agent, --disable-gpu, disabling extensions, images,
    # cache and popup blocking, ignoring certificate errors, attaching through
    # debuggerAddress, and a custom Chrome binary_location.
    return webdriver.Chrome(service=service, options=browser_opt)
class YiyanAi(object):
    """Browser-automation client for the Wenxin Yiyan web chat.

    Drives Chrome through Selenium, fills the native Windows "打开" (Open)
    file dialog through pywinauto, and moves the prompt and the answer through
    the system clipboard (pyperclip / pyautogui). In this copy ``parser``
    holds the Kimi-page flow and ``test`` holds the Yiyan flow.
    """

    def __init__(self):
        # Default page for open_url(None). This used to be
        # https://kimi.moonshot.cn, apparently a leftover from the Kimi
        # driver, which made open_url(None) open the wrong site.
        self.index_url = 'https://yiyan.baidu.com/'
        self.browser = chrome_public_sec()
        self.browser.get('chrome://settings/clearBrowserData')
        # Registered to run on every new document: navigator.webdriver then
        # reads as undefined.
        self.browser.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
            "source": """
            Object.defineProperty(navigator, 'webdriver', {
            get: () => undefined,
            configurable: true
            });
            """
        })

    def quit(self):
        """Close the browser and end the WebDriver session."""
        self.browser.quit()

    def open_url(self, url):
        """Navigate to url, or to self.index_url when url is None.

        If navigation raises, the page is refreshed instead of failing.
        """
        target = self.index_url if url is None else url
        try:
            self.browser.get(target)
        except Exception:
            # Was a bare ``except:``, which also swallowed KeyboardInterrupt
            # and SystemExit.
            self.browser.refresh()

    def search(self, dict, key):
        """Placeholder; not implemented."""
        pass

    def _hover_and_click(self, xpath, announce=None):
        """Hover over the first element matching xpath, then click it.

        Best effort: any failure is printed and swallowed, as in the
        original inline code.
        """
        try:
            element = self.browser.find_element(By.XPATH, xpath)
            ActionChains(self.browser).move_to_element(element).perform()
            time.sleep(1)
            if announce is not None:
                print(announce)
            element.click()
        except Exception as e:
            print(e)

    def _submit_open_dialog(self, pdf_path):
        """Type pdf_path into the Windows "打开" dialog and press its open button."""
        app = Application().connect(title='打开')
        # Bound outside the try so the visibility check below can never hit
        # an unbound name.
        file_dialog = app.window(title='打开')
        try:
            file_dialog.wait('visible', timeout=10)
            file_dialog.child_window(class_name="Edit").set_text(pdf_path)
        except Exception as e:
            print(f"无法定位文件对话框: {e}")
        time.sleep(3)
        if file_dialog.is_visible():
            button_new = file_dialog.child_window(title="打开(&O)", class_name="Button")
            button_new.wait('ready', timeout=10)
            if button_new.is_enabled():
                button_new.double_click()
                print("double_click")
            else:
                print("打开按钮被禁用了")

    def _wait_until_text_gone(self, texts, waiting_msg, done_msg=None):
        """Poll once per second until no element contains any of texts.

        There is no timeout: this blocks for as long as a marker text stays
        on the page.
        """
        while any(
            self.browser.find_elements(By.XPATH, f"//*[contains(text(), '{text}')]")
            for text in texts
        ):
            print(waiting_msg)
            time.sleep(1)
        if done_msg is not None:
            print(done_msg)

    def parser(self, pdf_path, prompt: str):
        """Kimi-page flow: type the prompt, upload pdf_path, send, copy the answer.

        Uses Kimi selectors and messages. Returns the clipboard text read
        after the copy click.
        """
        # Prompt input box (class prefix editorContentEditable_).
        try:
            textarea_ele = self.browser.find_element(By.CSS_SELECTOR, "[class^='editorContentEditable_']")
            textarea_ele.send_keys(prompt)
        except Exception:
            print("文本输入框不见了")
        time.sleep(3)
        # Upload button.
        try:
            uploaded_ele = self.browser.find_element(By.CSS_SELECTOR, "[data-testid='msh-chatinput-upload-button']")
            uploaded_ele.click()
        except Exception:
            print("上传按钮不见了")
        time.sleep(3)
        self._submit_open_dialog(pdf_path)
        # Wait while the page still shows the "parsing" marker.
        try:
            time.sleep(3)
            self._wait_until_text_gone(('解析中...',), "正在等待kimi解析", "kimi解析成功")
        except Exception as e:
            print(e)
        # Send the message.
        self.browser.find_element(By.ID, "send-button").click()
        time.sleep(2)
        # Wait while the "stop output" control is still shown.
        try:
            self._wait_until_text_gone(('停止输出',), "正在等待kimi输出", "kimi输出成功")
        except Exception:
            print("kimi,停止输出异常")
        time.sleep(1)
        # Click the copy control.
        self._hover_and_click("//*[contains(text(), '复制')]")
        time.sleep(3)
        json_text = pyperclip.paste()
        print(json_text)
        return json_text

    def test(self, pdf_path: str, prompt: str):
        """Yiyan flow: upload pdf_path, paste and send prompt, print the answer.

        The copied answer is printed only; nothing is returned.

        Raises:
            IndexError: If no "复制代码" button is on the page once the
                output has finished.
        """
        # Open the file panel, then the upload drop zone.
        self._hover_and_click("//*[contains(text(), '文件')]", "开始点击文件")
        time.sleep(2)
        self._hover_and_click("//*[contains(text(), '点击上传或拖入文档')]", "开始点击上传")
        time.sleep(3)
        self._submit_open_dialog(pdf_path)
        # Wait while the page still shows the uploading or reading markers.
        try:
            time.sleep(3)
            self._wait_until_text_gone(('上传中', '阅读中'), "正在等待yiyan解析")
        except Exception as e:
            print(e)
        # The prompt goes through the clipboard and a Ctrl+V keystroke.
        pyperclip.copy(prompt)
        input_box = self.browser.find_element(By.CLASS_NAME, 'yc-editor-wrapper')
        print(input_box)
        ActionChains(self.browser).move_to_element(input_box).perform()
        time.sleep(1)
        input_box.click()
        time.sleep(1)
        pyautogui.hotkey('ctrl', 'v')
        time.sleep(1)
        # Enter sends the message.
        pyautogui.press('enter')
        # Wait while the "stop generating" control is still shown.
        try:
            time.sleep(3)
            self._wait_until_text_gone(('停止生成',), "正在等待yiyan输出", "yiyan输出成功")
        except Exception:
            print("yiyan,停止输出异常")
        # Copy the answer through the last "copy code" button on the page.
        copy_eles = self.browser.find_elements(By.XPATH, "//*[contains(text(), '复制代码')]")
        if not copy_eles:
            # Same exception type as the original copy_eles[-1], with a
            # message that names the cause.
            raise IndexError("no '复制代码' button found on the page; nothing to copy")
        copy_ele = copy_eles[-1]
        self.browser.execute_script("arguments[0].scrollIntoView();", copy_ele)
        time.sleep(3)
        copy_ele.click()
        time.sleep(1)
        json_data = pyperclip.paste()
        print(json_data)
if __name__ == '__main__':
    # Manual smoke test: open Yiyan, wait for a manual login, run the Yiyan
    # flow on one local PDF, then keep the browser open for inspection.
    window = YiyanAi()
    window.open_url('https://yiyan.baidu.com/')
    # The message now matches the actual 30 s pause (it used to say 20s).
    print("等待30s,先扫码登录!")
    time.sleep(30)
    prompt = "pdf编号:12313123213123123,解析阅读pdf理解pdf中报关单表格信息,识别出来的内容以list[json]格式返回,json中需包含pdf编号,报关单商品详情可能存在多条,报关单商品详情为list格式可包含多商品,报关单头部信息包含如下字段 :发货人、统一社会信用码(由18位数字或大写拉丁字母组成)、收货人、件数(纯数字)、毛重(千克)、净重(千克),报关单商品详情包含如下字段:项号、商品编号、商品名称、规格型号、数量(纯数字包括浮点型)、单位(千克、kg、pcs、个)、单价(纯数字包含浮点型)、总价(纯数字包含浮点型)、货币(USD、RMB等)、原产国、毛重、净重......,识别不到的信息为空值。"
    file_path = r"D:\www\ImageAiParser\tmp\pdf\3.pdf"
    window.test(file_path, prompt)
    # Keep the process and its browser alive. Sleeping replaces the original
    # ``while True: pass``, which spun a CPU core.
    while True:
        time.sleep(1)
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment