import streamlit as st
from PIL import Image
import io,os,json
import numpy as np 
import pandas as pd
from pdf2image import convert_from_path 
from datetime import datetime
from db import dbModel
import fitz 
import appbuilder
from db import dbModel
# Maximum image resolution (in pixels) for Wenxin Yiyan (ERNIE Bot) is 4000.
RESOLUTION = 4000
# NOTE(review): dpi is not referenced anywhere in the visible code — confirm
# whether it is still needed.
dpi = 200
# Directory the sample PDF paths further down this file live under.
file_directory = './output/pdf/'
# Directory handed to covert2pic() as the destination for rendered page PNGs.
image_directory = './output/convert/'

def get_current_datetime():
    """Return the current local time as a compact ``YYYYMMDDHHMMSS`` string.

    The value contains digits only (14 characters), which makes it usable as
    a filename prefix.
    """
    return datetime.now().strftime("%Y%m%d%H%M%S")


def convert_pdf_to_image(file_path, output_folder, max_side=None):
    """Render every page of a PDF to ``page_<n>.png`` inside *output_folder*.

    Pages whose width or height exceeds the limit are scaled down
    proportionally so that the longer side fits the limit; smaller pages are
    saved unchanged.

    Args:
        file_path: Path of the PDF handed to ``convert_from_path``.
        output_folder: Directory that receives the PNG files. It is created
            (including parents) when it does not exist yet.
        max_side: Maximum allowed width/height in pixels. Defaults to the
            module-level ``RESOLUTION`` when omitted.

    Returns:
        None. The pages are written to disk as a side effect.
    """
    limit = RESOLUTION if max_side is None else max_side
    if output_folder:
        # An empty folder means "current directory"; nothing to create then.
        os.makedirs(output_folder, exist_ok=True)
    for page_no, image in enumerate(convert_from_path(file_path), start=1):
        if max(image.size) > limit:
            # thumbnail() shrinks in place and preserves the aspect ratio,
            # whereas resize((limit, limit)) would stretch the page into a
            # square. Image.ANTIALIAS was removed in Pillow 10; LANCZOS is
            # the same filter under its current name.
            image.thumbnail((limit, limit), Image.LANCZOS)
        output_path = os.path.join(output_folder, f'page_{page_no}.png')
        image.save(output_path, 'PNG')

def covert2pic(file_path, zoom, png_path):
    """Render each page of a document to a PNG file and return the paths.

    Args:
        file_path: Path of the document opened with ``fitz.open``.
        zoom: Scale factor in percent (converted with ``int``); the page is
            rendered with a ``zoom / 100`` matrix in both directions, so a
            larger value yields a higher-resolution, sharper image.
        png_path: Output directory. It is created, including any missing
            parent directories, before rendering starts.

    Returns:
        list[str]: Paths of the saved PNG files in page order. Files are
        named ``<timestamp>_<page index>.png`` with a zero-based page index
        and the timestamp taken once per call.
    """
    stamp = get_current_datetime()
    # Loop-invariant setup: one scale matrix and one directory check per call.
    scale = int(zoom) / 100.0
    transform = fitz.Matrix(scale, scale)
    # makedirs (unlike mkdir) also creates missing parents such as ./output.
    os.makedirs(png_path, exist_ok=True)

    image_list = []
    doc = fitz.open(file_path)
    try:
        for pg in range(doc.page_count):
            pixmap = doc[pg].get_pixmap(matrix=transform, alpha=False)
            save_path = os.path.join(png_path, f"{stamp}_{pg}.png")
            pixmap.save(save_path)
            image_list.append(save_path)
    finally:
        # Release the document even when rendering or saving a page fails.
        doc.close()
    return image_list

def wx_image_parser(image_path):
    """Send one image to the AppBuilder application and return its answer.

    A new conversation is created for every call, the image is uploaded to
    it, and the application is run with a fixed prompt. The prompt (kept
    verbatim below) asks for the recognized content as JSON with a single
    header section and a list of goods entries, leaving unrecognized fields
    empty.

    Args:
        image_path: Local path of the image file to upload.

    Returns:
        The ``answer`` attribute of the run result's ``content``
        (presumably a string — the caller applies ``.strip()`` to it).
    """
    # Configure the access token. setdefault() keeps a token that the
    # deployment environment already provides instead of clobbering it on
    # every call.
    # NOTE(review): this literal is a credential committed to source control;
    # it should be rotated and supplied only via the environment.
    os.environ.setdefault(
        "APPBUILDER_TOKEN",
        "bce-v3/ALTAK-SsXWQJtdNpOuxGOiRm5jo/736adf01c0b6c5d354c9815bfb3b616f3d36736b",
    )
    app_id = "e18c7b93-2d42-478e-9baa-07e36fa67452"
    # Initialise the agent client.
    builder = appbuilder.AppBuilderClient(app_id)
    # Create a conversation and get its ID.
    conversation_id = builder.create_conversation()
    # Upload the image into that conversation.
    new_file_id = builder.upload_local_file(conversation_id, image_path)
    # Run recognition against the uploaded file.
    msg = builder.run(conversation_id, '识别出来的内容以json格式返回,商品可能存在多条，货物(商品)信息为list格式可包含多商品,表头信息包含发货人、发货人编号、统一社会信用码、收货人、离境口岸、运抵国、指运港、件数、总毛重、总净重,表体信息包含商品编号、商品名称、规格型号、数量、单位、单价、总价、原产国、目的国、境内货源地、包装种类、数量、毛重、净重,识别不到的信息为空值,并且只有一个表头。', file_ids=[new_file_id])
    answer = msg.content.answer
    print("助理回答内容：", answer)
    return answer
# Driver: render the sample PDF to page images, run each image through the
# AppBuilder parser and store the answer in the ai_image_parser table.
# file_path = "./output/pdf/20240808193325duoye.pdf"
file_path = "./output/pdf/2024080819163124-1505-00092.pdf"
image_list = covert2pic(file_path,200,image_directory)
for image_path in image_list:
    wx_ai_parser = wx_image_parser(image_path).strip()
    print(wx_ai_parser)
    # The answer is free text produced by the model and may contain single
    # quotes, which would terminate the SQL string literal early. Doubling
    # them is the standard SQL escape for a quote inside a literal.
    # NOTE(review): dbModel.insert's signature is not visible here; if it
    # accepts bound parameters, switch to a parameterized query instead.
    _sql_image_path = image_path.replace("'", "''")
    _sql_answer = wx_ai_parser.replace("'", "''")
    sql = f"INSERT INTO ai_image_parser (image_path,wx_ai_parser,file_id) VALUES ('{_sql_image_path}','{_sql_answer}',3)"
    new_id = dbModel().insert(sql)
print(image_list)