wps dispimg python 解析实现参考
在 wps excel 中,可以把图片嵌入单元格,此时图片单元格会显示如下内容
=DISPIMG("ID_142D0E21999C4D899C0723FF7FA4A9DD",1)
下面是针对这种图片文件的解析实现
参考博客:Python读取wps中的DISPIMG图片格式_wps dispimg-CSDN博客:https://blog.csdn.net/QAZJOU/article/details/139709948
解析出 dispimg_id
简单的字符串替换实现
def save_dispimg_id(self, cell_value):
    """Extract the image ID from a WPS ``DISPIMG`` formula and record it.

    A cell that holds an embedded picture contains a formula such as
    ``=DISPIMG("ID_142D0E21999C4D899C0723FF7FA4A9DD",1)``.  The text between
    ``DISPIMG("`` and the next double quote is appended to
    ``self.wps_dispimg_id_list``.  Anything before ``DISPIMG("`` (for example
    a function-namespace prefix) and anything after the closing quote
    (spacing, the second argument) is ignored.

    Args:
        cell_value: The cell's formula text (``str``).

    Returns:
        None.  The ID is stored on ``self.wps_dispimg_id_list``.
    """
    _, marker, tail = cell_value.partition('DISPIMG("')
    if marker:
        # Take everything up to the closing quote of the first argument.
        img_id = tail.partition('"')[0]
    else:
        # No DISPIMG marker: keep the historical replace-based result so
        # callers that pass other text see the same value as before.
        img_id = cell_value.replace('=DISPIMG("', "").replace('",1)', "")
    self.wps_dispimg_id_list.append(img_id)
解析出 dispimg_id 对应的图片文件流
import zipfile
try:
    from lxml import etree
except ImportError:
    # Only API shared by lxml and the stdlib parser is used below, so the
    # stdlib ElementTree is a drop-in fallback when lxml is not installed.
    from xml.etree import ElementTree as etree

# Module-level cache: dispimg_id -> raw image bytes.
wps_dispimg_map = {}


def get_wps_dispimg_map(excel_file):
    """Build (once) and return the ``dispimg_id -> image bytes`` mapping.

    The workbook is read as a zip archive.  ``xl/cellimages.xml`` maps each
    DISPIMG ID (the ``name`` attribute of ``xdr:cNvPr``) to a relationship ID
    (the ``r:embed`` attribute of ``a:blip``), and
    ``xl/_rels/cellimages.xml.rels`` maps each relationship ID to a target
    path below ``xl/``.  The two are joined and the image bytes are stored in
    the module-level ``wps_dispimg_map``.

    NOTE: the cache is not keyed by ``excel_file``.  Once it holds any entry,
    later calls return it unchanged even for a different workbook; call
    ``wps_dispimg_map.clear()`` first to load another file.

    Args:
        excel_file: Path or binary file-like object accepted by
            ``zipfile.ZipFile``.

    Returns:
        The module-level ``wps_dispimg_map`` dict (also on a cache hit).

    Raises:
        KeyError: If ``xl/cellimages.xml``, its ``.rels`` part, or a
            referenced image is missing from the archive.
    """
    if wps_dispimg_map:
        # Return the cached map instead of None so every call yields the dict.
        return wps_dispimg_map

    xml_content_namespaces = {
        'xdr': 'http://schemas.openxmlformats.org/drawingml/2006/spreadsheetDrawing',
        'a': 'http://schemas.openxmlformats.org/drawingml/2006/main',
        'r': "http://schemas.openxmlformats.org/officeDocument/2006/relationships",
        'etc': "http://www.wps.cn/officeDocument/2017/etCustomData",
    }
    package_rel_namespace = "http://schemas.openxmlformats.org/package/2006/relationships"
    embed_attr = f'{{{xml_content_namespaces["r"]}}}embed'

    # Open the XLSX file.
    with zipfile.ZipFile(excel_file, 'r') as zfile:
        # dispimg_id -> rId mapping source.
        with zfile.open('xl/cellimages.xml') as file:
            xml_content = file.read()
        # rId -> target mapping source.
        with zfile.open('xl/_rels/cellimages.xml.rels') as file:
            rel_xml_content = file.read()

        xml_content_map = {}
        xml_content_root = etree.fromstring(xml_content)
        for xdr_pic in xml_content_root.findall(".//xdr:pic", xml_content_namespaces):
            cnv_pr = xdr_pic.find('.//xdr:cNvPr', namespaces=xml_content_namespaces)
            blip = xdr_pic.find('.//a:blip', namespaces=xml_content_namespaces)
            if cnv_pr is None or blip is None:
                # Picture without an ID or without an embedded blip: skip it.
                continue
            dispimg_id = cnv_pr.get('name')
            rId = blip.get(embed_attr)
            if dispimg_id is not None and rId is not None:
                xml_content_map[dispimg_id] = rId

        rel_xml_content_map = {}
        rel_xml_content_root = etree.fromstring(rel_xml_content)
        # A fully qualified tag works the same in lxml and ElementTree.
        relationship_path = f'.//{{{package_rel_namespace}}}Relationship'
        for relationship in rel_xml_content_root.findall(relationship_path):
            rId = relationship.get('Id')
            target = relationship.get('Target')
            if rId is not None and target is not None:
                rel_xml_content_map[rId] = f"xl/{target}"

        for dispimg_id, rId in xml_content_map.items():
            target = rel_xml_content_map.get(rId)
            if target is None:
                continue
            with zfile.open(target) as img_file:
                image_binary_data = img_file.read()
            if image_binary_data:
                wps_dispimg_map[dispimg_id] = image_binary_data

    return wps_dispimg_map