前几天推送了“iReg小程序-批量提取文件列表并创建超链接”之后,有群友留言询问能否增加提取PDF文件页数的功能,因此用Python重新写了一个版本。小程序界面非常简洁:点击“浏览”选择需要提取的文件夹,再点击“创建文件列表”即可:
生成的“文件列表.xlsx”保存在所选择的文件夹下。这个功能可以帮助CRO公司或者翻译公司统计待处理或待翻译的PDF页数,便于报价,也可以用于对自己的NDA、IND等大型申请的PDF文件进行统计:
压缩包里包含小程序、测试文件和使用说明,需要的小伙伴们就扫码下载吧:
https://pan.baidu.com/s/1a7v9nCoyVfsVcCtw248cmA?pwd=iReg
同时把Python源代码分享如下,欢迎大家提出宝贵意见,以便不断改进:
import tkinter
import tkinter.filedialog
import tkinter.messagebox
import os
import openpyxl
from openpyxl.utils import get_column_letter
import PyPDF2
def get_file_info(file_path):
    """Collect the size, name, path and (for PDFs) page count of one file.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        A dict with the keys "文件大小(KB)" (size in KB, rounded to one
        decimal place), "文件页数" (page count, or None when the file is not
        a PDF or its pages could not be counted), "文件名" (base name) and
        "文件路径" (the path with forward slashes replaced by backslashes).
        Returns None when the file size cannot be read at all.
    """
    try:
        # File size in KB, rounded to one decimal place.
        size_kb = round(os.path.getsize(file_path) / 1024, 1)
    except (OSError, ValueError) as e:
        # The file vanished or is inaccessible: nothing useful to report.
        print(f"Error processing file {file_path}: {e}")
        return None

    # The page count only applies to PDF files.
    pages = None
    if file_path.lower().endswith('.pdf'):
        try:
            with open(file_path, 'rb') as pdf_file:
                pages = len(PyPDF2.PdfReader(pdf_file).pages)
        except Exception as e:
            # A damaged or protected PDF must not remove the file from the
            # list; keep the entry and leave the page count unknown. The
            # catch is deliberately broad because parsing arbitrary PDFs can
            # fail in many different ways.
            print(f"Error processing file {file_path}: {e}")
            pages = None

    return {
        "文件大小(KB)": size_kb,
        "文件页数": pages,
        "文件名": os.path.basename(file_path),
        "文件路径": file_path.replace("/", "\\"),
    }
def traverse_directory(directory):
    """Walk *directory* recursively and gather a record for every file.

    Each file found by ``os.walk`` is passed to ``get_file_info``; results
    that are falsy (files that could not be processed) are left out.

    Args:
        directory: Root folder to scan.

    Returns:
        A list of the record dicts produced by ``get_file_info``.
    """
    candidates = (
        get_file_info(os.path.join(folder, name))
        for folder, _subdirs, names in os.walk(directory)
        for name in names
    )
    return [info for info in candidates if info]
def write_to_excel(records, output_path):
    """Write the file records to a new Excel workbook.

    The workbook gets a single sheet named '文件信息' with a header row
    followed by one row per record. A missing page count (None) is written
    as an empty string.

    Args:
        records: Iterable of dicts keyed by the four column headers.
        output_path: Destination path of the .xlsx file.
    """
    book = openpyxl.Workbook()
    sheet = book.active
    sheet.title = '文件信息'

    # The column headers double as the keys of each record.
    columns = ['文件大小(KB)', '文件页数', '文件名', '文件路径']

    # Header row.
    for col_idx, title in enumerate(columns, start=1):
        sheet.cell(row=1, column=col_idx, value=title)

    # Data rows start right below the header.
    for row_idx, entry in enumerate(records, start=2):
        for col_idx, key in enumerate(columns, start=1):
            cell_value = entry[key]
            if key == '文件页数' and cell_value is None:
                cell_value = ''
            sheet.cell(row=row_idx, column=col_idx, value=cell_value)

    book.save(output_path)
class Window():
    """Main window: pick a folder and export its file list to Excel.

    The window holds one row of widgets: a label, a text entry for the
    folder path, a browse button and a button that creates the file list.
    """

    def __init__(self):
        """Create the Tk root window and lay out the widgets on one grid row."""
        self.root = root = tkinter.Tk()
        self.label = tkinter.Label(root, text='选择目录')
        self.label.grid(row=0, column=0)
        self.entryDir = tkinter.Entry(root)
        self.entryDir.grid(row=0, column=1)
        self.BrowserDirButton = tkinter.Button(root, text='浏览', command=self.BrowserDir)
        self.BrowserDirButton.grid(row=0, column=2)
        self.ButtonCov = tkinter.Button(root, text='创建文件列表', command=self.Conv2)
        self.ButtonCov.grid(row=0, column=3)

    def BrowserDir(self):
        """Ask for a folder and put the chosen path into the entry field."""
        directory = tkinter.filedialog.askdirectory(title='iReg小程序')
        # An empty result means the dialog was cancelled: keep the old text.
        if directory:
            self.entryDir.delete(0, tkinter.END)
            self.entryDir.insert(tkinter.END, directory)

    def Conv2(self):
        """Scan the selected folder and save "文件列表.xlsx" inside it.

        Shows a warning when the entry does not name an existing folder, an
        error dialog when scanning or saving fails with an OSError, and a
        completion message otherwise.
        """
        directory = self.entryDir.get().strip()
        # Without this check an empty entry would silently write an empty
        # workbook into the current working directory.
        if not os.path.isdir(directory):
            tkinter.messagebox.showwarning("iReg", "请先选择一个有效的文件夹")
            return

        output_excel = os.path.join(directory, "文件列表.xlsx")
        try:
            records = traverse_directory(directory)
            write_to_excel(records, output_excel)
        except OSError as exc:
            # E.g. the workbook from a previous run is still open in Excel;
            # tell the user instead of failing silently in the Tk callback.
            tkinter.messagebox.showerror("iReg", f"无法生成文件列表:{exc}")
            return
        tkinter.messagebox.showinfo("iReg", "已完成 ")

    def mainloop(self):
        """Fix the window size, set the title and run the Tk event loop."""
        # Identical min and max sizes pin the window at 380x120.
        self.root.minsize(380, 120)
        self.root.maxsize(380, 120)
        self.root.title('iReg小程序-创建文件列表')
        self.root.mainloop()
if __name__ == "__main__":
    # Build the GUI and hand control to the Tk event loop until it is closed.
    Window().mainloop()