
|
import sys import traceback import os import string import re import urllib.request import json
# Folder that receives the downloaded images and the JSON bookkeeping files.
DownloadFolder = "F:/a_downimages"
# Root folder that is walked for Markdown files.
MDFolder = "F:/mywiki"

# Running image number; downloaded files are named "<Counter>.png".
Counter = 0

# Renamed image file name -> source link it was downloaded from.
PngFile2Src = {}
PngFile2SrcFile = DownloadFolder + "/png2src.json"

# One {markdown path: {source link: renamed image file name}} entry per file.
Md2Png = []
Md2PngFile = DownloadFolder + "/md2png.json"

# Renamed image file name -> list of source links that failed to download.
ErrorMap = {}
ErrorFile = DownloadFolder + "/error.json"

# JSON file read by replaceLink: renamed image file name -> uploaded link.
UploadedFile = DownloadFolder + "/uploaded.json"
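'''
JSON storage layout.

Md2PngFile - per Markdown file, source link -> renamed image file:
    local table = { {filename : { srclink : renameFileName.png, srclink2 : renameFileName2.png } } }

PngFile2SrcFile - mapping from renamed image file to its source link:
    local table = { renameFileName.png : srclink }

UploadedFile - JSON of successful uploads, renamed image file -> destination link:
    local table = { renameFileName.png : dstlink }
'''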
def readFile(filePath):
    """Return the complete contents of *filePath* as bytes.

    The file is opened in binary mode, so callers decode the result
    themselves when they need text.
    """
    with open(filePath, "rb") as source:
        return source.read()
def isMdFile(filePath):
    """Return True when *filePath* ends with the ".md" extension.

    The comparison is case-insensitive, so "README.MD" also qualifies.
    """
    return filePath.lower().endswith(".md")
def writeFile(filePath, str):
    """Write the bytes *str* to *filePath*, replacing any existing content.

    The file is opened in binary mode, so *str* must already be encoded.
    """
    # NOTE: the parameter name shadows the builtin ``str``; it is kept
    # because renaming it would break keyword callers.
    payload = str
    with open(filePath, "wb") as target:
        target.write(payload)
# Markdown image syntax ``![alt](http...)``; group 1 is the URL.
# The alt-text part is matched lazily on purpose: a greedy ``.*`` runs to the
# last "](" on the line, which drops every image but the last one when several
# links share a line and can even capture the URL of a plain (non-image) link.
_IMG_LINK_RE = re.compile(r'!\[.*?\]\((http.+?)\)')


def extractImgLinks(contentStr):
    """Return the URLs of all remote Markdown image links in *contentStr*.

    Only links whose target starts with "http" are returned, in document
    order; a URL that occurs several times is returned several times.
    """
    return _IMG_LINK_RE.findall(contentStr)


def getFileImgLink(filePath):
    """Download every remote image referenced by one Markdown file.

    Files that are not Markdown, or that contain no remote image links, are
    ignored.  Each image is saved as "<Counter>.png" in DownloadFolder.
    Results are recorded in the module-level tables:

    * PngFile2Src: renamed file name -> source link (successful downloads)
    * ErrorMap:    renamed file name -> [source link] (failed downloads)
    * Md2Png:      one {filePath: {source link: renamed file name}} entry

    The file is decoded as UTF-8; a file in another encoding raises
    UnicodeDecodeError.
    """
    global Counter

    filePath = filePath.replace("\\", "/")
    if not isMdFile(filePath):
        return

    contentStr = readFile(filePath).decode('utf-8')
    imglist = extractImgLinks(contentStr)
    if not imglist:
        return

    tmpMap = {}
    for imgurl in imglist:
        Counter += 1
        filename = "%d.png" % Counter
        dstFile = "%s/%s" % (DownloadFolder, filename)
        print("--- 处理:%s" % imgurl)
        try:
            urllib.request.urlretrieve(imgurl, dstFile)
        except Exception as e:
            # Best effort: one broken link must not abort the whole run, but
            # the failure is both recorded and reported.
            ErrorMap.setdefault(filename, []).append(imgurl)
            print(u"下载 (%s) 失败 ,文件(%s): %s" % (imgurl, filePath, e))
        else:
            PngFile2Src[filename] = imgurl
            tmpMap[imgurl] = filename
            print(u"下载 (%s) 成功 ,文件(%s)" % (imgurl, filePath))

    Md2Png.append({filePath: tmpMap})
def replaceUrls(contentStr, urlMap):
    """Return *contentStr* with every key of *urlMap* replaced by its value.

    Keys are matched literally.  All replacements happen in a single pass, so
    a replacement value is never itself rewritten.
    """
    if not urlMap:
        return contentStr
    # Longest key first: regex alternation takes the first alternative that
    # matches, so a URL that is a prefix of another one would otherwise win
    # and leave the tail of the longer URL behind.
    ordered = sorted(urlMap, key=len, reverse=True)
    pattern = re.compile("|".join(re.escape(url) for url in ordered))
    return pattern.sub(lambda m: urlMap[m.group(0)], contentStr)


def replaceLink():
    """Rewrite the image links in the Markdown files with their uploaded URLs.

    Reads Md2PngFile (markdown path -> {source link: renamed image file}) and
    UploadedFile (renamed image file -> uploaded link), then replaces each
    source link in each Markdown file in place.  Links whose image has no
    uploaded URL are left unchanged.
    """
    def readJsonFile(filePath):
        """Load and return the UTF-8 encoded JSON document at *filePath*."""
        filePath = filePath.replace("\\", "/")
        contentStr = readFile(filePath).decode('utf-8')
        return json.loads(contentStr)

    def dealFile(filePath, urlMap):
        """Apply the replacements for one Markdown file; *urlMap* is not modified."""
        print("处理文件:%s" % filePath)
        if len(urlMap) == 0:
            print("无需替换:%s" % filePath)
            return
        filePath = filePath.replace("\\", "/")

        replacements = {}
        for src, pngFile in urlMap.items():
            repUrl = data_uploaded.get(pngFile)
            if repUrl is None:
                # Not uploaded (missing or null entry): keep the working
                # source link rather than overwriting it with a placeholder.
                print("未上传,保留原链接:(%s), png:(%s)" % (src, pngFile))
                continue
            replacements[src] = repUrl

        if not replacements:
            print("无需替换:%s" % filePath)
            return

        contentStr = readFile(filePath).decode('utf-8')
        contentStr = replaceUrls(contentStr, replacements)
        writeFile(filePath, contentStr.encode('utf-8'))
        print("--- 替换完成")

    data_md2png = readJsonFile(Md2PngFile)
    data_uploaded = readJsonFile(UploadedFile)

    for row in data_md2png:
        for (k, v) in row.items():
            dealFile(k, v)
            print("处理文件:%s" % k)
            for (src, pngFile) in v.items():
                print("替换url:(%s), png:(%s)" % (src, pngFile))
def getImgs():
    """Download the remote images of every Markdown file under MDFolder.

    Walks MDFolder recursively, hands each file to getFileImgLink, and then
    writes the collected tables as JSON to PngFile2SrcFile, Md2PngFile and
    ErrorFile.
    """
    # Without the folder every download fails and the JSON writes below raise.
    os.makedirs(DownloadFolder, exist_ok=True)

    for root, _dirs, files in os.walk(MDFolder):
        for file in files:
            getFileImgLink(os.path.join(root, file))

    writeFile(PngFile2SrcFile, json.dumps(PngFile2Src).encode())
    writeFile(Md2PngFile, json.dumps(Md2Png).encode())
    writeFile(ErrorFile, json.dumps(ErrorMap).encode())

    # Only claim complete success when no download was recorded as failed.
    if ErrorMap:
        print(u"下载完成, %d 个失败, 详见 %s" % (len(ErrorMap), ErrorFile))
    else:
        print(u"全部下载成功")
# Script entry point: running the file directly starts the download pass.
if __name__ == "__main__":
    getImgs()
|