1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164
|
import sys import traceback import os import string import re import urllib.request import json
# Root folders: where downloaded images are stored, and where the Markdown
# files to scan live.
DownloadFolder = "F:/a_downimages"
MDFolder = "F:/mywiki"

# Running count of image links processed so far; used to name downloads.
Counter = 0

# Downloaded file name -> source URL, plus the JSON file it is saved to.
PngFile2Src = {}
PngFile2SrcFile = DownloadFolder + "/png2src.json"

# One {markdown path: {source URL: downloaded file name}} entry per file,
# plus the JSON file it is saved to.
Md2Png = []
Md2PngFile = DownloadFolder + "/md2png.json"

# Downloaded file name -> list of URLs that failed to download, plus the
# JSON file it is saved to.
ErrorMap = {}
ErrorFile = DownloadFolder + "/error.json"

# JSON file mapping downloaded file name -> uploaded URL (read by replaceLink).
UploadedFile = DownloadFolder + "/uploaded.json"
'''
JSON storage layout - Md2PngFile
local table = {
    {
        filename : {
            srclink : renameFileName.png
            srclink2 : renameFileName2.png
        }
    }
}

Mapping from renamed file to source link - PngFile2SrcFile
local table = {
    renameFileName.png : srclink
}

Upload-success JSON - UploadedFile
local table = {
    renameFileName.png : dstlink
}
'''
def readFile(filePath):
    """Return the raw bytes stored in the file at ``filePath``.

    The file is opened in binary mode, so the result is ``bytes``; callers
    decode it themselves when they need text.
    """
    with open(filePath, "rb") as handle:
        return handle.read()
def isMdFile(filePath):
    """Tell whether ``filePath`` ends with a ``.md`` extension, ignoring case."""
    return filePath.lower().endswith(".md")
def writeFile(filePath, str):
    """Write ``str`` to ``filePath``, replacing any existing content.

    The file is opened in binary mode, so ``str`` must be a bytes-like
    object despite its name. The parameter name shadows the builtin but is
    kept as-is because it is part of the function's interface.
    """
    with open(filePath, "wb") as out:
        out.write(str)
def getFileImgLink(filePath):
    """Download every remote image referenced by one Markdown file.

    Files whose name does not end in ``.md`` are ignored. For each distinct
    ``![alt](http...)`` link found, the image is downloaded into
    ``DownloadFolder`` as ``<Counter>.png`` and the outcome is recorded in
    the module-level tables:

    * ``PngFile2Src``: downloaded file name -> source URL (successes)
    * ``ErrorMap``: file name -> list of URLs that failed
    * ``Md2Png``: one ``{filePath: {source URL: file name}}`` entry per
      Markdown file that contains at least one image link

    Args:
        filePath: Path of the file to scan; backslashes are normalised to
            forward slashes before use.

    Returns:
        None.
    """
    global Counter

    filePath = filePath.replace("\\", "/")
    if not isMdFile(filePath):
        return

    contentStr = readFile(filePath).decode('utf-8')

    # The alt text is matched non-greedily: a greedy ``.*`` swallows every
    # image on a line except the last one, so earlier links were skipped.
    imgre = re.compile(r'!\[.*?\]\((http.+?)\)')

    # Keep each URL once, in first-seen order, so an image referenced several
    # times in the same file is downloaded a single time.
    imglist = []
    seen = set()
    for imgurl in imgre.findall(contentStr):
        if imgurl not in seen:
            seen.add(imgurl)
            imglist.append(imgurl)

    if not imglist:
        return

    tmpMap = {}
    for imgurl in imglist:
        Counter += 1
        filename = "%d.png" % Counter
        dstFile = "%s/%s" % (DownloadFolder, filename)
        print("--- 处理:%s" % imgurl)
        try:
            urllib.request.urlretrieve(imgurl, dstFile)
        except Exception as e:
            # Best effort: one broken link must not abort the whole run, but
            # the failure is recorded and reported instead of being dropped.
            ErrorMap.setdefault(filename, []).append(imgurl)
            print(u"下载 (%s) 失败:%s" % (imgurl, e))
        else:
            PngFile2Src[filename] = imgurl
            tmpMap[imgurl] = filename
            print(u"下载 (%s) 成功 ,文件(%s)" % (imgurl, filePath))

    Md2Png.append({filePath: tmpMap})
def replaceLink():
    """Rewrite image links in the Markdown files to their uploaded URLs.

    Reads ``Md2PngFile`` (a list of ``{markdown path: {source URL:
    downloaded file name}}`` entries) and ``UploadedFile`` (downloaded file
    name -> uploaded URL). Each listed Markdown file is rewritten in place:
    every occurrence of a source URL is replaced by the uploaded URL of the
    corresponding image. An image with no entry (or a null entry) in
    ``UploadedFile`` is replaced by the placeholder ``"Noneaaa"``.

    Returns:
        None.
    """
    def readJsonFile(filePath):
        """Load and return the JSON document stored at ``filePath``."""
        filePath = filePath.replace("\\", "/")
        return json.loads(readFile(filePath).decode('utf-8'))

    def dealFile(filePath, urlMap):
        """Replace every source URL listed in ``urlMap`` inside one file."""
        print("处理文件:%s" % filePath)
        if not urlMap:
            print("无需替换:%s" % filePath)
            return
        filePath = filePath.replace("\\", "/")
        contentStr = readFile(filePath).decode('utf-8')

        # Build a separate src -> replacement table rather than overwriting
        # urlMap, so the caller's data still holds the png file names.
        replacements = {}
        for src, pngFile in urlMap.items():
            # .get(): a missing entry must reach the placeholder fallback
            # instead of raising KeyError.
            repUrl = data_uploaded.get(pngFile)
            if repUrl is None:
                repUrl = "Noneaaa"
            replacements[src] = repUrl

        # Longest URLs first: regex alternation takes the first alternative
        # that matches, so a URL that is a prefix of another one must not
        # be tried before the longer one.
        ordered = sorted(replacements, key=len, reverse=True)
        pattern = re.compile("|".join(re.escape(src) for src in ordered))
        contentStr = pattern.sub(lambda m: replacements[m.group(0)], contentStr)
        writeFile(filePath, contentStr.encode())
        print("--- 替换完成")

    data_md2png = readJsonFile(Md2PngFile)
    data_uploaded = readJsonFile(UploadedFile)

    for row in data_md2png:
        for k, v in row.items():
            dealFile(k, v)
            print("处理文件:%s" % k)
            for src, pngFile in v.items():
                print("替换url:(%s), png:(%s)" % (src, pngFile))
def getImgs():
    """Download the images of every Markdown file under ``MDFolder``.

    Walks ``MDFolder`` recursively, passes each file to ``getFileImgLink``,
    then saves the collected tables as JSON: ``PngFile2Src`` to
    ``PngFile2SrcFile``, ``Md2Png`` to ``Md2PngFile`` and ``ErrorMap`` to
    ``ErrorFile``. A summary line is printed at the end.

    Returns:
        None.
    """
    # Both the downloads and the JSON writes below target this folder; if it
    # is missing, every download fails and the final writes raise.
    os.makedirs(DownloadFolder, exist_ok=True)

    for root, _dirs, files in os.walk(MDFolder):
        for name in files:
            getFileImgLink(os.path.join(root, name))

    writeFile(PngFile2SrcFile, json.dumps(PngFile2Src).encode())
    writeFile(Md2PngFile, json.dumps(Md2Png).encode())
    writeFile(ErrorFile, json.dumps(ErrorMap).encode())

    # Only claim full success when no download was recorded as failed.
    if ErrorMap:
        print(u"下载完成,%d 个失败,详见 %s" % (len(ErrorMap), ErrorFile))
    else:
        print(u"全部下载成功")
# Script entry point: running this file directly performs only the download
# pass (getImgs). replaceLink() is not invoked here.
if __name__ == '__main__':
    getImgs()
|