将以下代码保存到一个 smg.py 文件中,放在服务器的根目录下,用 python smg.py 命令运行,即可在网站根目录生成一个sitemap.txt的网站地图。
也可以修改 site 变量,为子目录生成站点地图,或者修改资源后缀列表,允许或者屏蔽其他的文件类型。
# sitemap generator
# run this at the root of the server
import os
import requests.utils
# Base URL prepended to every discovered path; write it without a trailing
# slash because each path appended to it already starts with "/".
site = "https://www.example.com"
# Extensions of page resources (scripts, styles, images) that should stay out
# of the sitemap. Every entry carries its leading dot so that it matches a
# real file extension rather than any name that merely ends in those letters.
# NOTE(review): not referenced by the code shown here, which filters with
# allowed_res only; kept as a ready-made list for a block-list filter.
blocked_res = [".js", ".css", ".jpg", ".jpeg", ".png", ".svg"]
# File extensions (leading dot included) that count as pages for the sitemap.
allowed_res = [".html", ".htm", ".php", ".xhtml"]


def isAllowed(filename):
    """Return True when *filename* ends with one of the ``allowed_res`` extensions.

    The comparison ignores case, so ``INDEX.HTML`` is accepted just like
    ``index.html``. An empty ``allowed_res`` rejects every name.

    Args:
        filename: Bare file name (or path) to test.

    Returns:
        bool: True if the name carries an allowed extension, False otherwise.
    """
    # str.endswith accepts a tuple of suffixes, so one call replaces the loop.
    # The tuple is rebuilt per call so later edits to allowed_res are honoured.
    suffixes = tuple(ext.lower() for ext in allowed_res)
    return filename.lower().endswith(suffixes)
# Walk the tree rooted at the current working directory; the script is meant
# to be started from the web server's document root.
server_root = os.path.curdir
sitemap = []
for pathes, dirs, files in os.walk(server_root):
    # Prune hidden directories in place so os.walk never descends into them
    # (e.g. ".git"); everything below them would be rejected anyway. Sorting
    # makes the traversal order, and therefore the output, deterministic.
    dirs[:] = sorted(d for d in dirs if not d.startswith("."))
    # Normalise Windows separators, then drop the leading "." of the walk
    # root so the remainder ("" or "/sub/dir") can be appended to `site`.
    rel_dir = pathes.replace("\\", "/")[1:]
    for name in sorted(files):
        # Skip hidden files and anything that is not an allowed page type.
        # The test looks at the file name only, so a "/." inside `site`
        # itself can no longer cause every entry to be dropped.
        if name.startswith(".") or not isAllowed(name):
            continue
        # Percent-encode spaces and non-ASCII characters in the URL.
        fp = requests.utils.requote_uri(site + rel_dir + "/" + name)
        print(fp)
        sitemap.append(fp)

# newline="" turns off newline translation, so every entry ends in exactly
# "\r\n"; in default text mode Windows would expand it to "\r\r\n".
# The with-block closes the file even if a write fails.
with open("sitemap.txt", "w", encoding="utf-8", newline="") as f:
    f.writelines(url + "\r\n" for url in sitemap)