r1a dev

惜秦皇汉武,略输文采;唐宗宋祖,稍逊风骚。俱往矣,数风流人物,还看今朝。

首页
关于
链接
Admin
RSS
Email


自动制作屏蔽词脚本

  • 作者: runoneall
  • 时间: 2024-12-09
  • 分类: 默认分类

使用到的仓库:

  • 屏蔽词库:https://github.com/57ing/Sensitive-word
  • 域名词库:https://github.com/gfwlist/gfwlist
import requests
import os

# Clean the working folder.
# Make sure ./source exists first: on a fresh checkout the `cd` below would
# fail and the later writes into ./source/ would raise FileNotFoundError.
os.makedirs("source", exist_ok=True)
# Remove whatever a previous run left behind (everything the ./* glob matches).
os.system("cd source && rm -rf ./*")

# Download the sensitive-word archive (zip of the repository's master branch)
# and stream it to ./source/block_words.zip in 1 KiB chunks.
block_word_url = "https://codeload.github.com/57ing/Sensitive-word/zip/refs/heads/master"
response = requests.get(block_word_url, stream=True)
# Abort on an HTTP error status instead of saving an error page as the zip.
response.raise_for_status()
with open("./source/block_words.zip", 'wb') as f:
    for chunk in response.iter_content(chunk_size=1024):
        # Skip empty chunks; only non-empty data is written.
        if not chunk:
            continue
        f.write(chunk)

# Unpack the sensitive-word archive and flatten it into ./source.
# The commands run in order through the shell:
#   1. extract the zip into ./source/
#   2. delete the zip itself
#   3. drop the repository files that are not word lists
#   4. move the remaining files up into ./source
#   5. remove the now-empty extracted directory
for command in (
    "unzip ./source/block_words.zip -d ./source/",
    "rm ./source/block_words.zip",
    "cd ./source/Sensitive-word-master && rm README.md key.txt 敏感词库表统计.xlsx 敏感词库表统计.txt",
    "cd ./source/Sensitive-word-master && mv ./* ..",
    "cd ./source && rm -rf ./Sensitive-word-master",
):
    os.system(command)

# Re-encode every word-list file in ./source in place:
# read it as GB18030, then overwrite it as UTF-8.
file_list = os.listdir("./source")
for file_item in file_list:
    path = f"./source/{file_item}"
    # Read the whole file first; the same path is reopened for writing below.
    with open(path, 'r', encoding="gb18030") as f_in:
        content = f_in.read()
    with open(path, 'w', encoding="utf-8") as f_out:
        f_out.write(content)

# Download the GFW list (served base64-encoded) and decode it.
gfw_url = "https://raw.githubusercontent.com/gfwlist/gfwlist/refs/heads/master/gfwlist.txt"
response = requests.get(gfw_url, stream=True)
# Fail fast on an HTTP error, matching the sensitive-word download; otherwise
# an error page would be saved and handed to `base64 -d` below.
response.raise_for_status()
# Stream the encoded list to disk in 1 KiB chunks.
with open("./source/gfwlist_encode.txt", 'wb') as f:
    for chunk in response.iter_content(chunk_size=1024):
        if chunk:
            f.write(chunk)
# Decode with the system `base64` tool, then drop the encoded copy.
os.system("cd ./source && base64 -d gfwlist_encode.txt > gfwlist_decode.txt")
os.system("cd ./source && rm gfwlist_encode.txt")

# Filter the decoded GFW list down to plain entries.
# Rules applied to each non-empty line:
#   - lines containing "*" are skipped
#   - a leading "||", "|" or "." is stripped and the rest is kept
#   - lines with none of those prefixes are dropped
with open("./source/gfwlist_decode.txt", "r", encoding="utf-8") as f:
    content = f.read()
content = [i for i in content.split("\n") if i != ""]
lines = []
for item in content:
    if "*" in item:
        continue
    # "||" must be tested before "|" so the longer prefix wins.
    for prefix in ("||", "|", "."):
        if item.startswith(prefix):
            lines.append(item[len(prefix):])
            break
# Append the kept entries, one per line, to ./source/gfwlist.txt.
with open("./source/gfwlist.txt", "a", encoding="utf-8") as f:
    for line in lines:
        f.write(line+"\n")
# The decoded intermediate file is no longer needed.
os.system("cd ./source && rm gfwlist_decode.txt")

# Merge every file in ./source into ./source/block-words.txt.
# The directory listing is taken once, before the output file is created,
# so the output file is not part of the files being merged.
file_list = os.listdir("./source")
for file_item in file_list:
    source_path = f"./source/{file_item}"
    with open(source_path, 'r', encoding="utf-8") as f:
        content = f.read()
    # Keep only non-empty lines.
    content = content.split("\n")
    content = [i for i in content if i != ""]
    # Append this file's lines to the combined list, one per line.
    with open("./source/block-words.txt", 'a', encoding="utf-8") as f:
        for line in content:
            f.write(line+"\n")
    # Delete the merged input with os.remove rather than a shell `rm`: the
    # file names come from a downloaded archive, and interpolating them
    # unquoted into a shell command breaks on spaces and lets shell
    # metacharacters in a name run arbitrary commands.
    os.remove(source_path)

标签: none

评论已关闭

  • 上一篇: 自建doh服务器+设置Firefox使用
  • 下一篇: 一个简单的Go语言轮子
© 2025 r1a dev. Old Fasion Theme. Powered by Typecho.