update

xingpingcn · Sep 29, 2023 · f71d492 · f71d492
1 parent 0f17886
commit f71d492
Show file tree

Hide file tree

Showing 6 changed files with 121 additions and 85 deletions.
diff --git a/README.md b/README.md
@@ -68,7 +68,7 @@ freecdn-js能提高网站稳定性，如果其中一个cdn链接不可用则启
 
 <font color=#808080 >*注：脚本会自动urlencode，将不是url元字符的字符转义以兼容freecdn-js。*</font>
 
-或者你也用hexo博客（如果你也使用hexo博客，需要把四个`.py`文件放在博客根目录），那么可以使用`import_to_db_with_hexo_blog.py`根据`.md`（博客写作使用markdown）文件的内容直接生成`pic.conf`（作用和`custom.conf`一样，可以用`--merge`合并到`freecdn-manifest.txt`），无需手动把url添加到`urls.txt`。`.md`放在`source\_posts`，或根据需要自行修改。`.py`文件中的正则表达需要根据自己的需求更改。如果你也使用[hexo-volantis](https://github.com/volantis-x/community)可以试着直接运行。脚本适配了`![img](url)`、`{%link%}`、`{%image%}`、`headimg`四个`tag`。
+或者你也用hexo博客（如果你也使用hexo博客，需要把对应的`.py`文件放在博客根目录），那么可以使用`import_to_db_with_hexo_blog.py`根据`.md`（博客写作使用markdown）文件的内容直接生成`pic.conf`（作用和`custom.conf`一样，可以用`--merge`合并到`freecdn-manifest.txt`），无需手动把url添加到`urls.txt`。`.md`放在`source\_posts`，或根据需要自行修改。`.py`文件中的正则表达式需要根据自己的需求更改。如果你也使用[hexo-volantis](https://github.com/volantis-x/community)可以试着直接运行。脚本适配了`![img](url)`、`{%link%}`、`{%image%}`、`headimg`四个`tag`。
 
 如果你像我一样把文件（图片和某些js）放在github（我使用[picx.xpoet.cn](https://picx.xpoet.cn/)作为管理工具，上传图片的同时能够自动生成cdn链接），能十分方便生成cdn链接。
 

diff --git a/config.py b/config.py
@@ -0,0 +1,34 @@
+
+blog_md_file_dir = './source/_posts'
+blog_public_dir = './public'
+
+user = ''  # interpolated into the api.github.com/repos/{user}/{repo} and cdn urls
+repo = ''
+branch = 'main'
+is_refresh_tag = False  # a token is required when this is enabled
+token = ''  # personal access token
+
+
+cdn_list = ['https://jsd.cdn.zzko.cn/gh/', 'https://cdn.jsdelivr.us/gh/',
+            'https://cdn.jsdelivr.ren/gh/', 'https://cdn.jsdelivr.net/gh/', 'https://raw.githubusercontent.com/']
+
+is_output_to_txt = False  # write the urls found in the md files to the txt file urls_in_md.txt
+
+is_import_html_to_conf = True  # import the html files in "blog_public_dir" into pic.conf
+html_file_to_ignore = ['']  # file names in blog_public_dir that the html import skips
+
+is_use_proxy = False  # use a proxy
+if is_use_proxy:
+    proxies_dict = {'http': 'socks5://127.0.0.1:10808',
+                    'https': 'socks5://127.0.0.1:10808'}
+else:
+    proxies_dict = {}
+dir_for_custom_conf = 'dir_for_custom_conf'  # name of the folder the files are stored in
+headers = {
+    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36',
+    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
+    'Accept-Language': 'ru-RU,ru;q=0.8,en-US;q=0.5,en;q=0.3',
+    'Accept-Encoding': 'gzip, deflate, br',
+    'Sec-Ch-Ua-Platform': "Windows",
+    'Cache-Control': 'no-cache',
+}
diff --git a/generate_external_manifest_file.py b/generate_external_manifest_file.py
@@ -1,20 +1,15 @@
 import os
 import sys,base64,hashlib,requests
-from import_to_db_with_urls_txt import cdn_list,proxies_dict
+
+from config import *
 
 '''
 从freecdn-manifest.txt中生成manifest-full.txt和用于引入外部manifest的freecdn-manifest.txt。需要填写user、token等信息。
 
 is_refresh_tag = True 会刷新tag，此tag用于即时更新cdn缓存（间接）。需要填写user、token（personal access token）等信息。
 '''
 
-user = ''
-repo = ''
-branch = ''
-
-is_refresh_tag = True
 
-token = ''
 
 headers = {
    "Accept" : "application/vnd.github+json",
@@ -38,12 +33,14 @@ def get_release_id():
    if r.status_code == 200:
       id = json["id"]
       print(f'[info] latest release id: {id}.')
-      return id,json['tag_name']
+      return id, json["tag_name"]
    else:
-      if json["message"] == "Not Found":
-         print("[warning] status_code: "+str(r.status_code))
-         print('[info] would get 404 status_code if there were no release. or check your network.')
-         return None
+        if json["message"] == "Not Found":
+            print("[warning] status_code: "+str(r.status_code))
+            print('[info] would get 404 status_code if there were no release. or check your network.')
+            return None, None
+        else:
+            print('[error] '+r.status_code)
 @try_func
 def get_branch_sha():
 
@@ -61,8 +58,8 @@ def get_branch_sha():
    "draft": False
 }
 def post_new_release():
-   release_id, tag_name = get_release_id()
-   if not release_id == None:
+   release_id,tag_name = get_release_id()
+   if  release_id :
         #delete release
         r1 = requests.delete(f'https://api.github.com/repos/{user}/{repo}/releases/{release_id}',headers=headers,proxies=proxies_dict)
         if r1.status_code == 204 :
@@ -99,7 +96,7 @@ def main():
     with open(os.path.join('./public', 'freecdn-manifest.txt'), 'w', encoding='utf8') as f:
         hash256 = CalcFileSha256_with_base64(
             os.path.join('./public', 'manifest-full.txt'))
-        f.write('@include\n\t/manifest-full.txt\n@global\n\topen_timeout=0\n/manifest-full.txt')
+        f.write('/manifest-full.txt')
         if is_refresh_tag:
             post_new_release()
         for cdn in cdn_list:
@@ -111,6 +108,7 @@ def main():
             else:
                 f.write(f'\n\t{cdn}{user}/{repo}/{branch}/manifest-full.txt')
         f.write(f'\n\thash={hash256}')
+        f.write('\n@include\n\t/manifest-full.txt\n@global\n\topen_timeout=0s')
     print('[success] manifest_file generaeted.')
 if __name__ == '__main__':
     main()
diff --git a/import_to_db_with_hexo_blog.py b/import_to_db_with_hexo_blog.py
@@ -1,21 +1,11 @@
 from import_to_db_with_urls_txt import *
 import re,os
 os.chdir(sys.path[0])  # os.chdir(sys.path[0])把当前py文件所在路径设置为当前运行路径.
+
 class main2(main):
     def __init__(self,name_of_conf_to_write) -> None:
         super().__init__(name_of_conf_to_write)
         self.pool_for_write_db = ThreadPoolExecutor(32)
-    def is_vaild_url(self,url,re_obj = None):
-        '''
-        判断是否合法url
-        '''
-        if re.match(r'(https?|ftp|file)://[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#/%=~_|]', url):
-            return True
-        else:
-            print('非法url！如果是urls.txt文件结尾或开头的空白符请忽略这条警告。')
-            print(f'url为：\'{url}\', {re_obj}')
-            return False
-
 
     def get_urls_in_md_file_and_generate(self,md_file: str, re_obj_list:list):
         for re_obj in re_obj_list:
@@ -27,10 +17,25 @@ def get_urls_in_md_file_and_generate(self,md_file: str, re_obj_list:list):
 
                     with self.lock:
                         self.url_list.append(res_url)
-                    self.thread_list.append(self.pool_for_write_db.submit(self.import_url_to_db,res_url))
-    # def write_db(self,res_url):
-    #     if self.is_url_in_db(res_url):
+                    self.thread_list.append(self.pool_for_write_db.submit(self.import_url_to_file,res_url))
+    def import_html_to_file(self,filename):  # write the conf entry for one html file found in blog_public_dir
+        url = '/'+filename  # fallback so the error report below still has a url if url_encode itself raises
+        try:
+            filename_encode = self.url_encode(filename)
+            url = '/'+filename_encode
 
+            hash256 = self.CalcFileSha256_with_base64(os.path.join(f'{blog_public_dir}',filename))
+        except Exception as e:
+            print(e, '↓\nurl: '+url)
+        else:
+            try:
+                res_url = f'{user}/{repo}@{branch}/{filename_encode}'
+                self.write_file(url,res_url=res_url,hash256=hash256)
+                if filename == 'index.html':
+                    self.write_file('/',res_url=res_url,hash256=hash256)  # index.html also gets an entry for the bare '/' path
+            except Exception as e:
+                print(e, '↓\n[error] in writing file - url: '+url)
+
     def run(self):
         re_obj_for_link_tag = re.compile(
         r'\{\s*%\s*link\s*.*::.*?::(.*?)\s*%\s*\}')
@@ -47,6 +52,11 @@ def run(self):
                 with open(os.path.join(f'{blog_md_file_dir}', filename), 'r', encoding='utf8') as f:
                     md_file = f.read()
                 self.pool.submit(self.get_urls_in_md_file_and_generate, md_file, [re_obj_for_link_tag, re_obj_for_image_tag, re_obj_for_headimg_tag, re_obj_for_pic_tag])
+        if is_import_html_to_conf:
+            for filename in os.listdir(f'{blog_public_dir}'):
+                if not filename in html_file_to_ignore:
+                    if re.match(r'.*\.html', filename):
+                        self.pool.submit(self.import_html_to_file,filename)
         self.pool.shutdown()
         wait(self.thread_list)
         if is_output_to_txt:

diff --git a/import_to_db_with_urls_txt.py b/import_to_db_with_urls_txt.py
@@ -8,29 +8,9 @@
 import hashlib
 import base64
 import sqlite3
-cdn_list = ['https://jsd.cdn.zzko.cn/gh/', 'https://cdn.jsdelivr.us/gh/',
-            'https://cdn.jsdelivr.ren/gh/', 'https://cdn.jsdelivr.net/gh/', 'https://raw.githubusercontent.com/']
-
-blog_md_file_dir = './source/_posts'
-is_output_to_txt = False
+from config import *
 os.chdir(sys.path[0])  # os.chdir(sys.path[0])把当前py文件所在路径设置为当前运行路径.
 
-is_use_proxy = True
-if is_use_proxy:    
-    proxies_dict = {'http': 'socks5://127.0.0.1:10808',
-                        'https': 'socks5://127.0.0.1:10808'}
-else:
-    proxies_dict ={}
-dir_for_custom_conf = 'dir_for_custom_conf'  # 储存文件的文件夹名称
-headers = {
-    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36',
-    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
-    'Accept-Language': 'ru-RU,ru;q=0.8,en-US;q=0.5,en;q=0.3',
-    'Accept-Encoding': 'gzip, deflate, br',
-    'Sec-Ch-Ua-Platform': "Windows",
-    'Cache-Control': 'no-cache',
-}
-
 
 class main():
     def __init__(self, name_of_conf_to_write) -> None:
@@ -41,7 +21,16 @@ def __init__(self, name_of_conf_to_write) -> None:
         self.lock = threading.Lock()
         self.thread_list = []
         self.lock_for_write_file = threading.Lock()
-
+    def is_vaild_url(self,url,re_obj = None):
+        '''
+        Return True when url begins with an http(s)/ftp/file URL; otherwise print a warning (with re_obj) and return False.
+        '''
+        url_pattern = r'(https?|ftp|file)://[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#/%=~_|]'
+        if re.match(url_pattern, url) is None:
+            print('非法url！如果是urls.txt文件结尾或开头的空白符请忽略这条警告。')
+            print(f'url为：\'{url}\', {re_obj}')
+            return False
+        return True
     def is_url_in_db(self, url, cursor) -> bool:
         '''
         不存在的话返回fasle
@@ -105,8 +94,36 @@ def down_file(self, url: str, path_url):
         else:
             print(
                 f"\033[5;30;45m[error] download one of the pictures failed↓\n{url}\033[0m")
-
-    def import_url_to_db(self, url):
+    def write_url_to_db(self,url,hash256,cursor= None,sqlite3_conn= None):
+        '''Insert the (hash256, url) pair into table_urls and commit; does nothing when cursor is None.'''
+        if cursor is not None:
+            with self.lock:
+                # bind the values as parameters so a quote inside url cannot break (or alter) the statement
+                cursor.execute('insert into table_urls (hash, url) values (?, ?)', (hash256, url))
+                sqlite3_conn.commit()
+            print(f'[success] import {url} to db')
+
+    def write_file(self,url,cursor=None,res_url=None,hash256 = None):
+        '''Append one entry to self.f_to_w: url, a tab-indented mirror line per cdn in cdn_list, then the hash line.'''
+        with self.lock_for_write_file:
+            self.f_to_w.write(f'\n{url}')
+            for cdn in cdn_list:
+                if cdn != 'https://raw.githubusercontent.com/':
+                    self.f_to_w.write(f'\n\t{cdn}{res_url}')
+                    continue
+                # the raw host is addressed with '/<ref>/' where the other cdns use '@<ref>/'
+                ref_match = re.search(r'@(\S+?)/', f'{res_url}')
+                if ref_match is None:
+                    continue  # no '@<ref>/' part to rewrite: emit no raw mirror line
+                ref = ref_match.group(1)
+                # res_url itself is left untouched so cdns listed after this one still get the '@' form
+                self.f_to_w.write(f'\n\t{cdn}' + res_url.replace(f'@{ref}', f'/{ref}'))
+            # the hash is looked up in the db when a cursor is supplied, otherwise the caller passes it in
+            if cursor is not None:
+                self.f_to_w.write(f'\n\thash={self.get_hash_in_db(url,cursor)}')
+            else:
+                self.f_to_w.write(f'\n\thash={hash256}')
+    def import_url_to_file(self, url):
         try:
             sqlite3_conn = sqlite3.connect(os.path.join(
                 os.path.expanduser('~'), '.freecdn\custom.db'))
@@ -118,36 +135,15 @@ def import_url_to_db(self, url):
                     self.down_file(url, path_url)
                 hash256 = self.CalcFileSha256_with_base64(
                     f'{dir_for_custom_conf}/{path_url}')  # 计算hash
-
-                # 写入url hash到db
-                with self.lock:
-                    cursor.execute(
-                        f'insert into table_urls (hash, url) values (\'{hash256}\', \'{url}\')')
-                    sqlite3_conn.commit()
-                print(f'[success] import {url} to db')
-
+                # 写入数据库
+                self.write_url_to_db(url,hash256,cursor,sqlite3_conn)
+
         except Exception as e:
             print(e, '↓\nurl: '+url)
         else:
             try:
                 res_url = self.url_split(url)
-                path_url = res_url.replace('/', '')
-                with self.lock_for_write_file:
-                    self.f_to_w.write(f'\n{url}')
-                    for cdn in cdn_list:
-                        if not cdn == 'https://raw.githubusercontent.com/':
-                            self.f_to_w.write(f'\n\t{cdn}{res_url}')
-                        else:
-                            try:
-                                res_url2 = re.search(
-                                    r'@(\S+?)/', f'{res_url}').group(1)
-                                res_url = res_url.replace(
-                                    f'@{res_url2}', f'/{res_url2}')
-                                self.f_to_w.write(f'\n\t{cdn}{res_url}')
-                            except:
-                                pass
-                    self.f_to_w.write(
-                        f'\n\thash={self.get_hash_in_db(url,cursor)}')
+                self.write_file(url,cursor,res_url)
             except Exception as e:
                 print(e)
         finally:
@@ -159,18 +155,16 @@ def run(self):
             try:
                 self.f_to_w = open(
                     f'./{self.name_of_conf_to_writ}', 'w', encoding='utf8')
-                self.f_to_w.write('@global\n\topen_timeout=0')
+                self.f_to_w.write('@global\n\topen_timeout=0s')
                 for url in f.readlines():
                     url = url.replace('\n', '')
                     # 验证是否是合法url
-                    if not re.match(r'(https?|ftp|file)://[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#/%=~_|]', url):
-                        print('非法url！如果是urls.txt文件结尾或开头的空白符请忽略这条警告。')
-                        print(url)
+                    if not self.is_vaild_url(url):
                         continue
                     else:
 
                         self.thread_list.append(self.pool.submit(
-                            self.import_url_to_db, self.url_encode(url)))
+                            self.import_url_to_file, self.url_encode(url)))
             except Exception as e:
                 print(e)
 

diff --git a/refresh_cdn_cache.py b/refresh_cdn_cache.py
@@ -1,6 +1,6 @@
 import requests,re
 import os
-from import_to_db_with_urls_txt import proxies_dict
+from config import *
 from concurrent.futures import ThreadPoolExecutor, wait
 import threading
 def main():