#!/usr/bin/env python3
"""repo2txt - dump a whole repository / arbitrary directory as plain text.

What it does:
1. Recursively lists every directory and file below the root (a flat
   listing of relative paths, directories marked with a trailing separator).
2. Prints, in the same order, each file's relative path followed by its
   text content.
3. Skips files that look binary or exceed a size limit so the terminal is
   not flooded.
"""

import argparse
import mimetypes
import os
import sys
from typing import Iterator

# Number of leading bytes inspected when sniffing a file for NUL characters.
_SNIFF_BYTES = 1024

# MIME types outside ``text/*`` that nevertheless denote plain-text content.
# Without this allow-list, files such as ``.json`` would be reported as
# binary purely because of their ``application/...`` MIME type.
_TEXTUAL_MIME_TYPES = frozenset(
    {
        "application/json",
        "application/javascript",
        "application/x-javascript",
        "application/xml",
        "application/x-sh",
        "application/x-csh",
        "application/x-yaml",
        "application/yaml",
        "application/toml",
        "application/sql",
    }
)

# Structured-syntax suffixes that mark text-based formats (e.g. image/svg+xml).
_TEXTUAL_MIME_SUFFIXES = ("+json", "+xml")


# ---------- Argument handling ----------
def parse_args() -> argparse.Namespace:
    """Parse the command line.

    Returns:
        Namespace with ``root`` (str), ``size_limit`` (int, bytes) and
        ``encoding`` (str).
    """
    parser = argparse.ArgumentParser(
        description="Print repo structure and file contents (like repo2txt.com/local.html)"
    )
    parser.add_argument(
        "root",
        help="根目录路径",
    )
    parser.add_argument(
        "-s",
        "--size-limit",
        type=int,
        default=5 * 1024 * 1024,  # 5MB
        help="单文件最大读取字节数,超过将被跳过(默认 5MB)",
    )
    parser.add_argument(
        "--encoding",
        default="utf-8",
        help="尝试读取文件时使用的默认编码(默认 utf-8,失败则回退 latin-1)",
    )
    return parser.parse_args()


# ---------- Helpers ----------
def _is_textual_mime(mime: str) -> bool:
    """Return True when *mime* names a format that is stored as plain text."""
    return (
        mime.startswith("text/")
        or mime in _TEXTUAL_MIME_TYPES
        or mime.endswith(_TEXTUAL_MIME_SUFFIXES)
    )


def is_probably_binary(path: str) -> bool:
    """Roughly decide whether the file at *path* is binary.

    1. If the MIME type guessed from the file name is known and is not a
       textual type, the file is treated as binary without being opened.
    2. Otherwise the first 1024 bytes are read; a NUL byte (0x00) in them
       marks the file as binary.

    A file that cannot be opened or read is also reported as binary, so
    callers skip it instead of failing.
    """
    mime, _ = mimetypes.guess_type(path)
    if mime and not _is_textual_mime(mime):
        return True
    try:
        with open(path, "rb") as f:
            head = f.read(_SNIFF_BYTES)
    except (OSError, ValueError):
        # Unreadable (or an invalid path): treat as binary.
        return True
    return b"\x00" in head


def walk_files(root: str) -> Iterator[str]:
    """Yield every path below *root*, relative to *root*.

    Traversal follows ``os.walk`` (top-down). Within each visited directory
    the sub-directories are yielded first, then the files, both sorted by
    name. Directory entries carry a trailing ``os.sep`` so callers can tell
    them apart from files.
    """
    for current_root, dirs, files in os.walk(root):
        # Sorting in place keeps the output stable and also fixes the order
        # in which os.walk descends into the sub-directories.
        dirs.sort()
        files.sort()
        for d in dirs:
            rel = os.path.relpath(os.path.join(current_root, d), root)
            yield rel + os.sep
        for f in files:
            yield os.path.relpath(os.path.join(current_root, f), root)


def print_directory_tree(root: str) -> None:
    """Print a heading, every relative path below *root*, then a separator.

    The listing is flat (one relative path per line), in the order produced
    by ``walk_files``.
    """
    print("# Directory structure\n")
    for path in walk_files(root):
        print(path)
    print("\n" + "=" * 80 + "\n")


def _read_text(path: str, encoding: str) -> str:
    """Return the text of *path*, decoded strictly with *encoding*.

    Falls back to latin-1 when the bytes are not valid in *encoding*.
    Errors from opening or reading the file propagate to the caller.
    """
    try:
        with open(path, "r", encoding=encoding, errors="strict") as f:
            return f.read()
    except UnicodeDecodeError:
        # latin-1 maps every byte to a character, so this decode cannot fail.
        with open(path, "r", encoding="latin-1", errors="replace") as f:
            return f.read()


def print_files_with_content(root: str, size_limit: int, encoding: str) -> None:
    """Print each file below *root* with a header line and its content.

    Files are visited in ``walk_files`` order. A file larger than
    *size_limit* bytes, or one that ``is_probably_binary`` flags, gets only a
    "skipped" header. A file that cannot be inspected or read gets its header
    followed by an error line; processing then continues with the next file.

    Args:
        root: Directory to dump.
        size_limit: Maximum file size in bytes that will still be printed.
        encoding: Encoding tried first when decoding a file.
    """
    for path in walk_files(root):
        # Directory entries end with the separator; only files are printed.
        if path.endswith(os.sep):
            continue

        full_path = os.path.join(root, path)

        try:
            size = os.path.getsize(full_path)
        except OSError as e:
            # E.g. a dangling symlink or a file removed during the walk:
            # report it and keep going rather than aborting the whole dump.
            print(f"===== {path} =====")
            print(f"(无法读取文件: {e})")
            print("\n")
            continue

        if size > size_limit:
            print(f"===== {path} (skipped: larger than {size_limit} bytes) =====\n")
            continue

        if is_probably_binary(full_path):
            print(f"===== {path} (skipped: binary) =====\n")
            continue

        print(f"===== {path} =====")
        try:
            content = _read_text(full_path, encoding)
        except (OSError, LookupError, ValueError) as e:
            # LookupError covers an unknown --encoding name; ValueError covers
            # decode errors other than UnicodeDecodeError.
            print(f"(无法读取文件: {e})")
        else:
            print(content)
        print("\n")  # blank space between files


# ---------- Entry point ----------
def main() -> None:
    """Validate the root directory, then print the listing and the contents."""
    args = parse_args()
    root = os.path.abspath(args.root)
    if not os.path.isdir(root):
        sys.exit(f"路径不存在或不是目录: {root}")

    print_directory_tree(root)
    print_files_with_content(root, args.size_limit, args.encoding)


if __name__ == "__main__":
    main()