convert.py 3.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899
  1. #!/usr/bin/env python3
  2. """
  3. Excel → HTML 自动转换器
  4. 监听 /watch 目录,xlsx/xls 文件上传后自动转成 HTML 写入 /output
  5. """
  6. import os
  7. import time
  8. import html
  9. import logging
  10. from pathlib import Path
  11. import openpyxl
  12. from watchdog.observers.polling import PollingObserver # 卷挂载用 polling
  13. from watchdog.events import FileSystemEventHandler
  14. WATCH_DIR = Path("/watch")
  15. OUTPUT_DIR = Path("/output")
  16. OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
  17. logging.basicConfig(
  18. level=logging.INFO,
  19. format="%(asctime)s [converter] %(levelname)s %(message)s",
  20. )
  21. log = logging.getLogger(__name__)
  22. def xlsx_to_html(src: Path, dst: Path) -> None:
  23. wb = openpyxl.load_workbook(src, read_only=True, data_only=True)
  24. parts = []
  25. parts.append(
  26. "<!doctype html><html><head><meta charset='utf-8'>"
  27. "<style>"
  28. "body{font-family:sans-serif;font-size:13px;padding:8px}"
  29. "h2{margin:16px 0 6px}"
  30. "table{border-collapse:collapse;width:100%;margin-bottom:24px}"
  31. "th,td{border:1px solid #ccc;padding:4px 8px;text-align:left;white-space:pre-wrap}"
  32. "th{background:#f0f0f0;font-weight:600}"
  33. "tr:hover td{background:#fafafa}"
  34. "</style></head><body>"
  35. )
  36. parts.append(f"<h1>{html.escape(src.name)}</h1>")
  37. for sheet in wb.worksheets:
  38. parts.append(f"<h2>{html.escape(sheet.title)}</h2>")
  39. parts.append("<table>")
  40. first_row = True
  41. for row in sheet.iter_rows(values_only=True):
  42. tag = "th" if first_row else "td"
  43. cells = "".join(
  44. f"<{tag}>{html.escape(str(c)) if c is not None else ''}</{tag}>"
  45. for c in row
  46. )
  47. parts.append(f"<tr>{cells}</tr>")
  48. first_row = False
  49. parts.append("</table>")
  50. parts.append("</body></html>")
  51. dst.write_text("\n".join(parts), encoding="utf-8")
  52. log.info("Converted: %s -> %s", src.name, dst.name)
  53. def convert_file(path: Path) -> None:
  54. if path.suffix.lower() not in (".xlsx", ".xls"):
  55. return
  56. try:
  57. out = OUTPUT_DIR / (path.stem + ".html")
  58. xlsx_to_html(path, out)
  59. except Exception as e:
  60. log.error("Failed to convert %s: %s", path, e)
  61. class ExcelHandler(FileSystemEventHandler):
  62. def on_created(self, event):
  63. if not event.is_directory:
  64. convert_file(Path(event.src_path))
  65. def on_modified(self, event):
  66. if not event.is_directory:
  67. convert_file(Path(event.src_path))
  68. if __name__ == "__main__":
  69. log.info("Converter started. Watching: %s", WATCH_DIR)
  70. # 启动时转换已有文件
  71. for f in WATCH_DIR.rglob("*.xlsx"):
  72. convert_file(f)
  73. for f in WATCH_DIR.rglob("*.xls"):
  74. convert_file(f)
  75. observer = PollingObserver(timeout=2)
  76. observer.schedule(ExcelHandler(), str(WATCH_DIR), recursive=True)
  77. observer.start()
  78. try:
  79. while True:
  80. time.sleep(5)
  81. except KeyboardInterrupt:
  82. observer.stop()
  83. observer.join()