add index.mjs

2025-12-08 07:30:17 +00:00 · 2023-12-12 22:16:52 +08:00 · 2023-12-12 22:16:52 +08:00 · df70a65b4f
commit df70a65b4f
parent 687b2b4a8d
6 changed files with 419 additions and 6 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,6 @@
+node_modules
+package-lock.json
+test.json
+out
+dist
+.vscode
--- a/README.md
+++ b/README.md
@ -0,0 +1,104 @@
+# 喵帕斯解析器 (nyanpasu)
+
+喵帕斯解析器是一个 BiliBili 番剧视频和弹幕元数据的解析脚本，可以根据链接自动下载和解析视频和弹幕元数据，并创建下载列表。
+
+## 安装
+
+此脚本没有使用任何平台特定的代码，应当可以适用于所有主流操作系统，但是它只在 Linux 下测试过。
+
+你需要首先安装 Node.js 以运行此脚本。你可以从 GitHub Release 下载到一个压缩后的脚本，其中包含脚本代码及其 NPM 依赖。该脚本包含正确的“井号注释”（shebang），可以直接赋予可执行权限并执行。你可以考虑将其复制到 PATH 目录下（例如 `~/.local/bin` 或者 `/usr/local/bin`）以便于使用。
+
+你可能希望安装 [yt-dlp](https://github.com/yt-dlp/yt-dlp) 及其依赖 [ffmpeg](https://ffmpeg.org/) 以便于下载视频。你可能还希望使用 [Tmux](https://github.com/tmux/tmux) 或者 [GNU Screen](https://www.gnu.org/software/screen/) 以防止因为关闭终端而导致下载中断。
+
+## 构建
+
+你也可以手工构建并打包此项目，首先你需要安装 Node.JS 和 npm，并下载本仓库，然后执行：
+
+```
+npm install
+npm run build
+```
+
+即可在 `dist/nyanpasu.mjs` 找到输出。
+
+请注意，此脚本的打包脚本 `build.sh` 不适用于 Windows（依赖于 `/bin/sh`），但是应当可以很容易地适配到 Windows（如果对此需要帮助，请随时提交一个 Issue）。
+
+## 使用方法
+
+首先，你需要得到番剧的一集的链接，并作为命令行参数传入脚本。以下以 [幸运星](https://www.bilibili.com/bangumi/play/ep35595) 为例，切换到一个合适的空目录并执行：
+
+```sh
+nyanpasu.mjs https://www.bilibili.com/bangumi/play/ep35595
+```
+
+得到类似于以下的输出：
+
+```
+Downloading descriptor info
+Title: 幸运星
+Count: 25 episodes
+ * Episode 1: 狂奔的女人
+ * Episode 2: 努力与结果
+ * Episode 3: 形形色色的人们
+ * Episode 4: 干劲的问题
+ * Episode 5: 神射手
+ * Episode 6: 夏天的例行节目
+ * Episode 7: 印象
+ * Episode 8: 即使不是我也很旺盛喔
+ * Episode 9: 这种感觉
+ * Episode 10: 愿望
+ * Episode 11: 过平安夜的各种方法
+ * Episode 12: 一起去祭典吧
+ * Episode 13: 美味的日子
+ * Episode 14: 同一个屋檐下
+ * Episode 15: 一时变不过来
+ * Episode 16: 循环
+ * Episode 17: 名正言顺
+ * Episode 18: 十个人十个样
+ * Episode 19: 二次元的本质
+ * Episode 20: 渡过夏天的方法
+ * Episode 21: 潘朵拉的盒子
+ * Episode 22: 在这里的彼方
+ * Episode 23: 微妙的那条线
+ * Episode 24: 未定
+ * Episode 25: OVA
+Command hint: yt-dlp -a vlist.txt -o "%(autonumber)s.%(ext)s" -f mp4
+```
+
+并生成四个文件：
+
+```plain
+cache.json  descriptor.xml  download-danmu.sh  vlist.txt
+```
+
+其中 `cache.json` 是番剧元数据的缓存；`descriptor.xml` 是简化后的番剧元数据，可供人阅读或其他软件解析；`download-danmu.sh` 是用于下载弹幕数据和番剧封面图的脚本（内部使用 `wget`）；`vlist.txt` 是各集视频的链接。你可以进一步使用如下命令下载弹幕数据和番剧封面图：
+
+```sh
+./download-danmu.sh
+```
+
+如果你安装了 [yt-dlp](https://github.com/yt-dlp/yt-dlp)，你可以进一步根据 `vlist.txt` 中的链接下载各集视频：
+
+```
+yt-dlp -a vlist.txt -o "%(autonumber)s.%(ext)s" -f mp4
+```
+
+根据需要，你可能需要添加 `--cookies`、`--cookies-from-browser`、`--abort-on-error` 等命令选项，注意可能无法下载付费视频和地区限制视频，可能会报错。更多信息请参考 [yt-dlp](https://github.com/yt-dlp/yt-dlp) 的文档。
+
+你可以通过 `--help` 选项或阅读 `src/index.mjs` 源代码以查看更多命令行选项。
+
+## 社区
+
+我们期待来自社区的贡献。如果您遇到了错误，请随时提出问题。还有许多可以添加的功能。如果您实现了任何增强功能，欢迎打开一个拉取请求。
+
+## 声明
+
+请注意，本项目不代表上海宽娱数码科技有限公司或番剧版权方的意见，本项目按照“按其原样”的原则提供，不提供任何附带保证，使用者需承担可能的风险。
+
+本项目完全开源，且没有任何代码加密操作，如有疑虑请自行审查代码或停止使用相关文件。对于使用本项目产生的额外问题，如账户封禁或被盗等，维护者不对此负责，请谨慎使用。
+
+请尊重数字版权，请勿二次分发使用此脚本（以及使用此脚本生成的脚本）得到的文件。
+
+## 版权
+
+本项目使用 GNU 通用公共许可证 v2.0。根据该许可证，您有权复制、修改、分发本项目的源代码和衍生作品。然而，您必须遵守许可证中规定的条件，包括在您的衍生作品中保留原始版权信息和许可证，并在分发时提供许可证的副本。此外，您还需要确保任何引用或使用本项目的内容的用户也能够获得许可证的副本。请注意，GNU 通用公共许可证 v2.0 不允许您将本项目的代码用于专有软件，因此任何基于本项目的衍生作品也必须使用 GNU 通用公共许可证 v2.0 发布。详细信息请见 `LICENSE` 文件。
--- a/build.sh
+++ b/build.sh
@ -0,0 +1,6 @@
+#!/bin/sh
+# Bundle src/index.mjs with ncc, then prepend the shebang/license header kept
+# in nyanpasu.js to produce the executable dist/nyanpasu.mjs.
+
+# Stop at the first failing command (or unset variable) so that a failed
+# bundle step never leaves a truncated dist/nyanpasu.mjs behind.
+set -eu
+
+# Always work from the repository root; the substitution is quoted so that a
+# checkout path containing spaces is passed to cd as a single argument.
+cd "$(git rev-parse --show-toplevel)"
+rm -rf dist
+npx ncc build -m src/index.mjs
+# The header must come first so the shebang ends up on line 1 of the output.
+cat nyanpasu.js dist/index.mjs > dist/nyanpasu.mjs
+chmod +x dist/nyanpasu.mjs
--- a/nyanpasu.js
+++ b/nyanpasu.js
@ -0,0 +1,20 @@
+#!/usr/bin/env -S node --experimental-modules
+/*
+nyanpasu.js BiliBili 番剧视频和弹幕元数据解析脚本
+Copyright (C) 2023  方而静
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+*/
+
--- a/package.json
+++ b/package.json
@ -1,15 +1,25 @@
 {
-  "name": "nyanpasu-clone",
+  "name": "nyanpasu",
+  "type": "module",
  "version": "0.1.0",
-  "description": "下载缓存B站上某一番剧的全部视频和弹幕",
-  "main": "src/index.js",
+  "description": "BiliBili 番剧视频和弹幕元数据解析脚本",
+  "main": "src/index.mjs",
  "scripts": {
-    "test": "echo \"Error: no test specified\" && exit 1"
+    "build": "./build.sh"
  },
  "repository": {
    "type": "git",
-    "url": "https://git.gzezfisher.top/szTom/nyanpasu-clone"
+    "url": "https://github.com/szdytom/nyanpasu"
  },
  "author": "方而静 <szdytom@qq.com>",
-  "license": "GPL-2.0-only"
+  "license": "GPL-2.0-only",
+  "dependencies": {
+    "@xmldom/xmldom": "^0.8.10",
+    "node-fetch": "^3.3.2",
+    "xml-formatter": "^3.6.0",
+    "yargs": "^17.7.2"
+  },
+  "devDependencies": {
+    "@vercel/ncc": "^0.38.1"
+  }
 }
--- a/src/index.mjs
+++ b/src/index.mjs
@ -0,0 +1,267 @@
+import fetch from 'node-fetch';
+import xmldom from '@xmldom/xmldom';
+import fs from 'node:fs/promises';
+import xmlFormat from 'xml-formatter';
+import { hideBin } from 'yargs/helpers';
+import yargs from 'yargs';
+
+const DESC_XMLNS = "nyanpasu:descriptor";
+
+/**
+ * Download a page and extract the JSON payload embedded in its
+ * `__NEXT_DATA__` script tag.
+ *
+ * @param {string} url - Address of the page to download.
+ * @returns {Promise<any>} The parsed JSON payload.
+ * @throws {Error} If the server answers with a non-success HTTP status, or
+ *   the page contains no non-empty `__NEXT_DATA__` script tag. Network
+ *   failures and JSON syntax errors propagate unchanged.
+ */
+async function fetchDescriptor(url) {
+	const response = await fetch(url);
+	// Without this check an HTTP error page would only surface later as a
+	// misleading "failed to extract JSON" error.
+	if (!response.ok) {
+		throw new Error(`Failed to download ${url}: HTTP ${response.status} ${response.statusText}`);
+	}
+
+	const html = await response.text();
+	// The `s` flag lets `.` match newlines inside the script body.
+	const regex = /<script id="__NEXT_DATA__" type="application\/json">(.*?)<\/script>/s;
+	const match = html.match(regex);
+	if (match == null || !match[1]) {
+		throw new Error('Failed to extract JSON data from HTML');
+	}
+
+	return JSON.parse(match[1]);
+}
+
+/**
+ * Create an element and, unless `content` is null or undefined, set its text
+ * to the string form of `content`.
+ *
+ * @param {object} document - Object providing `createElement(tagName)`.
+ * @param {string} tagName - Name of the element to create.
+ * @param {*} content - Value to store as text; null/undefined leaves the
+ *   element without text.
+ * @returns {object} The newly created element.
+ */
+function createTextChild(document, tagName, content) {
+	const element = document.createElement(tagName);
+	if (content == null) {
+		return element;
+	}
+	element.textContent = content.toString();
+	return element;
+}
+
+/**
+ * A whole series: its title, identifying metadata, statistics and episodes.
+ */
+class Anime {
+	/**
+	 * @param {string} title - Series title.
+	 * @param {object} data - Metadata; `seasonId`, `mediaId` and `alias` are exported.
+	 * @param {object} stat - Statistics counters plus an optional `rating`
+	 *   object carrying `score` and `count`.
+	 */
+	constructor(title, data, stat) {
+		this.title = title;
+		this.data = data;
+		this.stat = stat;
+		this.episodes = [];
+	}
+
+	/**
+	 * Serialize the series, its statistics and all episodes.
+	 *
+	 * @param {object} document - DOM document used to create the elements.
+	 * @returns {object} The `<anime>` element (not attached to the document).
+	 */
+	toXML(document) {
+		const animeRoot = document.createElement('anime');
+		animeRoot.appendChild(createTextChild(document, 'title', this.title));
+
+		for (const field of ['seasonId', 'mediaId', 'alias']) {
+			animeRoot.appendChild(createTextChild(document, field, this.data[field]));
+		}
+
+		const statRoot = document.createElement('statistics');
+		animeRoot.appendChild(statRoot);
+		const statFields = [
+			'coins', 'danmakus', 'favorite', 'favorites', 'likes',
+			'reply', 'share', 'views',
+		];
+		for (const field of statFields) {
+			statRoot.appendChild(createTextChild(document, field, this.stat[field]));
+		}
+
+		// The rating may be absent (e.g. nothing has been rated yet); emit the
+		// element only when there is something to report instead of crashing
+		// on a null dereference.
+		const rating = this.stat.rating;
+		if (rating != null) {
+			const ratingNode = createTextChild(document, 'rating', rating.score);
+			if (rating.count != null) {
+				ratingNode.setAttribute('count', rating.count.toString());
+			}
+			statRoot.appendChild(ratingNode);
+		}
+
+		const episodesRoot = document.createElement('episodes');
+		animeRoot.appendChild(episodesRoot);
+		for (const episode of this.episodes) {
+			episodesRoot.appendChild(episode.toXML(document));
+		}
+		// Export timestamp in whole seconds since the Unix epoch.
+		animeRoot.appendChild(createTextChild(document, 'exportTime', Math.floor(Date.now() / 1000)));
+
+		return animeRoot;
+	}
+}
+
+/**
+ * One episode of a series together with the raw fields needed for export.
+ */
+class AnimeEpisode {
+	/**
+	 * @param {number} id - Index of the episode as assigned by the caller.
+	 * @param {string} title - Episode title.
+	 * @param {object} data - Per-episode fields (cid, link, cover, skip, ...).
+	 */
+	constructor(id, title, data) {
+		this.id = id;
+		this.title = title;
+		this.data = data;
+	}
+
+	/** The `cid` field of the episode data. */
+	get cid() {
+		return this.data.cid;
+	}
+
+	/** The `link` field of the episode data. */
+	get link() {
+		return this.data.link;
+	}
+
+	/** The `cover` field of the episode data. */
+	get cover() {
+		return this.data.cover;
+	}
+
+	/**
+	 * Serialize the episode.
+	 *
+	 * @param {object} document - DOM document used to create the elements.
+	 * @returns {object} The `<episode>` element (not attached to the document).
+	 */
+	toXML(document) {
+		const root = document.createElement('episode');
+
+		const exportedKeys = [
+			'displayTitle', 'cid', 'bvid', 'aid',
+			'duration', 'publishTime', 'link', 'releaseDate'
+		];
+		const textChildren = [
+			['index', this.id],
+			['title', this.title],
+			...exportedKeys.map((key) => [key, this.data[key]]),
+		];
+		for (const [tag, value] of textChildren) {
+			root.appendChild(createTextChild(document, tag, value));
+		}
+
+		const skip = this.data.skip;
+		if (skip == null) {
+			return root;
+		}
+
+		// Each present section ('op', 'ed') becomes a child element carrying
+		// its start/end values as attributes.
+		const skipNode = document.createElement('skip');
+		root.appendChild(skipNode);
+		for (const name of ['op', 'ed']) {
+			const range = skip[name];
+			if (range == null) {
+				continue;
+			}
+			const sectionNode = document.createElement(name);
+			skipNode.appendChild(sectionNode);
+			sectionNode.setAttribute('start', range.start.toString());
+			sectionNode.setAttribute('end', range.end.toString());
+		}
+		return root;
+	}
+}
+
+/**
+ * Convert the raw page payload into an `Anime` with its episodes.
+ *
+ * @param {object} source - Parsed JSON payload of the page.
+ * @returns {Anime} The series; episodes are numbered from 1 in payload order.
+ * @throws {Error} If the media info cannot be located in `source`.
+ */
+function parseDescriptor(source) {
+	// Every hop is optional (including the array index) so that any malformed
+	// payload ends in the descriptive error below rather than a raw TypeError.
+	const data = source?.props?.pageProps?.dehydratedState?.queries?.[0]?.state?.data?.seasonInfo?.mediaInfo;
+	if (data == null) {
+		throw new Error('Cannot parse descriptor: media info not found');
+	}
+
+	// A missing statistics object yields empty statistic fields, not a crash.
+	const stat = data.stat ?? {};
+	const anime = new Anime(data.title, {
+		alias: data.alias,
+		seasonId: data.season_id,
+		mediaId: data.media_id,
+	}, {
+		coins: stat.coins,
+		danmakus: stat.danmakus,
+		favorite: stat.favorite,
+		favorites: stat.favorites,
+		likes: stat.likes,
+		reply: stat.reply,
+		share: stat.share,
+		views: stat.views,
+		rating: data.rating,
+	});
+
+	const episodes = data.episodes ?? [];
+	episodes.forEach((edata, position) => {
+		anime.episodes.push(new AnimeEpisode(position + 1, edata.long_title, {
+			aid: edata.aid,
+			bvid: edata.bvid,
+			cid: edata.cid,
+			duration: edata.duration,
+			cover: edata.cover,
+			link: edata.link,
+			publishTime: edata.pub_time,
+			releaseDate: edata.release_date,
+			displayTitle: edata.playerEpTitle,
+			skip: edata.skip,
+		}));
+	});
+
+	return anime;
+}
+
+// Module-wide switch for informational output; main() clears it for --quiet.
+let enableLogging = true;
+
+/**
+ * Print an informational message to stdout unless logging is switched off.
+ *
+ * @param {*} msg - Value to print; it is interpolated into a template string
+ *   before being logged.
+ */
+function info(msg) {
+	if (!enableLogging) {
+		return;
+	}
+	console.log(`${msg}`);
+}
+
+/**
+ * Quote a value so that /bin/sh reads it as a single literal word.
+ * Embedded single quotes are closed, escaped and reopened.
+ */
+function shellQuote(value) {
+	return `'${String(value).replace(/'/g, "'\\''")}'`;
+}
+
+/**
+ * Give a protocol-relative address (`//host/path`) an explicit https scheme;
+ * any other address is returned unchanged.
+ */
+function toHttpsUrl(address) {
+	const text = String(address);
+	return text.startsWith('//') ? `https:${text}` : text;
+}
+
+/**
+ * Parse the raw descriptor, log a summary and write the output files into
+ * the current working directory: `descriptor.xml`, `download-danmu.sh`
+ * (made executable) and `vlist.txt`.
+ *
+ * @param {object} rawDesc - Parsed JSON payload of the page.
+ * @returns {Promise<void>} Settles once all files are written.
+ * @throws {Error} If the descriptor cannot be parsed or a file operation fails.
+ */
+async function processDescriptor(rawDesc) {
+	const anime = parseDescriptor(rawDesc);
+	info(`Title: ${anime.title}`);
+	info(`Count: ${anime.episodes.length} episodes`);
+	for (const episode of anime.episodes) {
+		info(` * Episode ${episode.id}: ${episode.title}`);
+	}
+
+	const doc = (new xmldom.DOMImplementation()).createDocument(DESC_XMLNS, 'xml');
+	doc.firstChild.appendChild(anime.toXML(doc));
+	const xmlContent = xmlFormat((new xmldom.XMLSerializer()).serializeToString(doc), {
+		collapseContent: true
+	});
+
+	info('Command hint: yt-dlp -a vlist.txt -o "%(autonumber)s.%(ext)s" -f mp4');
+
+	const scriptLines = ['#!/bin/sh'];
+	const videoLinks = [];
+	// A Set, so a cover shared by several episodes is downloaded only once.
+	const covers = new Set();
+	for (const episode of anime.episodes) {
+		const filename = `${episode.id.toString().padStart(5, '0')}.xml`;
+		const downloadLink = `https://comment.bilibili.com/${episode.cid}.xml`;
+		// Values taken from the downloaded page are quoted so they can never
+		// be interpreted as shell syntax by the generated script.
+		scriptLines.push(`wget -O ${shellQuote(filename)} ${shellQuote(downloadLink)}`);
+		videoLinks.push(episode.link);
+		if (episode.cover != null) {
+			covers.add(episode.cover);
+		}
+	}
+
+	let coverIndex = 1;
+	for (const cover of covers) {
+		scriptLines.push(`wget -O ${shellQuote(`cover-${coverIndex}.jpg`)} ${shellQuote(toHttpsUrl(cover))}`);
+		coverIndex += 1;
+	}
+
+	await Promise.all([
+		fs.writeFile('descriptor.xml', xmlContent),
+		fs.writeFile('download-danmu.sh', `${scriptLines.join('\n')}\n`),
+		fs.writeFile('vlist.txt', videoLinks.join('\n')),
+	]);
+	// The mode can only be set after the script file exists.
+	await fs.chmod('download-danmu.sh', 0o755);
+}
+
+/**
+ * Command-line entry point: parse the options, obtain the raw descriptor
+ * (from `cache.json` or by downloading the given URL) and generate the
+ * output files.
+ *
+ * Exits the process with status 1 on invalid option combinations or when
+ * `--skip-url` is given but no usable `cache.json` exists.
+ *
+ * @returns {Promise<void>}
+ */
+async function main() {
+	const args = yargs(hideBin(process.argv))
+		// By default yargs-parser reads `--no-cache` as the negation of a
+		// `cache` flag, which would leave `noCache` at its default; turning
+		// boolean negation off makes the option below work as declared.
+		.parserConfiguration({ 'boolean-negation': false })
+		.option('no-cache', {
+			description: 'Ignore local cache.json',
+			type: 'boolean',
+			default: false,
+		}).option('skip-url', {
+			description: 'Skip url for downloading descriptor',
+			type: 'boolean',
+			default: false,
+		}).option('quiet', {
+			description: 'Do not output logs other than errors',
+			type: 'boolean',
+			default: false,
+			alias: 'q',
+		}).usage('Usage: <url>').help().alias('help', 'h').argv;
+	const url = args._[0];
+	enableLogging = !args.quiet;
+	if (args.skipUrl && args.noCache) {
+		// Neither the cache nor the network may be used.
+		console.error('There is nothing to do.');
+		process.exit(1);
+	}
+	if (!args.skipUrl && url == null) {
+		console.error('No url provided, please specify --skip-url.');
+		process.exit(1);
+	}
+
+	let rawDesc;
+	if (args.noCache) {
+		info('Downloading descriptor info (no cache)');
+		rawDesc = await fetchDescriptor(url);
+	} else {
+		try {
+			await fs.access('cache.json');
+			info('Using cached descriptor info');
+			rawDesc = JSON.parse(await fs.readFile('cache.json'));
+		} catch (err) {
+			// Any failure to read or parse the cache falls back to a download,
+			// unless downloading was explicitly disabled.
+			if (args.skipUrl) {
+				console.error('cache.json not found');
+				process.exit(1);
+			}
+			info('Downloading descriptor info');
+			rawDesc = await fetchDescriptor(url);
+			await fs.writeFile('cache.json', JSON.stringify(rawDesc));
+		}
+	}
+	await processDescriptor(rawDesc);
+
+	// tmux sets TMUX and GNU screen sets STY inside their sessions.
+	if (process.env.TMUX == null && process.env.STY == null) {
+		info('It seems that you are NOT inside a tmux or screen session!!');
+	}
+}
+
+// Report failures explicitly and signal them through the exit status rather
+// than relying on the runtime's unhandled-rejection behaviour.
+main().catch((err) => {
+	console.error(err);
+	process.exitCode = 1;
+});