cpesolino-lexci/scripts/compile-lexico.html

264 lines
8.2 KiB
HTML
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>C'pesolino Lexici Compiler</title>
</head>
<body>
<label for=input>lexici texto:</label><br/><textarea id=input></textarea><br/>
<label for=szinput>papro caito:</label><br/><input id=szinput placeholder="a4/16k" value="a4"/><br/>
<button onclick="parse()">alqa</button>
<p id=err></p>
<label for=output>markdown frutco:</label><br/>
<textarea id=output readonly></textarea>
<label for=output2>LaTeX frutco:</label><br/>
<textarea id=output2 readonly></textarea>
</body>
<style type="text/css">
#err {
color: red;
}
textarea {
width: 100%;
height: 300px;
}
</style>
<script type="text/javascript">
// Spellings that parse_pt_to_marco() accepts as the leading or trailing segment
// ("auxalino") of a syllable. Order matters: the matching loop there keeps the LAST
// entry that prefixes the syllable, so 'zh', 'dr' and 'ts' must stay after 'z', 'd', 't'.
const auxi = ['b','c','d','f','g','h','i','l','m','n','p','q','r','s','t','v','w','z','zh','dr','ts'];
// Spellings accepted as the middle segment ("basolino") of a syllable. The same
// last-match rule applies, so longer spellings follow their shorter prefixes
// (for example 'ing' comes after 'i' and 'in').
const basi = ['a','e','i','o','u','ü','ar','er','ir','or','ur','ür','ai','ei','oi','al','el','ul','ül','ao','eo','üo','eu','ua','au','an','en','in','on','un','ing','am','em','im','um','oen'];
// Spelling -> text that parse_pt_to_marco() appends for a leading/trailing segment; the
// result ends up inside \textipa{...} in the LaTeX output. 'z' has no plain entry:
// 'z(1th)' is used when it opens the first syllable, 'z(exa)' in every other position.
const amark = {"a": "\\\"a","b": "p","e": "e","c": "k\\super{h}","d": "t", "dd": "t", "o": "o","f": "f","u": "u","g": "k","ü": "y","h": "h","ar": "A\\textrhookschwa","i": "j","er": "7\\textrhookschwa","l": "l","ir": "i\\textrhookschwa","m": "m","or": "o\\textrhookschwa","n": "n","ur": "u\\textrhookschwa","p": "p\\super{h}","ür": "y\\textrhookschwa","q": "\\:t\\:s\\super{h}","ai": "aj","r": "\\:R\\super{w}","ei": "ej","s": "s","oi": "oj","t": "t\\super{h}","al": "6\\textltilde","v": "v","el": "eo\\textltilde","w": "w","ul": "u\\textltilde","z(1th)": "ts","ül": "4u\\textltilde","z(exa)": "z","ao": "Aw","zh": "\\:z","eo": "eo","dr": "\\t{\\:t\\:R}\\super{w}","ts": "ts\\super{h}","üo": "4o","eu": "ju","ua": "w\\\"a","au": "6","an": "an","en": "@n","in": "in","on": "on","un": "2n","am": "\\\"am","em": "@m","im": "im","um": "2m","ing": "iN","oen": "ow@n"};
// Spelling -> text appended for the middle segment (and for a syllable that is just 'i').
// The values match amark except for 'i', which maps to "i" here and to "j" in amark.
const bmark = {"a": "\\\"a","b": "p","e": "e","c": "k\\super{h}","i": "i","d": "t", "dd": "t", "o": "o","f": "f","u": "u","g": "k","ü": "y","h": "h","ar": "A\\textrhookschwa","er": "7\\textrhookschwa","l": "l","ir": "i\\textrhookschwa","m": "m","or": "o\\textrhookschwa","n": "n","ur": "u\\textrhookschwa","p": "p\\super{h}","ür": "y\\textrhookschwa","q": "\\:t\\:s\\super{h}","ai": "aj","r": "\\:R\\super{w}","ei": "ej","s": "s","oi": "oj","t": "t\\super{h}","al": "6\\textltilde","v": "v","el": "eo\\textltilde","w": "w","ul": "u\\textltilde","z(1th)": "ts","ül": "4u\\textltilde","z(exa)": "z","ao": "Aw","zh": "\\:z","eo": "eo","dr": "\\t{\\:t\\:R}\\super{w}","ts": "ts\\super{h}","üo": "4o","eu": "ju","ua": "w\\\"a","au": "6","an": "an","en": "@n","in": "in","on": "on","un": "2n","am": "\\\"am","em": "@m","im": "im","um": "2m","ing": "iN","oen": "ow@n"};
/**
 * One lexicon entry: headword, translations, syllable split and etymology.
 *
 * Fields:
 *   v      - headword, trimmed, with every "ii" rewritten to "ü"
 *   trans  - translation text split on "/"
 *   pt     - syllable lengths, one integer per digit of the `pt` argument
 *   source - etymology text as returned by parse_source()
 */
class Lexico {
  /**
   * @param {string} val    headword as written in the lexicon line
   * @param {string} trans  translations separated by "/"
   * @param {string} pt     run of decimal digits; each digit is one syllable's length
   * @param {string} source etymology text
   */
  constructor(val, trans, pt, source) {
    this.v = val.trim().replaceAll('ii', 'ü');
    this.trans = trans.trim().split('/');
    // ''.split('') is [], so an empty digit run yields "no syllable information".
    this.pt = pt.trim().split('').map((digit) => parseInt(digit, 10));
    this.source = parse_source(source.trim());
  }

  /**
   * Cut the normalised headword into syllables according to `this.pt`.
   *
   * Normalisation: lower-case, "x" -> "cs", "dd" -> "d", apostrophes removed
   * (in that order). Shared by get_pt() and get_pt2() so both always agree.
   *
   * @returns {string[]|null} the syllables; [] when no lengths were given; null
   *   (after calling report_error) when the lengths do not add up to the
   *   normalised headword's length or contain a zero.
   */
  split_syllables() {
    if (this.pt.length === 0) {
      return [];
    }
    const normalised = this.v
      .toLowerCase()
      .replaceAll('x', 'cs')
      .replaceAll('dd', 'd')
      .replaceAll('\'', '');
    const total = this.pt.reduce((sum, len) => sum + len, 0);
    // A zero-length syllable used to make the two getters disagree (one kept the
    // characters, the other silently dropped them); treat it as a bad split.
    if (total !== normalised.length || this.pt.includes(0)) {
      report_error('ling divo mifuto');
      return null;
    }
    const syllables = [];
    let start = 0;
    for (const len of this.pt) {
      syllables.push(normalised.substring(start, start + len));
      start += len;
    }
    return syllables;
  }

  /**
   * Syllable split as display text.
   * @returns {string|null} syllables joined by "/"; the literal "/" when no
   *   lengths were given; null when the split is invalid (error already reported).
   */
  get_pt() {
    if (this.pt.length === 0) {
      return '/';
    }
    const syllables = this.split_syllables();
    return syllables === null ? null : syllables.join('/');
  }

  /**
   * Syllable split as an array.
   * @returns {string[]|null} see split_syllables().
   */
  get_pt2() {
    return this.split_syllables();
  }

  /**
   * All translation pieces concatenated with no separator.
   * NOTE(review): the "/" separators are dropped rather than replaced — confirm
   * that this is the intended display form.
   * @returns {string}
   */
  get_trans() {
    return this.trans.join('');
  }
}
// 1-based number of the input line currently being processed; parse() sets it
// before handling each line and report_error() reads it for its message.
let LINE = 0;
// Empty and not referenced anywhere else in the visible file.
// NOTE(review): presumably reserved for the unimplemented parse_source() — confirm.
const source_types = [];
/**
 * Interpret the etymology field of a lexicon entry.
 * Currently a placeholder that returns its argument unchanged.
 * @param {string} s etymology text (the Lexico constructor passes it already trimmed)
 * @returns {string} the same text
 */
function parse_source(s) {
// TODO: implement real parsing of the etymology text
return s;
}
/**
 * Show an error for the input line currently being processed.
 *
 * Writes "focopliqo sa rovo <LINE>: <x>" into the #err element. The message can
 * contain raw user input (for example the offending syllable), so it is assigned
 * as plain text rather than parsed as HTML.
 *
 * @param {string} x description of the problem
 */
function report_error(x) {
  document.getElementById('err').textContent = `focopliqo sa rovo ${LINE}: ${x}`;
}
/**
 * Turn a list of syllable spellings into the bracketed transcription text that
 * parse() places inside \textipa{...}.
 *
 * Each syllable is read as [leading auxi segment][basi segment][trailing auxi
 * segment]. A syllable consisting of a single auxi segment is emitted as that
 * segment's mark alone. Otherwise the three slots are emitted in order, each
 * followed by an empty "{}" group (presumably to keep adjacent TIPA shortcuts
 * from merging — confirm).
 *
 * @param {string[]} pt syllables, e.g. the result of Lexico#get_pt2()
 * @returns {string|null} "/" for an empty list, "[...]" on success, or null
 *   after report_error() when a syllable cannot be decomposed.
 */
function parse_pt_to_marco(pt) {
  if (pt.length === 0) {
    return '/';
  }

  // Last entry of `candidates` that prefixes `text` ('' if none). The tables list
  // longer spellings after their shorter prefixes, so "last" means "longest".
  const last_prefix_of = (candidates, text) => {
    let found = '';
    for (const candidate of candidates) {
      if (text.startsWith(candidate)) {
        found = candidate;
      }
    }
    return found;
  };

  // amark lookup that resolves 'z' to its position-dependent entry.
  const auxalino_mark = (segment, opens_word) => {
    if (segment === 'z') {
      return amark[opens_word ? 'z(1th)' : 'z(exa)'];
    }
    return amark[segment];
  };

  let marco = '[';
  for (const [index, syllable] of pt.entries()) {
    const opens_word = index === 0;
    let onset = last_prefix_of(auxi, syllable);
    let remainder = syllable.substring(onset.length);

    // The whole syllable is one auxi segment: a bare 'i' takes the basi mark.
    if (remainder.length === 0) {
      marco += onset === 'i' ? bmark['i'] : auxalino_mark(onset, opens_word);
      continue;
    }

    // A leading 'i' only counts as an onset when the syllable does not start
    // with a longer basi spelling such as 'in', 'ir' or 'ing'.
    if (onset === 'i' && last_prefix_of(basi, syllable) !== 'i') {
      onset = '';
      remainder = syllable;
    }

    let nucleus = last_prefix_of(basi, remainder);
    const coda = remainder.substring(nucleus.length);

    // 'i' followed by no basi segment: the 'i' itself is the basi segment.
    if (onset === 'i' && nucleus.length === 0) {
      nucleus = 'i';
      onset = '';
    }

    const coda_is_valid = coda.length === 0 || auxi.includes(coda);
    if (nucleus.length === 0 || !coda_is_valid) {
      report_error(`ling divo mifuto2: ${syllable}`);
      return null;
    }

    if (onset.length > 0) {
      marco += auxalino_mark(onset, opens_word);
    }
    marco += '{}';
    marco += bmark[nucleus];
    marco += '{}';
    if (coda.length > 0) {
      // A trailing 'z' always uses the non-initial mark.
      marco += auxalino_mark(coda, false);
    }
    marco += '{}';
  }
  return marco + ']';
}
/**
 * Make free text safe to place in a LaTeX table cell.
 *
 * Every LaTeX special character is replaced in a single pass, so replacement
 * text is never re-scanned. "+", "(" and ")" are not special; they are padded
 * with spaces (presumably to give the line breaker somewhere to wrap — confirm).
 * "%", "_", "$", "{", "}" and "^" are escaped as well: left raw, "%" comments out
 * the rest of the table row and the others make the document fail to compile.
 *
 * @param {string} s raw text
 * @returns {string} text with the characters above replaced
 */
function escape_tex(s) {
  const replacements = {
    '\\': '\\textbackslash{}',
    '+': ' + ',
    '(': ' (',
    ')': ') ',
    '<': '\\textless{}',
    '>': '\\textgreater{}',
    '#': '\\#',
    '&': '\\&',
    '~': '$\\sim$',
    '%': '\\%',
    '_': '\\_',
    '$': '\\$',
    '{': '\\{',
    '}': '\\}',
    '^': '\\textasciicircum{}',
  };
  return s.replace(/[\\+()<>#&~%_${}^]/g, (ch) => replacements[ch]);
}
// Paper-size name (as typed into the #szinput field) -> the \geometry{...} line that
// parse() inserts into the LaTeX preamble. Both entries use the same margins and
// footskip; only the paper dimensions differ.
const tex_sizing = {
'a4': '\\geometry{a4paper,left=1cm,right=1cm,top=1.5cm,bottom=1.5cm,footskip=5mm}',
'16k': '\\geometry{paperwidth=185mm,paperheight=260mm,left=1cm,right=1cm,top=1.5cm,bottom=1.5cm,footskip=5mm}',
}
/**
 * Button handler: compile the lexicon text in #input into a Markdown table
 * (#output) and a LaTeX document (#output2).
 *
 * Each non-blank input line is parsed with parse_item(). Blank and
 * whitespace-only lines (including the one after a trailing newline) are skipped
 * but still counted, so error messages show the real line number. Processing
 * stops at the first bad line; the error has then already been shown by
 * report_error() and both outputs stay empty.
 *
 * The paper size comes from #szinput (trimmed, lower-cased); a name that is not
 * a key of tex_sizing falls back to 'a4' instead of putting "undefined" into the
 * preamble.
 *
 * @returns {true|null} true on success, null when a line failed
 */
function parse() {
  document.getElementById('err').textContent = '';
  document.getElementById('output').value = '';
  document.getElementById('output2').value = '';

  const requested_size = document.getElementById('szinput').value.trim().toLowerCase();
  const known_size = Object.prototype.hasOwnProperty.call(tex_sizing, requested_size);
  const geometry = tex_sizing[known_size ? requested_size : 'a4'];

  const lines = document.getElementById('input').value.split('\n');
  let markres = '| 单词 | 释义 | 音节划分 | 词源 |\n| :---: | :---: | :---: | :---: |\n';
  let texres = `\\documentclass[10pt]{article}\\usepackage{tabularx}\\usepackage{ctex}\\usepackage{tipa}\\usepackage{geometry}${geometry}\\usepackage{xltabular}\\begin{document}\\newcolumntype{Y}{>{\\raggedright\\arraybackslash}X}\\centering\\footnotesize\\begin{xltabular}{\\textwidth}{|Y|Y|l|Y|}\\hline\\textbf{Lexici} & \\textbf{Texo} & \\textbf{Lingmarco} & \\textbf{Matro} \\\\ \\hline\n`;

  for (const [index, line] of lines.entries()) {
    if (line.trim().length === 0) {
      continue;
    }
    // Keep the global in sync so report_error() names the right line.
    LINE = index + 1;
    const l = parse_item(line);
    if (l == null) {
      return null;
    }
    const pt = l.get_pt();
    if (pt == null) {
      console.log(line);
      return null;
    }
    markres += `| ${l.v} | ${l.get_trans()} | ${pt} | ${l.source} |\n`;
    const lingmarco = parse_pt_to_marco(l.get_pt2());
    if (lingmarco == null) {
      return null;
    }
    texres += `\t${escape_tex(l.v)} & ${escape_tex(l.get_trans())} & \\textipa{${lingmarco}} & ${escape_tex(l.source)} \\\\\n\t\\hline\n`;
  }

  texres += `\\end{xltabular}\\end{document}`;
  document.getElementById('output').value = markres;
  document.getElementById('output2').value = texres;
  return true;
}
/**
 * Parse one lexicon line of the form
 *   word: translations [digits; source]
 * into a Lexico. The bracket holds an optional run of digits, a ";" and the
 * source text; trailing whitespace after "]" is allowed.
 *
 * @param {string} val one line of the lexicon text
 * @returns {Lexico|null} the entry, or null after logging the line and calling
 *   report_error() when the line does not match the expected form
 */
function parse_item(val) {
  const entry_pattern = /^([^\:]+):[ \t]*([^\[]+)[ \t]*\[(\d*)\;[ \t]*([^\]]*)\][ \r\n\t]*$/;
  const fields = val.match(entry_pattern);
  if (fields != null) {
    const [, word, meaning, syllable_digits, origin] = fields;
    return new Lexico(word, meaning, syllable_digits, origin);
  }
  console.error(val);
  report_error('lexo mifuto.');
  return null;
}
// Once the page has loaded, blank both result areas (a reloaded page may
// otherwise show text the browser restored from the previous session).
onload = () => {
  for (const id of ['output', 'output2']) {
    document.getElementById(id).value = '';
  }
};
</script>
</html>