From f064be919210082807417f5b2b87c0c337ece5d7 Mon Sep 17 00:00:00 2001 From: szdytom Date: Sat, 2 Sep 2023 11:18:54 +0800 Subject: [PATCH] =?UTF-8?q?=E5=AE=8C=E5=96=84unicode=E5=AD=97=E7=AC=A6?= =?UTF-8?q?=E6=94=AF=E6=8C=81=20(#8)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- include/read.h | 5 ++- include/token.h | 42 +++++++++---------- include/work.h | 3 +- src/main.cpp | 35 +++++++++++----- src/read.cpp | 104 ++++++++++++++++++++++++++++++++++++++++++++---- src/scan.cpp | 2 +- src/token.cpp | 24 ++--------- src/work.cpp | 13 +++--- 8 files changed, 158 insertions(+), 70 deletions(-) diff --git a/include/read.h b/include/read.h index da41de7..97f3bd0 100644 --- a/include/read.h +++ b/include/read.h @@ -1,8 +1,9 @@ #ifndef ACPA_READ_H #define ACPA_READ_H +#include #include -std::string read(); +std::string read(std::istream& file); -#endif \ No newline at end of file +#endif diff --git a/include/token.h b/include/token.h index 3308f95..d9572db 100644 --- a/include/token.h +++ b/include/token.h @@ -5,27 +5,27 @@ #include enum class TokenType { - COMMA, // , - SEMI, // ; - LB, // { - RB, // } - LP, // ( - RP, // ) - LT, // < - RT, // > - ASSIGN, // = - DOT, // . - COLON, // : - SCOPE, // :: - IMPLY, // -> - STRUCT, // struct - FN, // Fn - RETURN, // return - TYPEOF, // typeof - PRIVATE, // private - ADMIT, // admit - DELETE, // delete - ID, // identifier + COMMA, // , + SEMI, // ; + LB, // { + RB, // } + LP, // ( + RP, // ) + LT, // < + RT, // > + ASSIGN, // = + DOT, // . + COLON, // : + SCOPE, // :: + IMPLY, // -> + STRUCT, // struct + FN, // Fn + RETURN, // return + TYPEOF, // typeof + PRIVATE, // private + ADMIT, // admit + DELETE, // delete + ID, // identifier EXCEED }; diff --git a/include/work.h b/include/work.h index eec1a53..b11296f 100644 --- a/include/work.h +++ b/include/work.h @@ -1,10 +1,9 @@ #ifndef ACPA_WORK_H #define ACPA_WORK_H -#include "element.h" #include "token.h" #include void work(std::vector); -#endif \ No newline at end of file +#endif diff --git a/src/main.cpp b/src/main.cpp index e8c961d..fd17481 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,12 +1,10 @@ -#include "element.h" #include "read.h" #include "scan.h" -#include "token.h" #include "work.h" #include #include - -using namespace std; +#include +#include void signal_handler(int signal) { std::cerr << "runtime error, signal: " << signal << std::endl; @@ -26,22 +24,37 @@ int main(int argc, char* argv[]) { argparse::default_arguments::help, false); - program.add_argument("input_file") - .help("Source proof file") - .action([](const std::string& value) { return value; }); + program.add_argument("input_file").help("Source proof file").default_value("-"); + program.add_argument("-Ep") + .help("Preprocess only") + .implicit_value(true) + .default_value(false); try { program.parse_args(argc, argv); } catch (const std::runtime_error& err) { - cerr << err.what() << std::endl; - cerr << program; + std::cerr << err.what() << std::endl; + std::cerr << program; return 1; } auto input_file = program.get("input_file"); - freopen(input_file.c_str(), "r", stdin); + std::string rd_res; + if (input_file != "-") { + std::ifstream file(input_file); + if (!file.is_open()) { + std::cerr << "Unable to open file " << input_file << std::endl; + return 0; + } - work(scan(read())); + rd_res = read(file); + } else { + rd_res = read(std::cin); + } + if (program["-Ep"] == true) { + std::cout << rd_res << std::endl; + } + work(scan(rd_res)); return 0; } diff --git a/src/read.cpp b/src/read.cpp index 765a502..9f06900 100644 --- a/src/read.cpp +++ b/src/read.cpp @@ -1,11 +1,101 @@ #include "read.h" +#include +#include +#include +#include -using namespace std; +const std::string base32_chars = "abcdefghijklmnopqrstuvwxyz012345"; +const std::string leader = "_P"; -string read() { - string s; - for (char ch = getchar(); ch != EOF; ch = getchar()) { - s += ch; +int base32_value(char c) { + if ('a' <= c && c <= 'z') { + return c - 'a'; + } else if ('0' <= c && c <= '5') { + return c - '0' + 26; + } else { + return -1; } - return s; -} \ No newline at end of file +} + +std::string base32_decode(const std::string& input) { + std::string decoded_data; + int buffer = 0, bits = 0; + + for (char c : input) { + int value = base32_value(c); + if (value == -1) { + return input; + } + + buffer <<= 5; + buffer |= value; + bits += 5; + + if (bits >= 8) { + decoded_data.push_back(static_cast(buffer >> (bits - 8))); + bits -= 8; + } + } + + return decoded_data; +} + +std::string base32_encode(const std::string& input) { + std::string output; + int buffer = 0; + int bits_left = 0; + + for (uint8_t c : input) { + buffer <<= 8; + buffer |= c; + bits_left += 8; + + while (bits_left >= 5) { + output += base32_chars[(buffer >> (bits_left - 5)) & 0x1F]; + bits_left -= 5; + } + } + + if (bits_left > 0) { + buffer <<= (5 - bits_left); + output += base32_chars[buffer & 0x1F]; + } + + return output; +} + +std::string read(std::istream& file) { + std::string res; + + std::wstring_convert> converter; + std::string line; + std::string non_ascii_word; + + while (std::getline(file, line)) { + std::wstring utf16_str = converter.from_bytes(line); + for (wchar_t wc : utf16_str) { + std::string utf8_char = converter.to_bytes(wc); + if (wc > 127) { + non_ascii_word += utf8_char; + } else { + if (!non_ascii_word.empty()) { + res += leader; + res += base32_encode(non_ascii_word); + res += "_"; + non_ascii_word.clear(); + } + res += utf8_char; + } + } + + if (!non_ascii_word.empty()) { + res += leader; + res += base32_encode(non_ascii_word); + res += "_"; + non_ascii_word.clear(); + } + res += "\n"; + } + + return res; +} diff --git a/src/scan.cpp b/src/scan.cpp index a667e47..c00030c 100644 --- a/src/scan.cpp +++ b/src/scan.cpp @@ -81,7 +81,7 @@ vector scan(string s) { type = TokenType::DELETE; } else { type = TokenType::ID; - if(mp.find(t) == mp.end()) { + if (mp.find(t) == mp.end()) { mp[t] = id_mp.size(); id_mp.push_back(t); } diff --git a/src/token.cpp b/src/token.cpp index bf27f0b..caa6e6d 100644 --- a/src/token.cpp +++ b/src/token.cpp @@ -3,26 +3,8 @@ using namespace std; -string token_mp[] = {",", - ";", - "{", - "}", - "(", - ")", - "<", - ">", - "=", - ".", - ":", - "::", - "->", - "struct", - "Fn", - "return", - "typeof", - "private", - "admit", - "delete", - "ID"}; +string token_mp[] + = {",", ";", "{", "}", "(", ")", "<", ">", "=", ".", ":", + "::", "->", "struct", "Fn", "return", "typeof", "private", "admit", "delete", "ID"}; vector id_mp{""}; \ No newline at end of file diff --git a/src/work.cpp b/src/work.cpp index cbb6da4..3d80d7d 100644 --- a/src/work.cpp +++ b/src/work.cpp @@ -1,5 +1,6 @@ #include "work.h" -#include +#include "element.h" +#include using namespace std; @@ -410,7 +411,9 @@ vector>> createPars(map>* d->def_var = t; ndefs[s] = static_pointer_cast(d); pars.push_back({s, t}); - if (vars != nullptr && pub) (*vars)[s]=t; + if (vars != nullptr && pub) { + (*vars)[s] = t; + } }; for (single(); preview(TokenType::COMMA); pt++, single()) {} } @@ -431,7 +434,7 @@ pair> createStruct() { if (preview(TokenType::LT)) { tems = createTems(); } - if(preview({TokenType::LP,TokenType::DELETE})) { + if (preview({TokenType::LP, TokenType::DELETE})) { constructor = 0; pt += 2, jump(TokenType::RP); } else { @@ -441,7 +444,7 @@ pair> createStruct() { for (const auto& pr : tems) { t->c1.push_back(pr.second); } - if(constructor) { + if (constructor) { for (const auto& pr : pars) { t->c2.push_back(pr.second); } @@ -525,4 +528,4 @@ pair> createVar() { void work(vector _tokens) { tokens = _tokens; createVar(); -} \ No newline at end of file +}