Compare commits

...

2 Commits

Author SHA1 Message Date
f064be9192
完善unicode字符支持 (#8) 2023-09-02 11:18:54 +08:00
5a7e0da85a
Remove testcase causes stack overflow 2023-09-02 10:31:18 +08:00
9 changed files with 158 additions and 81 deletions

View File

@ -1,8 +1,9 @@
#ifndef ACPA_READ_H #ifndef ACPA_READ_H
#define ACPA_READ_H #define ACPA_READ_H
#include <fstream>
#include <string> #include <string>
std::string read(); std::string read(std::istream& file);
#endif #endif

View File

@ -5,27 +5,27 @@
#include <vector> #include <vector>
enum class TokenType { enum class TokenType {
COMMA, // , COMMA, // ,
SEMI, // ; SEMI, // ;
LB, // { LB, // {
RB, // } RB, // }
LP, // ( LP, // (
RP, // ) RP, // )
LT, // < LT, // <
RT, // > RT, // >
ASSIGN, // = ASSIGN, // =
DOT, // . DOT, // .
COLON, // : COLON, // :
SCOPE, // :: SCOPE, // ::
IMPLY, // -> IMPLY, // ->
STRUCT, // struct STRUCT, // struct
FN, // Fn FN, // Fn
RETURN, // return RETURN, // return
TYPEOF, // typeof TYPEOF, // typeof
PRIVATE, // private PRIVATE, // private
ADMIT, // admit ADMIT, // admit
DELETE, // delete DELETE, // delete
ID, // identifier ID, // identifier
EXCEED EXCEED
}; };

View File

@ -1,10 +1,9 @@
#ifndef ACPA_WORK_H #ifndef ACPA_WORK_H
#define ACPA_WORK_H #define ACPA_WORK_H
#include "element.h"
#include "token.h" #include "token.h"
#include <vector> #include <vector>
void work(std::vector<Token>); void work(std::vector<Token>);
#endif #endif

View File

@ -1,12 +1,10 @@
#include "element.h"
#include "read.h" #include "read.h"
#include "scan.h" #include "scan.h"
#include "token.h"
#include "work.h" #include "work.h"
#include <argparse/argparse.hpp> #include <argparse/argparse.hpp>
#include <csignal> #include <csignal>
#include <fstream>
using namespace std; #include <iostream>
void signal_handler(int signal) { void signal_handler(int signal) {
std::cerr << "runtime error, signal: " << signal << std::endl; std::cerr << "runtime error, signal: " << signal << std::endl;
@ -26,22 +24,37 @@ int main(int argc, char* argv[]) {
argparse::default_arguments::help, argparse::default_arguments::help,
false); false);
program.add_argument("input_file") program.add_argument("input_file").help("Source proof file").default_value("-");
.help("Source proof file") program.add_argument("-Ep")
.action([](const std::string& value) { return value; }); .help("Preprocess only")
.implicit_value(true)
.default_value(false);
try { try {
program.parse_args(argc, argv); program.parse_args(argc, argv);
} catch (const std::runtime_error& err) { } catch (const std::runtime_error& err) {
cerr << err.what() << std::endl; std::cerr << err.what() << std::endl;
cerr << program; std::cerr << program;
return 1; return 1;
} }
auto input_file = program.get<std::string>("input_file"); auto input_file = program.get<std::string>("input_file");
freopen(input_file.c_str(), "r", stdin); std::string rd_res;
if (input_file != "-") {
std::ifstream file(input_file);
if (!file.is_open()) {
std::cerr << "Unable to open file " << input_file << std::endl;
return 0;
}
work(scan(read())); rd_res = read(file);
} else {
rd_res = read(std::cin);
}
if (program["-Ep"] == true) {
std::cout << rd_res << std::endl;
}
work(scan(rd_res));
return 0; return 0;
} }

View File

@ -1,11 +1,101 @@
#include "read.h" #include "read.h"
#include <codecvt>
#include <cstdint>
#include <iostream>
#include <locale>
using namespace std; const std::string base32_chars = "abcdefghijklmnopqrstuvwxyz012345";
const std::string leader = "_P";
string read() { int base32_value(char c) {
string s; if ('a' <= c && c <= 'z') {
for (char ch = getchar(); ch != EOF; ch = getchar()) { return c - 'a';
s += ch; } else if ('0' <= c && c <= '5') {
return c - '0' + 26;
} else {
return -1;
} }
return s; }
}
/// Decodes a string produced by base32_encode back into raw bytes.
///
/// Each input character contributes 5 bits (via base32_value); every time at
/// least 8 bits have been collected, one byte is appended to the result.
/// Leftover bits at the end (fewer than 8) are padding and are discarded.
///
/// @param input Text made of characters from the project's base32 alphabet.
/// @return The decoded bytes, or `input` unchanged if any character is not
///         part of the alphabet (base32_value returned -1).
std::string base32_decode(const std::string& input) {
    std::string decoded_data;
    // Unsigned accumulator: the previous signed `int` was shifted left without
    // ever being trimmed, which overflows (undefined behaviour in C++17) once
    // the input is longer than a handful of characters.
    std::uint32_t buffer = 0;
    int bits = 0;
    for (const char c : input) {
        const int value = base32_value(c);
        if (value == -1) {
            return input;
        }
        buffer = (buffer << 5) | static_cast<std::uint32_t>(value);
        bits += 5;
        if (bits >= 8) {
            bits -= 8;
            decoded_data.push_back(
                static_cast<char>(static_cast<std::uint8_t>((buffer >> bits) & 0xFFu)));
            // Drop the byte just emitted so the accumulator never holds more
            // than 12 bits.
            buffer &= (std::uint32_t{1} << bits) - 1u;
        }
    }
    return decoded_data;
}
/// Encodes arbitrary bytes using the project's 32-character alphabet
/// (base32_chars), 5 bits per output character, most significant bits first.
///
/// If the total bit count is not a multiple of 5, the final group is padded
/// on the right with zero bits. No padding characters are appended.
///
/// @param input Raw bytes to encode (may be empty).
/// @return The encoded text; empty when `input` is empty.
std::string base32_encode(const std::string& input) {
    std::string output;
    output.reserve((input.size() * 8 + 4) / 5);
    // Unsigned accumulator: the previous signed `int` was shifted left by 8
    // per byte without ever being trimmed, so the fourth high-bit byte pushed
    // it into signed overflow (undefined behaviour in C++17).
    std::uint32_t buffer = 0;
    int bits_left = 0;
    for (const unsigned char c : input) {
        buffer = (buffer << 8) | c;
        bits_left += 8;
        while (bits_left >= 5) {
            bits_left -= 5;
            output += base32_chars[(buffer >> bits_left) & 0x1Fu];
        }
        // Keep only the bits that have not been emitted yet (at most 4).
        buffer &= (std::uint32_t{1} << bits_left) - 1u;
    }
    if (bits_left > 0) {
        // Left-align the remaining bits inside a final 5-bit group.
        output += base32_chars[(buffer << (5 - bits_left)) & 0x1Fu];
    }
    return output;
}
/// Reads `file` line by line and returns its content with every run of
/// non-ASCII bytes replaced by an ASCII-only placeholder.
///
/// A maximal run of consecutive bytes with the high bit set is emitted as
/// `leader`, followed by base32_encode of those bytes, followed by "_".
/// ASCII bytes are copied through unchanged. Every line read by std::getline
/// is terminated with "\n" in the result, including a final line that had no
/// trailing newline in the input.
///
/// The input is classified byte-wise instead of being round-tripped through
/// std::wstring_convert: every byte of a multi-byte UTF-8 sequence has the
/// high bit set, so the output is the same for text the conversion handled,
/// while malformed UTF-8 no longer throws std::range_error and characters
/// outside the Basic Multilingual Plane are no longer split into surrogate
/// halves that cannot be re-encoded individually.
///
/// @param file Input stream to consume until getline fails.
/// @return The transformed text.
std::string read(std::istream& file) {
    std::string res;
    std::string line;
    std::string non_ascii_word;

    // Emits the pending non-ASCII run, if any, as one encoded placeholder.
    const auto flush_non_ascii = [&res, &non_ascii_word]() {
        if (non_ascii_word.empty()) {
            return;
        }
        res += leader;
        res += base32_encode(non_ascii_word);
        res += "_";
        non_ascii_word.clear();
    };

    while (std::getline(file, line)) {
        for (const char ch : line) {
            if (static_cast<unsigned char>(ch) > 127) {
                non_ascii_word += ch;
            } else {
                flush_non_ascii();
                res += ch;
            }
        }
        // A run never spans lines: close it before the newline is appended.
        flush_non_ascii();
        res += "\n";
    }
    return res;
}

View File

@ -81,7 +81,7 @@ vector<Token> scan(string s) {
type = TokenType::DELETE; type = TokenType::DELETE;
} else { } else {
type = TokenType::ID; type = TokenType::ID;
if(mp.find(t) == mp.end()) { if (mp.find(t) == mp.end()) {
mp[t] = id_mp.size(); mp[t] = id_mp.size();
id_mp.push_back(t); id_mp.push_back(t);
} }

View File

@ -3,26 +3,8 @@
using namespace std; using namespace std;
string token_mp[] = {",", string token_mp[]
";", = {",", ";", "{", "}", "(", ")", "<", ">", "=", ".", ":",
"{", "::", "->", "struct", "Fn", "return", "typeof", "private", "admit", "delete", "ID"};
"}",
"(",
")",
"<",
">",
"=",
".",
":",
"::",
"->",
"struct",
"Fn",
"return",
"typeof",
"private",
"admit",
"delete",
"ID"};
vector<string> id_mp{""}; vector<string> id_mp{""};

View File

@ -1,5 +1,6 @@
#include "work.h" #include "work.h"
#include <assert.h> #include "element.h"
#include <cassert>
using namespace std; using namespace std;
@ -410,7 +411,9 @@ vector<pair<int, shared_ptr<ValType>>> createPars(map<int, shared_ptr<ValType>>*
d->def_var = t; d->def_var = t;
ndefs[s] = static_pointer_cast<Def>(d); ndefs[s] = static_pointer_cast<Def>(d);
pars.push_back({s, t}); pars.push_back({s, t});
if (vars != nullptr && pub) (*vars)[s]=t; if (vars != nullptr && pub) {
(*vars)[s] = t;
}
}; };
for (single(); preview(TokenType::COMMA); pt++, single()) {} for (single(); preview(TokenType::COMMA); pt++, single()) {}
} }
@ -431,7 +434,7 @@ pair<int, shared_ptr<Struct>> createStruct() {
if (preview(TokenType::LT)) { if (preview(TokenType::LT)) {
tems = createTems(); tems = createTems();
} }
if(preview({TokenType::LP,TokenType::DELETE})) { if (preview({TokenType::LP, TokenType::DELETE})) {
constructor = 0; constructor = 0;
pt += 2, jump(TokenType::RP); pt += 2, jump(TokenType::RP);
} else { } else {
@ -441,7 +444,7 @@ pair<int, shared_ptr<Struct>> createStruct() {
for (const auto& pr : tems) { for (const auto& pr : tems) {
t->c1.push_back(pr.second); t->c1.push_back(pr.second);
} }
if(constructor) { if (constructor) {
for (const auto& pr : pars) { for (const auto& pr : pars) {
t->c2.push_back(pr.second); t->c2.push_back(pr.second);
} }
@ -525,4 +528,4 @@ pair<int, shared_ptr<ValType>> createVar() {
void work(vector<Token> _tokens) { void work(vector<Token> _tokens) {
tokens = _tokens; tokens = _tokens;
createVar(); createVar();
} }

File diff suppressed because one or more lines are too long