-rw-r--r-- 23691 holtrace-20250617/pt2holtrace.cc raw
// XXX: do one-pass parsers
#include <iostream>
#include <fstream>
#include <string>
#include <cstring>
#include <stdexcept>
#include <algorithm>
#include <utility>
#include <tuple>
#include <vector>
#include <unordered_map>
#include <boost/functional/hash.hpp>
#include <ext/stdio_filebuf.h>
using namespace std;
static ofstream output;
typedef long long num;
#define TDSPLIT "]"
static void skiptext(string &line,const char *text)
{
size_t textlen = strlen(text);
if (line.substr(0,textlen) != text) throw invalid_argument("expected string '"+string(text)+"' not found\n");
line = line.substr(textlen);
}
static void parsell(string &line,num &result)
{
size_t pos;
result = stoll(line,&pos);
line = line.substr(pos);
}
static void untilquote(string &line,string &result)
{
int backslash = 0;
int foundquote = 0;
string rest;
for (auto c : line) {
if (foundquote) { rest += c; continue; }
if (backslash) { result += c; backslash = 0; continue; }
if (c == '\\') { backslash = 1; continue; }
if (c == '"') { foundquote = 1; continue; }
result += c;
}
line = rest;
}
static void parselist(string &line,vector<num> &nums,vector<string> &strs)
{
int state = 0;
int bracketlevels = 0;
num num;
string str;
string rest;
for (auto c: line)
switch(state) {
case -1:
rest += c;
break;
case 0:
if (c != '[') throw invalid_argument("expected left bracket not found");
++bracketlevels; state = 1;
break;
case 1:
if (c == ']') {
--bracketlevels; if (bracketlevels == 0) state = -1;
break;
}
if (c == '[') {
++bracketlevels;
break;
}
if (c == ',') break;
if (c == ' ') break;
if (c >= '0' && c <= '9') { state = 2; num = c-'0'; break; }
if (c == '"') { state = 3; str = ""; break; }
throw invalid_argument("expected quote or right bracket not found");
case 2:
if (c == ']') {
nums.push_back(num);
--bracketlevels; if (bracketlevels == 0) state = -1;
break;
}
if (c == ',') { nums.push_back(num); state = 1; break; }
if (c >= '0' && c <= '9') { num = num*10+(c-'0'); break; }
throw invalid_argument("expected digit or comma or right bracket not found");
case 3:
if (c == '\\') { state = 4; break; }
if (c == '"') { strs.push_back(str); state = 1; break; }
str += c;
break;
case 4:
str += c;
state = 3;
}
line = rest;
}
// C++ reserves typeid
#define typeid mytypeid
static num typeid;
static num termid;
static num thmid;
static num infid;
static num typedefid;
static num ptid;
static unordered_map<string,num> tyvar2id;
static unordered_map<pair<string,vector<num>>,num,boost::hash<pair<string,vector<num>>>> app2id;
// fun(y,z) type mapping to y and to z:
static unordered_map<num,num> typeid2fromid;
static unordered_map<num,num> typeid2toid;
static unordered_map<pair<string,num>,num,boost::hash<pair<string,num>>> var2id;
static unordered_map<pair<string,num>,num,boost::hash<pair<string,num>>> const2id;
static unordered_map<pair<num,num>,num,boost::hash<pair<num,num>>> comb2id;
static unordered_map<pair<num,num>,num,boost::hash<pair<num,num>>> abs2id;
static unordered_map<num,num> termid2typeid;
static unordered_map<vector<num>,num,boost::hash<vector<num>>> hhhc2id;
static unordered_map<num,num> ptid2thmid;
static unordered_map<pair<vector<num>,string>,num,boost::hash<pair<vector<num>,string>>> infkey2id;
// for the type_definition conversions:
static unordered_map<num,num> combtermid2funid;
static unordered_map<num,num> combthmid2funid;
static unordered_map<tuple<num,num,num,string>,num,boost::hash<tuple<num,num,num,string>>> typedef2id;
static string simpleencode(num x)
{
if (x == 0) return "0";
if (x == -1) return "!";
if (x == -2) return "@";
if (x == -3) return "#";
if (x == -4) return "$";
if (x == -5) return "%";
if (x == -6) return "^";
if (x == -7) return "&";
if (x == -8) return "*";
if (x == -9) return "(";
if (x == -10) return ")";
string result;
int minussign = 0;
// XXX: assumes x > -2^63
if (x < 0) { minussign = 1; x = -x; }
while (x > 0) {
result += "0123456789ABCDEFGHIJKLMNOPQRSTUV"[x&31];
x >>= 5;
}
if (minussign) result += "-";
reverse(result.begin(),result.end());
return result;
}
static num simpleencode_len(num x)
{
if (x == 0) return 1;
num result = 0;
if (x < 0) { ++result; x = -x; }
while (x > 0) {
result += 1;
x >>= 5;
}
return result;
}
static num compress(num x,num reference)
{
if (x < 0) throw invalid_argument("internal bug, negative integer");
num y = x-reference;
if (y >= 0) return x;
if (y >= -10) return y;
if (simpleencode_len(y) <= simpleencode_len(x)) return y;
return x;
}
static int integral(char c)
{
if (c >= '0' && c <= '9') return 1;
if (c >= 'A' && c <= 'V') return 1;
return 0;
}
static void log(char what,const vector<num> &ids,const string &name)
{
output << what;
char lastbyte = 0;
for (num id : ids) {
string s = simpleencode(id);
auto firstbyte = s.at(0);
if (integral(lastbyte))
if (integral(firstbyte))
output << ' ';
output << s;
lastbyte = s.at(s.length()-1);
}
if (name.length() > 0) {
auto firstbyte = name.at(0);
if (integral(lastbyte))
if (integral(firstbyte) || firstbyte == ' ')
output << ' ';
output << name;
}
output << '\n';
}
static vector<num> string2closings(const string &t)
{
vector<num> result(t.length(),0);
vector<num> open;
vector<char> paren;
num pos = 0;
for (char c : t) {
if (c == '(' || c == '[') {
open.push_back(pos);
paren.push_back(c);
} else if (c == ')' || c == ']') {
if (open.empty()) throw invalid_argument("too many right parens/brackets");
if (c == ')' && paren.back() != '(') throw invalid_argument("paren-bracket mismatch");
if (c == ']' && paren.back() != '[') throw invalid_argument("paren-bracket mismatch");
result.at(open.back()) = pos+1; // seems more useful than pos
open.pop_back();
paren.pop_back();
}
++pos;
}
if (!open.empty()) throw invalid_argument("not enough right parens/brackets");
return result;
}
static num structured_type2id(const string &name,vector<num> &subids)
{
auto key = make_pair(name,subids);
if (app2id.count(key) == 0) {
if (name == "fun") {
if (subids.size() != 2) throw invalid_argument("function type must have 2 arguments");
typeid2fromid[typeid] = subids.at(0);
typeid2toid[typeid] = subids.at(1);
}
vector<num> compressed;
compressed.push_back(subids.size());
for (num id : subids)
compressed.push_back(compress(id,typeid));
log('b',compressed,name);
app2id[key] = typeid++;
}
return app2id[key];
}
static num type2id(const string &t,const vector<num> &closings,num start,num stop)
{
if (stop <= start) throw invalid_argument("stop <= start");
char what = t.at(start);
if (t.at(start+1) != '[') throw invalid_argument("type missing bracket");
vector<num> subids;
if (what == 'v') { // v[name]
if (closings.at(start+1) != stop) throw invalid_argument("wrong number of v type args");
string name = t.substr(start+2,stop-start-3);
if (tyvar2id.count(name) == 0) {
log('a',subids,name);
tyvar2id[name] = typeid++;
}
return tyvar2id[name];
}
if (what == 'c') { // c[name][[v[A]][v[B]]]
num pos = closings.at(start+1);
string name = t.substr(start+2,pos-start-3);
if (t.at(pos) != '[') throw invalid_argument("type missing second bracket");
if (closings.at(pos) != stop) throw invalid_argument("wrong number of c type args");
++pos;
while (pos < stop-1) {
if (t.at(pos) != '[') throw invalid_argument("type missing inner bracket");
num nextpos = closings.at(pos);
subids.push_back(type2id(t,closings,pos+1,nextpos-1));
if (nextpos <= pos) throw invalid_argument("internal error, nextpos <= pos");
pos = nextpos;
}
return structured_type2id(name,subids);
}
throw invalid_argument("unrecognized type symbol");
}
static num type2id(const string &type)
{
auto closings = string2closings(type);
return type2id(type,closings,0,type.length());
}
static num abstype2id(num id0,num id1)
{
vector<num> subids;
subids.push_back(id0);
subids.push_back(id1);
return structured_type2id("fun",subids);
}
static num combtype2id(num id0,num id1)
{
if (!typeid2fromid.count(id0)) throw invalid_argument("comb from non-function type");
if (!typeid2toid.count(id0)) throw invalid_argument("comb from non-function type");
if (typeid2fromid[id0] != id1) throw invalid_argument("comb applied to wrong type");
return typeid2toid[id0];
}
static num term2id(const string &t,const vector<num> &closings,num start,num stop)
{
if (stop <= start) throw invalid_argument("stop <= start");
char what = t.at(start);
if (t.at(start+1) != '(') throw invalid_argument("term missing paren");
num middle = closings.at(start+1);
if (closings.at(middle) != stop) throw invalid_argument("wrong number of term args");
vector<num> compressed;
if (what == 'c') {
auto arg0 = t.substr(start+2,middle-start-3);
num id1 = type2id(t,closings,middle+1,stop-1);
auto key = make_pair(arg0,id1);
if (const2id.count(key) == 0) {
termid2typeid[termid] = id1;
compressed.push_back(compress(id1,typeid));
log('c',compressed,arg0);
const2id[key] = termid++;
}
return const2id[key];
}
if (what == 'v') {
auto arg0 = t.substr(start+2,middle-start-3);
num id1 = type2id(t,closings,middle+1,stop-1);
auto key = make_pair(arg0,id1);
if (var2id.count(key) == 0) {
termid2typeid[termid] = id1;
compressed.push_back(compress(id1,typeid));
log('d',compressed,arg0);
var2id[key] = termid++;
}
return var2id[key];
}
num id0 = term2id(t,closings,start+2,middle-1);
num id1 = term2id(t,closings,middle+1,stop-1);
auto key = make_pair(id0,id1);
if (what == 'C') {
if (comb2id.count(key) == 0) {
combtermid2funid[termid] = id0;
auto yid = combtype2id(termid2typeid[id0],termid2typeid[id1]);
termid2typeid[termid] = yid;
compressed.push_back(compress(yid,typeid));
compressed.push_back(compress(id0,termid));
compressed.push_back(compress(id1,termid));
log('e',compressed,"");
comb2id[key] = termid++;
}
return comb2id[key];
}
if (what == 'A') {
if (abs2id.count(key) == 0) {
auto yid = abstype2id(termid2typeid[id0],termid2typeid[id1]);
termid2typeid[termid] = yid;
compressed.push_back(compress(yid,typeid));
compressed.push_back(compress(id0,termid));
compressed.push_back(compress(id1,termid));
log('f',compressed,"");
abs2id[key] = termid++;
}
return abs2id[key];
}
throw invalid_argument("unrecognized term symbol");
}
static num term2id(const string &term)
{
auto closings = string2closings(term);
return term2id(term,closings,0,term.length());
}
static num thm2id(const vector<string> &hypotheses,const string &conclusion)
{
vector<num> hhhc;
for (string h : hypotheses)
hhhc.push_back(term2id(h));
auto conclusionid = term2id(conclusion);
hhhc.push_back(conclusionid);
if (hhhc2id.count(hhhc) == 0) {
if (hypotheses.size() == 0 && combtermid2funid.count(conclusionid))
combthmid2funid[thmid] = combtermid2funid[conclusionid];
vector<num> compressed;
for (num t : hhhc)
compressed.push_back(compress(t,termid));
log('t',compressed,"");
hhhc2id[hhhc] = thmid++;
}
return hhhc2id[hhhc];
}
static num inf2id(vector<num> &subids,char what,vector<num> &compressed,const string &name)
{
auto key = make_pair(subids,name);
if (infkey2id.count(key) == 0) {
log(what,compressed,name);
infkey2id[key] = infid++;
}
return infkey2id[key];
}
static void preprocess_definition(string &term)
{
string result;
int parenlevel = 0;
int leftparens0 = 0;
int leftparens1 = 0;
for (char c : term) {
if (c == '(') {
if (parenlevel == 0) ++leftparens0;
if (parenlevel == 1) ++leftparens1;
++parenlevel;
result += c;
continue;
}
if (c == ')') {
--parenlevel;
result += c;
continue;
}
if (leftparens0 == 1 && leftparens1 == 2 && parenlevel == 2 && c == 'c') {
result += 'v';
} else
result += c;
}
term = result;
}
static int match(vector<string> &result,const string &x,const string &pattern)
{
num xlen = x.length();
num xpos = 0;
num numtargets = result.size();
vector<int> initialized(numtargets,0);
for (char c : pattern) {
if (xpos >= xlen) return 0;
if (c >= '0' && c <= '9') {
// XXX: maybe generalize to allow more than 10 entries
int entry = c-'0';
if (entry >= numtargets) return 0;
string target;
num parenlevel = 0;
for (;;) {
if (xpos >= xlen) return 0;
char xi = x.at(xpos);
if (xi == '(' || xi == '[')
++parenlevel;
if (xi == ')' || xi == ']') {
if (parenlevel == 0) break;
--parenlevel;
}
target += xi;
++xpos;
}
if (initialized.at(entry)) {
if (result.at(entry) != target) return 0;
} else {
initialized.at(entry) = 1;
result.at(entry) = target;
}
} else {
if (x.at(xpos) != c) return 0;
++xpos;
}
}
if (xpos != xlen) return 0;
return 1;
}
// [v[A]][v[B]] -> A]B]
static string tyvars_pack(string tyvars)
{
string result;
while (tyvars.length() > 0) {
if (tyvars.substr(0,3) != "[v[") throw invalid_argument("tyvars missing open");
tyvars = tyvars.substr(3);
size_t pos = tyvars.find("]]");
if (pos == tyvars.npos) throw invalid_argument("tyvars missing close");
result += tyvars.substr(0,pos);
result += TDSPLIT;
tyvars = tyvars.substr(pos+2);
}
return result;
}
static const string Adef("C(C(c(=)(c[fun][[c[0][1]][c[fun][[c[0][1]][c[bool][]]]]]))(C(c(2)(c[fun][[4][c[0][1]]]))(C(c(3)(c[fun][[c[0][1]][4]]))(v(a)(c[0][1])))))(v(a)(c[0][1]))");
static const string Rdef("C(C(c(=)(c[fun][[c[bool][]][c[fun][[c[bool][]][c[bool][]]]]]))(C(5)(v(r)(4))))(C(C(c(=)(c[fun][[4][c[fun][[4][c[bool][]]]]]))(C(c(3)(c[fun][[c[0][1]][4]]))(C(c(2)(c[fun][[4][c[0][1]]]))(v(r)(4)))))(v(r)(4)))");
// ProofTrace does two TYPE_DEFINITION lines
// distinguished by the term absthm (the output theorem):
// first abs(rep(a)) = a, with newname abs
// then repprop(r) = (rep(abs(r)) = r), with newname rep
// HOLTrace format is instead:
// A reptype repprop repth ]abstypename]abs]rep]tyvar1]tyvar2]...
// D
static void preprocess_type_definition(string &name,char &newtag,num &id0,num &id1,num repthmid,const string &absthm,const string &newname)
{
newtag = 'A';
if (!combthmid2funid.count(repthmid)) throw invalid_argument("repthm is not comb");
id1 = combthmid2funid[repthmid];
vector<string> result(5,"");
if (!match(result,absthm,Adef)) {
result.push_back(""); // for repprop
if (!match(result,absthm,Rdef)) throw invalid_argument("type_definition patterns do not match");
newtag = 'D';
}
auto abstypename = result.at(0);
auto tyvars = result.at(1);
auto absname = result.at(2);
auto repname = result.at(3);
auto reptype = result.at(4);
if (abstypename.find(TDSPLIT) != abstypename.npos) throw invalid_argument("impossible tdsplit in abstypename");
if (absname.find(TDSPLIT) != absname.npos) throw invalid_argument("impossible tdsplit in absname");
if (repname.find(TDSPLIT) != repname.npos) throw invalid_argument("impossible tdsplit in repname");
id0 = type2id(reptype);
if (newtag == 'A') {
if (newname != absname) throw invalid_argument("newname does not match absname");
} else {
if (id1 != term2id(result.at(5))) throw invalid_argument("repprop mismatch");
if (newname != repname) throw invalid_argument("newname does not match repname");
}
name = TDSPLIT;
name += abstypename; name += TDSPLIT;
name += absname; name += TDSPLIT;
name += repname; name += TDSPLIT;
name += tyvars_pack(tyvars);
}
// not super-stringent
// e.g. flattens lists with any nesting structure
// e.g. doesn't care about relative order of numbers and strings
static void parse(ifstream &names,ifstream &proofs,ifstream &theorems)
{
unordered_map<num,string> id2name;
unordered_map<string,char> rule2tag{
{"TYPE_DEFINITION",'A'},
{"BETA",'B'},
{"DEFINITION",'C'},
{"MK_COMB",'E'},
{"ABS",'F'},
{"EQ_MP",'P'},
{"REFL",'R'},
{"INST",'S'},
{"INST_TYPE",'T'},
{"ASSUME",'U'},
{"TRANS",'V'},
{"AXIOM",'X'},
{"DEDUCT_ANTISYM_RULE",'Z'},
};
unordered_map<char,ssize_t> tag2numcount{
{'A',1},
{'B',0},
{'C',0},
{'E',2},
{'F',1},
{'P',2},
{'R',0},
{'S',1},
{'T',1},
{'U',0},
{'V',2},
{'X',0},
{'Z',2},
};
unordered_map<char,ssize_t> tag2strcount{
{'A',2},
{'B',1},
{'C',2},
{'E',0},
{'F',1},
{'P',0},
{'R',1},
{'S',-1},
{'T',-1},
{'U',1},
{'V',0},
{'X',1},
{'Z',0},
};
string line;
while (getline(names,line)) {
num id;
string name;
skiptext(line,"{\"id\": ");
parsell(line,id);
skiptext(line,", \"nm\": \"");
untilquote(line,name);
skiptext(line,"}");
if (line.length() != 0) throw invalid_argument("extra text after names line");
if (id2name.count(id) > 0) throw invalid_argument("multiple names for same id");
id2name[id] = name;
}
while (getline(proofs,line)) {
num idpr;
vector<num> prnums;
vector<string> prstrs;
skiptext(line,"{\"id\": ");
parsell(line,idpr);
skiptext(line,", \"pr\": ");
parselist(line,prnums,prstrs);
skiptext(line,"}");
if (line.length() != 0) throw invalid_argument("extra text after proofs line");
if (idpr < ptid || idpr > ptid+300) throw invalid_argument("surprising ID");
while (idpr > ptid) {
output << ";\n";
++ptid;
}
if (!getline(theorems,line)) throw invalid_argument("fewer theorems than proofs");
num idthm;
vector<num> thmnums;
vector<string> hypotheses;
string conclusion;
skiptext(line,"{\"id\": ");
parsell(line,idthm);
if (idthm != ptid) throw invalid_argument("IDs desynchronized between theorems and proofs");
skiptext(line,", \"th\": {\"hy\": ");
parselist(line,thmnums,hypotheses);
skiptext(line,", \"cc\": \"");
untilquote(line,conclusion);
skiptext(line,"}}");
if (line.length() != 0) throw invalid_argument("extra text after theorems line");
if (thmnums.size() != 0) throw invalid_argument("numbers inside conclusions");
if (prstrs.size() == 0) throw invalid_argument("empty proof");
auto rule = prstrs.at(0);
if (rule2tag.count(rule) == 0) throw invalid_argument("unrecognized inference rule "+rule);
auto tag = rule2tag[rule];
auto expectednumcount = tag2numcount[tag];
if (expectednumcount != (ssize_t) prnums.size())
throw invalid_argument("unexpected number of integers for rule "+rule);
auto expectedstrcount = tag2strcount[tag];
if (expectedstrcount >= 0)
if (expectedstrcount+1 != (ssize_t) prstrs.size())
throw invalid_argument("unexpected number of strings for rule "+rule);
vector<num> subids;
vector<num> compressed;
string name;
subids.push_back(tag);
num ptid0 = -1;
num ptid1 = -1;
if (tag == 'B' || tag == 'R' || tag == 'U' || tag == 'X') {
string t0 = prstrs.at(1);
num id0 = term2id(t0);
subids.push_back(id0);
compressed.push_back(compress(id0,termid));
} else if (tag == 'F') {
string t0 = prstrs.at(1);
ptid0 = prnums.at(0);
num id0 = term2id(t0);
num id1 = ptid2thmid[ptid0];
subids.push_back(id0);
subids.push_back(id1);
compressed.push_back(compress(id0,termid));
compressed.push_back(compress(id1,thmid));
} else if (tag == 'E' || tag == 'P' || tag == 'V' || tag == 'Z') {
ptid0 = prnums.at(0);
ptid1 = prnums.at(1);
num id0 = ptid2thmid[ptid0];
num id1 = ptid2thmid[ptid1];
subids.push_back(id0);
subids.push_back(id1);
compressed.push_back(compress(id0,thmid));
compressed.push_back(compress(id1,thmid));
} else if (tag == 'S' || tag == 'T') {
ptid0 = prnums.at(0);
num id0 = ptid2thmid[ptid0];
subids.push_back(id0);
compressed.push_back(compress(id0,thmid));
int first = 1;
for (auto s1 : prstrs) {
if (first) { first = 0; continue; }
tag == 'S' ? term2id(s1) : type2id(s1);
}
num reference = tag == 'S' ? termid : typeid;
first = 1;
for (auto s1 : prstrs) {
if (first) { first = 0; continue; }
num id1 = tag == 'S' ? term2id(s1) : type2id(s1);
subids.push_back(id1);
compressed.push_back(compress(id1,reference));
}
} else if (tag == 'C') {
name = prstrs.at(2);
string t0 = prstrs.at(1);
preprocess_definition(t0);
num id0 = term2id(t0);
subids.push_back(id0);
compressed.push_back(compress(id0,termid));
} else if (tag == 'A') {
ptid0 = prnums.at(0);
num id2 = ptid2thmid[ptid0];
string t0 = prstrs.at(1);
string newname = prstrs.at(2);
num id0;
num id1;
char newtag;
preprocess_type_definition(name,newtag,id0,id1,id2,t0,newname);
auto key = make_tuple(id0,id1,id2,name);
if (newtag == 'D') {
if (!typedef2id.count(key)) throw invalid_argument("non-canonical TYPE_DEFINITION order");
num Aid = typedef2id[key];
tag = 'D';
if (Aid != typedefid-1) throw invalid_argument("non-adjacent TYPE_DEFINITION");
subids.push_back(Aid);
compressed.push_back(compress(Aid,typedefid));
name = "";
} else {
if (typedef2id.count(key)) throw invalid_argument("non-canonical TYPE_DEFINITION order");
typedef2id[key] = typedefid++;
subids.push_back(id0);
compressed.push_back(compress(id0,typeid));
subids.push_back(id1);
compressed.push_back(compress(id1,termid));
subids.push_back(id2);
compressed.push_back(compress(id2,thmid));
}
} else
throw invalid_argument("surprising tag");
auto iid = inf2id(subids,tag,compressed,name);
num id = thm2id(hypotheses,conclusion);
if (id2name.count(ptid) > 0) {
vector<num> nameids;
nameids.push_back(compress(id,thmid));
log('/',nameids,id2name[ptid]);
}
vector<num> ptsubids;
ptsubids.push_back(compress(id,thmid));
ptsubids.push_back(compress(iid,infid));
if (ptid0 >= 0) ptsubids.push_back(compress(ptid0,ptid));
if (ptid1 >= 0) ptsubids.push_back(compress(ptid1,ptid));
log(',',ptsubids,"");
ptid2thmid[ptid++] = id;
}
if (getline(theorems,line)) throw invalid_argument("fewer proofs than theorems");
}
int main(int argc,char **argv)
{
if (argc < 4) {
cerr << "usage: pt2holtrace pt.names pt.proofs pt.theorems\n";
exit(100);
}
ifstream names(argv[1]);
ifstream proofs(argv[2]);
ifstream theorems(argv[3]);
__gnu_cxx::stdio_filebuf<char> fdbuf(1,ios::out);
output.ios::rdbuf(&fdbuf);
output << "HOLTrace 1\n";
try {
parse(names,proofs,theorems);
} catch(exception &e) {
cerr << e.what() << "\n";
exit(111);
}
return 0;
}