websnark/src/build_tomcook.js

607 lines
18 KiB
JavaScript

const assert = require("assert");
const bigInt = require("big-integer");
module.exports = function buildTomCook(module, _prefix) {
const prefix = _prefix || "tomcook";
const definedFunctions = {};
const CHUNK_BITS = 29;
const CHUNK_BASE = 1 << CHUNK_BITS;
const CHUNK_BASE_MAX = "9223372036317904896";
const CHUNK_MASK = CHUNK_BASE -1;
function load(size, c, localVar, pos) {
if (size == "l") {
return c.i64_load(c.getLocal(localVar), pos*8);
} else if (size=="s") {
return c.i64_load32_s(c.getLocal(localVar), pos*4);
} else {
assert(false, "invalid size: "+size);
}
}
function store(size, c, localVar, pos, value) {
if (size == "l") {
return c.i64_store(c.getLocal(localVar), pos*8, value);
} else if (size == "s") {
return c.i64_store32(c.getLocal(localVar), pos*4, value);
} else {
assert(false, "invalid size: "+size);
}
}
function storeAdjusting(size, c, carryVar, dstVar, auxVar, pos) {
return c.if(
c.i64_lt_s(
c.getLocal(carryVar),
c.i64_const(0)
),
[
...c.setLocal(auxVar,
c.i64_rem_s(
c.i64_add(
c.i64_const(CHUNK_BASE_MAX),
c.getLocal(carryVar)
),
c.i64_const(CHUNK_BASE)
)
),
...store(size, c, dstVar, pos, c.getLocal(auxVar)),
...c.setLocal(carryVar,
c.i64_sub(
c.getLocal(carryVar),
c.getLocal(auxVar)
)
)
]
, // elsif
[
...store(size, c, dstVar, pos,
c.i64_rem_s(
c.getLocal(carryVar),
c.i64_const(CHUNK_BASE)
)
),
]
);
}
function buildMul3(sizes) {
const fnName = prefix+"_mul3"+sizes;
if (definedFunctions[fnName]) return;
definedFunctions[fnName] = true;
const f = module.addFunction(fnName);
f.addParam("x", "i32");
f.addParam("y", "i32");
f.addParam("r", "i32");
f.addLocal("a0", "i64");
f.addLocal("a1", "i64");
f.addLocal("a2", "i64");
f.addLocal("b0", "i64");
f.addLocal("b1", "i64");
f.addLocal("b2", "i64");
f.addLocal("c", "i64");
const c = f.getCodeBuilder();
f.addCode(
// calculate p
c.setLocal("a0", load(sizes[0], c, "x", 0)),
c.setLocal("b0", load(sizes[1], c, "y", 0)),
c.setLocal("c", c.i64_mul( c.getLocal("a0"), c.getLocal("b0") )),
store(sizes[2], c, "r", 0,
c.i64_rem_s( c.getLocal("c"), c.i64_const(CHUNK_BASE))
),
c.setLocal("a1", load(sizes[0], c, "x", 1)),
c.setLocal("b1", load(sizes[1], c, "y", 1)),
c.setLocal("c", c.i64_add(
c.i64_div_s( c.getLocal("c"), c.i64_const(CHUNK_BASE)),
c.i64_add(
c.i64_mul( c.getLocal("a0"), c.getLocal("b1") ),
c.i64_mul( c.getLocal("a1"), c.getLocal("b0") )
)
)),
store(sizes[2], c, "r", 1,
c.i64_rem_s( c.getLocal("c"), c.i64_const(CHUNK_BASE))
),
c.setLocal("a2", load(sizes[0], c, "x", 2)),
c.setLocal("b2", load(sizes[1], c, "y", 2)),
c.setLocal("c", c.i64_add(
c.i64_add(
c.i64_mul( c.getLocal("a0"), c.getLocal("b2") ),
c.i64_mul( c.getLocal("a2"), c.getLocal("b0") )
),
c.i64_add(
c.i64_div_s( c.getLocal("c"), c.i64_const(CHUNK_BASE)),
c.i64_mul( c.getLocal("a1"), c.getLocal("b1") ),
)
)),
store(sizes[2], c, "r", 2,
c.i64_rem_s( c.getLocal("c"), c.i64_const(CHUNK_BASE))
),
c.setLocal("c", c.i64_add(
c.i64_add(
c.i64_div_s( c.getLocal("c"), c.i64_const(CHUNK_BASE)),
c.i64_mul( c.getLocal("a1"), c.getLocal("b2"))
),
c.i64_mul( c.getLocal("a2"), c.getLocal("b1") ),
)),
store(sizes[2], c, "r", 3,
c.i64_rem_s( c.getLocal("c"), c.i64_const(CHUNK_BASE))
),
c.setLocal("c", c.i64_add(
c.i64_mul( c.getLocal("a2"), c.getLocal("b2") ),
c.i64_div_s( c.getLocal("c"), c.i64_const(CHUNK_BASE)),
)),
store(sizes[2], c, "r", 4,
c.i64_rem_s( c.getLocal("c"), c.i64_const(CHUNK_BASE))
),
store(sizes[2], c, "r", 5,
c.i64_div_s( c.getLocal("c"), c.i64_const(CHUNK_BASE)),
),
);
}
function buildNeg(n, sizes) {
const fnName = prefix+"_neg"+n+sizes;
if (definedFunctions[fnName]) return;
definedFunctions[fnName] = true;
const f = module.addFunction(fnName);
f.addParam("x", "i32");
f.addParam("r", "i32");
const c = f.getCodeBuilder();
for (let i=0; i<n; i++) {
f.addCode(
store(sizes[1], c, "r", i,
c.i32_sub(
c.i32_const(0),
load(sizes[0], c, "x", i)
)
)
);
}
}
function buildAdd(n, sizes) {
const fnName = prefix+"_add"+n+sizes;
if (definedFunctions[fnName]) return;
definedFunctions[fnName] = true;
const f = module.addFunction(fnName);
f.addParam("x", "i32");
f.addParam("y", "i32");
f.addParam("r", "i32");
const c = f.getCodeBuilder();
for (let i=0; i<n; i++) {
f.addCode(
store(sizes[2], c, "r", i,
c.i64_add(
load(sizes[0], c, "x", i),
load(sizes[1], c, "y", i)
)
)
);
}
}
function buildSub(n, sizes) {
const fnName = prefix+"_sub"+n+sizes;
if (definedFunctions[fnName]) return;
definedFunctions[fnName] = true;
const f = module.addFunction(fnName);
f.addParam("x", "i32");
f.addParam("y", "i32");
f.addParam("r", "i32");
const c = f.getCodeBuilder();
for (let i=0; i<n; i++) {
f.addCode(
store(sizes[2], c, "r", i,
c.i64_sub(
load(sizes[0], c, "x", i),
load(sizes[1], c, "y", i)
)
)
);
}
}
function buildMulShort(n, sizes) {
const fnName = prefix+"_mulshort"+n+sizes;
if (definedFunctions[fnName]) return;
definedFunctions[fnName] = true;
const f = module.addFunction(fnName);
f.addParam("x", "i32");
f.addParam("s", "i32");
f.addParam("r", "i32");
f.addLocal("s64", "i64");
const c = f.getCodeBuilder();
f.addCode(c.setLocal("s64", c.i64_extend_i32_s(c.getLocal("s"))));
for (let i=0; i<n; i++) {
f.addCode(
store(sizes[1], c, "r", i,
c.i64_mul(
load(sizes[0], c, "x", i),
c.getLocal("s64")
)
)
);
}
}
function buildDivShort(n, sizes) {
const fnName = prefix+"_divshort"+n+sizes;
if (definedFunctions[fnName]) return;
definedFunctions[fnName] = true;
const f = module.addFunction(fnName);
f.addParam("x", "i32");
f.addParam("s", "i32");
f.addParam("r", "i32");
f.addLocal("c", "i64");
f.addLocal("s64", "i64");
const c = f.getCodeBuilder();
f.addCode(
c.setLocal("s64", c.i64_extend_i32_s(c.getLocal("s"))),
c.setLocal("c", load(sizes[0], c, "x", n-1)),
store(sizes[1], c, "r", n-1,
c.i64_div_s(
c.getLocal("c"),
c.getLocal("s64")
)
)
);
for (let i=n-2; i>=0; i--) {
f.addCode(
c.setLocal("c",
c.i64_add(
c.i64_mul(
c.i64_rem_s(
c.getLocal("c"),
c.getLocal("s64")
),
c.i64_const(CHUNK_BASE)
),
load(sizes[0], c, "x", i)
)
),
store(sizes[1], c, "r", i,
c.i64_div_s(
c.getLocal("c"),
c.getLocal("s64")
)
)
);
}
}
function buildRecompose(n, sizes) {
const fnName = prefix+"_recompose"+n+sizes;
if (definedFunctions[fnName]) return;
definedFunctions[fnName] = true;
const sn = n/3;
const f = module.addFunction(fnName);
f.addParam("s", "i32");
f.addParam("r", "i32");
f.addLocal("c", "i64");
f.addLocal("aux", "i64");
const c = f.getCodeBuilder();
/*
0 sn sn*2 sn*3 sn*4 sn*5
0 sn
sn*2 sn*3
sn*4 sn*5
sn+6 sn*7
sn*8 sn*9
b= Math.floor(i/sn)
b*2*sn + (i-b*sn)
b*2*sn + (i-b*sn) - sn
*/
f.addCode(c.setLocal("c", c.i64_const(0)));
for (let i=0; i<sn; i++) {
f.addCode(
c.setLocal(
"c",
c.i64_add(
c.i64_div_s(c.getLocal("c"), c.i64_const(CHUNK_BASE)),
load("l", c, "s", i)
)
),
storeAdjusting(sizes[0], c, "c", "r", "aux", i)
);
}
for (let i=sn; i<sn*5; i++) {
const b= Math.floor(i/sn);
const i1 = b*2*sn + (i-b*sn);
const i2 = i1 - sn;
f.addCode(
c.setLocal(
"c",
c.i64_add(
c.i64_div_s(c.getLocal("c"), c.i64_const(CHUNK_BASE)),
c.i64_add(
load("l", c, "s", i1),
load("l", c, "s", i2)
)
)
)
);
f.addCode(
storeAdjusting(sizes[0], c, "c", "r", "aux", i)
);
}
for (let i=sn*5; i<n*2; i++) {
const b= Math.floor(i/sn);
const i1 = b*2*sn + (i-b*sn);
const i2 = i1 - sn;
f.addCode(
c.setLocal(
"c",
c.i64_add(
c.i64_div_s(c.getLocal("c"), c.i64_const(CHUNK_BASE)),
load("l", c, "s", i2)
)
),
storeAdjusting(sizes[0], c, "c", "r", "aux", i)
);
}
}
function buildFix(n, sizes) {
const fnName = prefix+"_fix"+n+sizes;
if (definedFunctions[fnName]) return;
definedFunctions[fnName] = true;
const f = module.addFunction(fnName);
f.addParam("x", "i32");
f.addParam("r", "i32");
f.addLocal("c", "i64");
f.addLocal("aux", "i64");
const c = f.getCodeBuilder();
f.addCode(
c.setLocal(
"c",
load(sizes[0], c, "x", 0)
),
storeAdjusting(sizes[1], c, "c", "r", "aux", 0)
);
for (let i=1; i<n; i++) {
f.addCode(
c.setLocal(
"c",
c.i64_add(
c.i64_div_s(c.getLocal("c"), c.i64_const(CHUNK_BASE)),
load(sizes[0], c, "x", i)
)
),
storeAdjusting(sizes[1], c, "c", "r", "aux", i)
);
}
}
function buildMul(n, sizes) {
if (n==3) {
return buildMul3(sizes);
}
const fnName = prefix+"_mul"+n+sizes;
if (definedFunctions[fnName]) return;
definedFunctions[fnName] = true;
const sn = n/3;
const sn2 = sn*2;
buildAdd(sn, sizes[0]+sizes[0]+"l");
buildAdd(sn, "l"+sizes[0]+"l");
buildSub(sn, "l"+sizes[0]+"l");
buildAdd(sn, sizes[1]+sizes[1]+"l");
buildAdd(sn, "l"+sizes[1]+"l");
buildSub(sn, "l"+sizes[1]+"l");
buildMulShort(sn, "ll");
buildMul(sn, "lll");
buildMul(sn, sizes[0]+sizes[1]+"l");
buildMulShort(sn2, "ll");
buildDivShort(sn2, "ll");
buildAdd(sn2, "lll");
buildSub(sn2, "lll");
buildRecompose(n, sizes[2]);
const f = module.addFunction(fnName);
f.addParam("x", "i32");
f.addParam("y", "i32");
f.addParam("r", "i32");
f.addLocal("m1", "i32");
f.addLocal("m2", "i32");
f.addLocal("n1", "i32");
f.addLocal("n2", "i32");
const c = f.getCodeBuilder();
const ws0 = sizes[0] == "s" ? 4 : 8;
const ws1 = sizes[1] == "s" ? 4 : 8;
const wsi = 8;
const m0 = c.getLocal("x");
f.addCode(c.setLocal("m1", c.i32_add( c.getLocal("x"), c.i32_const(sn*ws0) )));
const m1 = c.getLocal("m1");
f.addCode(c.setLocal("m2", c.i32_add( c.getLocal("x"), c.i32_const((sn*2)*ws0 ))));
const m2 = c.getLocal("m2");
const n0 = c.getLocal("y");
f.addCode(c.setLocal("n1", c.i32_add( c.getLocal("y"), c.i32_const(sn*ws1) )));
const n1 = c.getLocal("n1");
f.addCode(c.setLocal("n2", c.i32_add( c.getLocal("y"), c.i32_const((sn*2)*ws1 ))));
const n2 = c.getLocal("n2");
const po = c.i32_const(module.alloc(sn*wsi));
f.addCode(c.call(prefix + "_add" + sn + sizes[0]+sizes[0]+"l", m0, m2, po));
const p0 = m0;
const p1 = c.i32_const(module.alloc(sn*wsi));
f.addCode(c.call(prefix + "_add" + sn + "l"+sizes[0]+"l", po, m1, p1));
const pn1 = c.i32_const(module.alloc(sn*wsi));
f.addCode(c.call(prefix + "_sub" + sn + "l"+sizes[0]+"l", po, m1, pn1));
const pn2 = c.i32_const(module.alloc(sn*wsi));
f.addCode(
c.call(prefix + "_add" + sn + "l"+sizes[0]+"l", pn1, m2, pn2),
c.call(prefix + "_mulshort" + sn + "ll", pn2, c.i32_const(2), pn2),
c.call(prefix + "_sub" + sn + "l"+sizes[0]+"l", pn2, m0, pn2)
);
const pi = m2;
const qo = c.i32_const(module.alloc(sn*wsi));
f.addCode(c.call(prefix + "_add" + sn + sizes[1]+sizes[1]+"l", n0, n2, qo));
const q0 = n0;
const q1 = c.i32_const(module.alloc(sn*wsi));
f.addCode(c.call(prefix + "_add" + sn + "l"+sizes[1]+"l", qo, n1, q1));
const qn1 = c.i32_const(module.alloc(sn*wsi));
f.addCode(c.call(prefix + "_sub" + sn + "l"+sizes[1]+"l", qo, n1, qn1));
const qn2 = c.i32_const(module.alloc(sn*wsi));
f.addCode(
c.call(prefix + "_add" + sn + "l"+sizes[1]+"l", qn1, n2, qn2),
c.call(prefix + "_mulshort" + sn +"ll", qn2, c.i32_const(2), qn2),
c.call(prefix + "_sub" + sn + "l"+sizes[1]+"l", qn2, n0, qn2)
);
const qi = n2;
const ps0 = module.alloc(sn*2*5*wsi);
const s0 = c.i32_const(ps0);
const s1 = c.i32_const(ps0 + (2*sn)*wsi);
const s2 = c.i32_const(ps0 + (4*sn)*wsi);
const s3 = c.i32_const(ps0 + (6*sn)*wsi);
const s4 = c.i32_const(ps0 + (8*sn)*wsi);
const r0 = s0;
const r1 = c.i32_const(module.alloc(sn*2*wsi));
const rn1 = c.i32_const(module.alloc(sn*2*wsi));
const rn2 = c.i32_const(module.alloc(sn*2*wsi));
const ri = s4;
f.addCode(c.call(prefix + "_mul" + sn + sizes[0]+sizes[1]+"l", p0, q0, r0));
f.addCode(c.call(prefix + "_mul" + sn + "lll" , p1, q1, r1));
f.addCode(c.call(prefix + "_mul" + sn + "lll" , pn1, qn1, rn1));
f.addCode(c.call(prefix + "_mul" + sn + "lll" , pn2, qn2, rn2));
f.addCode(c.call(prefix + "_mul" + sn + sizes[0]+sizes[1]+"l", pi, qi, ri));
const aux = c.i32_const(module.alloc(sn*wsi));
f.addCode(
// s3 = (r(-2) - r(1))/3
c.call(prefix + "_sub" + sn2 + "lll", rn2, r1, s3),
c.call(prefix + "_divshort" + sn2 + "ll", s3, c.i32_const(3), s3),
// s1 = (r(1) - r(-1))/2
c.call(prefix + "_sub" + sn2 + "lll", r1, rn1, s1),
c.call(prefix + "_divshort" + sn2 + "ll", s1, c.i32_const(2), s1),
// s2 = r(-1) - r(0)
c.call(prefix + "_sub" + sn2 + "lll", rn1, r0, s2),
// s3 = (s2-s3)/2 + r(inf)*2
c.call(prefix + "_sub" + sn2 + "lll", s2, s3, s3),
c.call(prefix + "_divshort" + sn2 + "ll", s3, c.i32_const(2), s3),
c.call(prefix + "_mulshort" + sn2 + "ll", ri, c.i32_const(2), aux),
c.call(prefix + "_add" + sn2 + "lll", s3, aux, s3),
// s2 = s2 + s1 - s4
c.call(prefix + "_add" + sn2 + "lll", s2, s1, s2),
c.call(prefix + "_sub" + sn2 + "lll", s2, s4, s2),
// s1 = s1 - s3
c.call(prefix + "_sub" + sn2 + "lll", s1, s3, s1),
);
f.addCode(c.call(prefix + "_recompose"+ n + sizes[2], s0, c.getLocal("r")));
}
buildMul(9, "sss");
/* module.exportFunction(prefix+"_divshort6");
module.exportFunction(prefix+"_mulshort6");
module.exportFunction(prefix+"_mul3");
module.exportFunction(prefix+"_mulu9");
*/
module.exportFunction(prefix+"_mul9sss", prefix+"_mul9");
buildDivShort(6, "ss");
module.exportFunction(prefix+"_divshort6ss", prefix+"_divshort6");
buildMulShort(6, "ss");
module.exportFunction(prefix+"_mulshort6ss", prefix+"_mulshort6");
buildMul(3, "sss");
module.exportFunction(prefix+"_mul3sss", prefix+"_mul3");
};