uint32_t tmp = x[k] & (1<<sh)-1;
x[k] = (x[k]>>sh) + carry;
carry = (1000000000>>sh) * tmp;
if (k==a && !x[k]) {
a = (a+1 & MASK);
uint32_t tmp = x[k] & (1<<sh)-1;
x[k] = (x[k]>>sh) + carry;
carry = (1000000000>>sh) * tmp;
if (k==a && !x[k]) {
a = (a+1 & MASK);