diff --git a/src/gr_generic/generic.c b/src/gr_generic/generic.c index fea3901acd..12e2e85593 100644 --- a/src/gr_generic/generic.c +++ b/src/gr_generic/generic.c @@ -2612,23 +2612,32 @@ gr_generic_vec_dot_fmpz(gr_ptr res, gr_srcptr initial, int subtract, gr_srcptr v int gr_generic_vec_set_powers(gr_ptr res, gr_srcptr x, slong len, gr_ctx_t ctx) { + int status = GR_SUCCESS; + slong sz = ctx->sizeof_elem; + if (len <= 0) return status; + status |= gr_one(GR_ENTRY(res, 0, sz), ctx); + if (len <= 1) return status; + status |= gr_set(GR_ENTRY(res, 1, sz), x, ctx); + if (len <= 2) return status; + gr_method_binary_op mul = GR_BINARY_OP(ctx, MUL); gr_method_unary_op sqr = GR_UNARY_OP(ctx, SQR); - int status = GR_SUCCESS; slong i; - slong sz = ctx->sizeof_elem;; - for (i = 0; i < len; i++) + /* Prefer squaring for powers? */ + if (gr_ctx_is_finite(ctx) == T_TRUE || gr_ctx_has_real_prec(ctx) == T_TRUE) { - if (i == 0) - status |= gr_one(GR_ENTRY(res, i, sz), ctx); - else if (i == 1) - status |= gr_set(GR_ENTRY(res, i, sz), x, ctx); - else if (i % 2 == 0) - status |= sqr(GR_ENTRY(res, i, sz), GR_ENTRY(res, i / 2, sz), ctx); - else - status |= mul(GR_ENTRY(res, i, sz), GR_ENTRY(res, i - 1, sz), x, ctx); + for (i = 2; i < len; i++) + { + if (i % 2 == 0) + status |= sqr(GR_ENTRY(res, i, sz), GR_ENTRY(res, i / 2, sz), ctx); + else + status |= mul(GR_ENTRY(res, i, sz), GR_ENTRY(res, (i + 1) / 2, sz), GR_ENTRY(res, i / 2, sz), ctx); + } } + else + for (i = 2; i < len; i++) + status |= mul(GR_ENTRY(res, i, sz), GR_ENTRY(res, i - 1, sz), x, ctx); return status; } diff --git a/src/gr_poly/compose.c b/src/gr_poly/compose.c index 16966703b3..c30529f20d 100644 --- a/src/gr_poly/compose.c +++ b/src/gr_poly/compose.c @@ -13,6 +13,7 @@ #include "gr_vec.h" #include "gr_poly.h" +#include "longlong.h" /* compose by poly2 = a*x^n + c, no aliasing; n >= 1 */ static int @@ -39,19 +40,64 @@ _gr_poly_compose_axnc(gr_ptr res, gr_srcptr poly1, slong len1, } else { + int maxbit = FLINT_CLOG2(len1); gr_ptr t; - GR_TMP_INIT(t, ctx); - status |= gr_set(t, a, ctx); - - for (i = 1; i < len1; i++) + /* Prefer squaring for powers? cf. _gr_vec_set_powers */ + if (gr_ctx_is_finite(ctx) == T_TRUE || gr_ctx_has_real_prec(ctx) == T_TRUE) { - status |= gr_mul(GR_ENTRY(res, i, sz), GR_ENTRY(res, i, sz), t, ctx); - if (i + 1 < len1) - status |= gr_mul(t, t, a, ctx); + GR_TMP_INIT_VEC(t, maxbit, ctx); + + status |= gr_set(GR_ENTRY(t, 0, sz), a, ctx); + status |= gr_mul(GR_ENTRY(res, 1, sz), GR_ENTRY(res, 1, sz), a, ctx); + for (int j = 1; j < maxbit; ++j) + { + status |= gr_sqr(GR_ENTRY(t, j, sz), GR_ENTRY(t, j-1, sz), ctx); + status |= gr_mul(GR_ENTRY(res, 1<>bit, sz), GR_ENTRY(res, i>>bit, sz), GR_ENTRY(t, maxbit-1-bit, sz), ctx); + + for (int j = bit; j > 0; --j) + { + status |= gr_sqr(GR_ENTRY(t, maxbit-j, sz), GR_ENTRY(t, maxbit-j-1, sz), ctx); + status |= gr_mul(GR_ENTRY(res, i>>(j-1), sz), GR_ENTRY(res, i>>(j-1), sz), GR_ENTRY(t, maxbit-j, sz), ctx); + } + } + + for (i = (len1 + 1) >> 1; i < ((slong)1 << (maxbit-1)); i++) + { + int bit = flint_ctz(i); + status |= gr_mul(GR_ENTRY(t, maxbit-2-bit, sz), GR_ENTRY(t, maxbit-2-bit, sz), a, ctx); + status |= gr_mul(GR_ENTRY(res, i>>bit, sz), GR_ENTRY(res, i>>bit, sz), GR_ENTRY(t, maxbit-2-bit, sz), ctx); + + for (int j = bit; j > 0; --j) + { + status |= gr_sqr(GR_ENTRY(t, maxbit-j-1, sz), GR_ENTRY(t, maxbit-j-2, sz), ctx); + status |= gr_mul(GR_ENTRY(res, i>>(j-1), sz), GR_ENTRY(res, i>>(j-1), sz), GR_ENTRY(t, maxbit-j-1, sz), ctx); + } + } + + GR_TMP_CLEAR_VEC(t, maxbit, ctx); } + else + { + GR_TMP_INIT(t, ctx); + + status |= gr_set(t, a, ctx); - GR_TMP_CLEAR(t, ctx); + for (i = 1; i < len1; i++) + { + status |= gr_mul(GR_ENTRY(res, i, sz), GR_ENTRY(res, i, sz), t, ctx); + if (i + 1 < len1) + status |= gr_mul(t, t, a, ctx); + } + + GR_TMP_CLEAR(t, ctx); + } } } diff --git a/src/gr_poly/compose_series_brent_kung.c b/src/gr_poly/compose_series_brent_kung.c index 12a4447c32..de1557bc14 100644 --- a/src/gr_poly/compose_series_brent_kung.c +++ b/src/gr_poly/compose_series_brent_kung.c @@ -46,7 +46,7 @@ _gr_poly_compose_series_brent_kung(gr_ptr res, gr_srcptr poly1, slong len1, status |= _gr_vec_set(Brow(i), GR_ENTRY(poly1, i * m, sz), m, ctx); status |= _gr_vec_set(Brow(i), GR_ENTRY(poly1, i * m, sz), len1 % m, ctx); - /* Set rows of A to powers of poly2 */ + /* Set rows of A to powers of poly2 (cf. _gr_vec_set_powers) */ status |= gr_one(Arow(0), ctx); status |= _gr_vec_set(Arow(1), poly2, len2, ctx);