library_for_cpp

This documentation is automatically generated by online-judge-tools/verification-helper

View the Project on GitHub Kazun1998/library_for_cpp

:heavy_check_mark: 多点評価
(Modulo_Polynomial/Multipoint_Evaluation.hpp)

Outline

多項式 $P$ と $a_1, \dots, a_m \in F$ に対して, $P(a_1), \dots, P(a_m)$ を高速に求める.

Theory

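次の事実を用いる: 任意の $a \in F$ に対して $P(a) = P \bmod (X - a)$ が成り立つ (剰余の定理). また, 多項式 $g$ が多項式 $h$ を割り切るならば, $(P \bmod h) \bmod g = P \bmod g$ である.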

以上から, 各 $j$ について $P \bmod (X - a_j)$ を計算することによって, $P(a_1), \dots, P(a_m)$ を求めることが可能になる.

これを直接求めるのは厳しいが, 以下のようにして根付き木 Subproduct Tree, Subremainder Tree を構築することによって, $P(a_j)$ を高速に求められる.

Contents

Multipoint_Evaluation(Fast_Power_Series<mint> P, const vector<mint> &X)
日付 内容
2026/02/08 Multipoint_Evaluation の実装

Depends on

Required by

Verified with

Code

#pragma once

#include "Fast_Power_Series.hpp"

/// @brief Evaluates the polynomial P at every point of X.
/// @param P Polynomial to evaluate (taken by value).
/// @param X Evaluation points.
/// @return Vector whose j-th entry is the constant term of P reduced modulo (x - X[j]),
///         i.e. P(X[j]); empty when X is empty.
template<typename mint>
vector<mint> Multipoint_Evaluation(Fast_Power_Series<mint> P, const vector<mint> &X) {
    using FPS = Fast_Power_Series<mint>;

    const int point_count = X.size();
    if (point_count == 0) { return {}; }

    // Subproduct tree stored heap-style: node 1 is the root and node k has the
    // children 2k and 2k + 1. The node covering [lo, hi) holds prod (x - X[j]).
    int leaf_capacity = 1;
    while (leaf_capacity < point_count) { leaf_capacity <<= 1; }
    vector<FPS> subproduct(2 * leaf_capacity);

    auto build = [&](auto &&self, int lo, int hi, int node) -> void {
        if (hi - lo == 1) {
            subproduct[node] = FPS({-X[lo], 1});
            return;
        }

        const int lch = 2 * node, rch = 2 * node + 1;
        const int mid = (lo + hi) / 2;
        self(self, lo, mid, lch);
        self(self, mid, hi, rch);

        // Raise both precisions to the length of the product so that operator*
        // does not truncate it.
        const int product_length = subproduct[lch].size() + subproduct[rch].size() - 1;
        subproduct[lch].precision = product_length;
        subproduct[rch].precision = product_length;
        subproduct[node] = subproduct[lch] * subproduct[rch];
    };
    build(build, 0, point_count, 1);

    vector<mint> values(point_count);

    // Walk down the tree, reducing the remainder by each child's subproduct.
    // At a leaf the remainder is taken modulo (x - X[lo]), so its constant term is the value.
    auto descend = [&](auto &&self, int lo, int hi, int node, const FPS &rem) -> void {
        if (hi - lo == 1) {
            if (rem.size() == 0) { values[lo] = 0; }
            else { values[lo] = rem[0]; }
            return;
        }

        const int mid = (lo + hi) / 2;

        const FPS left_rem = rem % subproduct[2 * node];
        self(self, lo, mid, 2 * node, left_rem);

        const FPS right_rem = rem % subproduct[2 * node + 1];
        self(self, mid, hi, 2 * node + 1, right_rem);
    };

    const FPS root_rem = P % subproduct[1];
    descend(descend, 0, point_count, 1, root_rem);

    return values;
}
#line 2 "Modulo_Polynomial/Multipoint_Evaluation.hpp"

#line 2 "Modulo_Polynomial/Fast_Power_Series.hpp"

#line 2 "Modulo_Polynomial/Modulo_Polynomial.hpp"

#line 2 "template/template.hpp"

using namespace std;

// intrinstic
#include <immintrin.h>

#include <algorithm>
#include <array>
#include <bitset>
#include <cassert>
#include <cctype>
#include <cfenv>
#include <cfloat>
#include <chrono>
#include <cinttypes>
#include <climits>
#include <cmath>
#include <complex>
#include <concepts>
#include <cstdarg>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <deque>
#include <fstream>
#include <functional>
#include <initializer_list>
#include <iomanip>
#include <ios>
#include <iostream>
#include <istream>
#include <iterator>
#include <limits>
#include <list>
#include <map>
#include <memory>
#include <new>
#include <numeric>
#include <ostream>
#include <optional>
#include <queue>
#include <random>
#include <set>
#include <sstream>
#include <stack>
#include <streambuf>
#include <string>
#include <tuple>
#include <type_traits>
#include <typeinfo>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

// utility
#line 2 "template/utility.hpp"

using ll = long long;

// Performs a <- max(a, b). Returns true when a was updated (i.e. a < b held).
template<typename T, typename U>
inline bool chmax(T &a, const U b){
    if (a < b) {
        a = b;
        return true;
    }
    return false;
}

// Performs a <- min(a, b). Returns true when a was updated (i.e. a > b held).
template<typename T, typename U>
inline bool chmin(T &a, const U b){
    if (a > b) {
        a = b;
        return true;
    }
    return false;
}

// Returns the largest element of a.
// Throws invalid_argument when a is empty.
template<typename T>
inline T max(const vector<T> &a){
    if (a.empty()) { throw invalid_argument("vector is empty."); }

    const auto largest = max_element(a.begin(), a.end());
    return *largest;
}

// Returns the smallest element of a.
// Throws invalid_argument when a is empty.
template<typename T>
inline T min(const vector<T> &a){
    if (a.empty()) { throw invalid_argument("vector is empty."); }

    const auto smallest = min_element(a.begin(), a.end());
    return *smallest;
}

// Returns the index of the element selected by max_element (the first one among equal maxima).
// Throws std::invalid_argument when a is empty.
template<typename T>
inline size_t argmax(const vector<T> &a){
    if (a.empty()) { throw std::invalid_argument("vector is empty."); }

    const auto first = a.begin();
    const auto largest = max_element(first, a.end());
    return distance(first, largest);
}

// Returns the index of the element selected by min_element (the first one among equal minima).
// Throws invalid_argument when a is empty.
template<typename T>
inline size_t argmin(const vector<T> &a){
    if (a.empty()) { throw invalid_argument("vector is empty."); }

    const auto first = a.begin();
    const auto smallest = min_element(first, a.end());
    return distance(first, smallest);
}
#line 61 "template/template.hpp"

// math
#line 2 "template/math.hpp"

// Operators wrapped as named functions.

// Returns x + y.
template<typename T>
T add(const T &x, const T &y) {
    return x + y;
}

// Returns x - y.
template<typename T>
T sub(const T &x, const T &y) {
    return x - y;
}

// Returns x * y.
template<typename T>
T mul(const T &x, const T &y) {
    return x * y;
}

// Returns -x.
template<typename T>
T neg(const T &x) {
    return -x;
}

// Returns the bitwise AND of x and y.
template<integral T>
T bitwise_and(const T &x, const T &y) {
    return x & y;
}

// Returns the bitwise OR of x and y.
template<integral T>
T bitwise_or(const T &x, const T &y) {
    return x | y;
}

// Returns the bitwise XOR of x and y.
template<integral T>
T bitwise_xor(const T &x, const T &y) {
    return x ^ y;
}

// 除算に関する関数

// Returns floor(x / y).
template<integral T, integral U>
auto div_floor(T x, U y){
    // Built-in division truncates toward zero; step one down when the division
    // is inexact and the exact quotient is negative.
    const bool inexact = (x % y != 0);
    const bool opposite_signs = ((x < 0) != (y < 0));
    return x / y - (inexact && opposite_signs);
}

// Returns ceil(x / y).
template<integral T, integral U>
auto div_ceil(T x, U y){
    // Built-in division truncates toward zero; step one up when the division
    // is inexact and the exact quotient is positive.
    const bool inexact = (x % y != 0);
    const bool same_signs = ((x < 0) == (y < 0));
    return x / y + (inexact && same_signs);
}

// Returns the remainder of x divided by y, defined as x - floor(x / y) * y.
template<integral T, integral U>
auto safe_mod(T x, U y){
    return x - div_floor(x, y) * y;
}

// Returns the pair (floor(x / y), x - floor(x / y) * y).
template<integral T, integral U>
auto divmod(T x, U y){
    auto quotient = div_floor(x, y);
    auto remainder = x - quotient * y;
    return make_pair(quotient, remainder);
}

// Returns x / y rounded to the nearest integer, with exact halves rounded up.
template<integral T, integral U>
auto round(T x, U y){
    auto [q, r] = divmod(x, y);

    // q is the floored quotient and r the matching remainder, so the fractional
    // part is r / y. For negative y the remainder is non-positive, hence the
    // comparison direction flips.
    const bool round_up = (y < 0) ? (r <= div_floor(y, 2)) : (r >= div_ceil(y, 2));
    return round_up ? q + 1 : q;
}

// Returns true when x is odd.
template<integral T>
bool is_odd(const T &x) {
    return x % 2 != 0;
}

// Returns true when x is even.
template<integral T>
bool is_even(const T &x) {
    return x % 2 == 0;
}

// Returns true when x is a multiple of m.
template<integral T, integral U>
bool is_multiple(const T &x, const U &m) {
    return x % m == 0;
}

// Returns true when x > 0.
template<typename T>
bool is_positive(const T &x) {
    return x > 0;
}

// Returns true when x < 0.
template<typename T>
bool is_negative(const T &x) {
    return x < 0;
}

// Returns true when x == 0.
template<typename T>
bool is_zero(const T &x) {
    return x == 0;
}

// Returns true when x >= 0.
template<typename T>
bool is_non_negative(const T &x) {
    return x >= 0;
}

// Returns true when x <= 0.
template<typename T>
bool is_non_positive(const T &x) {
    return x <= 0;
}

// 指数に関する関数

// Returns x raised to the power y by binary exponentiation.
// Precondition: y >= 0 (checked by assert). The result must fit in ll.
ll intpow(ll x, ll y){
    assert(y >= 0);

    ll a = 1;
    while (y > 0){
        if (y & 1) { a *= x; }
        y >>= 1;
        // Square only while exponent bits remain: the unconditional final squaring
        // could overflow (undefined behaviour) even when the result itself fits.
        if (y > 0) { x *= x; }
    }
    return a;
}

// Returns x raised to the power y, reduced with the % operator by z.
// y is consumed bit by bit, so it is expected to be non-negative.
template<typename T, integral U>
T modpow(T x, U y, T z) {
    // Start from 1 % z so that the result is 0 (not 1) when z == 1 and y == 0.
    T a = T(1) % z;
    // Reduce the base first so that the first squaring works on a value below |z|.
    x %= z;

    while (y) {
        if (y & 1) { (a *= x) %= z; }

        (x *= x) %= z;
        y >>= 1;
    }

    return a;
}

// Returns the sum of all elements of X, accumulated with += starting from T(0).
template<typename T>
T sum(const vector<T> &X) {
    T total = T(0);
    for (const auto &value : X) {
        total += value;
    }
    return total;
}

// Euclidean algorithm: repeatedly replaces (a, b) by (b, a % b) until b == 0
// and returns the remaining a.
template<typename T>
T gcd(const T x, const T y) {
    T a = x, b = y;
    while (!(b == 0)) {
        T r = a % b;
        a = b;
        b = r;
    }
    return a;
}

// For integers (a, b), returns (x, y, g) such that a x + b y = g, where g is the
// value left in a when the Euclidean loop stops (a gcd of the inputs).
template<integral T>
tuple<T, T, T> Extended_Euclid(T a, T b) {
    // With (a0, b0) the initial arguments, the loop maintains
    //   a == s * a0 + u * b0   and   b == t * a0 + v * b0.
    T s = 1, t = 0;
    T u = 0, v = 1;

    while (b) {
        auto [q, r] = divmod(a, b);
        a = b;
        b = r;

        const T next_t = s - q * t;
        s = t;
        t = next_t;

        const T next_v = u - q * v;
        u = v;
        v = next_v;
    }

    return make_tuple(s, u, a);
}

// Returns floor(sqrt(N)); returns 0 when N <= 0.
ll isqrt(const ll &N) {
    if (N <= 0) { return 0; }

    // Floating-point estimate, then corrected with exact integer arithmetic.
    ll x = sqrtl(N);

    // The comparisons are written with division instead of squaring:
    // (x + 1) * (x + 1) overflows for N close to LLONG_MAX.
    while (x + 1 <= N / (x + 1)) { x++; }
    while (x > N / x) { x--; }

    return x;
}

// Returns floor(sqrt(N)); returns 0 when N <= 0. Alias of isqrt.
ll floor_sqrt(const ll &N) {
    return isqrt(N);
}

// Returns ceil(sqrt(N)); returns 0 when N <= 0.
ll ceil_sqrt(const ll &N) {
    // Without this guard a negative N fell through to "x + 1" and produced 1,
    // contradicting the documented result of 0.
    if (N <= 0) { return 0; }

    ll x = isqrt(N);
    return x * x == N ? x : x + 1;
}
#line 64 "template/template.hpp"

// inout
#line 1 "template/inout.hpp"
// Input / output helpers.

// Reads one value from cin into each argument, left to right.
template<class... T>
void input(T&... a){
    ((cin >> a), ...);
}

// Writes just a newline to cout.
void print(){
    cout << "\n";
}

// Writes the arguments to cout separated by single spaces, followed by a newline.
template<class T, class... Ts>
void print(const T& a, const Ts&... b){
    cout << a;
    ((cout << " " << b), ...);
    cout << "\n";
}

// Reads the first member, then the second member, of a pair.
template<typename T, typename U>
istream &operator>>(istream &is, pair<T, U> &P){
    is >> P.first;
    is >> P.second;
    return is;
}

// Writes both members of a pair separated by a single space.
template<typename T, typename U>
ostream &operator<<(ostream &os, const pair<T, U> &P){
    os << P.first;
    os << " ";
    os << P.second;
    return os;
}

// Creates a vector of N + index elements and reads N values from cin into
// positions index .. index + N - 1; the first `index` elements stay value-initialized.
template<typename T>
vector<T> vector_input(int N, int index){
    vector<T> X(N + index);
    for (int k = 0; k < N; k++) {
        cin >> X[index + k];
    }
    return X;
}

// Reads one value into every existing element of X (the size of X is not changed).
template<typename T>
istream &operator>>(istream &is, vector<T> &X){
    for (auto it = X.begin(); it != X.end(); ++it) {
        is >> *it;
    }
    return is;
}

// Writes the elements of X separated by single spaces (no trailing separator).
template<typename T>
ostream &operator<<(ostream &os, const vector<T> &X){
    bool first = true;
    for (const auto &x : X) {
        os << (first ? "" : " ") << x;
        first = false;
    }
    return os;
}

// Writes the elements of an unordered_set in iteration order, separated by single spaces.
template<typename T>
ostream &operator<<(ostream &os, const unordered_set<T> &S){
    bool first = true;
    for (const T &a : S) {
        os << (first ? "" : " ") << a;
        first = false;
    }
    return os;
}

// Writes the elements of a set in iteration order, separated by single spaces.
template<typename T>
ostream &operator<<(ostream &os, const set<T> &S){
    bool first = true;
    for (const T &a : S) {
        os << (first ? "" : " ") << a;
        first = false;
    }
    return os;
}

// Writes the elements of an unordered_multiset in iteration order, separated by single spaces.
template<typename T>
ostream &operator<<(ostream &os, const unordered_multiset<T> &S){
    bool first = true;
    for (const T &a : S) {
        os << (first ? "" : " ") << a;
        first = false;
    }
    return os;
}

// Writes the elements of a multiset in iteration order, separated by single spaces.
template<typename T>
ostream &operator<<(ostream &os, const multiset<T> &S){
    bool first = true;
    for (const T &a : S) {
        os << (first ? "" : " ") << a;
        first = false;
    }
    return os;
}

// Returns a vector made of `offset` value-initialized elements followed by up to n
// values read from std::cin. Reading stops early at the first failed extraction.
template<typename T>
std::vector<T> input_vector(size_t n, size_t offset = 0) {
    std::vector<T> values;
    // Reserve the full capacity up front so later push_back calls do not reallocate.
    values.reserve(n + offset);
    // Leading placeholders (useful for 1-indexed data, for instance).
    values.assign(offset, T());

    for (size_t read = 0; read < n; ++read) {
        T item;
        std::cin >> item;
        if (std::cin.fail()) { break; }
        values.push_back(std::move(item));
    }
    return values;
}
#line 67 "template/template.hpp"

// macro
#line 2 "template/macro.hpp"

// Macro definitions.
// Value arguments are wrapped in parentheses inside the expansions so that
// expressions such as `*p` or `c ? a : b` can be passed safely.
#define all(x) (x).begin(), (x).end()
#define len(x) ll((x).size())
#define elif else if
#define unless(cond) if (!(cond))
#define until(cond) while (!(cond))
#define loop while (true)

// Overload helpers: overloadN discards its first N arguments and yields the next
// one, which lets a variadic macro dispatch on its argument count.
#define overload2(_1, _2, name, ...) name
#define overload3(_1, _2, _3, name, ...) name
#define overload4(_1, _2, _3, _4, name, ...) name
#define overload5(_1, _2, _3, _4, _5, name, ...) name

// Counting loops: rep(n), rep(i, n), rep(i, a, b), rep(i, a, b, c).
// The one-argument form uses the loop variable name `i`.
#define rep1(n) for (ll i = 0; i < (n); i++)
#define rep2(i, n) for (ll i = 0; i < (n); i++)
#define rep3(i, a, b) for (ll i = (a); i < (b); i++)
#define rep4(i, a, b, c) for (ll i = (a); i < (b); i += (c))
#define rep(...) overload4(__VA_ARGS__, rep4, rep3, rep2, rep1)(__VA_ARGS__)

// Range loops: foreach(x, a) binds each element; with more names the element is
// unpacked through a structured binding.
#define foreach1(x, a) for (auto &&x: a)
#define foreach2(x, y, a) for (auto &&[x, y]: a)
#define foreach3(x, y, z, a) for (auto &&[x, y, z]: a)
#define foreach4(x, y, z, w, a) for (auto &&[x, y, z, w]: a)
#define foreach(...) overload5(__VA_ARGS__, foreach4, foreach3, foreach2, foreach1)(__VA_ARGS__)
#line 70 "template/template.hpp"

// bitop
#line 2 "template/bitop.hpp"

// Returns the bit length of the non-negative integer x, i.e. the number of bits
// needed to write x in binary (0 for x == 0).
ll bit_length(ll x) {
    if (x == 0) { return 0; }
    // __builtin_clzll counts leading zeros of an unsigned long long, so the width
    // must be that of long long; sizeof(long) is only 32 bits on LLP64 platforms.
    return (sizeof(unsigned long long) * CHAR_BIT) - __builtin_clzll(x);
}

// Returns the number of set bits of the non-negative integer x.
ll popcount(ll x) {
    return __builtin_popcountll(x);
}

// Returns floor(log2(x)) for a positive integer x.
ll floor_log2(ll x) {
    const ll width = bit_length(x);
    return width - 1;
}

// Returns ceil(log2(x)) for a positive integer x.
ll ceil_log2(ll x) {
    const ll predecessor = x - 1;
    return bit_length(predecessor);
}

// Returns bit k of x (0 or 1).
int get_bit(ll x, int k) {
    const ll shifted = x >> k;
    return shifted & 1;
}

// Returns the k lowest bits of x, least significant bit first.
vector<int> get_bits(ll x, int k) {
    vector<int> bits(k);
    for (auto &bit : bits) {
        bit = x & 1;
        x >>= 1;
    }

    return bits;
}

// Returns the bits of x, least significant bit first, using bit_length(x) as the length.
vector<int> get_bits(ll x) {
    const int width = bit_length(x);
    return get_bits(x, width);
}
#line 73 "template/template.hpp"

// exception
#line 2 "template/exception.hpp"

// Exception carrying a fixed message; by its name, meant to signal that a requested object does not exist.
class NotExist: public exception {
    private:
    // Text returned by what().
    string message = "求めようとしていたものは存在しません.";

    public:
    NotExist() = default;

    // Returns the stored message as a C string.
    const char* what() const noexcept override {
        return message.c_str();
    }
};
#line 2 "Algebra/modint.hpp"

#line 4 "Algebra/modint.hpp"

template<int M>
class modint {
    public:
    static constexpr int _mod = M; 
    uint64_t x;

    public:
    static constexpr int mod() { return _mod; }

    static modint raw(int v) {
        modint a;
        a.x = v;
        return a;
    }

    // 初期化
    constexpr modint(): x(0) {}
    constexpr modint(int64_t a) {
        int64_t w = (int64_t)(a) % mod();
        if (w < 0) { w += mod(); }
        x = w;
    }

    // マイナス元
    modint operator-() const { return modint(-x); }

    // 加法
    modint& operator+=(const modint &b){
        if ((x += b.x) >= mod()) x -= mod();
        return *this;
    }

    friend modint operator+(const modint &x, const modint &y) { return modint(x) += y; }

    // 減法
    modint& operator-=(const modint &b){
        if ((x += mod() - b.x) >= mod()) x -= mod();
        return *this;
    }

    friend modint operator-(const modint &x, const modint &y) { return modint(x) -= y; }

    // 乗法
    modint& operator*=(const modint &b){
        (x *= b.x) %= mod();
        return *this;
    }

    friend modint operator*(const modint &x, const modint &y) { return modint(x) *= y; }
    friend modint operator*(const int &x, const modint &y) { return modint(x) *= y; }
    friend modint operator*(const ll &x, const modint &y) { return modint(x) *= y; }

    // 除法
    modint& operator/=(const modint &b){ return (*this) *= b.inverse(); }

    friend modint operator/(const modint &x, const modint &y) { return modint(x) /= y; }

    modint inverse() const {
        int64_t s = 1, t = 0;
        int64_t a = x, b = mod();

        while (b > 0) {
            int64_t q = a / b;

            a -= q * b; swap(a, b);
            s -= q * t; swap(s, t);
        }

        assert (a == 1);

        return modint(s);
    }

    // 比較
    friend bool operator==(const modint &a, const modint &b) { return (a.x == b.x); }
    friend bool operator==(const modint &a, const int &b) { return a.x == safe_mod(b, mod()); }
    friend bool operator!=(const modint &a, const modint &b) { return (a.x != b.x); }

    // 入力
    friend istream &operator>>(istream &is, modint &a) {
        int64_t x;
        is >> x;
        a.x = safe_mod(x, mod());
        return is;
    }

    // 出力
    friend ostream &operator<<(ostream &os, const modint &a) { return os << a.x; }

    bool is_zero() const { return x == 0; }
    bool is_member(ll a) const { return x == (a % mod() + mod()) % mod(); }
};

// Type trait: value is true exactly for instantiations of modint<M>.
template<typename T>
struct is_modint : std::bool_constant<false> {};

template<int M>
struct is_modint<modint<M>> : std::bool_constant<true> {};

// Returns x raised to the power n by binary exponentiation.
// A negative n yields the inverse of x raised to the power -n.
template<typename Mint>
requires is_modint<Mint>::value
Mint pow(Mint x, long long n) {
    if (n < 0) { return pow(x, -n).inverse(); }

    Mint result(1);
    while (n != 0) {
        if (n & 1) { result *= x; }
        x *= x;
        n >>= 1;
    }

    return result;
}
#line 5 "Modulo_Polynomial/Modulo_Polynomial.hpp"

/// @brief Polynomial with coefficients of type mint, truncated to a fixed number of terms.
///
/// `poly` stores the coefficients, lowest degree first. `precision` is the upper
/// bound that the constructor and resize() apply to the length of `poly`.
template<typename mint>
class Modulo_Polynomial {
    public:
    int precision = 0;

    public:
    vector<mint> poly;

    /// @brief Builds a polynomial from coefficients, dropping those at index >= precision.
    Modulo_Polynomial(vector<mint> _poly, int precision): precision(precision) {
        if (_poly.size() > precision) { _poly.resize(precision); }
        poly = _poly;
    }

    Modulo_Polynomial() = default;
    /// @brief Builds a polynomial whose precision equals the number of given coefficients.
    Modulo_Polynomial(vector<mint> poly) : Modulo_Polynomial(poly, poly.size()) {}
    /// @brief Builds the zero polynomial (no stored coefficients) with the given precision.
    Modulo_Polynomial(int precision) : Modulo_Polynomial({}, precision) {}

    // Operators
    public:
    // Additive inverse
    Modulo_Polynomial operator-() const {
        Modulo_Polynomial res(*this);
        for (auto &a : res.poly) { a = -a; }
        return res;
    }

    // Addition. Coefficients of P at index >= precision are ignored.
    Modulo_Polynomial& operator+=(const Modulo_Polynomial &P){
        if (size() < P.size()) { resize(P.size()); }

        // resize() clamps to precision, so this polynomial can still be shorter
        // than P; iterating over all of P would write past the end of poly.
        const int n = min(size(), P.size());
        for (int i = 0; i < n; i++) { poly[i] += P.poly[i]; }
        reduce();

        return *this;
    }

    Modulo_Polynomial& operator+=(const mint &a){
        if (poly.empty()) { resize(1); }
        // With precision == 0 no coefficient can be stored; nothing to update.
        if (poly.empty()) { return *this; }
        poly[0] += a;
        reduce();

        return *this;
    }

    friend Modulo_Polynomial operator+(const Modulo_Polynomial &lhs, const Modulo_Polynomial &rhs) { return Modulo_Polynomial(lhs) += rhs; }
    Modulo_Polynomial operator+(const mint &a) const { return Modulo_Polynomial(*this) += a; }

    // Subtraction. Coefficients of P at index >= precision are ignored.
    Modulo_Polynomial& operator-=(const Modulo_Polynomial &P){
        if (size() < P.size()) { resize(P.size()); }

        // See operator+=: the clamp in resize() bounds how far we may write.
        const int n = min(size(), P.size());
        for (int i = 0; i < n; i++) { poly[i] -= P.poly[i]; }
        reduce();

        return *this;
    }

    Modulo_Polynomial& operator-=(const mint &a){
        if (poly.empty()) { resize(1); }
        // With precision == 0 no coefficient can be stored; nothing to update.
        if (poly.empty()) { return *this; }
        poly[0] -= a;
        reduce();

        return *this;
    }

    friend Modulo_Polynomial operator-(const Modulo_Polynomial &lhs, const Modulo_Polynomial &rhs) { return Modulo_Polynomial(lhs) -= rhs; }
    Modulo_Polynomial operator-(const mint &a) const { return Modulo_Polynomial(*this) -= a; }

    // Scalar multiplication
    Modulo_Polynomial& operator*=(const mint &a){
        for (int i = 0; i < size(); i++) { poly[i] *= a; }
        reduce();
        return *this;
    }

    Modulo_Polynomial operator*(const mint &a) const {return Modulo_Polynomial(*this) *= a;}

    friend Modulo_Polynomial operator*(const mint &a, const Modulo_Polynomial &P) {
        Modulo_Polynomial res(P);
        res *= a;
        return res;
    }

    // Product, computed naively and truncated to the smaller of the two precisions.
    Modulo_Polynomial& operator*=(const Modulo_Polynomial &P) {
        int r = min({(int) (poly.size() + P.poly.size()) - 1, precision, P.precision});
        // Two empty operands give -1 above, which would be used as a vector size.
        r = max(r, 0);

        vector<mint> A(r);
        for (int i = 0; i < size(); i++) {
            for (int j = 0; j < P.size(); j++) {
                if (i + j < r) { A[i + j] += poly[i] * P.poly[j]; }
            }
        }

        poly = A;
        precision = min(precision, P.precision);
        return *this;
    }

    friend Modulo_Polynomial operator*(const Modulo_Polynomial &lhs, const Modulo_Polynomial &rhs) { return Modulo_Polynomial(lhs) *= rhs; }

    // Scalar division (multiplication by a.inverse())
    Modulo_Polynomial& operator/=(const mint &a) {
        mint a_inv = a.inverse();
        for (int i = 0; i < size(); i++) { poly[i] *= a_inv; }
        return *this;
    }

    Modulo_Polynomial operator/(const mint &a) const { return Modulo_Polynomial(*this) /= a; }

    // Coefficient access: returns 0 for an index at or beyond the stored length.
    mint operator[] (int k) const { return (k < poly.size()) ? poly[k] : 0; }

    // istream: reads exactly `precision` coefficients.
    friend istream &operator>>(istream &is, Modulo_Polynomial &P) {
        P.poly.resize(P.precision);
        for (int i = 0; i < (int)P.precision; i++) { is >> P.poly[i]; }
        return (is);
    }

    // ostream: writes the stored coefficients separated by single spaces.
    friend ostream &operator<<(ostream &os, const Modulo_Polynomial &P){
        for (int i = 0; i < (int)P.poly.size(); i++){
            os << (i ? " " : "") << P[i];
        }
        return os;
    }

    // Sets the length of poly to size, clamped to precision.
    // size = -1 is replaced by precision.
    void resize(int size = -1) {
        if (size == -1) { size = this -> precision; }
        size = min(size, this -> precision);
        poly.resize(size);
    }

    /// @brief Truncates poly to size only when poly is currently longer than size.
    /// @param size Target size; -1 means precision.
    void weak_resize(int size = -1) {
        if (size == -1) { size = this->precision; }

        if (size >= this->poly.size()) return;

        resize(size);
    }

    // True when every stored coefficient is zero.
    bool is_zero() const {
        for (auto &a: poly) {
            if (!a.is_zero()) { return false; }
        }
        return true;
    }

    // Removes trailing (highest-degree) zero coefficients.
    void reduce() {
        while (!poly.empty() && poly.back().is_zero()) { poly.pop_back(); }
    }

    // Number of stored coefficients.
    int size() const { return poly.size(); }

    // Degree (-1 for the zero polynomial).
    int degree() const {
        for (int d = size() - 1; d >= 0; d--) {
            if (!poly[d].is_zero()) { return d; }
        }
        return -1;
    }

    // Order: smallest index with a non-zero coefficient (-1 when there is none).
    int order() const {
        for (int d = 0; d < size(); d++) {
            if (!poly[d].is_zero()) { return d; }
        }
        return -1;
    }
};
#line 2 "Modulo_Polynomial/Numeric_Theory_Translation.hpp"

#line 5 "Modulo_Polynomial/Numeric_Theory_Translation.hpp"

/// @brief Number-theoretic transform over F and the polynomial routines built on it
///        (convolution, power-series inverse, product of many polynomials).
///
/// F must provide a static mod(), arithmetic operators, inverse() and is_zero(),
/// as used below.
template<typename F>
class Numeric_Theory_Translation {
    public:
    F primitive;
    // root[i] is root.back() squared (rank2 - i) times and iroot[i] its inverse.
    // rate2/rate3 and irate2/irate3 are the twiddle-factor increments that the
    // radix-2 and radix-4 butterflies multiply in between consecutive blocks.
    vector<F> root, iroot, rate2, irate2, rate3, irate3;

    public:
    Numeric_Theory_Translation() {
        primitive = primitive_root();
        build_up();
    }

    private:
    // Searches for the smallest g >= 2 such that g^((mod-1)/q) != 1 for every prime
    // factor q of mod - 1. The answers for mod 2 and 998244353 are hard-coded.
    F primitive_root(){
        if (F::mod() == 2) { return F(1); }
        if (F::mod() == 998244353) { return F(3); }

        // Distinct prime factors of mod - 1 by trial division.
        vector<int> fac;
        int v = F::mod() - 1;

        for (int q = 2; q * q <= v; q++){
            int e = 0;
            while (v % q == 0){
                e++; v /= q;
            }

            if (e > 0) { fac.emplace_back(q); }
        }

        if (v > 1) { fac.emplace_back(v); }

        F g(2);
        while (true) {
            bool flag = true;
            for (int q: fac) {
                if (pow(g, (F::mod() - 1) / q) == 1){
                    flag = false;
                    break;
                }
            }
            if (flag) { break; }
            g += 1;
        }
        return g;
    }

    // Fills the root and rate tables.
    void build_up() {
        // x has ones exactly at the trailing-zero positions of mod - 1, so rank2
        // is the number of times 2 divides mod - 1.
        int x = ~(F::mod() - 1) & (F::mod() - 2);
        int rank2 = bit_length(x);

        root.resize(rank2 + 1); iroot.resize(rank2 + 1);
        rate2.resize(max(0, rank2 - 1)); irate2.resize(max(0, rank2 - 1));
        rate3.resize(max(0, rank2 - 2)); irate3.resize(max(0, rank2 - 2));

        root.back() = pow(primitive, (F::mod() - 1) >> rank2);
        iroot.back() = root.back().inverse();

        for (int i = rank2 - 1; i >= 0; i--){
            root[i] = root[i + 1] * root[i + 1];
            iroot[i] = iroot[i + 1] * iroot[i + 1];
        }

        F prod(1), iprod(1);
        for (int i = 0; i < rank2 - 1; i++){
            rate2[i] = root[i + 2] * prod;
            // The inverse table pairs with iprod, mirroring the rate3/irate3 loop
            // below (this entry was previously built from prod).
            irate2[i] = iroot[i + 2] * iprod;

            prod *= iroot[i + 2]; iprod *= root[i + 2];
        }

        prod = 1; iprod = 1;
        for (int i = 0; i < rank2 - 2; i++){
            rate3[i] = root[i + 3] * prod;
            irate3[i] = iroot[i + 3] * iprod;

            prod *= iroot[i + 3]; iprod *= root[i + 3];
        }
    }

    public:
    // In-place forward transform of A.
    // Assumes A.size() is a power of two: the butterflies index up to 2^h - 1
    // with h = ceil_log2(A.size()).
    void ntt(vector<F> &A){
        int N = A.size();
        int h = ceil_log2(N);

        F I = root[2];
        for (int l = 0; l < h;){
            if (h - l == 1){
                // A single level is left: radix-2 butterflies.
                int p = 1 << (h - l - 1);
                F rot(1);
                for (int s = 0; s < (1 << l); s++){
                    int offset = s << (h - l);
                    for(int i = 0; i < p; i++){
                        F x = A[i + offset], y = A[i + offset + p] * rot;
                        A[i + offset] = x + y;
                        A[i + offset + p] = x - y;
                    }

                    // bit_length(~s & -(~s)) - 1 is the index of the lowest zero bit of s.
                    unless (s + 1 == (1 << l)){ rot *= rate2[bit_length(~s & -(~s)) - 1]; }
                }
                l++;
            } else {
                // Two levels at once: radix-4 butterflies.
                int p = 1 << (h - l - 2);
                F rot(1);
                for (int s = 0; s < (1 << l); s++){
                    F rot2 = rot * rot, rot3 = rot2 * rot;
                    int offset = s << (h - l);
                    for (int i = 0; i < p; i++){
                        F a0 = A[i + offset];
                        F a1 = A[i + offset + p] * rot;
                        F a2 = A[i + offset + 2 * p] * rot2;
                        F a3 = A[i + offset + 3 * p] * rot3;

                        F alpha = (a1 - a3) * I;

                        A[i + offset]         = a0 + a2 + a1 + a3;
                        A[i + offset + p]     = a0 + a2 - a1 - a3;
                        A[i + offset + 2 * p] = a0 - a2 + alpha;
                        A[i + offset + 3 * p] = a0 - a2 - alpha;
                    }

                    unless(s + 1 == 1 << l) { rot *= rate3[bit_length(~s & -(~s)) - 1]; }
                }
                l += 2;
            }
        }
    }

    public:
    // In-place inverse transform of A, including the final division by A.size().
    // Assumes A.size() is a power of two, as for ntt().
    void inverse_ntt(vector<F> &A){
        int N = A.size();
        int h = ceil_log2(N);

        F J = iroot[2];
        for (int l = h; l > 0;){
            if (l == 1){
                // A single level is left: radix-2 butterflies.
                int p = 1 << (h - l);
                F irot(1);
                for (int s = 0; s < (1 << (l - 1)); s++){
                    int offset = s << (h - l + 1);
                    for(int i = 0; i < p; i++){
                        F x = A[i + offset], y = A[i + offset + p];
                        A[i + offset]   = x + y;
                        A[i + offset + p] = (x - y) * irot;
                    }

                    unless (s+1 == 1 << (l - 1) ) { irot *= irate2[bit_length(~s & -(~s)) -1]; }
                }
                l--;
            } else {
                // Two levels at once: radix-4 butterflies.
                int p = 1 << (h - l);
                F irot(1);
                for (int s=0; s<(1<<(l-2)); s++){
                    F irot2 = irot * irot, irot3 = irot2  *irot;
                    int offset=s<<(h-l+2);
                    for (int i = 0; i < p; i++){
                        F a0 = A[i + offset];
                        F a1 = A[i + offset + p];
                        F a2 = A[i + offset + 2 * p];
                        F a3 = A[i + offset + 3 * p];

                        F beta = (a2 - a3) * J;

                        A[i + offset]         = a0 + a2 + a1 + a3;
                        A[i + offset + p]     = (a0 - a1 + beta) * irot;
                        A[i + offset + 2 * p] = (a0 + a1 - a2 - a3) * irot2;
                        A[i + offset + 3 * p] = (a0 - a1 - beta) * irot3;
                    }

                    unless (s + 1 == 1 << (l - 2)) { irot *= irate3[bit_length(~s & -(~s)) - 1]; }
                }
                l-=2;
            }
        }

        F N_inv=F(N).inverse();
        for (int i=0; i<N; i++) A[i]*=N_inv;
    }

    // Returns the product of A and B as coefficient vectors
    // (length A.size() + B.size() - 1; empty when either input is empty).
    vector<F> convolution(vector<F> A, vector<F> B){
        if (A.empty() || B.empty()) return vector<F>{};

        int M=A.size(), N=B.size(), L=M+N-1;
        // Schoolbook multiplication when one side is short.
        if (min(M,N)<64){
            vector<F> C(L);
            for(int i=0; i<M; i++){
                for (int j=0; j<N; j++){
                    C[i+j]+=A[i]*B[j];
                }
            }
            return C;
        }

        // Smallest power of two that holds all L coefficients. bit_length(L) would
        // double the transform size whenever L itself is a power of two.
        int h=ceil_log2(L);
        int K=1<<h;

        vector<F> X(K), Y(K);
        copy(A.begin(), A.end(), X.begin());
        copy(B.begin(), B.end(), Y.begin());

        ntt(X); ntt(Y);
        for (int i=0; i<K; i++) X[i]*=Y[i];

        inverse_ntt(X); X.resize(L);
        return X;
    }

    // Returns the first d coefficients of the power-series inverse of P.
    // Asserts that P is non-empty with a non-zero constant term.
    vector<F> inverse(vector<F> P, int d) {
        int n = P.size();
        assert(!P.empty() && !P[0].is_zero());

        // G holds the inverse modulo x^m; each round doubles m.
        vector<F> G{P[0].inverse()};
        while (G.size() < d) {
            int m = G.size();
            vector<F> A(P.begin(), P.begin() + min(n, 2 * m));
            A.resize(2 * m);
            vector<F> B(G);
            B.resize(2 * m);

            // A <- P * G as a cyclic product of length 2m; only its upper half is kept.
            ntt(A); ntt(B);
            for (int i = 0; i < 2 * m; i++) { A[i] *= B[i]; }

            inverse_ntt(A);
            A.erase(A.begin(), A.begin() + m);
            A.resize(2 * m);

            // Multiply the kept half by -G; its first m coefficients extend G.
            ntt(A);
            for (int i = 0; i < 2 * m; i++) { A[i] *= -B[i]; }

            inverse_ntt(A);

            G.insert(G.end(), A.begin(), A.begin() + m);
        }

        G.resize(d);
        return G;
    }

    // Inverse with as many coefficients as P has.
    vector<F> inverse(vector<F> P) { return inverse(P, P.size()); }

    // Returns the product of all polynomials in A ({1} when A is empty).
    // Operands are taken from the front of a queue and the product is pushed to the back.
    vector<F> multiple_convolution(vector<vector<F>> A) {
        if (A.empty()) { return {1}; }

        deque<int> queue(A.size());
        iota(queue.begin(), queue.end(), 0);

        while (queue.size() > 1) {
            int i = queue.front(); queue.pop_front();
            int j = queue.front(); queue.pop_front();

            A[i] = convolution(A[i], A[j]);
            queue.emplace_back(i);
        }

        return A[queue.back()];
    }
};
#line 5 "Modulo_Polynomial/Fast_Power_Series.hpp"

/// @brief Modulo_Polynomial whose products, inverses and divisions are computed
///        through the shared Numeric_Theory_Translation instance `calculator`.
template<typename mint>
class Fast_Power_Series : public Modulo_Polynomial<mint> {
    protected:
    static Numeric_Theory_Translation<mint> calculator;

    public:
    Fast_Power_Series(vector<mint> _poly, int _precision) : Modulo_Polynomial<mint>(_poly, _precision) {}

    Fast_Power_Series() = default;
    Fast_Power_Series(vector<mint> _poly) : Fast_Power_Series(_poly, _poly.size()) {}
    Fast_Power_Series(int _precision) : Fast_Power_Series({}, _precision) {}

    // Addition. The result takes the smaller of the two precisions.
    Fast_Power_Series& operator+=(const Fast_Power_Series &B) {
        this->poly.resize(max(this->poly.size(), B.poly.size()));
        for (int i = 0; i < B.poly.size(); i++) {
            this->poly[i] += B.poly[i];
        }
        this->precision = min(this->precision, B.precision);
        this->weak_resize();
        this->reduce();
        return *this;
    }

    friend Fast_Power_Series<mint> operator+(const Fast_Power_Series<mint> &lhs, const Fast_Power_Series<mint> &rhs) {
        return Fast_Power_Series<mint>(lhs) += rhs; 
    }

    // Subtraction. The result takes the smaller of the two precisions.
    Fast_Power_Series& operator-=(const Fast_Power_Series &B) {
        this->poly.resize(max(this->poly.size(), B.poly.size()));
        for (int i = 0; i < B.poly.size(); i++) {
            this->poly[i] -= B.poly[i];
        }
        this->precision = min(this->precision, B.precision);
        this->weak_resize();
        this->reduce();
        return *this;
    }

    friend Fast_Power_Series<mint> operator-(const Fast_Power_Series<mint> &lhs, const Fast_Power_Series<mint> &rhs) {
        return Fast_Power_Series<mint>(lhs) -= rhs; 
    }

    // Scalar multiplication
    Fast_Power_Series& operator*=(const mint &a){
        for (int i = 0; i < this->size(); i++) { this->poly[i] *= a; }
        this->reduce();
        return *this;
    }

    Fast_Power_Series operator*(const mint &a) const {return Fast_Power_Series(*this) *= a; }

    friend Fast_Power_Series operator*(const mint &a, const Fast_Power_Series &P) { return Fast_Power_Series(P) *= a; }

    friend Fast_Power_Series operator*(const ll &a, const Fast_Power_Series &P) { return mint(a) * P; }

    // Product, truncated to the smaller of the two precisions.
    Fast_Power_Series& operator*=(const Fast_Power_Series &P) {
        auto tmp = calculator.convolution(this->poly, P.poly);

        this->poly = tmp;
        this->precision = min(this->precision, P.precision);

        this->weak_resize();
        this->reduce();
        return *this;
    }

    friend Fast_Power_Series operator*(const Fast_Power_Series &lhs, const Fast_Power_Series &rhs) { return Fast_Power_Series(lhs) *= rhs; }

    // Product as polynomials: the precision of the result is the length of the
    // full product, so nothing is truncated.
    Fast_Power_Series mul_poly(const Fast_Power_Series &P) const {
        auto tmp = calculator.convolution(this->poly, P.poly);
        return Fast_Power_Series(tmp, tmp.size());
    }

    // Inverse modulo X^d.
    // d = -1 means d = precision; a larger d is clamped to precision.
    // The returned series keeps this->precision.
    Fast_Power_Series inverse(int d = -1) {
        vector<mint> p = calculator.inverse(this->poly, (d == -1) ? this->precision : min(d, this->precision));
        return {p, this->precision};
    }

    // Division as power series: multiplies by the inverse of P taken to P.precision terms.
    Fast_Power_Series& operator/=(const Fast_Power_Series &P) {
        vector<mint> inv = calculator.inverse(P.poly, P.precision);
        this->poly = calculator.convolution(this->poly, inv);
        this->precision = min(this->precision, P.precision);
        this->weak_resize();
        this->reduce();
        return *this;
    }

    friend Fast_Power_Series operator/(const Fast_Power_Series &lhs, const Fast_Power_Series &rhs) { return Fast_Power_Series(lhs) /= rhs; }

    // Quotient of polynomial (Euclidean) division of *this by B.
    // Strips trailing zero coefficients of *this as a side effect.
    // Asserts that B is not the zero polynomial.
    Fast_Power_Series div(const Fast_Power_Series &B) {
        this->reduce();

        // B is const, so normalise a local copy instead: a divisor carrying trailing
        // zero coefficients would otherwise put a zero at the front of the reversed
        // vector and trip the assertion inside calculator.inverse().
        vector<mint> b_rev(B.poly);
        while (!b_rev.empty() && b_rev.back().is_zero()) { b_rev.pop_back(); }
        assert(!b_rev.empty());

        int n = this->poly.size(), m = b_rev.size();

        // The dividend has lower degree than the divisor: the quotient is zero.
        if (n < m) { return Fast_Power_Series({0}); }

        vector<mint> a_rev(this->poly);
        reverse(a_rev.begin(), a_rev.end());
        reverse(b_rev.begin(), b_rev.end());

        // The reversed quotient is rev(A) * rev(B)^{-1} modulo X^k.
        int k = n - m + 1;
        if (a_rev.size() > k) { a_rev.resize(k); }
        if (b_rev.size() > k) { b_rev.resize(k); }
        vector<mint> c = calculator.convolution(a_rev, calculator.inverse(b_rev, k));
        c.resize(k);
        reverse(c.begin(), c.end());
        return Fast_Power_Series(c, n);
    }

    // Remainder of polynomial division: *this <- *this - B * (*this div B).
    Fast_Power_Series& operator%=(const Fast_Power_Series &B) {
        Fast_Power_Series Q = this->div(B);
        vector<mint> product = calculator.convolution(B.poly, Q.poly);
        if (this->poly.size() < product.size()) { this->poly.resize(product.size()); }
        for (int i = 0; i < product.size(); i++) { this->poly[i] -= product[i]; }
        this->reduce();
        return *this;
    }

    friend Fast_Power_Series operator%(const Fast_Power_Series &lhs, const Fast_Power_Series &rhs) { return Fast_Power_Series(lhs) %= rhs; }

    // Returns (quotient, remainder) of polynomial division of *this by B.
    pair<Fast_Power_Series, Fast_Power_Series> divmod(const Fast_Power_Series &B) {
        Fast_Power_Series Q = this->div(B);
        vector<mint> product = calculator.convolution(B.poly, Q.poly);

        Fast_Power_Series R(*this);
        if (R.poly.size() < product.size()) { R.poly.resize(product.size()); }
        for (int i = 0; i < product.size(); i++) { R.poly[i] -= product[i]; }
        R.reduce();
        return {Q, R};
    }
};

// Definition of the transform helper shared by all series over the same mint.
template<typename mint>
Numeric_Theory_Translation<mint> Fast_Power_Series<mint>::calculator = Numeric_Theory_Translation<mint>();

// Free-function form of A.divmod(B): returns the pair produced by that member call.
template<typename mint>
pair<Fast_Power_Series<mint>, Fast_Power_Series<mint>> divmod(Fast_Power_Series<mint> &A, const Fast_Power_Series<mint> &B) {
    auto quotient_and_remainder = A.divmod(B);
    return quotient_and_remainder;
}
#line 4 "Modulo_Polynomial/Multipoint_Evaluation.hpp"

/// @brief Evaluates the polynomial P at every point of X.
/// @param P Polynomial to evaluate (taken by value).
/// @param X Evaluation points.
/// @return Vector whose j-th entry is the constant term of P reduced modulo (x - X[j]),
///         i.e. P(X[j]); empty when X is empty.
template<typename mint>
vector<mint> Multipoint_Evaluation(Fast_Power_Series<mint> P, const vector<mint> &X) {
    using FPS = Fast_Power_Series<mint>;

    const int point_count = X.size();
    if (point_count == 0) { return {}; }

    // Subproduct tree stored heap-style: node 1 is the root and node k has the
    // children 2k and 2k + 1. The node covering [lo, hi) holds prod (x - X[j]).
    int leaf_capacity = 1;
    while (leaf_capacity < point_count) { leaf_capacity <<= 1; }
    vector<FPS> subproduct(2 * leaf_capacity);

    auto build = [&](auto &&self, int lo, int hi, int node) -> void {
        if (hi - lo == 1) {
            subproduct[node] = FPS({-X[lo], 1});
            return;
        }

        const int lch = 2 * node, rch = 2 * node + 1;
        const int mid = (lo + hi) / 2;
        self(self, lo, mid, lch);
        self(self, mid, hi, rch);

        // Raise both precisions to the length of the product so that operator*
        // does not truncate it.
        const int product_length = subproduct[lch].size() + subproduct[rch].size() - 1;
        subproduct[lch].precision = product_length;
        subproduct[rch].precision = product_length;
        subproduct[node] = subproduct[lch] * subproduct[rch];
    };
    build(build, 0, point_count, 1);

    vector<mint> values(point_count);

    // Walk down the tree, reducing the remainder by each child's subproduct.
    // At a leaf the remainder is taken modulo (x - X[lo]), so its constant term is the value.
    auto descend = [&](auto &&self, int lo, int hi, int node, const FPS &rem) -> void {
        if (hi - lo == 1) {
            if (rem.size() == 0) { values[lo] = 0; }
            else { values[lo] = rem[0]; }
            return;
        }

        const int mid = (lo + hi) / 2;

        const FPS left_rem = rem % subproduct[2 * node];
        self(self, lo, mid, 2 * node, left_rem);

        const FPS right_rem = rem % subproduct[2 * node + 1];
        self(self, mid, hi, 2 * node + 1, right_rem);
    };

    const FPS root_rem = P % subproduct[1];
    descend(descend, 0, point_count, 1, root_rem);

    return values;
}
Back to top page