This documentation is automatically generated by online-judge-tools/verification-helper
#include "poly.hpp"
#pragma once
#include "poly_basic.hpp"
#include <algorithm>
#include <array>
#include <cassert>
#include <iostream>
#include <vector>
template<typename Tp> class Poly : public std::vector<Tp> {
using Base = std::vector<Tp>;
public:
using Base::Base;
static Poly zero() { return Poly(); }
static Poly one() { return Poly{Tp::one()}; }
bool is_zero() const { return deg() < 0; }
bool is_one() const { return deg() == 0 && lc() == Tp::one(); }
int deg() const { return degree(*this); }
int ord() const { return order(*this); }
Poly rev() const {
const int d = deg();
Poly res(d + 1);
for (int i = d; i >= 0; --i) res[i] = Base::operator[](d - i);
return res;
}
Poly slice(int L, int R) const {
Poly res(R - L);
for (int i = L; i < std::min<int>(R, Base::size()); ++i) res[i - L] = Base::operator[](i);
return res;
}
Poly trunc(int D) const {
Poly res(D);
for (int i = 0; i < std::min<int>(D, Base::size()); ++i) res[i] = Base::operator[](i);
return res;
}
Poly &shrink() {
Base::resize(deg() + 1);
return *this;
}
Tp lc() const {
const int d = deg();
return d == -1 ? Tp() : Base::operator[](d);
}
Poly monic() const {
const int d = deg();
assert(d >= 0);
const auto iv = Base::operator[](d).inv();
Poly res(*this);
for (int i = 0; i <= d; ++i) res[i] *= iv;
return res;
}
Poly taylor_shift(Tp c) const {
Base::operator=(taylor_shift(*this, c));
return shrink();
}
Poly operator-() const {
const int d = deg();
Poly res(d + 1);
for (int i = 0; i <= d; ++i) res[i] = -Base::operator[](i);
res.shrink();
return res;
}
std::array<Poly, 2> divmod(const Poly &R) const {
const auto [q, r] = euclidean_div(*this, R);
return {Poly(q.begin(), q.end()), Poly(r.begin(), r.end())};
}
Poly &operator+=(const Poly &R) {
if (Base::size() < R.size()) Base::resize(R.size());
for (int i = 0; i < (int)R.size(); ++i) Base::operator[](i) += R[i];
return shrink();
}
Poly &operator-=(const Poly &R) {
if (Base::size() < R.size()) Base::resize(R.size());
for (int i = 0; i < (int)R.size(); ++i) Base::operator[](i) -= R[i];
return shrink();
}
Poly &operator*=(const Poly &R) {
Base::operator=(convolution(*this, R));
return shrink();
}
Poly &operator/=(const Poly &R) {
Base::operator=(euclidean_div_quotient(*this, R));
return shrink();
}
Poly &operator%=(const Poly &R) {
Base::operator=(std::get<1>(divmod(R)));
return shrink();
}
Poly &operator<<=(int D) {
if (D > 0) {
Base::insert(Base::begin(), D, Tp(0));
} else if (D < 0) {
if (-D < (int)Base::size()) {
Base::erase(Base::begin(), Base::begin() + (-D));
} else {
Base::clear();
}
}
return shrink();
}
Poly &operator>>=(int D) { return operator<<=(-D); }
friend Poly operator+(const Poly &L, const Poly &R) { return Poly(L) += R; }
friend Poly operator-(const Poly &L, const Poly &R) { return Poly(L) -= R; }
friend Poly operator*(const Poly &L, const Poly &R) { return Poly(L) *= R; }
friend Poly operator/(const Poly &L, const Poly &R) { return Poly(L) /= R; }
friend Poly operator%(const Poly &L, const Poly &R) { return Poly(L) %= R; }
friend Poly operator<<(const Poly &L, int D) { return Poly(L) <<= D; }
friend Poly operator>>(const Poly &L, int D) { return Poly(L) >>= D; }
friend std::ostream &operator<<(std::ostream &L, const Poly &R) {
L << '[';
const int d = R.deg();
if (d < 0) {
L << '0';
} else {
for (int i = 0; i <= d; ++i) {
L << R[i];
if (i == 1) L << "*x";
if (i > 1) L << "*x^" << i;
if (i != d) L << " + ";
}
}
return L << ']';
}
};
// 2x2 matrix for Euclidean algorithm
template<typename Tp> class GCDMatrix : public std::array<std::array<Tp, 2>, 2> {
public:
GCDMatrix(const Tp &x00, const Tp &x01, const Tp &x10, const Tp &x11)
: std::array<std::array<Tp, 2>, 2>{std::array{x00, x01}, std::array{x10, x11}} {}
GCDMatrix operator*(const GCDMatrix &R) const {
return {(*this)[0][0] * R[0][0] + (*this)[0][1] * R[1][0],
(*this)[0][0] * R[0][1] + (*this)[0][1] * R[1][1],
(*this)[1][0] * R[0][0] + (*this)[1][1] * R[1][0],
(*this)[1][0] * R[0][1] + (*this)[1][1] * R[1][1]};
}
std::array<Tp, 2> operator*(const std::array<Tp, 2> &R) const {
return {(*this)[0][0] * R[0] + (*this)[0][1] * R[1],
(*this)[1][0] * R[0] + (*this)[1][1] * R[1]};
}
Tp det() const { return (*this)[0][0] * (*this)[1][1] - (*this)[0][1] * (*this)[1][0]; }
GCDMatrix adj() const { return {(*this)[1][1], -(*this)[0][1], -(*this)[1][0], (*this)[0][0]}; }
};
// returns M s.t. deg(M) <= d and deg(M21*A+M22*B) < max(deg(A),deg(B)) - d
// det(M) in {-1, 1}
// see:
// [1]: Daniel J. Bernstein. Fast multiplication and its applications.
template<typename Tp> inline GCDMatrix<Poly<Tp>> hgcd(const Poly<Tp> &A, const Poly<Tp> &B, int d) {
using Mat = GCDMatrix<Poly<Tp>>;
assert(!(A.deg() < 0 && B.deg() < 0));
if (A.deg() < B.deg()) return hgcd(B, A, d) * Mat({}, {Tp(1)}, {Tp(1)}, {});
if (A.deg() < d) return hgcd(A, B, A.deg());
if (B.deg() < A.deg() - d) return Mat({Tp(1)}, {}, {}, {Tp(1)});
if (int dd = A.deg() - d * 2; dd > 0) return hgcd(A >> dd, B >> dd, d);
if (d == 0) return Mat({}, {Tp(1)}, {Tp(1)}, -(A / B));
const auto M = hgcd(A, B, d / 2);
const auto D = M[1][0] * A + M[1][1] * B;
if (D.deg() < A.deg() - d) return M;
const auto C = M[0][0] * A + M[0][1] * B;
const auto [Q, R] = C.divmod(D);
return hgcd(D, R, D.deg() - (A.deg() - d)) * Mat({}, {Tp(1)}, {Tp(1)}, -Q) * M;
}
template<typename Tp> inline std::array<Poly<Tp>, 3> xgcd(const Poly<Tp> &A, const Poly<Tp> &B) {
const auto M = hgcd(A, B, std::max(A.deg(), B.deg()));
return {M[0][0], M[0][1], M[0][0] * A + M[0][1] * B};
}
template<typename Tp> inline std::array<Poly<Tp>, 2> inv_gcd(const Poly<Tp> &A, const Poly<Tp> &B) {
const auto M = hgcd(A, B, std::max(A.deg(), B.deg()));
return {M[0][0], M[0][0] * A + M[0][1] * B};
}
// returns P, Q s.t. [x^[-k, 0)] P/Q = [x^[-k, 0)] A/B
// where P, Q in F[x], deg(Q) is minimized
// requires deg(A) < deg(B)
template<typename Tp>
inline std::array<Poly<Tp>, 2> rational_approximation(const Poly<Tp> &A, const Poly<Tp> &B, int k) {
auto M = hgcd(B, A, k / 2);
const auto [C, D] = M * std::array{B, A};
if (D.deg() >= 0 && D.deg() - C.deg() >= -(k - (B.deg() - C.deg()) * 2))
M = GCDMatrix<Poly<Tp>>({}, {Tp(1)}, {Tp(1)}, -(C / D)) * M;
return {M.adj()[1][0], M.adj()[0][0]};
}
template<typename Tp>
inline std::array<Poly<Tp>, 2> rational_reconstruction(const std::vector<Tp> &A) {
return rational_approximation(Poly<Tp>(A.rbegin(), A.rend()), Poly<Tp>{Tp(1)} << A.size(),
A.size());
}
// returns [x^[-k, 0)] A/B
// requires deg(A) < deg(B)
template<typename Tp>
inline std::vector<Tp> fraction_to_series(const Poly<Tp> &A, const Poly<Tp> &B, int k) {
return (((A << k) / B).rev() << (B.deg() - A.deg() - 1)).slice(0, k);
}
#line 2 "poly.hpp"
#line 2 "poly_basic.hpp"
#line 2 "binomial.hpp"
#include <algorithm>
#include <vector>
template<typename Tp> class Binomial {
std::vector<Tp> factorial_, invfactorial_;
Binomial() : factorial_{Tp(1)}, invfactorial_{Tp(1)} {}
void preprocess(int n) {
if (const int nn = factorial_.size(); nn < n) {
int k = nn;
while (k < n) k *= 2;
k = std::min<long long>(k, Tp::mod());
factorial_.resize(k);
invfactorial_.resize(k);
for (int i = nn; i < k; ++i) factorial_[i] = factorial_[i - 1] * i;
invfactorial_.back() = factorial_.back().inv();
for (int i = k - 2; i >= nn; --i) invfactorial_[i] = invfactorial_[i + 1] * (i + 1);
}
}
public:
static const Binomial &get(int n) {
static Binomial bin;
bin.preprocess(n);
return bin;
}
Tp binom(int n, int m) const {
return n < m ? Tp() : factorial_[n] * invfactorial_[m] * invfactorial_[n - m];
}
Tp inv(int n) const { return factorial_[n - 1] * invfactorial_[n]; }
Tp factorial(int n) const { return factorial_[n]; }
Tp inv_factorial(int n) const { return invfactorial_[n]; }
};
#line 2 "fft.hpp"
#line 4 "fft.hpp"
#include <cassert>
#include <iterator>
#include <memory>
#line 8 "fft.hpp"
template<typename Tp> class FftInfo {
static Tp least_quadratic_nonresidue() {
for (int i = 2;; ++i)
if (Tp(i).pow((Tp::mod() - 1) / 2) == -1) return Tp(i);
}
const int ordlog2_;
const Tp zeta_;
const Tp invzeta_;
const Tp imag_;
const Tp invimag_;
mutable std::vector<Tp> root_;
mutable std::vector<Tp> invroot_;
FftInfo()
: ordlog2_(__builtin_ctzll(Tp::mod() - 1)),
zeta_(least_quadratic_nonresidue().pow((Tp::mod() - 1) >> ordlog2_)),
invzeta_(zeta_.inv()), imag_(zeta_.pow(1LL << (ordlog2_ - 2))), invimag_(-imag_),
root_{Tp(1), imag_}, invroot_{Tp(1), invimag_} {}
public:
static const FftInfo &get() {
static FftInfo info;
return info;
}
Tp imag() const { return imag_; }
Tp inv_imag() const { return invimag_; }
Tp zeta() const { return zeta_; }
Tp inv_zeta() const { return invzeta_; }
const std::vector<Tp> &root(int n) const {
// [0, n)
assert((n & (n - 1)) == 0);
if (const int s = root_.size(); s < n) {
root_.resize(n);
for (int i = __builtin_ctz(s); (1 << i) < n; ++i) {
const int j = 1 << i;
root_[j] = zeta_.pow(1LL << (ordlog2_ - i - 2));
for (int k = j + 1; k < j * 2; ++k) root_[k] = root_[k - j] * root_[j];
}
}
return root_;
}
const std::vector<Tp> &inv_root(int n) const {
// [0, n)
assert((n & (n - 1)) == 0);
if (const int s = invroot_.size(); s < n) {
invroot_.resize(n);
for (int i = __builtin_ctz(s); (1 << i) < n; ++i) {
const int j = 1 << i;
invroot_[j] = invzeta_.pow(1LL << (ordlog2_ - i - 2));
for (int k = j + 1; k < j * 2; ++k) invroot_[k] = invroot_[k - j] * invroot_[j];
}
}
return invroot_;
}
};
inline int fft_len(int n) {
--n;
n |= n >> 1, n |= n >> 2, n |= n >> 4, n |= n >> 8;
return (n | n >> 16) + 1;
}
namespace detail {
template<typename Iterator> inline void
butterfly_n(Iterator a, int n,
const std::vector<typename std::iterator_traits<Iterator>::value_type> &root) {
assert(n > 0);
assert((n & (n - 1)) == 0);
const int bn = __builtin_ctz(n);
if (bn & 1) {
for (int i = 0; i < n / 2; ++i) {
const auto a0 = a[i], a1 = a[i + n / 2];
a[i] = a0 + a1, a[i + n / 2] = a0 - a1;
}
}
for (int i = n >> (bn & 1); i >= 4; i /= 4) {
const int i4 = i / 4;
for (int k = 0; k < i4; ++k) {
const auto a0 = a[k + i4 * 0], a1 = a[k + i4 * 1];
const auto a2 = a[k + i4 * 2], a3 = a[k + i4 * 3];
const auto a02p = a0 + a2, a02m = a0 - a2;
const auto a13p = a1 + a3, a13m = (a1 - a3) * root[1];
a[k + i4 * 0] = a02p + a13p, a[k + i4 * 1] = a02p - a13p;
a[k + i4 * 2] = a02m + a13m, a[k + i4 * 3] = a02m - a13m;
}
for (int j = i, m = 2; j < n; j += i, m += 2) {
const auto r = root[m], r2 = r * r, r3 = r2 * r;
for (int k = j; k < j + i4; ++k) {
const auto a0 = a[k + i4 * 0], a1 = a[k + i4 * 1] * r;
const auto a2 = a[k + i4 * 2] * r2, a3 = a[k + i4 * 3] * r3;
const auto a02p = a0 + a2, a02m = a0 - a2;
const auto a13p = a1 + a3, a13m = (a1 - a3) * root[1];
a[k + i4 * 0] = a02p + a13p, a[k + i4 * 1] = a02p - a13p;
a[k + i4 * 2] = a02m + a13m, a[k + i4 * 3] = a02m - a13m;
}
}
}
}
template<typename Iterator> inline void
inv_butterfly_n(Iterator a, int n,
const std::vector<typename std::iterator_traits<Iterator>::value_type> &root) {
assert(n > 0);
assert((n & (n - 1)) == 0);
const int bn = __builtin_ctz(n);
for (int i = 4; i <= (n >> (bn & 1)); i *= 4) {
const int i4 = i / 4;
for (int k = 0; k < i4; ++k) {
const auto a0 = a[k + i4 * 0], a1 = a[k + i4 * 1];
const auto a2 = a[k + i4 * 2], a3 = a[k + i4 * 3];
const auto a01p = a0 + a1, a01m = a0 - a1;
const auto a23p = a2 + a3, a23m = (a2 - a3) * root[1];
a[k + i4 * 0] = a01p + a23p, a[k + i4 * 1] = a01m + a23m;
a[k + i4 * 2] = a01p - a23p, a[k + i4 * 3] = a01m - a23m;
}
for (int j = i, m = 2; j < n; j += i, m += 2) {
const auto r = root[m], r2 = r * r, r3 = r2 * r;
for (int k = j; k < j + i4; ++k) {
const auto a0 = a[k + i4 * 0], a1 = a[k + i4 * 1];
const auto a2 = a[k + i4 * 2], a3 = a[k + i4 * 3];
const auto a01p = a0 + a1, a01m = a0 - a1;
const auto a23p = a2 + a3, a23m = (a2 - a3) * root[1];
a[k + i4 * 0] = a01p + a23p, a[k + i4 * 1] = (a01m + a23m) * r;
a[k + i4 * 2] = (a01p - a23p) * r2, a[k + i4 * 3] = (a01m - a23m) * r3;
}
}
}
if (bn & 1) {
for (int i = 0; i < n / 2; ++i) {
const auto a0 = a[i], a1 = a[i + n / 2];
a[i] = a0 + a1, a[i + n / 2] = a0 - a1;
}
}
}
} // namespace detail
// FFT_n: A(x) |-> bit-reversed order of [A(1), A(zeta_n), ..., A(zeta_n^(n-1))]
template<typename Iterator> inline void fft_n(Iterator a, int n) {
using Tp = typename std::iterator_traits<Iterator>::value_type;
detail::butterfly_n(a, n, FftInfo<Tp>::get().root(n / 2));
}
template<typename Tp> inline void fft(std::vector<Tp> &a) { fft_n(a.begin(), a.size()); }
// IFFT_n: bit-reversed order of [A(1), A(zeta_n), ..., A(zeta_n^(n-1))] |-> A(x)
template<typename Iterator> inline void inv_fft_n(Iterator a, int n) {
using Tp = typename std::iterator_traits<Iterator>::value_type;
detail::inv_butterfly_n(a, n, FftInfo<Tp>::get().inv_root(n / 2));
const Tp iv = Tp::mod() - (Tp::mod() - 1) / n;
for (int i = 0; i < n; ++i) a[i] *= iv;
}
template<typename Tp> inline void inv_fft(std::vector<Tp> &a) { inv_fft_n(a.begin(), a.size()); }
// IFFT_n^T: A(x) |-> 1/n FFT_n((x^n A(x^(-1))) mod (x^n - 1))
template<typename Iterator> inline void transposed_inv_fft_n(Iterator a, int n) {
using Tp = typename std::iterator_traits<Iterator>::value_type;
const Tp iv = Tp::mod() - (Tp::mod() - 1) / n;
for (int i = 0; i < n; ++i) a[i] *= iv;
detail::butterfly_n(a, n, FftInfo<Tp>::get().inv_root(n / 2));
}
template<typename Tp> inline void transposed_inv_fft(std::vector<Tp> &a) {
transposed_inv_fft_n(a.begin(), a.size());
}
// FFT_n^T : FFT_n((x^n A(x^(-1))) mod (x^n - 1)) |-> n A(x)
template<typename Iterator> inline void transposed_fft_n(Iterator a, int n) {
using Tp = typename std::iterator_traits<Iterator>::value_type;
detail::inv_butterfly_n(a, n, FftInfo<Tp>::get().root(n / 2));
}
template<typename Tp> inline void transposed_fft(std::vector<Tp> &a) {
transposed_fft_n(a.begin(), a.size());
}
template<typename Tp> inline std::vector<Tp> convolution_fft(std::vector<Tp> a, std::vector<Tp> b) {
if (a.empty() || b.empty()) return {};
const int n = a.size();
const int m = b.size();
const int len = fft_len(n + m - 1);
a.resize(len);
b.resize(len);
fft(a);
fft(b);
for (int i = 0; i < len; ++i) a[i] *= b[i];
inv_fft(a);
a.resize(n + m - 1);
return a;
}
template<typename Tp> inline std::vector<Tp> square_fft(std::vector<Tp> a) {
if (a.empty()) return {};
const int n = a.size();
const int len = fft_len(n * 2 - 1);
a.resize(len);
fft(a);
for (int i = 0; i < len; ++i) a[i] *= a[i];
inv_fft(a);
a.resize(n * 2 - 1);
return a;
}
template<typename Tp>
inline std::vector<Tp> convolution_naive(const std::vector<Tp> &a, const std::vector<Tp> &b) {
if (a.empty() || b.empty()) return {};
const int n = a.size();
const int m = b.size();
std::vector<Tp> res(n + m - 1);
for (int i = 0; i < n; ++i)
for (int j = 0; j < m; ++j) res[i + j] += a[i] * b[j];
return res;
}
template<typename Tp>
inline std::vector<Tp> convolution(const std::vector<Tp> &a, const std::vector<Tp> &b) {
if (std::min(a.size(), b.size()) < 60) return convolution_naive(a, b);
if (std::addressof(a) == std::addressof(b)) return square_fft(a);
return convolution_fft(a, b);
}
#line 2 "fps_basic.hpp"
#line 2 "semi_relaxed_conv.hpp"
#line 5 "semi_relaxed_conv.hpp"
#include <type_traits>
#include <utility>
#line 8 "semi_relaxed_conv.hpp"
template<typename Tp, typename Closure>
inline std::enable_if_t<std::is_invocable_r_v<Tp, Closure, int, const std::vector<Tp> &>,
std::vector<Tp>>
semi_relaxed_convolution_naive(const std::vector<Tp> &A, Closure gen, int n) {
std::vector<Tp> B(n), AB(n);
for (int i = 0; i < n; ++i) {
for (int j = std::max(0, i - (int)A.size() + 1); j < i; ++j) AB[i] += A[i - j] * B[j];
B[i] = gen(i, AB);
if (!A.empty()) AB[i] += A[0] * B[i];
}
return B;
}
// returns coefficients generated by closure
// closure: gen(index, current_product)
template<typename Tp, typename Closure>
inline std::enable_if_t<std::is_invocable_r_v<Tp, Closure, int, const std::vector<Tp> &>,
std::vector<Tp>>
semi_relaxed_convolution(const std::vector<Tp> &A, Closure gen, int n) {
if (A.size() < 60) return semi_relaxed_convolution_naive(A, gen, n);
enum { BaseCaseSize = 32 };
static_assert((BaseCaseSize & (BaseCaseSize - 1)) == 0);
static const int Block[] = {16, 16, 16, 16, 16};
static const int BlockSize[] = {
BaseCaseSize,
BaseCaseSize * Block[0],
BaseCaseSize * Block[0] * Block[1],
BaseCaseSize * Block[0] * Block[1] * Block[2],
BaseCaseSize * Block[0] * Block[1] * Block[2] * Block[3],
BaseCaseSize * Block[0] * Block[1] * Block[2] * Block[3] * Block[4],
};
// returns (which_block, level)
auto blockinfo = [](int ind) {
int i = ind / BaseCaseSize, lv = 0;
while ((i & (Block[lv] - 1)) == 0) i /= Block[lv++];
return std::make_pair(i & (Block[lv] - 1), lv);
};
std::vector<Tp> B(n), AB(n);
std::vector<std::vector<std::vector<Tp>>> dftA, dftB;
for (int i = 0; i < n; ++i) {
const int s = i & (BaseCaseSize - 1);
// block contribution
if (i >= BaseCaseSize && s == 0) {
const auto [j, lv] = blockinfo(i);
const int blocksize = BlockSize[lv];
if (blocksize * j == i) {
if ((int)dftA.size() == lv) {
dftA.emplace_back();
dftB.emplace_back(Block[lv] - 1);
}
if ((j - 1) * blocksize < (int)A.size()) {
dftA[lv]
.emplace_back(A.begin() + (j - 1) * blocksize,
A.begin() + std::min<int>((j + 1) * blocksize, A.size()))
.resize(blocksize * 2);
fft(dftA[lv][j - 1]);
}
}
if (!dftA[lv].empty()) {
dftB[lv][j - 1].resize(blocksize * 2);
std::copy_n(B.begin() + (i - blocksize), blocksize, dftB[lv][j - 1].begin());
std::fill_n(dftB[lv][j - 1].begin() + blocksize, blocksize, Tp(0));
fft(dftB[lv][j - 1]);
// middle product
std::vector<Tp> mp(blocksize * 2);
for (int k = 0; k < std::min<int>(j, dftA[lv].size()); ++k)
for (int l = 0; l < blocksize * 2; ++l)
mp[l] += dftA[lv][k][l] * dftB[lv][j - 1 - k][l];
inv_fft(mp);
for (int k = 0; k < blocksize && i + k < n; ++k) AB[i + k] += mp[k + blocksize];
}
}
// basecase contribution
for (int j = std::max(i - s, i - (int)A.size() + 1); j < i; ++j) AB[i] += A[i - j] * B[j];
B[i] = gen(i, AB);
if (!A.empty()) AB[i] += A[0] * B[i];
}
return B;
}
#line 8 "fps_basic.hpp"
template<typename Tp> inline int order(const std::vector<Tp> &a) {
for (int i = 0; i < (int)a.size(); ++i)
if (a[i] != 0) return i;
return -1;
}
template<typename Tp> inline std::vector<Tp> fps_inv(const std::vector<Tp> &a, int n) {
assert(order(a) == 0);
if (n <= 0) return {};
if (std::min<int>(a.size(), n) < 60)
return semi_relaxed_convolution(
a, [v = a[0].inv()](int n, auto &&c) { return n == 0 ? v : -c[n] * v; }, n);
enum { Threshold = 32 };
const int len = fft_len(n);
std::vector<Tp> invA, shopA(len), shopB(len);
invA = semi_relaxed_convolution(
a, [v = a[0].inv()](int n, auto &&c) { return n == 0 ? v : -c[n] * v; }, Threshold);
invA.resize(len);
for (int i = Threshold * 2; i <= len; i *= 2) {
std::fill(std::copy_n(a.begin(), std::min<int>(a.size(), i), shopA.begin()),
shopA.begin() + i, Tp(0));
std::copy_n(invA.begin(), i, shopB.begin());
fft_n(shopA.begin(), i);
fft_n(shopB.begin(), i);
for (int j = 0; j < i; ++j) shopA[j] *= shopB[j];
inv_fft_n(shopA.begin(), i);
std::fill_n(shopA.begin(), i / 2, Tp(0));
fft_n(shopA.begin(), i);
for (int j = 0; j < i; ++j) shopA[j] *= shopB[j];
inv_fft_n(shopA.begin(), i);
for (int j = i / 2; j < i; ++j) invA[j] = -shopA[j];
}
invA.resize(n);
return invA;
}
template<typename Tp>
inline std::vector<Tp> fps_div(const std::vector<Tp> &a, const std::vector<Tp> &b, int n) {
assert(order(b) == 0);
if (n <= 0) return {};
return semi_relaxed_convolution(
b,
[&, v = b[0].inv()](int n, auto &&c) {
if (n < (int)a.size()) return (a[n] - c[n]) * v;
return -c[n] * v;
},
n);
}
template<typename Tp> inline std::vector<Tp> deriv(const std::vector<Tp> &a) {
const int n = (int)a.size() - 1;
if (n <= 0) return {};
std::vector<Tp> res(n);
for (int i = 1; i <= n; ++i) res[i - 1] = a[i] * i;
return res;
}
template<typename Tp> inline std::vector<Tp> integr(const std::vector<Tp> &a, Tp c = {}) {
const int n = a.size() + 1;
auto &&bin = Binomial<Tp>::get(n);
std::vector<Tp> res(n);
res[0] = c;
for (int i = 1; i < n; ++i) res[i] = a[i - 1] * bin.inv(i);
return res;
}
template<typename Tp> inline std::vector<Tp> fps_log(const std::vector<Tp> &a, int n) {
return integr(fps_div(deriv(a), a, n - 1));
}
template<typename Tp> inline std::vector<Tp> fps_exp(const std::vector<Tp> &a, int n) {
if (n <= 0) return {};
assert(a.empty() || a[0] == 0);
return semi_relaxed_convolution(
deriv(a),
[bin = Binomial<Tp>::get(n)](int n, auto &&c) {
return n == 0 ? Tp(1) : c[n - 1] * bin.inv(n);
},
n);
}
template<typename Tp> inline std::vector<Tp> fps_pow(std::vector<Tp> a, long long e, int n) {
if (n <= 0) return {};
if (e == 0) {
std::vector<Tp> res(n);
res[0] = 1;
return res;
}
const int o = order(a);
if (o < 0 || o > n / e || (o == n / e && n % e == 0)) return std::vector<Tp>(n);
if (o != 0) a.erase(a.begin(), a.begin() + o);
const Tp ia0 = a[0].inv();
const Tp a0e = a[0].pow(e);
const Tp me = e;
for (int i = 0; i < (int)a.size(); ++i) a[i] *= ia0;
a = fps_log(a, n - o * e);
for (int i = 0; i < (int)a.size(); ++i) a[i] *= me;
a = fps_exp(a, n - o * e);
for (int i = 0; i < (int)a.size(); ++i) a[i] *= a0e;
a.insert(a.begin(), o * e, Tp(0));
return a;
}
#line 7 "poly_basic.hpp"
#include <array>
#line 10 "poly_basic.hpp"
template<typename Tp> inline int degree(const std::vector<Tp> &a) {
int n = (int)a.size() - 1;
while (n >= 0 && a[n] == 0) --n;
return n;
}
template<typename Tp> inline void shrink(std::vector<Tp> &a) { a.resize(degree(a) + 1); }
template<typename Tp> inline std::vector<Tp> taylor_shift(std::vector<Tp> a, Tp c) {
const int n = a.size();
auto &&bin = Binomial<Tp>::get(n);
for (int i = 0; i < n; ++i) a[i] *= bin.factorial(i);
Tp cc = 1;
std::vector<Tp> b(n);
for (int i = 0; i < n; ++i) {
b[i] = cc * bin.inv_factorial(i);
cc *= c;
}
std::reverse(a.begin(), a.end());
auto ab = convolution(a, b);
ab.resize(n);
std::reverse(ab.begin(), ab.end());
for (int i = 0; i < n; ++i) ab[i] *= bin.inv_factorial(i);
return ab;
}
// returns (quotient, remainder)
// O(deg(Q)deg(B))
template<typename Tp> inline std::array<std::vector<Tp>, 2>
euclidean_div_naive(const std::vector<Tp> &A, const std::vector<Tp> &B) {
const int degA = degree(A);
const int degB = degree(B);
assert(degB >= 0);
const int degQ = degA - degB;
if (degQ < 0) return {std::vector<Tp>{Tp(0)}, A};
std::vector<Tp> Q(degQ + 1), R = A;
const auto inv = B[degB].inv();
for (int i = degQ, n = degA; i >= 0; --i)
if ((Q[i] = R[n--] * inv) != 0)
for (int j = 0; j <= degB; ++j) R[i + j] -= Q[i] * B[j];
R.resize(degB);
return {Q, R};
}
// O(min(deg(Q)^2,deg(Q)deg(B)))
template<typename Tp> inline std::vector<Tp>
euclidean_div_quotient_naive(const std::vector<Tp> &A, const std::vector<Tp> &B) {
const int degA = degree(A);
const int degB = degree(B);
assert(degB >= 0);
const int degQ = degA - degB;
if (degQ < 0) return {Tp(0)};
const auto inv = B[degB].inv();
std::vector<Tp> Q(degQ + 1);
for (int i = 0; i <= degQ; ++i) {
for (int j = 1; j <= std::min(i, degB); ++j) Q[degQ - i] += B[degB - j] * Q[degQ - i + j];
Q[degQ - i] = (A[degA - i] - Q[degQ - i]) * inv;
}
return Q;
}
// returns (quotient, remainder)
template<typename Tp> inline std::array<std::vector<Tp>, 2>
euclidean_div(const std::vector<Tp> &A, const std::vector<Tp> &B) {
const int degA = degree(A);
const int degB = degree(B);
assert(degB >= 0);
// A = Q*B + R => A/B = Q + R/B in R((x^(-1)))
const int degQ = degA - degB;
if (degQ < 0) return {std::vector<Tp>{Tp(0)}, A};
if (degQ < 60 || degB < 60) return euclidean_div_naive(A, B);
auto Q = fps_div(std::vector(A.rend() - (degA + 1), A.rend()),
std::vector(B.rend() - (degB + 1), B.rend()), degQ + 1);
std::reverse(Q.begin(), Q.end());
// returns a mod (x^n-1)
auto make_cyclic = [](const std::vector<Tp> &a, int n) {
assert((n & (n - 1)) == 0);
std::vector<Tp> b(n);
for (int i = 0; i < (int)a.size(); ++i) b[i & (n - 1)] += a[i];
return b;
};
const int len = fft_len(std::max(degB, 1));
const auto cyclicA = make_cyclic(A, len);
auto cyclicB = make_cyclic(B, len);
auto cyclicQ = make_cyclic(Q, len);
fft(cyclicQ);
fft(cyclicB);
for (int i = 0; i < len; ++i) cyclicQ[i] *= cyclicB[i];
inv_fft(cyclicQ);
// R = A - QB mod (x^n-1) (n >= degB)
std::vector<Tp> R(degB);
for (int i = 0; i < degB; ++i) R[i] = cyclicA[i] - cyclicQ[i];
return {Q, R};
}
template<typename Tp>
inline std::vector<Tp> euclidean_div_quotient(const std::vector<Tp> &A, const std::vector<Tp> &B) {
const int degA = degree(A);
const int degB = degree(B);
assert(degB >= 0);
// A = Q*B + R => A/B = Q + R/B in R((x^(-1)))
const int degQ = degA - degB;
if (degQ < 0) return {Tp(0)};
if (std::min(degQ, degB) < 60) return euclidean_div_quotient_naive(A, B);
auto Q = fps_div(std::vector(A.rend() - (degA + 1), A.rend()),
std::vector(B.rend() - (degB + 1), B.rend()), degQ + 1);
std::reverse(Q.begin(), Q.end());
return Q;
}
#line 7 "poly.hpp"
#include <iostream>
#line 9 "poly.hpp"
template<typename Tp> class Poly : public std::vector<Tp> {
using Base = std::vector<Tp>;
public:
using Base::Base;
static Poly zero() { return Poly(); }
static Poly one() { return Poly{Tp::one()}; }
bool is_zero() const { return deg() < 0; }
bool is_one() const { return deg() == 0 && lc() == Tp::one(); }
int deg() const { return degree(*this); }
int ord() const { return order(*this); }
Poly rev() const {
const int d = deg();
Poly res(d + 1);
for (int i = d; i >= 0; --i) res[i] = Base::operator[](d - i);
return res;
}
Poly slice(int L, int R) const {
Poly res(R - L);
for (int i = L; i < std::min<int>(R, Base::size()); ++i) res[i - L] = Base::operator[](i);
return res;
}
Poly trunc(int D) const {
Poly res(D);
for (int i = 0; i < std::min<int>(D, Base::size()); ++i) res[i] = Base::operator[](i);
return res;
}
Poly &shrink() {
Base::resize(deg() + 1);
return *this;
}
Tp lc() const {
const int d = deg();
return d == -1 ? Tp() : Base::operator[](d);
}
Poly monic() const {
const int d = deg();
assert(d >= 0);
const auto iv = Base::operator[](d).inv();
Poly res(*this);
for (int i = 0; i <= d; ++i) res[i] *= iv;
return res;
}
Poly taylor_shift(Tp c) const {
Base::operator=(taylor_shift(*this, c));
return shrink();
}
Poly operator-() const {
const int d = deg();
Poly res(d + 1);
for (int i = 0; i <= d; ++i) res[i] = -Base::operator[](i);
res.shrink();
return res;
}
std::array<Poly, 2> divmod(const Poly &R) const {
const auto [q, r] = euclidean_div(*this, R);
return {Poly(q.begin(), q.end()), Poly(r.begin(), r.end())};
}
Poly &operator+=(const Poly &R) {
if (Base::size() < R.size()) Base::resize(R.size());
for (int i = 0; i < (int)R.size(); ++i) Base::operator[](i) += R[i];
return shrink();
}
Poly &operator-=(const Poly &R) {
if (Base::size() < R.size()) Base::resize(R.size());
for (int i = 0; i < (int)R.size(); ++i) Base::operator[](i) -= R[i];
return shrink();
}
Poly &operator*=(const Poly &R) {
Base::operator=(convolution(*this, R));
return shrink();
}
Poly &operator/=(const Poly &R) {
Base::operator=(euclidean_div_quotient(*this, R));
return shrink();
}
Poly &operator%=(const Poly &R) {
Base::operator=(std::get<1>(divmod(R)));
return shrink();
}
Poly &operator<<=(int D) {
if (D > 0) {
Base::insert(Base::begin(), D, Tp(0));
} else if (D < 0) {
if (-D < (int)Base::size()) {
Base::erase(Base::begin(), Base::begin() + (-D));
} else {
Base::clear();
}
}
return shrink();
}
Poly &operator>>=(int D) { return operator<<=(-D); }
friend Poly operator+(const Poly &L, const Poly &R) { return Poly(L) += R; }
friend Poly operator-(const Poly &L, const Poly &R) { return Poly(L) -= R; }
friend Poly operator*(const Poly &L, const Poly &R) { return Poly(L) *= R; }
friend Poly operator/(const Poly &L, const Poly &R) { return Poly(L) /= R; }
friend Poly operator%(const Poly &L, const Poly &R) { return Poly(L) %= R; }
friend Poly operator<<(const Poly &L, int D) { return Poly(L) <<= D; }
friend Poly operator>>(const Poly &L, int D) { return Poly(L) >>= D; }
friend std::ostream &operator<<(std::ostream &L, const Poly &R) {
L << '[';
const int d = R.deg();
if (d < 0) {
L << '0';
} else {
for (int i = 0; i <= d; ++i) {
L << R[i];
if (i == 1) L << "*x";
if (i > 1) L << "*x^" << i;
if (i != d) L << " + ";
}
}
return L << ']';
}
};
// 2x2 matrix for Euclidean algorithm
template<typename Tp> class GCDMatrix : public std::array<std::array<Tp, 2>, 2> {
public:
GCDMatrix(const Tp &x00, const Tp &x01, const Tp &x10, const Tp &x11)
: std::array<std::array<Tp, 2>, 2>{std::array{x00, x01}, std::array{x10, x11}} {}
GCDMatrix operator*(const GCDMatrix &R) const {
return {(*this)[0][0] * R[0][0] + (*this)[0][1] * R[1][0],
(*this)[0][0] * R[0][1] + (*this)[0][1] * R[1][1],
(*this)[1][0] * R[0][0] + (*this)[1][1] * R[1][0],
(*this)[1][0] * R[0][1] + (*this)[1][1] * R[1][1]};
}
std::array<Tp, 2> operator*(const std::array<Tp, 2> &R) const {
return {(*this)[0][0] * R[0] + (*this)[0][1] * R[1],
(*this)[1][0] * R[0] + (*this)[1][1] * R[1]};
}
Tp det() const { return (*this)[0][0] * (*this)[1][1] - (*this)[0][1] * (*this)[1][0]; }
GCDMatrix adj() const { return {(*this)[1][1], -(*this)[0][1], -(*this)[1][0], (*this)[0][0]}; }
};
// returns M s.t. deg(M) <= d and deg(M21*A+M22*B) < max(deg(A),deg(B)) - d
// det(M) in {-1, 1}
// see:
// [1]: Daniel J. Bernstein. Fast multiplication and its applications.
template<typename Tp> inline GCDMatrix<Poly<Tp>> hgcd(const Poly<Tp> &A, const Poly<Tp> &B, int d) {
using Mat = GCDMatrix<Poly<Tp>>;
assert(!(A.deg() < 0 && B.deg() < 0));
if (A.deg() < B.deg()) return hgcd(B, A, d) * Mat({}, {Tp(1)}, {Tp(1)}, {});
if (A.deg() < d) return hgcd(A, B, A.deg());
if (B.deg() < A.deg() - d) return Mat({Tp(1)}, {}, {}, {Tp(1)});
if (int dd = A.deg() - d * 2; dd > 0) return hgcd(A >> dd, B >> dd, d);
if (d == 0) return Mat({}, {Tp(1)}, {Tp(1)}, -(A / B));
const auto M = hgcd(A, B, d / 2);
const auto D = M[1][0] * A + M[1][1] * B;
if (D.deg() < A.deg() - d) return M;
const auto C = M[0][0] * A + M[0][1] * B;
const auto [Q, R] = C.divmod(D);
return hgcd(D, R, D.deg() - (A.deg() - d)) * Mat({}, {Tp(1)}, {Tp(1)}, -Q) * M;
}
template<typename Tp> inline std::array<Poly<Tp>, 3> xgcd(const Poly<Tp> &A, const Poly<Tp> &B) {
const auto M = hgcd(A, B, std::max(A.deg(), B.deg()));
return {M[0][0], M[0][1], M[0][0] * A + M[0][1] * B};
}
template<typename Tp> inline std::array<Poly<Tp>, 2> inv_gcd(const Poly<Tp> &A, const Poly<Tp> &B) {
const auto M = hgcd(A, B, std::max(A.deg(), B.deg()));
return {M[0][0], M[0][0] * A + M[0][1] * B};
}
// returns P, Q s.t. [x^[-k, 0)] P/Q = [x^[-k, 0)] A/B
// where P, Q in F[x], deg(Q) is minimized
// requires deg(A) < deg(B)
template<typename Tp>
inline std::array<Poly<Tp>, 2> rational_approximation(const Poly<Tp> &A, const Poly<Tp> &B, int k) {
auto M = hgcd(B, A, k / 2);
const auto [C, D] = M * std::array{B, A};
if (D.deg() >= 0 && D.deg() - C.deg() >= -(k - (B.deg() - C.deg()) * 2))
M = GCDMatrix<Poly<Tp>>({}, {Tp(1)}, {Tp(1)}, -(C / D)) * M;
return {M.adj()[1][0], M.adj()[0][0]};
}
template<typename Tp>
inline std::array<Poly<Tp>, 2> rational_reconstruction(const std::vector<Tp> &A) {
return rational_approximation(Poly<Tp>(A.rbegin(), A.rend()), Poly<Tp>{Tp(1)} << A.size(),
A.size());
}
// returns [x^[-k, 0)] A/B
// requires deg(A) < deg(B)
template<typename Tp>
inline std::vector<Tp> fraction_to_series(const Poly<Tp> &A, const Poly<Tp> &B, int k) {
return (((A << k) / B).rev() << (B.deg() - A.deg() - 1)).slice(0, k);
}