# thinking

First of all, consider the scheme number of violence \ (dp \), \ (f[i][j] \) represents the number of first \ (I \), the sum of numbers is modulo \ (P \) and the rest \ (j \).
We do not consider the condition that there must be a prime number first, but first count out all the schemes. Then subtract the scheme without prime number.
Then \ (f[i + 1][(j + k) \% p] += f[i][j](1\le k \le m) \)
It is found that \ ((j + k) \% p \) has at least \ (\ lfloor\frac{m}{p}\rfloor \) for each remainder. Then there are \ (m\%p \) and \ (\ lfloor\frac{m}{p}\rfloor + 1 \) numbers. Just handle these \ (m\%p \) numbers separately.
Then calculate the number of schemes without prime number, and transfer them all with composite number.
Then it is found that this can be matrix optimized.
The list of \ (j \) in line \ (i \) of the transition matrix shows how many digital modules \ (p \) are left in \ (1 + i \) to \ (m+i \). You can also calculate the list matrix of \ (O(p^2) \).
In the case of total transfer, the transfer matrix can be (O(p\frac{m}{lnm}) \) transferred, and the oxygen absorption can be over

# Code

/*
* @Author: wxyww
* @Date:   2019-02-13 16:22:09
*/
#include<cstdio>
#include<iostream>
#include<cstdlib>
#include<cstring>
#include<algorithm>
#include<queue>
#include<vector>
#include<ctime>
using namespace std;
typedef long long ll;
// #define int ll
const int mod = 20170408;
ll x=0,f=1;char c=getchar();
while(c<'0'||c>'9') {
if(c=='-') f=-1;
c=getchar();
}
while(c>='0'&&c<='9') {
x=x*10+c-'0';
c=getchar();
}
return x*f;
}
int n,m,p;
namespace BF2 {
const int N = 110;
struct node {
int a[N][N],n,m;
node() {
memset(a,0,sizeof(a));n = 0,m = 0;
}
node(int nn,int mm) {
memset(a,0,sizeof(a));n = nn;m = mm;
}
node(int nn) {
n = nn;
memset(a,0,sizeof(a));
for(int i = 0;i <= nn;++i) a[i][i] = 1;
}
};
node operator * (const node &x,const node &y) {
int n = x.n,m = y.m;
node c(n,m);
int k = x.m;
for(int i = 0;i <= k;++i) {
for(int j = 0;j <= n;++j) {
for(int z = 0;z <= m;++z) {
c.a[j][z] += 1ll * x.a[j][i] * y.a[i][z] % mod;
c.a[j][z] %= mod;
}
}
}
return c;
}
node operator ^ (node x,int y) {
node ans(x.n);
for(;y;y >>= 1,x = x * x)
if(y & 1) ans = ans * x;
return ans;
}
int dis[2000003],vis[20000003];
int tot,tmp[2000003];
int js;
void Eur() {
vis[1] = 1;
for(int i = 2;i <= m;++i) {
if(!vis[i]) dis[++js] = i;
for(int j = 1;1ll * dis[j] * i <= m;++j) {
vis[dis[j] * i] = 1;
if(dis[j] % i == 0) break;
}
}
}
void Main() {
node C(p - 1,p - 1);
int k = m % p;
for(int i = 0;i < p;++i) {
for(int j = i + 1;j <= i + k;++j) {
C.a[i][j % p]++;
}
}
for(int i = 0;i < p;++i) for(int j = 0;j < p;++j) C.a[i][j] += m / p;
node ans(0,p - 1);
ans.a[0][0] = 1;
ans = ans * (C ^ n);
int anss = ans.a[0][0];
memset(ans.a,0,sizeof(ans.a));
memset(C.a,0,sizeof(C.a));
ans.a[0][0] = 1;
Eur();
for(int i = 0;i < p;++i) {
for(int j = i + 1;j <= i + k;++j) {
C.a[i][j % p]++;
}
}
for(int i = 0;i < p;++i) for(int j = 0;j < p;++j) C.a[i][j] += m / p;
for(int i = 0;i < p;++i)
for(int j = 1;j <= js;++j)
C.a[i][(i + dis[j]) % p]--;
ans = ans * (C ^ n);
cout<<(anss - ans.a[0][0] + mod) % mod;
}

}
signed main() {
}