-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathStrTest.cpp
More file actions
executable file
·200 lines (146 loc) · 4.91 KB
/
Copy pathStrTest.cpp
File metadata and controls
executable file
·200 lines (146 loc) · 4.91 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
/*
Description
The French author Georges Perec (1936-1982) once wrote a book, La disparition, without the letter 'e'. He was a member of the Oulipo group. A quote from the book:
Tout avait l'air normal, mais tout s'affirmait faux. Tout avait l'air normal, d'abord, puis surgissait l'inhumain, l'affolant. Il aurait voulu savoir où s'articulait l'association qui l'unissait au roman : sur son tapis, assaillant à tout instant son imagination, l'intuition d'un tabou, la vision d'un mal obscur, d'un quoi vacant, d'un non-dit : la vision, l'avision d'un oubli commandant tout, où s'abolissait la raison : tout avait l'air normal mais…
Perec would probably have scored high (or rather, low) in the following contest. People are asked to write a perhaps even meaningful text on some subject with as few occurrences of a given "word" as possible. Our task is to provide the jury with a program that counts these occurrences, in order to obtain a ranking of the competitors. These competitors often write very long texts with nonsense meaning; a sequence of 500,000 consecutive 'T's is not unusual. And they never use spaces.
So we want to quickly find out how often a word, i.e., a given string, occurs in a text. More formally: given the alphabet {'A', 'B', 'C', …, 'Z'} and two finite strings over that alphabet, a word W and a text T, count the number of occurrences of W in T. All the consecutive characters of W must exactly match consecutive characters of T. Occurrences may overlap.
Input
The first line of the input file contains a single number: the number of test cases to follow. Each test case has the following format:
- One line with the word W, a string over {'A', 'B', 'C', …, 'Z'}, with 1 ≤ |W| ≤ 10,000 (here |W| denotes the length of the string W).
- One line with the text T, a string over {'A', 'B', 'C', …, 'Z'}, with |W| ≤ |T| ≤ 1,000,000.
Output
For every test case in the input file, the output should contain a single number, on a single line: the number of occurrences of the word W in the text T.
Sample Input
3
BAPC
BAPC
AZA
AZAZAZA
VERDI
AVERDXIVYERDIAN
Sample Output
1
3
0
*/
/*
void GetNext(string pattern, int next[])
{
int patternSize = pattern.size();
int j, k;
next[0] = 0;
k = 0;
for( j = 1 ; j < patternSize; j++)
{
while( k > 0 && pattern[j] != pattern[k])
{
k = next[k];
}
if(pattern[j] == pattern[k])
{
k++;
}
next[j] = k;
}
}
*/
#include<cstdio>
#include<cstring>
#include<iostream>
#include<string>
#include<vector>
using namespace std;
// Builds the KMP fallback table for `pattern`.
//
// On return, next[0] is the sentinel -1 and, for every i in
// 1..pattern.size(), next[i] is the length of the longest proper prefix of
// pattern[0..i) that is also a suffix of it.
//
// pattern: the word whose table is computed.
// next:    output array; the caller must provide at least
//          pattern.size() + 1 writable elements.
void GetNext(string pattern, int next[])
{
    const int length = pattern.size();
    int border = -1;
    next[0] = border;
    for (int pos = 0; pos < length; ++pos)
    {
        // Shrink the current border until it can be extended by pattern[pos]
        // or until the sentinel says no border is left.
        while (border != -1 && pattern[pos] != pattern[border])
        {
            border = next[border];
        }
        ++border;
        next[pos + 1] = border;
    }
}
// Counts how many times `pattern` occurs in `text` using Knuth-Morris-Pratt
// matching. Occurrences may overlap (e.g. "AZA" occurs 3 times in "AZAZAZA").
//
// text:    the string that is searched.
// pattern: the word that is searched for.
// returns: the number of start positions in `text` at which `pattern`
//          matches; 0 when `pattern` is empty.
int matchCount(string text, string pattern)
{
    const int textSize = static_cast<int>(text.size());
    const int patternSize = static_cast<int>(pattern.size());
    if (patternSize == 0)
    {
        // An empty word has no meaningful occurrence count here, and the
        // matching loop below assumes at least one pattern character.
        return 0;
    }

    // GetNext writes indices 0..patternSize, hence the extra slot. The vector
    // owns the table, so it is released on every return path.
    std::vector<int> next(patternSize + 1);
    GetNext(pattern, next.data());

    int count = 0;
    int textIdx = 0;     // index of the text character currently examined
    int patternIdx = 0;  // number of pattern characters matched so far, or -1
    while (textIdx < textSize)
    {
        // The -1 sentinel must be tested first so pattern[-1] is never read.
        if (patternIdx == -1 || text[textIdx] == pattern[patternIdx])
        {
            ++patternIdx;
            ++textIdx;
            if (patternIdx == patternSize)
            {
                ++count;
                // Continue from the longest border of the whole pattern so
                // that overlapping occurrences are still found.
                patternIdx = next[patternIdx];
            }
        }
        else
        {
            // Mismatch: fall back to the next shorter candidate prefix.
            patternIdx = next[patternIdx];
        }
    }
    return count;
}
/*
int matchCount(string text, string pattern)
{
int textSize = text.size();
int patternSize = pattern.size();
int textIdx = 0; //模式串的0下标,在正文串中的位置
int patternIdx = 0; //模式串当前匹配到的索引
int count = 0;
while(textIdx < textSize)
{
if(patternIdx < patternSize)
{
if(text[textIdx + patternIdx] == pattern[patternIdx])
{
patternIdx++;
}
else
{
textIdx++;
patternIdx = 0;
}
}
else
{
count++;
patternIdx = 0;
textIdx++;
}
}
return count;
}
*/
// Entry point. Redirects stdin to the sample input file, reads the number of
// test cases, and for each case reads a word and a text and prints how many
// times the word occurs in the text, one count per line.
//
// Returns 0 on success and 1 when the input file cannot be opened or the
// input is malformed or truncated.
int main(int argc, char** argv)
{
    (void)argc;
    (void)argv;

    const char* INPUTFILE("../res/test-case/sample_input_strtest.txt");
    if (!freopen(INPUTFILE, "r", stdin))
    {
        // Without this check every later read fails and the case count
        // would be used uninitialized.
        cerr << "Cannot open input file: " << INPUTFILE << endl;
        return 1;
    }

    int T = 0;
    if (!(cin >> T))
    {
        cerr << "Cannot read the number of test cases" << endl;
        return 1;
    }

    for (int i = 0; i < T; i++)
    {
        string pattern, text;
        if (!(cin >> pattern >> text))
        {
            cerr << "Unexpected end of input in test case " << (i + 1) << endl;
            return 1;
        }
        // '\n' instead of endl: no need to flush after every result.
        cout << matchCount(text, pattern) << '\n';
    }
    return 0;
}