Zoltan2
Loading...
Searching...
No Matches
largestComponent2Binary.cpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// Zoltan2: A package of combinatorial algorithms for scientific computing
4//
5// Copyright 2012 NTESS and the Zoltan2 contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10//
12// Written by Seher Acer, 2019
13// This code reads a mtx file and writes a binary file in CRS format:
14// [nrows nnzs rowPtr[0 nrows] colInd[0 nnzs]
15// 1) It symmetrizes the input, regardless of it is already symmetric or not
16// 2) It removes the diagonal entries
17// 3) The column indices are sorted in increasing order for each row
18// 4) Index base for the output file is 0
20// The corresponding file reader exists in "readMatrixFromBinaryFile.hpp"
21// Note: This is research code. We do not guarantee it works in all cases.
23
24#include <iostream>
25#include <fstream>
26#include <sstream>
27#include <set>
28#include <vector>
29#include <algorithm>
30#include <queue>
31
32typedef long long ord_type;
33
35 ord_type inn, ord_type *inRowPtr, ord_type *inColInd,
36 ord_type &outn, ord_type *outRowPtr, ord_type *outColInd)
37{
38
39 std::vector<int> mark(inn, -1);
40 std::queue<ord_type> active;
41 ord_type i, v, lastSearch, root, popCnt = 0, ncc = 0;
42
43 root = seed;
44 lastSearch = 0;
45
46 while(popCnt < inn) {
47 active.push(root);
48 mark[root] = ncc;
49
50 while(active.empty() == false) {
51
52 i = active.front();
53 active.pop();
54 popCnt ++;
55
56 for(ord_type j = inRowPtr[i]; j < inRowPtr[i+1]; ++j) {
57 v = inColInd[j];
58 if(mark[v] == -1) {
59 mark[v] = ncc;
60 active.push(v);
61 }
62 }
63 }
64
65 //search for an unmarked vertex to be the next root
66 for(ord_type i_r = lastSearch; i_r < inn; ++i_r) {
67 if(mark[i_r] == -1) {
68 root = i_r;
69 lastSearch = i_r;
70 break;
71 }
72 }
73
74 //increase component id
75 ncc++;
76 }
77
78 // find component sizes
79 std::vector<ord_type> compSize(ncc, 0);
80 for(ord_type i_c = 0; i_c < inn; ++i_c) {
81 if(mark[i_c] == -1) {
82 std::cout << "Vertex " << i_c << " is untouched,, Exiting!\n";
83 exit(1);
84 }
85 else
86 compSize[mark[i_c]]++;
87 }
88
89 // find the largest component
90 ord_type maxSize = 0;
91 ord_type maxId = -1;
92 for(ord_type i_l = 0; i_l < ncc; ++i_l) {
93 if(compSize[i_l] > maxSize) {
94 maxSize = compSize[i_l];
95 maxId = i_l;
96 }
97 }
98
99 // renumber the vertices
100 outn = 0;
101 for(ord_type i_v = 0; i_v < inn; ++i_v) {
102 if(mark[i_v] == maxId)
103 mark[i_v] = outn++;
104 else
105 mark[i_v] = -1;
106 }
107
108 if(outn != maxSize) {
109 std::cout << "The number of vertices in this component: " << outn << " does not match the maximum component size: " << maxSize << "\n";
110 exit(1);
111 }
112
113 // write the largest component to the output arrays
114 ord_type ptr = 0;
115 outn = 0;
116 outRowPtr[outn] = 0;
117 for(ord_type ii = 0; ii < inn; ii++) {
118 if(mark[ii] > -1){
119 for(ord_type j = inRowPtr[ii]; j < inRowPtr[ii+1]; ++j) {
120 v = inColInd[j];
121 if(mark[v] == -1) {
122 std::cout << "Neighbor " << v << " of " << ii << " is found to be absent in the component\n";
123 exit(1);
124 }
125 outColInd[ptr++] = mark[v];
126 }
127 outn++;
128 outRowPtr[outn] = ptr;
129 }
130 }
131
132 if(outn != maxSize) {
133 std::cout << "The number of vertices written: " << outn << " does not match the maximum component size: " << maxSize << "\n";
134 exit(1);
135 }
136
137}
138
139int main(int argc, char* argv[])
140{
141
142 std::string line;
143 ord_type nrows, nnzs, r, c = 0;
144
145 std::ifstream in(argv[1]);
146
147 do{
148 getline(in, line);
149 }
150 while(line[0] == '%');
151
152 std::stringstream ss1(line);
153 ss1 >> nrows >> nrows >> nnzs;
154 std::cout << "Number of Rows " << nrows << " Number of Columns " << nrows << std::endl;
155 std::vector<bool> diag(nrows, false);
156
157 ord_type *rowPtr = new ord_type[nrows+2];
158 for(ord_type i = 0; i < nrows+2; i++)
159 rowPtr[i] = 0;
160
161 while(getline(in, line)) {
162
163 std::stringstream ss2(line);
164 ss2 >> r >> c;
165 r--;
166 c--;
167
168 if(r != c) {
169 rowPtr[r+2]++;
170 rowPtr[c+2]++;
171 }
172 }
173 in.close();
174
175 for(ord_type j0 = 0; j0 < nrows; j0++)
176 rowPtr[j0+2]++;
177
178
179 for(ord_type k = 2; k < nrows+2; k++)
180 rowPtr[k] += rowPtr[k-1];
181
182 ord_type *colInd = new ord_type[rowPtr[nrows+1]];
183
184 // re-read from the beginning, skip the intro
185 in.open(argv[1]);
186 do {
187 getline(in, line);
188 }
189 while(line[0] == '%');
190
191
192 while(getline(in, line)) {
193
194 std::stringstream ss2(line);
195 ss2 >> r >> c;
196 r--;
197 c--;
198
199 if(r != c) {
200 colInd[rowPtr[r+1]++] = c;
201 colInd[rowPtr[c+1]++] = r;
202 }
203
204 }
205 in.close();
206
207
208 for(ord_type i0 = 0; i0 < nrows; i0++)
209 colInd[rowPtr[i0+1]++] = i0;
210
211
212 ord_type *rowPtrNew = new ord_type[nrows+1];
213 ord_type *colIndNew = new ord_type[rowPtr[nrows+1]];
214
215 rowPtrNew[0] = 0;
216 ord_type ptr = 0;
217 ord_type prev = -1;
218 for(ord_type i1 = 0; i1 < nrows; i1++) {
219
220 ord_type deg = rowPtr[i1+1] - rowPtr[i1];
221 if(deg > 0) {
222
223 std::sort(&colInd[rowPtr[i1]], &colInd[rowPtr[i1+1]]);
224 colIndNew[ptr++] = colInd[rowPtr[i1]];
225 prev = colInd[rowPtr[i1]];
226 for(ord_type j1 = rowPtr[i1]+1; j1 < rowPtr[i1+1]; j1++) {
227 if(colInd[j1] != prev) {
228 colIndNew[ptr++] = colInd[j1];
229 prev = colInd[j1];
230 }
231 }
232 }
233
234 rowPtrNew[i1+1] = ptr;
235 }
236
237 for(ord_type i2 = 0; i2 < nrows; i2++) {
238 for(ord_type j2 = rowPtrNew[i2]; j2 < rowPtrNew[i2+1]; ++j2)
239 if(colIndNew[j2] == i2)
240 diag[i2] = true;
241
242 if(diag[i2] == false)
243 std::cout << "ROW " << i2 << " misses diagonal\n";
244 }
245
246 std::cout << argv[1] << " " << nrows << " " << ptr << " ";
247
248 ord_type newnrows = -1;
249 findLargestComponent(0, //seed
250 nrows, rowPtrNew, colIndNew,
251 newnrows, rowPtr, colInd );
252 ptr = rowPtr[newnrows]; //new number of nonzeros
253
254 ord_type deg, max = 0;
255 for(ord_type i3 = 0; i3 < nrows; i3++) {
256 deg = rowPtrNew[i3+1] - rowPtrNew[i3];
257 if(deg > max)
258 max = deg;
259 }
260
261 // write into the output file
262 std::ofstream out(argv[2], std::ios::out | std::ios::binary);
263 out.write((char*)&newnrows, sizeof(ord_type));
264 out.write((char*)&ptr, sizeof(ord_type));
265
266 out.write((char*)rowPtr, sizeof(ord_type)*(newnrows+1));
267 out.write((char*)colInd, sizeof(ord_type)*(ptr));
268
269 out.close();
270
271 std::cout << newnrows << " " << ptr << " " << max << "\n";
272
273 delete [] rowPtr;
274 delete [] colInd;
275
276 delete [] rowPtrNew;
277 delete [] colIndNew;
278
279 return 0;
280}
int main()
void findLargestComponent(ord_type seed, ord_type inn, ord_type *inRowPtr, ord_type *inColInd, ord_type &outn, ord_type *outRowPtr, ord_type *outColInd)
long long ord_type