Attachment 'jgd_solve.patch'
Download 1 # HG changeset patch
2 # User Clement Pernet <clement.pernet@gmail.com>
3 # Date 1224064381 -7200
4 # Node ID 27b3032e924cbab41eb79da139e41d02f2533c6e
5 # Parent 7d525bf8b16f4c51f6e45e0b09dda5cdf3eae34c
6 * fixed bug in permutation applytrans
7 * added test_zero
8 * added license in parity
9 * added linear system solving using pluq
10 * added appropriate test suite for solve
11
12 diff -r 7d525bf8b16f -r 27b3032e924c src/packedmatrix.c
13 --- a/src/packedmatrix.c Sat Oct 11 10:16:18 2008 +0200
14 +++ b/src/packedmatrix.c Wed Oct 15 11:53:01 2008 +0200
15 @@ -984,6 +984,45 @@ permutation *mzd_col_block_rotate(packed
16 return P;
17 }
18
19 +
20 +
21 +
22 +int mzd_test_zero(packedmatrix *A)
23 +{
24 + /* Could be improved: stopping as the first non zero value is found (status!=0)*/
25 + size_t mb = A->nrows;
26 + size_t nb = A->ncols;
27 + size_t Aoffset = A->offset;
28 + size_t nbrest = (nb + Aoffset) % RADIX;
29 + int status=0;
30 + if (nb + Aoffset >= RADIX) {
31 + // Large A
32 + word mask_begin = RIGHT_BITMASK(RADIX-Aoffset);
33 + if (Aoffset == 0)
34 + mask_begin = ~mask_begin;
35 + word mask_end = LEFT_BITMASK(nbrest);
36 + size_t i;
37 + for (i=0; i<mb; ++i) {
38 + status |= A->values [A->rowswap [i]] & mask_begin;
39 + size_t j;
40 + for ( j = 1; j < A->width-1; ++j)
41 + status |= A->values [A->rowswap [i] + j];
42 + status |= A->values [A->rowswap [i] + A->width - 1] & mask_end;
43 + }
44 + } else {
45 + // Small A
46 + word mask = ((ONE << nb) - 1) ;
47 + mask <<= (RADIX-nb-Aoffset);
48 +
49 + size_t i;
50 + for (i=0; i < mb; ++i) {
51 + status |= A->values [A->rowswap [i]] & mask;
52 + }
53 + }
54 +
55 + return !status;
56 +}
57 +
58 void mzd_apply_p_left(packedmatrix *A, permutation *P) {
59 assert(A->offset == 0);
60 size_t i;
61 @@ -995,8 +1034,8 @@ void mzd_apply_p_left(packedmatrix *A, p
62
63 void mzd_apply_p_left_trans(packedmatrix *A, permutation *P) {
64 assert(A->offset == 0);
65 - size_t i;
66 - for (i=0; i<P->length; i++) {
67 + int i;
68 + for (i=P->length-1; i>=0; i--) {
69 if(P->values[i] != i)
70 mzd_row_swap(A, i, P->values[i]);
71 }
72 @@ -1004,8 +1043,8 @@ void mzd_apply_p_left_trans(packedmatrix
73
74 void mzd_apply_p_right_trans(packedmatrix *A, permutation *P) {
75 assert(A->offset == 0);
76 - size_t i;
77 - for (i=0; i<P->length; i++) {
78 + int i;
79 + for (i = P->length-1;i>=0; i--) {
80 if(P->values[i] != i)
81 mzd_col_swap(A, i, P->values[i]);
82 }
83 @@ -1014,7 +1053,7 @@ void mzd_apply_p_right(packedmatrix *A,
84 void mzd_apply_p_right(packedmatrix *A, permutation *P) {
85 assert(A->offset == 0);
86 size_t i;
87 - for (i=0; 0<P->length; i++) {
88 + for (i=0; i<P->length; i++) {
89 if(P->values[i] != i)
90 mzd_col_swap(A, i, P->values[i]);
91 }
92 diff -r 7d525bf8b16f -r 27b3032e924c src/packedmatrix.h
93 --- a/src/packedmatrix.h Sat Oct 11 10:16:18 2008 +0200
94 +++ b/src/packedmatrix.h Wed Oct 15 11:53:01 2008 +0200
95 @@ -762,6 +762,17 @@ static inline void mzd_clear_bits(const
96 }
97 }
98
99 +
100 +
101 +/**
102 + * \brief Zero test for matrix.
103 + *
104 + * \param A Input matrix.
105 + *
106 + */
107 +int mzd_test_zero(packedmatrix *A) ;
108 +
109 +
110 /**
111 * Rotate zero columns to the end.
112 *
113 diff -r 7d525bf8b16f -r 27b3032e924c src/parity.h
114 --- a/src/parity.h Sat Oct 11 10:16:18 2008 +0200
115 +++ b/src/parity.h Wed Oct 15 11:53:01 2008 +0200
116 @@ -5,6 +5,27 @@
117 *
118 * \author David Harvey
119 */
120 +
121 +#ifndef PARITY_H
122 +#define PARITY_H
123 + /*******************************************************************
124 + *
125 + * M4RI: Method of the Four Russians Inversion
126 + *
127 + * Copyright (C) 2008 David Harvey
128 + *
129 + * Distributed under the terms of the GNU General Public License (GPL)
130 + *
131 + * This code is distributed in the hope that it will be useful,
132 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
133 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
134 + * General Public License for more details.
135 + *
136 + * The full text of the GPL is available at:
137 + *
138 + * http://www.gnu.org/licenses/
139 + *
140 + ********************************************************************/
141
142 /**
143 * \brief Step for mixing two 64-bit words to compute their parity.
144 @@ -98,3 +119,5 @@ static inline word parity64(word* buf)
145
146 return MIX1(e1, e0);
147 }
148 +
149 +#endif
150 diff -r 7d525bf8b16f -r 27b3032e924c src/solve.c
151 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
152 +++ b/src/solve.c Wed Oct 15 11:53:01 2008 +0200
153 @@ -0,0 +1,151 @@
154 + /*******************************************************************
155 + *
156 + * M4RI: Method of the Four Russians Inversion
157 + *
158 + * Copyright (C) 2008 Jean-Guillaume.Dumas@imag.fr
159 + *
160 + * Distributed under the terms of the GNU General Public License (GPL)
161 + *
162 + * This code is distributed in the hope that it will be useful,
163 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
164 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
165 + * General Public License for more details.
166 + *
167 + * The full text of the GPL is available at:
168 + *
169 + * http://www.gnu.org/licenses/
170 + *
171 + ********************************************************************/
172 +
173 +#include "solve.h"
174 +#include "strassen.h"
175 +#include "pluq.h"
176 +#include "trsm.h"
177 +#include "permutation.h"
178 +
179 +/**
180 + * \brief System solving with matrix.
181 + *
182 + * Solves A X = B where A, and B are input matrices.
183 + * The solution X is stored inplace on B
184 + * *
185 + *
186 + * \param A Input matrix.
187 + * \param B Input matrix, being overwritten by the solution matrix X
188 + * \param cutoff Minimal dimension for Strassen recursion.
189 + *
190 + */
191 +void mzd_solve_left (packedmatrix *A, packedmatrix *B, const int cutoff, const int InconsistencyCheck)
192 +{
193 + if(A->ncols > B->nrows)
194 + m4ri_die("mzd_solve_left: A ncols (%d) need to be lower than B nrows (%d).\n", A->ncols, B->nrows);
195 + if (cutoff <= 0)
196 + m4ri_die("mzd_solve_left: cutoff must be > 0.\n");
197 +
198 + _mzd_solve_left (A, B, cutoff, InconsistencyCheck);
199 +}
200 +
201 +void mzd_pluq_solve_left (packedmatrix *A, size_t rank,
202 + permutation *P, permutation *Q,
203 + packedmatrix *B, const int cutoff, const int InconsistencyCheck)
204 +{
205 + if(A->ncols > B->nrows)
206 + m4ri_die("mzd_pluq_solve_left: A ncols (%d) need to be lower than B nrows (%d).\n", A->ncols, B->nrows);
207 + if(P->length != A->nrows)
208 + m4ri_die("mzd_pluq_solve_left: A nrows (%d) need to match P size (%d).\n", A->nrows, P->length);
209 + if(Q->length != A->ncols)
210 + m4ri_die("mzd_pluq_solve_left: A ncols (%d) need to match Q size (%d).\n", A->ncols, P->length);
211 +
212 + if (cutoff <= 0)
213 + m4ri_die("mzd_pluq_solve_left: cutoff must be > 0.\n");
214 +
215 + _mzd_pluq_solve_left (A, rank, P, Q, B, cutoff, InconsistencyCheck);
216 +}
217 +
218 +void _mzd_pluq_solve_left (packedmatrix *A, size_t rank,
219 + permutation *P, permutation *Q,
220 + packedmatrix *B, const int cutoff, const int InconsistencyCheck)
221 +{
222 + /** A is supposed to store L lower triangular and U upper triangular
223 + * B is modified in place
224 + * (Bi's in the comments are just modified versions of B)
225 + * PLUQ = A
226 + * 1°) P B2 = B1
227 + * 2°) L B3 = B2
228 + * 3°) U B4 = B3
229 + * 4°) Q B5 = B4
230 + */
231 +
232 + /* P B2 = B1 or B2 = P^T B1*/
233 + mzd_apply_p_left_trans(B, P);
234 +
235 + /* L B3 = B2 */
236 +
237 + /* view on the upper part of L */
238 + packedmatrix *LU = mzd_init_window(A,0,0,rank,rank);
239 + packedmatrix *Y1 = mzd_init_window(B,0,0,rank,B->ncols);
240 + _mzd_trsm_lower_left(LU, Y1, cutoff);
241 +
242 + if (InconsistencyCheck) {
243 + /* Check for inconsistency */
244 + /** FASTER without this check
245 + *
246 + * update with the lower part of L
247 + */
248 + packedmatrix *H = mzd_init_window(A,rank,0,A->nrows,rank);
249 + packedmatrix *Y2 = mzd_init_window(B,rank,0,B->nrows,B->ncols);
250 + mzd_addmul(Y2, H, Y1, cutoff);
251 + /*
252 + * test whether Y2 is the zero matrix
253 + */
254 + if(! mzd_test_zero(Y2) ) {
255 + printf("inconsistent system of size %d x %d\n", Y2->nrows, Y2->ncols);
256 + printf("Y2=");
257 + mzd_print_matrix(Y2);
258 + }
259 + mzd_free_window(H);
260 + mzd_free_window(Y2);
261 + }
262 + /* U B4 = B3 */
263 + _mzd_trsm_upper_left(LU, Y1, cutoff);
264 + mzd_free_window(LU);
265 + mzd_free_window(Y1);
266 +
267 + if (! InconsistencyCheck) {
268 + /** Default is to set the indefined bits to zero
269 + * if inconsistency has been checked then
270 + * Y2 bits are already all zeroes
271 + * thus this clearing is not needed
272 + */
273 + if (rank < B->nrows) mzd_clear_bits(B,rank,0, (B->nrows-rank)*B->ncols);
274 + }
275 +
276 + /* Q B5 = B4 or B5 = Q^T B4*/
277 + mzd_apply_p_left_trans(B, Q);
278 + /* P L U Q B5 = B1 */
279 +}
280 +
281 +
282 +void _mzd_solve_left (packedmatrix *A, packedmatrix *B, const int cutoff, const int InconsistencyCheck)
283 +{
284 + /**
285 + * B is modified in place
286 + * (Bi's in the comments are just modified versions of B)
287 + * 1°) PLUQ = A
288 + * 2°) P B2 = B1
289 + * 3°) L B3 = B2
290 + * 4°) U B4 = B3
291 + * 5°) Q B5 = B4
292 + */
293 + permutation * P = mzp_init(A->nrows);
294 + permutation * Q = mzp_init(A->ncols);
295 +
296 + /* PLUQ = A */
297 + size_t rank = _mzd_pluq(A, P, Q, cutoff);
298 + /* 2°, 3°, 4°, 5° */
299 + mzd_pluq_solve_left(A, rank, P, Q, B, cutoff, InconsistencyCheck);
300 +
301 + mzp_free(P);
302 + mzp_free(Q);
303 +}
304 +
305 diff -r 7d525bf8b16f -r 27b3032e924c src/solve.h
306 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
307 +++ b/src/solve.h Wed Oct 15 11:53:01 2008 +0200
308 @@ -0,0 +1,123 @@
309 +/**
310 + * \file solve.h
311 + *
312 + * \brief system solving with matrix routines.
313 + *
314 + * \author Jean-Guillaume Dumas <Jean-Guillaume.Dumas@imag.fr>
315 + *
316 + */
317 +#ifndef SOLVE_H
318 +#define SOLVE_H
319 + /*******************************************************************
320 + *
321 + * M4RI: Method of the Four Russians Inversion
322 + *
323 + * Copyright (C) 2008 Jean-Guillaume.Dumas@imag.fr
324 + *
325 + * Distributed under the terms of the GNU General Public License (GPL)
326 + *
327 + * This code is distributed in the hope that it will be useful,
328 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
329 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
330 + * General Public License for more details.
331 + *
332 + * The full text of the GPL is available at:
333 + *
334 + * http://www.gnu.org/licenses/
335 + *
336 + ********************************************************************/
337 +
338 +
339 +#include "misc.h"
340 +#include "packedmatrix.h"
341 +#include "stdio.h"
342 +
343 +/**
344 + * \brief System solving with matrix.
345 + *
346 + * Solves A X = B where A, and B are input matrices.
347 + * The solution X is stored inplace on B
348 + *
349 + * \param A Input matrix.
350 + * \param B Input matrix, being overwritten by the solution matrix X
351 + * \param cutoff Minimal dimension for Strassen recursion.
352 + * \param InconsistencyCheck decide wether or not to check for incosistency (faster without but output not defined if system is not consistent).
353 + *
354 + */
355 +void mzd_solve_left (packedmatrix *A, packedmatrix *B, const int cutoff, const int InconsistencyCheck);
356 +
357 +
358 +/**
359 + * \brief System solving with matrix.
360 + *
361 + * Solves (P L U Q) X = B where
362 + * A is an input matrix supposed to store both:
363 + * an upper right triangular matrix U
364 + * a lower left unitary triangular matrix L
365 + * P and Q are permutation matrices
366 + * B is the input matrix to be solved.
367 + * The solution X is stored inplace on B
368 + * This version assumes that the matrices are at an even position on
369 + * the RADIX grid and that their dimension is a multiple of RADIX.
370 + *
371 + * \param A Input upper/lower triangular matrices.
372 + * \param rank is rank of A.
373 + * \param P Input row permutation matrix.
374 + * \param Q Input column permutation matrix.
375 + * \param B Input matrix, being overwritten by the solution matrix X.
376 + * \param cutoff Minimal dimension for Strassen recursion.
377 + * \param InconsistencyCheck decide whether or not to check for incosistency (faster without but output not defined if system is not consistent).
378 + *
379 + */
380 +void mzd_pluq_solve_left (packedmatrix *A, size_t rank,
381 + permutation *P, permutation *Q,
382 + packedmatrix *B, const int cutoff, const int InconsistencyCheck);
383 +
384 +
385 +
386 +/**
387 + * \brief System solving with matrix.
388 + *
389 + * Solves (P L U Q) X = B where
390 + * A is an input matrix supposed to store both:
391 + * an upper right triangular matrix U
392 + * a lower left unitary triangular matrix L
393 + * P and Q are permutation matrices
394 + * B is the input matrix to be solved.
395 + * The solution X is stored inplace on B
396 + * This version assumes that the matrices are at an even position on
397 + * the RADIX grid and that their dimension is a multiple of RADIX.
398 + *
399 + * \param A Input upper/lower triangular matrices.
400 + * \param rank is rank of A.
401 + * \param P Input row permutation matrix.
402 + * \param Q Input column permutation matrix.
403 + * \param B Input matrix, being overwritten by the solution matrix X.
404 + * \param cutoff Minimal dimension for Strassen recursion.
405 + * \param InconsistencyCheck decide whether or not to check for incosistency (faster without but output not defined if system is not consistent).
406 + *
407 + * \internal
408 + */
409 +void _mzd_pluq_solve_left (packedmatrix *A, size_t rank,
410 + permutation *P, permutation *Q,
411 + packedmatrix *B, const int cutoff, const int InconsistencyCheck);
412 +
413 +/**
414 + * \brief System solving with matrix.
415 + *
416 + * Solves A X = B where A, and B are input matrices.
417 + * The solution X is stored inplace on B
418 + * This version assumes that the matrices are at an even position on
419 + * the RADIX grid and that their dimension is a multiple of RADIX.
420 + *
421 + * \param A Input matrix.
422 + * \param B Input matrix, being overwritten by the solution matrix X.
423 + * \param cutoff Minimal dimension for Strassen recursion.
424 + * \param InconsistencyCheck decide whether or not to check for incosistency (faster without but output not defined if system is not consistent).
425 + *
426 + * \internal
427 + */
428 +void _mzd_solve_left (packedmatrix *A, packedmatrix *B, const int cutoff, const int InconsistencyCheck);
429 +
430 +
431 +#endif // SOLVE_H
432 diff -r 7d525bf8b16f -r 27b3032e924c testsuite/test_solve.c
433 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
434 +++ b/testsuite/test_solve.c Wed Oct 15 11:53:01 2008 +0200
435 @@ -0,0 +1,156 @@
436 +#include <stdlib.h>
437 +#include "m4ri.h"
438 +#include "strassen.h"
439 +#include "trsm.h"
440 +#include "solve.h"
441 +
442 +int test_pluq_solve_left (int m, int n, int offsetA, int offsetB){
443 + packedmatrix* Abase = mzd_init (2048,2048);
444 + packedmatrix* Bbase = mzd_init (2048,2048);
445 + mzd_randomize (Abase);
446 + mzd_randomize (Bbase);
447 + packedmatrix* Bbasecopy = mzd_copy (NULL, Bbase);
448 +
449 + packedmatrix* A = mzd_init_window (Abase, 0, offsetA, m, offsetA + m);
450 + packedmatrix* B = mzd_init_window (Bbase, 0, offsetB, m, offsetB + n);
451 +
452 + size_t i,j;
453 +
454 + packedmatrix* W = mzd_init (B->nrows, B->ncols);
455 + for ( i=0; i<B->nrows; ++i)
456 + for ( j=0; j<B->ncols; ++j)
457 + mzd_write_bit(W,i,j, mzd_read_bit (B,i,j));
458 +
459 + for (i=0; i<m; ++i){
460 + mzd_write_bit(A,i,i, 1);
461 + }
462 +
463 + mzd_solve_left(A, B, 2048, 1);
464 +
465 +/* mzd_addmul(W, A, B, 2048); */
466 + packedmatrix *L = mzd_init(A->nrows,A->nrows);
467 + for ( i=0; i<m; ++i)
468 + for ( j=0; j<=i; ++j)
469 + if (mzd_read_bit (A,i,j))
470 + mzd_write_bit(L,i,j,1);
471 +
472 +
473 + packedmatrix *U = mzd_init(A->nrows,A->ncols);
474 + for ( i=0; i<A->nrows; ++i)
475 + for ( j=i; j<A->ncols; ++j)
476 + if (mzd_read_bit (A,i,j))
477 + mzd_write_bit(U,i,j,1);
478 +
479 + packedmatrix *X = mzd_init(B->nrows,B->ncols);
480 + for ( i=0; i<B->nrows; ++i)
481 + for ( j=0; j<B->ncols; ++j)
482 + mzd_write_bit(X,i,j, mzd_read_bit (B,i,j));
483 + packedmatrix *T = mzd_init(A->nrows,B->ncols);
484 + mzd_mul(T, U, X, 2048);
485 +
486 + packedmatrix *H = mzd_init(B->nrows,B->ncols);
487 + mzd_mul(H, L, T, 2048);
488 +
489 + packedmatrix *Z = mzd_init(B->nrows,B->ncols);
490 +
491 + mzd_add(Z, W, H);
492 +
493 + int status = 0;
494 + for ( i=0; i<m; ++i)
495 + for ( j=0; j<n; ++j){
496 + if (mzd_read_bit (Z,i,j)){
497 + status = 1;
498 + }
499 + }
500 +/*
501 + // Verifiying that nothing has been changed around the submatrices
502 + mzd_addmul(W, A, B, 2048);
503 +
504 + mzd_copy (B, W);
505 +
506 + for ( i=0; i<2048; ++i)
507 + for ( j=0; j<2048/RADIX; ++j){
508 + if (Bbase->values [Bbase->rowswap[i] + j] != Bbasecopy->values [Bbasecopy->rowswap[i] + j]){
509 + status = 1;
510 + }
511 + }
512 +*/
513 +
514 + mzd_free(L);
515 + mzd_free(U);
516 + mzd_free(T);
517 + mzd_free(H);
518 + mzd_free(Z);
519 + mzd_free_window (A);
520 + mzd_free_window (B);
521 + mzd_free_window (W);
522 + mzd_free(Abase);
523 + mzd_free(Bbase);
524 + mzd_free(Bbasecopy);
525 +
526 + if (!status)
527 + printf("passed\n");
528 + else
529 + printf("failed\n");
530 + return status;
531 +}
532 +
533 +int main(int argc, char **argv) {
534 + int status = 0;
535 +
536 + printf("SolveLeft: small A even, small B odd ... ");
537 + status += test_pluq_solve_left (2, 4, 0, 1);
538 +
539 + printf("SolveLeft: small A even, small B even ... ");
540 + status += test_pluq_solve_left (10, 20, 0, 0);
541 + printf("SolveLeft: small A even, large B even ... ");
542 + status += test_pluq_solve_left (10, 80, 0, 0);
543 +
544 + printf("SolveLeft: small A even, small B odd ... ");
545 + status += test_pluq_solve_left (10, 20, 0, 15);
546 + printf("SolveLeft: small A even, large B odd ... ");
547 + status += test_pluq_solve_left (10, 80, 0, 15);
548 +
549 + printf("SolveLeft: small A odd, small B even ... ");
550 + status += test_pluq_solve_left (10, 20, 15, 0);
551 + printf("SolveLeft: small A odd, large B even ... ");
552 + status += test_pluq_solve_left (10, 80, 15, 0);
553 +
554 + printf("SolveLeft: small A odd, small B odd ... ");
555 + status += test_pluq_solve_left (10, 20, 15, 20);
556 + printf("SolveLeft: small A odd, large B odd ... ");
557 + status += test_pluq_solve_left (10, 80, 15, 20);
558 +
559 + printf("SolveLeft: large A even, small B even ... ");
560 + status += test_pluq_solve_left (70, 20, 0, 0);
561 + printf("SolveLeft: large A even, large B even ... ");
562 + status += test_pluq_solve_left (70, 80, 0, 0);
563 +
564 + printf("SolveLeft: large A even, small B odd ... ");
565 + status += test_pluq_solve_left (70, 10, 0, 15);
566 + printf("SolveLeft: large A even, large B odd ... ");
567 + status += test_pluq_solve_left (70, 80, 0, 15);
568 +
569 + printf("SolveLeft: large A odd, small B even ... ");
570 + status += test_pluq_solve_left (70, 20, 15, 0);
571 + printf("SolveLeft: large A odd, large B even ... ");
572 + status += test_pluq_solve_left (70, 80, 15, 0);
573 +
574 + printf("SolveLeft: large A odd, small B odd ... ");
575 + status += test_pluq_solve_left (70, 20, 15, 20);
576 + printf("SolveLeft: large A odd, large B odd ... ");
577 + status += test_pluq_solve_left (70, 80, 15, 20);
578 +
579 + printf("SolveLeft: larger A odd, larger B odd ... ");
580 + status += test_pluq_solve_left (770, 1600, 75, 89);
581 + printf("SolveLeft: larger A odd, larger B odd ... ");
582 + status += test_pluq_solve_left (1764, 1345, 198, 123);
583 +
584 +
585 + if (!status) {
586 + printf("All tests passed.\n");
587 + }
588 +
589 + m4ri_destroy_all_codes();
590 + return 0;
591 +}
592
Attached Files
To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily.You are not allowed to attach a file to this page.