View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  /*
19   * This is not the original file distributed by the Apache Software Foundation
20   * It has been modified by the Hipparchus project
21   */
22  
23  package org.hipparchus.linear;
24  
25  import java.io.Serializable;
26  
27  import org.hipparchus.Field;
28  import org.hipparchus.FieldElement;
29  import org.hipparchus.exception.LocalizedCoreFormats;
30  import org.hipparchus.exception.MathIllegalArgumentException;
31  import org.hipparchus.exception.NullArgumentException;
32  import org.hipparchus.util.FastMath;
33  import org.hipparchus.util.MathArrays;
34  import org.hipparchus.util.MathUtils;
35  
36  /**
37   * Cache-friendly implementation of FieldMatrix using a flat arrays to store
38   * square blocks of the matrix.
39   * <p>
40   * This implementation is specially designed to be cache-friendly. Square blocks are
41   * stored as small arrays and allow efficient traversal of data both in row major direction
42   * and columns major direction, one block at a time. This greatly increases performances
43   * for algorithms that use crossed directions loops like multiplication or transposition.
44   * </p>
45   * <p>
46   * The size of square blocks is a static parameter. It may be tuned according to the cache
47   * size of the target computer processor. As a rule of thumbs, it should be the largest
48   * value that allows three blocks to be simultaneously cached (this is necessary for example
49   * for matrix multiplication). The default value is to use 36x36 blocks.
50   * </p>
51   * <p>
52   * The regular blocks represent {@link #BLOCK_SIZE} x {@link #BLOCK_SIZE} squares. Blocks
53   * at right hand side and bottom side which may be smaller to fit matrix dimensions. The square
54   * blocks are flattened in row major order in single dimension arrays which are therefore
55   * {@link #BLOCK_SIZE}<sup>2</sup> elements long for regular blocks. The blocks are themselves
56   * organized in row major order.
57   * </p>
58   * <p>
59   * As an example, for a block size of 36x36, a 100x60 matrix would be stored in 6 blocks.
60   * Block 0 would be a Field[1296] array holding the upper left 36x36 square, block 1 would be
61   * a Field[1296] array holding the upper center 36x36 square, block 2 would be a Field[1008]
62   * array holding the upper right 36x28 rectangle, block 3 would be a Field[864] array holding
63   * the lower left 24x36 rectangle, block 4 would be a Field[864] array holding the lower center
64   * 24x36 rectangle and block 5 would be a Field[672] array holding the lower right 24x28
65   * rectangle.
66   * </p>
67   * <p>
68   * The layout complexity overhead versus simple mapping of matrices to java
69   * arrays is negligible for small matrices (about 1%). The gain from cache efficiency leads
70   * to up to 3-fold improvements for matrices of moderate to large size.
71   * </p>
72   * @param <T> the type of the field elements
73   */
74  public class BlockFieldMatrix<T extends FieldElement<T>> extends AbstractFieldMatrix<T> implements Serializable {
75      /** Block size. */
76      public static final int BLOCK_SIZE = 36;
77      /** Serializable version identifier. */
78      private static final long serialVersionUID = -4602336630143123183L;
79      /** Blocks of matrix entries. */
80      private final T[][] blocks;
81      /** Number of rows of the matrix. */
82      private final int rows;
83      /** Number of columns of the matrix. */
84      private final int columns;
85      /** Number of block rows of the matrix. */
86      private final int blockRows;
87      /** Number of block columns of the matrix. */
88      private final int blockColumns;
89  
90      /**
91       * Create a new matrix with the supplied row and column dimensions.
92       *
93       * @param field Field to which the elements belong.
94       * @param rows Number of rows in the new matrix.
95       * @param columns Number of columns in the new matrix.
96       * @throws MathIllegalArgumentException if row or column dimension is not
97       * positive.
98       */
99      public BlockFieldMatrix(final Field<T> field, final int rows,
100                             final int columns)
101         throws MathIllegalArgumentException {
102         super(field, rows, columns);
103         this.rows    = rows;
104         this.columns = columns;
105 
106         // number of blocks
107         blockRows    = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
108         blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
109 
110         // allocate storage blocks, taking care of smaller ones at right and bottom
111         blocks = createBlocksLayout(field, rows, columns);
112     }
113 
114     /**
115      * Create a new dense matrix copying entries from raw layout data.
116      * <p>The input array <em>must</em> already be in raw layout.</p>
117      * <p>Calling this constructor is equivalent to call:</p>
118      * <pre>matrix = new BlockFieldMatrix&lt;T&gt;(getField(), rawData.length, rawData[0].length,
119      *                                 toBlocksLayout(rawData), false);</pre>
120      *
121      * @param rawData Data for the new matrix, in raw layout.
122      * @throws MathIllegalArgumentException if the {@code blockData} shape is
123      * inconsistent with block layout.
124      * @see #BlockFieldMatrix(int, int, FieldElement[][], boolean)
125      */
126     public BlockFieldMatrix(final T[][] rawData)
127         throws MathIllegalArgumentException {
128         this(rawData.length, rawData[0].length, toBlocksLayout(rawData), false);
129     }
130 
131     /**
132      * Create a new dense matrix copying entries from block layout data.
133      * <p>The input array <em>must</em> already be in blocks layout.</p>
134      * @param rows  the number of rows in the new matrix
135      * @param columns  the number of columns in the new matrix
136      * @param blockData data for new matrix
137      * @param copyArray if true, the input array will be copied, otherwise
138      * it will be referenced
139      *
140      * @throws MathIllegalArgumentException if the {@code blockData} shape is
141      * inconsistent with block layout.
142      * @throws MathIllegalArgumentException if row or column dimension is not
143      * positive.
144      * @see #createBlocksLayout(Field, int, int)
145      * @see #toBlocksLayout(FieldElement[][])
146      * @see #BlockFieldMatrix(FieldElement[][])
147      */
148     public BlockFieldMatrix(final int rows, final int columns,
149                             final T[][] blockData, final boolean copyArray)
150         throws MathIllegalArgumentException {
151         super(extractField(blockData), rows, columns);
152         this.rows    = rows;
153         this.columns = columns;
154 
155         // number of blocks
156         blockRows    = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
157         blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
158 
159         if (copyArray) {
160             // allocate storage blocks, taking care of smaller ones at right and bottom
161             blocks = MathArrays.buildArray(getField(), blockRows * blockColumns, -1);
162         } else {
163             // reference existing array
164             blocks = blockData; // NOPMD - array copy is taken care of by parameter
165         }
166 
167         int index = 0;
168         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
169             final int iHeight = blockHeight(iBlock);
170             for (int jBlock = 0; jBlock < blockColumns; ++jBlock, ++index) {
171                 if (blockData[index].length != iHeight * blockWidth(jBlock)) {
172                     throw new MathIllegalArgumentException(LocalizedCoreFormats.DIMENSIONS_MISMATCH,
173                                                            blockData[index].length,
174                                                            iHeight * blockWidth(jBlock));
175                 }
176                 if (copyArray) {
177                     blocks[index] = blockData[index].clone();
178                 }
179             }
180         }
181     }
182 
183     /**
184      * Convert a data array from raw layout to blocks layout.
185      * <p>
186      * Raw layout is the straightforward layout where element at row i and
187      * column j is in array element <code>rawData[i][j]</code>. Blocks layout
188      * is the layout used in {@link BlockFieldMatrix} instances, where the matrix
189      * is split in square blocks (except at right and bottom side where blocks may
190      * be rectangular to fit matrix size) and each block is stored in a flattened
191      * one-dimensional array.
192      * </p>
193      * <p>
194      * This method creates an array in blocks layout from an input array in raw layout.
195      * It can be used to provide the array argument of the {@link
196      * #BlockFieldMatrix(int, int, FieldElement[][], boolean)}
197      * constructor.
198      * </p>
199      * @param <T> Type of the field elements.
200      * @param rawData Data array in raw layout.
201      * @return a new data array containing the same entries but in blocks layout
202      * @throws MathIllegalArgumentException if {@code rawData} is not rectangular
203      *  (not all rows have the same length).
204      * @see #createBlocksLayout(Field, int, int)
205      * @see #BlockFieldMatrix(int, int, FieldElement[][], boolean)
206      */
207     public static <T extends FieldElement<T>> T[][] toBlocksLayout(final T[][] rawData)
208         throws MathIllegalArgumentException {
209 
210         final int rows         = rawData.length;
211         final int columns      = rawData[0].length;
212         final int blockRows    = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
213         final int blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
214 
215         // safety checks
216         for (int i = 0; i < rawData.length; ++i) {
217             final int length = rawData[i].length;
218             if (length != columns) {
219                 throw new MathIllegalArgumentException(LocalizedCoreFormats.DIMENSIONS_MISMATCH,
220                                                        columns, length);
221             }
222         }
223 
224         // convert array
225         final Field<T> field = extractField(rawData);
226         final T[][] blocks = MathArrays.buildArray(field, blockRows * blockColumns, -1);
227         int blockIndex = 0;
228         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
229             final int pStart  = iBlock * BLOCK_SIZE;
230             final int pEnd    = FastMath.min(pStart + BLOCK_SIZE, rows);
231             final int iHeight = pEnd - pStart;
232             for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
233                 final int qStart = jBlock * BLOCK_SIZE;
234                 final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
235                 final int jWidth = qEnd - qStart;
236 
237                 // allocate new block
238                 final T[] block = MathArrays.buildArray(field, iHeight * jWidth);
239                 blocks[blockIndex] = block;
240 
241                 // copy data
242                 int index = 0;
243                 for (int p = pStart; p < pEnd; ++p) {
244                     System.arraycopy(rawData[p], qStart, block, index, jWidth);
245                     index += jWidth;
246                 }
247 
248                 ++blockIndex;
249             }
250         }
251 
252         return blocks;
253     }
254 
255     /**
256      * Create a data array in blocks layout.
257      * <p>
258      * This method can be used to create the array argument of the {@link
259      * #BlockFieldMatrix(int, int, FieldElement[][], boolean)}
260      * constructor.
261      * </p>
262      * @param <T> Type of the field elements.
263      * @param field Field to which the elements belong.
264      * @param rows Number of rows in the new matrix.
265      * @param columns Number of columns in the new matrix.
266      * @return a new data array in blocks layout.
267      * @see #toBlocksLayout(FieldElement[][])
268      * @see #BlockFieldMatrix(int, int, FieldElement[][], boolean)
269      */
270     public static <T extends FieldElement<T>> T[][] createBlocksLayout(final Field<T> field,
271                                                                        final int rows, final int columns) {
272         final int blockRows    = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
273         final int blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
274 
275         final T[][] blocks = MathArrays.buildArray(field, blockRows * blockColumns, -1);
276         int blockIndex = 0;
277         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
278             final int pStart  = iBlock * BLOCK_SIZE;
279             final int pEnd    = FastMath.min(pStart + BLOCK_SIZE, rows);
280             final int iHeight = pEnd - pStart;
281             for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
282                 final int qStart = jBlock * BLOCK_SIZE;
283                 final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
284                 final int jWidth = qEnd - qStart;
285                 blocks[blockIndex] = MathArrays.buildArray(field, iHeight * jWidth);
286                 ++blockIndex;
287             }
288         }
289 
290         return blocks;
291     }
292 
293     /** {@inheritDoc} */
294     @Override
295     public FieldMatrix<T> createMatrix(final int rowDimension,
296                                        final int columnDimension)
297         throws MathIllegalArgumentException {
298         return new BlockFieldMatrix<T>(getField(), rowDimension,
299                                        columnDimension);
300     }
301 
302     /** {@inheritDoc} */
303     @Override
304     public FieldMatrix<T> copy() {
305 
306         // create an empty matrix
307         BlockFieldMatrix<T> copied = new BlockFieldMatrix<>(getField(), rows, columns);
308 
309         // copy the blocks
310         for (int i = 0; i < blocks.length; ++i) {
311             System.arraycopy(blocks[i], 0, copied.blocks[i], 0, blocks[i].length);
312         }
313 
314         return copied;
315     }
316 
317     /** {@inheritDoc} */
318     @Override
319     public FieldMatrix<T> add(final FieldMatrix<T> m)
320         throws MathIllegalArgumentException {
321         if (m instanceof BlockFieldMatrix) {
322             return add((BlockFieldMatrix<T>) m);
323         } else {
324 
325             // safety check
326             checkAdditionCompatible(m);
327 
328             final BlockFieldMatrix<T> out = new BlockFieldMatrix<>(getField(), rows, columns);
329 
330             // perform addition block-wise, to ensure good cache behavior
331             int blockIndex = 0;
332             for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
333                 for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
334 
335                     // perform addition on the current block
336                     final T[] outBlock = out.blocks[blockIndex];
337                     final T[] tBlock   = blocks[blockIndex];
338                     final int      pStart   = iBlock * BLOCK_SIZE;
339                     final int      pEnd     = FastMath.min(pStart + BLOCK_SIZE, rows);
340                     final int      qStart   = jBlock * BLOCK_SIZE;
341                     final int      qEnd     = FastMath.min(qStart + BLOCK_SIZE, columns);
342                     int k = 0;
343                     for (int p = pStart; p < pEnd; ++p) {
344                         for (int q = qStart; q < qEnd; ++q) {
345                             outBlock[k] = tBlock[k].add(m.getEntry(p, q));
346                             ++k;
347                         }
348                     }
349 
350                     // go to next block
351                     ++blockIndex;
352 
353                 }
354             }
355 
356             return out;
357         }
358     }
359 
360     /**
361      * Compute the sum of {@code this} and {@code m}.
362      *
363      * @param m matrix to be added
364      * @return {@code this + m}
365      * @throws MathIllegalArgumentException if {@code m} is not the same
366      * size as {@code this}
367      */
368     public BlockFieldMatrix<T> add(final BlockFieldMatrix<T> m)
369         throws MathIllegalArgumentException {
370 
371         // safety check
372         checkAdditionCompatible(m);
373 
374         final BlockFieldMatrix<T> out = new BlockFieldMatrix<>(getField(), rows, columns);
375 
376         // perform addition block-wise, to ensure good cache behavior
377         for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
378             final T[] outBlock = out.blocks[blockIndex];
379             final T[] tBlock   = blocks[blockIndex];
380             final T[] mBlock   = m.blocks[blockIndex];
381             for (int k = 0; k < outBlock.length; ++k) {
382                 outBlock[k] = tBlock[k].add(mBlock[k]);
383             }
384         }
385 
386         return out;
387     }
388 
389     /** {@inheritDoc} */
390     @Override
391     public FieldMatrix<T> subtract(final FieldMatrix<T> m)
392         throws MathIllegalArgumentException {
393         if (m instanceof BlockFieldMatrix) {
394             return subtract((BlockFieldMatrix<T>) m);
395         } else {
396 
397             // safety check
398             checkSubtractionCompatible(m);
399 
400             final BlockFieldMatrix<T> out = new BlockFieldMatrix<>(getField(), rows, columns);
401 
402             // perform subtraction block-wise, to ensure good cache behavior
403             int blockIndex = 0;
404             for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
405                 for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
406 
407                     // perform subtraction on the current block
408                     final T[] outBlock = out.blocks[blockIndex];
409                     final T[] tBlock   = blocks[blockIndex];
410                     final int      pStart   = iBlock * BLOCK_SIZE;
411                     final int      pEnd     = FastMath.min(pStart + BLOCK_SIZE, rows);
412                     final int      qStart   = jBlock * BLOCK_SIZE;
413                     final int      qEnd     = FastMath.min(qStart + BLOCK_SIZE, columns);
414                     int k = 0;
415                     for (int p = pStart; p < pEnd; ++p) {
416                         for (int q = qStart; q < qEnd; ++q) {
417                             outBlock[k] = tBlock[k].subtract(m.getEntry(p, q));
418                             ++k;
419                         }
420                     }
421 
422                     // go to next block
423                     ++blockIndex;
424 
425                 }
426             }
427 
428             return out;
429         }
430     }
431 
432     /**
433      * Compute {@code this - m}.
434      *
435      * @param m matrix to be subtracted
436      * @return {@code this - m}
437      * @throws MathIllegalArgumentException if {@code m} is not the same
438      * size as {@code this}
439      */
440     public BlockFieldMatrix<T> subtract(final BlockFieldMatrix<T> m) throws MathIllegalArgumentException {
441         // safety check
442         checkSubtractionCompatible(m);
443 
444         final BlockFieldMatrix<T> out = new BlockFieldMatrix<>(getField(), rows, columns);
445 
446         // perform subtraction block-wise, to ensure good cache behavior
447         for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
448             final T[] outBlock = out.blocks[blockIndex];
449             final T[] tBlock   = blocks[blockIndex];
450             final T[] mBlock   = m.blocks[blockIndex];
451             for (int k = 0; k < outBlock.length; ++k) {
452                 outBlock[k] = tBlock[k].subtract(mBlock[k]);
453             }
454         }
455 
456         return out;
457     }
458 
459     /** {@inheritDoc} */
460     @Override
461     public FieldMatrix<T> scalarAdd(final T d) {
462         final BlockFieldMatrix<T> out = new BlockFieldMatrix<>(getField(), rows, columns);
463 
464         // perform subtraction block-wise, to ensure good cache behavior
465         for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
466             final T[] outBlock = out.blocks[blockIndex];
467             final T[] tBlock   = blocks[blockIndex];
468             for (int k = 0; k < outBlock.length; ++k) {
469                 outBlock[k] = tBlock[k].add(d);
470             }
471         }
472 
473         return out;
474     }
475 
476     /** {@inheritDoc} */
477     @Override
478     public FieldMatrix<T> scalarMultiply(final T d) {
479 
480         final BlockFieldMatrix<T> out = new BlockFieldMatrix<>(getField(), rows, columns);
481 
482         // perform subtraction block-wise, to ensure good cache behavior
483         for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
484             final T[] outBlock = out.blocks[blockIndex];
485             final T[] tBlock   = blocks[blockIndex];
486             for (int k = 0; k < outBlock.length; ++k) {
487                 outBlock[k] = tBlock[k].multiply(d);
488             }
489         }
490 
491         return out;
492     }
493 
494     /** {@inheritDoc} */
495     @Override
496     public FieldMatrix<T> multiply(final FieldMatrix<T> m)
497         throws MathIllegalArgumentException {
498         if (m instanceof BlockFieldMatrix) {
499             return multiply((BlockFieldMatrix<T>) m);
500         } else {
501 
502             // safety check
503             checkMultiplicationCompatible(m);
504 
505             final BlockFieldMatrix<T> out = new BlockFieldMatrix<>(getField(), rows, m.getColumnDimension());
506             final T zero = getField().getZero();
507 
508             // perform multiplication block-wise, to ensure good cache behavior
509             int blockIndex = 0;
510             for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
511 
512                 final int pStart = iBlock * BLOCK_SIZE;
513                 final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
514 
515                 for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
516 
517                     final int qStart = jBlock * BLOCK_SIZE;
518                     final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, m.getColumnDimension());
519 
520                     // select current block
521                     final T[] outBlock = out.blocks[blockIndex];
522 
523                     // perform multiplication on current block
524                     for (int kBlock = 0; kBlock < blockColumns; ++kBlock) {
525                         final int kWidth      = blockWidth(kBlock);
526                         final T[] tBlock = blocks[iBlock * blockColumns + kBlock];
527                         final int rStart      = kBlock * BLOCK_SIZE;
528                         int k = 0;
529                         for (int p = pStart; p < pEnd; ++p) {
530                             final int lStart = (p - pStart) * kWidth;
531                             final int lEnd   = lStart + kWidth;
532                             for (int q = qStart; q < qEnd; ++q) {
533                                 T sum = zero;
534                                 int r = rStart;
535                                 for (int l = lStart; l < lEnd; ++l) {
536                                     sum = sum.add(tBlock[l].multiply(m.getEntry(r, q)));
537                                     ++r;
538                                 }
539                                 outBlock[k] = outBlock[k].add(sum);
540                                 ++k;
541                             }
542                         }
543                     }
544 
545                     // go to next block
546                     ++blockIndex;
547 
548                 }
549             }
550 
551             return out;
552         }
553     }
554 
555     /**
556      * Returns the result of postmultiplying {@code this} by {@code m}.
557      *
558      * @param m matrix to postmultiply by
559      * @return {@code this * m}
560      * @throws MathIllegalArgumentException if the matrices are not compatible.
561      */
562     public BlockFieldMatrix<T> multiply(BlockFieldMatrix<T> m)
563         throws MathIllegalArgumentException {
564 
565         // safety check
566         checkMultiplicationCompatible(m);
567 
568         final BlockFieldMatrix<T> out = new BlockFieldMatrix<>(getField(), rows, m.columns);
569         final T zero = getField().getZero();
570 
571         // perform multiplication block-wise, to ensure good cache behavior
572         int blockIndex = 0;
573         for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
574 
575             final int pStart = iBlock * BLOCK_SIZE;
576             final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
577 
578             for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
579                 final int jWidth = out.blockWidth(jBlock);
580                 final int jWidth2 = jWidth  + jWidth;
581                 final int jWidth3 = jWidth2 + jWidth;
582                 final int jWidth4 = jWidth3 + jWidth;
583 
584                 // select current block
585                 final T[] outBlock = out.blocks[blockIndex];
586 
587                 // perform multiplication on current block
588                 for (int kBlock = 0; kBlock < blockColumns; ++kBlock) {
589                     final int kWidth = blockWidth(kBlock);
590                     final T[] tBlock = blocks[iBlock * blockColumns + kBlock];
591                     final T[] mBlock = m.blocks[kBlock * m.blockColumns + jBlock];
592                     int k = 0;
593                     for (int p = pStart; p < pEnd; ++p) {
594                         final int lStart = (p - pStart) * kWidth;
595                         final int lEnd   = lStart + kWidth;
596                         for (int nStart = 0; nStart < jWidth; ++nStart) {
597                             T sum = zero;
598                             int l = lStart;
599                             int n = nStart;
600                             while (l < lEnd - 3) {
601                                 sum = sum.
602                                       add(tBlock[l].multiply(mBlock[n])).
603                                       add(tBlock[l + 1].multiply(mBlock[n + jWidth])).
604                                       add(tBlock[l + 2].multiply(mBlock[n + jWidth2])).
605                                       add(tBlock[l + 3].multiply(mBlock[n + jWidth3]));
606                                 l += 4;
607                                 n += jWidth4;
608                             }
609                             while (l < lEnd) {
610                                 sum = sum.add(tBlock[l++].multiply(mBlock[n]));
611                                 n += jWidth;
612                             }
613                             outBlock[k] = outBlock[k].add(sum);
614                             ++k;
615                         }
616                     }
617                 }
618 
619                 // go to next block
620                 ++blockIndex;
621             }
622         }
623 
624         return out;
625     }
626 
627     /**
628      * Returns the result of postmultiplying {@code this} by {@code m^T}.
629      * @param m matrix to first transpose and second postmultiply by
630      * @return {@code this * m}
631      * @throws MathIllegalArgumentException if
632      * {@code columnDimension(this) != columnDimension(m)}
633      * @since 1.3
634      */
635     public BlockFieldMatrix<T> multiplyTransposed(BlockFieldMatrix<T> m)
636         throws MathIllegalArgumentException {
637         // safety check
638         MatrixUtils.checkSameColumnDimension(this, m);
639 
640         final BlockFieldMatrix<T> out = new BlockFieldMatrix<>(getField(), rows, m.rows);
641 
642         // perform multiplication block-wise, to ensure good cache behavior
643         int blockIndex = 0;
644         for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
645 
646             final int pStart = iBlock * BLOCK_SIZE;
647             final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
648 
649             for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
650                 final int jWidth = out.blockWidth(jBlock);
651 
652                 // select current block
653                 final T[] outBlock = out.blocks[blockIndex];
654 
655                 // perform multiplication on current block
656                 for (int kBlock = 0; kBlock < blockColumns; ++kBlock) {
657                     final int kWidth = blockWidth(kBlock);
658                     final T[] tBlock = blocks[iBlock * blockColumns + kBlock];
659                     final T[] mBlock = m.blocks[jBlock * m.blockColumns + kBlock];
660                     int k = 0;
661                     for (int p = pStart; p < pEnd; ++p) {
662                         final int lStart = (p - pStart) * kWidth;
663                         final int lEnd   = lStart + kWidth;
664                         for (int nStart = 0; nStart < jWidth * kWidth; nStart += kWidth) {
665                             T sum = getField().getZero();
666                             int l = lStart;
667                             int n = nStart;
668                             while (l < lEnd - 3) {
669                                 sum = sum.
670                                       add(tBlock[l].multiply(mBlock[n])).
671                                       add(tBlock[l + 1].multiply(mBlock[n + 1])).
672                                       add(tBlock[l + 2].multiply(mBlock[n + 2])).
673                                       add(tBlock[l + 3].multiply(mBlock[n + 3]));
674                                 l += 4;
675                                 n += 4;
676                             }
677                             while (l < lEnd) {
678                                 sum = sum.add(tBlock[l++].multiply(mBlock[n++]));
679                             }
680                             outBlock[k] = outBlock[k].add(sum);
681                             ++k;
682                         }
683                     }
684                 }
685                 // go to next block
686                 ++blockIndex;
687             }
688         }
689 
690         return out;
691     }
692 
693     /** {@inheritDoc} */
694     @Override
695     public BlockFieldMatrix<T> multiplyTransposed(final FieldMatrix<T> m)
696         throws MathIllegalArgumentException {
697         if (m instanceof BlockFieldMatrix) {
698             return multiplyTransposed((BlockFieldMatrix<T>) m);
699         } else {
700             // safety check
701             MatrixUtils.checkSameColumnDimension(this, m);
702 
703             final BlockFieldMatrix<T> out = new BlockFieldMatrix<>(getField(), rows, m.getRowDimension());
704 
705             // perform multiplication block-wise, to ensure good cache behavior
706             int blockIndex = 0;
707             for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
708                 final int pStart = iBlock * BLOCK_SIZE;
709                 final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
710 
711                 for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
712                     final int qStart = jBlock * BLOCK_SIZE;
713                     final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, m.getRowDimension());
714 
715                     // select current block
716                     final T[] outBlock = out.blocks[blockIndex];
717 
718                     // perform multiplication on current block
719                     for (int kBlock = 0; kBlock < blockColumns; ++kBlock) {
720                         final int kWidth = blockWidth(kBlock);
721                         final T[] tBlock = blocks[iBlock * blockColumns + kBlock];
722                         final int rStart = kBlock * BLOCK_SIZE;
723                         int k = 0;
724                         for (int p = pStart; p < pEnd; ++p) {
725                             final int lStart = (p - pStart) * kWidth;
726                             final int lEnd = lStart + kWidth;
727                             for (int q = qStart; q < qEnd; ++q) {
728                                 T sum = getField().getZero();
729                                 int r = rStart;
730                                 for (int l = lStart; l < lEnd; ++l) {
731                                     sum = sum.add(tBlock[l].multiply(m.getEntry(q, r)));
732                                     ++r;
733                                 }
734                                 outBlock[k] = outBlock[k].add(sum);
735                                 ++k;
736                             }
737                         }
738                     }
739                     // go to next block
740                     ++blockIndex;
741                 }
742             }
743 
744             return out;
745         }
746     }
747 
748     /**
749      * Returns the result of postmultiplying {@code this^T} by {@code m}.
750      * @param m matrix to postmultiply by
751      * @return {@code this^T * m}
752      * @throws MathIllegalArgumentException if
753      * {@code columnDimension(this) != columnDimension(m)}
754      * @since 1.3
755      */
756     public BlockFieldMatrix<T> transposeMultiply(final BlockFieldMatrix<T> m)
757         throws MathIllegalArgumentException {
758         // safety check
759         MatrixUtils.checkSameRowDimension(this, m);
760 
761         final BlockFieldMatrix<T> out = new BlockFieldMatrix<>(getField(), columns, m.columns);
762 
763         // perform multiplication block-wise, to ensure good cache behavior
764         int blockIndex = 0;
765         for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
766 
767             final int iHeight  = out.blockHeight(iBlock);
768             final int iHeight2 = iHeight  + iHeight;
769             final int iHeight3 = iHeight2 + iHeight;
770             final int iHeight4 = iHeight3 + iHeight;
771             final int pStart   = iBlock * BLOCK_SIZE;
772             final int pEnd     = FastMath.min(pStart + BLOCK_SIZE, columns);
773 
774             for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
775                 final int jWidth  = out.blockWidth(jBlock);
776                 final int jWidth2 = jWidth  + jWidth;
777                 final int jWidth3 = jWidth2 + jWidth;
778                 final int jWidth4 = jWidth3 + jWidth;
779 
780                 // select current block
781                 final T[] outBlock = out.blocks[blockIndex];
782 
783                 // perform multiplication on current block
784                 for (int kBlock = 0; kBlock < blockRows; ++kBlock) {
785                     final int kHeight = blockHeight(kBlock);
786                     final T[] tBlock  = blocks[kBlock * blockColumns + iBlock];
787                     final T[] mBlock  = m.blocks[kBlock * m.blockColumns + jBlock];
788                     int k = 0;
789                     for (int p = pStart; p < pEnd; ++p) {
790                         final int lStart = p - pStart;
791                         final int lEnd   = lStart + iHeight * kHeight;
792                         for (int nStart = 0; nStart < jWidth; ++nStart) {
793                             T sum = getField().getZero();
794                             int l = lStart;
795                             int n = nStart;
796                             while (l < lEnd - iHeight3) {
797                                 sum = sum.add(tBlock[l].multiply(mBlock[n]).
798                                        add(tBlock[l + iHeight].multiply(mBlock[n + jWidth])).
799                                        add(tBlock[l + iHeight2].multiply(mBlock[n + jWidth2])).
800                                        add(tBlock[l + iHeight3].multiply(mBlock[n + jWidth3])));
801                                 l += iHeight4;
802                                 n += jWidth4;
803                             }
804                             while (l < lEnd) {
805                                 sum = sum.add(tBlock[l].multiply(mBlock[n]));
806                                 l += iHeight;
807                                 n += jWidth;
808                             }
809                             outBlock[k] = outBlock[k].add(sum);
810                             ++k;
811                         }
812                     }
813                 }
814                 // go to next block
815                 ++blockIndex;
816             }
817         }
818 
819         return out;
820     }
821 
822     /** {@inheritDoc} */
823     @Override
824     public BlockFieldMatrix<T> transposeMultiply(final FieldMatrix<T> m)
825         throws MathIllegalArgumentException {
826         if (m instanceof BlockFieldMatrix) {
827             return transposeMultiply((BlockFieldMatrix<T>) m);
828         } else {
829             // safety check
830             MatrixUtils.checkSameRowDimension(this, m);
831 
832             final BlockFieldMatrix<T> out = new BlockFieldMatrix<>(getField(), columns, m.getColumnDimension());
833 
834             // perform multiplication block-wise, to ensure good cache behavior
835             int blockIndex = 0;
836             for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
837 
838                 final int iHeight = out.blockHeight(iBlock);
839                 final int pStart  = iBlock * BLOCK_SIZE;
840                 final int pEnd    = FastMath.min(pStart + BLOCK_SIZE, columns);
841 
842                 for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
843                     final int qStart = jBlock * BLOCK_SIZE;
844                     final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, m.getColumnDimension());
845 
846                     // select current block
847                     final T[] outBlock = out.blocks[blockIndex];
848 
849                     // perform multiplication on current block
850                     for (int kBlock = 0; kBlock < blockRows; ++kBlock) {
851                         final int kHeight = blockHeight(kBlock);
852                         final T[] tBlock  = blocks[kBlock * blockColumns + iBlock];
853                         final int rStart  = kBlock * BLOCK_SIZE;
854                         int k = 0;
855                         for (int p = pStart; p < pEnd; ++p) {
856                             final int lStart = p - pStart;
857                             final int lEnd   = lStart + iHeight * kHeight;
858                             for (int q = qStart; q < qEnd; ++q) {
859                                 T sum = getField().getZero();
860                                 int r = rStart;
861                                 for (int l = lStart; l < lEnd; l += iHeight) {
862                                     sum = sum.add(tBlock[l].multiply(m.getEntry(r++, q)));
863                                 }
864                                 outBlock[k] = outBlock[k].add(sum);
865                                 ++k;
866                             }
867                         }
868                     }
869                     // go to next block
870                     ++blockIndex;
871                 }
872             }
873 
874             return out;
875         }
876 
877     }
878 
879     /** {@inheritDoc} */
880     @Override
881     public T[][] getData() {
882 
883         final T[][] data = MathArrays.buildArray(getField(), getRowDimension(), getColumnDimension());
884         final int lastColumns = columns - (blockColumns - 1) * BLOCK_SIZE;
885 
886         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
887             final int pStart = iBlock * BLOCK_SIZE;
888             final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
889             int regularPos   = 0;
890             int lastPos      = 0;
891             for (int p = pStart; p < pEnd; ++p) {
892                 final T[] dataP = data[p];
893                 int blockIndex = iBlock * blockColumns;
894                 int dataPos    = 0;
895                 for (int jBlock = 0; jBlock < blockColumns - 1; ++jBlock) {
896                     System.arraycopy(blocks[blockIndex++], regularPos, dataP, dataPos, BLOCK_SIZE);
897                     dataPos += BLOCK_SIZE;
898                 }
899                 System.arraycopy(blocks[blockIndex], lastPos, dataP, dataPos, lastColumns);
900                 regularPos += BLOCK_SIZE;
901                 lastPos    += lastColumns;
902             }
903         }
904 
905         return data;
906     }
907 
908     /** {@inheritDoc} */
909     @Override
910     public FieldMatrix<T> getSubMatrix(final int startRow, final int endRow,
911                                        final int startColumn,
912                                        final int endColumn)
913         throws MathIllegalArgumentException {
914         // safety checks
915         checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
916 
917         // create the output matrix
918         final BlockFieldMatrix<T> out =
919             new BlockFieldMatrix<>(getField(), endRow - startRow + 1, endColumn - startColumn + 1);
920 
921         // compute blocks shifts
922         final int blockStartRow    = startRow    / BLOCK_SIZE;
923         final int rowsShift        = startRow    % BLOCK_SIZE;
924         final int blockStartColumn = startColumn / BLOCK_SIZE;
925         final int columnsShift     = startColumn % BLOCK_SIZE;
926 
927         // perform extraction block-wise, to ensure good cache behavior
928         int pBlock = blockStartRow;
929         for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
930             final int iHeight = out.blockHeight(iBlock);
931             int qBlock = blockStartColumn;
932             for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
933                 final int jWidth = out.blockWidth(jBlock);
934 
935                 // handle one block of the output matrix
936                 final int      outIndex = iBlock * out.blockColumns + jBlock;
937                 final T[] outBlock = out.blocks[outIndex];
938                 final int      index    = pBlock * blockColumns + qBlock;
939                 final int      width    = blockWidth(qBlock);
940 
941                 final int heightExcess = iHeight + rowsShift - BLOCK_SIZE;
942                 final int widthExcess  = jWidth + columnsShift - BLOCK_SIZE;
943                 if (heightExcess > 0) {
944                     // the submatrix block spans on two blocks rows from the original matrix
945                     if (widthExcess > 0) {
946                         // the submatrix block spans on two blocks columns from the original matrix
947                         final int width2 = blockWidth(qBlock + 1);
948                         copyBlockPart(blocks[index], width,
949                                       rowsShift, BLOCK_SIZE,
950                                       columnsShift, BLOCK_SIZE,
951                                       outBlock, jWidth, 0, 0);
952                         copyBlockPart(blocks[index + 1], width2,
953                                       rowsShift, BLOCK_SIZE,
954                                       0, widthExcess,
955                                       outBlock, jWidth, 0, jWidth - widthExcess);
956                         copyBlockPart(blocks[index + blockColumns], width,
957                                       0, heightExcess,
958                                       columnsShift, BLOCK_SIZE,
959                                       outBlock, jWidth, iHeight - heightExcess, 0);
960                         copyBlockPart(blocks[index + blockColumns + 1], width2,
961                                       0, heightExcess,
962                                       0, widthExcess,
963                                       outBlock, jWidth, iHeight - heightExcess, jWidth - widthExcess);
964                     } else {
965                         // the submatrix block spans on one block column from the original matrix
966                         copyBlockPart(blocks[index], width,
967                                       rowsShift, BLOCK_SIZE,
968                                       columnsShift, jWidth + columnsShift,
969                                       outBlock, jWidth, 0, 0);
970                         copyBlockPart(blocks[index + blockColumns], width,
971                                       0, heightExcess,
972                                       columnsShift, jWidth + columnsShift,
973                                       outBlock, jWidth, iHeight - heightExcess, 0);
974                     }
975                 } else {
976                     // the submatrix block spans on one block row from the original matrix
977                     if (widthExcess > 0) {
978                         // the submatrix block spans on two blocks columns from the original matrix
979                         final int width2 = blockWidth(qBlock + 1);
980                         copyBlockPart(blocks[index], width,
981                                       rowsShift, iHeight + rowsShift,
982                                       columnsShift, BLOCK_SIZE,
983                                       outBlock, jWidth, 0, 0);
984                         copyBlockPart(blocks[index + 1], width2,
985                                       rowsShift, iHeight + rowsShift,
986                                       0, widthExcess,
987                                       outBlock, jWidth, 0, jWidth - widthExcess);
988                     } else {
989                         // the submatrix block spans on one block column from the original matrix
990                         copyBlockPart(blocks[index], width,
991                                       rowsShift, iHeight + rowsShift,
992                                       columnsShift, jWidth + columnsShift,
993                                       outBlock, jWidth, 0, 0);
994                     }
995                }
996                 ++qBlock;
997             }
998             ++pBlock;
999         }
1000 
1001         return out;
1002     }
1003 
1004     /**
1005      * Copy a part of a block into another one
1006      * <p>This method can be called only when the specified part fits in both
1007      * blocks, no verification is done here.</p>
1008      * @param srcBlock source block
1009      * @param srcWidth source block width ({@link #BLOCK_SIZE} or smaller)
1010      * @param srcStartRow start row in the source block
1011      * @param srcEndRow end row (exclusive) in the source block
1012      * @param srcStartColumn start column in the source block
1013      * @param srcEndColumn end column (exclusive) in the source block
1014      * @param dstBlock destination block
1015      * @param dstWidth destination block width ({@link #BLOCK_SIZE} or smaller)
1016      * @param dstStartRow start row in the destination block
1017      * @param dstStartColumn start column in the destination block
1018      */
1019     private void copyBlockPart(final T[] srcBlock, final int srcWidth,
1020                                final int srcStartRow, final int srcEndRow,
1021                                final int srcStartColumn, final int srcEndColumn,
1022                                final T[] dstBlock, final int dstWidth,
1023                                final int dstStartRow, final int dstStartColumn) {
1024         final int length = srcEndColumn - srcStartColumn;
1025         int srcPos = srcStartRow * srcWidth + srcStartColumn;
1026         int dstPos = dstStartRow * dstWidth + dstStartColumn;
1027         for (int srcRow = srcStartRow; srcRow < srcEndRow; ++srcRow) {
1028             System.arraycopy(srcBlock, srcPos, dstBlock, dstPos, length);
1029             srcPos += srcWidth;
1030             dstPos += dstWidth;
1031         }
1032     }
1033 
1034     /** {@inheritDoc} */
1035     @Override
1036     public void setSubMatrix(final T[][] subMatrix, final int row,
1037                              final int column)
1038         throws MathIllegalArgumentException, NullArgumentException {
1039         // safety checks
1040         MathUtils.checkNotNull(subMatrix);
1041         final int refLength = subMatrix[0].length;
1042         if (refLength == 0) {
1043             throw new MathIllegalArgumentException(LocalizedCoreFormats.AT_LEAST_ONE_COLUMN);
1044         }
1045         final int endRow    = row + subMatrix.length - 1;
1046         final int endColumn = column + refLength - 1;
1047         checkSubMatrixIndex(row, endRow, column, endColumn);
1048         for (final T[] subRow : subMatrix) {
1049             if (subRow.length != refLength) {
1050                 throw new MathIllegalArgumentException(LocalizedCoreFormats.DIMENSIONS_MISMATCH,
1051                                                        refLength, subRow.length);
1052             }
1053         }
1054 
1055         // compute blocks bounds
1056         final int blockStartRow    = row / BLOCK_SIZE;
1057         final int blockEndRow      = (endRow + BLOCK_SIZE) / BLOCK_SIZE;
1058         final int blockStartColumn = column / BLOCK_SIZE;
1059         final int blockEndColumn   = (endColumn + BLOCK_SIZE) / BLOCK_SIZE;
1060 
1061         // perform copy block-wise, to ensure good cache behavior
1062         for (int iBlock = blockStartRow; iBlock < blockEndRow; ++iBlock) {
1063             final int iHeight  = blockHeight(iBlock);
1064             final int firstRow = iBlock * BLOCK_SIZE;
1065             final int iStart   = FastMath.max(row,    firstRow);
1066             final int iEnd     = FastMath.min(endRow + 1, firstRow + iHeight);
1067 
1068             for (int jBlock = blockStartColumn; jBlock < blockEndColumn; ++jBlock) {
1069                 final int jWidth      = blockWidth(jBlock);
1070                 final int firstColumn = jBlock * BLOCK_SIZE;
1071                 final int jStart      = FastMath.max(column,    firstColumn);
1072                 final int jEnd        = FastMath.min(endColumn + 1, firstColumn + jWidth);
1073                 final int jLength     = jEnd - jStart;
1074 
1075                 // handle one block, row by row
1076                 final T[] block = blocks[iBlock * blockColumns + jBlock];
1077                 for (int i = iStart; i < iEnd; ++i) {
1078                     System.arraycopy(subMatrix[i - row], jStart - column,
1079                                      block, (i - firstRow) * jWidth + (jStart - firstColumn),
1080                                      jLength);
1081                 }
1082 
1083             }
1084         }
1085     }
1086 
1087     /** {@inheritDoc} */
1088     @Override
1089     public FieldMatrix<T> getRowMatrix(final int row)
1090         throws MathIllegalArgumentException {
1091         checkRowIndex(row);
1092         final BlockFieldMatrix<T> out = new BlockFieldMatrix<>(getField(), 1, columns);
1093 
1094         // perform copy block-wise, to ensure good cache behavior
1095         final int iBlock  = row / BLOCK_SIZE;
1096         final int iRow    = row - iBlock * BLOCK_SIZE;
1097         int outBlockIndex = 0;
1098         int outIndex      = 0;
1099         T[] outBlock = out.blocks[outBlockIndex];
1100         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1101             final int jWidth     = blockWidth(jBlock);
1102             final T[] block = blocks[iBlock * blockColumns + jBlock];
1103             final int available  = outBlock.length - outIndex;
1104             if (jWidth > available) {
1105                 System.arraycopy(block, iRow * jWidth, outBlock, outIndex, available);
1106                 outBlock = out.blocks[++outBlockIndex];
1107                 System.arraycopy(block, iRow * jWidth, outBlock, 0, jWidth - available);
1108                 outIndex = jWidth - available;
1109             } else {
1110                 System.arraycopy(block, iRow * jWidth, outBlock, outIndex, jWidth);
1111                 outIndex += jWidth;
1112             }
1113         }
1114 
1115         return out;
1116     }
1117 
1118     /** {@inheritDoc} */
1119     @Override
1120     public void setRowMatrix(final int row, final FieldMatrix<T> matrix)
1121         throws MathIllegalArgumentException {
1122         if (matrix instanceof BlockFieldMatrix) {
1123             setRowMatrix(row, (BlockFieldMatrix<T>) matrix);
1124         } else {
1125             super.setRowMatrix(row, matrix);
1126         }
1127     }
1128 
1129     /**
1130      * Sets the entries in row number <code>row</code>
1131      * as a row matrix.  Row indices start at 0.
1132      *
1133      * @param row the row to be set
1134      * @param matrix row matrix (must have one row and the same number of columns
1135      * as the instance)
1136      * @throws MathIllegalArgumentException if the matrix dimensions do
1137      * not match one instance row.
1138      * @throws MathIllegalArgumentException if the specified row index is invalid.
1139      */
1140     public void setRowMatrix(final int row, final BlockFieldMatrix<T> matrix)
1141         throws MathIllegalArgumentException {
1142         checkRowIndex(row);
1143         final int nCols = getColumnDimension();
1144         if ((matrix.getRowDimension() != 1) ||
1145             (matrix.getColumnDimension() != nCols)) {
1146             throw new MathIllegalArgumentException(LocalizedCoreFormats.DIMENSIONS_MISMATCH_2x2,
1147                                                    matrix.getRowDimension(), matrix.getColumnDimension(),
1148                                                    1, nCols);
1149         }
1150 
1151         // perform copy block-wise, to ensure good cache behavior
1152         final int iBlock = row / BLOCK_SIZE;
1153         final int iRow   = row - iBlock * BLOCK_SIZE;
1154         int mBlockIndex  = 0;
1155         int mIndex       = 0;
1156         T[] mBlock  = matrix.blocks[mBlockIndex];
1157         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1158             final int jWidth     = blockWidth(jBlock);
1159             final T[] block = blocks[iBlock * blockColumns + jBlock];
1160             final int available  = mBlock.length - mIndex;
1161             if (jWidth > available) {
1162                 System.arraycopy(mBlock, mIndex, block, iRow * jWidth, available);
1163                 mBlock = matrix.blocks[++mBlockIndex];
1164                 System.arraycopy(mBlock, 0, block, iRow * jWidth, jWidth - available);
1165                 mIndex = jWidth - available;
1166             } else {
1167                 System.arraycopy(mBlock, mIndex, block, iRow * jWidth, jWidth);
1168                 mIndex += jWidth;
1169            }
1170         }
1171     }
1172 
1173     /** {@inheritDoc} */
1174     @Override
1175     public FieldMatrix<T> getColumnMatrix(final int column)
1176         throws MathIllegalArgumentException {
1177         checkColumnIndex(column);
1178         final BlockFieldMatrix<T> out = new BlockFieldMatrix<>(getField(), rows, 1);
1179 
1180         // perform copy block-wise, to ensure good cache behavior
1181         final int jBlock  = column / BLOCK_SIZE;
1182         final int jColumn = column - jBlock * BLOCK_SIZE;
1183         final int jWidth  = blockWidth(jBlock);
1184         int outBlockIndex = 0;
1185         int outIndex      = 0;
1186         T[] outBlock = out.blocks[outBlockIndex];
1187         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1188             final int iHeight = blockHeight(iBlock);
1189             final T[] block = blocks[iBlock * blockColumns + jBlock];
1190             for (int i = 0; i < iHeight; ++i) {
1191                 if (outIndex >= outBlock.length) {
1192                     outBlock = out.blocks[++outBlockIndex];
1193                     outIndex = 0;
1194                 }
1195                 outBlock[outIndex++] = block[i * jWidth + jColumn];
1196             }
1197         }
1198 
1199         return out;
1200     }
1201 
1202     /** {@inheritDoc} */
1203     @Override
1204     public void setColumnMatrix(final int column, final FieldMatrix<T> matrix)
1205         throws MathIllegalArgumentException {
1206         if (matrix instanceof BlockFieldMatrix) {
1207             setColumnMatrix(column, (BlockFieldMatrix<T>) matrix);
1208         } else {
1209             super.setColumnMatrix(column, matrix);
1210         }
1211     }
1212 
1213     /**
1214      * Sets the entries in column number {@code column}
1215      * as a column matrix.  Column indices start at 0.
1216      *
1217      * @param column Column to be set.
1218      * @param matrix Column matrix (must have one column and the same number of rows
1219      * as the instance).
1220      * @throws MathIllegalArgumentException if the matrix dimensions do
1221      * not match one instance column.
1222      * @throws MathIllegalArgumentException if the specified column index is invalid.
1223      */
1224     void setColumnMatrix(final int column, final BlockFieldMatrix<T> matrix)
1225         throws MathIllegalArgumentException {
1226         checkColumnIndex(column);
1227         final int nRows = getRowDimension();
1228         if ((matrix.getRowDimension() != nRows) ||
1229             (matrix.getColumnDimension() != 1)) {
1230             throw new MathIllegalArgumentException(LocalizedCoreFormats.DIMENSIONS_MISMATCH_2x2,
1231                                                    matrix.getRowDimension(), matrix.getColumnDimension(),
1232                                                    nRows, 1);
1233         }
1234 
1235         // perform copy block-wise, to ensure good cache behavior
1236         final int jBlock  = column / BLOCK_SIZE;
1237         final int jColumn = column - jBlock * BLOCK_SIZE;
1238         final int jWidth  = blockWidth(jBlock);
1239         int mBlockIndex = 0;
1240         int mIndex      = 0;
1241         T[] mBlock = matrix.blocks[mBlockIndex];
1242         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1243             final int iHeight = blockHeight(iBlock);
1244             final T[] block = blocks[iBlock * blockColumns + jBlock];
1245             for (int i = 0; i < iHeight; ++i) {
1246                 if (mIndex >= mBlock.length) {
1247                     mBlock = matrix.blocks[++mBlockIndex];
1248                     mIndex = 0;
1249                 }
1250                 block[i * jWidth + jColumn] = mBlock[mIndex++];
1251             }
1252         }
1253     }
1254 
1255     /** {@inheritDoc} */
1256     @Override
1257     public FieldVector<T> getRowVector(final int row)
1258         throws MathIllegalArgumentException {
1259         checkRowIndex(row);
1260         final T[] outData = MathArrays.buildArray(getField(), columns);
1261 
1262         // perform copy block-wise, to ensure good cache behavior
1263         final int iBlock  = row / BLOCK_SIZE;
1264         final int iRow    = row - iBlock * BLOCK_SIZE;
1265         int outIndex      = 0;
1266         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1267             final int jWidth     = blockWidth(jBlock);
1268             final T[] block = blocks[iBlock * blockColumns + jBlock];
1269             System.arraycopy(block, iRow * jWidth, outData, outIndex, jWidth);
1270             outIndex += jWidth;
1271         }
1272 
1273         return new ArrayFieldVector<T>(getField(), outData, false);
1274     }
1275 
1276     /** {@inheritDoc} */
1277     @Override
1278     public void setRowVector(final int row, final FieldVector<T> vector)
1279         throws MathIllegalArgumentException {
1280         if (vector instanceof ArrayFieldVector) {
1281             setRow(row, ((ArrayFieldVector<T>) vector).getDataRef());
1282         } else {
1283             super.setRowVector(row, vector);
1284         }
1285     }
1286 
1287     /** {@inheritDoc} */
1288     @Override
1289     public FieldVector<T> getColumnVector(final int column)
1290         throws MathIllegalArgumentException {
1291         checkColumnIndex(column);
1292         final T[] outData = MathArrays.buildArray(getField(), rows);
1293 
1294         // perform copy block-wise, to ensure good cache behavior
1295         final int jBlock  = column / BLOCK_SIZE;
1296         final int jColumn = column - jBlock * BLOCK_SIZE;
1297         final int jWidth  = blockWidth(jBlock);
1298         int outIndex      = 0;
1299         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1300             final int iHeight = blockHeight(iBlock);
1301             final T[] block = blocks[iBlock * blockColumns + jBlock];
1302             for (int i = 0; i < iHeight; ++i) {
1303                 outData[outIndex++] = block[i * jWidth + jColumn];
1304             }
1305         }
1306 
1307         return new ArrayFieldVector<T>(getField(), outData, false);
1308     }
1309 
1310     /** {@inheritDoc} */
1311     @Override
1312     public void setColumnVector(final int column, final FieldVector<T> vector)
1313         throws MathIllegalArgumentException {
1314         if (vector instanceof ArrayFieldVector) {
1315             setColumn(column, ((ArrayFieldVector<T>) vector).getDataRef());
1316         } else {
1317             super.setColumnVector(column, vector);
1318         }
1319     }
1320 
1321     /** {@inheritDoc} */
1322     @Override
1323     public T[] getRow(final int row) throws MathIllegalArgumentException {
1324         checkRowIndex(row);
1325         final T[] out = MathArrays.buildArray(getField(), columns);
1326 
1327         // perform copy block-wise, to ensure good cache behavior
1328         final int iBlock  = row / BLOCK_SIZE;
1329         final int iRow    = row - iBlock * BLOCK_SIZE;
1330         int outIndex      = 0;
1331         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1332             final int jWidth     = blockWidth(jBlock);
1333             final T[] block = blocks[iBlock * blockColumns + jBlock];
1334             System.arraycopy(block, iRow * jWidth, out, outIndex, jWidth);
1335             outIndex += jWidth;
1336         }
1337 
1338         return out;
1339     }
1340 
1341     /** {@inheritDoc} */
1342     @Override
1343     public void setRow(final int row, final T[] array)
1344         throws MathIllegalArgumentException {
1345         checkRowIndex(row);
1346         final int nCols = getColumnDimension();
1347         if (array.length != nCols) {
1348             throw new MathIllegalArgumentException(LocalizedCoreFormats.DIMENSIONS_MISMATCH_2x2,
1349                                                    1, array.length,
1350                                                    1, nCols);
1351         }
1352 
1353         // perform copy block-wise, to ensure good cache behavior
1354         final int iBlock  = row / BLOCK_SIZE;
1355         final int iRow    = row - iBlock * BLOCK_SIZE;
1356         int outIndex      = 0;
1357         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1358             final int jWidth     = blockWidth(jBlock);
1359             final T[] block = blocks[iBlock * blockColumns + jBlock];
1360             System.arraycopy(array, outIndex, block, iRow * jWidth, jWidth);
1361             outIndex += jWidth;
1362         }
1363     }
1364 
1365     /** {@inheritDoc} */
1366     @Override
1367     public T[] getColumn(final int column) throws MathIllegalArgumentException {
1368         checkColumnIndex(column);
1369         final T[] out = MathArrays.buildArray(getField(), rows);
1370 
1371         // perform copy block-wise, to ensure good cache behavior
1372         final int jBlock  = column / BLOCK_SIZE;
1373         final int jColumn = column - jBlock * BLOCK_SIZE;
1374         final int jWidth  = blockWidth(jBlock);
1375         int outIndex      = 0;
1376         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1377             final int iHeight = blockHeight(iBlock);
1378             final T[] block = blocks[iBlock * blockColumns + jBlock];
1379             for (int i = 0; i < iHeight; ++i) {
1380                 out[outIndex++] = block[i * jWidth + jColumn];
1381             }
1382         }
1383 
1384         return out;
1385     }
1386 
1387     /** {@inheritDoc} */
1388     @Override
1389     public void setColumn(final int column, final T[] array)
1390         throws MathIllegalArgumentException {
1391         checkColumnIndex(column);
1392         final int nRows = getRowDimension();
1393         if (array.length != nRows) {
1394             throw new MathIllegalArgumentException(LocalizedCoreFormats.DIMENSIONS_MISMATCH_2x2,
1395                                                    array.length,
1396                                                    1, nRows, 1);
1397         }
1398 
1399         // perform copy block-wise, to ensure good cache behavior
1400         final int jBlock  = column / BLOCK_SIZE;
1401         final int jColumn = column - jBlock * BLOCK_SIZE;
1402         final int jWidth  = blockWidth(jBlock);
1403         int outIndex      = 0;
1404         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1405             final int iHeight = blockHeight(iBlock);
1406             final T[] block = blocks[iBlock * blockColumns + jBlock];
1407             for (int i = 0; i < iHeight; ++i) {
1408                 block[i * jWidth + jColumn] = array[outIndex++];
1409             }
1410         }
1411     }
1412 
1413     /** {@inheritDoc} */
1414     @Override
1415     public T getEntry(final int row, final int column)
1416         throws MathIllegalArgumentException {
1417         checkRowIndex(row);
1418         checkColumnIndex(column);
1419 
1420         final int iBlock = row    / BLOCK_SIZE;
1421         final int jBlock = column / BLOCK_SIZE;
1422         final int k      = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1423             (column - jBlock * BLOCK_SIZE);
1424 
1425         return blocks[iBlock * blockColumns + jBlock][k];
1426     }
1427 
1428     /** {@inheritDoc} */
1429     @Override
1430     public void setEntry(final int row, final int column, final T value)
1431         throws MathIllegalArgumentException {
1432         checkRowIndex(row);
1433         checkColumnIndex(column);
1434 
1435         final int iBlock = row    / BLOCK_SIZE;
1436         final int jBlock = column / BLOCK_SIZE;
1437         final int k      = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1438             (column - jBlock * BLOCK_SIZE);
1439 
1440         blocks[iBlock * blockColumns + jBlock][k] = value;
1441     }
1442 
1443     /** {@inheritDoc} */
1444     @Override
1445     public void addToEntry(final int row, final int column, final T increment)
1446         throws MathIllegalArgumentException {
1447         checkRowIndex(row);
1448         checkColumnIndex(column);
1449 
1450         final int iBlock = row    / BLOCK_SIZE;
1451         final int jBlock = column / BLOCK_SIZE;
1452         final int k      = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1453             (column - jBlock * BLOCK_SIZE);
1454         final T[] blockIJ = blocks[iBlock * blockColumns + jBlock];
1455 
1456         blockIJ[k] = blockIJ[k].add(increment);
1457     }
1458 
1459     /** {@inheritDoc} */
1460     @Override
1461     public void multiplyEntry(final int row, final int column, final T factor)
1462         throws MathIllegalArgumentException {
1463         checkRowIndex(row);
1464         checkColumnIndex(column);
1465 
1466         final int iBlock = row    / BLOCK_SIZE;
1467         final int jBlock = column / BLOCK_SIZE;
1468         final int k      = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1469             (column - jBlock * BLOCK_SIZE);
1470         final T[] blockIJ = blocks[iBlock * blockColumns + jBlock];
1471 
1472         blockIJ[k] = blockIJ[k].multiply(factor);
1473     }
1474 
1475     /** {@inheritDoc} */
1476     @Override
1477     public FieldMatrix<T> transpose() {
1478         final int nRows = getRowDimension();
1479         final int nCols = getColumnDimension();
1480         final BlockFieldMatrix<T> out = new BlockFieldMatrix<>(getField(), nCols, nRows);
1481 
1482         // perform transpose block-wise, to ensure good cache behavior
1483         int blockIndex = 0;
1484         for (int iBlock = 0; iBlock < blockColumns; ++iBlock) {
1485             for (int jBlock = 0; jBlock < blockRows; ++jBlock) {
1486 
1487                 // transpose current block
1488                 final T[] outBlock = out.blocks[blockIndex];
1489                 final T[] tBlock   = blocks[jBlock * blockColumns + iBlock];
1490                 final int      pStart   = iBlock * BLOCK_SIZE;
1491                 final int      pEnd     = FastMath.min(pStart + BLOCK_SIZE, columns);
1492                 final int      qStart   = jBlock * BLOCK_SIZE;
1493                 final int      qEnd     = FastMath.min(qStart + BLOCK_SIZE, rows);
1494                 int k = 0;
1495                 for (int p = pStart; p < pEnd; ++p) {
1496                     final int lInc = pEnd - pStart;
1497                     int l = p - pStart;
1498                     for (int q = qStart; q < qEnd; ++q) {
1499                         outBlock[k] = tBlock[l];
1500                         ++k;
1501                         l+= lInc;
1502                     }
1503                 }
1504 
1505                 // go to next block
1506                 ++blockIndex;
1507 
1508             }
1509         }
1510 
1511         return out;
1512     }
1513 
1514     /** {@inheritDoc} */
1515     @Override
1516     public int getRowDimension() {
1517         return rows;
1518     }
1519 
1520     /** {@inheritDoc} */
1521     @Override
1522     public int getColumnDimension() {
1523         return columns;
1524     }
1525 
1526     /** {@inheritDoc} */
1527     @Override
1528     public T[] operate(final T[] v) throws MathIllegalArgumentException {
1529         if (v.length != columns) {
1530             throw new MathIllegalArgumentException(LocalizedCoreFormats.DIMENSIONS_MISMATCH,
1531                                                    v.length, columns);
1532         }
1533         final T[] out = MathArrays.buildArray(getField(), rows);
1534         final T zero = getField().getZero();
1535 
1536         // perform multiplication block-wise, to ensure good cache behavior
1537         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1538             final int pStart = iBlock * BLOCK_SIZE;
1539             final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
1540             for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1541                 final T[] block  = blocks[iBlock * blockColumns + jBlock];
1542                 final int      qStart = jBlock * BLOCK_SIZE;
1543                 final int      qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
1544                 int k = 0;
1545                 for (int p = pStart; p < pEnd; ++p) {
1546                     T sum = zero;
1547                     int q = qStart;
1548                     while (q < qEnd - 3) {
1549                         sum = sum.
1550                               add(block[k].multiply(v[q])).
1551                               add(block[k + 1].multiply(v[q + 1])).
1552                               add(block[k + 2].multiply(v[q + 2])).
1553                               add(block[k + 3].multiply(v[q + 3]));
1554                         k += 4;
1555                         q += 4;
1556                     }
1557                     while (q < qEnd) {
1558                         sum = sum.add(block[k++].multiply(v[q++]));
1559                     }
1560                     out[p] = out[p].add(sum);
1561                 }
1562             }
1563         }
1564 
1565         return out;
1566     }
1567 
1568     /** {@inheritDoc} */
1569     @Override
1570     public T[] preMultiply(final T[] v) throws MathIllegalArgumentException {
1571 
1572         if (v.length != rows) {
1573             throw new MathIllegalArgumentException(LocalizedCoreFormats.DIMENSIONS_MISMATCH,
1574                                                    v.length, rows);
1575         }
1576         final T[] out = MathArrays.buildArray(getField(), columns);
1577         final T zero = getField().getZero();
1578 
1579         // perform multiplication block-wise, to ensure good cache behavior
1580         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1581             final int jWidth  = blockWidth(jBlock);
1582             final int jWidth2 = jWidth  + jWidth;
1583             final int jWidth3 = jWidth2 + jWidth;
1584             final int jWidth4 = jWidth3 + jWidth;
1585             final int qStart = jBlock * BLOCK_SIZE;
1586             final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
1587             for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1588                 final T[] block  = blocks[iBlock * blockColumns + jBlock];
1589                 final int      pStart = iBlock * BLOCK_SIZE;
1590                 final int      pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
1591                 for (int q = qStart; q < qEnd; ++q) {
1592                     int k = q - qStart;
1593                     T sum = zero;
1594                     int p = pStart;
1595                     while (p < pEnd - 3) {
1596                         sum = sum.
1597                               add(block[k].multiply(v[p])).
1598                               add(block[k + jWidth].multiply(v[p + 1])).
1599                               add(block[k + jWidth2].multiply(v[p + 2])).
1600                               add(block[k + jWidth3].multiply(v[p + 3]));
1601                         k += jWidth4;
1602                         p += 4;
1603                     }
1604                     while (p < pEnd) {
1605                         sum = sum.add(block[k].multiply(v[p++]));
1606                         k += jWidth;
1607                     }
1608                     out[q] = out[q].add(sum);
1609                 }
1610             }
1611         }
1612 
1613         return out;
1614     }
1615 
1616     /** {@inheritDoc} */
1617     @Override
1618     public T walkInRowOrder(final FieldMatrixChangingVisitor<T> visitor) {
1619         visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1620         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1621             final int pStart = iBlock * BLOCK_SIZE;
1622             final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
1623             for (int p = pStart; p < pEnd; ++p) {
1624                 for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1625                     final int jWidth = blockWidth(jBlock);
1626                     final int qStart = jBlock * BLOCK_SIZE;
1627                     final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
1628                     final T[] block = blocks[iBlock * blockColumns + jBlock];
1629                     int k = (p - pStart) * jWidth;
1630                     for (int q = qStart; q < qEnd; ++q) {
1631                         block[k] = visitor.visit(p, q, block[k]);
1632                         ++k;
1633                     }
1634                 }
1635              }
1636         }
1637         return visitor.end();
1638     }
1639 
1640     /** {@inheritDoc} */
1641     @Override
1642     public T walkInRowOrder(final FieldMatrixPreservingVisitor<T> visitor) {
1643         visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1644         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1645             final int pStart = iBlock * BLOCK_SIZE;
1646             final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
1647             for (int p = pStart; p < pEnd; ++p) {
1648                 for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1649                     final int jWidth = blockWidth(jBlock);
1650                     final int qStart = jBlock * BLOCK_SIZE;
1651                     final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
1652                     final T[] block = blocks[iBlock * blockColumns + jBlock];
1653                     int k = (p - pStart) * jWidth;
1654                     for (int q = qStart; q < qEnd; ++q) {
1655                         visitor.visit(p, q, block[k]);
1656                         ++k;
1657                     }
1658                 }
1659              }
1660         }
1661         return visitor.end();
1662     }
1663 
1664     /** {@inheritDoc} */
1665     @Override
1666     public T walkInRowOrder(final FieldMatrixChangingVisitor<T> visitor,
1667                             final int startRow, final int endRow,
1668                             final int startColumn, final int endColumn)
1669         throws MathIllegalArgumentException {
1670         checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
1671         visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1672         for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1673             final int p0     = iBlock * BLOCK_SIZE;
1674             final int pStart = FastMath.max(startRow, p0);
1675             final int pEnd   = FastMath.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1676             for (int p = pStart; p < pEnd; ++p) {
1677                 for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1678                     final int jWidth = blockWidth(jBlock);
1679                     final int q0     = jBlock * BLOCK_SIZE;
1680                     final int qStart = FastMath.max(startColumn, q0);
1681                     final int qEnd   = FastMath.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1682                     final T[] block = blocks[iBlock * blockColumns + jBlock];
1683                     int k = (p - p0) * jWidth + qStart - q0;
1684                     for (int q = qStart; q < qEnd; ++q) {
1685                         block[k] = visitor.visit(p, q, block[k]);
1686                         ++k;
1687                     }
1688                 }
1689              }
1690         }
1691         return visitor.end();
1692     }
1693 
1694     /** {@inheritDoc} */
1695     @Override
1696     public T walkInRowOrder(final FieldMatrixPreservingVisitor<T> visitor,
1697                             final int startRow, final int endRow,
1698                             final int startColumn, final int endColumn)
1699         throws MathIllegalArgumentException {
1700         checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
1701         visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1702         for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1703             final int p0     = iBlock * BLOCK_SIZE;
1704             final int pStart = FastMath.max(startRow, p0);
1705             final int pEnd   = FastMath.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1706             for (int p = pStart; p < pEnd; ++p) {
1707                 for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1708                     final int jWidth = blockWidth(jBlock);
1709                     final int q0     = jBlock * BLOCK_SIZE;
1710                     final int qStart = FastMath.max(startColumn, q0);
1711                     final int qEnd   = FastMath.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1712                     final T[] block = blocks[iBlock * blockColumns + jBlock];
1713                     int k = (p - p0) * jWidth + qStart - q0;
1714                     for (int q = qStart; q < qEnd; ++q) {
1715                         visitor.visit(p, q, block[k]);
1716                         ++k;
1717                     }
1718                 }
1719              }
1720         }
1721         return visitor.end();
1722     }
1723 
1724     /** {@inheritDoc} */
1725     @Override
1726     public T walkInOptimizedOrder(final FieldMatrixChangingVisitor<T> visitor) {
1727         visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1728         int blockIndex = 0;
1729         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1730             final int pStart = iBlock * BLOCK_SIZE;
1731             final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
1732             for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1733                 final int qStart = jBlock * BLOCK_SIZE;
1734                 final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
1735                 final T[] block = blocks[blockIndex];
1736                 int k = 0;
1737                 for (int p = pStart; p < pEnd; ++p) {
1738                     for (int q = qStart; q < qEnd; ++q) {
1739                         block[k] = visitor.visit(p, q, block[k]);
1740                         ++k;
1741                     }
1742                 }
1743                 ++blockIndex;
1744             }
1745         }
1746         return visitor.end();
1747     }
1748 
1749     /** {@inheritDoc} */
1750     @Override
1751     public T walkInOptimizedOrder(final FieldMatrixPreservingVisitor<T> visitor) {
1752         visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1753         int blockIndex = 0;
1754         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1755             final int pStart = iBlock * BLOCK_SIZE;
1756             final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
1757             for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1758                 final int qStart = jBlock * BLOCK_SIZE;
1759                 final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
1760                 final T[] block = blocks[blockIndex];
1761                 int k = 0;
1762                 for (int p = pStart; p < pEnd; ++p) {
1763                     for (int q = qStart; q < qEnd; ++q) {
1764                         visitor.visit(p, q, block[k]);
1765                         ++k;
1766                     }
1767                 }
1768                 ++blockIndex;
1769             }
1770         }
1771         return visitor.end();
1772     }
1773 
1774     /** {@inheritDoc} */
1775     @Override
1776     public T walkInOptimizedOrder(final FieldMatrixChangingVisitor<T> visitor,
1777                                   final int startRow, final int endRow,
1778                                   final int startColumn, final int endColumn)
1779         throws MathIllegalArgumentException {
1780         checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
1781         visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1782         for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1783             final int p0     = iBlock * BLOCK_SIZE;
1784             final int pStart = FastMath.max(startRow, p0);
1785             final int pEnd   = FastMath.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1786             for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1787                 final int jWidth = blockWidth(jBlock);
1788                 final int q0     = jBlock * BLOCK_SIZE;
1789                 final int qStart = FastMath.max(startColumn, q0);
1790                 final int qEnd   = FastMath.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1791                 final T[] block = blocks[iBlock * blockColumns + jBlock];
1792                 for (int p = pStart; p < pEnd; ++p) {
1793                     int k = (p - p0) * jWidth + qStart - q0;
1794                     for (int q = qStart; q < qEnd; ++q) {
1795                         block[k] = visitor.visit(p, q, block[k]);
1796                         ++k;
1797                     }
1798                 }
1799             }
1800         }
1801         return visitor.end();
1802     }
1803 
1804     /** {@inheritDoc} */
1805     @Override
1806     public T walkInOptimizedOrder(final FieldMatrixPreservingVisitor<T> visitor,
1807                                   final int startRow, final int endRow,
1808                                   final int startColumn, final int endColumn)
1809         throws MathIllegalArgumentException {
1810         checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
1811         visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1812         for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1813             final int p0     = iBlock * BLOCK_SIZE;
1814             final int pStart = FastMath.max(startRow, p0);
1815             final int pEnd   = FastMath.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1816             for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1817                 final int jWidth = blockWidth(jBlock);
1818                 final int q0     = jBlock * BLOCK_SIZE;
1819                 final int qStart = FastMath.max(startColumn, q0);
1820                 final int qEnd   = FastMath.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1821                 final T[] block = blocks[iBlock * blockColumns + jBlock];
1822                 for (int p = pStart; p < pEnd; ++p) {
1823                     int k = (p - p0) * jWidth + qStart - q0;
1824                     for (int q = qStart; q < qEnd; ++q) {
1825                         visitor.visit(p, q, block[k]);
1826                         ++k;
1827                     }
1828                 }
1829             }
1830         }
1831         return visitor.end();
1832     }
1833 
1834     /**
1835      * Get the height of a block.
1836      * @param blockRow row index (in block sense) of the block
1837      * @return height (number of rows) of the block
1838      */
1839     private int blockHeight(final int blockRow) {
1840         return (blockRow == blockRows - 1) ? rows - blockRow * BLOCK_SIZE : BLOCK_SIZE;
1841     }
1842 
1843     /**
1844      * Get the width of a block.
1845      * @param blockColumn column index (in block sense) of the block
1846      * @return width (number of columns) of the block
1847      */
1848     private int blockWidth(final int blockColumn) {
1849         return (blockColumn == blockColumns - 1) ? columns - blockColumn * BLOCK_SIZE : BLOCK_SIZE;
1850     }
1851 }