parent
							
								
									8cada1d894
								
							
						
					
					
						commit
						52a7386aef
					
				| @ -972,7 +972,7 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga | ||||
|               EIGEN_ASM_COMMENT("begin step of gebp micro kernel 3pX4"); \ | ||||
|               EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \ | ||||
|               internal::prefetch(blA+(3*K+16)*LhsProgress); \ | ||||
|               if (EIGEN_ARCH_ARM) internal::prefetch(blB+(4*K+16)*RhsProgress); /* Bug 953 */ \ | ||||
|               if (EIGEN_ARCH_ARM) { internal::prefetch(blB+(4*K+16)*RhsProgress); } /* Bug 953 */ \ | ||||
|               traits.loadLhs(&blA[(0+3*K)*LhsProgress], A0);  \ | ||||
|               traits.loadLhs(&blA[(1+3*K)*LhsProgress], A1);  \ | ||||
|               traits.loadLhs(&blA[(2+3*K)*LhsProgress], A2);  \ | ||||
|  | ||||
| @ -786,30 +786,38 @@ class SparseMatrix | ||||
|       EIGEN_DBG_SPARSE( | ||||
|         s << "Nonzero entries:\n"; | ||||
|         if(m.isCompressed()) | ||||
|         { | ||||
|           for (Index i=0; i<m.nonZeros(); ++i) | ||||
|             s << "(" << m.m_data.value(i) << "," << m.m_data.index(i) << ") "; | ||||
|         } | ||||
|         else | ||||
|         { | ||||
|           for (Index i=0; i<m.outerSize(); ++i) | ||||
|           { | ||||
|             Index p = m.m_outerIndex[i]; | ||||
|             Index pe = m.m_outerIndex[i]+m.m_innerNonZeros[i]; | ||||
|             Index k=p; | ||||
|             for (; k<pe; ++k) | ||||
|             for (; k<pe; ++k) { | ||||
|               s << "(" << m.m_data.value(k) << "," << m.m_data.index(k) << ") "; | ||||
|             for (; k<m.m_outerIndex[i+1]; ++k) | ||||
|             } | ||||
|             for (; k<m.m_outerIndex[i+1]; ++k) { | ||||
|               s << "(_,_) "; | ||||
|             } | ||||
|           } | ||||
|         } | ||||
|         s << std::endl; | ||||
|         s << std::endl; | ||||
|         s << "Outer pointers:\n"; | ||||
|         for (Index i=0; i<m.outerSize(); ++i) | ||||
|         for (Index i=0; i<m.outerSize(); ++i) { | ||||
|           s << m.m_outerIndex[i] << " "; | ||||
|         } | ||||
|         s << " $" << std::endl; | ||||
|         if(!m.isCompressed()) | ||||
|         { | ||||
|           s << "Inner non zeros:\n"; | ||||
|           for (Index i=0; i<m.outerSize(); ++i) | ||||
|           for (Index i=0; i<m.outerSize(); ++i) { | ||||
|             s << m.m_innerNonZeros[i] << " "; | ||||
|           } | ||||
|           s << " $" << std::endl; | ||||
|         } | ||||
|         s << std::endl; | ||||
|  | ||||
| @ -106,22 +106,22 @@ void sparselu_gemm(Index m, Index n, Index d, const Scalar* A, Index lda, const | ||||
|          | ||||
| #define KMADD(c, a, b, tmp) {tmp = b; tmp = pmul(a,tmp); c = padd(c,tmp);} | ||||
| #define WORK(I)  \ | ||||
|                     c0 = pload<Packet>(C0+i+(I)*PacketSize);   \ | ||||
|                     c1 = pload<Packet>(C1+i+(I)*PacketSize);   \ | ||||
|                     KMADD(c0, a0, b00, t0)      \ | ||||
|                     KMADD(c1, a0, b01, t1)      \ | ||||
|                     a0 = pload<Packet>(A0+i+(I+1)*PacketSize); \ | ||||
|                     KMADD(c0, a1, b10, t0)      \ | ||||
|                     KMADD(c1, a1, b11, t1)       \ | ||||
|                     a1 = pload<Packet>(A1+i+(I+1)*PacketSize); \ | ||||
|           if(RK==4) KMADD(c0, a2, b20, t0)       \ | ||||
|           if(RK==4) KMADD(c1, a2, b21, t1)       \ | ||||
|           if(RK==4) a2 = pload<Packet>(A2+i+(I+1)*PacketSize); \ | ||||
|           if(RK==4) KMADD(c0, a3, b30, t0)       \ | ||||
|           if(RK==4) KMADD(c1, a3, b31, t1)       \ | ||||
|           if(RK==4) a3 = pload<Packet>(A3+i+(I+1)*PacketSize); \ | ||||
|                     pstore(C0+i+(I)*PacketSize, c0);           \ | ||||
|                     pstore(C1+i+(I)*PacketSize, c1) | ||||
|                      c0 = pload<Packet>(C0+i+(I)*PacketSize);    \ | ||||
|                      c1 = pload<Packet>(C1+i+(I)*PacketSize);    \ | ||||
|                      KMADD(c0, a0, b00, t0)                      \ | ||||
|                      KMADD(c1, a0, b01, t1)                      \ | ||||
|                      a0 = pload<Packet>(A0+i+(I+1)*PacketSize);  \ | ||||
|                      KMADD(c0, a1, b10, t0)                      \ | ||||
|                      KMADD(c1, a1, b11, t1)                      \ | ||||
|                      a1 = pload<Packet>(A1+i+(I+1)*PacketSize);  \ | ||||
|           if(RK==4){ KMADD(c0, a2, b20, t0)                     }\ | ||||
|           if(RK==4){ KMADD(c1, a2, b21, t1)                     }\ | ||||
|           if(RK==4){ a2 = pload<Packet>(A2+i+(I+1)*PacketSize); }\ | ||||
|           if(RK==4){ KMADD(c0, a3, b30, t0)                     }\ | ||||
|           if(RK==4){ KMADD(c1, a3, b31, t1)                     }\ | ||||
|           if(RK==4){ a3 = pload<Packet>(A3+i+(I+1)*PacketSize); }\ | ||||
|                      pstore(C0+i+(I)*PacketSize, c0);            \ | ||||
|                      pstore(C1+i+(I)*PacketSize, c1) | ||||
|          | ||||
|         // process rows of A' - C' with aggressive vectorization and peeling 
 | ||||
|         for(Index i=0; i<actual_b_end1; i+=PacketSize*8) | ||||
| @ -131,14 +131,15 @@ void sparselu_gemm(Index m, Index n, Index d, const Scalar* A, Index lda, const | ||||
|                     prefetch((A1+i+(5)*PacketSize)); | ||||
|           if(RK==4) prefetch((A2+i+(5)*PacketSize)); | ||||
|           if(RK==4) prefetch((A3+i+(5)*PacketSize)); | ||||
|                     WORK(0); | ||||
|                     WORK(1); | ||||
|                     WORK(2); | ||||
|                     WORK(3); | ||||
|                     WORK(4); | ||||
|                     WORK(5); | ||||
|                     WORK(6); | ||||
|                     WORK(7); | ||||
| 
 | ||||
|           WORK(0); | ||||
|           WORK(1); | ||||
|           WORK(2); | ||||
|           WORK(3); | ||||
|           WORK(4); | ||||
|           WORK(5); | ||||
|           WORK(6); | ||||
|           WORK(7); | ||||
|         } | ||||
|         // process the remaining rows with vectorization only
 | ||||
|         for(Index i=actual_b_end1; i<actual_b_end2; i+=PacketSize) | ||||
| @ -203,16 +204,16 @@ void sparselu_gemm(Index m, Index n, Index d, const Scalar* A, Index lda, const | ||||
|         } | ||||
|          | ||||
| #define WORK(I) \ | ||||
|                   c0 = pload<Packet>(C0+i+(I)*PacketSize);   \ | ||||
|                   KMADD(c0, a0, b00, t0)       \ | ||||
|                   a0 = pload<Packet>(A0+i+(I+1)*PacketSize); \ | ||||
|                   KMADD(c0, a1, b10, t0)       \ | ||||
|                   a1 = pload<Packet>(A1+i+(I+1)*PacketSize); \ | ||||
|         if(RK==4) KMADD(c0, a2, b20, t0)       \ | ||||
|         if(RK==4) a2 = pload<Packet>(A2+i+(I+1)*PacketSize); \ | ||||
|         if(RK==4) KMADD(c0, a3, b30, t0)       \ | ||||
|         if(RK==4) a3 = pload<Packet>(A3+i+(I+1)*PacketSize); \ | ||||
|                   pstore(C0+i+(I)*PacketSize, c0); | ||||
|                    c0 = pload<Packet>(C0+i+(I)*PacketSize);     \ | ||||
|                    KMADD(c0, a0, b00, t0)                       \ | ||||
|                    a0 = pload<Packet>(A0+i+(I+1)*PacketSize);   \ | ||||
|                    KMADD(c0, a1, b10, t0)                       \ | ||||
|                    a1 = pload<Packet>(A1+i+(I+1)*PacketSize);   \ | ||||
|         if(RK==4){ KMADD(c0, a2, b20, t0)                      }\ | ||||
|         if(RK==4){ a2 = pload<Packet>(A2+i+(I+1)*PacketSize);  }\ | ||||
|         if(RK==4){ KMADD(c0, a3, b30, t0)                      }\ | ||||
|         if(RK==4){ a3 = pload<Packet>(A3+i+(I+1)*PacketSize);  }\ | ||||
|                    pstore(C0+i+(I)*PacketSize, c0); | ||||
|          | ||||
|         // aggressive vectorization and peeling
 | ||||
|         for(Index i=0; i<actual_b_end1; i+=PacketSize*8) | ||||
|  | ||||
| @ -90,7 +90,7 @@ inline void on_temporary_creation(long int size) { | ||||
| #define VERIFY_EVALUATION_COUNT(XPR,N) {\ | ||||
|     nb_temporaries = 0; \ | ||||
|     XPR; \ | ||||
|     if(nb_temporaries!=N) std::cerr << "nb_temporaries == " << nb_temporaries << "\n"; \ | ||||
|     if(nb_temporaries!=N) { std::cerr << "nb_temporaries == " << nb_temporaries << "\n"; }\ | ||||
|     VERIFY( (#XPR) && nb_temporaries==N ); \ | ||||
|   } | ||||
|    | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 Gael Guennebaud
						Gael Guennebaud