BibTeX


@InProceedings{AggarwalACS87,
  author    = {A. Aggarwal and B. Alpern and A. K. Chandra and M. Snir},
  title     = {A model for hierarchical memory},
  booktitle = {Proceedings of 19th Annual ACM Symposium on the Theory of Computing},
  pages     = {305--314},
  year      = {1987},
  address   = {New York},
}

@InProceedings{AggarwalCS87,
  author    = {A. Aggarwal and A. K. Chandra and M. Snir},
  title     = {Hierarchical memory with block transfer},
  booktitle = {28th Annual Symposium on Foundations of Computer Science},
  pages     = {204--216},
  year      = {1987},
  address   = {Los Angeles, California},
  month     = oct,
}
 

@Book{AndersonBBDDDGHMOS95,
  author    = {E. Anderson and Z. Bai and C. Bischof and J. Demmel and
               J. Dongarra and J. DuCroz and A. Greenbaum and S. Hammarling and
               A. McKenney and S. Ostrouchov and D. Sorensen},
  title     = {{LAPACK} Users' Guide, Release 2.0},
  publisher = {SIAM},
  year      = {1995},
  edition   = {Second},
}

@inproceedings{BaileyG88,
  author    = {D. H. Bailey and H. R. P. Ferguson},
  title     = {A {Strassen-Newton} algorithm for high-speed parallelizable matrix inversion},
  booktitle = {Supercomputing '88: Proceedings of the 1988 ACM/IEEE conference on Supercomputing},
  year      = {1988},
  isbn      = {0-8186-0882-X},
  pages     = {419--424},
  location  = {Orlando, Florida, United States},
  publisher = {IEEE Computer Society Press},
}

@article{BaileyLS1990,
  author    = {D. Bailey and K. Lee and H. Simon},
  title     = {Using {Strassen's} algorithm to accelerate the solution of linear systems},
  journal   = {J. Supercomput.},
  volume    = {4},
  number    = {4},
  year      = {1990},
  issn      = {0920-8542},
  pages     = {357--371},
  doi       = {10.1007/BF00129836},
  publisher = {Kluwer Academic Publishers},
  address   = {Hingham, MA, USA},
}
@Article{BientinesiDv2005,
  author  = {P. Bientinesi and I. S. Dhillon and R. A. van de Geijn},
  title   = {A Parallel Eigensolver for Dense Symmetric Matrices Based on Multiple Relatively Robust Representations},
  journal = {SIAM Journal on Scientific Computing},
  year    = {2005},
  volume  = {27},
  number  = {1},
  pages   = {43--66},
}

@inproceedings{BilardiDN01,
  title     = {Fractal matrix multiplication: a case study on portability of cache performance},
  author    = {G. Bilardi and P. D'Alberto and A. Nicolau },
  booktitle = {Workshop on Algorithm Engineering 2001},
  address   = {Aarhus, Denmark},
  year      = {2001}
}

@InProceedings{BilmesACD97,
  author    = {J. Bilmes and K. Asanovic and C. Chin and J. Demmel},
  title     = {Optimizing matrix multiply using {PHiPAC}: a portable, high-performance, {ANSI C} coding methodology},
  booktitle = {International Conference on Supercomputing},
  year      = {1997},
  month     = jul,
}
@TechReport{Brent1970B,
  author      = {R. P. Brent},
  title       = {Algorithms for matrix multiplication},
  institution = {Stanford University},
  year        = {1970},
  number      = {TR-CS-70-157},
  month       = mar,
  url         = {http://web.comlab.ox.ac.uk/oucl/work/richard.brent/pd/rpb002i.pdf},
}


@Article{Brent1970,
  author  = {R. P. Brent},
  title   = {Error analysis of algorithms for matrix multiplication and triangular decomposition using {Winograd's} identity},
  journal = {Numerische Mathematik},
  year    = {1970},
  volume  = {16},
  pages   = {145--156},
  url     = {http://web.comlab.ox.ac.uk/oucl/work/richard.brent/pd/rpb004.pdf},
}

@misc{CohnKSU2005,
  author               = {H. Cohn and R. Kleinberg and B. Szegedy and C. Umans},
  title                = {Group-theoretic algorithms for matrix multiplication},
  month                = nov,
  year                 = {2005},
  eprint               = {math.GR/0511460},
  url                  = {http://arxiv.org/abs/math.GR/0511460},
  keywords             = {algorithm algorithms combinatorics cs graph group-theory math mathematics matrix matrix-multiplication pre-print},
  citeulike-article-id = {402464},
}

@InProceedings{CoppersmithW87,
  author    = {D. Coppersmith and S. Winograd},
  title     = {Matrix Multiplication via Arithmetic Progressions},
  booktitle = {Proceedings of the 19-th annual ACM conference on {T}heory of computing},
  pages     = {1--6},
  year      = {1987},
}





@inproceedings{DalbertoN2005a,
  title     = {Using Recursion to Boost {ATLAS}'s Performance},
  author    = {P. D'Alberto and A. Nicolau},
  year      = {2005},
  booktitle = {The Sixth International Symposium on High Performance Computing (ISHPC-VI)}
}

@InProceedings{DalbertoN2005,
  author    = {P. D'Alberto and A. Nicolau},
  title     = {Adaptive {S}trassen and {ATLAS}'s {DGEMM}: A Fast Square-Matrix Multiply for Modern High-Performance Systems},
  booktitle = {The 8th International Conference on High Performance Computing in Asia Pacific Region (HPC asia)},
  pages     = {45--52},
  year      = {2005},
  address   = {Beijing},
  month     = dec,
}

@InProceedings{ChatterjeeLPT,
  author    = {S. Chatterjee and A. R. Lebeck and P. K. Patnala and M. Thottethodi},
  title     = {Recursive array layout and fast parallel matrix multiplication},
  booktitle = {Proc. 11-th ACM SIGPLAN},
  year      = {1999},
  month     = jun,
}

@article{DemmelH92,
  author  = {J. Demmel and N. Higham},
  title   = {Stability of Block Algorithms with Fast Level-3 {BLAS}},
  journal = {ACM Transactions on Mathematical Software},
  volume  = {18},
  number  = {3},
  pages   = {274--291},
  year    = {1992},
  url     = {http://citeseer.ist.psu.edu/demmel92stability.html},
}


@ARTICLE{Demmel:05,
  AUTHOR  = {J. Demmel and J. Dongarra and V. Eijkhout and E. Fuentes and A. Petitet and R. Vuduc and R. C. Whaley and K. Yelick},
  TITLE   = {Self-{A}dapting linear algebra algorithms and software},
  JOURNAL = {Proceedings of the IEEE, special issue on ``Program Generation, Optimization, and Adaptation''},
  VOLUME  = {93},
  NUMBER  = {2},
  YEAR    = {2005},
}

@misc{DemmelDHK2006,
  author               = {J. Demmel and I. Dumitriu and O. Holtz and R. Kleinberg},
  title                = {Fast matrix multiplication is stable},
  month                = mar,
  year                 = {2006},
  eprint               = {math.NA/0603207},
  url                  = {http://arxiv.org/abs/math.NA/0603207},
  keywords             = {algorithms computation mathematics},
  priority             = {2},
  citeulike-article-id = {543540},
}

@article{DouglasHSSS94,
  author  = {C. C. Douglas and M. Heroux and G. Slishman and R. M. Smith},
  title   = {{GEMMW}: {A} Portable Level 3 {BLAS Winograd} Variant of {Strassen's} Matrix--Matrix Multiply Algorithm},
  journal = {J. Comp. Phys.},
  volume  = {110},
  pages   = {1--10},
  year    = {1994},
  url     = {http://citeseer.ist.psu.edu/douglas94gemmw.html},
}

@InProceedings{EironRS98,
  author    = {N. Eiron and M. Rodeh and I. Steinwarts},
  title     = {Matrix multiplication: a case study of algorithm engineering},
  booktitle = {Proceedings WAE'98},
  year      = {1998},
  address   = {Saarbr{\"u}cken, Germany},
  month     = aug,
}

@Article{FFTW05,
  author  = {M. Frigo and S. Johnson},
  title   = {The Design and Implementation of {FFTW3}},
  journal = {Proceedings of the IEEE, special issue on ``Program Generation, Optimization, and Adaptation''},
  year    = {2005},
  volume  = {93},
  number  = {2},
  pages   = {216--231},
}

@InProceedings{FrigoLPR99,
  author    = {M. Frigo and C. E. Leiserson and H. Prokop and S. Ramachandran},
  title     = {Cache-oblivious algorithms},
  booktitle = {Proceedings 40th Annual Symposium on Foundations of Computer Science},
  year      = {1999},
}



@Article{FrensW97,
  author  = {J. D. Frens and D. S. Wise},
  title   = {Auto-{B}locking matrix-multiplication or tracking {BLAS3} performance from source code},
  journal = {Proc. 1997 ACM Symp. on Principles and Practice of Parallel Programming},
  pages   = {206--216},
  year    = {1997},
  volume  = {32},
  number  = {7},
  month   = jul,
}

@Unpublished{GotoG2006,
  author = {K. Goto and R. A. van de Geijn},
  title  = {Anatomy of High-Performance Matrix Multiplication},
  note   = {ACM Transactions on Mathematical Software},
}

@techreport{grayson95high,
  author      = {B. Grayson and A. Pankaj Shah and R. A. van de Geijn},
  title       = {A High Performance Parallel {Strassen} Implementation},
  institution = {Department of Computer Sciences, University of Texas at Austin},
  number      = {CS-TR-95-24},
  OPTmonth    = {1,},
  year        = {1995},
  url         = {http://citeseer.ist.psu.edu/grayson95high.html},
}

@Article{Gunnels:2001:FFL,
  author    = {J. A. Gunnels and F. G. Gustavson and G. M. Henry and R. A. van de Geijn},
  title     = {{FLAME}: {Formal Linear Algebra Methods Environment}},
  journal   = {{ACM} Transactions on Mathematical Software},
  volume    = {27},
  number    = {4},
  pages     = {422--455},
  month     = dec,
  year      = {2001},
  CODEN     = {ACMSCU},
  ISSN      = {0098-3500},
  bibsource = {http://www.acm.org/pubs/contents/journals/toms/},
  URL       = {http://doi.acm.org/10.1145/504210.504213},
}





@article{Higham1990,
  author    = {N. J. Higham},
  title     = {Exploiting fast matrix multiplication within the level 3 {BLAS}},
  journal   = {ACM Trans. Math. Softw.},
  volume    = {16},
  number    = {4},
  year      = {1990},
  issn      = {0098-3500},
  pages     = {352--368},
  doi       = {10.1145/98267.98290},
  publisher = {ACM Press},
}



@Book{Higham2002,
  author    = {N. J. Higham},
  title     = {Accuracy and Stability of Numerical Algorithms},
  edition   = {Second},
  publisher = {SIAM},
  year      = {2002},
}

@inproceedings{Huss-LedermaJTTJ96,
  author    = {S. Huss-Lederman and E. M. Jacobson and A. Tsao and T. Turnbull and J. R. Johnson},
  title     = {Implementation of {Strassen's} algorithm for matrix multiplication},
  booktitle = {Supercomputing '96: Proceedings of the 1996 ACM/IEEE conference on Supercomputing (CDROM)},
  year      = {1996},
  isbn      = {0-89791-854-1},
  pages     = {32},
  location  = {Pittsburgh, Pennsylvania, United States},
  doi       = {10.1145/369028.369096},
  publisher = {ACM Press},
}

@TechReport{Huss-LedermanJJTT96,
  author      = {S. Huss-Lederman and E. Jacobson and J. Johnson and A. Tsao and T. Turnbull},
  title       = {{Strassen's} algorithm for matrix multiplication: Modeling, analysis, and implementation},
  OPTtext     = "Steven Huss-Lederman, Elaine M. Jacobson, J. R. Johnson, Anna Tsao, and
    Thomas Turnbull. Strassen's algorithm for matrix multiplication: Modeling,
    analysis, and implementation. Technical Report Technical Report CCS-TR-96-147,
    Center for Computing Sciences, November 1996. 45",
  number      = {CCS-TR-96-147},
  institution = {Center for Computing Sciences},
  month       = nov,
  year        = {1996},
  url         = {http://citeseer.ist.psu.edu/huss-lederman96strassens.html},
}

@inproceedings{LiGP05,
  author    = {X. Li and M. Garzaran and D. Padua},
  title     = {Optimizing Sorting with Genetic Algorithms},
  booktitle = {Proc. of the Int. Symp. on Code Generation and Optimization},
  pages     = {99--110},
  month     = mar,
  year      = {2005},
  url       = {http://citeseer.ist.psu.edu/li05optimizing.html},
}

@article{Kaporini2004,
  author    = {Igor Kaporin},
  title     = {The aggregation and cancellation techniques as a practical tool for faster matrix multiplication},
  journal   = {Theor. Comput. Sci.},
  volume    = {315},
  number    = {2--3},
  year      = {2004},
  issn      = {0304-3975},
  pages     = {469--510},
  doi       = {10.1016/j.tcs.2004.01.004},
  publisher = {Elsevier Science Publishers Ltd.},
  address   = {Essex, UK},
}

@Article{Kaporin1999,
  author    = {I. Kaporin},
  title     = {A practical algorithm for faster matrix multiplication},
  journal   = {Numerical Linear Algebra with Applications},
  year      = {1999},
  OPTkey    = {},
  volume    = {6},
  number    = {8},
  pages     = {687--700},
  OPTmonth  = {},
  note      = {Centre for Supercomputer and Massively Parallel Applications, Computing Centre of the Russian Academy of Sciences, Vavilova 40, Moscow 117967, Russia},
  OPTannote = {}
}

@Article{KagstromLVL981,
  author  = {K{\aa}gstr{\"o}m, B. and Ling, P. and Van Loan, C.},
  title   = {Algorithm 784: {GEMM}-based level 3 {BLAS}: portability and optimization issues},
  journal = {ACM Transactions on Mathematical Software},
  year    = {1998},
  volume  = {24},
  number  = {3},
  pages   = {303--316},
  month   = sep,
}
@Article{KagstromLVL982,
  author  = {K{\aa}gstr{\"o}m, B. and Ling, P. and Van Loan, C.},
  title   = {{GEMM}-based level 3 {BLAS}: high-performance model implementations and performance evaluation benchmark},
  journal = {ACM Transactions on Mathematical Software},
  year    = {1998},
  volume  = {24},
  number  = {3},
  pages   = {268--302},
  month   = sep,
}




@Article{Knight1995,
  author    = {P. Knight},
  title     = {Fast rectangular matrix multiplication and {QR}-Decomposition},
  journal   = {Linear Algebra and its Applications},
  year      = {1995},
  OPTkey    = {},
  volume    = {221},
  OPTnumber = {},
  pages     = {69--81},
  OPTmonth  = {},
  OPTnote   = {},
  OPTannote = {}
}




@InProceedings{NguyenLBH2005,
  author    = {D. Nguyen and I. Lavallee and M. Bui and Q. Ha},
  title     = {A General Scalable Implementation of Fast Matrix Multiplication Algorithms on Distributed Memory Computers},
  booktitle = {Proceedings Sixth International Conference on Software Engineering, Artificial Intelligence, Networking and Parallel/Distributed Computing and First ACIS International Workshop on Self-Assembling Wireless Networks},
  pages     = {116--122},
  year      = {2005},
  doi       = {10.1109/SNPD-SAWN.2005.2},
}

@InProceedings{OhtakiTBS2004,
  author    = {Y. Ohtaki and D. Takahashi and T. Boku and M. Sato},
  title     = {Parallel Implementation of {Strassen's} Matrix Multiplication Algorithm for Heterogeneous Clusters},
  booktitle = {Proceedings of the 18th International Parallel and Distributed Processing Symposium},
  pages     = {112},
  year      = {2004},
  doi       = {10.1109/IPDPS.2004.1303066},
}


@article{Pan1984,
  author    = {V. Pan},
  title     = {How Can We Speed Up Matrix Multiplication?},
  journal   = {SIAM Review},
  volume    = {26},
  number    = {3},
  pages     = {393--415},
  year      = {1984},
  publisher = {SIAM},
  doi       = {10.1137/1026076},
  url       = {http://link.aip.org/link/?SIR/26/393/1},
}

@inproceedings{Pan1978,
  author      = {V. Pan},
  title       = {{Strassen's} Algorithm Is not Optimal: Trilinear Technique
                 of Aggregating, Uniting and Canceling for Constructing Fast
                 Algorithms for Matrix Operations},
  booktitle   = {19th Annual Symposium on Foundations of Computer Science},
  year        = {1978},
  pages       = {166--176},
  OPTcrossref = {DBLP:conf/focs/FOCS19},
  bibsource   = {DBLP, http://dblp.uni-trier.de},
}

@inproceedings{Priest91,
  author    = {D. Priest},
  title     = {Algorithms for arbitrary precision floating point arithmetic},
  booktitle = {Proceedings of the 10th {IEEE} Symposium on Computer Arithmetic (Arith-10)},
  publisher = {IEEE Computer Society Press, Los Alamitos, CA},
  address   = {Grenoble, France},
  editor    = {P. Kornerup and D.~W. Matula},
  pages     = {132--144},
  year      = {1991},
  url       = {http://citeseer.ist.psu.edu/priest91algorithms.html},
}

@ARTICLE{Pueschel:05,
  AUTHOR  = {M. P{\"u}schel and J. M. F. Moura and J. Johnson and D. Padua and M. Veloso and B. W. Singer and J. Xiong and F. Franchetti and A. Ga{\v{c}}i{\'c} and Y. Voronenko and K. Chen and R. W. Johnson and N. Rizzolo},
  TITLE   = {{SPIRAL}: Code Generation for {DSP} Transforms},
  JOURNAL = {Proceedings of the IEEE, special issue on ``Program Generation, Optimization, and Adaptation''},
  VOLUME  = {93},
  NUMBER  = {2},
  YEAR    = {2005},
}

@Article{Strassen69,
  author  = {V. Strassen},
  title   = {Gaussian elimination is not optimal},
  journal = {Numerische Mathematik},
  year    = {1969},
  volume  = {13},
  number  = {4},
  pages   = {354--356},
}
 

@InProceedings{ThottethodiCL98,
  author    = {M. Thottethodi and S. Chatterjee and A. R. Lebeck},
  title     = {Tuning {Strassen's} matrix multiplication for memory efficiency},
  booktitle = {Proc. Supercomputing},
  year      = {1998},
  address   = {Orlando, FL},
  month     = nov,
}
 

@InProceedings{WhaleyD98,
  title     = {Automatically tuned linear algebra software},
  author    = {R. Whaley and J. Dongarra},
  booktitle = {Proceedings of the 1998 ACM/IEEE conference on Supercomputing (CDROM)},
  publisher = {IEEE Computer Society},
  location  = {San Jose, CA},
  pages     = {1--27},
  isbn      = {0-89791-984-X},
  year      = {1998}
}

 


HOME

Copyright (c) 2007, P. D'Alberto, A. Nicolau, and A. Kumar.