@InProceedings{AggarwalACS87,
  author    = {A. Aggarwal and B. Alpern and A.K. Chandra and M. Snir},
  title     = {A model for hierarchical memory},
  booktitle = {Proceedings of 19th Annual ACM Symposium on the Theory of Computing},
  pages     = {305--314},
  year      = {1987},
  address   = {New York},
}
@InProceedings{AggarwalCS87,
  author    = {A. Aggarwal and A.K. Chandra and M. Snir},
  title     = {Hierarchical memory with block transfer},
  booktitle = {28th Annual Symposium on Foundations of Computer Science},
  pages     = {204--216},
  year      = {1987},
  address   = {Los Angeles, California},
  month     = oct,
}
@Book{AndersonBBDDDGHMOS95,
  author    = {E. Anderson and Z. Bai and C. Bischof and J. Demmel and
               J. Dongarra and J. DuCroz and A. Greenbaum and S. Hammarling and
               A. McKenney and S. Ostrouchov and D. Sorensen},
  title     = {{LAPACK} Users' Guide, Release 2.0},
  publisher = {SIAM},
  year      = {1995},
  edition   = {Second},
}
@inproceedings{BaileyG88,
  author    = {D. H. Bailey and H. R. P. Ferguson},
  title     = {A {S}trassen-{N}ewton algorithm for high-speed parallelizable
               matrix inversion},
  booktitle = {Supercomputing '88: Proceedings of the 1988 ACM/IEEE
               conference on Supercomputing},
  year      = {1988},
  isbn      = {0-8186-0882-X},
  pages     = {419--424},
  location  = {Orlando, Florida, United States},
  publisher = {IEEE Computer Society Press},
}
@article{BaileyLS1990,
  author    = {D. Bailey and K. Lee and H. Simon},
  title     = {Using Strassen's algorithm to accelerate the solution of linear
               systems},
  journal   = {J. Supercomput.},
  volume    = {4},
  number    = {4},
  year      = {1990},
  issn      = {0920-8542},
  pages     = {357--371},
  doi       = {10.1007/BF00129836},
  publisher = {Kluwer Academic Publishers},
  address   = {Hingham, MA, USA},
}
@Article{BientinesiDv2005,
  author  = {P. Bientinesi and I.S. Dhillon and R.A. van de Geijn},
  title   = {A Parallel Eigensolver for Dense Symmetric
             Matrices Based on Multiple Relatively Robust Representations},
  journal = {SIAM Journal on Scientific Computing},
  year    = {2005},
  volume  = {27},
  number  = {1},
  pages   = {43--66},
}
@inproceedings{BilardiDN01,
  title     = {Fractal matrix multiplication: a case study on portability of cache performance},
  author    = {G. Bilardi and P. D'Alberto and A. Nicolau },
  year      = {2001},
  booktitle = {Workshop on Algorithm Engineering 2001},
  address   = {Aarhus, Denmark}
}
@InProceedings{BilmesACD97,
  author    = {J. Bilmes and K. Asanovic and C. Chin and J. Demmel},
  title     = {Optimizing matrix multiply using {PH}i{PAC}: a
               portable, high-performance, {ANSI} {C} coding methodology},
  booktitle = {International Conference on Supercomputing},
  year      = {1997},
  month     = jul,
}
@TechReport{Brent1970B,
  author      = {R. P. Brent},
  title       = {Algorithms for matrix multiplication},
  institution = {Stanford University},
  year        = {1970},
  number      = {TR-CS-70-157},
  month       = mar,
  url         = {http://web.comlab.ox.ac.uk/oucl/work/richard.brent/pd/rpb002i.pdf},
}
@Article{Brent1970,
  author  = {R. P. Brent},
  title   = {Error analysis of algorithms for matrix
             multiplication and triangular decomposition using {W}inograd's identity},
  journal = {Numerische Mathematik},
  year    = {1970},
  volume  = {16},
  pages   = {145--156},
  url     = {http://web.comlab.ox.ac.uk/oucl/work/richard.brent/pd/rpb004.pdf},
}
@misc{CohnKSU2005,
  author               = {H. Cohn and R. Kleinberg and B. Szegedy and C. Umans},
  title                = {Group-theoretic algorithms for matrix multiplication},
  year                 = {2005},
  month                = nov,
  eprint               = {math.GR/0511460},
  url                  = {http://arxiv.org/abs/math.GR/0511460},
  citeulike-article-id = {402464},
  keywords             = {algorithm algorithms combinatorics
                          cs graph group-theory math mathematics matrix matrix-multiplication
                          pre-print},
}
@InProceedings{CoppersmithW87,
  author    = {D. Coppersmith and S. Winograd},
  title     = {Matrix Multiplication via Arithmetic Progressions},
  booktitle = {Proceedings of the 19-th annual ACM conference on {T}heory
               of computing},
  pages     = {1--6},
  year      = {1987},
}
@inproceedings{DalbertoN2005a,
  title     = {Using Recursion to Boost {ATLAS}'s Performance},
  author    = {P. D'Alberto and A. Nicolau},
  year      = {2005},
  booktitle = {The Sixth International Symposium on High Performance Computing (ISHPC-VI)},
}
@InProceedings{DalbertoN2005,
  author    = {P. D'Alberto and A. Nicolau},
  title     = {Adaptive {S}trassen and {ATLAS}'s {DGEMM}: A Fast
               Square-Matrix Multiply for Modern High-Performance Systems},
  booktitle = {The 8th International Conference on High Performance
               Computing in Asia Pacific Region (HPC asia)},
  pages     = {45--52},
  year      = {2005},
  address   = {Beijing},
  month     = dec,
}
@InProceedings{ChatterjeeLPT,
  author    = {S. Chatterjee and A.R. Lebeck and P.K. Patnala and M. Thottethodi},
  title     = {Recursive array layout and fast parallel matrix multiplication},
  booktitle = {Proc. 11-th ACM SIGPLAN},
  year      = {1999},
  month     = jun,
}
@article{DemmelH92,
  title   = {Stability of Block Algorithms with Fast Level-3 {BLAS}},
  author  = {J. Demmel and N. Higham},
  journal = {ACM Transactions on Mathematical Software},
  year    = {1992},
  volume  = {18},
  number  = {3},
  pages   = {274--291},
  url     = {citeseer.ist.psu.edu/demmel92stability.html},
}
@ARTICLE{Demmel:05,
  AUTHOR  = {J. Demmel and J. Dongarra and V. Eijkhout and E. Fuentes
             and A. Petitet and R. Vuduc and R.C. Whaley and K. Yelick},
  TITLE   = {Self-{A}dapting linear algebra algorithms and software},
  JOURNAL = {Proceedings of the IEEE, special issue on ``Program
             Generation, Optimization, and Adaptation''},
  VOLUME  = {93},
  NUMBER  = {2},
  YEAR    = {2005},
}
@misc{DemmelDHK2006,
  author               = {J. Demmel and I. Dumitriu and O. Holtz and R. Kleinberg},
  title                = {Fast matrix multiplication is stable},
  year                 = {2006},
  month                = mar,
  eprint               = {math.NA/0603207},
  url                  = {http://arxiv.org/abs/math.NA/0603207},
  citeulike-article-id = {543540},
  keywords             = {algorithms computation mathematics},
  priority             = {2},
}
@article{DouglasHSSS94,
  title   = {{GEMMW}: {A} Portable Level 3 {BLAS Winograd} Variant of {Strassen's} Matrix--Matrix Multiply Algorithm},
  author  = {C.C. Douglas and M. Heroux and G. Slishman and R.M. Smith},
  journal = {J. Comp. Phys.},
  year    = {1994},
  volume  = {110},
  pages   = {1--10},
  url     = {citeseer.ist.psu.edu/douglas94gemmw.html},
}
@InProceedings{EironRS98,
  author    = {N. Eiron and M. Rodeh and I. Steinwarts},
  title     = {Matrix multiplication: a case study of algorithm engineering},
  booktitle = {Proceedings WAE'98},
  year      = {1998},
  address   = {Saarbr{\"u}cken, Germany},
  month     = aug,
}
@Article{FFTW05,
  author  = {M. Frigo and S. Johnson},
  title   = {The Design and Implementation of {FFTW3}},
  journal = {Proceedings of the IEEE, special issue on ``Program
             Generation, Optimization, and Adaptation''},
  year    = {2005},
  volume  = {93},
  number  = {2},
  pages   = {216--231},
}
@InProceedings{FrigoLPR99,
  author    = {M. Frigo and C.E. Leiserson and H. Prokop and S. Ramachandran},
  title     = {Cache-oblivious algorithms},
  booktitle = {Proceedings 40th Annual Symposium on Foundations of
               Computer Science},
  year      = {1999},
}
@Article{FrensW97,
  author  = {J.D. Frens and D.S. Wise},
  title   = {Auto-{B}locking matrix-multiplication or tracking
             {BLAS3} performance from source code},
  journal = {Proc. 1997 ACM Symp. on Principles and Practice of Parallel Programming},
  pages   = {206--216},
  year    = {1997},
  volume  = {32},
  number  = {7},
  month   = jul,
}
@unpublished{GotoG2006,
  title  = {Anatomy of High-Performance Matrix Multiplication},
  author = {K. Goto and R.A. van de Geijn},
  note   = {ACM Transactions on Mathematical Software},
}
@techreport{grayson95high,
  author      = {B. Grayson and A. Pankaj Shah and R.A. van de Geijn},
  title       = {A High Performance Parallel Strassen Implementation},
  institution = {Department of Computer Sciences, The University of Texas at Austin},
  number      = {CS-TR-95-24},
  year        = {1995},
  OPTmonth    = {1,},
  url         = {citeseer.ist.psu.edu/grayson95high.html},
}
@article{Gunnels:2001:FFL,
  title     = {{FLAME}: {Formal Linear Algebra Methods Environment}},
  author    = {J.A. Gunnels and F.G. Gustavson and G.M. Henry and R.A. van de Geijn},
  journal   = {{ACM} Transactions on Mathematical Software},
  year      = {2001},
  month     = dec,
  volume    = {27},
  number    = {4},
  pages     = {422--455},
  CODEN     = {ACMSCU},
  ISSN      = {0098-3500},
  URL       = {http://doi.acm.org/10.1145/504210.504213},
  bibsource = {http://www.acm.org/pubs/contents/journals/toms/},
}
@article{Higham1990,
  author    = {N.J. Higham},
  title     = {Exploiting fast matrix multiplication within the level 3
               {BLAS}},
  journal   = {ACM Trans. Math. Softw.},
  volume    = {16},
  number    = {4},
  year      = {1990},
  issn      = {0098-3500},
  pages     = {352--368},
  doi       = {10.1145/98267.98290},
  publisher = {ACM Press},
}
@Book{Higham2002,
  author    = {N.J. Higham},
  ALTeditor = {},
  title     = {Accuracy and Stability of Numerical Algorithms},
  edition   = {Second},
  publisher = {SIAM},
  year      = {2002},
}
@inproceedings{Huss-LedermaJTTJ96,
  author    = {S. Huss-Lederman and E.M. Jacobson and A. Tsao and T. Turnbull
               and J.R. Johnson},
  title     = {Implementation of {S}trassen's algorithm for matrix
               multiplication},
  booktitle = {Supercomputing '96: Proceedings of the 1996 ACM/IEEE
               conference on Supercomputing (CDROM)},
  year      = {1996},
  isbn      = {0-89791-854-1},
  pages     = {32},
  location  = {Pittsburgh, Pennsylvania, United States},
  doi       = {10.1145/369028.369096},
  publisher = {ACM Press},
}
@TechReport{Huss-LedermanJJTT96,
  author      = "S. Huss-Lederman and E. Jacobson and J. Johnson and A.
                 Tsao and T. Turnbull",
  title       = "Strassen's algorithm for matrix multiplication: Modeling,
                 analysis, and implementation",
  OPTtext     = "Steven Huss-Lederman, Elaine M. Jacobson, J. R. Johnson,
                 Anna Tsao, and
                 Thomas Turnbull. Strassen's algorithm for matrix
                 multiplication: Modeling,
                 analysis, and implementation. Technical Report Technical
                 Report CCS-TR-96-147,
                 Center for Computing Sciences, November 1996.
                 45",
  number      = {CCS-TR-96-147},
  institution = {Center for Computing Sciences},
  month       = nov,
  year        = "1996",
  url         = "citeseer.ist.psu.edu/huss-lederman96strassens.html",
}
@inproceedings{LiGP05,
  author    = "X. Li and M. Garzaran and D. Padua",
  title     = "Optimizing Sorting with Genetic Algorithms",
  booktitle = "Proc. of the Int. Symp. on Code Generation and
               Optimization",
  pages     = "99--110",
  month     = mar,
  year      = "2005",
  url       = "citeseer.ist.psu.edu/li05optimizing.html",
}
@article{Kaporini2004,
  author    = {Igor Kaporin},
  title     = {The aggregation and cancellation techniques as a
               practical tool for faster matrix multiplication},
  journal   = {Theor. Comput. Sci.},
  volume    = {315},
  number    = {2--3},
  year      = {2004},
  issn      = {0304-3975},
  pages     = {469--510},
  doi       = {10.1016/j.tcs.2004.01.004},
  publisher = {Elsevier Science Publishers Ltd.},
  address   = {Essex, UK},
}
@Article{Kaporin1999,
  author    = {I. Kaporin},
  title     = {A practical algorithm for faster matrix multiplication},
  journal   = {Numerical Linear Algebra with Applications},
  year      = {1999},
  OPTkey    = {},
  volume    = {6},
  number    = {8},
  pages     = {687--700},
  OPTmonth  = {},
  note      = {Centre for Supercomputer and Massively
               Parallel Applications, Computing Centre of the Russian Academy of Sciences,
               Vavilova 40, Moscow 117967, Russia},
  OPTannote = {}
}
@Article{KagstromLVL981,
  author  = {B. K{\aa}gstr{\"o}m and P. Ling and C. {Van Loan}},
  title   = {Algorithm 784: {GEMM}-based level 3 {BLAS}:
             portability and optimization issues},
  journal = {ACM Transactions on Mathematical Software},
  year    = {1998},
  volume  = {24},
  number  = {3},
  pages   = {303--316},
  month   = sep,
}
@Article{KagstromLVL982,
  author  = {B. K{\aa}gstr{\"o}m and P. Ling and C. {Van Loan}},
  title   = {{GEMM}-based level 3 {BLAS}: high-performance
             model implementations and performance evaluation benchmark},
  journal = {ACM Transactions on Mathematical Software},
  year    = {1998},
  volume  = {24},
  number  = {3},
  pages   = {268--302},
  month   = sep,
}
@article{Knight1995,
  title     = {Fast rectangular matrix multiplication and {QR}-Decomposition},
  author    = {P. Knight},
  journal   = {Linear algebra and its applications},
  volume    = {221},
  pages     = {69--81},
  year      = {1995},
  OPTkey    = {},
  OPTnumber = {},
  OPTmonth  = {},
  OPTnote   = {},
  OPTannote = {},
}
@InProceedings{NguyenLBH2005,
  author    = {D. Nguyen and I. Lavallee and M. Bui and Q. Ha},
  title     = {A General Scalable Implementation of Fast Matrix
               Multiplication Algorithms on Distributed Memory Computers},
  booktitle = {Proceedings Sixth International Conference on Software
               Engineering, Artificial Intelligence, Networking and Parallel/Distributed
               Computing and First ACIS International Workshop on Self-Assembling Wireless
               Networks},
  pages     = {116--122},
  year      = {2005},
  note      = {http://doi.ieeecomputersociety.org/10.1109/SNPD-SAWN.2005.2}
}
@inproceedings{OhtakiTBS2004,
  title     = {Parallel Implementation of Strassen's Matrix Multiplication Algorithm for Heterogeneous Clusters},
  author    = {Y. Ohtaki and D. Takahashi and T. Boku and M. Sato},
  year      = {2004},
  booktitle = {Proceedings of the 18th International Parallel and Distributed Processing Symposium},
  pages     = {112},
  note      = {http://doi.ieeecomputersociety.org/10.1109/IPDPS.2004.1303066},
}
@article{Pan1984,
  author        = {V. Pan},
  collaboration = {},
  title         = {How Can We Speed Up Matrix Multiplication?},
  publisher     = {SIAM},
  year          = {1984},
  journal       = {SIAM Review},
  volume        = {26},
  number        = {3},
  pages         = {393--415},
  url           = {http://link.aip.org/link/?SIR/26/393/1},
  doi           = {10.1137/1026076}
}
@inproceedings{Pan1978,
  author      = {V. Pan},
  title       = {Strassen's Algorithm Is not Optimal:
                 Trilinear Technique
                 of Aggregating, Uniting and Canceling for Constructing Fast
                 Algorithms for Matrix Operations},
  booktitle   = {FOCS},
  year        = {1978},
  pages       = {166--176},
  OPTcrossref = {DBLP:conf/focs/FOCS19},
  bibsource   = {DBLP, http://dblp.uni-trier.de}
}
@inproceedings{Priest91,
  author    = "D. Priest",
  title     = "Algorithms for arbitrary precision floating
               point arithmetic",
  booktitle = "Proceedings of the 10th {IEEE} Symposium
               on Computer Arithmetic (Arith-10)",
  publisher = "IEEE Computer Society Press, Los Alamitos, CA",
  address   = "Grenoble, France",
  editor    = "P. Kornerup and D.~W. Matula",
  pages     = "132--144",
  year      = "1991",
  url       = "citeseer.ist.psu.edu/priest91algorithms.html",
}
@ARTICLE{Pueschel:05,
  AUTHOR  = {M. P{\"u}schel and J.M.F. Moura and J. Johnson and D.
             Padua and M. Veloso and B.W. Singer and J. Xiong and F. Franchetti and A.
             Ga{\v{c}}i{\'c} and Y. Voronenko and K. Chen and R.W. Johnson and N. Rizzolo},
  TITLE   = {{SPIRAL}: Code Generation for {DSP} Transforms},
  JOURNAL = {Proceedings of the IEEE, special issue on ``Program
             Generation, Optimization, and Adaptation''},
  VOLUME  = {93},
  NUMBER  = {2},
  YEAR    = {2005},
}
@Article{Strassen69,
  author  = {V. Strassen},
  title   = {Gaussian elimination is not optimal},
  journal = {Numerische Mathematik},
  year    = {1969},
  volume  = {13},
  number  = {4},
  pages   = {354--356},
}
@InProceedings{ThottethodiCL98,
  author    = {M. Thottethodi and S. Chatterjee and A.R. Lebeck},
  title     = {Tuning {S}trassen's matrix multiplication for
               memory efficiency},
  booktitle = {Proc. Supercomputing},
  year      = {1998},
  address   = {Orlando, FL},
  month     = nov,
}
@InProceedings{WhaleyD98,
  title     = {Automatically tuned linear algebra software},
  author    = {R. Whaley and J. Dongarra},
  year      = {1998},
  booktitle = {Proceedings of the 1998 ACM/IEEE conference on Supercomputing (CDROM)},
  pages     = {1--27},
  publisher = {IEEE Computer Society},
  location  = {San Jose, CA},
  isbn      = {0-89791-984-X}
}