% Publisher name and publisher city are stored in separate fields.
@book{maekawa_et_al,
  author    = {Maekawa, M. and Oldehoeft, A.E. and Oldehoeft, R.R.},
  title     = {Operating Systems: Advanced Concepts},
  publisher = {Benjamin/Cummings},
  address   = {Menlo Park, CA},
  year      = {1987}
}


% The ACM Journal of Experimental Algorithmics, Vol. 5, 2000
@article{xiao_et_al,
  author  = {Li Xiao and Xiaodong Zhang and Stefan A. Kubricht},
  title   = {Improving memory performance of sorting algorithms},
  journal = {Journal of Experimental Algorithmics (JEA)},
  volume  = {5},
  pages   = {3},
  year    = {2000},
  issn    = {1084-6654}
}

@article{bojesen_et_al,
  author  = {Jesper Bojesen and Jyrki Katajainen and Maz Spork},
  title   = {Performance engineering case study: heap construction},
  journal = {Journal of Experimental Algorithmics (JEA)},
  volume  = {5},
  pages   = {15},
  year    = {2000},
  issn    = {1084-6654},
  note    = {\url{http://www.jea.acm.org/2000/BojesenPerformance/}}
}

@article{sanders,
  author  = {Peter Sanders},
  title   = {Fast priority queues for cached memory},
  journal = {Journal of Experimental Algorithmics (JEA)},
  volume  = {5},
  pages   = {7},
  year    = {2000},
  issn    = {1084-6654}
}

% Title: trailing blank removed; the whole word is braced rather than one letter.
@article{hidaka_et_al90,
  author  = {Hideto Hidaka and Yoshio Matsuda and Mikio Asakura and Kazuyasu Fujishima},
  title   = {The {Cache} {DRAM} Architecture: A {DRAM} with an On-Chip Cache Memory},
  journal = {{IEEE} Micro},
  volume  = {10},
  number  = {2},
  pages   = {14--25},
  month   = {March/April},
  year    = {1990}
}

% Page range corrected; the earlier value repeated the range of the preceding entry.
@article{ho_et_al,
  author  = {Ron Ho and Kenneth W. Mai and Mark A. Horowitz},
  title   = {The Future of Wires},
  journal = {Proc. of the {IEEE}},
  volume  = {89},
  number  = {4},
  pages   = {490--504},
  month   = {April},
  year    = {2001}
}

@inproceedings{lee_et_al99,
  author    = {Jang-Soo Lee and Won-Kee Hong and Shin-Dug Kim},
  title     = {Design and Evaluation of a Selective Compressed Memory System},
  booktitle = {Proc. {IEEE} Int. Conf. on Computer Design},
  pages     = {184--191},
  address   = {Austin, TX},
  month     = {10--13 October},
  year      = {1999}
}

@article{rahman_and_raman,
  author  = {Naila Rahman and Rajeev Raman},
  title   = {Analysing cache effects in distribution sorting},
  journal = {Journal of Experimental Algorithmics (JEA)},
  volume  = {5},
  pages   = {14},
  year    = {2000},
  issn    = {1084-6654}
}

%%----------

% First author's given name corrected to Gordon; trailing blank removed from the title.
@inproceedings{bell_and_strecker,
  author    = {Bell, Gordon and Strecker, W. D.},
  title     = {Retrospective: what have we learned from the {PDP-11}--what we have learned from {VAX} and {Alpha}},
  booktitle = {25 years of the international symposia on Computer Architecture (selected papers)},
  address   = {New York, {NY}},
  year      = {1998},
  pages     = {6--10}
}

@book{crowley,
  author    = {Crowley, Charles},
  title     = {Operating Systems: A Design-Oriented Approach},
  publisher = {Irwin},
  address   = {Chicago},
  year      = {1997}
}

% Publisher spelling corrected (Kaufmann); title uses the published spelling "Organization".
@book{hennessy_and_patterson_1,
  author    = {Patterson, D.A. and Hennessy, J.L.},
  title     = {Computer Organization and Design: The Hardware/Software Interface},
  publisher = {Morgan Kaufmann},
  address   = {San Francisco, CA},
  year      = {1994}
}
% Publisher spelling corrected (Kaufmann).
@book{hennessy_and_patterson_1ed,
  author    = {Hennessy, J.L. and Patterson, D.A.},
  title     = {Computer Architecture: A Quantitative Approach},
  edition   = {1st},
  publisher = {Morgan Kaufmann},
  address   = {San Francisco, CA},
  year      = {1990}
}


% Publisher spelling corrected (Kaufmann).
@book{hennessy_and_patterson_2,
  author    = {Hennessy, J.L. and Patterson, D.A.},
  title     = {Computer Architecture: A Quantitative Approach},
  edition   = {2nd},
  publisher = {Morgan Kaufmann},
  address   = {San Francisco, CA},
  year      = {1996}
}


% Publisher spelling corrected (Kaufmann).
@book{hennessy_and_patterson_3,
  author    = {Hennessy, J.L. and Patterson, D.A.},
  title     = {Computer Architecture: A Quantitative Approach},
  edition   = {3rd},
  publisher = {Morgan Kaufmann},
  address   = {San Francisco, CA},
  year      = {2002},
  note      = {in press}
}

% DOACROSS is braced so sentence-casing styles keep it upper-case.
@article{chen_and_yew,
  author  = {Chen, Ding-Kai and Yew, Pen-Chung},
  title   = {Redundant Synchronization Elimination for {DOACROSS} Loops},
  journal = {{IEEE} Trans. on Parallel and Distributed Systems},
  volume  = {10},
  number  = {5},
  pages   = {459--470},
  month   = {May},
  year    = {1999}
}

% Missing period added after the initial of Golbus, matching the other authors.
@article{kozyrakis_et_al,
  author  = {Kozyrakis, C.E. and Perissakis, S. and Patterson, D. and Anderson, T. and Asanovi{\'c}, K. and Cardwell, N. and Fromm, R. and Golbus, J. and Gribstad, B. and Keeton, K. and Thomas, R. and Treuhaft, N. and Yelick, K.},
  title   = {Scalable Processors in the Billion-Transistor Era: {IRAM}},
  journal = {Computer},
  volume  = {30},
  number  = {9},
  pages   = {75--78},
  month   = {September},
  year    = {1997}
}

% The doi field holds the bare identifier, without a resolver prefix.
@inproceedings{fromm_et_al,
  author    = {Richard Fromm and Stylianos Perissakis and Neal Cardwell and Christoforos Kozyrakis and Bruce McGaughy and David Patterson and Tom Anderson and Katherine Yelick},
  title     = {The energy efficiency of {IRAM} architectures},
  booktitle = {Proc. 24th Int. Symp. on Computer Architecture},
  pages     = {327--337},
  address   = {Denver, CO},
  location  = {Denver, Colorado, United States},
  year      = {1997},
  isbn      = {0-89791-901-7},
  doi       = {10.1145/264107.264214}
}

@book{handy,
  author    = {Handy, J.},
  title     = {The Cache Memory Book},
  edition   = {2nd},
  publisher = {Academic Press},
  address   = {San Diego, CA},
  year      = {1998}
}

@book{silberschatz_et_al,
  author    = {Abraham Silberschatz and Peter Baer Galvin and Greg Gagne},
  title     = {Operating System Concepts},
  edition   = {6th},
  publisher = {John Wiley},
  address   = {New York},
  year      = {2002}
}



% Reprint inside an edited collection: recorded as a collection chapter so the
% host book, editor and publisher are actually printed (an article entry has no
% journal here and ignores howpublished).
@incollection{burks_et_al,
  author    = {Burks, A.W. and Goldstine, H.H. and von Neumann, J.},
  title     = {Preliminary Discussion of the Logical Design of an Electronic Computing Instrument},
  booktitle = {The Origins of Digital Computers: Selected Papers},
  editor    = {Randell, Brian},
  edition   = {Second},
  publisher = {Springer Verlag},
  year      = {1975}
}




@article{smith,
  author  = {Smith, A.J.},
  title   = {Cache Memories},
  journal = {{ACM} Computing Surveys},
  volume  = {14},
  number  = {3},
  pages   = {473--530},
  month   = {September},
  year    = {1982}
}


% Second author's initials corrected to D.B.G., matching the reprint entry for the same paper.
@article{kilburn_et_al,
  author  = {Kilburn, T. and Edwards, D.B.G. and Lanigan, M.J. and Sumner, F.H.},
  title   = {One-level storage system},
  journal = {{IRE} Trans. on Electronic Computers},
  volume  = {{EC}-11},
  number  = {2},
  pages   = {223--235},
  month   = {April},
  year    = {1962}
}


@article{moore,
  author  = {Gordon E. Moore},
  title   = {Cramming more components onto integrated circuits},
  journal = {Electronics},
  volume  = {38},
  number  = {8},
  pages   = {114--117},
  month   = {19 April},
  year    = {1965}
}

% Reprint in a book: recorded as a collection chapter so the host book is printed.
% Fourth author's surname corrected to Sumner.
@incollection{kilburn_et_al_alternative,
  author    = {Kilburn, T. and Edwards, D.B.G. and Lanigan, M.J. and Sumner, F.H.},
  title     = {One-Level Storage System},
  booktitle = {Computer Structures: Principles and Examples},
  publisher = {McGraw-Hill},
  address   = {New York},
  year      = {1982}
}

@article{belayneh_and_kaeli,
  author  = {Belayneh, S. and Kaeli, D.R.},
  title   = {A Discussion of Non-Blocking/Lockup-Free Caches},
  journal = {Computer Architecture News},
  volume  = {24},
  number  = {3},
  pages   = {18--25},
  month   = {June},
  year    = {1996}
}

@inproceedings{dahlgren_et_al,
  author    = {F. Dahlgren and M. Dubois and P. Stenstr{\"o}m},
  title     = {Combined performance gains of simple cache protocol extensions},
  booktitle = {Proc. 21st annual Int. Symp. on Computer Architecture},
  pages     = {187--197},
  address   = {Chicago, Ill},
  year      = {1994}
}
% isbn = {0-8186-5510-0},
% doi = {http://doi.acm.org/10.1145/191995.192028},
% publisher = {IEEE Computer Society Press},

% Page range uses a double hyphen (en-dash).
@inproceedings{rivers_et_al,
  author    = {Jude A. Rivers and Gary S. Tyson and Edward S. Davidson and Todd M. Austin},
  title     = {On High-Bandwidth Data Cache Design for Multi-Issue Processors},
  booktitle = {Int. Symp. on Microarchitecture},
  pages     = {46--56},
  address   = {Research Triangle Park, NC},
  month     = {December},
  year      = {1997}
}
% url = "citeseer.nj.nec.com/18876.html"


@article{lenoski_et_al_1996,
  author  = {Lenoski, D. and Laudon, J. and Gharachorloo, K. and Weber, W-D. and Gupta, A. and Hennessy, J. and Horowitz, M. and Lam, M.S.},
  title   = {The {S}tanford {DASH} Multiprocessor},
  journal = {Computer},
  volume  = {25},
  number  = {3},
  pages   = {63--79},
  month   = {March},
  year    = {1992}
}

@article{hill,
  author  = {Hill, M.D.},
  title   = {A Case for Direct Mapped Caches},
  journal = {Computer},
  volume  = {21},
  number  = {12},
  pages   = {25--40},
  month   = {December},
  year    = {1988}
}

% First author's surname corrected to Bennett (the citation key is left unchanged).
@inproceedings{bennet_et_al,
  author    = {Bennett, J.K. and Carter, J.B. and Zwaenepoel, W.},
  title     = {Adaptive Software Cache Management for Distributed Shared Memory Architectures},
  booktitle = {Proc. 17th Int. Symp. on Computer Architecture (ISCA '90)},
  pages     = {125--134},
  address   = {Seattle, {WA}},
  month     = {May},
  year      = {1990}
}

@inproceedings{dwarkadas_et_al,
  author    = {Dwarkadas, S. and Keleher, P. and Cox, A.L. and Zwaenepoel, W.},
  title     = {Release Consistent Software Distributed Shared Memory on Emerging Network Technology},
  booktitle = {Proc. 20th Int. Symp. on Computer Architecture (ISCA '93)},
  pages     = {144--155},
  address   = {San Diego, {C}{A}},
  month     = {May},
  year      = {1993}
}


@inproceedings{agarwal_and_pudar,
  author    = {Agarwal, A. and Pudar, S.D.},
  title     = {Column-Associative Caches: A Technique for Reducing the Miss Rate of Direct Mapped Caches},
  booktitle = {Proc. 20th Int. Symp. on Computer Architecture (ISCA '93)},
  pages     = {179--190},
  month     = {May},
  year      = {1993}
}

% Accented letters are written as brace-wrapped special characters so BibTeX
% sorts and labels the names correctly; spaces removed inside the day range.
@inproceedings{topham_et_al,
  author    = {Topham, N. and Gonz{\'a}lez, A. and Gonz{\'a}lez, J.},
  title     = {The Design and Performance of a Conflict-Avoiding Cache},
  booktitle = {Proc. 30th Int. Symp. on Microarchitecture ({MICRO-30})},
  pages     = {71--80},
  address   = {Research Triangle Park, NC},
  month     = {1--3 December},
  year      = {1997}
}

@inproceedings{bershad_et_al,
  author    = {Bershad, B.N. and Lee, D. and Romer, T.H. and Chen, J.B.},
  title     = {Avoiding Conflict Misses Dynamically in Large Direct-Mapped Caches},
  booktitle = {Proc. 6th Int. Conf. on Architectural Support for Programming Languages and Operating Systems (ASPLOS-6)},
  pages     = {158--170},
  month     = {October},
  year      = {1994}
}
%http://doi.acm.org/10.1145/195473.195527

@inproceedings{olukotun_et_al,
  author    = {Olukotun, Kunle and Nayfeh, Basem A. and Hammond, Lance and Wilson, Ken and Chang, Kunyung},
  title     = {The case for a single-chip multiprocessor},
  booktitle = {Proc. 7th Int. Conf. on Architectural Support for Programming Languages and Operating Systems {(ASPLOS-7)}},
  pages     = {2--11},
  address   = {Cambridge, {MA}},
  month     = {October},
  year      = {1996}
}

@article{krishnan_torrellas,
  author  = {Venkata Krishnan and Josep Torrellas},
  title   = {A Chip-Multiprocessor Architecture with Speculative Multithreading},
  journal = {{IEEE} Trans. on Computers},
  volume  = {48},
  number  = {9},
  pages   = {866--880},
  month   = {September},
  year    = {1999}
}

@inproceedings{cheriton_et_al86,
  author    = {Cheriton, D.R. and Slavenburg, G. and Boyle, P.},
  title     = {Software-controlled caches in the {VMP} multiprocessor},
  booktitle = {Proc. 13th Int. Symp. on Computer Architecture (ISCA '86)},
  pages     = {366--374},
  address   = {Tokyo},
  month     = {June},
  year      = {1986}
}

% Missing period added after the last initial of Goosen.
@inproceedings{cheriton_et_al88,
  author    = {Cheriton, D.R. and Gupta, A. and Boyle, P.D. and Goosen, H.A.},
  title     = {The {VMP} Multiprocessor: Initial Experience, Refinements and Performance Evaluation},
  booktitle = {Proc. 15th Int. Symp. on Computer Architecture (ISCA '88)},
  pages     = {410--421},
  address   = {Honolulu},
  month     = {May/June},
  year      = {1988}
}

% Title: dropped word "in" restored before the machine name; missing period
% added after the last initial of Goosen.
@inproceedings{cheriton_et_al89,
  author    = {Cheriton, D.R. and Goosen, H.A. and Boyle, P.D.},
  title     = {Multi-Level Shared Caching Techniques for Scalability in {VMP-MC}},
  booktitle = {Proc. 16th Int. Symp. on Computer Architecture (ISCA '89)},
  pages     = {16--24},
  address   = {Jerusalem},
  month     = {May/June},
  year      = {1989}
}

@inproceedings{cheriton_et_al93,
  author    = {Cheriton, D.R. and Goosen, H.A. and Holbrook, H. and Machanick, P.},
  title     = {Restructuring a Parallel Simulation to Improve Cache Behavior in a Shared-Memory Multiprocessor: The Value of Distributed Synchronization},
  booktitle = {Proc. 7th Workshop on Parallel and Distributed Simulation},
  pages     = {159--162},
  address   = {San Diego},
  month     = {May},
  year      = {1993}
}


@misc{cheriton_et_al93_anon,
  author = {Anonymous},
  note   = {Detail suppressed for blind refereeing}
}

% The doi field holds the bare identifier, without a resolver prefix.
@inproceedings{lim_agarwal94,
  author    = {Beng-Hong Lim and Anant Agarwal},
  title     = {Reactive synchronization algorithms for multiprocessors},
  booktitle = {Proceedings Sixth Int. Conf. on Architectural Support for Programming Languages and Operating Systems},
  pages     = {25--35},
  address   = {San Jose, CA},
  year      = {1994},
  isbn      = {0-89791-660-3},
  doi       = {10.1145/195473.195490}
}

% The doi field holds the bare identifier, without a resolver prefix.
@article{mellorCrummey_scott91,
  author  = {John M. Mellor-Crummey and Michael L. Scott},
  title   = {Algorithms for scalable synchronization on shared-memory multiprocessors},
  journal = {{ACM} Trans. on Computer Systems (TOCS)},
  volume  = {9},
  number  = {1},
  pages   = {21--65},
  year    = {1991},
  issn    = {0734-2071},
  doi     = {10.1145/103727.103729}
}


% Title: missing space restored between "Shared-Memory" and "Multiprocessor".
% Third author rewritten in "Last, Initial." form like the other two.
@inproceedings{cheriton_et_al91b,
  author    = {Cheriton, D.R. and Goosen, H.A. and Machanick, P.},
  title     = {Restructuring a Parallel Simulation to Improve Cache Behavior in a Shared-Memory Multiprocessor: A First Experience},
  booktitle = {Proc. Int. Symp. on Shared Memory Multiprocessing},
  pages     = {109--118},
  address   = {Tokyo},
  month     = {April},
  year      = {1991}
}

% Periods added after the middle initials of all three authors.
@article{cheriton_et_al_91,
  author  = {Cheriton, David R. and Goosen, Hendrik A. and Boyle, Patrick D.},
  title   = {{ParaDiGM}: A Highly Scalable Shared-Memory Multicomputer Architecture},
  journal = {Computer},
  volume  = {24},
  number  = {2},
  pages   = {33--46},
  month   = {February},
  year    = {1991}
}

% Missing period added after the initial of Olukotun.
@inproceedings{wilson_and_olukotun,
  author    = {Wilson, K.M. and Olukotun, K.},
  title     = {Designing High Bandwidth On-Chip Caches},
  booktitle = {Proc. 24th Int. Symp. on Computer Architecture (ISCA '97)},
  pages     = {121--132},
  address   = {Denver},
  month     = {June},
  year      = {1997}
}

% A technical report: report number and issuing institution go in their own
% fields instead of being packed into a book publisher string.
@techreport{inouye_et_al,
  author      = {Inouye, J. and Konuru, R. and Walpole, J. and Sears, B.},
  title       = {The Effects of Virtually Addressed Caches on Virtual Memory Design and Performance},
  institution = {Department of Computer Science and Engineering, Oregon Graduate Institute of Science and Engineering},
  type        = {Tech. Report},
  number      = {CS/E 92-010},
  month       = {March},
  year        = {1992}
}

@article{crisp,
  author  = {Crisp, R.},
  title   = {{D}irect {R}ambus Technology: The New Main Memory Standard},
  journal = {{IEEE} Micro},
  volume  = {17},
  number  = {6},
  pages   = {18--28},
  month   = {November/December},
  year    = {1997}
}

@inproceedings{jacob_and_mudge,
  author    = {Jacob, B. and Mudge, T.},
  title     = {Software-Managed Address Translation},
  booktitle = {Proc. Third Int. Symp. on High-Performance Computer Architecture},
  pages     = {156--167},
  address   = {San Antonio, TX},
  month     = {February},
  year      = {1997}
}

% Entry-type line restored: without it BibTeX treats the whole record as free
% text and any citation of this key is undefined.
@article{jacob_and_mudgeVM98a,
  author  = {Jacob, B. and Mudge, T.},
  title   = {Virtual Memory: Issues of Implementation},
  journal = {Computer},
  volume  = {31},
  number  = {6},
  pages   = {33--43},
  month   = {June},
  year    = {1998}
}

% Entry-type line restored: without it BibTeX treats the whole record as free
% text and any citation of this key is undefined.
@article{jacob_and_mudgeVM98b,
  author  = {Jacob, B. and Mudge, T.},
  title   = {Virtual Memory in Contemporary Microprocessors},
  journal = {{IEEE} Micro},
  volume  = {18},
  number  = {4},
  pages   = {60--75},
  month   = {July/August},
  year    = {1998}
}


% Page range uses a double hyphen (en-dash).
@inproceedings{cuppu,
  author    = {Cuppu, V. and Jacob, B. and Davis, B. and Mudge, T.},
  title     = {Performance Comparison of Contemporary {DRAM} Architectures},
  booktitle = {Proc. 26th Annual Int. Symp. on Computer Architecture},
  pages     = {222--233},
  address   = {Atlanta, Georgia},
  month     = {May},
  year      = {1999}
}

% Second author's surname corrected to Savarese; machine name and sponsor
% acronyms are each protected by a single brace group.
@inproceedings{285573,
  author    = {Thomas Sterling and Daniel Savarese and Peter MacNeice and Kevin Olson and Clark Mobarry and Bruce Fryxell and Phillip Merkey},
  title     = {A Performance Evaluation of the {Convex SPP-1000} Scalable Shared Memory Parallel Computer},
  booktitle = {Proc. 1995 {ACM/IEEE} Conf. on Supercomputing {(CDROM)}},
  pages     = {55},
  address   = {San Diego, California, United States},
  year      = {1995}
}
% isbn = {0-89791-816-9},
% doi = {http://doi.acm.org/10.1145/224170.285573},
% publisher = {{ACM} Press},



% Publisher name and publisher city are stored in separate fields; acronyms in
% the title are protected from case changes.
@book{kane_and_heinrich,
  author    = {Kane, G. and Heinrich, J.},
  title     = {{MIPS} {RISC} Architecture},
  publisher = {Prentice Hall},
  address   = {Englewood Cliffs, NJ},
  year      = {1992}
}


% A doctoral thesis: recorded with the thesis entry type and the awarding
% department in the school field instead of a book publisher string.
@phdthesis{machanick_96,
  author = {Philip Machanick},
  title  = {An Object-Oriented Library for Shared-Memory Parallel Simulations},
  school = {Department of Computer Science, University of Cape Town},
  month  = {October},
  year   = {1996}
}



@misc{machanick_96_anon,
  author = {Anonymous},
  note   = {Detail suppressed for blind refereeing}
}

% The report number is delimited: as a bare token BibTeX reads it as an
% undefined string macro and prints nothing. The title is no longer wrapped in a
% second brace pair; only the acronym is protected.
@techreport{machanick_moad,
  author      = {P. Machanick},
  title       = {The Case for a Multiprocessor on a Die: {MoaD}},
  institution = {School of Computer Science, University of the Witwatersrand},
  type        = {Technical Report},
  number      = {TR-Wits-CS-2001-3},
  month       = may,
  year        = 2001,
  note        = {(6 pages)}
}

% Month uses the predefined three-letter macro; a bare full month name is an
% undefined macro and prints nothing.
@techreport{machanick_wall_survey,
  author      = {Philip Machanick},
  title       = {Approaches to Addressing the Memory Wall},
  institution = {School of IT and Electrical Engineering, University of Queensland},
  type        = {Technical Report},
  month       = nov,
  year        = 2002,
  pages       = {22},
  url         = {http://www.itee.uq.edu.au/~philip/Publications/Techreports/2002/Reports/memory-wall-survey.pdf}
}

% Month uses the predefined three-letter macro; a bare full month name is an
% undefined macro and prints nothing.
@techreport{machanick_multithreading,
  author      = {Philip Machanick},
  title       = {How Multithreading Addresses the Memory Wall},
  institution = {School of IT and Electrical Engineering, University of Queensland},
  type        = {Technical Report},
  month       = dec,
  year        = 2002,
  pages       = {10},
  url         = {http://www.itee.uq.edu.au/~philip/Publications/Techreports/2002/Reports/memory-wall-threads-TR.pdf}
}
% Month uses the predefined three-letter macro; a bare full month name is an
% undefined macro and prints nothing.
@techreport{machanick_et_al_reconfig_ram,
  author      = {Philip Machanick and Peter Sutton and Adam Postula},
  title       = {Reconfigurable Memory for Reconfigurable Computing},
  institution = {School of IT and Electrical Engineering, University of Queensland},
  type        = {Technical Report},
  month       = feb,
  year        = 2003,
  pages       = {7},
  url         = {http://www.itee.uq.edu.au/~philip/Publications/Techreports/2003/Reports/reconfigurable-memory.pdf}
}

% Stray equals sign after the entry type removed (it was a syntax error that made
% BibTeX skip the record). Report number delimited and month switched to the
% predefined macro, since bare tokens are read as undefined macros.
@techreport{machanick_and_patel_2002,
  author      = {P. Machanick and Z. Patel},
  title       = {{L1} Cache and {TLB} Enhancements to the {RAMpage} Memory Hierarchy},
  institution = {School of Computer Science, University of the Witwatersrand},
  type        = {Technical Report},
  number      = {TR-Wits-CS-2002-6},
  month       = dec,
  year        = 2002,
  note        = {(11 pages)}
}

% Month uses the predefined three-letter macro; a bare full month name is an
% undefined macro and prints nothing.
@techreport{machanick_patel,
  author      = {Philip Machanick and Zunaid Patel},
  title       = {{L1} Cache and {TLB} Enhancements to the {RAMpage} Memory Hierarchy},
  institution = {School of IT and Electrical Engineering, University of Queensland},
  type        = {Technical Report},
  month       = nov,
  year        = 2002,
  pages       = {11},
  url         = {http://www.itee.uq.edu.au/~philip/Publications/Techreports/2002/Reports/RAMpageTLBtechreport.pdf}
}

% Month uses the predefined three-letter macro; a bare full month name is an
% undefined macro and prints nothing. RAMpage is protected from case changes.
@inproceedings{machanick_and_patel_africon,
  author    = {Machanick, P. and Patel, Z.},
  title     = {The Effect of First-Level Cache Improvements on the {RAMpage} Memory Hierarchy},
  booktitle = {Proc. {IEEE} Africon '02},
  pages     = {27--40},
  address   = {George, South Africa},
  month     = oct,
  year      = 2002
}

% The report number is delimited: as a bare token BibTeX reads it as an
% undefined string macro and prints nothing. RAMpage is protected from case changes.
@techreport{machanick_and_patel,
  author      = {Machanick, P. and Patel, Z.},
  title       = {Further Cache and {TLB} Investigation of the {RAMpage} Memory Hierarchy},
  institution = {School of Computer Science, University of the Witwatersrand},
  type        = {Technical Report},
  number      = {TR-Wits-CS-2001-4},
  month       = may,
  year        = 2001,
  note        = {(9 pages)}
}

@article{machanick,
  author  = {Machanick, P.},
  title   = {The Case for {SRAM} Main Memory},
  journal = {Computer Architecture News},
  volume  = {24},
  number  = {5},
  pages   = {23--30},
  month   = {December},
  year    = {1996}
}


% Publication status belongs in note; the pages field is for page numbers only.
@article{machanick_ramnet,
  author  = {Philip Machanick},
  title   = {What if {DRAM} is a Slow Peripheral?},
  journal = {Computer Architecture News},
  month   = {December},
  year    = {2002},
  note    = {In press}
}
% volume = {30},
% number = {5},


% Anonymised placeholder: uses the misc type like the other placeholders, because
% an article entry without title, journal and year produces warnings.
@misc{machanick_ramnet_anon,
  author = {Anonymous},
  note   = {Detail suppressed for blind refereeing}
}

@misc{machanick_anon,
  author = {Anonymous},
  note   = {Detail suppressed for blind refereeing}
}

@article{machanick_oosh,
  author  = {Machanick, P.},
  title   = {Efficient Shared Memory Multiprocessing and Object-Oriented Programming},
  journal = {South African Computer Journal},
  number  = {16},
  pages   = {23--30},
  month   = {April},
  year    = {1996}
}

@misc{machanick_and_salverda_98a_anon,
  note   = {Detail suppressed for blind refereeing},
  author = {Anonymous}
}

@article{machanick_and_salverda_98a,
  author  = {Machanick, P. and Salverda, P.},
  title   = {Preliminary Investigation of the {RAMpage} Memory Hierarchy},
  journal = {South African Computer Journal},
  number  = {21},
  pages   = {16--25},
  month   = {August},
  year    = {1998}
}

@misc{machanick_2000_anon,
  note   = {Detail suppressed for blind refereeing},
  author = {Anonymous}
}

@article{machanick_2000,
  author  = {Machanick, P.},
  title   = {Scalability of the {RAMpage} Memory Hierarchy},
  journal = {South African Computer Journal},
  number  = {25},
  pages   = {68--73},
  month   = {August},
  year    = {2000}
}

@misc{machanick_and_salverda_et_al_98_anon,
  note   = {Detail suppressed for blind refereeing},
  author = {Anonymous}
}

@inproceedings{machanick_and_salverda_et_al_98,
  author    = {Machanick, P. and Salverda, P. and Pompe, L.},
  title     = {Hardware-Software Trade-Offs in a {D}irect {R}ambus Implementation of the {RAMpage} Memory Hierarchy},
  booktitle = {Proc. 8th Int. Conf. on Architectural Support for Programming Languages and Operating Systems (ASPLOS-VIII)},
  pages     = {105--114},
  address   = {San Jose, {CA}},
  month     = {October},
  year      = {1998}
}


@article{machanick99,
  author  = {Machanick, P.},
  title   = {Correction to {RAMpage} {ASPLOS} Paper},
  journal = {Computer Architecture News},
  volume  = {27},
  number  = {4},
  pages   = {2--5},
  month   = {September},
  year    = {1999}
}

@misc{machanick99_anon,
  note   = {Detail suppressed for blind refereeing},
  author = {Anonymous}
}

% A technical report: number and institution go in their own fields instead of a
% book publisher string, and the hard-coded line break is dropped from the note.
@techreport{burger_and_austin,
  author      = {Burger, D. and Austin, T.M.},
  title       = {The {SimpleScalar} Tool Set, Version 2.0},
  institution = {Computer Sciences Department, University of Wisconsin-Madison},
  type        = {Tech. Report},
  number      = {1342},
  month       = {June},
  year        = {1997},
  note        = {\url{ftp://ftp.cs.wisc.edu/galileo/dburger/papers/TR_1342.ps}}
}


@misc{machanick_and_salverda_98b_backup,
  note   = {Detail suppressed for blind refereeing},
  author = {Anonymous}
}

% A submitted manuscript: recorded as unpublished, with the status in note (an
% article entry has no journal here and ignores howpublished). Stray braces
% removed from the URL path, where they would be printed literally.
% NOTE(review): confirm the exact letter case of the file name in the URL.
@unpublished{machanick_and_salverda_98b,
  author = {Machanick, P. and Salverda, P.},
  title  = {Implications of Emerging {DRAM} Technologies for the {RAMpage} Memory Hierarchy},
  note   = {Submitted for publication. \url{http://www.cs.wits.ac.za/~philip/papers/rampage-sDRAM.html}},
  year   = {1997}
}

% Last author's surname corrected to Ishiguro.
@article{simmons_et_al,
  author  = {Simmons, M.L. and Wasserman, H.J. and Lubeck, O.A. and Eoyang, C. and Mendez, R. and Harada, H. and Ishiguro, M.},
  title   = {A Performance Comparison of Four Supercomputers},
  journal = {Comm. {ACM}},
  volume  = {35},
  number  = {8},
  pages   = {116--124},
  month   = {August},
  year    = {1992}
}

@inproceedings{fatoohi,
  author    = {Fatoohi, R.A.},
  title     = {Vector Performance Analysis of the {NEC} {SX}-2},
  booktitle = {Proc. Int. Conf. on Supercomputing},
  year      = {1990},
  pages     = {389--400}
}


% The umlaut is written as a brace-wrapped special character so BibTeX sorts and
% labels the name correctly.
@inproceedings{burger_et_al,
  author    = {Burger, Doug and Goodman, James R. and K{\"a}gi, Alain},
  title     = {Memory bandwidth limitations of future microprocessors},
  booktitle = {Proc. 23rd Annual Int. Symp. on Computer Architecture},
  pages     = {78--89},
  address   = {Philadelphia, PA},
  month     = {May},
  year      = {1996}
}


@inproceedings{lin_et_al,
  author    = {Lin, Wei-Fen and Reinhardt, Steven and Burger, Doug},
  title     = {Reducing {DRAM} Latencies with an Integrated Memory Hierarchy Design},
  booktitle = {Proc. 7th Int. Symp. on High Performance Computer Architecture ({HPCA-7})},
  year      = {2001},
  month     = {January},
  address   = {Monterrey, Mexico},
  pages     = {301--312}
}

@webpage{amd,
  author = {{AMD}},
  title  = {{AMD} {A}thlon Processor Model 4 Data Sheet},
  year   = {2001},
  month  = {November},
  url    = {http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/23792.pdf}
}



@Manual{Athlon,
  author       = {Jack Huynh},
  title        = {{The AMD Athlon XP Processor -- Technical White Paper 26485A}},
  organization = {Advanced Micro Devices},
  address      = {Sunnyvale, CA},
  month        = {June},
  year         = 2002
}

@webpage{hypertransport,
  author = {{AMD}},
  title  = {{HyperTransport} Technology: Simplifying System Design},
  year   = {2002},
  month  = {October},
  url    = {http://www.hypertransport.org/docs/26635A_HT_System_Design.pdf}
}

@webpage{ibm97_1,
  author = {{IBM}},
  title  = {Next-generation high-performance 64-bit Microprocessor family unveiled by {IBM}},
  year   = {1997},
  month  = {October},
  url    = {http://www.chips.ibm.com/news/power3.html}
}

@webpage{ibm98,
  author = {{IBM}},
  title  = {{PowerPC} 750 {RISC} Microprocessor Technical Summary},
  year   = {1998},
  month  = {January},
  url    = {http://www-3.ibm.com/chips/techlib/techlib.nsf/techdocs/852569B20050FF778525699300470399/$file/750_ts.pdf}
}

% Stray braces removed from the URL path, where they would be treated as part of
% the address.
% NOTE(review): confirm the exact letter case of the path components.
@webpage{ibm97_2,
  author = {{IBM}},
  title  = {Synchronous {DRAM}s: The {DRAM} of the Future},
  url    = {http://www.chips.ibm.com/products/memory/sDRAMart/sDRAMart.html},
  year   = {1997}
}


% A web document rather than a book: the address goes in howpublished of a misc
% entry instead of standing in for a publisher name.
@misc{intel98,
  author       = {{Intel}},
  title        = {Pentium {II} Processor Product Overview},
  howpublished = {\url{http://developer.intel.com/design/PentiumII/prodbref/index.htm}},
  year         = {1998}
}

@article{rosenblum_et_al_95,
  author  = {Rosenblum, M. and Herrod, S.A. and Witchel, E. and Gupta, A.},
  title   = {Complete Computer System Simulation: {T}he {S}im{OS} Approach},
  journal = {{IEEE} Parallel and Distributed Technology},
  volume  = {3},
  number  = {4},
  pages   = {34--43},
  month   = {Winter},
  year    = {1995}
}

@article{uhlig_et_al,
  author  = {Richard Uhlig and David Nagle and Trevor Mudge and Stuart Sechrest},
  title   = {Trap-driven memory simulation with {Tapeworm II}},
  journal = {{ACM} Trans. on Modeling and Computer Simulation (TOMACS)},
  volume  = {7},
  number  = {1},
  pages   = {7--41},
  year    = {1997},
  issn    = {1049-3301}
}
% doi = {http://doi.acm.org/10.1145/244804.244805},
% publisher = {{ACM} Press},

% Missing period added after the initial of Gupta.
@inproceedings{rosenblum_et_al_95b,
  author    = {Rosenblum, M. and Bugnion, E. and Herrod, S.A. and Witchel, E. and Gupta, A.},
  title     = {The impact of architectural trends on operating system performance},
  booktitle = {Proc. 15th {ACM} Symp. on Operating systems principles},
  pages     = {285--298},
  address   = {Copper Mountain, CO},
  month     = {December},
  year      = {1995}
}

@article{hamilton,
  author  = {Scott Hamilton},
  title   = {Semiconductor Research Corporation: Taking {M}oore's {L}aw Into the Next Century},
  journal = {Computer},
  volume  = {32},
  number  = {1},
  pages   = {43--48},
  month   = {January},
  year    = {1999}
}

@article{kaeli_and_emma,
  author  = {Kaeli, D.R. and Emma, P.G.},
  title   = {Improving the Accuracy of History-Based Branch Prediction},
  journal = {{IEEE} Trans. on Computers},
  volume  = {46},
  number  = {4},
  pages   = {469--472},
  month   = {April},
  year    = {1997}
}

@article{young_and_smith,
  author  = {Cliff Young and Michael D. Smith},
  title   = {Static correlated branch prediction},
  journal = {{ACM} Trans. on Programming Languages and Systems (TOPLAS)},
  volume  = {21},
  number  = {5},
  pages   = {1028--1075},
  year    = {1999},
  issn    = {0164-0925}
}
%doi = {http://doi.acm.org/10.1145/330249.330255},
%publisher = {{ACM} Press},

@article{wulf_and_mckee,
  author  = {Wulf, W.A. and McKee, S.A.},
  title   = {Hitting the Memory Wall: Implications of the Obvious},
  journal = {Computer Architecture News},
  volume  = {23},
  number  = {1},
  pages   = {20--24},
  month   = {March},
  year    = {1995}
}


% The doi field holds the bare identifier, without a resolver prefix.
@inproceedings{saulsbury_et_al_96,
  author    = {Ashley Saulsbury and Fong Pong and Andreas Nowatzyk},
  title     = {Missing the memory wall: the case for processor/memory integration},
  booktitle = {Proc. 23rd annual Int. Symp. on Computer architecture},
  pages     = {90--101},
  address   = {Philadelphia, Pennsylvania, United States},
  year      = {1996},
  isbn      = {0-89791-786-3},
  doi       = {10.1145/232973.232984}
}

% Duplicated word "on" removed from the proceedings title; the doi field holds
% the bare identifier, without a resolver prefix.
@inproceedings{cuppu_and_jacob,
  author    = {Vinodh Cuppu and Bruce Jacob},
  title     = {Concurrency, latency, or system overhead: which has the largest impact on uniprocessor {DRAM}-system performance?},
  booktitle = {Proc. 28th annual Int. Symp. on Computer Architecture},
  pages     = {62--71},
  address   = {G{\"o}teborg, Sweden},
  year      = {2001},
  isbn      = {0-7695-1162-7},
  doi       = {10.1145/379240.379252}
}

% Note corrected: "Int." was typed with a lower-case L, and the symposium held in
% Vancouver in 2000 is the 27th, as the other entry for that meeting records.
@inproceedings{davis_et_al,
  author    = {B. Davis and T. Mudge and B. Jacob and V. Cuppu},
  title     = {{DDR2} and low latency variants},
  booktitle = {Solving the Memory Wall Problem Workshop},
  address   = {Vancouver, Canada},
  month     = {June},
  year      = {2000},
  note      = {In conjunction with 27th Annual Int. Symp. on Computer Architecture}
}

@inproceedings{hallnor_and_reinhardt,
  author    = {Erik G. Hallnor and Steven K. Reinhardt},
  title     = {A fully associative software-managed cache design},
  booktitle = {Proc. 27th Annual Int. Symp. on Computer Architecture},
  pages     = {107--116},
  address   = {Vancouver, BC},
  year      = {2000}
}
% isbn = {1-58113-232-8},
% doi = {http://doi.acm.org/10.1145/339647.339660}

% The doi field holds the bare identifier, without a resolver prefix.
@inproceedings{kandiraju_and_sivasubramaniam,
  author    = {Gokul B. Kandiraju and Anand Sivasubramaniam},
  title     = {Characterizing the d-{TLB} behavior of {SPEC} {CPU2000} benchmarks},
  booktitle = {Proc. Int. Conf. on Measurement and Modeling of Computer Systems},
  pages     = {129--139},
  address   = {Marina Del Rey, CA},
  year      = {2002},
  isbn      = {1-58113-531-9},
  doi       = {10.1145/511334.511351}
}

@article{wilkes,
  author  = {Wilkes, M.V.},
  title   = {The Memory Wall and the {CMOS} End-Point},
  journal = {Computer Architecture News},
  volume  = {23},
  number  = {4},
  pages   = {4--6},
  month   = {September},
  year    = {1995}
}



@article{johnson,
  author  = {Johnson, E.E.},
  title   = {Graffiti on the Memory Wall},
  journal = {Computer Architecture News},
  volume  = {23},
  number  = {4},
  pages   = {7--8},
  month   = {September},
  year    = {1995}
}



% IEEE is brace-protected in the journal name, as in every other IEEE venue in
% this file.
@article{geppert,
  author  = {Geppert, L.},
  title   = {Semiconductor Lithography for the Next Millennium},
  journal = {{IEEE} Spectrum},
  volume  = {33},
  number  = {4},
  pages   = {33--38},
  month   = {April},
  year    = {1996}
}



% A web document rather than a book: the address goes in howpublished of a misc
% entry. The corporate author is double-braced so it is not split into given and
% family names.
% NOTE(review): the address had dots where path separators belong; the slashes
% below are a reconstruction - confirm against the original source.
@misc{exponential,
  author       = {{Exponential Technology Inc.}},
  title        = {Exponential's {BiCMOS} Technology},
  howpublished = {\url{http://www.exp.com/products/x704/bicmos.html}},
  year         = {1996}
}



@article{bacon_et_al,
  author  = {Bacon, D.F. and Graham, S.L. and Sharp, O.J.},
  title   = {Compiler Transformations for High Performance Computing},
  journal = {{ACM} Computing Surveys},
  volume  = {26},
  number  = {4},
  pages   = {345--420},
  month   = {December},
  year    = {1994}
}



@inproceedings{wall,
  author    = {Wall, D.W.},
  title     = {Limits of Instruction Level Parallelism},
  booktitle = {Proc. 4th Int. Conf. on Architectural Support for Programming Languages and Operating Systems (ASPLOS-4)},
  pages     = {176--188},
  address   = {Santa Clara, CA},
  month     = {April},
  year      = {1991}
}

@inproceedings{lam_and_wilson,
  author    = {Lam, Monica S. and Wilson, Robert P.},
  title     = {Limits of control flow on parallelism},
  booktitle = {Proc. 19th Annual Int. Symp. on Computer Architecture},
  pages     = {46--57},
  address   = {Queensland, Australia},
  month     = {19--21 May},
  year      = {1992}
}

@inproceedings{nakatani_ebcioglu,
  author    = {Toshio Nakatani and Kemal Ebcio{\u{g}}lu},
  title     = {Using a lookahead window in a compaction-based parallelizing compiler},
  booktitle = {Proc. 23rd Annual Workshop and Symp. on Microprogramming and Microarchitecture},
  pages     = {57--68},
  address   = {Orlando, FL},
  location  = {Orlando, Florida, United States},
  year      = {1990},
  isbn      = {0-89791-413-9}
}

% The doi field holds the bare identifier. The venue is also given in address,
% the field used throughout this file; location alone is not printed by classic styles.
@inproceedings{martel_et_al,
  author    = {Iv{\'a}n Martel and Daniel Ortega and Eduard Ayguad{\'e} and Mateo Valero},
  title     = {Increasing effective {IPC} by exploiting distant parallelism},
  booktitle = {Proc. 13th Int. Conf. on Supercomputing},
  pages     = {348--355},
  address   = {Rhodes, Greece},
  location  = {Rhodes, Greece},
  year      = {1999},
  isbn      = {1-58113-164-X},
  doi       = {10.1145/305138.305212}
}

% The doi field holds the bare identifier. The venue is also given in address,
% the field used throughout this file; location alone is not printed by classic styles.
@inproceedings{yeh_and_patt_92,
  author    = {Tse-Yu Yeh and Yale N. Patt},
  title     = {Alternative implementations of two-level adaptive branch prediction},
  booktitle = {Proc. 19th annual Int. Symp. on Computer Architecture},
  pages     = {124--134},
  address   = {Queensland, Australia},
  location  = {Queensland, Australia},
  year      = {1992},
  isbn      = {0-89791-509-7},
  doi       = {10.1145/139669.139709}
}


% Yeh and Patt: comparison of two-level dynamic branch predictors (1993).
% The middle initial now carries its period ("Yale N.").
@inproceedings{yeh_and_patt,
  author    = {Yeh, Tse-Yu and Patt, Yale N.},
  title     = {A comparison of dynamic branch predictors that use two levels of branch history},
  booktitle = {Proc. 20th Annual Int. Symp. on Computer Architecture},
  address   = {San Diego, CA},
  pages     = {257--266},
  month     = {16--19 May},
  year      = {1993}
}

% Tyson: predicated execution and its effect on branch prediction (27th Int. Symp. on Microarchitecture, 1994).
@inproceedings{tyson_predicated,
  author    = {Tyson, Gary Scott},
  title     = {The effects of predicated execution on branch prediction},
  booktitle = {Proc. 27th Annual Int. Symp. on Microarchitecture},
  pages     = {196--206},
  address   = {San Jose, CA},
  month     = {30 November--2 December},
  year      = {1994}
}


% Reinman et al.: classifying loads and stores for memory renaming (Int. Conf. on Supercomputing, 1999).
@inproceedings{reinman_et_al,
  author    = {Reinman, Glenn and Calder, Brad and Tullsen, Dean and Tyson, Gary and Austin, Todd},
  title     = {Classifying load and store instructions for memory renaming},
  booktitle = {Proc. 1999 Int. Conf. on Supercomputing},
  pages     = {399--407},
  address   = {Rhodes, Greece},
  month     = {June},
  year      = {1999}
}

% Hiraki et al.: speculative execution model with duplication (Int. Conf. on Supercomputing, 1998).
@inproceedings{hiraki_et_al,
  author    = {Hiraki, Kei and Tamatsukuri, Junji and Matsumoto, Takashi},
  title     = {Speculative execution model with duplication},
  booktitle = {Proc. 1998 Int. Conf. on Supercomputing},
  pages     = {321--328},
  address   = {Melbourne, Australia},
  month     = {July},
  year      = {1998}
}

% Lee et al.: instruction cache fetch policies under speculative execution (1995).
% The doi field holds the bare DOI; "Annual" is capitalised to match the
% other entries for this symposium series.
@inproceedings{lee_et_al95,
  author    = {Dennis Lee and Jean-Loup Baer and Brad Calder and Dirk Grunwald},
  title     = {Instruction cache fetch policies for speculative execution},
  booktitle = {Proc. 22nd Annual Int. Symp. on Computer Architecture},
  pages     = {357--367},
  address   = {S. Margherita Ligure, Italy},
  location  = {S. Margherita Ligure, Italy},
  year      = {1995},
  isbn      = {0-89791-698-0},
  doi       = {10.1145/223982.224446}
}

% Rogers and Li: software support for speculative loads (1992).
% NOTE(review): same authors, title, year and pages as entry rogers_and_li
% further down this file -- looks like a duplicate under a second key; both
% keys are kept because documents may cite either one.
% The ordinal is written "5th" so this paper's venue is spelled the same way
% under either key.
@inproceedings{rogers_lee,
  author    = {Anne Rogers and Kai Li},
  title     = {Software support for speculative loads},
  booktitle = {Proc. 5th Int. Conf. on Architectural Support for Programming Languages and Operating Systems},
  pages     = {38--50},
  address   = {Boston, Massachusetts, United States},
  year      = {1992}
}
% isbn = {0-89791-534-8},
% doi = {http://doi.acm.org/10.1145/143365.143484},
% publisher = {{ACM} Press},

% Lee, Wu and Tyson: ILP limits on an EPIC architecture (ISPASS, 2000).
@inproceedings{lee_et_al,
  author    = {Lee, Hsien-Hsin and Wu, Youfeng and Tyson, Gary},
  title     = {Quantifying Instruction-Level Parallelism Limits on an {EPIC} Architecture},
  booktitle = {Proc. {IEEE} Int. Symp. on Performance Analysis of Systems and Software {(ISPASS)}},
  pages     = {21--27},
  address   = {Austin, {TX}},
  month     = {April},
  year      = {2000}
}

% Palacharla, Jouppi and Smith: complexity-effective superscalar processors (1997).
% The doi field holds the bare DOI; styles add the resolver prefix themselves.
@inproceedings{palacharla_et_al,
  author    = {Subbarao Palacharla and Norman P. Jouppi and J. E. Smith},
  title     = {Complexity-effective superscalar processors},
  booktitle = {Proc. 24th Annual Int. Symp. on Computer Architecture},
  pages     = {206--218},
  address   = {Denver, CO},
  location  = {Denver, Colorado, United States},
  month     = {1--4 June},
  year      = {1997},
  isbn      = {0-89791-901-7},
  doi       = {10.1145/264107.264201}
}


% Skadron et al.: trade-offs among branch prediction, instruction window and cache size (IEEE Trans. on Computers, 1999).
@article{skadron,
  author  = {Kevin Skadron and Pritpal S. Ahuja and Margaret Martonosi and Douglas W. Clark},
  title   = {Branch Prediction, Instruction-Window Size, and Cache Size: Performance Trade-Offs and Simulation Techniques},
  journal = {{IEEE} Trans. on Computers},
  volume  = {48},
  number  = {11},
  pages   = {1260--1281},
  month   = {November},
  year    = {1999}
}

% Gonzalez, Tubella and Molina: trace-level reuse (Int. Conf. on Parallel Processing, 1999).
% The accent is written as a brace-wrapped special character so BibTeX sorts
% and labels the name correctly.
% NOTE(review): pages 21--27 are identical to those of entry lee_et_al --
% possibly a copy-paste slip; verify against the proceedings.
@inproceedings{gonzalez_et_al,
  author    = {Gonz{\'a}lez, A. and Tubella, J. and Molina, C.},
  title     = {Trace-Level Reuse},
  booktitle = {Int. Conf. on Parallel Processing},
  address   = {Wakamatsu, Japan},
  pages     = {21--27},
  month     = {21--24 September},
  year      = {1999}
}

% Monreal et al.: delaying register allocation via virtual-physical registers (MICRO-32, 1999).
% Repairs the mis-encoded surname of the second author (now the same LaTeX
% escape used for the fourth author) and makes the ordinal agree with the
% "(MICRO-32)" tag in the same field.
@inproceedings{monreal_et_al,
  author    = {Monreal, Teresa and Gonz{\'a}lez, Antonio and Valero, Mateo and Gonz{\'a}lez, Jos{\'e} and Vi{\~n}als, Victor},
  title     = {Delaying physical register allocation through virtual-physical registers},
  booktitle = {Proc. 32nd Annual {ACM/IEEE} Int. Symp. on Microarchitecture (MICRO-32)},
  address   = {Haifa, Israel},
  pages     = {186--192},
  month     = {November},
  year      = {1999}
}

% Diefendorff: Power4 article in Microprocessor Report (1999).
% The forced line break was removed from the note; line breaking is left to
% the bibliography style and the url package.
@article{diefendorff,
  author  = {Diefendorff, Keith},
  title   = {Power4 Focuses on Memory Bandwidth},
  journal = {Microprocessor Report},
  volume  = {13},
  number  = {3},
  month   = {6 October},
  year    = {1999},
  note    = {\url{http://www.chips.ibm.com/news/1999/microprocessor99.pdf}}
}


% Tendler et al.: POWER4 system microarchitecture (IBM J. Res. Dev. 46(1), 2002).
% The third author uses the "Last, Jr., First" form; written as
% "J. S. Fields, Jr." BibTeX would take "J. S. Fields" as the surname and
% "Jr." as the given name. The forced line break was removed from the note.
@article{tendler_et_al,
  author  = {J. M. Tendler and J. S. Dodson and Fields, Jr., J. S. and H. Le and B. Sinharoy},
  title   = {{POWER4} system microarchitecture},
  journal = {{IBM} Journal of Research and Development},
  volume  = {46},
  number  = {1},
  pages   = {5--25},
  year    = {2002},
  note    = {\url{http://researchweb.watson.ibm.com/journal/rd/461/tendler.html}}
}



% Borkenhagen et al.: multithreaded PowerPC processor (IBM J. Res. Dev. 44(6), 2000).
% The forced line break was removed from the note; line breaking is left to
% the bibliography style and the url package.
@article{borkenhagen_et_al,
  author  = {J. M. Borkenhagen and R. J. Eickemeyer and R. N. Kalla and S. R. Kunkel},
  title   = {A multithreaded {PowerPC} processor for commercial servers},
  journal = {{IBM} Journal of Research and Development},
  volume  = {44},
  number  = {6},
  pages   = {885--898},
  month   = {November},
  year    = {2000},
  note    = {\url{http://www.research.ibm.com/journal/rd/446/borkenhagen.pdf}}
}
% claims that multithreading is good for absorbing cache misses


% Hennessy and Jouppi: computer technology and architecture (Computer 24(9), 1991).
% Initials are space-separated so abbreviating styles keep every initial.
@article{hennessy_and_jouppi,
  author  = {Hennessy, J. L. and Jouppi, N. P.},
  title   = {Computer Technology and Architecture: An Evolving Interaction},
  journal = {Computer},
  volume  = {24},
  number  = {9},
  pages   = {18--29},
  month   = {September},
  year    = {1991}
}


% Martonosi, Gupta and Anderson: tuning memory performance (Computer 28(4), 1995).
% The third author's initials are space-separated so abbreviating styles
% keep both of them.
@article{martonosi_et_al,
  author  = {Martonosi, M. and Gupta, A. and Anderson, T. E.},
  title   = {Tuning Memory Performance of Sequential and Parallel Programs},
  journal = {Computer},
  volume  = {28},
  number  = {4},
  pages   = {32--40},
  month   = {April},
  year    = {1995}
}



% Halfhill: article on Intel's P6 (Byte 20(4), 1995).
% Initials are space-separated so abbreviating styles keep both of them.
@article{halfhill,
  author  = {Halfhill, T. R.},
  title   = {Intel's {P6}},
  journal = {Byte},
  volume  = {20},
  number  = {4},
  pages   = {42--58},
  month   = {April},
  year    = {1995}
}



% Bhandarkar: Alpha implementations (newsletter article, 1995).
% The venue is stored in journal, which this entry type requires; the
% howpublished field is ignored for articles and left journal empty.
@article{bhandarkar,
  author  = {Bhandarkar, D.},
  title   = {Alpha Implementations},
  journal = {{IEEE} {CS} Tech. Committee on Comp. Arch. Newsletter},
  pages   = {1--10},
  month   = {December},
  year    = {1995}
}



% Mirapuri, Woodacre and Vasseghi: the MIPS R4000 processor (IEEE Micro, 1992).
% NOTE(review): volume and number were missing. They are inferred from the
% numbering of the other IEEE Micro entries in this file (vol. 10 = 1990,
% vol. 14 no. 4 = August 1994) -- confirm against the issue.
@article{mirapuri_et_al,
  author  = {Mirapuri, S. and Woodacre, M. and Vasseghi, N.},
  title   = {The {MIPS} {R4000} Processor},
  journal = {{IEEE} Micro},
  volume  = {12},
  number  = {2},
  pages   = {10--22},
  month   = {April},
  year    = {1992}
}



% Boland and Dollas: predicting and precluding memory-latency problems (IEEE Micro 14(4), 1994).
@article{boland_and_dollas,
  author  = {Boland, K. and Dollas, A.},
  title   = {Predicting and Precluding Problems with Memory Latency},
  journal = {{IEEE} Micro},
  volume  = {14},
  number  = {4},
  pages   = {59--67},
  month   = {August},
  year    = {1994}
}



% Clark and Emer: VAX-11/780 translation buffer performance (ACM TOCS 3(1), 1985).
% Initials are space-separated so abbreviating styles keep every initial.
@article{clark_and_emer,
  author  = {Clark, D. W. and Emer, J. S.},
  title   = {Performance of the {VAX-11/780} Translation Buffer: Simulation and Measurement},
  journal = {{ACM} Trans. on Computer Systems},
  volume  = {3},
  number  = {1},
  pages   = {31--62},
  month   = {February},
  year    = {1985}
}

% Wheeler and Bershad: consistency management for virtually indexed caches (ASPLOS-5, 1992).
% The second author's initials are space-separated so abbreviating styles
% keep both of them.
@inproceedings{wheeler_and_bershad,
  author    = {Wheeler, B. and Bershad, B. N.},
  title     = {Consistency Management for Virtually Indexed Caches},
  booktitle = {Proc. 5th Int. Conf. on Architectural Support for Programming Languages and Operating Systems ({ASPLOS}-5)},
  pages     = {124--136},
  month     = {September},
  year      = {1992}
}

% Jouppi: small fully-associative cache and prefetch buffers for direct-mapped caches (ISCA '90).
% Initials are space-separated so abbreviating styles keep both of them.
@inproceedings{jouppi,
  author    = {Jouppi, N. P.},
  title     = {Improving Direct-Mapped Cache Performance by the Addition of a Small Fully-Associative Cache and Prefetch Buffers},
  booktitle = {Proc. 17th Int. Symp. on Computer Architecture (ISCA '90)},
  pages     = {364--373},
  month     = {May},
  year      = {1990}
}

%Very good summary of cache write policies and performance implications
% Jouppi: cache write policies and performance (1993).
% "Annual" is capitalised to match entry yeh_and_patt, which cites the same
% 20th symposium.
@inproceedings{jouppi93,
  author    = {Norman P. Jouppi},
  title     = {Cache write policies and performance},
  booktitle = {Proc. 20th Annual Int. Symp. on Computer Architecture},
  pages     = {191--201},
  address   = {San Diego, California, United States},
  year      = {1993}
}
% isbn = {0-8186-3810-9},
% doi = {http://doi.acm.org/10.1145/165123.165154},
% publisher = {{ACM} Press},



% Barroso, Gharachorloo and Bugnion: memory system characterization of commercial workloads (ISCA '98).
% The accent is written as a brace-wrapped special character so BibTeX
% treats it as a single letter.
@inproceedings{barroso,
  author    = {Barroso, Luiz Andr{\'e} and Gharachorloo, Kourosh and Bugnion, Edouard},
  title     = {Memory system characterization of commercial workloads},
  booktitle = {Proc. 25th Int. Symp. on Computer Architecture (ISCA '98)},
  address   = {Barcelona, Spain},
  pages     = {3--14},
  month     = {27 June--2 July},
  year      = {1998}
}


% Lo et al.: database workload performance on simultaneous multithreaded processors (ISCA '98).
% The accent is written as a brace-wrapped special character so BibTeX
% treats it as a single letter.
@inproceedings{lo_et_al_98,
  author    = {Lo, Jack L. and Barroso, Luiz Andr{\'e} and Eggers, Susan J. and Gharachorloo, Kourosh and Levy, Henry M. and Parekh, Sujay S.},
  title     = {An analysis of database workload performance on simultaneous multithreaded processors},
  booktitle = {Proc. 25th Int. Symp. on Computer Architecture (ISCA '98)},
  address   = {Barcelona, Spain},
  pages     = {39--50},
  month     = {27 June--2 July},
  year      = {1998}
}

% Short and Levy: simulation study of two-level caches (ISCA '88).
% Initials are space-separated so abbreviating styles keep every initial.
@inproceedings{short_and_levy,
  author    = {Short, R. T. and Levy, H. M.},
  title     = {A Simulation Study of Two-Level Caches},
  booktitle = {Proc. 15th Int. Symp. on Computer Architecture (ISCA '88)},
  pages     = {81--88},
  month     = {May/June},
  year      = {1988}
}


% Chen and Baer: non-blocking and prefetching caches (ASPLOS-5, 1992).
@inproceedings{chen_and_baer,
  author    = {Chen, T. and Baer, J.},
  title     = {Reducing Memory Latency via Non-blocking and Prefetching Caches},
  booktitle = {Proc. 5th Int. Conf. on Architectural Support for Programming Languages and Operating Systems (ASPLOS-5)},
  pages     = {51--61},
  month     = {September},
  year      = {1992}
}

% Alexander and Kedem: distributed prefetch-buffer/cache design (HPCA, 1996).
% Fixes the misspelt month, uses a double hyphen for the page range, and
% drops the extra braces that wrapped the whole booktitle.
@inproceedings{alexander_kedem,
  author    = {Thomas Alexander and Gershon Kedem},
  title     = {Distributed Prefetch-buffer/Cache Design for High-Performance Memory Systems},
  booktitle = {Proc. 2nd {IEEE} Symp. on High-Performance Computer Architecture (HPCA)},
  address   = {San Jose, {CA}},
  pages     = {254--263},
  month     = {February},
  year      = {1996}
}
% url = "citeseer.nj.nec.com/kedem96distributed.html"

% Rogers and Li: software support for speculative loads (ASPLOS-5, 1992).
@inproceedings{rogers_and_li,
  author    = {Rogers, A. and Li, K.},
  title     = {Software Support for Speculative Loads},
  booktitle = {Proc. 5th Int. Conf. on Architectural Support for Programming Languages and Operating Systems (ASPLOS-5)},
  pages     = {38--50},
  month     = {September},
  year      = {1992}
}

% Bordawekar: I/O behaviour of a commercial distributed-shared-memory machine (2000).
% "I/O" is brace-protected so sentence-casing styles do not lowercase it;
% the journal name is capitalised like the other Trans. entries in this file.
@article{bordawekar,
  author  = {Bordawekar, Rajesh R.},
  title   = {Quantitative Characterization and Analysis of the {I/O} Behavior of a Commercial Distributed-Shared-Memory Machine},
  journal = {{IEEE} Trans. on Parallel and Distributed Systems},
  volume  = {11},
  number  = {5},
  pages   = {509--526},
  month   = {May},
  year    = {2000}
}

% Postiff et al.: ILP limits in SPEC95 applications (Computer Architecture News 27(1), 1999).
@article{postiff_et_al,
  author  = {Postiff, Matthew A. and Green, David A. and Tyson, Gary S. and Mudge, Trevor N.},
  title   = {Limits of Instruction Level Parallelism in {SPEC95} Applications},
  journal = {Computer Architecture News},
  volume  = {27},
  number  = {1},
  pages   = {31--34},
  month   = {March},
  year    = {1999},
  note    = {presented at {INTERACT-3} Workshop on Interaction between Compilers and Computer Architectures, part of {ASPLOS VIII}, San Jose, {CA}, October 1998}
}

% Dulong: the IA-64 architecture at work (Computer 31(7), 1998).
@article{dulong,
  author  = {Dulong, C.},
  title   = {The {IA-64} Architecture at Work},
  journal = {Computer},
  volume  = {31},
  number  = {7},
  pages   = {24--32},
  month   = {July},
  year    = {1998}
}

% Sprangle and Carmean: deeper pipelines for processor performance (2002).
% The booktitle follows the abbreviated "Proc. ... Annual Int. Symp. on
% Computer Architecture" form used by the other entries for this symposium.
@inproceedings{sprangle_and_carmean,
  author    = {Eric Sprangle and Doug Carmean},
  title     = {Increasing processor performance by implementing deeper pipelines},
  booktitle = {Proc. 29th Annual Int. Symp. on Computer Architecture},
  pages     = {25--34},
  address   = {Anchorage, Alaska},
  location  = {Anchorage, Alaska},
  year      = {2002},
  isbn      = {0-7695-1605-X}
}
% publisher = {IEEE Computer Society},

% Lo et al.: converting thread-level to instruction-level parallelism via SMT (ACM TOCS 15(3), 1997).
% Initials are space-separated so abbreviating styles keep every initial.
@article{lo_et_al,
  author  = {Lo, J. L. and Emer, J. S. and Levy, H. M. and Stamm, R. L. and Tullsen, D. M.},
  title   = {Converting thread-level parallelism to instruction-level parallelism via simultaneous multithreading},
  journal = {{ACM} Trans. on Computer Systems},
  volume  = {15},
  number  = {3},
  pages   = {322--354},
  month   = {August},
  year    = {1997}
}

% Waldspurger and Weihl: register relocation for multithreading (ISCA '93).
% Initials are space-separated so abbreviating styles keep every initial.
@inproceedings{waldspurger_and_weihl,
  author    = {Waldspurger, C. A. and Weihl, W. E.},
  title     = {Register relocation: flexible contexts for multithreading},
  booktitle = {Proc. 20th Annual Int. Symp. on Computer Architecture (ISCA '93)},
  address   = {San Diego, CA},
  pages     = {120--130},
  month     = {May},
  year      = {1993}
}

% Hidaka, Koike and Tanaka: multiple threads in cyclic register windows (ISCA '93).
% The last author's initial now carries its period.
@inproceedings{hidaka_et_al,
  author    = {Hidaka, Y. and Koike, H. and Tanaka, H.},
  title     = {Multiple threads in cyclic register windows},
  booktitle = {Proc. 20th Annual Int. Symp. on Computer Architecture (ISCA '93)},
  address   = {San Diego, CA},
  pages     = {131--142},
  month     = {May},
  year      = {1993}
}

% Cringely: life after Moore's law (Communications of the ACM 44(3), 2001).
% The proper noun is protected as a whole word rather than by a single
% braced letter in the middle of the word.
@article{cringely,
  author  = {Robert X. Cringely},
  title   = {Be absolute for death: life after {Moore's} law},
  journal = {Communications of the {ACM}},
  volume  = {44},
  number  = {3},
  pages   = {94},
  year    = {2001},
  issn    = {0001-0782}
}
% doi = {http://doi.acm.org/10.1145/365181.365219},
% publisher = {{ACM} Press},

% Tullsen, Eggers and Levy: simultaneous multithreading (ISCA '95).
% Removed the trailing space inside the address value, which would otherwise
% appear before the following punctuation in the formatted reference.
@inproceedings{tullsen_et_al,
  author    = {Tullsen, Dean M. and Eggers, Susan J. and Levy, Henry M.},
  title     = {Simultaneous multithreading: maximizing on-chip parallelism},
  booktitle = {Proc. 22nd Annual Int. Symp. on Computer Architecture (ISCA '95)},
  address   = {S. Margherita Ligure, Italy},
  pages     = {392--403},
  month     = {June},
  year      = {1995}
}


% Mowry, Lam and Gupta: compiler algorithm for prefetching (1992).
% Initials are space-separated so abbreviating styles keep every initial.
@inproceedings{mowry_et_al,
  author    = {Mowry, T. C. and Lam, M. S. and Gupta, A.},
  title     = {Design and Evaluation of a Compiler Algorithm for Prefetching},
  booktitle = {Proc. 5th Int. Conf. on Architectural Support for Programming Languages and Operating Systems},
  pages     = {62--73},
  month     = {September},
  year      = {1992}
}

% Ki and Knowles: adaptive data prefetching using cache information (Int. Conf. on Supercomputing, 1997).
@inproceedings{ki_and_knowles,
  author    = {Ki, A. and Knowles, A. E.},
  title     = {Adaptive data prefetching using cache information},
  booktitle = {Proc. 1997 Int. Conf. on Supercomputing},
  pages     = {204--212},
  address   = {Vienna},
  year      = {1997}
}

% Chen: programmable prefetch engine for on-chip caches (MICRO-28, 1995).
% The hyphenated initials are written "T.-F." with a period after each
% letter; the month range is spaced like the other date ranges in this file.
@inproceedings{chen,
  author    = {Chen, T.-F.},
  title     = {An effective programmable prefetch engine for on-chip caches},
  booktitle = {Proc. 28th Int. Symp. on Microarchitecture ({MICRO-28})},
  address   = {Ann Arbor, MI},
  pages     = {237--242},
  month     = {29 November--1 December},
  year      = {1995}
}

% Lam, Rothberg and Wolf: cache performance of blocked algorithms (ASPLOS-4, 1991).
% Initials are space-separated so abbreviating styles keep every initial;
% the state abbreviation is braced as one unit.
@inproceedings{lam_et_al,
  author    = {Lam, M. S. and Rothberg, E. E. and Wolf, M. E.},
  title     = {The Cache Performance and Optimizations of Blocked Algorithms},
  booktitle = {Proc. 4th Int. Conf. on Architectural Support for Programming Languages and Operating Systems (ASPLOS-4)},
  address   = {Santa Clara, {CA}},
  pages     = {63--74},
  year      = {1991}
}


% Kroft: lockup-free instruction fetch/prefetch cache (ISCA '81).
@inproceedings{kroft,
  author    = {Kroft, D.},
  title     = {Lockup-Free Instruction Fetch/Prefetch Cache Organisation},
  booktitle = {Proc. 8th Int. Symp. on Computer Architecture (ISCA '81)},
  pages     = {81--84},
  month     = {May},
  year      = {1981}
}



% Sohi and Franklin: high-bandwidth data memory systems for superscalar processors (ASPLOS-4, 1991).
% The first author's initials are space-separated so abbreviating styles
% keep both of them.
@inproceedings{sohi_and_franklin,
  author    = {Sohi, G. S. and Franklin, M.},
  title     = {High-Bandwidth Data Memory Systems for Superscalar Processors},
  booktitle = {Proc. 4th Int. Conf. on Architectural Support for Programming Languages and Operating Systems (ASPLOS-4)},
  pages     = {53--62},
  month     = {April},
  year      = {1991}
}

% Przybylski, Horowitz and Hennessy: cache design tradeoffs (ISCA '88).
@inproceedings{przybylski_et_al_1,
  author    = {Przybylski, S. and Horowitz, M. and Hennessy, J.},
  title     = {Design Tradeoffs in Cache Design},
  booktitle = {Proc. 15th Int. Symp. on Computer Architecture (ISCA '88)},
  pages     = {290--298},
  month     = {May/June},
  year      = {1988}
}



% Przybylski, Horowitz and Hennessy: performance-optimal multi-level cache hierarchies (ISCA '89).
@inproceedings{przybylski_et_al_2,
  author    = {Przybylski, S. and Horowitz, M. and Hennessy, J.},
  title     = {Characteristics of Performance-Optimal Multi-Level Cache Hierarchies},
  booktitle = {Proc. 16th Int. Symp. on Computer Architecture (ISCA '89)},
  pages     = {114--121},
  month     = {May},
  year      = {1989}
}



% Nagle et al.: design tradeoffs for software-managed TLBs (ISCA '93).
@inproceedings{nagle_et_al,
  author    = {Nagle, D. and Uhlig, R. and Stanley, T. and Sechrest, S. and Mudge, T. and Brown, R.},
  title     = {Design Tradeoffs for Software-Managed {TLB}s},
  booktitle = {Proc. 20th Int. Symp. on Computer Architecture (ISCA '93)},
  pages     = {27--38},
  address   = {San Diego, CA},
  month     = {May},
  year      = {1993}
}

% Jacob and Mudge: memory management units, TLB refill and page tables (ASPLOS-VIII, 1998).
% The doi field holds the bare DOI; styles add the resolver prefix themselves.
@inproceedings{jacob_mudge,
  author    = {Bruce L. Jacob and Trevor N. Mudge},
  title     = {A look at several memory management units, {TLB}-refill mechanisms, and page table organizations},
  booktitle = {Proc. 8th Int. Conf. on Architectural Support for Programming Languages and Operating Systems ({ASPLOS-VIII})},
  pages     = {295--306},
  address   = {San Jose, {CA}},
  location  = {San Jose, California, United States},
  year      = {1998},
  isbn      = {1-58113-107-0},
  doi       = {10.1145/291069.291065}
}
% publisher = {{ACM} Press},

% Seznec: two-way skewed-associative caches (ISCA '93).
@inproceedings{seznec,
  author    = {Seznec, A.},
  title     = {A Case For Two-Way Skewed-Associative Caches},
  booktitle = {Proc. 20th Int. Symp. on Computer Architecture (ISCA '93)},
  pages     = {169--178},
  address   = {San Diego, CA},
  month     = {May},
  year      = {1993}
}

% Kessler et al.: inexpensive implementations of set-associativity (ISCA '89).
% Initials are space-separated so abbreviating styles keep every initial.
@inproceedings{kessler_et_al,
  author    = {Kessler, R. E. and Jooss, R. and Lebeck, A. and Hill, M. D.},
  title     = {Inexpensive Implementations of Set-Associativity},
  booktitle = {Proc. 16th Int. Symp. on Computer Architecture (ISCA '89)},
  pages     = {131--139},
  month     = {May/June},
  year      = {1989}
}



% Agarwal, Hennessy and Horowitz: cache performance of OS and multiprogramming workloads (ACM TOCS 6(4), 1988).
@article{agarwal_et_al,
  author  = {Agarwal, A. and Hennessy, J. and Horowitz, M.},
  title   = {Cache Performance of Operating System and Multiprogramming Workloads},
  journal = {{ACM} Trans. on Computer Systems},
  volume  = {6},
  number  = {4},
  pages   = {393--431},
  month   = {November},
  year    = {1988}
}



% Huck and Hays: translation table management for large address spaces (ISCA '93).
@inproceedings{huck_and_hays,
  author    = {Huck, J. and Hays, J.},
  title     = {Architectural Support for Translation Table Management in Large Address Space Machines},
  booktitle = {Proc. 20th Int. Symp. on Computer Architecture (ISCA '93)},
  pages     = {39--50},
  address   = {San Diego, CA},
  month     = {May},
  year      = {1993}
}

% Kessler and Hill: page placement for large real-indexed caches (ACM TOCS 10(4), 1992).
% Initials are space-separated so abbreviating styles keep every initial.
@article{kessler_and_hill,
  author  = {Kessler, R. E. and Hill, M. D.},
  title   = {Page Placement Algorithms for Large Real-Indexed Caches},
  journal = {{ACM} Trans. on Computer Systems},
  volume  = {10},
  number  = {4},
  pages   = {338--359},
  month   = {November},
  year    = {1992}
}


% Calder et al.: cache-conscious data placement (ASPLOS-VIII, 1998).
@inproceedings{calder_et_al,
  author    = {Calder, B. and Krintz, C. and Simmi, J. and Austin, T.},
  title     = {Cache-Conscious Data Placement},
  booktitle = {Proc. 8th Int. Conf. on Architectural Support for Programming Languages and Operating Systems (ASPLOS-VIII)},
  pages     = {139--149},
  address   = {San Jose, {CA}},
  month     = {October},
  year      = {1998}
}

% Borg, Kessler and Wall: generating and analysing very long address traces (ISCA '90).
% Initials are space-separated so abbreviating styles keep every initial.
@inproceedings{borg_et_al,
  author    = {Borg, A. and Kessler, R. E. and Wall, D. W.},
  title     = {Generation and Analysis of Very Long Address Traces},
  booktitle = {Proc. 17th Int. Symp. on Computer Architecture (ISCA '90)},
  pages     = {270--279},
  month     = {May},
  year      = {1990}
}



% Veenstra and Fowler: MINT tutorial and user manual (technical report, 1993).
% Typed as a technical report: the report number and issuing institution,
% previously packed into the publisher of a book entry, now sit in their own
% fields. The type field keeps the original "Tech. Report" wording.
% Initials are space-separated so abbreviating styles keep every initial.
@techreport{veenstra_and_fowler,
  author      = {Veenstra, J. E. and Fowler, R. J.},
  title       = {{MINT} Tutorial and User Manual},
  type        = {Tech. Report},
  number      = {452},
  institution = {Computer Science Department, University of Rochester, New York},
  month       = {June},
  year        = {1993}
}