# *****************************************************************************
# *  CP2K: A general program to perform molecular dynamics simulations        *
# *  Copyright (C) 2000 - 2013 the CP2K developers group                      *
# *  Authors: Peter Messmer <pmessmer@nvidia.com>,                            *
# *           Nikolay Markovskiy <nmarkovskiy@nvidia.com>                     *
# *****************************************************************************

[
 # Table of kernel parameter sets, written as one bracketed list of
 # keyword-argument calls. Every entry names a kernel variant
 # (Kernel_dnt_largeDB / _medium / _small / _tiny), the block sizes
 # m, n, k it applies to, and that variant's tuning parameters.
 # Every entry in this list uses grouping=16.
 # NOTE(review): the Kernel_dnt_* callables are defined outside this file;
 # presumably this list is evaluated by the kernel generator -- confirm.
 # NOTE(review): each (m, n, k) triple appears once in this list; if entries
 # are added, check for duplicates, since how the consumer resolves them is
 # not visible from here.

 # largeDB entries for every combination of m, n, k drawn from {29, 16, 14}.
 Kernel_dnt_largeDB(m=29, n=29, k=29, blockdim=128, tile_m=2, tile_n=4, w=4, v=13 , grouping=16),
 Kernel_dnt_largeDB(m=29, n=29, k=16, blockdim=128, tile_m=4, tile_n=2, w=4, v=13 , grouping=16),
 Kernel_dnt_largeDB(m=29, n=29, k=14, blockdim=128, tile_m=4, tile_n=2, w=4, v=13 , grouping=16),
 Kernel_dnt_largeDB(m=29, n=16, k=29, blockdim=128, tile_m=2, tile_n=2, w=4, v=13 , grouping=16),
 Kernel_dnt_largeDB(m=29, n=16, k=16, blockdim=128, tile_m=2, tile_n=2, w=4, v=8 , grouping=16),
 Kernel_dnt_largeDB(m=29, n=16, k=14, blockdim=128, tile_m=2, tile_n=2, w=4, v=13 , grouping=16),
 Kernel_dnt_largeDB(m=29, n=14, k=29, blockdim=128, tile_m=2, tile_n=2, w=4, v=14 , grouping=16),
 Kernel_dnt_largeDB(m=29, n=14, k=16, blockdim=128, tile_m=4, tile_n=1, w=13, v=14 , grouping=16),
 Kernel_dnt_largeDB(m=29, n=14, k=14, blockdim=128, tile_m=2, tile_n=2, w=12, v=14 , grouping=16),
 Kernel_dnt_largeDB(m=16, n=29, k=29, blockdim=128, tile_m=2, tile_n=2, w=4, v=24 , grouping=16),
 Kernel_dnt_largeDB(m=16, n=29, k=16, blockdim=128, tile_m=2, tile_n=2, w=12, v=24 , grouping=16),
 Kernel_dnt_largeDB(m=16, n=29, k=14, blockdim=128, tile_m=2, tile_n=2, w=10, v=24 , grouping=16),
 Kernel_dnt_largeDB(m=16, n=16, k=29, blockdim=96, tile_m=2, tile_n=2, w=6, v=10 , grouping=16),
 Kernel_dnt_largeDB(m=16, n=16, k=16, blockdim=96, tile_m=2, tile_n=2, w=12, v=10 , grouping=16),
 Kernel_dnt_largeDB(m=16, n=16, k=14, blockdim=96, tile_m=2, tile_n=2, w=10, v=12 , grouping=16),
 Kernel_dnt_largeDB(m=16, n=14, k=29, blockdim=128, tile_m=2, tile_n=1, w=6, v=14 , grouping=16),
 Kernel_dnt_largeDB(m=16, n=14, k=16, blockdim=96, tile_m=2, tile_n=2, w=13, v=14 , grouping=16),
 Kernel_dnt_largeDB(m=16, n=14, k=14, blockdim=128, tile_m=2, tile_n=1, w=8, v=14 , grouping=16),
 Kernel_dnt_largeDB(m=14, n=29, k=29, blockdim=128, tile_m=2, tile_n=2, w=4, v=24 , grouping=16),
 Kernel_dnt_largeDB(m=14, n=29, k=16, blockdim=128, tile_m=2, tile_n=2, w=13, v=10 , grouping=16),
 Kernel_dnt_largeDB(m=14, n=29, k=14, blockdim=128, tile_m=2, tile_n=2, w=13, v=14 , grouping=16),
 Kernel_dnt_largeDB(m=14, n=16, k=29, blockdim=128, tile_m=2, tile_n=1, w=6, v=8 , grouping=16),
 Kernel_dnt_largeDB(m=14, n=16, k=16, blockdim=128, tile_m=1, tile_n=2, w=6, v=8 , grouping=16),
 Kernel_dnt_largeDB(m=14, n=16, k=14, blockdim=128, tile_m=2, tile_n=1, w=8, v=8 , grouping=16),
 Kernel_dnt_largeDB(m=14, n=14, k=29, blockdim=96, tile_m=2, tile_n=2, w=6, v=14 , grouping=16),
 Kernel_dnt_largeDB(m=14, n=14, k=16, blockdim=96, tile_m=4, tile_n=1, w=4, v=8 , grouping=16),
 Kernel_dnt_largeDB(m=14, n=14, k=14, blockdim=96, tile_m=2, tile_n=2, w=4, v=14 , grouping=16),


 # Block sizes built from 13 and 26 (plus 23x23x23), split between the
 # largeDB and medium variants. In every medium entry of this file,
 # threads equals panel_in and panel_out is 0.
 # NOTE(review): the trailing numbers in the comments below (e.g. "# 171")
 # are not explained in this file -- presumably a measured performance
 # figure for the tuned kernel; confirm before relying on them.
 Kernel_dnt_largeDB(m=13, n=26, k=26, tile_m=1, tile_n=3, w=4, v=26, blockdim=128, grouping=16),
 Kernel_dnt_largeDB(m=26, n=13, k=26, tile_m=2, tile_n=2, w=3, v=13, blockdim=96,  grouping=16),
 Kernel_dnt_largeDB(m=26, n=26, k=26, tile_m=2, tile_n=3, w=4, v=10, blockdim=128, grouping=16),
 Kernel_dnt_largeDB(m=23, n=23, k=23, tile_m=2, tile_n=3, w=4, v=13, blockdim=96,  grouping=16),
 Kernel_dnt_medium(m=13, n=13, k=13, panel_in=96,  panel_out=0, tile_m=2, tile_n=2, threads=96,  grouping=16),    # 171
 Kernel_dnt_medium(m=13, n=13, k=26, panel_in=128, panel_out=0, tile_m=2, tile_n=2, threads=128, grouping=16),    # 204
 Kernel_dnt_medium(m=13, n=26, k=13, panel_in=96,  panel_out=0, tile_m=2, tile_n=2, threads=96,  grouping=16),    # 241
 Kernel_dnt_medium(m=26, n=13, k=13, panel_in=128, panel_out=0, tile_m=2, tile_n=2, threads=128, grouping=16),    # 229
 Kernel_dnt_medium(m=26, n=26, k=13, panel_in=128, panel_out=0, tile_m=2, tile_n=3, threads=128, grouping=16),    # 333

 # small entries: block sizes mixing 13 and 5.
 Kernel_dnt_small(m=13, n=13, k=5,  tile_m=1, tile_n=2, threads=96, grouping=16),  # 112.8
 Kernel_dnt_small(m=13, n=5,  k=13, tile_m=1, tile_n=1, threads=96, grouping=16),  # 76
 Kernel_dnt_small(m=13, n=5,  k=5,  tile_m=1, tile_n=1, threads=96, grouping=16),  # 45
 Kernel_dnt_small(m=5,  n=13, k=13, tile_m=1, tile_n=1, threads=96, grouping=16),  # 74.9
 Kernel_dnt_small(m=5,  n=13, k=5,  tile_m=1, tile_n=1, threads=96, grouping=16),  # 46

 # tiny entries: m = n = 5.
 Kernel_dnt_tiny(m=5,  n=5,  k=13, split_thread=32,  threads=128, grouping=16),  # 62.7
 Kernel_dnt_tiny(m=5,  n=5,  k=5,  split_thread=32,  threads=64, grouping=16),   # 28.8

 # largeDB entries for block sizes drawn from {54, 24, 13}, followed by a
 # single 15x24x24 entry.
 Kernel_dnt_largeDB(m=54, n=54, k=54, blockdim=128, tile_m=5, tile_n=5, w=6, v=24, grouping=16),
 Kernel_dnt_largeDB(m=54, n=54, k=24, blockdim=128, tile_m=5, tile_n=5, w=6, v=24, grouping=16),
 Kernel_dnt_largeDB(m=54, n=54, k=13, blockdim=256, tile_m=2, tile_n=6, w=13, v=24,grouping=16),
 Kernel_dnt_largeDB(m=54, n=24, k=54, blockdim=256, tile_m=3, tile_n=2, w=4, v=10, grouping=16),
 Kernel_dnt_largeDB(m=54, n=13, k=54, blockdim=256, tile_m=3, tile_n=1, w=4, v=13, grouping=16),
 Kernel_dnt_largeDB(m=24, n=54, k=54, blockdim=256, tile_m=2, tile_n=3, w=4, v=24, grouping=16),
 Kernel_dnt_largeDB(m=24, n=54, k=24, blockdim=128, tile_m=4, tile_n=3, w=4, v=24, grouping=16),
 Kernel_dnt_largeDB(m=24, n=13, k=54, blockdim=128, tile_m=3, tile_n=1, w=4, v=13, grouping=16),
 Kernel_dnt_largeDB(m=13, n=54, k=54, blockdim=256, tile_m=1, tile_n=3, w=4, v=24, grouping=16),
 Kernel_dnt_largeDB(m=13, n=54, k=24, blockdim=128, tile_m=2, tile_n=3, w=4, v=24, grouping=16),
 Kernel_dnt_largeDB(m=13, n=24, k=54, blockdim=128, tile_m=3, tile_n=1, w=4, v=24, grouping=16),
 Kernel_dnt_largeDB(m=13, n=13, k=54, blockdim=128, tile_m=1, tile_n=2, w=8, v=12, grouping=16),
 Kernel_dnt_largeDB(m=24, n=24, k=54, blockdim=96,  tile_m=2, tile_n=3, w=8, v=8,  grouping=16),
 Kernel_dnt_largeDB(m=24, n=24, k=24, blockdim=96, tile_m=2, tile_n=3, w=8, v=17 ,grouping=16),
 Kernel_dnt_largeDB(m=15, n=24, k=24, blockdim=96, tile_m=2, tile_n=2, w=4, v=16 ,grouping=16),

 
 # medium entries for the {54, 24, 13} block sizes not listed as largeDB above.
 Kernel_dnt_medium(m=54, n=24, k=24, threads=256, tile_m=3, tile_n=4, panel_in=256, panel_out=0 ,grouping=16),
 Kernel_dnt_medium(m=54, n=24, k=13, threads=192, tile_m=3, tile_n=4, panel_in=192, panel_out=0 ,grouping=16),
 Kernel_dnt_medium(m=54, n=13, k=24, threads=320, tile_m=3, tile_n=2, panel_in=320, panel_out=0 ,grouping=16),
 Kernel_dnt_medium(m=54, n=13, k=13, threads=192, tile_m=3, tile_n=2, panel_in=192, panel_out=0 ,grouping=16),
 Kernel_dnt_medium(m=24, n=54, k=13, threads=256, tile_m=2, tile_n=3, panel_in=256, panel_out=0 ,grouping=16),
 Kernel_dnt_medium(m=24, n=24, k=13, threads=128, tile_m=2, tile_n=3, panel_in=128, panel_out=0 ,grouping=16),
 Kernel_dnt_medium(m=24, n=13, k=24, threads=160, tile_m=2, tile_n=2, panel_in=160, panel_out=0 ,grouping=16),
 Kernel_dnt_medium(m=24, n=13, k=13, threads=96,  tile_m=2, tile_n=2, panel_in=96,  panel_out=0 ,grouping=16),
 Kernel_dnt_medium(m=13, n=54, k=13, threads=256, tile_m=2, tile_n=2, panel_in=256, panel_out=0 ,grouping=16),
 Kernel_dnt_medium(m=13, n=24, k=24, threads=160, tile_m=1, tile_n=2, panel_in=160, panel_out=0 ,grouping=16),
 Kernel_dnt_medium(m=13, n=24, k=13, threads=128, tile_m=2, tile_n=2, panel_in=128, panel_out=0 ,grouping=16),
 Kernel_dnt_medium(m=13, n=13, k=24, threads=128, tile_m=2, tile_n=2, panel_in=128, panel_out=0 ,grouping=16),

 # medium entries for block sizes drawn from {24, 13, 15} in which at least
 # one of m, n, k is 15 (15x24x24 is listed with the largeDB entries above).
 Kernel_dnt_medium(m=24, n=24, k=15, threads=128, tile_m=2, tile_n=3, panel_in=128, panel_out=0 ,grouping=16),
 Kernel_dnt_medium(m=24, n=13, k=15, threads=128, tile_m=2, tile_n=2, panel_in=128, panel_out=0 ,grouping=16),
 Kernel_dnt_medium(m=24, n=15, k=24, threads=160, tile_m=2, tile_n=2, panel_in=160, panel_out=0 ,grouping=16),
 Kernel_dnt_medium(m=24, n=15, k=13, threads=96, tile_m=2, tile_n=2, panel_in=96, panel_out=0 ,grouping=16),
 Kernel_dnt_medium(m=24, n=15, k=15, threads=128, tile_m=2, tile_n=2, panel_in=128, panel_out=0 ,grouping=16),
 Kernel_dnt_medium(m=13, n=24, k=15, threads=128, tile_m=2, tile_n=2, panel_in=128, panel_out=0 ,grouping=16),
 Kernel_dnt_medium(m=13, n=13, k=15, threads=128, tile_m=2, tile_n=2, panel_in=128, panel_out=0 ,grouping=16),
 Kernel_dnt_medium(m=13, n=15, k=24, threads=128, tile_m=2, tile_n=2, panel_in=128, panel_out=0 ,grouping=16),
 Kernel_dnt_medium(m=13, n=15, k=13, threads=96, tile_m=2, tile_n=2, panel_in=96, panel_out=0 ,grouping=16),
 Kernel_dnt_medium(m=13, n=15, k=15, threads=96, tile_m=2, tile_n=2, panel_in=96, panel_out=0 ,grouping=16),
 Kernel_dnt_medium(m=15, n=24, k=13, threads=96, tile_m=2, tile_n=2, panel_in=96, panel_out=0 ,grouping=16),
 Kernel_dnt_medium(m=15, n=24, k=15, threads=128, tile_m=2, tile_n=2, panel_in=128, panel_out=0 ,grouping=16),
 Kernel_dnt_medium(m=15, n=13, k=24, threads=128, tile_m=2, tile_n=1, panel_in=128, panel_out=0 ,grouping=16),
 Kernel_dnt_medium(m=15, n=13, k=13, threads=96, tile_m=4, tile_n=1, panel_in=96, panel_out=0 ,grouping=16),
 Kernel_dnt_medium(m=15, n=13, k=15, threads=96, tile_m=2, tile_n=2, panel_in=96, panel_out=0 ,grouping=16),
 Kernel_dnt_medium(m=15, n=15, k=24, threads=128, tile_m=2, tile_n=2, panel_in=128, panel_out=0 ,grouping=16),
 Kernel_dnt_medium(m=15, n=15, k=13, threads=128, tile_m=2, tile_n=2, panel_in=128, panel_out=0 ,grouping=16),
 Kernel_dnt_medium(m=15, n=15, k=15, threads=128, tile_m=1, tile_n=2, panel_in=128, panel_out=0 ,grouping=16),
 
 # medium entries for block sizes drawn from {24, 13, 5} that contain both
 # a 24 and a 5.
 Kernel_dnt_medium(m=24, n=24, k=5, threads=128, tile_m=2, tile_n=3, panel_in=128, panel_out=0 ,grouping=16),
 Kernel_dnt_medium(m=24, n=13, k=5, threads=160, tile_m=3, tile_n=1, panel_in=160, panel_out=0 ,grouping=16),
 Kernel_dnt_medium(m=24, n=5, k=24, threads=128, tile_m=1, tile_n=1, panel_in=128, panel_out=0 ,grouping=16),
 Kernel_dnt_medium(m=24, n=5, k=13, threads=128, tile_m=1, tile_n=1, panel_in=128, panel_out=0 ,grouping=16),
 Kernel_dnt_medium(m=24, n=5, k=5, threads=160, tile_m=1, tile_n=2, panel_in=160, panel_out=0 ,grouping=16),
 Kernel_dnt_medium(m=13, n=24, k=5, threads=192, tile_m=1, tile_n=2, panel_in=192, panel_out=0 ,grouping=16),
 Kernel_dnt_medium(m=13, n=5, k=24, threads=128, tile_m=1, tile_n=1, panel_in=128, panel_out=0 ,grouping=16),
 Kernel_dnt_medium(m=5, n=24, k=24, threads=160, tile_m=1, tile_n=1, panel_in=160, panel_out=0 ,grouping=16),
 Kernel_dnt_medium(m=5, n=24, k=13, threads=128, tile_m=1, tile_n=1, panel_in=128, panel_out=0 ,grouping=16),
 Kernel_dnt_medium(m=5, n=24, k=5, threads=192, tile_m=1, tile_n=2, panel_in=192, panel_out=0 ,grouping=16),
 Kernel_dnt_medium(m=5, n=13, k=24, threads=128, tile_m=1, tile_n=1, panel_in=128, panel_out=0 ,grouping=16),
 Kernel_dnt_medium(m=5, n=5, k=24, threads=192, tile_m=1, tile_n=1, panel_in=192, panel_out=0 ,grouping=16),


 ]
#EOF
