Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
import numpy as N
import sympy.ntheory as SN
import itertools
####################
# LOW LEVEL ROUTINES
####################
# independent of dimension
# take out trivial divisors 1 and n
def divisors(n):
return SN.divisors(n)[1:-1]
# non-trivial decompositions of integer "n" into "dim" integer factors
# returns an arbitrary length list of lists of length "dim"
# possibly with repeats
def decompList(n, dim):
dl = divisors(n)
if dim == 2:
ret = []
for d in dl:
tmp = [d, n/d]
tmp.sort()
ret.append(tmp)
ret.sort()
else:
ret = []
for d in dl:
tmp = decompList(n/d, dim-1)
for i in tmp:
i.append(d)
i.sort()
ret.append(i)
return ret
# returns a list of a unique set of rank-"dim" tuples
def decompSet(n, dim, reverse=False):
ret = decompList(n,dim)
ret = map(tuple, ret)
ret = set(ret)
ret = list(ret)
ret.sort(reverse=reverse)
return ret
# decompositions sorted by increasing aspect ratio of largest/smallest factor
# can also limit by maximum aspect ratio
def decompAspectRatio(n, dim, maxAspectRatio=0.0):
ret = decompSet(n, dim, False)
ret = map(lambda x: [1.0*x[-1]/x[0],x], ret)
ret.sort()
if maxAspectRatio > 0.0:
ret = filter(lambda x: x[0] <= maxAspectRatio, ret)
return ret
# decompositions based on ranks, independent of grid dimensions
# check whether a 2D decomp is commensurate with subset of 3D decomp
# checks both orders of 2D decomp and returns True if either works
# a = 3D subset, 2-tuple
# b = 2D 2-tuple
def check2d2d(a, b):
return (b[0]%a[0]==0 and b[1]%a[1]==0) or (b[1]%a[0]==0 and b[0]%a[1]==0)
# filter list of 2D decomps for those commensurate with subset of 3D decomp
# a = 3D subset 2-tuple
# b = list of 2D 2-tuples
def filter2d2d(a, blist):
return filter(lambda x: check2d2d(a,x), blist)
# return xpencils, ypencils, and zpencils commensurate with a 3D decomp
# pencil lists could be emtpy if there are no co-commensurate decomps
# a = 3D 3-tuple
def filter3d(a):
nranks = a[0]*a[1]*a[2]
blist = decompSet(nranks, 2)
blist.sort(reverse=True)
xpencils = filter2d2d((a[1],a[2]), blist)
ypencils = filter2d2d((a[2],a[0]), blist)
zpencils = filter2d2d((a[0],a[1]), blist)
return [xpencils, ypencils, zpencils]
# sum the remainders of ng%tuple
# works for arbitrary-rank tuples
def sumRemainders(ng, tuple):
return N.array(map(lambda x: ng%x, tuple)).sum()
# find pencil decompositions co-commensurate with 3D decomp and ng
def filterNg3d2d(ng, tuple3d, filter3doutput):
if sumRemainders(ng, tuple3d) == 0:
xpencils = filter(lambda x: sumRemainders(ng, x)==0, filter3doutput[0])
ypencils = filter(lambda x: sumRemainders(ng, x)==0, filter3doutput[1])
zpencils = filter(lambda x: sumRemainders(ng, x)==0, filter3doutput[2])
if len(xpencils) > 0 and len(ypencils) > 0 and len(zpencils) > 0:
return [xpencils, ypencils, zpencils]
return []
######################
# CONVENIENCE ROUTINES
######################
def filterNg3d(ng, tuple3d):
filter3doutput = filter3d(tuple3d)
return filterNg3d2d(ng, tuple3d, filter3doutput)
# find ng in range [ng0,ng1] that have pencil decomps for a given 3D decomp
def ngCandidates3d(tuple3d, ng0, ng1):
ret = []
filter3doutput = filter3d(tuple3d)
for i in xrange(ng0, ng1+1):
if len(filterNg3d2d(i, tuple3d, filter3doutput)) > 0:
ret.append(i)
return ret
# cannot remember what permutations does, seems unused
def permutations(tuple):
ret = list(set(list(itertools.permutations(tuple))))
ret.sort()
return ret
# cannot remember what doublecheck does, seems unused
def doublecheck(nranks, ng, t3d, t2dx, t2dy, t2dz):
# decomps have right number of ranks
if N.array(t3d).prod() != nranks:
return False
if N.array(t2dx).prod() != nranks:
return False
if N.array(t2dy).prod() != nranks:
return False
if N.array(t2dz).prod() != nranks:
return False
# 2D decomps are commensurate with 3D decomps
if t2dx[1] % t3d[1] != 0 or t2dx[2] % t3d[2] != 0:
return False
if t2dy[0] % t3d[0] != 0 or t2dy[2] % t3d[2] != 0:
return False
if t2dz[0] % t3d[0] != 0 or t2dz[1] % t3d[1] != 0:
return False
# decomps are divisors of ng
if sumRemainders(ng, t3d) != 0:
return False
if sumRemainders(ng, t2dx) != 0:
return False
if sumRemainders(ng, t2dy) != 0:
return False
if sumRemainders(ng, t2dz) != 0:
return False
# ok looks fine
return True
#####################
# HIGH LEVEL ROUTINES
#####################
# find ng in range [ng0,ng1] that have pencil decomps for any valid 3D decomp
# optionally specify maximum 3D decomp aspect ratio to check
def ngCandidatesNranks(nranks, ng0, ng1, maxAspectRatio=0.0):
tuple3dlist = decompAspectRatio(nranks, 3, maxAspectRatio)
ret = []
for i in tuple3dlist:
tuple3d = i[1]
ret += ngCandidates3d(tuple3d, ng0, ng1)
ret = set(ret)
ret = list(ret)
ret.sort()
return ret
# find 3D decomps that have co-commensurate pencil decomps for ng and nranks
# optionally specify maximum 3D decomp aspect ratio to check
def filterNgNranks(ng, nranks, maxAspectRatio=0.0):
list3d = decompAspectRatio(nranks, 3, maxAspectRatio)
return filter(lambda x: len(filterNg3d(ng, x[1])) > 0, list3d)
# expand pencils into 3D tuples in all orders that work
def enumeratePencilsNg3d(ng, tuple3d):
pencils = filterNg3d(ng, tuple3d)
xret = []
for pencil in pencils[0]:
if pencil[0]%tuple3d[1]==0 and pencil[1]%tuple3d[2]==0:
xret.append( (1, pencil[0], pencil[1]) )
if pencil[1]%tuple3d[1]==0 and pencil[0]%tuple3d[2]==0:
xret.append( (1, pencil[1], pencil[0]) )
yret = []
for pencil in pencils[1]:
if pencil[0]%tuple3d[0]==0 and pencil[1]%tuple3d[2]==0:
yret.append( (pencil[0], 1, pencil[1]) )
if pencil[1]%tuple3d[0]==0 and pencil[0]%tuple3d[2]==0:
yret.append( (pencil[1], 1, pencil[0]) )
zret = []
for pencil in pencils[2]:
if pencil[0]%tuple3d[0]==0 and pencil[1]%tuple3d[1]==0:
zret.append( (pencil[0], pencil[1], 1) )
if pencil[1]%tuple3d[0]==0 and pencil[0]%tuple3d[1]==0:
zret.append( (pencil[1], pencil[0], 1) )
return [xret, yret, zret]