// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package pointer

// This file implements Hash-Value Numbering (HVN), a pre-solver
// constraint optimization described in Hardekopf & Lin, SAS'07 (see
// doc.go) that analyses the graph topology to determine which sets of
// variables are "pointer equivalent" (PE), i.e. must have identical
// points-to sets in the solution.
//
// A separate ("offline") graph is constructed.  Its nodes are those of
// the main-graph, plus an additional node *X for each pointer node X.
// With this graph we can reason about the unknown points-to set of
// dereferenced pointers.  (We do not generalize this to represent
// unknown fields x->f, perhaps because such fields would be numerous,
// though it might be worth an experiment.)
//
// Nodes whose points-to relations are not entirely captured by the
// graph are marked as "indirect": the *X nodes, the parameters of
// address-taken functions (which includes all functions in method
// sets), or nodes updated by the solver rules for reflection, etc.
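//
// For example (an illustrative sketch), given the constraints
//
//	p = &x
//	y = *p
//
// the offline graph contains, in addition to nodes for p, x and y, a
// node *p standing for the unknown points-to set of p; the load makes
// y depend on *p, and *p is one of the "indirect" nodes described
// above.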
//
// All addr (y=&x) nodes are initially assigned a pointer-equivalence
// (PE) label equal to x's nodeid in the main graph.  (These are the
// only PE labels that are less than len(a.nodes).)
//
// All offsetAddr (y=&x.f) constraints are initially assigned a PE
// label; such labels are memoized, keyed by (x, f), so that equivalent
// nodes y are assigned the same label.
//
// Then we process each strongly connected component (SCC) of the graph
// in topological order, assigning it a PE label based on the set P of
// PE labels that flow to it from its immediate dependencies.
//
// If any node in the SCC is "indirect", the entire SCC is assigned a
// fresh PE label.  Otherwise:
//
// |P|=0  if P is empty, all nodes in the SCC are non-pointers (e.g.
//        uninitialized variables, or formal params of dead functions)
//        and the SCC is assigned the PE label of zero.
//
// |P|=1  if P is a singleton, the SCC is assigned the same label as the
//        sole element of P.
//
// |P|>1  if P contains multiple labels, a unique label representing P is
//        invented and recorded in a hash table, so that other
//        equivalent SCCs may also be assigned this label, akin to
//        conventional hash-value numbering in a compiler.
//
// Finally, a renumbering is computed such that each node is replaced by
// the lowest-numbered node with the same PE label.  All constraints are
// renumbered, and any resulting duplicates are eliminated.
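//
// For example (an illustrative sketch), in a dead function containing
//
//	p = &x
//	q = p
//	r = q
//
// p is labeled with x's nodeid by the addr rule, q and r each receive
// the singleton set {label(p)} and hence the same label, and the
// renumbering replaces q and r by the lowest-numbered node of the
// class (say p), turning both copy constraints into degenerate copies
// that are then eliminated.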
//
// The only nodes that are not renumbered are the objects x in addr
// (y=&x) constraints, since the ids of these nodes (and fields derived
// from them via offsetAddr rules) are the elements of all points-to
// sets, so they must remain as they are if we want the same solution.
//
// The solverStates (node.solve) for nodes in the same equivalence class
// are linked together so that all nodes in the class have the same
// solution.  This avoids the need to renumber nodeids buried in
// Queries, cgnodes, etc (like (*analysis).renumber() does) since only
// the solution is needed.
//
// The result of HVN is that the number of distinct nodes and
// constraints is reduced, but the solution is identical (almost---see
// CROSS-CHECK below).  In particular, both linear and cyclic chains of
// copies are each replaced by a single node.
//
// Nodes and constraints created "online" (e.g. while solving reflection
// constraints) are not subject to this optimization.
//
// PERFORMANCE
//
// In two benchmarks (guru and godoc), HVN eliminates about two thirds
// of nodes, the majority accounted for by non-pointers: nodes of
// non-pointer type, pointers that remain nil, formal parameters of dead
// functions, nodes of untracked types, etc.  It also reduces the number
// of constraints by about two thirds, and the solving time by
// 30--42%, although we must pay about 15% for the running time of HVN
// itself.  The benefit is greater for larger applications.
//
// There are many possible optimizations to improve the performance:
// * Use fewer than 1:1 onodes to main graph nodes: many of the onodes
//   we create are not needed.
// * HU (HVN with Union---see paper): coalesce "union" peLabels when
//   their expanded-out sets are equal.
// * HR (HVN with deReference---see paper): this will require that we
//   apply HVN until fixed point, which may need more bookkeeping of the
//   correspondence of main nodes to onodes.
// * Location Equivalence (see paper): have points-to sets contain not
//   locations but location-equivalence class labels, each representing
//   a set of locations.
// * HVN with field-sensitive ref: model each of the fields of a
//   pointer-to-struct.
//
// CROSS-CHECK
//
// To verify the soundness of the optimization, when the
// debugHVNCrossCheck option is enabled, we run the solver twice, once
// before and once after running HVN, dumping the solution to disk, and
// then we compare the results.  If they are not identical, the analysis
// panics.
//
// The solution dumped to disk includes only the N*N submatrix of the
// complete solution where N is the number of nodes after generation.
// In other words, we ignore pointer variables and objects created by
// the solver itself, since their numbering depends on the solver order,
// which is affected by the optimization.  In any case, that's the only
// part the client cares about.
//
// The cross-check is too strict and may fail spuriously.  Although the
// H&L paper describing HVN states that the solutions obtained should be
// identical, this is not the case in practice because HVN can collapse
// cycles involving *p even when pts(p)={}.  Consider this example
// distilled from testdata/hello.go:
//
//	var x T
//	func f(p **T) {
//		t0 = *p
//		...
//		t1 = φ(t0, &x)
//		*p = t1
//	}
//
// If f is dead code, we get:
// 	unoptimized:  pts(p)={} pts(t0)={} pts(t1)={&x}
// 	optimized:    pts(p)={} pts(t0)=pts(t1)=pts(*p)={&x}
//
// It's hard to argue that this is a bug: the result is sound and the
// loss of precision is inconsequential---f is dead code, after all.
// But unfortunately it limits the usefulness of the cross-check since
// failures must be carefully analyzed.  Ben Hardekopf suggests (in
// personal correspondence) some approaches to mitigating it:
//
// 	If there is a node with an HVN points-to set that is a superset
// 	of the NORM points-to set, then either it's a bug or it's a
// 	result of this issue. If it's a result of this issue, then in
// 	the offline constraint graph there should be a REF node inside
// 	some cycle that reaches this node, and in the NORM solution the
// 	pointer being dereferenced by that REF node should be the empty
// 	set. If that isn't true then this is a bug. If it is true, then
// 	you can further check that in the NORM solution the "extra"
// 	points-to info in the HVN solution does in fact come from that
// 	purported cycle (if it doesn't, then this is still a bug). If
// 	you're doing the further check then you'll need to do it for
// 	each "extra" points-to element in the HVN points-to set.
//
// 	There are probably ways to optimize these checks by taking
// 	advantage of graph properties. For example, extraneous points-to
// 	info will flow through the graph and end up in many
// 	nodes. Rather than checking every node with extra info, you
// 	could probably work out the "origin point" of the extra info and
// 	just check there. Note that the check in the first bullet is
// 	looking for soundness bugs, while the check in the second bullet
// 	is looking for precision bugs; depending on your needs, you may
// 	care more about one than the other.
//
// which we should evaluate.  The cross-check is nonetheless invaluable
// for all but one of the programs in the pointer_test suite.

import (
	"fmt"
	"go/types"
	"io"
	"reflect"

	"golang.org/x/tools/container/intsets"
)

// A peLabel is a pointer-equivalence label: two nodes with the same
// peLabel have identical points-to solutions.
//
// The numbers are allocated consecutively like so:
// 	0	not a pointer
//	1..N-1	addrConstraints (equals the constraint's .src field, hence sparse)
//	...	offsetAddr constraints
//	...	SCCs (with indirect nodes or multiple inputs)
//
// Each PE label denotes a set of pointers containing a single addr, a
// single offsetAddr, or some set of other PE labels.
//
type peLabel int

type hvn struct {
	a        *analysis
	N        int                // len(a.nodes) immediately after constraint generation
	log      io.Writer          // (optional) log of HVN lemmas
	onodes   []*onode           // nodes of the offline graph
	label    peLabel            // the next available PE label
	hvnLabel map[string]peLabel // hash-value numbering (PE label) for each set of onodeids
	stack    []onodeid          // DFS stack
	index    int32              // next onode.index, from Tarjan's SCC algorithm

	// For each distinct offsetAddrConstraint (src, offset) pair,
	// offsetAddrLabels records a unique PE label >= N.
	offsetAddrLabels map[offsetAddr]peLabel
}

// The index of an onode in the offline graph.
// (Currently the first N align with the main nodes,
// but this may change with HRU.)
type onodeid uint32

// An onode is a node in the offline constraint graph.
// (Where ambiguous, members of analysis.nodes are referred to as
// "main graph" nodes.)
//
// Edges in the offline constraint graph (edges and implicit) point to
// the source, i.e. against the flow of values: they are dependencies.
// Implicit edges are used for SCC computation, but not for gathering
// incoming labels.
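//
// For example (an illustrative sketch), the copy constraint dst = src
// contributes the explicit dependency edge dst --> src and the
// implicit edge *dst ~~> *src; the latter ties together the unknown
// points-to sets of dst and src for cycle detection but contributes no
// PE labels.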
//
type onode struct {
	rep onodeid // index of representative of SCC in offline constraint graph

	edges    intsets.Sparse // constraint edges X-->Y (this onode is X)
	implicit intsets.Sparse // implicit edges *X-->*Y (this onode is X)
	peLabels intsets.Sparse // set of peLabels that are pointer-equivalent to this one
	indirect bool           // node has points-to relations not represented in graph

	// Tarjan's SCC algorithm
	index, lowlink int32 // Tarjan numbering
	scc            int32 // -ve => on stack; 0 => unvisited; +ve => node is root of a found SCC
}

type offsetAddr struct {
	ptr    nodeid
	offset uint32
}

// nextLabel issues the next unused pointer-equivalence label.
func (h *hvn) nextLabel() peLabel {
	h.label++
	return h.label
}

// ref(X) returns the index of the onode for *X.
func (h *hvn) ref(id onodeid) onodeid {
	return id + onodeid(len(h.a.nodes))
}

// hvn computes pointer-equivalence labels (peLabels) using the Hash-based
// Value Numbering (HVN) algorithm described in Hardekopf & Lin, SAS'07.
//
func (a *analysis) hvn() {
	start("HVN")

	if a.log != nil {
		fmt.Fprintf(a.log, "\n\n==== Pointer equivalence optimization\n\n")
	}

	h := hvn{
		a:                a,
		N:                len(a.nodes),
		log:              a.log,
		hvnLabel:         make(map[string]peLabel),
		offsetAddrLabels: make(map[offsetAddr]peLabel),
	}

	if h.log != nil {
		fmt.Fprintf(h.log, "\nCreating offline graph nodes...\n")
	}

	// Create offline nodes.  The first N nodes correspond to main
	// graph nodes; the next N are their corresponding ref() nodes.
	h.onodes = make([]*onode, 2*h.N)
	for id := range a.nodes {
		id := onodeid(id)
		h.onodes[id] = &onode{}
		h.onodes[h.ref(id)] = &onode{indirect: true}
	}

	// Each node initially represents just itself.
	for id, o := range h.onodes {
		o.rep = onodeid(id)
	}

	h.markIndirectNodes()

	// Reserve the first N PE labels for addrConstraints.
	h.label = peLabel(h.N)

	// Add offline constraint edges.
	if h.log != nil {
		fmt.Fprintf(h.log, "\nAdding offline graph edges...\n")
	}
	for _, c := range a.constraints {
		if debugHVNVerbose && h.log != nil {
			fmt.Fprintf(h.log, "; %s\n", c)
		}
		c.presolve(&h)
	}

	// Find and collapse SCCs.
	if h.log != nil {
		fmt.Fprintf(h.log, "\nFinding SCCs...\n")
	}
	h.index = 1
	for id, o := range h.onodes {
		if id > 0 && o.index == 0 {
			// Start depth-first search at each unvisited node.
			h.visit(onodeid(id))
		}
	}

	// Dump the solution
	// (NB: somewhat redundant with logging from simplify().)
	if debugHVNVerbose && h.log != nil {
		fmt.Fprintf(h.log, "\nPointer equivalences:\n")
		for id, o := range h.onodes {
			if id == 0 {
				continue
			}
			if id == int(h.N) {
				fmt.Fprintf(h.log, "---\n")
			}
			fmt.Fprintf(h.log, "o%d\t", id)
			if o.rep != onodeid(id) {
				fmt.Fprintf(h.log, "rep=o%d", o.rep)
			} else {
				fmt.Fprintf(h.log, "p%d", o.peLabels.Min())
				if o.indirect {
					fmt.Fprint(h.log, " indirect")
				}
			}
			fmt.Fprintln(h.log)
		}
	}

	// Simplify the main constraint graph
	h.simplify()

	a.showCounts()

	stop("HVN")
}

// ---- constraint-specific rules ----

// dst := &src
func (c *addrConstraint) presolve(h *hvn) {
	// Each object (src) is an initial PE label.
	label := peLabel(c.src) // label < N
	if debugHVNVerbose && h.log != nil {
		// duplicate log messages are possible
		fmt.Fprintf(h.log, "\tcreate p%d: {&n%d}\n", label, c.src)
	}
	odst := onodeid(c.dst)
	osrc := onodeid(c.src)

	// Assign dst this label.
	h.onodes[odst].peLabels.Insert(int(label))
	if debugHVNVerbose && h.log != nil {
		fmt.Fprintf(h.log, "\to%d has p%d\n", odst, label)
	}

	h.addImplicitEdge(h.ref(odst), osrc) // *dst ~~> src.
}

// dst = src
func (c *copyConstraint) presolve(h *hvn) {
	odst := onodeid(c.dst)
	osrc := onodeid(c.src)
	h.addEdge(odst, osrc)                       //  dst -->  src
	h.addImplicitEdge(h.ref(odst), h.ref(osrc)) // *dst ~~> *src
}

// dst = *src + offset
func (c *loadConstraint) presolve(h *hvn) {
	odst := onodeid(c.dst)
	osrc := onodeid(c.src)
	if c.offset == 0 {
		h.addEdge(odst, h.ref(osrc)) // dst --> *src
	} else {
		// We don't interpret load-with-offset, e.g. results
		// of map value lookup, R-block of dynamic call, slice
		// copy/append, reflection.
		h.markIndirect(odst, "load with offset")
	}
}

// *dst + offset = src
func (c *storeConstraint) presolve(h *hvn) {
	odst := onodeid(c.dst)
	osrc := onodeid(c.src)
	if c.offset == 0 {
		h.onodes[h.ref(odst)].edges.Insert(int(osrc)) // *dst --> src
		if debugHVNVerbose && h.log != nil {
			fmt.Fprintf(h.log, "\to%d --> o%d\n", h.ref(odst), osrc)
		}
	} else {
		// We don't interpret store-with-offset.
		// See discussion of soundness at markIndirectNodes.
	}
}

// dst = &src.offset
func (c *offsetAddrConstraint) presolve(h *hvn) {
	// Give each distinct (addr, offset) pair a fresh PE label.
	// The cache performs CSE, effectively.
	key := offsetAddr{c.src, c.offset}
	label, ok := h.offsetAddrLabels[key]
	if !ok {
		label = h.nextLabel()
		h.offsetAddrLabels[key] = label
		if debugHVNVerbose && h.log != nil {
			fmt.Fprintf(h.log, "\tcreate p%d: {&n%d.#%d}\n",
				label, c.src, c.offset)
		}
	}

	// Assign dst this label.
	h.onodes[c.dst].peLabels.Insert(int(label))
	if debugHVNVerbose && h.log != nil {
		fmt.Fprintf(h.log, "\to%d has p%d\n", c.dst, label)
	}
}

// dst = src.(typ)  where typ is an interface
func (c *typeFilterConstraint) presolve(h *hvn) {
	h.markIndirect(onodeid(c.dst), "typeFilter result")
}

// dst = src.(typ)  where typ is concrete
func (c *untagConstraint) presolve(h *hvn) {
	odst := onodeid(c.dst)
	for end := odst + onodeid(h.a.sizeof(c.typ)); odst < end; odst++ {
		h.markIndirect(odst, "untag result")
	}
}

// dst = src.method(c.params...)
func (c *invokeConstraint) presolve(h *hvn) {
	// All methods are address-taken functions, so
	// their formal P-blocks were already marked indirect.

	// Mark the caller's targets node as indirect.
	sig := c.method.Type().(*types.Signature)
	id := c.params
	h.markIndirect(onodeid(c.params), "invoke targets node")
	id++

	id += nodeid(h.a.sizeof(sig.Params()))

	// Mark the caller's R-block as indirect.
	end := id + nodeid(h.a.sizeof(sig.Results()))
	for id < end {
		h.markIndirect(onodeid(id), "invoke R-block")
		id++
	}
}
// markIndirectNodes marks as indirect nodes whose points-to relations
// are not entirely captured by the offline graph, including:
//
//    (a) All address-taken nodes (including the following nodes within
//        the same object).  This is described in the paper.
//
// The most subtle cause of indirect nodes is the generation of
// store-with-offset constraints since the offline graph doesn't
// represent them.  A global audit of constraint generation reveals the
// following uses of store-with-offset:
//
//    (b) genDynamicCall, for P-blocks of dynamically called functions,
//        to which dynamic copy edges will be added during solving:
//        from storeConstraint for standalone functions, and from
//        invokeConstraint for methods.
//        All such P-blocks must be marked indirect.
//    (c) MapUpdate, to update the value part of a map object.
//        All MakeMap objects' value parts must be marked indirect.
//    (d) copyElems, to update the destination array.
//        All array elements must be marked indirect.
//
// Not all indirect marking happens here.  ref() nodes are marked
// indirect at construction, and each constraint's presolve() method may
// mark additional nodes.
//
func (h *hvn) markIndirectNodes() {
	// (a) all address-taken nodes, plus all nodes following them
	//     within the same object, since these may be indirectly
	//     stored or address-taken.
	for _, c := range h.a.constraints {
		if c, ok := c.(*addrConstraint); ok {
			start := h.a.enclosingObj(c.src)
			end := start + nodeid(h.a.nodes[start].obj.size)
			for id := c.src; id < end; id++ {
				h.markIndirect(onodeid(id), "A-T object")
			}
		}
	}

	// (b) P-blocks of all address-taken functions.
	for id := 0; id < h.N; id++ {
		obj := h.a.nodes[id].obj

		// TODO(adonovan): opt: if obj.cgn.fn is a method and
		// obj.cgn is not its shared contour, this is an
		// "inlined" static method call.  We needn't consider it
		// address-taken since no invokeConstraint will affect it.

		if obj != nil && obj.flags&otFunction != 0 && h.a.atFuncs[obj.cgn.fn] {
			// address-taken function
			if debugHVNVerbose && h.log != nil {
				fmt.Fprintf(h.log, "n%d is address-taken: %s\n", id, obj.cgn.fn)
			}
			h.markIndirect(onodeid(id), "A-T func identity")
			id++
			sig := obj.cgn.fn.Signature
			psize := h.a.sizeof(sig.Params())
			if sig.Recv() != nil {
				psize += h.a.sizeof(sig.Recv().Type())
			}
			for end := id + int(psize); id < end; id++ {
				h.markIndirect(onodeid(id), "A-T func P-block")
			}
			id-- // compensate for the enclosing loop's increment
			continue
		}
	}

	// (c) all map objects' value fields.
	for _, id := range h.a.mapValues {
		h.markIndirect(onodeid(id), "makemap.value")
	}

	// (d) all array element objects.
	// TODO(adonovan): opt: can we do better?
	for id := 0; id < h.N; id++ {
		// Identity node for an object of array type?
		if tArray, ok := h.a.nodes[id].typ.(*types.Array); ok {
			// Mark the array element nodes indirect.
			// (Skip past the identity field.)
			for range h.a.flatten(tArray.Elem()) {
				id++
				h.markIndirect(onodeid(id), "array elem")
			}
		}
	}
}

func (h *hvn) markIndirect(oid onodeid, comment string) {
	h.onodes[oid].indirect = true
	if debugHVNVerbose && h.log != nil {
		fmt.Fprintf(h.log, "\to%d is indirect: %s\n", oid, comment)
	}
}

// Adds an edge dst-->src.
// Note the unusual convention: edges are dependency (contraflow) edges.
func (h *hvn) addEdge(odst, osrc onodeid) {
	h.onodes[odst].edges.Insert(int(osrc))
	if debugHVNVerbose && h.log != nil {
		fmt.Fprintf(h.log, "\to%d --> o%d\n", odst, osrc)
	}
}

func (h *hvn) addImplicitEdge(odst, osrc onodeid) {
	h.onodes[odst].implicit.Insert(int(osrc))
	if debugHVNVerbose && h.log != nil {
		fmt.Fprintf(h.log, "\to%d ~~> o%d\n", odst, osrc)
	}
}

// visit implements the depth-first search of Tarjan's SCC algorithm.
// Precondition: x is canonical.
func (h *hvn) visit(x onodeid) {
	h.checkCanonical(x)
	xo := h.onodes[x]
	xo.index = h.index
	xo.lowlink = h.index
	h.index++

	h.stack = append(h.stack, x) // push
	assert(xo.scc == 0, "node revisited")
	xo.scc = -1

	var deps []int
	deps = xo.edges.AppendTo(deps)
	deps = xo.implicit.AppendTo(deps)

	for _, y := range deps {
		// Loop invariant: x is canonical.

		y := h.find(onodeid(y))

		if x == y {
			continue // nodes already coalesced
		}

		xo := h.onodes[x]
		yo := h.onodes[y]

		switch {
		case yo.scc > 0:
			// y is already a collapsed SCC

		case yo.scc < 0:
			// y is on the stack, and thus in the current SCC.
			if yo.index < xo.lowlink {
				xo.lowlink = yo.index
			}

		default:
			// y is unvisited; visit it now.
			h.visit(y)
			// Note: x and y are now non-canonical.

			x = h.find(onodeid(x))

			if yo.lowlink < xo.lowlink {
				xo.lowlink = yo.lowlink
			}
		}
	}
	h.checkCanonical(x)

	// Is x the root of an SCC?
	if xo.lowlink == xo.index {
		// Coalesce all nodes in the SCC.
		if debugHVNVerbose && h.log != nil {
			fmt.Fprintf(h.log, "scc o%d\n", x)
		}
		for {
			// Pop y from stack.
			i := len(h.stack) - 1
			y := h.stack[i]
			h.stack = h.stack[:i]

			h.checkCanonical(x)
			xo := h.onodes[x]
			h.checkCanonical(y)
			yo := h.onodes[y]

			if xo == yo {
				// SCC is complete.
				xo.scc = 1
				h.labelSCC(x)
				break
			}
			h.coalesce(x, y)
		}
	}
}

// Precondition: x is canonical.
func (h *hvn) labelSCC(x onodeid) {
	h.checkCanonical(x)
	xo := h.onodes[x]
	xpe := &xo.peLabels

	// All indirect nodes get new labels.
	if xo.indirect {
		label := h.nextLabel()
		if debugHVNVerbose && h.log != nil {
			fmt.Fprintf(h.log, "\tcreate p%d: indirect SCC\n", label)
			fmt.Fprintf(h.log, "\to%d has p%d\n", x, label)
		}

		// Remove pre-labeling, in case a direct pre-labeled node was
		// merged with an indirect one.
		xpe.Clear()
		xpe.Insert(int(label))

		return
	}

	// Invariant: all peLabels sets are non-empty.
	// Those that are logically empty contain zero as their sole element.
	// No other set contains zero.

	// Find all labels coming in to the coalesced SCC node.
	for _, y := range xo.edges.AppendTo(nil) {
		y := h.find(onodeid(y))
		if y == x {
			continue // already coalesced
		}
		ype := &h.onodes[y].peLabels
		if debugHVNVerbose && h.log != nil {
			fmt.Fprintf(h.log, "\tedge from o%d = %s\n", y, ype)
		}

		if ype.IsEmpty() {
			if debugHVNVerbose && h.log != nil {
				fmt.Fprintf(h.log, "\tnode has no PE label\n")
			}
		}
		assert(!ype.IsEmpty(), "incoming node has no PE label")

		if ype.Has(0) {
			// {0} represents a non-pointer.
			assert(ype.Len() == 1, "PE set contains {0, ...}")
		} else {
			xpe.UnionWith(ype)
		}
	}

	switch xpe.Len() {
	case 0:
		// SCC has no incoming non-zero PE labels: it is a non-pointer.
		xpe.Insert(0)

	case 1:
		// already a singleton

	default:
		// SCC has multiple incoming non-zero PE labels.
		// Find the canonical label representing this set.
		// We use String() as a fingerprint consistent with Equals().
		key := xpe.String()
		label, ok := h.hvnLabel[key]
		if !ok {
			label = h.nextLabel()
			if debugHVNVerbose && h.log != nil {
				fmt.Fprintf(h.log, "\tcreate p%d: union %s\n", label, xpe.String())
			}
			h.hvnLabel[key] = label
		}
		xpe.Clear()
		xpe.Insert(int(label))
	}

	if debugHVNVerbose && h.log != nil {
		fmt.Fprintf(h.log, "\to%d has p%d\n", x, xpe.Min())
	}
}

// coalesce combines two nodes in the offline constraint graph.
// Precondition: x and y are canonical.
func (h *hvn) coalesce(x, y onodeid) {
	xo := h.onodes[x]
	yo := h.onodes[y]

	// x becomes y's canonical representative.
	yo.rep = x

	if debugHVNVerbose && h.log != nil {
		fmt.Fprintf(h.log, "\tcoalesce o%d into o%d\n", y, x)
	}

	// x accumulates y's edges.
	xo.edges.UnionWith(&yo.edges)
	yo.edges.Clear()

	// x accumulates y's implicit edges.
	xo.implicit.UnionWith(&yo.implicit)
	yo.implicit.Clear()

	// x accumulates y's pointer-equivalence labels.
	xo.peLabels.UnionWith(&yo.peLabels)
	yo.peLabels.Clear()

	// x accumulates y's indirect flag.
	if yo.indirect {
		xo.indirect = true
	}
}

// simplify computes a degenerate renumbering of nodeids from the PE
// labels assigned by the hvn, and uses it to simplify the main
// constraint graph, eliminating non-pointer nodes and duplicate
// constraints.
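//
// For example (an illustrative sketch), if nodes n3, n7 and n9 end up
// with the same PE label, n3 (the lowest-numbered) becomes canonical:
// the mapping sends 7 and 9 to 3, the three nodes share one solver
// state, and the constraints are rewritten through the mapping so that
// duplicates and degenerate copies can be dropped.  (The .src of an
// addr constraint is never rewritten, as explained in the file
// comment.)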
//
func (h *hvn) simplify() {
	// canon maps each peLabel to its canonical main node.
	canon := make([]nodeid, h.label)
	for i := range canon {
		canon[i] = nodeid(h.N) // indicates "unset"
	}

	// mapping maps each main node index to the index of the canonical node.
	mapping := make([]nodeid, len(h.a.nodes))

	for id := range h.a.nodes {
		id := nodeid(id)
		if id == 0 {
			canon[0] = 0
			mapping[0] = 0
			continue
		}
		oid := h.find(onodeid(id))
		peLabels := &h.onodes[oid].peLabels
		assert(peLabels.Len() == 1, "PE class is not a singleton")
		label := peLabel(peLabels.Min())

		canonId := canon[label]
		if canonId == nodeid(h.N) {
			// id becomes the representative of the PE label.
			canonId = id
			canon[label] = canonId

			if h.a.log != nil {
				fmt.Fprintf(h.a.log, "\tpts(n%d) is canonical : \t(%s)\n",
					id, h.a.nodes[id].typ)
			}

		} else {
			// Link the solver states for the two nodes.
			assert(h.a.nodes[canonId].solve != nil, "missing solver state")
			h.a.nodes[id].solve = h.a.nodes[canonId].solve

			if h.a.log != nil {
				// TODO(adonovan): debug: reorganize the log so it prints
				// one line:
				// 	pe y = x1, ..., xn
				// for each canonical y.  Requires allocation.
				fmt.Fprintf(h.a.log, "\tpts(n%d) = pts(n%d) : %s\n",
					id, canonId, h.a.nodes[id].typ)
			}
		}

		mapping[id] = canonId
	}

	// Renumber the constraints, eliminate duplicates, and eliminate
	// any containing non-pointers (n0).
	addrs := make(map[addrConstraint]bool)
	copys := make(map[copyConstraint]bool)
	loads := make(map[loadConstraint]bool)
	stores := make(map[storeConstraint]bool)
	offsetAddrs := make(map[offsetAddrConstraint]bool)
	untags := make(map[untagConstraint]bool)
	typeFilters := make(map[typeFilterConstraint]bool)
	invokes := make(map[invokeConstraint]bool)

	nbefore := len(h.a.constraints)
	cc := h.a.constraints[:0] // in-situ compaction
	for _, c := range h.a.constraints {
		// Renumber.
		switch c := c.(type) {
		case *addrConstraint:
			// Don't renumber c.src since it is the label of
			// an addressable object and will appear in PT sets.
			c.dst = mapping[c.dst]
		default:
			c.renumber(mapping)
		}

		if c.ptr() == 0 {
			continue // skip: constraint attached to non-pointer
		}

		var dup bool
		switch c := c.(type) {
		case *addrConstraint:
			_, dup = addrs[*c]
			addrs[*c] = true

		case *copyConstraint:
			if c.src == c.dst {
				continue // skip degenerate copies
			}
			if c.src == 0 {
				continue // skip copy from non-pointer
			}
			_, dup = copys[*c]
			copys[*c] = true

		case *loadConstraint:
			if c.src == 0 {
				continue // skip load from non-pointer
			}
			_, dup = loads[*c]
			loads[*c] = true

		case *storeConstraint:
			if c.src == 0 {
				continue // skip store from non-pointer
			}
			_, dup = stores[*c]
			stores[*c] = true

		case *offsetAddrConstraint:
			if c.src == 0 {
				continue // skip offset from non-pointer
			}
			_, dup = offsetAddrs[*c]
			offsetAddrs[*c] = true

		case *untagConstraint:
			if c.src == 0 {
				continue // skip untag of non-pointer
			}
			_, dup = untags[*c]
			untags[*c] = true

		case *typeFilterConstraint:
			if c.src == 0 {
				continue // skip filter of non-pointer
			}
			_, dup = typeFilters[*c]
			typeFilters[*c] = true

		case *invokeConstraint:
			if c.params == 0 {
				panic("non-pointer invoke.params")
			}
			if c.iface == 0 {
				continue // skip invoke on non-pointer
			}
			_, dup = invokes[*c]
			invokes[*c] = true

		default:
			// We don't bother de-duping advanced constraints
			// (e.g. reflection) since they are uncommon.

			// Eliminate constraints containing non-pointer nodeids.
			//
			// We use reflection to find the fields to avoid
			// adding yet another method to constraint.
			//
			// TODO(adonovan): experiment with a constraint
			// method that returns a slice of pointers to
			// nodeids fields to enable uniform iteration;
			// the renumber() method could be removed and
			// implemented using the new one.
			//
			// TODO(adonovan): opt: this is unsound since
			// some constraints still have an effect if one
			// of the operands is zero: rVCall, rVMapIndex,
			// rvSetMapIndex.  Handle them specially.
			rtNodeid := reflect.TypeOf(nodeid(0))
			x := reflect.ValueOf(c).Elem()
			for i, nf := 0, x.NumField(); i < nf; i++ {
				f := x.Field(i)
				if f.Type() == rtNodeid {
					if f.Uint() == 0 {
						dup = true // skip it
						break
					}
				}
			}
		}
		if dup {
			continue // skip duplicates
		}

		cc = append(cc, c)
	}
	h.a.constraints = cc

	if h.log != nil {
		fmt.Fprintf(h.log, "#constraints: was %d, now %d\n", nbefore, len(h.a.constraints))
	}
}

// find returns the canonical onodeid for x.
// (The onodes form a disjoint set forest.)
func (h *hvn) find(x onodeid) onodeid {
	// TODO(adonovan): opt: this is a CPU hotspot.  Try "union by rank".
	xo := h.onodes[x]
	rep := xo.rep
	if rep != x {
		rep = h.find(rep) // simple path compression
		xo.rep = rep
	}
	return rep
}

func (h *hvn) checkCanonical(x onodeid) {
	if debugHVN {
		assert(x == h.find(x), "not canonical")
	}
}

func assert(p bool, msg string) {
	if debugHVN && !p {
		panic("assertion failed: " + msg)
	}
}