summaryrefslogtreecommitdiffhomepage
path: root/patches/llvm_host/llvm-0004-Fix-ScalarEvolutionExpander-step-scaling-bug.patch
blob: 9945bf35678057609ba61c5909347df14156ff95 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
From c8ce9e59a883f452bc538b5f16fb83649472dc3e Mon Sep 17 00:00:00 2001
From: Keno Fischer <kfischer@college.harvard.edu>
Date: Wed, 13 Jul 2016 01:28:12 +0000
Subject: [PATCH] Fix ScalarEvolutionExpander step scaling bug

The expandAddRecExprLiterally function incorrectly transforms
`[Start + Step * X]` into `Step * [Start + X]` instead of the correct
transform of `[Step * X] + Start`.

This caused https://github.com/JuliaLang/julia/issues/14704#issuecomment-174126219
due to what appeared to be sufficiently complicated loop interactions.

Patch by Jameson Nash (jameson@juliacomputing.com).

Reviewers: sanjoy
Differential Revision: http://reviews.llvm.org/D16505

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@275239 91177308-0d34-0410-b5e6-96231b3b80d8

Alpine maintainer notes:
This patch replaces deps/patches/llvm-3.7.1_3.patch from Julia.
---
 lib/Analysis/ScalarEvolutionExpander.cpp           |  7 ++++
 .../ScalarEvolution/incorrect-offset-scaling.ll    | 48 ++++++++++++++++++++++
 2 files changed, 55 insertions(+)
 create mode 100644 test/Analysis/ScalarEvolution/incorrect-offset-scaling.ll

diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index 7716435..77e4ec7 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -1288,6 +1288,13 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
   if (!SE.dominates(Step, L->getHeader())) {
     PostLoopScale = Step;
     Step = SE.getConstant(Normalized->getType(), 1);
+    if (!Start->isZero()) {
+        // The normalization below assumes that Start is constant zero, so if
+        // it isn't re-associate Start to PostLoopOffset.
+        assert(!PostLoopOffset && "Start not-null but PostLoopOffset set?");
+        PostLoopOffset = Start;
+        Start = SE.getConstant(Normalized->getType(), 0);
+    }
     Normalized =
       cast<SCEVAddRecExpr>(SE.getAddRecExpr(
                              Start, Step, Normalized->getLoop(),
diff --git a/test/Analysis/ScalarEvolution/incorrect-offset-scaling.ll b/test/Analysis/ScalarEvolution/incorrect-offset-scaling.ll
new file mode 100644
index 0000000..7ffb093
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/incorrect-offset-scaling.ll
@@ -0,0 +1,48 @@
+; RUN: opt -S -loop-reduce < %s | FileCheck %s
+
+target triple = "x86_64-unknown-unknown"
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @incorrect_offset_scaling(i64, i64*) {
+top:
+  br label %L
+
+L:                                                ; preds = %idxend.10, %idxend, %L2, %top
+  br i1 undef, label %L, label %L1
+
+L1:                                               ; preds = %L1.preheader, %L2
+  %r13 = phi i64 [ %r1, %L2 ], [ 1, %L ]
+; CHECK:  %lsr.iv = phi i64 [ 0, %L{{[^ ]+}} ], [ %lsr.iv.next, %L2 ]
+; CHECK-NOT:  %lsr.iv = phi i64 [ -1, %L{{[^ ]+}} ], [ %lsr.iv.next, %L2 ]
+; CHECK:  br
+  %r0 = add i64 %r13, -1
+  br label %idxend.8
+
+L2:                                               ; preds = %idxend.8
+  %r1 = add i64 %r13, 1
+  br i1 undef, label %L, label %L1
+
+if6:                                              ; preds = %idxend.8
+  %r2 = add i64 %0, -1
+  %r3 = load i64, i64* %1, align 8
+; CHECK-NOT:  %r2
+; CHECK:  %r3 = load i64
+  br label %ib
+
+idxend.8:                                         ; preds = %L1
+  br i1 undef, label %if6, label %L2
+
+ib:                                               ; preds = %if6
+  %r4 = mul i64 %r3, %r0
+  %r5 = add i64 %r2, %r4
+  %r6 = icmp ult i64 %r5, undef
+; CHECK  %2 = mul i64 %lsr.iv, %r3
+; CHECK  %3 = add i64 %1, -1
+; CHECK  %4 = add i64 %0, %r3
+; CHECK  %r6
+  %r7 = getelementptr i64, i64* undef, i64 %r5
+  store i64 1, i64* %r7, align 8
+; CHECK  %5 = mul i64 %lsr.iv, %r3
+; CHECK  %6 = add i64 %5, -1
+  br label %L
+}