Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 6 additions & 5 deletions sync/upstream.py
Original file line number Diff line number Diff line change
Expand Up @@ -629,8 +629,9 @@ def remove_complete_backouts(commits: Iterable[Commit]) -> Sequence[Commit]:
if commit.is_backout:
backed_out_commits, _ = commit.wpt_commits_backed_out()
backed_out = {item.sha1 for item in backed_out_commits}
if backed_out.issubset(commits_remaining):
commits_remaining -= backed_out
intersecion = backed_out.intersection(commits_remaining)
if len(intersecion) > 0:
commits_remaining -= intersecion
continue
commits_remaining.add(commit.sha1)

Expand Down Expand Up @@ -686,9 +687,9 @@ def updates_for_backout(git_gecko: Repo,
sync = syncs.pop()
assert isinstance(sync, UpstreamSync)
if commit in sync.gecko_commits:
# This commit was already processed
backed_out_commit_shas = set()
return {}, {}
# This commit was already processed for this sync
backed_out_commit_shas.remove(backed_out_commit.sha1)
continue
Copy link
Member

@jgraham jgraham Jul 26, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This must be wrong, because we'll skip removing the commit from the set of remaining backed_out_commit_shas, which means that we'll end up trying to create a new sync for it below.

We could just remove it from the SHA1s here, but there's still a theoretical (if somewhat unlikely) problem. Let's say that we have a backout B that backs out commits C1 and C2 from different bugs. Now let's say that C1 and C2 already have syncs S1 and S2. But, for whatever reason S1 has been merged upstream, but S2 hasn't. So for S2 I think the current logic is fine; when we update the sync we remove the backed out commits from the upstreamed set and only end up with the net result of removing all the backouts. But for S1, everything is broken; assuming we go through the "create a sync" logic below we'll create a sync that backs out both C1 and C2. Initially that will have a merge conflict (because S2 containing C2 hasn't landed yet), but once S2 lands that new sync will revert both sets of changes, not just S1.

And at this point there's a problem because we're currently defining an UpstreamSync as a commit range and a bug number the commits have to match. When we change the commit range we just move all the wpt-parts of the matching commits to the GitHub PR. But in this case we want to define the sync specifically as the backout of some earlier commit that already landed. Which I don't think is trivial in the current model. I think the sync would need additional data to say specifically which commits it should revert, and then we'd need special handling for sync with a non-empty list of commits that they're reverting. But maybe you can see a simpler solution?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah, you're right about the removing from backed_out_commit_shas, I've updated this.
Regarding the scenario when S1 is merged, I guess I'm missing something, but shouldn't S1 then have status complete and because of that excluded from all the backout updates, so it shouldn't come to the "create a sync" point?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's exactly because it's merged that we get to the create a sync point. The whole "create a new sync for the backout commit" is designed to handle the case where e.g. something is backed out of central after the corresponding upstream sync has already been merged. In that case we can't do anything with the original PR (because GitHub quite rightly doesn't allow adding new commits to a merged PR) so instead we want to create an entirely new sync/PR for the backout.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, I think I'm up to speed with what you're saying :)
Just to reiterate, basically the problem is that in the case you described, we're creating a new sync with only the backout commit, which works fine for one bug backout, but doesn't for multiple bugs, where we should revert only the commits, which belong to the bug, which was merged already.
But so far I don't have a better idea, than, as you said, creating a sync with reverted commits for the bug. So, I guess, I'll try to sketch something.

if backed_out_commit in sync.upstreamed_gecko_commits:
backed_out_commit_shas.remove(backed_out_commit.sha1)
assert sync.bug is not None
Expand Down
19 changes: 16 additions & 3 deletions test/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,9 @@ def setup(self):
self.init()
with open(os.path.join(self.working_tree, ".hg", "hgrc"), "w") as f:
f.write("""[ui]
username=test""")
username=test
[extensions]
shelve=""")

def __getattr__(self, name):
def call(self, *args):
Expand Down Expand Up @@ -349,12 +351,23 @@ def inner(revs, bugs, message=None, bookmarks="mozilla/autoland"):
if isinstance(bugs, int):
bugs = [bugs] * len(revs)
assert len(bugs) == len(revs)
msg = [b"Backed out %i changesets (bug %d) for test, r=backout" % (len(revs), bugs[0]), b""]
msg = []
all_bugs_msgs = []
for rev, bug in zip(revs, bugs):
hg_gecko_upstream.backout("--no-commit", rev)
msg.append(b"Backed out changeset %s (Bug %d)" % (rev[:12].encode("ascii"), bug))
# Shelve the changes so we can perform the next backout.
hg_gecko_upstream.shelve()
bug_msg = b'bug %d' % bug
msg.append(b"Backed out changeset %s (%s)" % (rev[:12].encode("ascii"), bug_msg))
all_bugs_msgs.append(bug_msg)
bugs_msg = b", ".join(all_bugs_msgs)
if len(bugs) > 1:
title = [b"Backed out %i changesets (%s) for test, r=backout" % (len(revs), bugs_msg)]
msg = title + msg
if message is None:
message = b"\n".join(msg)
for _ in revs:
hg_gecko_upstream.unshelve()
return hg_commit(hg_gecko_upstream, message, bookmarks)
return inner

Expand Down
51 changes: 51 additions & 0 deletions test/test_upstream.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,57 @@ def test_create_partial_backout_reland(git_gecko, git_wpt, upstream_gecko_commit
sync.wpt_commits[1].metadata["gecko-commit"] == relanding_rev


def test_create_backout_of_multiple_bugs(git_gecko, git_wpt, upstream_gecko_commit,
upstream_gecko_backout):
bug = 1234
test_changes = {"README": b"Change README\n"}
rev = upstream_gecko_commit(test_changes=test_changes, bug=bug,
message=b"Change README")

update_repositories(git_gecko, git_wpt, wait_gecko_commit=rev)
upstream.gecko_push(git_gecko, git_wpt, "autoland", rev,
raise_on_error=True)

syncs = upstream.UpstreamSync.for_bug(git_gecko, git_wpt, bug)

bug_2 = 5678
test_changes_2 = {"OTHER": b"Add other file\n"}
rev_2 = upstream_gecko_commit(test_changes=test_changes_2, bug=bug_2,
message=b"Add other")

update_repositories(git_gecko, git_wpt, wait_gecko_commit=rev_2)
upstream.gecko_push(git_gecko, git_wpt, "autoland", rev_2,
raise_on_error=True)

syncs_2 = upstream.UpstreamSync.for_bug(git_gecko, git_wpt, bug_2)

backout_rev = upstream_gecko_backout([rev_2, rev], [bug_2, bug])

update_repositories(git_gecko, git_wpt, wait_gecko_commit=backout_rev)
upstream.gecko_push(git_gecko, git_wpt, "autoland", backout_rev, raise_on_error=True)

syncs = upstream.UpstreamSync.for_bug(git_gecko, git_wpt, bug)
syncs_2 = upstream.UpstreamSync.for_bug(git_gecko, git_wpt, bug_2)

assert list(syncs.keys()) == ["incomplete"]
assert len(syncs["incomplete"]) == 1
sync = syncs["incomplete"].pop()
assert sync.status == "incomplete"
assert sync.process_name.seq_id == 0
assert len(sync.gecko_commits) == 0
assert len(sync.upstreamed_gecko_commits) == 1
assert len(sync.wpt_commits) == 1

assert list(syncs_2.keys()) == ["incomplete"]
assert len(syncs_2["incomplete"]) == 1
sync_2 = syncs_2["incomplete"].pop()
assert sync_2.status == "incomplete"
assert sync_2.process_name.seq_id == 0
assert len(sync_2.gecko_commits) == 0
assert len(sync_2.upstreamed_gecko_commits) == 1
assert len(sync_2.wpt_commits) == 1


def test_land_pr(env, git_gecko, git_wpt, hg_gecko_upstream, upstream_gecko_commit):
bug = 1234
test_changes = {"README": b"Change README\n"}
Expand Down