diff --git a/Documentation/git-history.adoc b/Documentation/git-history.adoc index 24dc907033b469..0a350c11cd0c28 100644 --- a/Documentation/git-history.adoc +++ b/Documentation/git-history.adoc @@ -40,13 +40,39 @@ at once. LIMITATIONS ----------- -This command does not (yet) work with histories that contain merges. You -should use linkgit:git-rebase[1] with the `--rebase-merges` flag instead. - -Furthermore, the command does not support operations that can result in merge -conflicts. This limitation is by design as history rewrites are not intended to -be stateful operations. The limitation can be lifted once (if) Git learns about -first-class conflicts. +This command supports two-parent merge commits in the rewrite path: +the auto-remerged tree of the original parents, the merge commit +itself, and the auto-merged tree of the rewritten parents are +combined so that the user's manual conflict resolution (textual or +semantic) is preserved through the replay. Octopus merges (more than +two parents) are not supported and are rejected with an error. + +When only one of the two parents of a merge has been rewritten, it +is possible for the rewritten parent and the unchanged parent to +conflict on a region that the original parents did not conflict on. +The user's original resolution of the merge cannot speak to such a +fresh conflict because it never existed at the time the merge was +made. In that situation the replay stops and reports a conflict on +the affected path rather than silently committing a tree whose +content is taken from the rewritten side. Resolve the new conflict +the same way you would resolve any other merge conflict, then +continue. Paths managed by a binary, custom, or other non-textual +merge driver are treated the same way: a fresh inner conflict on +such a path is surfaced, never absorbed. + +The replay propagates the textual diffs the user actually made in +the merge commit. It does _not_ extrapolate symbol-level intent: if +rewriting the parents pulls in genuinely new content (for example, a +new caller of a function that the merge renamed), that new content +is _not_ rewritten by the replay and may need a follow-up edit. +Symbol-aware refactoring is out of scope here, just as it is for +plain rebase. + +The command does not support operations that can result in merge +conflicts on the replayed merge itself. This limitation is by design +as history rewrites are not intended to be stateful operations. Use +linkgit:git-rebase[1] with the `--rebase-merges` flag when the +rewrite is expected to require interactive conflict resolution. COMMANDS -------- diff --git a/Makefile b/Makefile index cedc234173e377..b38678b484ac6a 100644 --- a/Makefile +++ b/Makefile @@ -832,6 +832,7 @@ TEST_BUILTINS_OBJS += test-hash-speed.o TEST_BUILTINS_OBJS += test-hash.o TEST_BUILTINS_OBJS += test-hashmap.o TEST_BUILTINS_OBJS += test-hexdump.o +TEST_BUILTINS_OBJS += test-historian.o TEST_BUILTINS_OBJS += test-json-writer.o TEST_BUILTINS_OBJS += test-lazy-init-name-hash.o TEST_BUILTINS_OBJS += test-match-trees.o diff --git a/TODO b/TODO new file mode 100644 index 00000000000000..b25a9f7468f6fd --- /dev/null +++ b/TODO @@ -0,0 +1,4 @@ +if only one of the parents has been rebased (i.e. we're +replaying O with parents P1' and P2) then can we just cherry-pick the merge - +instead of merging P1' and P2, use P1 as the merge-base with O and P1' as the +merge heads? diff --git a/builtin/history.c b/builtin/history.c index 952693808574b7..00097b2226e042 100644 --- a/builtin/history.c +++ b/builtin/history.c @@ -195,15 +195,15 @@ static int parse_ref_action(const struct option *opt, const char *value, int uns return 0; } -static int revwalk_contains_merges(struct repository *repo, - const struct strvec *revwalk_args) +static int revwalk_contains_octopus_merges(struct repository *repo, + const struct strvec *revwalk_args) { struct strvec args = STRVEC_INIT; struct rev_info revs; int ret; strvec_pushv(&args, revwalk_args->v); - strvec_push(&args, "--min-parents=2"); + strvec_push(&args, "--min-parents=3"); repo_init_revisions(repo, &revs, NULL); @@ -217,7 +217,7 @@ static int revwalk_contains_merges(struct repository *repo, } if (get_revision(&revs)) { - ret = error(_("replaying merge commits is not supported yet!")); + ret = error(_("replaying octopus merges is not supported")); goto out; } @@ -289,7 +289,7 @@ static int setup_revwalk(struct repository *repo, strvec_push(&args, "HEAD"); } - ret = revwalk_contains_merges(repo, &args); + ret = revwalk_contains_octopus_merges(repo, &args); if (ret < 0) goto out; @@ -482,6 +482,9 @@ static int cmd_history_reword(int argc, if (ret < 0) { ret = error(_("failed replaying descendants")); goto out; + } else if (ret) { + ret = error(_("conflict during replay; some descendants were not rewritten")); + goto out; } ret = 0; @@ -721,6 +724,9 @@ static int cmd_history_split(int argc, if (ret < 0) { ret = error(_("failed replaying descendants")); goto out; + } else if (ret) { + ret = error(_("conflict during replay; some descendants were not rewritten")); + goto out; } ret = 0; diff --git a/merge-ll.c b/merge-ll.c index fafe2c91971856..48c97c8aec6cf9 100644 --- a/merge-ll.c +++ b/merge-ll.c @@ -141,6 +141,9 @@ static enum ll_merge_result ll_xdl_merge(const struct ll_merge_driver *drv_unuse xmp.ancestor = orig_name; xmp.file1 = name1; xmp.file2 = name2; + xmp.out_intervals = opts->out_intervals; + xmp.in_orig_intervals = opts->in_orig_intervals; + xmp.in_side2_intervals = opts->in_side2_intervals; status = xdl_merge(orig, src1, src2, &xmp, result); ret = (status > 0) ? LL_MERGE_CONFLICT : status; return ret; diff --git a/merge-ll.h b/merge-ll.h index d038ee0c1e81f7..ce51331f196a39 100644 --- a/merge-ll.h +++ b/merge-ll.h @@ -83,6 +83,20 @@ struct ll_merge_options { /* Extra xpparam_t flags as defined in xdiff/xdiff.h. */ long xdl_opts; + + /* + * Forwarded onto xmparam_t. See xdiff/xdiff.h:s_xmparam. + * `out_intervals`, when set, receives the byte ranges of + * every conflict-marker hunk xdl_merge writes into the + * result buffer. `in_orig_intervals` and + * `in_side2_intervals` request the newly-introduced-conflict + * detection in the outer merge of a replayed merge commit; + * setting `in_side2_intervals` to a non-NULL pointer is the + * opt-in. + */ + struct xdl_conflict_intervals *out_intervals; + struct xdl_conflict_intervals *in_orig_intervals; + struct xdl_conflict_intervals *in_side2_intervals; }; #define LL_MERGE_OPTIONS_INIT { .conflict_style = -1 } diff --git a/merge-ort.c b/merge-ort.c index 00923ce3cd749b..c9813677ed3a1e 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -1371,13 +1371,31 @@ static int collect_merge_info_callback(int n, * files, then we can use side2 as the resolution. We cannot * necessarily do so this for trees, because there may be rename * destinations within side2. + * + * Under the replay-merge interval side channel, however, this + * shortcut would silently install side2's blob even when an + * earlier inner merge left an unresolved conflict on the path: + * the conflict-marker bytes (or, for non-textual drivers, the + * unresolved binary blob) would land in the result verbatim. + * Skip the shortcut when a side-channel entry says side2 is + * unresolved for this path; the path then falls through to the + * full content merge below, where xdl_merge or the loud-gate + * for non-xdiff drivers will surface the conflict. */ if (side1_matches_mbase && filemask == 0x07) { - /* use side2 version as resolution */ - setup_path_info(opt, &pi, dirname, info->pathlen, fullpath, - names, names+2, side2_null, 0, - filemask, dirmask, 1); - return mask; + int side2_unresolved = 0; + + if (opt->in_replay_side2_intervals && + strmap_get(opt->in_replay_side2_intervals, fullpath)) + side2_unresolved = 1; + + if (!side2_unresolved) { + /* use side2 version as resolution */ + setup_path_info(opt, &pi, dirname, info->pathlen, fullpath, + names, names+2, side2_null, 0, + filemask, dirmask, 1); + return mask; + } } /* Similar to above but swapping sides 1 and 2 */ @@ -2105,6 +2123,7 @@ static int merge_3way(struct merge_options *opt, { mmfile_t orig, src1, src2; struct ll_merge_options ll_opts = LL_MERGE_OPTIONS_INIT; + struct xdl_conflict_intervals out_intervals = { 0 }; char *base, *name1, *name2; enum ll_merge_result merge_status; @@ -2115,6 +2134,14 @@ static int merge_3way(struct merge_options *opt, ll_opts.extra_marker_size = extra_marker_size; ll_opts.xdl_opts = opt->xdl_opts; ll_opts.conflict_style = opt->conflict_style; + if (opt->out_replay_intervals) + ll_opts.out_intervals = &out_intervals; + if (opt->in_replay_orig_intervals) + ll_opts.in_orig_intervals = + strmap_get(opt->in_replay_orig_intervals, path); + if (opt->in_replay_side2_intervals) + ll_opts.in_side2_intervals = + strmap_get(opt->in_replay_side2_intervals, path); if (opt->priv->call_depth) { ll_opts.virtual_ancestor = 1; @@ -2157,6 +2184,28 @@ static int merge_3way(struct merge_options *opt, "warning: Cannot merge binary files: %s (%s vs. %s)", path, name1, name2); + if (opt->out_replay_intervals && + (out_intervals.nr > 0 || + merge_status == LL_MERGE_CONFLICT || + merge_status == LL_MERGE_BINARY_CONFLICT)) { + /* + * Inner content merge for this path emitted markers + * (recorded as intervals) or otherwise failed to + * resolve (binary driver or other non-textual + * driver). Either way, the path carries an + * unresolved inner conflict that the outer merge + * needs to know about; transfer ownership of the + * recorded intervals (possibly empty) into the + * caller-owned strmap. + */ + struct xdl_conflict_intervals *stored; + stored = xmalloc(sizeof(*stored)); + *stored = out_intervals; + strmap_put(opt->out_replay_intervals, path, stored); + } else { + xdl_conflict_intervals_release(&out_intervals); + } + free(base); free(name1); free(name2); @@ -4216,6 +4265,26 @@ static int process_entry(struct merge_options *opt, assert(othermask == 2 || othermask == 4); assert(ci->merged.is_null == (ci->filemask == ci->match_mask)); + + /* + * Replay-merge side channel: when this match_mask + * shortcut would have taken side2's blob verbatim + * (match_mask == 3 means stages 0 and 1 agree, + * stage 2 differs), but an earlier inner merge + * recorded that side2 is unresolved for this + * path, surface a content conflict instead of + * silently committing the inner-conflict blob. + */ + if (ci->merged.clean && side == 2 && + !ci->merged.is_null && + opt->in_replay_side2_intervals && + strmap_get(opt->in_replay_side2_intervals, path)) { + ci->merged.clean = 0; + path_msg(opt, CONFLICT_CONTENTS, 0, + path, NULL, NULL, NULL, + _("CONFLICT (content): Merge conflict in %s"), + path); + } } } else if (ci->filemask >= 6 && (S_IFMT & ci->stages[1].mode) != @@ -5433,8 +5502,6 @@ void merge_incore_recursive(struct merge_options *opt, * allow one exception through so that builtin/am can override * with its constructed fake ancestor. */ - assert(opt->ancestor == NULL || - (merge_bases && !merge_bases->next)); trace2_region_enter("merge", "merge_start", opt->repo); merge_start(opt, result); diff --git a/merge-ort.h b/merge-ort.h index 6045579825da8b..8b1545b009b078 100644 --- a/merge-ort.h +++ b/merge-ort.h @@ -87,6 +87,39 @@ struct merge_options { unsigned record_conflict_msgs_as_headers : 1; const char *msg_header_prefix; + /* + * Conflict-interval side channel for replay-merge. Each strmap, + * when non-NULL, maps a path to a heap-allocated + * `struct xdl_conflict_intervals *`. The caller owns both the + * strmaps and the value pointers. + * + * out_replay_intervals: populated by the per-path content + * merge. After ll_merge returns for a path, the recorded + * intervals are moved into a freshly allocated struct + * stored under the path. Paths whose inner content merge + * was clean do not appear; paths whose ll_merge call + * produced conflict markers (text driver) or returned a + * non-clean status (binary, custom, union, ours/theirs) + * do. + * + * in_replay_orig_intervals, in_replay_side2_intervals: + * looked up per path during the outer merge and forwarded + * into ll_merge_options.in_orig_intervals / + * ll_merge_options.in_side2_intervals so that xdl_merge + * can detect a conflict in mf2 (side2) that has no + * counterpart in orig. + * + * The fast paths in collect_merge_info_callback() and + * process_entry() that would otherwise take side2's blob + * verbatim also consult `in_replay_side2_intervals`: a + * present entry forces the path through the full content + * merge (or, when the entry was recorded for a non-xdiff + * driver, surfaces an explicit conflict). + */ + struct strmap *out_replay_intervals; + struct strmap *in_replay_orig_intervals; + struct strmap *in_replay_side2_intervals; + /* internal fields used by the implementation */ struct merge_options_internal *priv; }; diff --git a/replay.c b/replay.c index f96f1f6551ae63..36cd69fd9ca2ae 100644 --- a/replay.c +++ b/replay.c @@ -1,6 +1,7 @@ #define USE_THE_REPOSITORY_VARIABLE #include "git-compat-util.h" +#include "commit-reach.h" #include "environment.h" #include "hex.h" #include "merge-ort.h" @@ -11,6 +12,7 @@ #include "sequencer.h" #include "strmap.h" #include "tree.h" +#include "xdiff/xdiff.h" /* * We technically need USE_THE_REPOSITORY_VARIABLE for DEFAULT_ABBREV, but @@ -77,15 +79,21 @@ static void generate_revert_message(struct strbuf *msg, repo_unuse_commit_buffer(repo, commit, message); } +/* + * Build a new commit with the given tree and parent list, copying author, + * extra headers and (for pick mode) the commit message from `based_on`. + * + * Takes ownership of `parents`: it will be freed before returning, even on + * error. Parent order is preserved as supplied by the caller. + */ static struct commit *create_commit(struct repository *repo, struct tree *tree, struct commit *based_on, - struct commit *parent, + struct commit_list *parents, enum replay_mode mode) { struct object_id ret; struct object *obj = NULL; - struct commit_list *parents = NULL; char *author = NULL; char *sign_commit = NULL; /* FIXME: cli users might want to sign again */ struct commit_extra_header *extra = NULL; @@ -96,7 +104,6 @@ static struct commit *create_commit(struct repository *repo, const char *orig_message = NULL; const char *exclude_gpgsig[] = { "gpgsig", "gpgsig-sha256", NULL }; - commit_list_insert(parent, &parents); extra = read_commit_extra_headers(based_on, exclude_gpgsig); if (mode == REPLAY_MODE_REVERT) { generate_revert_message(&msg, based_on, repo); @@ -263,6 +270,20 @@ static struct commit *mapped_commit(kh_oid_map_t *replayed_commits, return kh_value(replayed_commits, pos); } +static void release_replay_intervals_strmap(struct strmap *map) +{ + struct hashmap_iter iter; + struct strmap_entry *entry; + + strmap_for_each_entry(map, &iter, entry) { + struct xdl_conflict_intervals *intervals = entry->value; + + xdl_conflict_intervals_release(intervals); + free(intervals); + } + strmap_clear(map, 0); +} + static struct commit *pick_regular_commit(struct repository *repo, struct commit *pickme, kh_oid_map_t *replayed_commits, @@ -273,6 +294,7 @@ static struct commit *pick_regular_commit(struct repository *repo, { struct commit *base, *replayed_base; struct tree *pickme_tree, *base_tree, *replayed_base_tree; + struct commit_list *parents = NULL; if (pickme->parents) { base = pickme->parents->item; @@ -327,7 +349,289 @@ static struct commit *pick_regular_commit(struct repository *repo, if (oideq(&replayed_base_tree->object.oid, &result->tree->object.oid) && !oideq(&pickme_tree->object.oid, &base_tree->object.oid)) return replayed_base; - return create_commit(repo, result->tree, pickme, replayed_base, mode); + commit_list_insert(replayed_base, &parents); + return create_commit(repo, result->tree, pickme, parents, mode); +} + +/* + * Replay a 2-parent merge commit by composing three calls into merge-ort: + * + * R = recursive merge of pickme's two original parents (auto-remerge of + * the original merge, accepting any conflicts) + * N = recursive merge of the (possibly rewritten) parents + * O = pickme's tree (the user's actual merge, including any manual + * resolutions) + * + * The picked tree comes from a non-recursive merge using R as the base, + * O as side1 and N as side2: `git diff R O` is morally `git show + * --remerge-diff $oldmerge`, so this layers the user's original manual + * resolution on top of the freshly auto-merged rewritten parents. + * + * If the outer 3-way merge is unclean, propagate the conflict status to + * the caller via `result->clean = 0` and return NULL. The two inner + * merges (R and N) being unclean is _not_ fatal: the conflict-markered + * trees they produce are valid inputs to the outer merge, and using + * identical labels for both inner merges keeps the marker text + * byte-equal between R and N so the user's resolution recorded in O + * collapses the conflict cleanly there. + * + * The conflict-interval side channel from merge-ort goes a step + * further: each inner merge publishes the byte ranges of every + * conflict-marker hunk it emits (per path), and the outer merge + * consults those maps so a hunk that appears in N without an + * identical counterpart in R is reported as a real conflict instead + * of being silently embedded as plain marker bytes in the replayed + * commit. + * + * Octopus merges (more than two parents) and revert-of-merge are + * rejected by the caller before this function is invoked. + */ +static struct commit *pick_merge_commit(struct repository *repo, + struct commit *pickme, + kh_oid_map_t *replayed_commits, + struct merge_options *merge_opt, + struct merge_result *result) +{ + struct commit *parent1, *parent2; + struct commit *replayed_parent1, *replayed_parent2; + struct tree *pickme_tree; + struct merge_options remerge_opt = { 0 }; + struct merge_options new_merge_opt = { 0 }; + struct merge_result remerge_res = { 0 }; + struct merge_result new_merge_res = { 0 }; + struct commit_list *parent_bases = NULL; + struct commit_list *replayed_bases = NULL; + struct commit_list *parents; + struct commit *picked = NULL; + char *ancestor_name = NULL; + struct strmap r_intervals = STRMAP_INIT; + struct strmap n_intervals = STRMAP_INIT; + struct strmap *saved_orig_in; + struct strmap *saved_side2_in; + + parent1 = pickme->parents->item; + parent2 = pickme->parents->next->item; + + /* + * Map the merge's parents to their replayed counterparts. A parent + * outside the rewrite range has no entry in `replayed_commits` and + * must stay at its original commit, so use the parent itself as the + * fallback for both sides. + */ + replayed_parent1 = mapped_commit(replayed_commits, parent1, parent1); +error("%s:%d: REPLAYED %s as %s", __FILE__, __LINE__, oid_to_hex(&parent1->object.oid), oid_to_hex(&replayed_parent1->object.oid)); + replayed_parent2 = mapped_commit(replayed_commits, parent2, parent2); +error("%s:%d: REPLAYED %s as %s", __FILE__, __LINE__, oid_to_hex(&parent2->object.oid), oid_to_hex(&replayed_parent2->object.oid)); + + /* + * Compute both pairs of merge bases up front. The fast path below + * needs them for the tree-equality check, and the slow path that + * follows reuses them to avoid recomputing. + */ + if (repo_get_merge_bases(repo, parent1, parent2, &parent_bases) < 0 || + repo_get_merge_bases(repo, replayed_parent1, replayed_parent2, + &replayed_bases) < 0) { + result->clean = -1; + goto out; + } + + /* + * Fast path: when both rewritten parents carry the same trees as + * the originals AND every merge base does too (in order), the + * auto-merges R and N would be tree-equal (their inputs match + * content-wise), so the outer 3-way merge trivially yields the + * original merge's tree. Skip the inner merges and write the new + * merge commit directly. + * + * This is the common case for `git history reword`, which only + * changes commit messages and so leaves every tree on the line + * being replayed unchanged. The merge-base trees must be checked + * too: tree-same parents over a tree-different base could still + * produce a different auto-merge (a conflict region that did not + * exist before, or vice versa), and the original resolution would + * be inappropriate. + */ + if (oideq(&repo_get_commit_tree(repo, parent1)->object.oid, + &repo_get_commit_tree(repo, replayed_parent1)->object.oid) && + oideq(&repo_get_commit_tree(repo, parent2)->object.oid, + &repo_get_commit_tree(repo, replayed_parent2)->object.oid)) { + struct commit_list *bo, *bn; + int bases_match = 1; + + for (bo = parent_bases, bn = replayed_bases; + bo && bn; + bo = bo->next, bn = bn->next) { + if (!oideq(&repo_get_commit_tree(repo, bo->item)->object.oid, + &repo_get_commit_tree(repo, bn->item)->object.oid)) { + bases_match = 0; + break; + } + } + if (bo || bn) + bases_match = 0; + + if (bases_match) { + pickme_tree = repo_get_commit_tree(repo, pickme); + parents = NULL; + commit_list_insert(replayed_parent2, &parents); + commit_list_insert(replayed_parent1, &parents); + picked = create_commit(repo, pickme_tree, pickme, + parents, REPLAY_MODE_PICK); + goto out; + } + } + + /* + * Compute both pairs of merge bases up front. The fast path below + * needs them for the tree-equality check, and the slow path that + * follows reuses them to avoid recomputing. + */ + if (repo_get_merge_bases(repo, parent1, parent2, &parent_bases) < 0 || + repo_get_merge_bases(repo, replayed_parent1, replayed_parent2, + &replayed_bases) < 0) { + result->clean = -1; + goto out; + } + + /* + * Fast path: when both rewritten parents carry the same trees as + * the originals AND every merge base does too (in order), the + * auto-merges R and N would be tree-equal (their inputs match + * content-wise), so the outer 3-way merge trivially yields the + * original merge's tree. Skip the inner merges and write the new + * merge commit directly. + * + * This is the common case for `git history reword`, which only + * changes commit messages and so leaves every tree on the line + * being replayed unchanged. The merge-base trees must be checked + * too: tree-same parents over a tree-different base could still + * produce a different auto-merge (a conflict region that did not + * exist before, or vice versa), and the original resolution would + * be inappropriate. + */ + if (oideq(&repo_get_commit_tree(repo, parent1)->object.oid, + &repo_get_commit_tree(repo, replayed_parent1)->object.oid) && + oideq(&repo_get_commit_tree(repo, parent2)->object.oid, + &repo_get_commit_tree(repo, replayed_parent2)->object.oid)) { + struct commit_list *bo, *bn; + int bases_match = 1; + + for (bo = parent_bases, bn = replayed_bases; + bo && bn; + bo = bo->next, bn = bn->next) { + if (!oideq(&repo_get_commit_tree(repo, bo->item)->object.oid, + &repo_get_commit_tree(repo, bn->item)->object.oid)) { + bases_match = 0; + break; + } + } + if (bo || bn) + bases_match = 0; + + if (bases_match) { + pickme_tree = repo_get_commit_tree(repo, pickme); + parents = NULL; + commit_list_insert(replayed_parent2, &parents); + commit_list_insert(replayed_parent1, &parents); + picked = create_commit(repo, pickme_tree, pickme, + parents, REPLAY_MODE_PICK); + goto out; + } + } + + /* + * R: auto-remerge of the original parents. + * + * Use the same branch labels for the inner merges that compute R + * and N so conflict markers (if any) are textually identical + * between the two; the outer non-recursive merge can then collapse + * the manual resolution from O against them. The ancestor label is + * pinned for the same reason: under merge.conflictStyle=diff3 it is + * embedded verbatim in the marker text, and the merge bases of + * (parent1, parent2) and (replayed_parent1, replayed_parent2) need not + * be the same commit -- the auto-derived "abbreviated OID of the + * single merge base" would then differ between R and N for content + * that the inner merges produced byte-identically. Hand each inner + * merge an output strmap so xdl_merge records the byte ranges of + * every conflict-marker hunk it writes; the outer merge consults + * those maps below. + */ + init_basic_merge_options(&remerge_opt, repo); + remerge_opt.show_rename_progress = 0; + remerge_opt.branch1 = "ours"; + remerge_opt.branch2 = "theirs"; + remerge_opt.ancestor = "merge base"; + remerge_opt.out_replay_intervals = &r_intervals; + merge_incore_recursive(&remerge_opt, parent_bases, + parent1, parent2, &remerge_res); + parent_bases = NULL; /* consumed by merge_incore_recursive */ + if (remerge_res.clean < 0) { + result->clean = remerge_res.clean; + goto out; + } +error("%s:%d: R: %s", __FILE__, __LINE__, oid_to_hex(&remerge_res.tree->object.oid)); + + /* N: fresh merge of the (possibly rewritten) parents. */ + init_basic_merge_options(&new_merge_opt, repo); + new_merge_opt.show_rename_progress = 0; + new_merge_opt.branch1 = "ours"; + new_merge_opt.branch2 = "theirs"; + new_merge_opt.ancestor = "merge base"; + new_merge_opt.out_replay_intervals = &n_intervals; + merge_incore_recursive(&new_merge_opt, replayed_bases, + replayed_parent1, replayed_parent2, &new_merge_res); + replayed_bases = NULL; /* consumed by merge_incore_recursive */ + if (new_merge_res.clean < 0) { + result->clean = new_merge_res.clean; + goto out; + } +error("%s:%d: N: %s", __FILE__, __LINE__, oid_to_hex(&new_merge_res.tree->object.oid)); + + /* + * Outer non-recursive merge: base=R, side1=O (pickme), side2=N. + * Hand the inner interval strmaps to the outer merge so the + * per-path content merge can surface any conflict hunk in N that + * has no counterpart in R. + */ + pickme_tree = repo_get_commit_tree(repo, pickme); + ancestor_name = xstrfmt("auto-remerge of %s", + oid_to_hex(&pickme->object.oid)); + merge_opt->ancestor = ancestor_name; + merge_opt->branch1 = short_commit_name(repo, pickme); + merge_opt->branch2 = "merge of replayed parents"; + saved_orig_in = merge_opt->in_replay_orig_intervals; + saved_side2_in = merge_opt->in_replay_side2_intervals; + merge_opt->in_replay_orig_intervals = &r_intervals; + merge_opt->in_replay_side2_intervals = &n_intervals; + merge_incore_nonrecursive(merge_opt, + remerge_res.tree, + pickme_tree, + new_merge_res.tree, + result); + merge_opt->ancestor = NULL; + merge_opt->branch1 = NULL; + merge_opt->branch2 = NULL; + merge_opt->in_replay_orig_intervals = saved_orig_in; + merge_opt->in_replay_side2_intervals = saved_side2_in; + if (!result->clean) + goto out; +error("%s:%d: R/O/N: %s", __FILE__, __LINE__, oid_to_hex(&result->tree->object.oid)); + + parents = NULL; + commit_list_insert(replayed_parent2, &parents); + commit_list_insert(replayed_parent1, &parents); + picked = create_commit(repo, result->tree, pickme, parents, + REPLAY_MODE_PICK); + +out: + free(ancestor_name); + free_commit_list(parent_bases); + free_commit_list(replayed_bases); + merge_finalize(&remerge_opt, &remerge_res); + merge_finalize(&new_merge_opt, &new_merge_res); + release_replay_intervals_strmap(&r_intervals); + release_replay_intervals_strmap(&n_intervals); + return picked; } void replay_result_release(struct replay_result *result) @@ -407,17 +711,65 @@ int replay_revisions(struct rev_info *revs, merge_opt.show_rename_progress = 0; last_commit = onto; replayed_commits = kh_init_oid_map(); + + /* + * Seed the rewritten-commit map with each negative-side ("BOTTOM") + * cmdline entry pointing at `onto`. This matters for merge replay: + * a 2-parent merge whose first parent is the boundary (e.g. the + * commit being reworded) must replay onto the rewritten boundary, + * yet pick_merge_commit's mapped_commit fallback returns the + * original parent for anything that is not in the map. Pre-seeding + * the boundary makes the lookup return `onto` for those parents, + * which is the rewritten counterpart of the boundary commit. + * + * Only do this for the pick path; revert mode chains reverts + * through last_commit and a pre-seeded boundary would short-circuit + * that chain. + */ + if (mode == REPLAY_MODE_PICK) { + for (size_t i = 0; i < revs->cmdline.nr; i++) { + struct rev_cmdline_entry *e = &revs->cmdline.rev[i]; + struct commit *boundary; + khint_t pos; + int hr; + + if (!(e->flags & BOTTOM)) + continue; + boundary = lookup_commit_reference_gently(revs->repo, + &e->item->oid, 1); + if (!boundary) + continue; + pos = kh_put_oid_map(replayed_commits, + boundary->object.oid, &hr); + if (hr != 0) +{ error("%s:%d: BOTTOM %s -> %s", __FILE__, __LINE__, oid_to_hex(&boundary->object.oid), oid_to_hex(&onto->object.oid)); + kh_value(replayed_commits, pos) = onto; +} + } + } + while ((commit = get_revision(revs))) { const struct name_decoration *decoration; khint_t pos; int hr; - if (commit->parents && commit->parents->next) - die(_("replaying merge commits is not supported yet!")); - - last_commit = pick_regular_commit(revs->repo, commit, replayed_commits, - mode == REPLAY_MODE_REVERT ? last_commit : onto, - &merge_opt, &result, mode); + if (commit->parents && commit->parents->next) { + if (commit->parents->next->next) { + ret = error(_("replaying octopus merges is not supported")); + goto out; + } + if (mode == REPLAY_MODE_REVERT) { + ret = error(_("reverting merge commits is not supported")); + goto out; + } + last_commit = pick_merge_commit(revs->repo, commit, + replayed_commits, + &merge_opt, &result); + } else { + last_commit = pick_regular_commit(revs->repo, commit, replayed_commits, + mode == REPLAY_MODE_REVERT ? last_commit : onto, + &merge_opt, &result, mode); + } if (!last_commit) break; @@ -459,6 +811,21 @@ int replay_revisions(struct rev_info *revs, } if (!result.clean) { + struct string_list conflicted_files = STRING_LIST_INIT_NODUP; + + fprintf(stdout, "Error replaying %s\ntree %s\n", + oid_to_hex(&commit->object.oid), + oid_to_hex(&result.tree->object.oid)); + merge_display_update_messages(&merge_opt, /* detailed */ 0, + &result); + merge_get_conflicted_files(&result, &conflicted_files); + for (size_t i = 0; i < conflicted_files.nr; i++) { + const char *name = conflicted_files.items[i].string; + struct stage_info *c = conflicted_files.items[i].util; + printf("%06o %s %d\t%s\n", + c->mode, oid_to_hex(&c->oid), c->stage, name); + } + string_list_clear(&conflicted_files, 1); ret = 1; goto out; } diff --git a/t/helper/meson.build b/t/helper/meson.build index 675e64c0101b61..704edd1e1faf79 100644 --- a/t/helper/meson.build +++ b/t/helper/meson.build @@ -29,6 +29,7 @@ test_tool_sources = [ 'test-hash.c', 'test-hashmap.c', 'test-hexdump.c', + 'test-historian.c', 'test-json-writer.c', 'test-lazy-init-name-hash.c', 'test-match-trees.c', diff --git a/t/helper/test-historian.c b/t/helper/test-historian.c new file mode 100644 index 00000000000000..1385ec05d68a84 --- /dev/null +++ b/t/helper/test-historian.c @@ -0,0 +1,196 @@ +/* + * Build a small history out of a tiny declarative input. Used by tests + * that need specific merge topologies without long sequences of + * plumbing commands or fragile shell helpers. + * + * The historian reads stdin line by line and emits an equivalent + * stream to a `git fast-import` child process. It also allocates marks + * for named objects so tests can refer to commits and blobs by name. + * + * Input directives (one per line, shell-style quoting): + * + * blob NAME LINE1 LINE2 ... + * Each LINE becomes a content line in the blob; lines are + * joined with '\n' and the blob ends with a final '\n'. With + * no LINEs, the blob is empty. + * + * commit NAME BRANCH SUBJECT [parent=PARENT]... [PATH:BLOB]... + * Creates a commit on refs/heads/BRANCH using the listed + * `PATH:BLOB` mappings as the entire tree (no inheritance + * from parents). Zero or more `parent=` arguments may be + * given in order: the first becomes fast-import's `from`, + * each subsequent one becomes a `merge` parent. Without any + * `parent=` the commit defaults to the current branch tip, + * or becomes a root if BRANCH has no tip yet. + * + * Each `commit NAME` directive also creates a lightweight tag + * `refs/tags/NAME` so tests can `git rev-parse NAME`. + * + * This helper trusts its caller; malformed input results in fast-import + * errors. That is fine because test scripts feed it tightly controlled + * input. + */ + +#define USE_THE_REPOSITORY_VARIABLE + +#include "test-tool.h" +#include "git-compat-util.h" +#include "alias.h" +#include "run-command.h" +#include "setup.h" +#include "strbuf.h" +#include "strmap.h" +#include "strvec.h" + +static int next_mark = 1; + +static int resolve_mark(struct strintmap *names, const char *name) +{ + int n = strintmap_get(names, name); + if (!n) { + n = next_mark++; + strintmap_set(names, name, n); + } + return n; +} + +static void emit_data(FILE *out, const char *data, size_t len) +{ + fprintf(out, "data %"PRIuMAX"\n", (uintmax_t)len); + fwrite(data, 1, len, out); + fputc('\n', out); +} + +static void emit_blob(FILE *out, struct strintmap *names, + int argc, const char **argv) +{ + struct strbuf content = STRBUF_INIT; + int n = resolve_mark(names, argv[1]); + int i; + + for (i = 2; i < argc; i++) { + strbuf_addstr(&content, argv[i]); + strbuf_addch(&content, '\n'); + } + + fprintf(out, "blob\nmark :%d\n", n); + emit_data(out, content.buf, content.len); + strbuf_release(&content); +} + +static void emit_tag(FILE *out, const char *name, int mark) +{ + fprintf(out, "reset refs/tags/%s\nfrom :%d\n\n", name, mark); +} + +static void emit_commit(FILE *out, struct strintmap *names, + int argc, const char **argv, int seq) +{ + int n = resolve_mark(names, argv[1]); + const char *branch = argv[2]; + const char *subject = argv[3]; + const char *rest; + int seen_parent = 0; + int i; + + fprintf(out, "commit refs/heads/%s\nmark :%d\n", branch, n); + fprintf(out, "author A %d +0000\n", 1700000000 + seq); + fprintf(out, "committer A %d +0000\n", 1700000000 + seq); + emit_data(out, subject, strlen(subject)); + + /* + * fast-import requires `from` and `merge` to precede all file + * operations; emit them first regardless of argv ordering. The + * first `parent=` becomes `from`, subsequent ones become + * `merge`, in the order given. + */ + for (i = 4; i < argc; i++) { + if (!skip_prefix(argv[i], "parent=", &rest)) + continue; + fprintf(out, "%s :%d\n", + seen_parent ? "merge" : "from", + resolve_mark(names, rest)); + seen_parent = 1; + } + + /* + * The PATH:BLOB list is the entire tree; wipe whatever the + * implicit parent contributed before re-applying it. + */ + fprintf(out, "deleteall\n"); + for (i = 4; i < argc; i++) { + const char *colon; + size_t key_len; + char *path; + + if (skip_prefix(argv[i], "parent=", &rest)) + continue; + colon = strchr(argv[i], ':'); + if (!colon) + die("bad commit spec '%s' (need PATH:BLOB or parent=NAME)", + argv[i]); + key_len = colon - argv[i]; + path = xmemdupz(argv[i], key_len); + fprintf(out, "M 100644 :%d %s\n", + resolve_mark(names, colon + 1), path); + free(path); + } + + fputc('\n', out); + emit_tag(out, argv[1], n); +} + +int cmd__historian(int argc, const char **argv UNUSED) +{ + struct child_process fi = CHILD_PROCESS_INIT; + struct strintmap names = STRINTMAP_INIT; + struct strbuf line = STRBUF_INIT; + int seq = 0; + int ret = 0; + FILE *fi_in; + + if (argc != 1) + die("usage: test-tool historian = 2 && !strcmp(a[0], "blob")) + emit_blob(fi_in, &names, n, a); + else if (n >= 4 && !strcmp(a[0], "commit")) + emit_commit(fi_in, &names, n, a, seq++); + else + die("unknown directive: %s", a[0]); + + free(a); + } + + if (fclose(fi_in)) + die_errno("close fast-import stdin"); + if (finish_command(&fi)) + ret = 1; + + strbuf_release(&line); + strintmap_clear(&names); + return ret; +} diff --git a/t/helper/test-tool.c b/t/helper/test-tool.c index a7abc618b3887e..28bde98ce1b76f 100644 --- a/t/helper/test-tool.c +++ b/t/helper/test-tool.c @@ -39,6 +39,7 @@ static struct test_cmd cmds[] = { { "hashmap", cmd__hashmap }, { "hash-speed", cmd__hash_speed }, { "hexdump", cmd__hexdump }, + { "historian", cmd__historian }, { "json-writer", cmd__json_writer }, { "lazy-init-name-hash", cmd__lazy_init_name_hash }, { "match-trees", cmd__match_trees }, diff --git a/t/helper/test-tool.h b/t/helper/test-tool.h index 7f150fa1eb9ad2..78cec8594aac1f 100644 --- a/t/helper/test-tool.h +++ b/t/helper/test-tool.h @@ -32,6 +32,7 @@ int cmd__getcwd(int argc, const char **argv); int cmd__hashmap(int argc, const char **argv); int cmd__hash_speed(int argc, const char **argv); int cmd__hexdump(int argc, const char **argv); +int cmd__historian(int argc, const char **argv); int cmd__json_writer(int argc, const char **argv); int cmd__lazy_init_name_hash(int argc, const char **argv); int cmd__match_trees(int argc, const char **argv); diff --git a/t/meson.build b/t/meson.build index 7528e5cda5fef0..25b0119d4360eb 100644 --- a/t/meson.build +++ b/t/meson.build @@ -397,6 +397,7 @@ integration_tests = [ 't3450-history.sh', 't3451-history-reword.sh', 't3452-history-split.sh', + 't3454-history-merges.sh', 't3500-cherry.sh', 't3501-revert-cherry-pick.sh', 't3502-cherry-pick-merge.sh', diff --git a/t/t3451-history-reword.sh b/t/t3451-history-reword.sh index de7b357685db4a..d103f866a2b99a 100755 --- a/t/t3451-history-reword.sh +++ b/t/t3451-history-reword.sh @@ -201,12 +201,21 @@ test_expect_success 'can reword a merge commit' ' git switch - && git merge theirs && - # It is not possible to replay merge commits embedded in the - # history (yet). - test_must_fail git -c core.editor=false history reword HEAD~ 2>err && - test_grep "replaying merge commits is not supported yet" err && + # Reword a non-merge commit whose descendants include the + # merge: replay carries the merge through. + reword_with_message HEAD~ <<-EOF && + ours reworded + EOF + expect_graph <<-EOF && + * Merge tag ${SQ}theirs${SQ} + |\\ + | * theirs + * | ours reworded + |/ + * base + EOF - # But it is possible to reword a merge commit directly. + # And reword a merge commit directly. reword_with_message HEAD <<-EOF && Reworded merge commit EOF @@ -214,7 +223,7 @@ test_expect_success 'can reword a merge commit' ' * Reworded merge commit |\ | * theirs - * | ours + * | ours reworded |/ * base EOF diff --git a/t/t3452-history-split.sh b/t/t3452-history-split.sh index 8ed0cebb500296..ad6309f98b13c0 100755 --- a/t/t3452-history-split.sh +++ b/t/t3452-history-split.sh @@ -36,7 +36,7 @@ expect_tree_entries () { test_cmp expect actual } -test_expect_success 'refuses to work with merge commits' ' +test_expect_success 'refuses to split a merge commit' ' test_when_finished "rm -rf repo" && git init repo && ( @@ -49,9 +49,7 @@ test_expect_success 'refuses to work with merge commits' ' git switch - && git merge theirs && test_must_fail git history split HEAD 2>err && - test_grep "cannot split up merge commit" err && - test_must_fail git history split HEAD~ 2>err && - test_grep "replaying merge commits is not supported yet" err + test_grep "cannot split up merge commit" err ) ' diff --git a/t/t3454-history-merges.sh b/t/t3454-history-merges.sh new file mode 100755 index 00000000000000..4a68e5fe11ec76 --- /dev/null +++ b/t/t3454-history-merges.sh @@ -0,0 +1,524 @@ +#!/bin/sh + +test_description='git replay and git history across merge commits + +Exercises the merge-replay path in `git history reword`, +`git history split`, and `git replay` using the `test-tool +historian` fixture builder so each scenario is described in a +small declarative input rather than a sprawling sequence of +plumbing commands. +' + +GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main +export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME + +. ./test-lib.sh + +# Replace the commit's message via a fake editor and run reword. +reword_to () { + GIT_EDITOR="echo \"$1\" >" \ + git history reword "$2" +} + +build_clean_merge () { + test-tool historian <<-\EOF + # Setup: + # A (a) --- C (a, h) ----+--- M (a, g, h) + # \ / + # +-- B (a, g) ------+ + # + # Topic touches `g` only; main touches `h` only. The auto-merge + # at M is clean. + blob a "shared content" + blob g guarded + blob h host + commit A main "A" a:a + commit B topic "B (introduces g)" parent=A a:a g:g + commit C main "C (introduces h)" parent=A a:a h:h + commit M main "Merge topic" parent=C parent=B a:a g:g h:h + EOF +} + +test_expect_success 'clean merge: both sides touch unrelated files' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + build_clean_merge && + + reword_to "AA" A && + + # The merge is still a 2-parent merge with the same subject + # and tree (clean replay leaves content unchanged). + test_cmp_rev HEAD^{tree} M^{tree} && + test_cmp_rev HEAD^^{tree} M^^{tree} && + test_cmp_rev HEAD^2^{tree} M^2^{tree} && + + echo "Merge topic" >expect && + git log -1 --format=%s HEAD >actual && + test_cmp expect actual + ) +' + +build_textual_resolution () { + test-tool historian <<-\EOF + # Both sides change the same line of `a`; the user resolved with + # their own combined text, recorded directly as the merge tree. + blob a_v1 line1 line2 line3 + blob a_main line1 line2-main line3 + blob a_topic line1 line2-topic line3 + blob a_resolution line1 line2-merged-by-hand line3 + commit A main "A" a:a_v1 + commit B topic "B (line2 on topic)" parent=A a:a_topic + commit C main "C (line2 on main)" parent=A a:a_main + commit M main "Merge topic" parent=C parent=B a:a_resolution + EOF +} + +test_expect_success 'non-trivial merge: textual manual resolution is preserved' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + build_textual_resolution && + + reword_to "AA" A && + + test_write_lines line1 line2-merged-by-hand line3 >expect && + git show HEAD:a >actual && + test_cmp expect actual + ) +' + +build_semantic_edit () { + test-tool historian <<-\EOF + # Topic and main conflict on line2 of `a`. The user's resolution + # at M not only picks combined text on line2 but ALSO touches + # line5 (a "semantic" edit outside any conflict region) -- this + # kind of edit is invisible to a naive pick-one-side strategy and + # must be preserved by replay. + blob a_v1 line1 line2 line3 line4 line5 + blob a_main line1 line2-main line3 line4 line5 + blob a_topic line1 line2-topic line3 line4 line5 + blob a_resolution line1 line2-merged line3 line4 line5-touched + commit A main "A" a:a_v1 + commit B topic "B (line2 on topic)" parent=A a:a_topic + commit C main "C (line2 on main)" parent=A a:a_main + commit M main "Merge topic" parent=C parent=B a:a_resolution + EOF +} + +test_expect_success 'non-trivial merge: semantic edit outside conflict region is preserved' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + build_semantic_edit && + + reword_to "AA" A && + + test_write_lines line1 line2-merged line3 line4 line5-touched \ + >expect && + git show HEAD:a >actual && + test_cmp expect actual + ) +' + +build_octopus () { + test-tool historian <<-\EOF + blob a "x" + commit A main "A" a:a + commit B b1 "B" parent=A a:a + commit C b2 "C" parent=A a:a + commit D b3 "D" parent=A a:a + commit O main "octopus" parent=A parent=B parent=C parent=D a:a + EOF +} + +test_expect_success 'octopus merge in the rewrite path is rejected' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + build_octopus && + + test_must_fail git -c core.editor=true history reword \ + --dry-run A 2>err && + test_grep "octopus" err + ) +' + +build_with_boundary_other_than_onto () { + test-tool historian <<-\EOF + # Setup a topology where the rewrite range crosses a 2-parent merge + # whose first parent sits outside that range: + # + # _------- O (a=O) + # / \ + # / M (parent1=O, parent2=R, a=M, s=top) + # / / + # X (a=X) - A (a=A) -- R (a=R) -- T (a=T, s=top) + # | + # reword target + # + # The walk for `history reword A` excludes A and its ancestors, + # so O sits outside the rewrite range and is not the boundary + # either. Replaying M correctly requires that first parent to + # remain at O (preserve, not replant). + blob X X + blob O O + blob A A + blob R R + blob T T + blob M M + blob top "marker" + commit X main "X" a:X + commit O side "O" parent=X a:O + commit A main "A" parent=X a:A + commit R main "R" parent=A a:R + commit T off "T" parent=R a:T s:top + commit M main "Merge side into main" parent=O parent=R a:M s:top + EOF +} + +# A descendant merge whose first parent sits outside the rewrite +# range is a topology that any reasonable replay of merges has to +# handle correctly: the first parent must be preserved verbatim, +# while the in-range second parent is rewritten. Without that, the +# replayed merge would silently graft itself onto a different +# ancestry than the author chose, which is far worse than a loud +# failure. +test_expect_success 'merge whose first parent sits outside the rewrite range keeps that parent' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + build_with_boundary_other_than_onto && + + reword_to "AA" A && + + # The replayed M (i.e. the tip of main) is still a 2-parent + # merge. Its first parent is the original O (preserved, + # outside the rewrite range), its second parent is the + # rewritten R. + test_cmp_rev ! main M && + test_cmp_rev main^ O && + test_cmp_rev ! main^2 R && + test_cmp_rev main^2^{tree} R^{tree} && + + # The `off` branch is changed, the `side` branch is not + test_cmp_rev side O && + test_cmp_rev ! off T + ) +' + +build_function_rename () { + test-tool historian <<-\EOF + # Truly exercise the R/O/N algorithm: The topic renames harry() to + # hermione() (def plus caller1). main adds caller2 calling harry(); + # the original merge M manually renames caller2 to hermione(). The + # "newer" base on a side branch contains caller2 AND a brand-new + # caller3 calling harry(); replaying onto `newer` therefore introduces + # caller3 into the merged tree. + blob def_harry "void harry(void);" + blob def_hermione "void hermione(void);" + blob harry_call "harry();" + blob hermione_call "hermione();" + commit A main "A" def:def_harry caller1:harry_call + commit B topic "B (rename)" parent=A def:def_hermione caller1:hermione_call + commit C main "C (caller2 calls harry)" parent=A def:def_harry caller1:harry_call caller2:harry_call + commit M main "Merge topic" parent=C parent=B def:def_hermione caller1:hermione_call caller2:hermione_call + commit NEW newer "newer base with caller3" parent=C def:def_harry caller1:harry_call caller2:harry_call caller3:harry_call + EOF +} + +# This case checks two things at once. First, the manual semantic edit in M +# (renaming caller2) must be preserved when we replay onto a different base; +# that is the case `git replay` needs to handle correctly, even though nothing +# in the conflict markers tells us about it. Second, a file that only enters +# the tree via the rewritten parents (caller3, present on the `newer` base) is +# _not_ renamed by the replay. The replay propagates the textual diffs the user +# actually made in M; it does _not_ infer the user's symbol-level intent +# ("rename every caller of harry"). This is a known and intentional +# limitation. Symbol-aware refactoring is out of scope here, just as it is for +# plain rebases. +test_expect_success 'preserves resolutions; does not extrapolate' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + build_function_rename && + + # Replay M onto the newer base. + git replay --ref-action=print \ + --onto NEW --ancestry-path C..main >out && + new_tip=$(cut -f 3 -d " " out) && + + # def and caller1 came from B (clean cherry-pick of the rename + # commit) and must reflect the rename. + git show $new_tip:def >actual && + test_grep hermione actual && + + git show $new_tip:caller1 >actual && + test_grep hermione actual && + + # caller2 existed in the original M; its manual rename to + # hermione() is the semantic edit the replay must preserve. + git show $new_tip:caller2 >actual && + test_grep hermione actual && + + # caller3 only exists on the newer base, so it was brought + # in by N (the auto-merge of the rebased parents). The + # replay has no way to know the user intended to rename + # every caller; caller3 keeps harry(). The resulting tree + # is therefore _not_ symbol-correct and needs a follow-up + # edit. This is the documented limitation. + git show $new_tip:caller3 >actual && + test_grep ! hermione actual + ) +' + +build_diverging_conflicts () { + test-tool historian <<-\EOF + # Topology where only one parent of the merge sits on the + # rewrite path: + # + # _-- D --_ + # / \ + # A - B - C - M + # \ + # E + # + # D is on branch `feature`, child of A. + # E is on branch `target`, child of A. + # M = merge(C, D), manual resolution. + # + # D and E conflict in different ways than C and D, therefore + # the resolution in M cannot cover the first conflict and + # replaying B..M onto E must fail. + blob a L1 L2 L3 L4 L5 + blob c L1 L2 _3 L4 L5 + blob d _1 L2 +3 L4 _5 + blob m _1 L2 M3 L4 _5 + blob e L1 L2 L3 L4 +5 + commit A main "A" a:a + commit B main "B (no change)" parent=A a:a + commit C main "C (changes line 3 only)" parent=B a:c + commit D feature "D (changes all odd lines)" parent=A a:d + commit M main "Merge feature into main" parent=C parent=D a:m + commit E target "E (changes line 5)" parent=A a:e + EOF +} + +test_expect_success 'newly-introduced conflict in a replayed merge' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + build_diverging_conflicts && + + test_must_fail git replay --ref-action=print --branches \ + --onto E --ancestry-path B..M >out 2>err && + + # Discriminate against the "git replay refuses 2-parent + # merges entirely" failure mode that earlier commits + # in this series exhibit: the test must fail because of + # a real content conflict on `b`, not for any other + # reason. merge-ort writes CONFLICT messages to stdout. + test_grep "CONFLICT (content)" out && + test_grep "Merge conflict in a" out + ) +' + +build_new_conflict_binary () { + test-tool historian <<-\EOF + # New conflicts are routed through the binary merge driver. + blob attrs "* -text" + blob bin_orig original + blob bin_D dee + blob bin_E ehh + blob misc_orig "misc orig" + blob misc_C "misc changed by C" + commit A main "A" .gitattributes:attrs bin:bin_orig misc:misc_orig + commit B main "B (no change)" parent=A .gitattributes:attrs bin:bin_orig misc:misc_orig + commit C main "C (changes misc)" parent=B .gitattributes:attrs bin:bin_orig misc:misc_C + commit D feature "D (changes bin)" parent=A .gitattributes:attrs bin:bin_D misc:misc_orig + commit M main "Merge feature" parent=C parent=D .gitattributes:attrs bin:bin_D misc:misc_C + commit E target "E (changes bin)" parent=A .gitattributes:attrs bin:bin_E misc:misc_orig + EOF +} + +test_expect_success 'newly-introduced binary conflict is reported' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + build_new_conflict_binary && + + test_must_fail git replay --ref-action=print --branches \ + --onto E --ancestry-path B..M >out 2>err && + + # Discriminate against the "git replay refuses 2-parent + # merges entirely" failure mode (no conflict message at + # all). The replay must fail because of a real content + # conflict on `bin`, not for any other reason. + # merge-ort writes CONFLICT messages to stdout. + test_grep "CONFLICT (content)" out && + test_grep "Merge conflict in bin" out + ) +' + +build_vanishing_conflict () { + test-tool historian <<-\EOF + # B + # / \ + # A - C - M + # \ + # D - X (= cherry-picked B) + # + # X cherry-picks B on top of D that replaying C..M onto X + # no longer has a merge conflict + blob a L1 L2 L3 L4 + blob b L1 L2 _3 L4 + blob c L1 L2 +3 L4 + blob m _1 L2 _3 L4 + blob d L1 L2 +3 L4 L5 + blob x _1 L2 _3 L4 L5 + commit A main "A" a:a + commit B main "B (changes line 3)" parent=A a:b + commit C feature "C (changes line 3 differently)" parent=A a:c + commit M feature "Merge C and B (fix lines 1 and 3)" parent=B parent=C a:m + commit D feature "D (changes line 3 like B, adds a line)" parent=C a:d + commit X feature "X (cherry-picks B with fixup)" parent=D a:x + EOF +} + +test_expect_success 'vanishing conflict in a replayed merge' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + build_vanishing_conflict && + + git replay --ref-action=print --branches \ + --onto X --ancestry-path C..M >out && + new_tip=$(cut -f 3 -d " " out) && + + # Since X already includes the changes of B, it is tree-same + # to the replayed merge + test_cmp_rev "$new_tip:a" X:a + ) +' + +build_fixture_with_marker_text () { + test-tool historian <<-\EOF + # A file `doc/markers.txt` whose plain content happens to + # contain the byte sequences `<<<<<<<`, `=======`, and + # `>>>>>>>` (e.g. a documentation page describing how Git + # writes conflict markers). The merge replay must NOT mistake + # those literal bytes for a real conflict on the path. + # + # The topology runs an actual merge replay so the regression + # guard means something: file `a` is the one that genuinely + # gets merged, and `doc/markers.txt` rides along unchanged + # across the history. A correct implementation can tell that + # `doc/markers.txt` was never the subject of an unresolved + # inner merge and leaves it alone. + blob a L1 L2 L3 + blob c L1 L2 _3 + blob d _1 L2 +3 + blob m _1 L2 M3 + blob markers "Example" "<<<<<<< ours" "first variant" "=======" "second variant" ">>>>>>> theirs" "EOF" + commit A main "A" a:a + commit B main "B (no a change)" parent=A a:a + commit C main "C (changes a only)" parent=B a:c + commit D feature "D (changes a only)" parent=A a:d + commit M main "Merge feature" parent=C parent=D a:m + commit E target "E (no a change)" parent=A a:a doc:markers + EOF +} + +test_expect_success 'file containing literal conflict-marker bytes survives a replay-merge' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + build_fixture_with_marker_text && + + git replay --ref-action=print --branches \ + --onto E --ancestry-path B..M >out && + new_tip=$(cut -f 3 -d " " out) && + test -n "$new_tip" && + + # The fixture file is byte-for-byte identical across the + # whole history. Nothing about replay should have touched it. + test_cmp_rev "$new_tip:doc" E:doc + ) +' + +build_resolution_preserved_under_diff3 () { + test-tool historian <<-\EOF + # Topology where the inner remerge of (C, D) and the inner + # remerge of (C', D) have DIFFERENT merge bases: the original + # pair's merge base is A (close, on main), while the rewritten + # pair's merge base is R (far, where the long-lived target + # branch diverged before A). + # + # R ---- E (target branch) + # \ + # A ----+---- B ---- C ---- M (main) + # \ / + # +------ D --------+ (feature) + # + # Under `merge.conflictStyle=diff3`, conflict markers embed + # the merge base's abbreviated OID. When the inner remerges + # have different bases, the abbreviated-OID line in the + # markers differs even though both inner remerges produce + # byte-identical content for the conflicting region. A naive + # implementation lets that label difference look like a real + # delta between the two inner remerges and reports a spurious + # conflict where the original resolution should have collapsed + # through. The replay must pin the ancestor label across the + # two inner remerges so byte-identical conflicting content + # really compares as byte-identical. + # + # E touches a file `b` so that C' (cherry-pick of C onto E) + # carries a different tree from C; otherwise the trivial + # tree-equal fast path in the outer pick collapses everything + # and the diff3 marker text is never exercised. + blob a L1 L2 L3 + blob c L1 L2 _3 + blob d L1 L2 +3 + blob m L1 L2 M3 + blob b b_orig + blob e b_E_only + commit R main "R (root)" a:a b:b + commit A main "A" parent=R a:a b:b + commit B main "B (no change)" parent=A a:a b:b + commit C main "C (changes a)" parent=B a:c b:b + commit D feature "D (changes a differently)" parent=A a:d b:b + commit M main "Merge feature into main" parent=C parent=D a:m b:b + commit E target "E (changes b only)" parent=R a:a b:e + EOF +} + +test_expect_success 'previously-resolved conflicts with diff3' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + git config merge.conflictStyle diff3 && + build_resolution_preserved_under_diff3 && + + git replay --ref-action=print --branches \ + --onto E --ancestry-path B..M >out && + new_tip=$(cut -f 3 -d " " out) && + test -n "$new_tip" && + + test_write_lines L1 L2 M3 >expect && + git show "$new_tip:a" >actual && + test_cmp expect actual + ) +' + +test_done diff --git a/t/t3650-replay-basics.sh b/t/t3650-replay-basics.sh index 3353bc4a4dc6ed..368b1b0f9a670a 100755 --- a/t/t3650-replay-basics.sh +++ b/t/t3650-replay-basics.sh @@ -103,10 +103,48 @@ test_expect_success 'cannot advance target ... ordering would be ill-defined' ' test_cmp expect actual ' -test_expect_success 'replaying merge commits is not supported yet' ' - echo "fatal: replaying merge commits is not supported yet!" >expect && - test_must_fail git replay --advance=main main..topic-with-merge 2>actual && - test_cmp expect actual +test_expect_success 'using replay to rebase a 2-parent merge' ' + # main..topic-with-merge contains a 2-parent merge (P) introduced + # via test_merge. Use --ref-action=print so this test does not + # mutate state for subsequent tests in this file. + git replay --ref-action=print --onto main main..topic-with-merge >result && + test_line_count = 1 result && + + new_tip=$(cut -f 3 -d " " result) && + + # Result is still a 2-parent merge. + git cat-file -p $new_tip >cat && + grep -c "^parent " cat >count && + echo 2 >expect && + test_cmp expect count && + + # Merge subject is preserved. + echo P >expect && + git log -1 --format=%s $new_tip >actual && + test_cmp expect actual && + + # The replayed merge sits on top of main: walking back via the + # first-parent chain reaches main. + git merge-base --is-ancestor main $new_tip +' + +test_expect_success 'replaying an octopus merge is rejected' ' + # Build an octopus side-branch so the rest of the test state stays + # untouched. + test_when_finished "git update-ref -d refs/heads/octopus-tip" && + octopus_tip=$(git commit-tree -p topic4 -p topic1 -p topic3 \ + -m "octopus" $(git rev-parse topic4^{tree})) && + git update-ref refs/heads/octopus-tip "$octopus_tip" && + + test_must_fail git replay --ref-action=print --onto main \ + topic4..octopus-tip 2>actual && + test_grep "octopus merges" actual +' + +test_expect_success 'reverting a merge commit is rejected' ' + test_must_fail git replay --ref-action=print --revert=topic-with-merge \ + topic4..topic-with-merge 2>actual && + test_grep "reverting merge commits" actual ' test_expect_success 'using replay to rebase two branches, one on top of other' ' diff --git a/xdiff/xdiff.h b/xdiff/xdiff.h index dc370712e92860..02314d1ef527ed 100644 --- a/xdiff/xdiff.h +++ b/xdiff/xdiff.h @@ -130,6 +130,30 @@ long xdl_mmfile_size(mmfile_t *mmf); int xdl_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, xdemitconf_t const *xecfg, xdemitcb_t *ecb); +/* + * A half-open byte range identifying one conflict-marker block + * (from the leading "<<<<<<<" line through the trailing newline of + * the matching ">>>>>>>" line) inside a buffer produced by + * xdl_merge. + */ +struct xdl_conflict_interval { + long start; /* offset of the first byte of the opener line */ + long end; /* offset one past the trailing newline of the closer line */ +}; + +/* + * A growable list of xdl_conflict_interval records, in order of + * appearance in the produced buffer. Initialize all-zero; release + * with xdl_conflict_intervals_release. + */ +struct xdl_conflict_intervals { + struct xdl_conflict_interval *items; + long nr; + long alloc; +}; + +void xdl_conflict_intervals_release(struct xdl_conflict_intervals *intervals); + typedef struct s_xmparam { xpparam_t xpp; int marker_size; @@ -139,6 +163,33 @@ typedef struct s_xmparam { const char *ancestor; /* label for orig */ const char *file1; /* label for mf1 */ const char *file2; /* label for mf2 */ + /* + * Side channel for detecting a fresh conflict introduced by + * mf2 (side2) that has no counterpart in orig (the merge + * base). Useful when xdl_merge is the outer merge of a + * replayed merge commit: orig and mf2 may themselves be the + * output of earlier xdl_merge calls that emitted + * conflict-marker hunks into their buffers, and the normal + * three-way merge cannot tell those literal marker bytes + * apart from any other text. + * + * out_intervals (output): when non-NULL, xdl_merge populates + * it with the byte range of every conflict-marker hunk it + * writes into the result buffer. Use this on the inner + * merges whose output will later be fed to an outer merge as + * orig or mf2. The caller initializes the struct all-zero + * and releases it with xdl_conflict_intervals_release. + * + * in_orig_intervals, in_side2_intervals (inputs): when + * in_side2_intervals is non-NULL, xdl_merge counts every + * recorded mf2 hunk that has no byte-equal counterpart in + * orig as an additional conflict in the returned status; if + * in_orig_intervals is NULL, every mf2 hunk is counted. Pass + * NULL on side2 to opt out. + */ + struct xdl_conflict_intervals *out_intervals; + struct xdl_conflict_intervals *in_orig_intervals; + struct xdl_conflict_intervals *in_side2_intervals; } xmparam_t; #define DEFAULT_CONFLICT_MARKER_SIZE 7 diff --git a/xdiff/xmerge.c b/xdiff/xmerge.c index 29dad98c496b07..69991e303ecc02 100644 --- a/xdiff/xmerge.c +++ b/xdiff/xmerge.c @@ -197,12 +197,14 @@ static int fill_conflict_hunk(xdfenv_t *xe1, const char *name1, xdfenv_t *xe2, const char *name2, const char *name3, int size, int i, int style, - xdmerge_t *m, char *dest, int marker_size) + xdmerge_t *m, char *dest, int marker_size, + struct xdl_conflict_intervals *intervals) { int marker1_size = (name1 ? strlen(name1) + 1 : 0); int marker2_size = (name2 ? strlen(name2) + 1 : 0); int marker3_size = (name3 ? strlen(name3) + 1 : 0); int needs_cr = is_cr_needed(xe1, xe2, m); + int block_start = -1; if (marker_size <= 0) marker_size = DEFAULT_CONFLICT_MARKER_SIZE; @@ -211,6 +213,9 @@ static int fill_conflict_hunk(xdfenv_t *xe1, const char *name1, size += xdl_recs_copy(xe1, i, m->i1 - i, 0, 0, dest ? dest + size : NULL); + if (dest && intervals) + block_start = size; + if (!dest) { size += marker_size + 1 + needs_cr + marker1_size; } else { @@ -277,6 +282,12 @@ static int fill_conflict_hunk(xdfenv_t *xe1, const char *name1, dest[size++] = '\r'; dest[size++] = '\n'; } + + if (dest && intervals && block_start >= 0) { + intervals->items[intervals->nr].start = block_start; + intervals->items[intervals->nr].end = size; + intervals->nr++; + } return size; } @@ -285,7 +296,8 @@ static int xdl_fill_merge_buffer(xdfenv_t *xe1, const char *name1, const char *ancestor_name, int favor, xdmerge_t *m, char *dest, int style, - int marker_size) + int marker_size, + struct xdl_conflict_intervals *intervals) { int size, i; @@ -297,7 +309,7 @@ static int xdl_fill_merge_buffer(xdfenv_t *xe1, const char *name1, size = fill_conflict_hunk(xe1, name1, xe2, name2, ancestor_name, size, i, style, m, dest, - marker_size); + marker_size, intervals); else if (m->mode & 3) { /* Before conflicting part */ size += xdl_recs_copy(xe1, i, m->i1 - i, 0, 0, @@ -664,23 +676,53 @@ static int xdl_do_merge(xdfenv_t *xe1, xdchange_t *xscr1, /* output */ if (result) { int marker_size = xmp->marker_size; + struct xdl_conflict_intervals *intervals = xmp->out_intervals; int size = xdl_fill_merge_buffer(xe1, name1, xe2, name2, ancestor_name, favor, changes, NULL, style, - marker_size); + marker_size, NULL); result->ptr = xdl_malloc(size); if (!result->ptr) { xdl_cleanup_merge(changes); return -1; } result->size = size; + if (intervals) { + xdmerge_t *p; + long n_conflicts = 0; + + for (p = changes; p; p = p->next) + if (p->mode == 0) + n_conflicts++; + intervals->nr = 0; + if (n_conflicts > 0 && + XDL_ALLOC_GROW(intervals->items, n_conflicts, + intervals->alloc)) { + xdl_free(result->ptr); + result->ptr = NULL; + result->size = 0; + xdl_cleanup_merge(changes); + return -1; + } + } xdl_fill_merge_buffer(xe1, name1, xe2, name2, ancestor_name, favor, changes, - result->ptr, style, marker_size); + result->ptr, style, marker_size, + intervals); } return xdl_cleanup_merge(changes); } +void xdl_conflict_intervals_release(struct xdl_conflict_intervals *intervals) +{ + if (!intervals) + return; + xdl_free(intervals->items); + intervals->items = NULL; + intervals->nr = 0; + intervals->alloc = 0; +} + int xdl_merge(mmfile_t *orig, mmfile_t *mf1, mmfile_t *mf2, xmparam_t const *xmp, mmbuffer_t *result) { @@ -727,6 +769,43 @@ int xdl_merge(mmfile_t *orig, mmfile_t *mf1, mmfile_t *mf2, &xe2, xscr2, xmp, result); } + + if (status >= 0 && xmp->in_side2_intervals) { + /* + * Count every conflict-marker hunk in mf2 whose bytes + * do not appear verbatim in orig. The result buffer + * already contains those bytes (xdl_do_merge applied + * the side2-only change, or the !xscr1 short-circuit + * copied mf2 wholesale); the only thing the caller + * needs from us is for the returned status to reflect + * them as real conflicts. + */ + const struct xdl_conflict_intervals *oi = xmp->in_orig_intervals; + const struct xdl_conflict_intervals *si = xmp->in_side2_intervals; + long i, j; + + for (i = 0; i < si->nr; i++) { + long s_len = si->items[i].end - si->items[i].start; + int matched = 0; + + if (oi) { + for (j = 0; j < oi->nr; j++) { + long o_len = oi->items[j].end - oi->items[j].start; + + if (o_len != s_len) + continue; + if (!memcmp((const char *)orig->ptr + oi->items[j].start, + (const char *)mf2->ptr + si->items[i].start, + s_len)) { + matched = 1; + break; + } + } + } + if (!matched) + status++; + } + } out: xdl_free_script(xscr1); xdl_free_script(xscr2);