Fossil

Check-in [6b520e7d]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Modified to break all backward symbols, not only branches, removed the other custom circle breaking code, should not be needed any longer (See comments for proof).
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1:6b520e7d97c3d610a2282685fa37f83f1c0dd774
User & Date: aku 2007-11-27 09:07:37
Context
2007-11-28
05:39
Added convenience method for assertions and used it in place of the existing if/trouble internal constructions. Changed API of 'log write' so that we can defer substituation of the message to when the write actually happen, and converted all places which would be hit by double-substitution. The remaining 'log write' calls will be converted incrementally. check-in: 47d52d1e user: aku tags: trunk
2007-11-27
09:07
Modified to break all backward symbols, not only branches, removed the other custom circle breaking code, should not be needed any longer (See comments for proof). check-in: 6b520e7d user: aku tags: trunk
09:05
Bugfix: Sort pending nodes fully deterministic, and moved to separate helper command. Tweaked log output. check-in: 3e18606b user: aku tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to tools/cvs2fossil/lib/c2f_pbreakacycle.tcl.

64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
...
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
...
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
...
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
...
179
180
181
182
183
184
185

186

187
188
189
190
191
192
193
194
195
196
197
198
199
200
...
311
312
313
314
315
316
317



318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336


337
338
339
340
341
342
343


















344
345
346
347
348
349
350
...
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502

    typemethod run {} {
	# Pass manager interface. Executed to perform the
	# functionality of the pass.

	set len [string length [project::rev num]]
	set myatfmt %${len}s
	incr len 6
	set mycsfmt %${len}s

	cyclebreaker precmd   [myproc BreakBackwardBranches]
	cyclebreaker savecmd  [myproc KeepOrder]
	cyclebreaker breakcmd [myproc BreakCycle]

	state transaction {
	    LoadCommitOrder
	    cyclebreaker run break-all [myproc Changesets]
	}

	repository printcsetstatistics
................................................................................
	state transaction {
	    foreach {cid pos} [state run { SELECT cid, pos FROM csorder }] {
		set cset [project::rev of $cid]
		$cset setpos $pos
		set mycset($pos) $cset
		lappend myrevisionchangesets $cset
	    }
	    # Remove the order information now that we have it in
	    # memory, so that we can save it once more, for all
	    # changesets, while breaking the remaining cycles.
	    state run { DELETE FROM csorder }
	}
	return
    }

    # # ## ### ##### ######## #############

    proc BreakBackwardBranches {graph} {
	# We go over all branch changesets, i.e. the changesets
	# created by the symbols which are translated as branches, and
	# break any which are 'backward', which means that they have
	# at least one incoming revision changeset which is committed
	# after at least one of the outgoing revision changesets, per
	# the order computed in pass 6. In "cvs2svn" this is called
	# "retrograde".
................................................................................
	# having to check each changeset separately. Consider this
	# later, get the pass working first.
	#
	# NOTE 2: Might we even be able to select the backward branch
	# changesets too ?

	foreach cset [$graph nodes] {
	    if {![$cset isbranch]} continue
	    CheckAndBreakBackwardBranch $graph $cset
	}
	return
    }

    proc CheckAndBreakBackwardBranch {graph cset} {
	while {[IsABackwardBranch $graph $cset]} {
	    log write 5 breakacycle "Breaking backward branch changeset [$cset str]"

	    # Knowing that the branch is backward we now look at the
	    # individual revisions in the changeset and determine
	    # which of them are responsible for the overlap. This
	    # allows us to split them into two sets, one of
	    # non-overlapping revisions, and of overlapping ones. Each
	    # induces a new changeset, and the second may still be
	    # backward and need further splitting. Hence the looping.
................................................................................
	    # changesets are predecessors and/or successors, leaving
	    # the limits partially or completely undefined.

	    # limits : dict (revision -> list (max predecessor commit, min sucessor commit))

	    ComputeLimits $cset limits border

	    log write 6 breakacycle "Using commit position $border as border"

	    # Then we sort the file level items based on there they
	    # sit relative to the border into before and after the
	    # border.

	    SplitRevisions $limits $border normalrevisions backwardrevisions

................................................................................
	    cyclebreaker replace $graph $cset $replacements

	    # At last check that the normal frament is indeed not
	    # backward, and iterate over the possibly still backward
	    # second fragment.

	    struct::list assign $replacements normal backward

	    if {[IsABackwardBranch $graph $normal]} { trouble internal "The normal fragment is unexpectedly a backward branch" }


	    set cset $backward
	}
	return
    }

    proc IsABackwardBranch {dg cset} {
	# A branch is "backward" if it has at least one incoming
	# revision changeset which is committed after at least one of
	# the outgoing revision changesets, per the order computed in
	# pass 6.

	# Rephrased, the maximal commit position found among the
	# incoming revision changesets is larger than the minimal
................................................................................
	return
    }


    # # ## ### ##### ######## #############

    proc KeepOrder {graph at cset} {



	set cid [$cset id]

	log write 4 breakacycle "Changeset @ [format $myatfmt $at]: [format $mycsfmt [$cset str]] <<[FormatTR $graph $cset]>>"

	# We see here a mixture of symbol and revision changesets.
	# The symbol changesets are ignored as irrelevant.

	if {[$cset pos] eq ""} return

	# For the revision changesets we are sure that they are
	# consumed in the same order as generated by pass 7
	# (RevTopologicalSort). Per the code in cvs2svn.

	# NOTE: I cannot see that. Assume cs A and cs B, not dependent
	#       on each other in the set of revisions, now B after A
	#       simply means that B has a later time or depends on
	#       something wit a later time than A. In the full graph A
	#       may now have dependencies which shift it after B,
	#       violating the above assumption.


	#
	# Well, it seems to work if I do not make the NTDB root a
	# successor of the regular root. Doing so seems to tangle the
	# changesets into a knots regarding time vs dependencies and
	# trigger such shifts. Keeping these two roots separate OTOH
	# disappears the tangle. So, for now I accept that, and for
	# paranoia I add code which checks this assumption.



















	struct::set exclude myrevisionchangesets $cset

	::variable mylastpos
	set new [$cset pos]

	if {$new != ($mylastpos + 1)} {
................................................................................

    typevariable mylastpos            -1 ; # Position of last revision changeset saved.
    typevariable myrevisionchangesets {} ; # Set of revision changesets

    typevariable myatfmt ; # Format for log output to gain better alignment of the various columns.
    typevariable mycsfmt ; # Ditto for the changesets.

    # # ## ### ##### ######## #############

    proc BreakCycle {graph} {
	# In this pass the cycle breaking can be made a bit more
	# targeted, hence this custom callback.
	#
	# First we use the data remembered by 'SaveOrder', about the
	# last commit position it handled, to deduce the next revision
	# changeset it would encounter. Then we look for the shortest
	# predecessor path from it to all other revision changesets
	# and break this path. Without such a path we fall back to the
	# generic cycle breaker.

	::variable mylastpos
	::variable mycset
	::variable myrevisionchangesets

	set nextpos [expr {$mylastpos + 1}]
	set next    $mycset($nextpos)

	puts "** Last: $mylastpos = [$mycset($mylastpos) str] @ [$mycset($mylastpos) pos]"
	puts "** Next: $nextpos = [$next str] @ [$next pos]"

	set path [SearchForPath $graph $next $myrevisionchangesets]
	if {[llength $path]} {
	    cyclebreaker break-segment $graph $path
	    return
	}

	# We were unable to find an ordered changeset in the reachable
	# predecessors, fall back to the generic code for breaking the
	# found cycle.

	cyclebreaker break $graph
    }

    proc SearchForPath {graph n stopnodes} {
	# Search for paths to prerequisites of N.
	#
	# Try to find the shortest dependency path that causes the
	# changeset N to depend (directly or indirectly) on one of the
	# changesets contained in STOPNODES.
	#
	# We consider direct and indirect dependencies in the sense
	# that the changeset can be reached by following a chain of
	# predecessor nodes.
	#
	# When one of the csets in STOPNODES is found, we terminate
	# the search and return the path from that cset to N.  If no
	# path is found to a node in STOP_SET, we return the empty
	# list/path.

	# This is in essence a multi-destination Dijkstra starting at
	# N which stops when one of the destinations in STOPNODES has
	# been reached, traversing the predecessor arcs.

	# REACHABLE :: array (NODE -> list (STEPS, PREVIOUS))
	#
	# Semantics: NODE can be reached from N in STEPS steps, and
	# PREVIOUS is the previous node in the path which reached it,
	# allowing us at the end to construct the full path by
	# following these backlinks from the found destination. N is
	# only included as a key if there is a loop leading back to
	# it.

	# PENDING :: list (list (NODE, STEPS))
	#
	# Semantics: A list of possibilities that still have to be
	# investigated, where STEPS is the number of steps to get to
	# NODE.

	array set reachable {}
	set pending [list [list $n 0]]
	set at 0

	puts "** Searching shortest path ..."

	while {$at < [llength $pending]} {
	    struct::list assign [lindex $pending $at] current steps

	    #puts "** [lindex $pending $at] ** [$current str] **"
	    incr at

	    # Process the possibility. This is a breadth-first traversal.
	    incr steps
	    foreach pre [$graph nodes -in $current] {
	        # Since the search is breadth-first, we only have to #
	        # set nodes that don't already exist. If they do they
	        # have been reached already on a shorter path.

		if {[info exists reachable($pre)]} continue

		set reachable($pre) [list $steps $current]
		lappend pending [list $pre $steps]

		# Continue the search while have not reached any of
		# our destinations?
		if {![struct::set contain $pre $stopnodes]} continue

		# We have arrived, PRE is one of the destination; now
		# construct and return the path to it from N by
		# following the backlinks in the search state.
		set path [list $pre]
		while {1} {
		    set pre [lindex $reachable($pre) 1]
		    if {$pre eq $n} break
		    lappend path $pre
		}
		lappend path $n

		puts "** Searching shortest path ... Found ([project rev strlist $path])"
		return $path
	    }
	}

	puts "** Searching shortest path ... Not found"

	# No path found.
	return {}
    }

    # # ## ### ##### ######## #############

    typevariable mycset -array {} ; # Map from commit positions to the
				    # changeset (object ref) at that
				    # position.

    # # ## ### ##### ######## #############







|


|

<







 







<
<
<
<






|







 







|
|




|
|
<
<







 







|







 







>
|
>






|







 







>
>
>


|










|
|
|
|
|
|
>
>
|
<
<
|
|
|
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







 







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







64
65
66
67
68
69
70
71
72
73
74
75

76
77
78
79
80
81
82
...
103
104
105
106
107
108
109




110
111
112
113
114
115
116
117
118
119
120
121
122
123
...
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142


143
144
145
146
147
148
149
...
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
...
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
...
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337


338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
...
384
385
386
387
388
389
390

























































































































391
392
393
394
395
396
397

    typemethod run {} {
	# Pass manager interface. Executed to perform the
	# functionality of the pass.

	set len [string length [project::rev num]]
	set myatfmt %${len}s
	incr len 12
	set mycsfmt %${len}s

	cyclebreaker precmd   [myproc BreakBackward]
	cyclebreaker savecmd  [myproc KeepOrder]


	state transaction {
	    LoadCommitOrder
	    cyclebreaker run break-all [myproc Changesets]
	}

	repository printcsetstatistics
................................................................................
	state transaction {
	    foreach {cid pos} [state run { SELECT cid, pos FROM csorder }] {
		set cset [project::rev of $cid]
		$cset setpos $pos
		set mycset($pos) $cset
		lappend myrevisionchangesets $cset
	    }




	}
	return
    }

    # # ## ### ##### ######## #############

    proc BreakBackward {graph} {
	# We go over all branch changesets, i.e. the changesets
	# created by the symbols which are translated as branches, and
	# break any which are 'backward', which means that they have
	# at least one incoming revision changeset which is committed
	# after at least one of the outgoing revision changesets, per
	# the order computed in pass 6. In "cvs2svn" this is called
	# "retrograde".
................................................................................
	# having to check each changeset separately. Consider this
	# later, get the pass working first.
	#
	# NOTE 2: Might we even be able to select the backward branch
	# changesets too ?

	foreach cset [$graph nodes] {
	    if {![$cset bysymbol]} continue
	    CheckAndBreakBackward $graph $cset
	}
	return
    }

    proc CheckAndBreakBackward {graph cset} {
	while {[IsBackward $graph $cset]} {


	    # Knowing that the branch is backward we now look at the
	    # individual revisions in the changeset and determine
	    # which of them are responsible for the overlap. This
	    # allows us to split them into two sets, one of
	    # non-overlapping revisions, and of overlapping ones. Each
	    # induces a new changeset, and the second may still be
	    # backward and need further splitting. Hence the looping.
................................................................................
	    # changesets are predecessors and/or successors, leaving
	    # the limits partially or completely undefined.

	    # limits : dict (revision -> list (max predecessor commit, min sucessor commit))

	    ComputeLimits $cset limits border

	    log write 5 breakacycle "Breaking backward changeset [$cset str] with commit position $border as border"

	    # Then we sort the file level items based on there they
	    # sit relative to the border into before and after the
	    # border.

	    SplitRevisions $limits $border normalrevisions backwardrevisions

................................................................................
	    cyclebreaker replace $graph $cset $replacements

	    # At last check that the normal frament is indeed not
	    # backward, and iterate over the possibly still backward
	    # second fragment.

	    struct::list assign $replacements normal backward
	    if {[IsBackward $graph $normal]} {
		trouble internal "The normal fragment is unexpectedly backward"
	    }

	    set cset $backward
	}
	return
    }

    proc IsBackward {dg cset} {
	# A branch is "backward" if it has at least one incoming
	# revision changeset which is committed after at least one of
	# the outgoing revision changesets, per the order computed in
	# pass 6.

	# Rephrased, the maximal commit position found among the
	# incoming revision changesets is larger than the minimal
................................................................................
	return
    }


    # # ## ### ##### ######## #############

    proc KeepOrder {graph at cset} {
	::variable myatfmt
	::variable mycsfmt

	set cid [$cset id]

	log write 8 breakacycle "Changeset @ [format $myatfmt $at]: [format $mycsfmt [$cset str]] <<[FormatTR $graph $cset]>>"

	# We see here a mixture of symbol and revision changesets.
	# The symbol changesets are ignored as irrelevant.

	if {[$cset pos] eq ""} return

	# For the revision changesets we are sure that they are
	# consumed in the same order as generated by pass 7
	# (RevTopologicalSort). Per the code in cvs2svn.

	# IMHO this will work if and only if none of the symbol
	# changesets are "backwards", which explains the breaking of
	# the backward changesets first, in the pre-hook. A difference
	# to cvs2svn however is that we are breaking all backward
	# symbol changesets, both branch and tag. cvs2svn can
	# apparently assume here that tag symbol changesets are not
	# backwards, ever. We can't, apparently. It is unclear to me
	# where the difference is.



	# An interesting thing IMHO, is that after breaking backward
	# symbol changesets we should not have any circles any
	# longer. Each circle which was still present had to involve a
	# backward symbol, and that we split.

	# Proof: Let us assume we that have a circle
	# 	C: R1 -> ... -> Rx -> S -> Ry -> ... -> Rn -> R1
	# Let us further assume that S is not backward. That means
	# ORD(Rx) < ORD(Ry).  The earlier topological sorting without
	# symbols now forces this relationship through to be
	#	ORD(Rx) < ORD(R1) < ORD(Rx).
	# We have reached an impossibility, a paradox. Our initial
	# assumption of S not being backward cannot hold.
	#
	# Alternate, direct, reasoning: Without S the chain of
	# dependencies is Ry -> .. -> R1 -> .. -> Rx, therefore
	# ORD(Ry) < ORD(Rx) holds, and this means S is backward.

	# NOTE. Even with the backward symbols broken, it is not clear
	# to me yet what this means in terms of tagging revisions
	# later, as we now have more than one place where the symbol
	# occurs on the relevant LOD.

	struct::set exclude myrevisionchangesets $cset

	::variable mylastpos
	set new [$cset pos]

	if {$new != ($mylastpos + 1)} {
................................................................................

    typevariable mylastpos            -1 ; # Position of last revision changeset saved.
    typevariable myrevisionchangesets {} ; # Set of revision changesets

    typevariable myatfmt ; # Format for log output to gain better alignment of the various columns.
    typevariable mycsfmt ; # Ditto for the changesets.


























































































































    # # ## ### ##### ######## #############

    typevariable mycset -array {} ; # Map from commit positions to the
				    # changeset (object ref) at that
				    # position.

    # # ## ### ##### ######## #############