Fossil

Check-in [530168ec]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Split internals of breakinternaldependencies into more manageable pieces in prep for upcoming work on the handling of pseudo-dependencies.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 530168ec30a1e9f382db32b2a6813501bcb25132
User & Date: aku 2008-02-23 20:18:35.000
Context
2008-02-24
00:14
Changed the encoding of the values stored in DEPC. Keep only start/end of the range, not the list of all positions in it. That caused the memory-blowup. ... (check-in: 59b54efa user: aku tags: trunk)
2008-02-23
20:18
Split internals of breakinternaldependencies into more manageable pieces in prep for upcoming work on the handling of pseudo-dependencies. ... (check-in: 530168ec user: aku tags: trunk)
20:17
Fix code handling --memory-track, missed import of its supporting command. ... (check-in: 02467830 user: aku tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to tools/cvs2fossil/lib/c2f_prev.tcl.
51
52
53
54
55
56
57
58
59
60
61
62
63

64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
	set mypos       {} ; # Commit location is not known yet.

	# Keep track of the generated changesets and of the inverse
	# mapping from items to them.
	lappend mychangesets   $self
	lappend mytchangesets($cstype) $self
	set     myidmap($myid) $self
	foreach iid $items {
	    set key [list $cstype $iid]
	    set myitemmap($key) $self
	    lappend mytitems $key
	    log write 8 csets {MAP+ item <$key> $self = [$self str]}
	}

	return
    }

    destructor {
	# The main thing is to keep track of the itemmap and remove
	# the object from it. The lists of changesets (mychangesets,
	# mytchangesets) are not maintained (= reduced), for the
	# moment. We may be able to get rid of this entirely, at least
	# for (de)construction and pass InitCSets.

	foreach iid $myitems {
	    set key [list $mytype $iid]
	    unset myitemmap($key)
	    log write 8 csets {MAP- item <$key> $self = [$self str]}
	}
	return
    }

    method str {} {
	set str    "<"
	set detail ""
	if {[$mytypeobj bysymbol]} {







|
<
<
<
<
|
>










|
<
<
<
<







51
52
53
54
55
56
57
58




59
60
61
62
63
64
65
66
67
68
69
70
71




72
73
74
75
76
77
78
	set mypos       {} ; # Commit location is not known yet.

	# Keep track of the generated changesets and of the inverse
	# mapping from items to them.
	lappend mychangesets   $self
	lappend mytchangesets($cstype) $self
	set     myidmap($myid) $self
	foreach iid $items { lappend mytitems [list $cstype $iid] }





	MapItems $cstype $items
	return
    }

    destructor {
	# The main thing is to keep track of the itemmap and remove
	# the object from it. The lists of changesets (mychangesets,
	# mytchangesets) are not maintained (= reduced), for the
	# moment. We may be able to get rid of this entirely, at least
	# for (de)construction and pass InitCSets.

	UnmapItems $mytype $myitems




	return
    }

    method str {} {
	set str    "<"
	set detail ""
	if {[$mytypeobj bysymbol]} {
163
164
165
166
167
168
169

170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
	##

	# This method inspects the changesets for internal
	# dependencies. Nothing is done if there are no
	# such. Otherwise the changeset is split into a set of
	# fragments without internal dependencies, transforming the
	# internal dependencies into external ones. The new changesets

	# are added to the list of all changesets.

	# We perform all necessary splits in one go, instead of only
	# one. The previous algorithm, adapted from cvs2svn, computed
	# a lot of state which was thrown away and then computed again
	# for each of the fragments. It should be easier to update and
	# reuse that state.

	# The code checks only successor dependencies, as this
	# automatically covers the predecessor dependencies as well (A
	# successor dependency a -> b is also a predecessor dependency
	# b -> a).

	# Array of dependencies (parent -> child). This is pulled from
	# the state, and limited to successors within the changeset.

	array set dependencies {}
	$mytypeobj internalsuccessors dependencies $myitems
	if {![array size dependencies]} {
	    return {}
	} ; # Nothing to break.

	log write 5 csets ...[$self str].......................................................
	vc::tools::mem::mark

	# We have internal dependencies to break. We now iterate over
	# all positions in the list (which is chronological, at least
	# as far as the timestamps are correct and unique) and
	# determine the best position for the break, by trying to
	# break as many dependencies as possible in one go. When a
	# break was found this is redone for the fragments coming and
	# after, after upding the crossing information.

	# Data structures:
	# Map:  POS   revision id      -> position in list.
	#       CROSS position in list -> number of dependencies crossing it
	#       DEPC  dependency       -> positions it crosses
	# List: RANGE Of the positions itself.
	# Map:  DELTA position in list -> time delta between its revision
	#                                 and the next, if any.
	# A dependency is a single-element map parent -> child

	# InitializeBreakState initializes their contents after
	# upvar'ing them from this scope. It uses the information in
	# DEPENDENCIES to do so.

	InitializeBreakState $myitems

	set fragments {}
	set new       [list $range]
	array set breaks {}

	# Instead of one list holding both processed and pending
	# fragments we use two, one for the framents to process, one
	# to hold the new fragments, and the latter is copied to the
	# former when they run out. This keeps the list of pending
	# fragments short without sacrificing speed by shifting stuff
	# down. We especially drop the memory of fragments broken
	# during processing after a short time, instead of letting it
	# consume memory.

	while {[llength $new]} {

	    set pending $new
	    set new     {}
	    set at      0

	    while {$at < [llength $pending]} {
		set current [lindex $pending $at]

		log write 6 csets {. . .. ... ..... ........ .............}
		log write 6 csets {Scheduled   [join [PRs [lrange $pending $at end]] { }]}
		log write 6 csets {Considering [PR $current] \[$at/[llength $pending]\]}

		set best [FindBestBreak $current]

		if {$best < 0} {
		    # The inspected range has no internal
		    # dependencies. This is a complete fragment.
		    lappend fragments $current

		    log write 6 csets "No breaks, final"
		} else {
		    # Split the range and schedule the resulting
		    # fragments for further inspection. Remember the
		    # number of dependencies cut before we remove them
		    # from consideration, for documentation later.

		    set breaks($best) $cross($best)

		    log write 6 csets "Best break @ $best, cutting [nsp $cross($best) dependency dependencies]"

		    # Note: The value of best is an abolute location
		    # in myitems. Use the start of current to make it
		    # an index absolute to current.

		    set brel [expr {$best - [lindex $current 0]}]
		    set bnext $brel ; incr bnext
		    set fragbefore [lrange $current 0 $brel]
		    set fragafter  [lrange $current $bnext end]

		    log write 6 csets "New pieces  [PR $fragbefore] [PR $fragafter]"

		    integrity assert {[llength $fragbefore]} {Found zero-length fragment at the beginning}
		    integrity assert {[llength $fragafter]}  {Found zero-length fragment at the end}

		    lappend new $fragbefore $fragafter
		    CutAt $best
		}

		incr at
	    }
	}

	log write 6 csets ". . .. ... ..... ........ ............."

	# (*) We clear out the associated part of the myitemmap
	# in-memory index in preparation for new data. A simple unset
	# is enough, we have no symbol changesets at this time, and
	# thus never more than one reference in the list.

	foreach iid $myitems {
	    set key [list $mytype $iid]
	    unset myitemmap($key)
	    log write 8 csets {MAP- item <$key> $self = [$self str]}
	}

	# Create changesets for the fragments, reusing the current one
	# for the first fragment. We sort them in order to allow
	# checking for gaps and nice messages.

	set newcsets  {}
	set fragments [lsort -index 0 -integer $fragments]

	#puts \t.[join [PRs $fragments] .\n\t.].

	Border [lindex $fragments 0] firsts firste

	integrity assert {$firsts == 0} {Bad fragment start @ $firsts, gap, or before beginning of the range}

	set laste $firste
	foreach fragment [lrange $fragments 1 end] {
	    Border $fragment s e
	    integrity assert {$laste == ($s - 1)} {Bad fragment border <$laste | $s>, gap or overlap}

	    set new [$type %AUTO% $myproject $mytype $mysrcid [lrange $myitems $s $e]]
	    lappend newcsets $new
	    incr counter

            log write 4 csets "Breaking [$self str ] @ $laste, new [$new str], cutting $breaks($laste)"

	    set laste $e
	}

	integrity assert {
	    $laste == ([llength $myitems]-1)
	} {Bad fragment end @ $laste, gap, or beyond end of the range}

	# Put the first fragment into the current changeset, and
	# update the in-memory index. We can simply (re)add the items
	# because we cleared the previously existing information, see
	# (*) above. Persistence does not matter here, none of the
	# changesets has been saved to the persistent state yet.

	set myitems  [lrange $myitems  0 $firste]
	set mytitems [lrange $mytitems 0 $firste]
	foreach iid $myitems {
	    set key [list $mytype $iid]
	    set myitemmap($key) $self
	    log write 8 csets {MAP+ item <$key> $self = [$self str]}
	}

	return $newcsets
    }

    method persist {} {
	set tid $mycstype($mytype)
	set pid [$myproject id]
	set pos 0








>
|
<
<
<
<
<
<









<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<


<
<
<
<
<
<
<
<
|
<

<
<
<
|
<
<

<
<
<
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







156
157
158
159
160
161
162
163
164






165
166
167
168
169
170
171
172
173


































174
175








176

177



178


179



180



































































































181
182
183
184
185
186
187
	##

	# This method inspects the changesets for internal
	# dependencies. Nothing is done if there are no
	# such. Otherwise the changeset is split into a set of
	# fragments without internal dependencies, transforming the
	# internal dependencies into external ones. The new changesets
	# generated from the fragment information are added to the
	# list of all changesets.







	# The code checks only successor dependencies, as this
	# automatically covers the predecessor dependencies as well (A
	# successor dependency a -> b is also a predecessor dependency
	# b -> a).

	# Array of dependencies (parent -> child). This is pulled from
	# the state, and limited to successors within the changeset.



































	array set breaks {}









	set fragments [BreakDirectDependencies $myitems breaks]





	if {![llength $fragments]} { return {} }






	return [$self CreateFromFragments $fragments counter breaks]



































































































    }

    method persist {} {
	set tid $mycstype($mytype)
	set pid [$myproject id]
	set pos 0

377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
	state transaction {
	    state run {
		DELETE FROM changeset   WHERE cid = $myid;
		DELETE FROM csitem      WHERE cid = $myid;
		DELETE FROM cssuccessor WHERE cid = $myid;
	    }
	}
	foreach iid $myitems {
	    set key [list $mytype $iid]
	    unset myitemmap($key)
	    log write 8 csets {MAP- item <$key> $self = [$self str]}
	}
	set pos          [lsearch -exact $mychangesets $self]
	set mychangesets [lreplace $mychangesets $pos $pos]
	set pos                    [lsearch -exact $mytchangesets($mytype) $self]
	set mytchangesets($mytype) [lreplace $mytchangesets($mytype) $pos $pos]

	# Return the list of predecessors so that they can be adjusted.
	return [struct::list map [state run {
	    SELECT cid
	    FROM   cssuccessor
	    WHERE  nid = $myid
	}] [mytypemethod of]]







|
|
<
<
|
|
|

|







215
216
217
218
219
220
221
222
223


224
225
226
227
228
229
230
231
232
233
234
235
	state transaction {
	    state run {
		DELETE FROM changeset   WHERE cid = $myid;
		DELETE FROM csitem      WHERE cid = $myid;
		DELETE FROM cssuccessor WHERE cid = $myid;
	    }
	}

	UnmapItems $mytype $myitems



	set pos                    [lsearch -exact $mychangesets $self]
	set mychangesets           [lreplace       $mychangesets $pos $pos]
	set pos                    [lsearch -exact $mytchangesets($mytype) $self]
	set mytchangesets($mytype) [lreplace       $mytchangesets($mytype) $pos $pos]

	# Return the list of predecessors so that they can be adjusted.
	return [struct::list map [state run {
	    SELECT cid
	    FROM   cssuccessor
	    WHERE  nid = $myid
	}] [mytypemethod of]]
797
798
799
800
801
802
803












































































































































































804
805
806
807
808
809
810
	# Initialize the counter from the state
	log write 2 csets {Loading changeset counter}
	set mycounter [state one { SELECT MAX(cid) FROM changeset }]
	return
    }

    typemethod num {} { return $mycounter }













































































































































































    proc InitializeBreakState {revisions} {
	upvar 1 pos pos cross cross range range depc depc delta delta \
	    dependencies dependencies

	# First we create a map of positions to make it easier to
	# determine whether a dependency crosses a particular index.







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
	# Initialize the counter from the state
	log write 2 csets {Loading changeset counter}
	set mycounter [state one { SELECT MAX(cid) FROM changeset }]
	return
    }

    typemethod num {} { return $mycounter }

    # # ## ### ##### ######## #############

    method CreateFromFragments {fragments cv bv} {
	upvar 1 $cv counter $bv breaks
	UnmapItems $mytype $myitems

	# Create changesets for the fragments, reusing the current one
	# for the first fragment. We sort them in order to allow
	# checking for gaps and nice messages.

	set newcsets  {}
	set fragments [lsort -index 0 -integer $fragments]

	#puts \t.[join [PRs $fragments] .\n\t.].

	Border [lindex $fragments 0] firsts firste

	integrity assert {$firsts == 0} {Bad fragment start @ $firsts, gap, or before beginning of the range}

	set laste $firste
	foreach fragment [lrange $fragments 1 end] {
	    Border $fragment s e
	    integrity assert {$laste == ($s - 1)} {Bad fragment border <$laste | $s>, gap or overlap}

	    set new [$type %AUTO% $myproject $mytype $mysrcid [lrange $myitems $s $e]]
	    lappend newcsets $new
	    incr counter

            log write 4 csets "Breaking [$self str ] @ $laste, new [$new str], cutting $breaks($laste)"

	    set laste $e
	}

	integrity assert {
	    $laste == ([llength $myitems]-1)
	} {Bad fragment end @ $laste, gap, or beyond end of the range}

	# Put the first fragment into the current changeset, and
	# update the in-memory index. We can simply (re)add the items
	# because we cleared the previously existing information, see
	# 'UnmapItems' above. Persistence does not matter here, none
	# of the changesets has been saved to the persistent state
	# yet.

	set myitems  [lrange $myitems  0 $firste]
	set mytitems [lrange $mytitems 0 $firste]
	MapItems $mytype $myitems
	return $newcsets
    }

    # # ## ### ##### ######## #############

    proc BreakDirectDependencies {theitems bv} {
	upvar 1 mytypeobj mytypeobj self self $bv breaks

	array set dependencies {}
	$mytypeobj internalsuccessors dependencies $theitems
	if {![array size dependencies]} {
	    return {}
	} ; # Nothing to break.

	log write 5 csets ...[$self str].......................................................
	vc::tools::mem::mark

	return [BreakerCore $theitems dependencies breaks]
    }

    proc BreakerCore {theitems dv bv} {
	# Break a set of revisions into fragments which have no
	# internal dependencies.

	# We perform all necessary splits in one go, instead of only
	# one. The previous algorithm, adapted from cvs2svn, computed
	# a lot of state which was thrown away and then computed again
	# for each of the fragments. It should be easier to update and
	# reuse that state.

	upvar 1 $dv dependencies $bv breaks

	# We have internal dependencies to break. We now iterate over
	# all positions in the list (which is chronological, at least
	# as far as the timestamps are correct and unique) and
	# determine the best position for the break, by trying to
	# break as many dependencies as possible in one go. When a
	# break was found this is redone for the fragments coming and
	# after, after upding the crossing information.

	# Data structures:
	# Map:  POS   revision id      -> position in list.
	#       CROSS position in list -> number of dependencies crossing it
	#       DEPC  dependency       -> positions it crosses
	# List: RANGE Of the positions itself.
	# Map:  DELTA position in list -> time delta between its revision
	#                                 and the next, if any.
	# A dependency is a single-element map parent -> child

	# InitializeBreakState initializes their contents after
	# upvar'ing them from this scope. It uses the information in
	# DEPENDENCIES to do so.

	InitializeBreakState $theitems

	set fragments {}
	set new       [list $range]

	# Instead of one list holding both processed and pending
	# fragments we use two, one for the framents to process, one
	# to hold the new fragments, and the latter is copied to the
	# former when they run out. This keeps the list of pending
	# fragments short without sacrificing speed by shifting stuff
	# down. We especially drop the memory of fragments broken
	# during processing after a short time, instead of letting it
	# consume memory.

	while {[llength $new]} {

	    set pending $new
	    set new     {}
	    set at      0

	    while {$at < [llength $pending]} {
		set current [lindex $pending $at]

		log write 6 csets {. . .. ... ..... ........ .............}
		log write 6 csets {Scheduled   [join [PRs [lrange $pending $at end]] { }]}
		log write 6 csets {Considering [PR $current] \[$at/[llength $pending]\]}

		set best [FindBestBreak $current]

		if {$best < 0} {
		    # The inspected range has no internal
		    # dependencies. This is a complete fragment.
		    lappend fragments $current

		    log write 6 csets "No breaks, final"
		} else {
		    # Split the range and schedule the resulting
		    # fragments for further inspection. Remember the
		    # number of dependencies cut before we remove them
		    # from consideration, for documentation later.

		    set breaks($best) $cross($best)

		    log write 6 csets "Best break @ $best, cutting [nsp $cross($best) dependency dependencies]"

		    # Note: The value of best is an abolute location
		    # in myitems. Use the start of current to make it
		    # an index absolute to current.

		    set brel [expr {$best - [lindex $current 0]}]
		    set bnext $brel ; incr bnext
		    set fragbefore [lrange $current 0 $brel]
		    set fragafter  [lrange $current $bnext end]

		    log write 6 csets "New pieces  [PR $fragbefore] [PR $fragafter]"

		    integrity assert {[llength $fragbefore]} {Found zero-length fragment at the beginning}
		    integrity assert {[llength $fragafter]}  {Found zero-length fragment at the end}

		    lappend new $fragbefore $fragafter
		    CutAt $best
		}

		incr at
	    }
	}

	log write 6 csets ". . .. ... ..... ........ ............."

	return $fragments
    }

    proc InitializeBreakState {revisions} {
	upvar 1 pos pos cross cross range range depc depc delta delta \
	    dependencies dependencies

	# First we create a map of positions to make it easier to
	# determine whether a dependency crosses a particular index.
1019
1020
1021
1022
1023
1024
1025





























1026
1027
1028
1029
1030
1031
1032
	return <${s}...${e}>
    }

    proc Border {range sv ev} {
	upvar 1 $sv s $ev e
	set s [lindex $range 0]
	set e [lindex $range end]





























	return
    }

    # # ## ### ##### ######## #############

    typevariable mychangesets         {} ; # List of all known
					   # changesets.







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
	return <${s}...${e}>
    }

    proc Border {range sv ev} {
	upvar 1 $sv s $ev e
	set s [lindex $range 0]
	set e [lindex $range end]
	return
    }

    # # ## ### ##### ######## #############

    proc UnmapItems {thetype theitems} {
	# (*) We clear out the associated part of the myitemmap
	# in-memory index in preparation for new data, or as part of
	# object destruction. A simple unset is enough, we have no
	# symbol changesets at this time, and thus never more than one
	# reference in the list.

	upvar 1 myitemmap myitemmap self self
	foreach iid $theitems {
	    set key [list $thetype $iid]
	    unset myitemmap($key)
	    log write 8 csets {MAP- item <$key> $self = [$self str]}
	}
	return
    }

    proc MapItems {thetype theitems} {
	upvar 1 myitemmap myitemmap self self

	foreach iid $theitems {
	    set key [list $thetype $iid]
	    set myitemmap($key) $self
	    log write 8 csets {MAP+ item <$key> $self = [$self str]}
	}
	return
    }

    # # ## ### ##### ######## #############

    typevariable mychangesets         {} ; # List of all known
					   # changesets.