Fossil

Check-in [8c6488de]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Continued work on the integrity checks for changesets. Moved callers out of transactions. Two checks are already tripping on bad changesets made by InitCSets (pass 5).
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1:8c6488ded2d656ba8e025c0abf9ed9bfaeab5007
User & Date: aku 2007-11-27 04:26:56
Context
2007-11-27
05:08
Bugfixes when generating revision changesets. (1) The dependencies for a revision are a list, not single. (2) Use pseudo-dependencies to separate revisions of the same file from each other if they have no direct dependencies in the state. check-in: 67876506 user: aku tags: trunk
04:26
Continued work on the integrity checks for changesets. Moved callers out of transactions. Two checks are already tripping on bad changesets made by InitCSets (pass 5). check-in: 8c6488de user: aku tags: trunk
02:37
Outline for more integrity checks, focusing on the changesets. check-in: bf83201c user: aku tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to tools/cvs2fossil/lib/c2f_integrity.tcl.

26
27
28
29
30
31
32


33
34
35
36
37
38
39


40
41
42
43
44
45


46
47

48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
...
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
...
287
288
289
290
291
292
293
294



295


296






















































































































297



298


299

















300



301

302


303






















































304
305
306
307
308
309























310
311
312
313
314
315
316
##

snit::type ::vc::fossil::import::cvs::integrity {
    # # ## ### ##### ######## #############
    ## Public API

    typemethod strict {} {
	# Runs the check groups AllButMeta and Meta, in this order.
	# 'n' is the check counter AllButMeta/Meta link to via upvar.
	set n 0
	AllButMeta
	Meta
	return
    }

    typemethod metarelaxed {} {
	# Like 'strict', but without the Meta group: only AllButMeta
	# is run. 'n' is the check counter it links to via upvar.
	set n 0
	AllButMeta
	return
    }

    typemethod changesets {} {
	# Runs the changeset check groups in the order listed below.
	# NOTE(review): 'n' is presumably their shared check counter.
	set n 0
	RevisionCSetLinkage

	RevisionChangesets
	SymbolChangesets
	return
    }

    # # ## ### ##### ######## #############
    ## Internal methods

    proc AllButMeta {} {
	# This code performs a number of paranoid checks of the
	# database, searching for inconsistent cross-references.
	log write 4 integrity {Check database consistency}

	upvar 1 n n ; # Counter for the checks (we print an id before
		      # the main label).

	# Find all revisions which disagree with their line of
	# development about the project they are owned by.
	Check \
................................................................................
	    }
	return
    }

    proc Meta {} {
	# This code performs a number of paranoid checks of the
	# database, searching for inconsistent cross-references.
	log write 4 integrity {Check database consistency}

	upvar 1 n n ; # Counter for the checks (we print an id before
		      # the main label).

	# Find all revisions which disagree with their meta data about
	# the branch/line of development they belong to.
	Check \
................................................................................
		AND   R.lod != M.bid
		AND   R.fid = F.fid
		;
	    }
	return
    }

    proc RevisionCSetLinkage {} {



    }

























































































































    proc RevisionChangesets {} {



    }




















    proc SymbolChangesets {} {



    }



























































    proc Check {header label sql} {
	# Runs the query 'sql' and treats every returned (file name,
	# revision number) pair as a violation, reported through
	# 'trouble fatal' with 'label' appended. Afterwards the
	# caller's counter 'n' is incremented and a numbered
	# "Ok"/"Failed" line for 'header' is logged.
	upvar 1 n n
	set ok 1
	foreach {fname revnr} [state run $sql] {
	    set ok 0
	    trouble fatal "$fname <$revnr> $label"























	}
	log write 5 integrity "\[[format %02d [incr n]]\] [expr {$ok ? "Ok    " : "Failed"}] ... $header"
	return
    }

    # # ## ### ##### ######## #############
    ## Configuration







>
>







>
>






>
>

<
>











<







 







<







 







|
>
>
>
|
>
>

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

>
>
>
|
>
>

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

>
>
>
|
>

>
>

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>






>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52

53
54
55
56
57
58
59
60
61
62
63
64

65
66
67
68
69
70
71
...
272
273
274
275
276
277
278

279
280
281
282
283
284
285
...
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
##

snit::type ::vc::fossil::import::cvs::integrity {
    # # ## ### ##### ######## #############
    ## Public API

    typemethod strict {} {
	# Full consistency run: the checks of AllButMeta first, then
	# the meta data checks of Meta.
	log write 4 integrity "Check database consistency"

	# Check counter. Both groups link to it through 'upvar 1 n n',
	# so the printed check ids form one continuous sequence.
	set n 0
	AllButMeta ; Meta
	return
    }

    typemethod metarelaxed {} {
	# Relaxed consistency run: only the checks of AllButMeta are
	# executed, the Meta group is left out.
	log write 4 integrity "Check database consistency"

	# Check counter, linked into AllButMeta through 'upvar 1 n n'.
	set n 0
	AllButMeta
	return
    }

    typemethod changesets {} {
	# Consistency run over the changeset data: first the checks
	# applying to all changesets, then the ones specific to
	# revision changesets, at last the ones for symbol changesets.
	log write 4 integrity "Check database consistency"

	# Check counter, linked into the check groups through
	# 'upvar 1 n n'.
	set n 0
	AllChangesets ; RevisionChangesets ; SymbolChangesets
	return
    }

    # # ## ### ##### ######## #############
    ## Internal methods

    proc AllButMeta {} {
	# This code performs a number of paranoid checks of the
	# database, searching for inconsistent cross-references.


	upvar 1 n n ; # Counter for the checks (we print an id before
		      # the main label).

	# Find all revisions which disagree with their line of
	# development about the project they are owned by.
	Check \
................................................................................
	    }
	return
    }

    proc Meta {} {
	# This code performs a number of paranoid checks of the
	# database, searching for inconsistent cross-references.


	upvar 1 n n ; # Counter for the checks (we print an id before
		      # the main label).

	# Find all revisions which disagree with their meta data about
	# the branch/line of development they belong to.
	Check \
................................................................................
		AND   R.lod != M.bid
		AND   R.fid = F.fid
		;
	    }
	return
    }

    proc AllChangesets {} {
	# This code performs a number of paranoid checks of the
	# database, searching for inconsistent changeset/revision
	# information. It runs five checks, in this order:
	#
	# (1) Every revision is used by at least one revision changeset.
	# (2) Every revision is used by at most one revision changeset.
	# (3) The revisions of a changeset agree on the LOD.
	# (4) The revisions of a changeset agree on the project.
	# (5) The revisions of a changeset belong to different files.
	#
	# Checks (1) and (2) report file/revision pairs (via Check),
	# checks (3) to (5) report changesets (via CheckCS).

	upvar 1 n n ; # Counter for the checks (we print an id before
		      # the main label).

	# Find all revisions which are not used by at least one
	# revision changeset.
	Check \
	    {All revisions have to be used by at least one revision changeset} \
	    {is not used by a revision changeset} {
		-- Unused revisions = All revisions
		--                  - revisions used by revision changesets.
		--
		-- Both sets can be computed easily, and subtracted
		-- from each other. Then we can get the associated
		-- file (name) for display.

		SELECT F.name, R.rev
		FROM revision R, file F
		WHERE R.rid IN (SELECT rid FROM revision                      -- All revisions
				EXCEPT                                     -- subtract
				SELECT CR.rid FROM csrevision CR, changeset C -- revisions used
				WHERE C.cid = CR.cid                          -- by any revision
				AND C.type = 0)                               -- changeset
		AND   R.fid = F.fid              -- get file of unused revision
	    }
	# Find all revisions which are used by more than one revision
	# changeset.
	Check \
	    {All revisions have to be used by at most one revision changeset} \
	    {is used by multiple revision changesets} {
		-- Principle of operation: Get all revision/changeset
		-- pairs for all revision changesets, group by
		-- revision to aggregate the changeset, counting
		-- them. From the resulting revision/count table
		-- select those with more than one user, and get their
		-- associated file (name) for display.

		SELECT F.name, R.rev
		FROM revision R, file F,
		     (SELECT CR.rid AS rid, count(CR.cid) AS count
		      FROM csrevision CR, changeset C
		      WHERE C.type = 0
		      AND   C.cid = CR.cid
		      GROUP BY CR.rid ) AS U
		WHERE U.count > 1
		AND R.rid = U.rid
		AND R.fid = F.fid
	    }
	# All revisions in all changesets have to agree on the LOD
	# their changeset belongs to. In other words, all revisions in
	# a changeset have to refer to the same line of development.
	#
	# Instead of looking at all pairs of revisions in all
	# changesets we generate the distinct set of all LODs
	# referenced by the revisions of a changeset, look for those
	# with cardinality > 1, and get the identifying information
	# for the changesets found thusly.
	CheckCS \
	    {All revisions in a changeset have to belong to the same LOD} \
	    {: Its revisions disagree about the LOD they belong to} {
		SELECT T.name, C.cid
		FROM   changeset C, cstype T
		WHERE  C.cid IN (SELECT U.cid
				 FROM (SELECT DISTINCT CR.cid AS cid, R.lod AS lod
				       FROM   csrevision CR, revision R
				       WHERE  CR.rid = R.rid) AS U
				 GROUP BY U.cid HAVING COUNT(U.lod) > 1)
		AND    T.tid = C.type
	    }
	# All revisions in all changesets have to agree on the project
	# their changeset belongs to. In other words, all revisions in
	# a changeset have to refer to the same project.
	#
	# Instead of looking at all pairs of revisions in all
	# changesets we generate the distinct set of all projects
	# referenced by the revisions of a changeset, look for those
	# with cardinality > 1, and get the identifying information
	# for the changesets found thusly.
	CheckCS \
	    {All revisions in a changeset have to belong to the same project} \
	    {: Its revisions disagree about the project they belong to} {
		SELECT T.name, C.cid
		FROM   changeset C, cstype T
		WHERE  C.cid IN (SELECT U.cid
				 FROM (SELECT DISTINCT CR.cid AS cid, F.pid AS pid
				       FROM   csrevision CR, revision R, file F
				       WHERE  CR.rid = R.rid
				       AND    F.fid  = R.fid) AS U
				 GROUP BY U.cid HAVING COUNT(U.pid) > 1)
		AND    T.tid = C.type
	    }
	# All revisions in a single changeset have to belong to
	# different files. Conversely: No two revisions of a single
	# file are allowed to be in the same changeset.
	#
	# Instead of looking at all pairs of revisions in all
	# changesets we generate the distinct set of all files
	# referenced by the revisions of a changeset, and look for
	# those with cardinality < the cardinality of the set of
	# revisions, and get the identifying information for the
	# changesets found thusly.
	CheckCS \
	    {All revisions in a changeset have to belong to different files} \
	    {: Its revisions share files} {
		SELECT T.name, C.cid
		FROM   changeset C, cstype T
		WHERE  C.cid IN (SELECT VV.cid
				 FROM (SELECT U.cid as cid, COUNT (U.fid) AS fcount
				       FROM (SELECT DISTINCT CR.cid AS cid, R.fid AS fid
					     FROM   csrevision CR, revision R
					     WHERE  CR.rid = R.rid) AS U
				       GROUP BY U.cid) AS UU,
				      (SELECT V.cid AS cid, COUNT (V.rid) AS rcount
				       FROM csrevision V
				       GROUP BY V.cid) AS VV
				 WHERE VV.cid = UU.cid
				 AND   UU.fcount < VV.rcount)
		AND    T.tid = C.type
	    }
	return
    }

    proc RevisionChangesets {} {
	# This code performs a number of paranoid checks of the
	# database, searching for inconsistent changeset/revision
	# information.
	#
	# Only revision changesets (changeset type 0) are looked at
	# here. Violations are reported per revision, through
	# CheckInCS, which replaces the '@' in the label with the
	# "<type id>" reference of the changeset.

	upvar 1 n n ; # Counter for the checks (we print an id before
		      # the main label).

	# All revisions used by revision changesets have to refer to
	# the same meta information as their changeset.
	CheckInCS \
	    {All revisions have to agree with their revision changeset about the used meta information} \
	    {disagrees with its revision changeset @ about the meta information} {
		SELECT CT.name, C.cid, F.name, R.rev
		FROM changeset C, cstype CT, revision R, file F, csrevision CR
		WHERE C.type = 0       -- revision changesets only
		AND   C.cid  = CR.cid  -- changeset --> its revisions
		AND   R.rid  = CR.rid  -- look at them
		AND   R.mid != C.src   -- Only those which disagree with changeset about the meta
		AND   R.fid = F.fid    -- get file of the revision
		AND   CT.tid = C.type  -- get changeset type, for labeling
	    }
	return
    }

    proc SymbolChangesets {} {
	# This code performs a number of paranoid checks of the
	# database, searching for inconsistent changeset/revision
	# information.
	#
	# Only symbol changesets (changeset type 1) are looked at
	# here. Violations are reported per revision, through
	# CheckInCS. That command consumes the query result four
	# values at a time (changeset type name, changeset id, file
	# name, revision number), so each query has to deliver
	# exactly these four columns, in this order.

	return ; # Disabled for now, bottlenecks ...

	upvar 1 n n ; # Counter for the checks (we print an id before
		      # the main label).

	# The next two checks are BOTTLENECKS. In essence we are
	# checking each symbol changeset one by one.

	# TODO: Try to rephrase the checks to make more use of
	# indices, set and stream operations.

	# All revisions used by tag symbol changesets have to have the
	# changeset's tag associated with them.
	CheckInCS \
	    {All revisions used by tag symbol changesets have to have the changeset's tag attached to them} \
	    {does not have the tag of its symbol changeset @ attached to it} {
		SELECT CT.name, C.cid, F.name, R.rev
		FROM   changeset C, cstype CT, revision R, file F, csrevision CR, tag T
		WHERE  C.type = 1       -- symbol changesets only
		AND    C.src  = T.sid   -- tag only, linked by symbol id
		AND    C.cid  = CR.cid  -- changeset --> its revisions
		AND    R.rid  = CR.rid  -- look at the revisions
		AND    CT.tid = C.type  -- get changeset type, for labeling
		-- and look for the tag among the attached ones.
		AND    T.sid NOT IN (SELECT TB.sid
				     FROM   tag TB
				     WHERE  TB.rev = R.rid)
		AND    R.fid = F.fid    -- get file of revision
	    }

	# All revisions used by branch symbol changesets have to have
	# the changeset's branch associated with them.

	CheckInCS \
	    {All revisions used by branch symbol changesets have to have the changeset's branch attached to them} \
	    {does not have the branch of its symbol changeset @ attached to it} {
		SELECT CT.name, C.cid, F.name, R.rev
		FROM   changeset C, cstype CT, revision R, file F, csrevision CR, branch B
		WHERE  C.type = 1       -- symbol changesets only
		AND    C.src  = B.sid   -- branches only
		AND    C.cid  = CR.cid  -- changeset --> its revisions
		AND    R.rid  = CR.rid  -- look at the revisions
		AND    CT.tid = C.type  -- get changeset type, for labeling
		-- and look for the branch among the attached ones.
		AND    B.sid NOT IN (SELECT BB.sid
				     FROM   branch BB
				     WHERE  BB.root = R.rid)
		AND    R.fid = F.fid    -- get file of revision
	    }

	# TODO
	# The state has to contain at least one tag symbol changeset
	# for all known tags.

	# TODO
	# The state has to contain at least one branch symbol changeset
	# for all known branches.
	return
    }


    proc Check {header label sql} {
	# Runs one integrity check whose violations are revisions.
	#
	# header - Description of the check, for the log.
	# label  - Text appended to each reported file/revision.
	# sql    - Query delivering (file name, revision number)
	#          pairs, one per violation.
	#
	# The counter 'n' of the caller is incremented and used to
	# number the log entry.

	upvar 1 n n

	set status "Ok    "
	foreach {fname revnr} [state run $sql] {
	    # Any result row at all means that the check failed.
	    set status "Failed"
	    trouble fatal "$fname <$revnr> $label"
	}

	incr n
	log write 5 integrity "\[[format %02d $n]\] $status ... $header"
	return
    }

    proc CheckCS {header label sql} {
	# Runs one integrity check whose violations are changesets.
	#
	# header - Description of the check, for the log.
	# label  - Text appended to each reported changeset.
	# sql    - Query delivering (changeset type name, changeset
	#          id) pairs, one per violation.
	#
	# The counter 'n' of the caller is incremented and used to
	# number the log entry.

	upvar 1 n n

	set status "Ok    "
	foreach {ctype cid} [state run $sql] {
	    # Any result row at all means that the check failed.
	    set status "Failed"
	    trouble fatal "<$ctype $cid> $label"
	}

	incr n
	log write 5 integrity "\[[format %02d $n]\] $status ... $header"
	return
    }

    proc CheckInCS {header label sql} {
	# Runs one integrity check whose violations are revisions
	# within a changeset.
	#
	# header - Description of the check, for the log.
	# label  - Text appended to each reported file/revision. Every
	#          '@' in it is replaced by the "<type id>" reference
	#          of the changeset.
	# sql    - Query delivering (changeset type name, changeset
	#          id, file name, revision number) quadruples, one per
	#          violation.
	#
	# The counter 'n' of the caller is incremented and used to
	# number the log entry.

	upvar 1 n n

	set status "Ok    "
	foreach {cstype csid fname revnr} [state run $sql] {
	    # Any result row at all means that the check failed.
	    set status "Failed"
	    set where [string map [list @ "<$cstype $csid>"] $label]
	    trouble fatal "$fname <$revnr> $where"
	}

	incr n
	log write 5 integrity "\[[format %02d $n]\] $status ... $header"
	return
    }

    # # ## ### ##### ######## #############
    ## Configuration

Changes to tools/cvs2fossil/lib/c2f_pbreakacycle.tcl.

74
75
76
77
78
79
80
81

82
83
84
85
86
87
88
89
90
91
	cyclebreaker precmd   [myproc BreakBackwardBranches]
	cyclebreaker savecmd  [myproc KeepOrder]
	cyclebreaker breakcmd [myproc BreakCycle]

	state transaction {
	    LoadCommitOrder
	    cyclebreaker run break-all [myproc Changesets]


	    repository printcsetstatistics
	    integrity changesets
	}
	return
    }

    typemethod discard {} {
	# Pass manager interface. Executed for all passes after the
	# run passes, to remove all data of this pass from the state,
	# as being out of date.







|
>
|
|
<







74
75
76
77
78
79
80
81
82
83
84

85
86
87
88
89
90
91
	cyclebreaker precmd   [myproc BreakBackwardBranches]
	cyclebreaker savecmd  [myproc KeepOrder]
	cyclebreaker breakcmd [myproc BreakCycle]

	state transaction {
	    LoadCommitOrder
	    cyclebreaker run break-all [myproc Changesets]
	}

	repository printcsetstatistics
	integrity changesets

	return
    }

    typemethod discard {} {
	# Pass manager interface. Executed for all passes after the
	# run passes, to remove all data of this pass from the state,
	# as being out of date.

Changes to tools/cvs2fossil/lib/c2f_pbreakrcycle.tcl.

64
65
66
67
68
69
70
71

72
73
74
75
76
77
78
79
80
81
	# Pass manager interface. Executed to perform the
	# functionality of the pass.

	cyclebreaker breakcmd {::vc::fossil::import::cvs::cyclebreaker break}

	state transaction {
	    cyclebreaker run break-rev [myproc Changesets]


	    repository printcsetstatistics
	    integrity changesets
	}
	return
    }

    typemethod discard {} {
	# Pass manager interface. Executed for all passes after the
	# run passes, to remove all data of this pass from the state,
	# as being out of date.







|
>
|
|
<







64
65
66
67
68
69
70
71
72
73
74

75
76
77
78
79
80
81
	# Pass manager interface. Executed to perform the
	# functionality of the pass.

	cyclebreaker breakcmd {::vc::fossil::import::cvs::cyclebreaker break}

	state transaction {
	    cyclebreaker run break-rev [myproc Changesets]
	}

	repository printcsetstatistics
	integrity changesets

	return
    }

    typemethod discard {} {
	# Pass manager interface. Executed for all passes after the
	# run passes, to remove all data of this pass from the state,
	# as being out of date.

Changes to tools/cvs2fossil/lib/c2f_pbreakscycle.tcl.

63
64
65
66
67
68
69
70

71
72
73
74
75
76
77
78
79
80
	# Pass manager interface. Executed to perform the
	# functionality of the pass.

	cyclebreaker breakcmd {::vc::fossil::import::cvs::cyclebreaker break}

	state transaction {
	    cyclebreaker run break-sym [myproc Changesets]


	    repository printcsetstatistics
	    integrity changesets
	}
	return
    }

    typemethod discard {} {
	# Pass manager interface. Executed for all passes after the
	# run passes, to remove all data of this pass from the state,
	# as being out of date.







|
>
|
|
<







63
64
65
66
67
68
69
70
71
72
73

74
75
76
77
78
79
80
	# Pass manager interface. Executed to perform the
	# functionality of the pass.

	cyclebreaker breakcmd {::vc::fossil::import::cvs::cyclebreaker break}

	state transaction {
	    cyclebreaker run break-sym [myproc Changesets]
	}

	repository printcsetstatistics
	integrity changesets

	return
    }

    typemethod discard {} {
	# Pass manager interface. Executed for all passes after the
	# run passes, to remove all data of this pass from the state,
	# as being out of date.

Changes to tools/cvs2fossil/lib/c2f_pinitcsets.tcl.

130
131
132
133
134
135
136
137

138
139
140
141
142
143
144
145
146
147
	# functionality of the pass.

	state transaction {
	    CreateRevisionChangesets  ; # Group file revisions into csets.
	    BreakInternalDependencies ; # Split the csets based on internal conflicts.
	    CreateSymbolChangesets    ; # Create csets for tags and branches.
	    PersistTheChangesets


	    repository printcsetstatistics
	    integrity changesets
	}
	return
    }

    typemethod discard {} {
	# Pass manager interface. Executed for all passes after the
	# run passes, to remove all data of this pass from the state,
	# as being out of date.







|
>
|
|
<







130
131
132
133
134
135
136
137
138
139
140

141
142
143
144
145
146
147
	# functionality of the pass.

	state transaction {
	    CreateRevisionChangesets  ; # Group file revisions into csets.
	    BreakInternalDependencies ; # Split the csets based on internal conflicts.
	    CreateSymbolChangesets    ; # Create csets for tags and branches.
	    PersistTheChangesets
	}

	repository printcsetstatistics
	integrity changesets

	return
    }

    typemethod discard {} {
	# Pass manager interface. Executed for all passes after the
	# run passes, to remove all data of this pass from the state,
	# as being out of date.