1 |
|
---|
2 | #!/bin/sh
|
---|
3 | exec perl -w -x $0 ${1+"$@"} # -*- mode: perl; perl-indent-level: 2; -*-
|
---|
4 | #!perl -w
|
---|
5 |
|
---|
6 | ##############################################################
|
---|
7 | ### ###
|
---|
8 | ### cvs2cl.pl: produce ChangeLog(s) from `cvs log` output. ###
|
---|
9 | ### ###
|
---|
10 | ##############################################################
|
---|
11 |
|
---|
12 | ## $Revision: 7297 $
|
---|
13 | ## $Date: 2004-05-10 01:58:46 +0000 (Mon, 10 May 2004) $
|
---|
14 | ## $Author: mdewsnip $
|
---|
15 | ##
|
---|
16 | ## (C) 1999 Karl Fogel <[email protected]>, under the GNU GPL.
|
---|
17 | ##
|
---|
18 | ## (Extensively hacked on by Melissa O'Neill <[email protected]>.)
|
---|
19 | ##
|
---|
20 | ## cvs2cl.pl is free software; you can redistribute it and/or modify
|
---|
21 | ## it under the terms of the GNU General Public License as published by
|
---|
22 | ## the Free Software Foundation; either version 2, or (at your option)
|
---|
23 | ## any later version.
|
---|
24 | ##
|
---|
25 | ## cvs2cl.pl is distributed in the hope that it will be useful,
|
---|
26 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
27 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
28 | ## GNU General Public License for more details.
|
---|
29 | ##
|
---|
30 | ## You may have received a copy of the GNU General Public License
|
---|
31 | ## along with cvs2cl.pl; see the file COPYING. If not, write to the
|
---|
32 | ## Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
---|
33 | ## Boston, MA 02111-1307, USA.
|
---|
34 |
|
---|
35 | use strict;
|
---|
36 | use Text::Wrap;
|
---|
37 | use Time::Local;
|
---|
38 | use File::Basename;
|
---|
39 |
|
---|
40 | # The Plan:
|
---|
41 | #
|
---|
42 | # Read in the logs for multiple files, spit out a nice ChangeLog that
|
---|
43 | # mirrors the information entered during `cvs commit'.
|
---|
44 | #
|
---|
45 | # The problem presents some challenges. In an ideal world, we could
|
---|
46 | # detect files with the same author, log message, and checkin time --
|
---|
47 | # each <filelist, author, time, logmessage> would be a changelog entry.
|
---|
48 | # We'd sort them; and spit them out. Unfortunately, CVS is *not atomic*
|
---|
49 | # so checkins can span a range of times. Also, the directory structure
|
---|
50 | # could be hierarchical.
|
---|
51 | #
|
---|
52 | # Another question is whether we really want to have the ChangeLog
|
---|
53 | # exactly reflect commits. An author could issue two related commits,
|
---|
54 | # with different log entries, reflecting a single logical change to the
|
---|
55 | # source. GNU style ChangeLogs group these under a single author/date.
|
---|
56 | # We try to do the same.
|
---|
57 | #
|
---|
58 | # So, we parse the output of `cvs log', storing log messages in a
|
---|
59 | # multilevel hash that stores the mapping:
|
---|
60 | # directory => author => time => message => filelist
|
---|
61 | # As we go, we notice "nearby" commit times and store them together
|
---|
62 | # (i.e., under the same timestamp), so they appear in the same log
|
---|
63 | # entry.
|
---|
64 | #
|
---|
65 | # When we've read all the logs, we twist this mapping into
|
---|
66 | # a time => author => message => filelist mapping for each directory.
|
---|
67 | #
|
---|
68 | # If we're not using the `--distributed' flag, the directory is always
|
---|
69 | # considered to be `./', even as we descend into subdirectories.
|
---|
70 |
|
---|
############### Globals ################

# The command whose output is parsed to build the ChangeLog.
my $Log_Source_Command = 'cvs log';

# Version string for display; the substitution keeps only the middle
# (revision number) field of the RCS keyword.
my $VERSION = '$Revision: 7297 $';
$VERSION =~ s/\S+\s+(\S+)\s+\S+/$1/;

## Vars set by options:

# Emit debugging messages?
my $Debug = 0;

# Show the version and exit?
my $Print_Version = 0;

# Show the usage message and exit?
my $Print_Usage = 0;

# One ChangeLog per subdirectory (true) or a single top-level one (false)?
my $Distributed = 0;

# Name of the file to generate.
my $Log_File_Name = 'ChangeLog';

# Map file used to expand usernames to email addresses ('' = none).
my $User_Map_File = '';

# Write to stdout instead of a file?
my $Output_To_Stdout = 0;

# Drop entries whose log message is empty?
my $Prune_Empty_Msgs = 0;

# Leave the message body alone instead of running Text::Wrap on it?
my $No_Wrap = 0;

# Text placed between the header (file list) and the log message.
my $After_Header = ' ';

# Produce output meant for programs rather than people?
my $XML_Output = 0;

# Report times in UTC rather than local time?
my $UTC_Times = 0;

# Include the day of the week in the output?
my $Show_Day_Of_Week = 0;

# Include revision numbers in the output?
my $Show_Revisions = 0;

# Include tags (symbolic names) in the output?
my $Show_Tags = 0;

# Include branches, by symbolic name, in the output?
my $Show_Branches = 0;

# Restrict output to revisions on these branches or their ancestors.
my @Follow_Branches;

# Regexps naming files that should be left out entirely.
my @Ignore_Files;

# How exactly we match entries.  We definitely want "o", and the user
# might add "i" by using the --case-insensitive option.
my $Case_Insensitive = 0;

# If non-empty, only entries matching this regular expression are shown.
my $Regexp_Gate = '';

# Global option string handed to cvs, to the left of `log':
my $Global_Opts = '';

# Option string handed to the cvs log subcommand itself:
my $Command_Opts = '';

# Take the log from stdin rather than running cvs log?
my $Input_From_Stdin = 0;

# Max checkin duration.  A CVS checkin is not atomic, so the timestamps of
# one checkin may cover a span of time.  We assume a checkin lasts no longer
# than $Max_Checkin_Duration seconds and, likewise, that the same user never
# makes two checkins with the same message less than $Max_Checkin_Duration
# seconds apart.
my $Max_Checkin_Duration = 180;

# Text written at the top of [each] ChangeLog.
my $ChangeLog_Header = '';

## end vars set by options.

# `cvs log' separates files with one long unbroken line of equal signs:
my $file_separator = '======================================='
                   . '======================================';

# ... and separates the log messages within a file with a shorter line
# of dashes:
my $logmsg_separator = '----------------------------';

############### End globals ############

# Both subs are defined further down and carry prototypes, hence the
# ampersand call form.
&parse_options ();
&derive_change_log ();
|
---|
177 |
|
---|
178 | ### Everything below is subroutine definitions. ###
|
---|
179 |
|
---|
# Fills up a ChangeLog structure in the current directory.
#
# Takes no arguments and returns nothing useful.  Reads `cvs log' output
# (from $Log_Source_Command, or from stdin when $Input_From_Stdin is set),
# collects one "qunk" per file revision into
#
#   directory => author => time => message => [ qunks ]
#
# then, per directory, merges entries by the same author with the same
# message that lie within $Max_Checkin_Duration seconds of each other and
# writes the result either to stdout or to $Log_File_Name (the previous
# file, if any, is kept as "<name>.bak").
#
# Dies if the log source or the output file cannot be opened, if the
# output file cannot be completely written, or if a rename fails.
#
# NOTE(review): whitespace inside string literals (wrap() indents, the
# header sprintf format) is reproduced exactly as it appeared in the copy
# under review -- confirm against the pristine file.
sub derive_change_log ()
{
  # See "The Plan" above for a full explanation.

  my %grand_poobah;

  # State describing the log entry currently being read.
  my $file_full_path;
  my $time;
  my $revision;
  my $author;
  my $msg_txt;
  my $detected_file_separator;

  # We might be expanding usernames
  my %usermap;

  # Per-file state gathered from the "symbolic names:" section.
  my $collecting_symbolic_names = 0;
  my %symbolic_names;    # revision => [ tag names ]
  my %branch_names;      # branch number => branch tag name
  my %branch_numbers;    # followed branch tag name => branch number
  my @branch_roots;      # branch numbers rooted at the current revision

  # Compile the user-supplied patterns once, with or without /i, instead
  # of duplicating every match for the two cases.
  my @ignore_res = map { $Case_Insensitive ? qr/$_/i : qr/$_/ } @Ignore_Files;
  my $gate_re;
  if ($Regexp_Gate) {
    $gate_re = $Case_Insensitive ? qr/$Regexp_Gate/i : qr/$Regexp_Gate/;
  }

  # Forget the entry just handled; at a file separator also forget
  # everything that belongs to the file.  Tags and branch numbers are
  # per-file data, so they must not survive into the next file.
  my $reset_entry_state = sub {
    undef $msg_txt;
    undef $time;
    undef $revision;
    undef $author;
    @branch_roots = ();

    if ($detected_file_separator) {
      undef $file_full_path;
      %branch_names   = ();
      %branch_numbers = ();
      %symbolic_names = ();
    }
  };

  if (! $Input_From_Stdin) {
    open (LOG_SOURCE, "$Log_Source_Command |")
      or die "unable to run \"${Log_Source_Command}\"";
  }
  else {
    open (LOG_SOURCE, "-") or die "unable to open stdin for reading";
  }

  %usermap = &maybe_read_user_map_file ();

  LINE:
  while (<LOG_SOURCE>)
  {
    # If on a new file and don't see filename, skip until we find it, and
    # when we find it, grab it (unless the user asked to ignore it).
    if ((! defined $file_full_path) and /^Working file: (.*)/) {
      $file_full_path = $1;
      if (grep { $file_full_path =~ $_ } @ignore_res) {
        undef $file_full_path;
      }
      next LINE;
    }

    # Just spin wheels if no file defined yet.  (Test definedness and
    # length rather than truth, so a file called "0" is not skipped.)
    next LINE unless (defined $file_full_path and length $file_full_path);

    # Collect tag names in case we're asked to print them in the output.
    if (/^symbolic names:$/) {
      $collecting_symbolic_names = 1;
      next LINE;    # There's no more info on this line, so skip to next
    }
    if ($collecting_symbolic_names)
    {
      # All tag names are listed with whitespace in front in cvs log
      # output; so if see non-whitespace, then we're done collecting.
      if (/^\S/) {
        $collecting_symbolic_names = 0;
      }
      # Otherwise it should be a tag line.  We allow anything CVS hands
      # us as a tag name, but only act on lines that really parse, so
      # stale captures from an earlier match are never stored.
      elsif (/^\s([^:]+): ([\d.]+)$/)
      {
        my ($tag_name, $tag_rev) = ($1, $2);

        # You can always tell a branch by the ".0." as the
        # second-to-last component of the revision number.
        if ($tag_rev =~ /(\d+\.(?:\d+\.)+)0\.(\d+)/) {
          my $real_branch_rev = $1 . $2;
          $branch_names{$real_branch_rev} = $tag_name;
          if (grep { $_ eq $tag_name } @Follow_Branches) {
            $branch_numbers{$tag_name} = $real_branch_rev;
          }
        }
        else {
          # Else it's just a regular (non-branch) tag.
          push (@{$symbolic_names{$tag_rev}}, $tag_name);
        }
      }
    }
    # End of code for collecting tag names.

    # If have file name, but not revision, and see revision, then grab
    # it.  (We collect unconditionally, even though we may or may not
    # ever use it.)
    if ((! defined $revision) and /^revision (\d+\.[\d.]+)/)
    {
      $revision = $1;

      if (@Follow_Branches)
      {
        # Are we on one of the followed branches or an ancestor of one?
        my $on_followed_path = 0;

        BRANCH:
        foreach my $branch (@Follow_Branches)
        {
          # Special case for following trunk revisions
          if (($branch =~ /^trunk$/i) and ($revision =~ /^[0-9]+\.[0-9]+$/)) {
            $on_followed_path = 1;
            last BRANCH;
          }

          my $branch_number = $branch_numbers{$branch};
          next BRANCH unless $branch_number;

          # Trivial case: is this revision on the branch itself?
          if ((substr ($revision, 0, ((length ($branch_number)) + 1)))
              eq ($branch_number . "."))
          {
            $on_followed_path = 1;
            last BRANCH;
          }

          # Non-trivial case: check if rev is ancestral to branch, i.e.
          # it shares the branch root's prefix and its last component
          # is no greater than the root's.
          if ((length ($branch_number)) > (length ($revision)))
          {
            # Split on whole components.  A greedy character class here
            # would cut a multi-digit component in two ("1.12" into
            # "1.1" and "2") and miss genuine ancestors.
            my ($r_left, $r_end) = $revision =~ /^((?:\d+\.)+)(\d+)$/;
            my ($b_left, $b_mid)
              = $branch_number =~ /^((?:\d+\.)+)(\d+)\.\d+$/;

            if ((defined $r_left) and (defined $b_left)
                and ($r_left eq $b_left) and ($r_end <= $b_mid))
            {
              $on_followed_path = 1;
              last BRANCH;
            }
          }
        }

        # We are following branches, but this revision isn't on the
        # path.  So skip it.
        undef $revision unless $on_followed_path;
      }

      next LINE;
    }

    # If we don't have a revision right now, we couldn't possibly
    # be looking at anything useful.
    if (! defined $revision) {
      $detected_file_separator = /^$file_separator$/o;
      if ($detected_file_separator) {
        # No revisions for this file; can happen, e.g. "cvs log -d DATE"
        $reset_entry_state->();
      }
      next LINE;
    }

    # If have file name but not date and author, and see date or
    # author, then grab them:
    unless (defined $time) {
      if (/^date: .*/) {
        ($time, $author) = &parse_date_and_author ($_);
        if (defined ($usermap{$author}) and $usermap{$author}) {
          $author = $usermap{$author};
        }
      }
      else {
        $detected_file_separator = /^$file_separator$/o;
        if ($detected_file_separator) {
          # No revisions for this file; can happen, e.g. "cvs log -d DATE"
          $reset_entry_state->();
        }
      }
      # If the date/time/author hasn't been found yet, we couldn't
      # possibly care about anything we see.  So skip:
      next LINE;
    }

    # A "branches: ..." line here indicates that one or more branches
    # are rooted at this revision, in the form
    #
    #   branches:  1.5.2;  1.5.4;  ...;
    #
    # The output of "cvs log" is inherently ambiguous at this point: the
    # line could equally be the first line of a log message.  We punt
    # and assume people don't write log messages like that, so the line
    # is consumed whether or not branches are being shown.
    if (/^branches:\s+(.*);$/)
    {
      if ($Show_Branches) {
        my $lst = $1;
        # Split into individual branch numbers first, then drop the
        # trivial branch 1.1.1 (and its kin).  Filtering whole elements
        # cannot clip part of another number or leave stray whitespace.
        @branch_roots = grep { length ($_) and ! /^(?:1\.)+1$/ }
                          split (/;\s*/, $lst);
      }
      next LINE;
    }

    # If have file name, time, and author, then we're just grabbing
    # log message texts:
    $detected_file_separator = /^$file_separator$/o;
    unless ($detected_file_separator || /^$logmsg_separator$/o)
    {
      $msg_txt .= $_;    # Normally, just accumulate the message...
      next LINE;
    }

    # ... until a msg separator is encountered:
    # Ensure the message contains something:
    if ((! $msg_txt)
        || ($msg_txt =~ /^\s*\.\s*$|^\s*$/)
        || ($msg_txt =~ /\*\*\* empty log message \*\*\*/))
    {
      if ($Prune_Empty_Msgs) {
        $reset_entry_state->();
        next LINE;
      }
      $msg_txt = "[no log message]\n";
    }

    ### Store it all in the Grand Poobah:

    # Each revision of a file has a little data structure (a `qunk')
    # associated with it, holding the file's name plus whatever else
    # might be needed in the output.  A qunk looks like this:
    #
    #   {
    #     filename    => "hello.c",
    #     revision    => "1.4.3.2",
    #     time        => a timegm() return value (moment of commit)
    #     tags        => [ "tag1", "tag2", ... ],
    #     branch      => "branchname"
    #     branchroots => [ "branchtag1", "branchtag2", ... ]
    #   }
    my $dir_key;    # key into %grand_poobah
    my %qunk;

    if ($Distributed) {
      # Just the basename, don't include the path.
      ($qunk{'filename'}, $dir_key, undef) = fileparse ($file_full_path);
    }
    else {
      $dir_key = "./";
      $qunk{'filename'} = $file_full_path;
    }

    # Not used for commit detection at present, but recorded anyway.
    $qunk{'time'} = $time;

    $qunk{'revision'} = $revision;

    # Grab the branch, even though we may or may not need it.  The
    # branch number is the revision minus its last component.
    my ($branch_prefix) = $revision =~ /^(.*)\.\d+$/;
    if ((defined $branch_prefix) and $branch_names{$branch_prefix}) {
      $qunk{'branch'} = $branch_names{$branch_prefix};
    }

    # If there's anything in @branch_roots, then this revision is the
    # root of at least one branch.  We record them as branch names
    # instead of revision numbers.
    if (@branch_roots) {
      my @roots = map { $branch_names{$_} } @branch_roots;
      $qunk{'branchroots'} = \@roots;
    }

    # Save tags too.
    if (defined ($symbolic_names{$revision})) {
      $qunk{'tags'} = delete $symbolic_names{$revision};
    }

    &debug ("(pushing log msg for ${dir_key}$qunk{'filename'})\n");

    # Store with the files in this commit.  Later we'll loop through
    # again, making sure that revisions with the same log message
    # and nearby commit times are grouped together as one commit.
    # (Intermediate hashes and the array autovivify as needed.)
    push (@{$grand_poobah{$dir_key}{$author}{$time}{$msg_txt}}, \%qunk);

    # Make way for the next message, and maybe the next file.
    $reset_entry_state->();
  }

  close (LOG_SOURCE);

  ### Process each ChangeLog

  while (my ($dir,$authorhash) = each %grand_poobah)
  {
    &debug ("DOING DIR: $dir\n");

    # Here we twist our hash around, from being
    #   author => time => message => filelist
    # in %$authorhash to
    #   time => author => message => filelist
    # in %changelog.
    #
    # This is also where we merge entries: walking each author's commits
    # in time order, an entry is folded into the most recent entry with
    # the same message if that one started less than
    # $Max_Checkin_Duration seconds earlier.
    #
    # (To save space, we zap %$authorhash after we've copied
    # everything out of it.)

    my %changelog;
    while (my ($author,$timehash) = each %$authorhash)
    {
      my %stamptime;    # message => time under which it was last filed
      foreach my $time (sort {$main::a <=> $main::b} (keys %$timehash))
      {
        my $msghash = $timehash->{$time};
        while (my ($msg,$qunklist) = each %$msghash)
        {
          my $stamptime = $stamptime{$msg};
          if ((defined $stamptime)
              and (($time - $stamptime) < $Max_Checkin_Duration)
              and (defined $changelog{$stamptime}{$author}{$msg}))
          {
            push (@{$changelog{$stamptime}{$author}{$msg}}, @$qunklist);
          }
          else {
            $changelog{$time}{$author}{$msg} = $qunklist;
            $stamptime{$msg} = $time;
          }
        }
      }
    }
    undef (%$authorhash);

    ### Now we can write out the ChangeLog!

    my ($logfile_here, $logfile_bak, $tmpfile);

    if (! $Output_To_Stdout) {
      $logfile_here = $dir . $Log_File_Name;
      $logfile_here =~ s/^\.\/\//\//;    # fix any leading ".//" problem
      $tmpfile = "${logfile_here}.cvs2cl$$.tmp";
      $logfile_bak = "${logfile_here}.bak";

      # Three-argument open: the path is never re-read as a mode string.
      open (LOG_OUT, '>', $tmpfile) or die "Unable to open \"$tmpfile\"";
    }
    else {
      open (LOG_OUT, ">-") or die "Unable to open stdout for writing";
    }

    print LOG_OUT $ChangeLog_Header;

    if ($XML_Output) {
      print LOG_OUT "<?xml version=\"1.0\"?>\n\n<changelog>\n\n";
    }

    foreach my $time (sort {$main::b <=> $main::a} (keys %changelog))
    {
      # The date pieces depend on the timestamp only, so work them out
      # once per timestamp.
      my ($min, $hour, $mday, $mon, $year, $wday)
        = ($UTC_Times ? gmtime ($time) : localtime ($time))[1 .. 6];

      # XML output includes everything else, we might as well make
      # it always include Day Of Week too, for consistency.
      my $wday_text = "";
      if ($Show_Day_Of_Week or $XML_Output) {
        my $day_name = ("Sunday", "Monday", "Tuesday", "Wednesday",
                        "Thursday", "Friday", "Saturday")[$wday];
        $wday_text = ($XML_Output)
          ? "<weekday>${day_name}</weekday>\n"
          : " $day_name";
      }

      my $authorhash = $changelog{$time};
      while (my ($author,$mesghash) = each %$authorhash)
      {
        # Set up the date/author line.  The author is escaped into a
        # separate variable, once per author; escaping the loop variable
        # inside the message loop would escape it again for every
        # further message of the same author.
        my $header_line;
        if ($XML_Output) {
          my $author_text = &xml_escape ($author);
          $header_line =
            sprintf ("<date>%4u-%02u-%02u</date>\n"
                     . "%s"
                     . "<time>%02u:%02u</time>\n"
                     . "<author>%s</author>\n",
                     $year+1900, $mon+1, $mday, $wday_text,
                     $hour, $min, $author_text);
        }
        else {
          $header_line =
            sprintf ("%4u-%02u-%02u%s %02u:%02u %s\n\n",
                     $year+1900, $mon+1, $mday, $wday_text,
                     $hour, $min, $author);
        }

        while (my ($msg,$qunklist) = each %$mesghash)
        {
          my $files   = &pretty_file_list ($qunklist);
          my $logtext = &pretty_msg_text ($msg);
          my $body;

          # Reshape the body according to user preferences.
          if ($XML_Output) {
            $body = $files . $logtext;
          }
          elsif ($No_Wrap) {
            $files = wrap ("\t", " ", "$files");
            $logtext =~ s/\n(.*)/\n\t$1/g;
            unless ($After_Header eq " ") {
              $logtext =~ s/^(.*)/\t$1/g;
            }
            $body = $files . $After_Header . $logtext;
          }
          else {
            $body = $files . $After_Header . $logtext;
            $body = wrap ("\t", " ", "$body");
          }

          my $wholething = $header_line . $body;

          if ($XML_Output) {
            $wholething = "<entry>\n${wholething}</entry>\n";
          }

          # One last check: make sure it passes the regexp test, if the
          # user asked for that.  We have to do it here, so that the
          # test can match against information in the header as well
          # as in the text of the log message.
          unless ((defined $gate_re) and ($wholething !~ $gate_re)) {
            print LOG_OUT "${wholething}\n";
          }
        }
      }
    }

    if ($XML_Output) {
      print LOG_OUT "</changelog>\n";
    }

    if ($Output_To_Stdout) {
      close (LOG_OUT);
    }
    else {
      # Buffered write errors only show up at close, and a failed rename
      # must not go unnoticed: carrying on after a failed backup would
      # overwrite the existing ChangeLog with no copy kept.
      close (LOG_OUT)
        or die "Unable to finish writing \"$tmpfile\": $!";

      if (-f $logfile_here) {
        rename ($logfile_here, $logfile_bak)
          or die "Unable to rename \"$logfile_here\" to \"$logfile_bak\": $!";
      }
      rename ($tmpfile, $logfile_here)
        or die "Unable to rename \"$tmpfile\" to \"$logfile_here\": $!";
    }
  }
}
|
---|
744 |
|
---|
# Parses the date/time and author out of a `cvs log' line like:
#
#   date: 1999/02/19 23:29:05; author: apharris; state: Exp;
#
# Takes the line as its only argument.  Returns the list ($time, $author):
# $time is the timegm() value for the date fields, and $author is the text
# between "author:" and the following semicolon.
#
# Dies if the line does not have that shape, or if the year is outside
# 1970..2257.
#
# (No prototype: the sub takes an argument, so the former empty prototype
# was simply wrong.  Callers using the ampersand form are unaffected.)
sub parse_date_and_author
{
  my $line = shift;

  my ($year, $mon, $mday, $hours, $min, $secs, $author) = $line =~
    m#(\d+)/(\d+)/(\d+)\s+(\d+):(\d+):(\d+);\s+author:\s+([^;]+);#
      or die "Couldn't parse date ``$line''";

  # Kinda arbitrary, but useful as a sanity check
  die "Bad date or Y2K issues" unless ($year > 1969 and $year < 2258);

  # Give timegm() the full four-digit year.  Time::Local reads years
  # 0..99 as shorthand for a year within 50 years of *today*, so passing
  # $year-1900 would move 1970-1999 dates into the wrong century once the
  # current year has advanced far enough.
  my $time = timegm ($secs, $min, $hours, $mday, $mon - 1, $year);

  return ($time, $author);
}
|
---|
762 |
|
---|
# Here we take a bunch of qunks and convert them into a printed
# summary that will include all the information the user asked for.
#
# Takes a reference to an array of qunk hash references.  The keys read
# here are 'filename', 'revision', 'branch', 'tags' (array ref) and
# 'branchroots' (array ref).  In non-XML mode every qunk that gets
# listed has its 'printed' key set to 1.
#
# Returns a string: a run of <file>...</file> elements (plus optional
# <utag> and <commondir> elements) when $XML_Output is set, otherwise a
# single "* files...:" header line.  $Show_Branches, $Show_Tags and
# $Show_Revisions select what the non-XML header mentions.
sub pretty_file_list ()
{
  my $qunksref = shift;
  my @qunkrefs = @$qunksref;
  my $beauty = "";          # The accumulating header string for this entry.
  my %non_unanimous_tags;   # Tags found in a proper subset of qunks
  my %unanimous_tags;       # Tags found in all qunks
  my %all_branches;         # Branches found in any qunk
  my $common_dir;           # Dir of all files, or "" if no common dir
  my $fbegun = 0;           # Did we begin printing filenames yet?
  my $multiple_files = ((scalar (@qunkrefs)) > 1);

  # First, loop over the qunks gathering all the tag/branch names.
  # We'll put them all in non_unanimous_tags, and take out the
  # unanimous ones later.
  foreach my $qunkref (@qunkrefs)
  {
    # Keep track of whether all the files in this commit were in the
    # same directory, and memorize it if so.  We can make the output a
    # little more compact by mentioning the directory only once.
    if ($multiple_files)
    {
      if (! (defined ($common_dir))) {
        my $dir = (fileparse ($$qunkref{'filename'}))[1];

        # A file in the current directory has no directory worth
        # mentioning, however the platform spells "here".
        if (($dir eq "./") || ($dir eq ".\\")) {
          $common_dir = "";
        }
        else {
          $common_dir = $dir;
        }
      }
      elsif ($common_dir) {
        $common_dir = &common_path_prefix ($$qunkref{'filename'}, $common_dir);
      }
    }
    else # only one file in this entry anyway, so common dir not an issue
    {
      $common_dir = "";
    }

    if (defined ($$qunkref{'branch'})) {
      $all_branches{$$qunkref{'branch'}} = 1;
    }
    if (defined ($$qunkref{'tags'})) {
      foreach my $tag (@{$$qunkref{'tags'}}) {
        $non_unanimous_tags{$tag} = 1;
      }
    }
  }

  # Any tag held by all qunks will be printed specially... but only if
  # there are multiple qunks in the first place!
  if ($multiple_files) {
    foreach my $tag (keys (%non_unanimous_tags)) {
      my $everyone_has_this_tag = 1;
      foreach my $qunkref (@qunkrefs) {
        if ((! (defined ($$qunkref{'tags'})))
            or (! (grep ($_ eq $tag, @{$$qunkref{'tags'}})))) {
          $everyone_has_this_tag = 0;
          last;   # one dissenter is enough
        }
      }
      if ($everyone_has_this_tag) {
        $unanimous_tags{$tag} = 1;
        delete $non_unanimous_tags{$tag};
      }
    }
  }

  if ($XML_Output)
  {
    # If outputting XML, then our task is pretty simple, because we
    # don't have to detect common dir, common tags, branch prefixing,
    # etc.  We just output exactly what we have, and don't worry about
    # redundancy or readability.

    foreach my $qunkref (@qunkrefs)
    {
      my $filename = &xml_escape ($$qunkref{'filename'});
      my $revision = &xml_escape ($$qunkref{'revision'});
      my $branch   = $$qunkref{'branch'};

      # Work on copies of the lists: escaping the loop variable of a
      # foreach over the qunk's own array would rewrite the caller's
      # data in place.
      my @tags = (defined ($$qunkref{'tags'}))
        ? @{$$qunkref{'tags'}} : ();
      my @branchroots = (defined ($$qunkref{'branchroots'}))
        ? @{$$qunkref{'branchroots'}} : ();

      $beauty .= "<file>\n";
      $beauty .= "<name>${filename}</name>\n";
      $beauty .= "<revision>${revision}</revision>\n";
      if ($branch) {
        $branch = &xml_escape ($branch);
        $beauty .= "<branch>${branch}</branch>\n";
      }
      foreach my $tag (@tags) {
        $beauty .= "<tag>" . &xml_escape ($tag) . "</tag>\n";
      }
      foreach my $root (@branchroots) {
        $beauty .= "<branchroot>" . &xml_escape ($root) . "</branchroot>\n";
      }
      $beauty .= "</file>\n";
    }

    # Theoretically, we could go home now.  But as long as we're here,
    # let's print out the common_dir and utags, as a convenience to
    # the receiver (after all, earlier code calculated that stuff
    # anyway, so we might as well take advantage of it).

    # NOTE(review): "> 1" means a lone unanimous tag is never emitted
    # as <utag>, unlike the plain-text path below.  Left as it was
    # because XML consumers may depend on it -- confirm the intent.
    if ((scalar (keys (%unanimous_tags))) > 1) {
      foreach my $utag (sort (keys (%unanimous_tags))) {
        $beauty .= "<utag>" . &xml_escape ($utag) . "</utag>\n";
      }
    }
    if ($common_dir) {
      $common_dir = &xml_escape ($common_dir);
      $beauty .= "<commondir>${common_dir}</commondir>\n";
    }

    # That's enough for XML, time to go home:
    return $beauty;
  }

  # Else not XML output, so complexly compactify for chordate
  # consumption.  At this point we have enough global information
  # about all the qunks to organize them non-redundantly for output.

  if ($common_dir) {
    # Note that $common_dir still has its trailing slash
    $beauty .= "$common_dir: ";
  }

  if ($Show_Branches)
  {
    foreach my $branch (sort (keys (%all_branches)))
    {
      # Trailing revision numbers, collected afresh for each branch so
      # that one branch's revisions are not repeated under the next.
      my @brevisions;

      foreach my $qunkref (@qunkrefs)
      {
        next unless ((defined ($$qunkref{'branch'}))
                     and ($$qunkref{'branch'} eq $branch));

        if ($fbegun) {
          # kff todo: comma-delimited in XML too?  Sure.
          $beauty .= ", ";
        }
        else {
          $fbegun = 1;
        }
        $beauty .= substr ($$qunkref{'filename'}, length ($common_dir));
        $$qunkref{'printed'} = 1;  # Just setting a mark bit, basically

        if ($Show_Tags && (defined ($$qunkref{'tags'}))) {
          my @tags = grep ($non_unanimous_tags{$_}, @{$$qunkref{'tags'}});
          if (@tags) {
            $beauty .= " (tags: ";
            $beauty .= join (', ', @tags);
            $beauty .= ")";
          }
        }

        if ($Show_Revisions) {
          # Collect the revision numbers' last components, but don't
          # print them -- they'll get printed with the branch name
          # later.  Capture the whole component, not just its final
          # digit, and only when the pattern really matched.
          if ($$qunkref{'revision'} =~ /.+\.(\d+)$/) {
            push (@brevisions, $1);
          }

          # todo: we're still collecting branch roots, but we're not
          # showing them anywhere.  If we do show them, it would be
          # nifty to just call them revision "0" on the branch.
          # Yeah, that's the ticket.
        }
      }

      $beauty .= " ($branch";
      if (@brevisions) {
        if ((scalar (@brevisions)) > 1) {
          $beauty .= ".[";
          $beauty .= (join (',', @brevisions));
          $beauty .= "]";
        }
        else {
          $beauty .= ".$brevisions[0]";
        }
      }
      $beauty .= ")";
    }
  }

  # Okay; any qunks that were done according to branch are taken care
  # of, and marked as printed.  Now print everyone else.

  foreach my $qunkref (@qunkrefs)
  {
    next if (defined ($$qunkref{'printed'}));   # skip if already printed

    if ($fbegun) {
      $beauty .= ", ";
    }
    else {
      $fbegun = 1;
    }
    $beauty .= substr ($$qunkref{'filename'}, length ($common_dir));
    $$qunkref{'printed'} = 1;  # Set a mark bit.

    if ($Show_Revisions || $Show_Tags)
    {
      my $started_addendum = 0;

      if ($Show_Revisions) {
        $started_addendum = 1;
        $beauty .= " (";
        $beauty .= "$$qunkref{'revision'}";
      }
      if ($Show_Tags && (defined ($$qunkref{'tags'}))) {
        my @tags = grep ($non_unanimous_tags{$_}, @{$$qunkref{'tags'}});
        if ((scalar (@tags)) > 0) {
          if ($started_addendum) {
            $beauty .= ", ";
          }
          else {
            $beauty .= " (tags: ";
          }
          $beauty .= join (', ', @tags);
          $started_addendum = 1;
        }
      }
      if ($started_addendum) {
        $beauty .= ")";
      }
    }
  }

  # Unanimous tags always come last.
  if ($Show_Tags && %unanimous_tags)
  {
    $beauty .= " (utags: ";
    $beauty .= join (', ', sort (keys (%unanimous_tags)));
    $beauty .= ")";
  }

  # todo: still have to take care of branch_roots?

  $beauty = "* $beauty:";

  return $beauty;
}
1021 |
|
---|
# Return the longest leading run of directory components shared by the
# directory parts of two paths.
#
# Takes two paths; only the directory portion of each, as reported by
# fileparse(), takes part in the comparison.  Backslashes are treated
# as forward slashes.  Returns the shared prefix with a trailing "/",
# or "" when the two have no leading component in common.
sub common_path_prefix ()
{
  my $path1 = shift;
  my $path2 = shift;

  my $dir1 = (fileparse ($path1))[1];
  my $dir2 = (fileparse ($path2))[1];

  # Transmogrify Windows filenames to look like Unix.
  # (It is far more likely that someone is running cvs2cl.pl under
  # Windows than that they would genuinely have backslashes in their
  # filenames.)
  $dir1 =~ tr#\\#/#;
  $dir2 =~ tr#\\#/#;

  # Compare whole components rather than growing two strings in step:
  # the string-growing approach never advances past an empty component
  # (absolute paths, doubled slashes) or one named "0", and so spins
  # forever.  The -1 limit keeps the empty leading field of an absolute
  # path; the empty field after the final slash is discarded.
  my @parts1 = split (/\//, $dir1, -1);
  my @parts2 = split (/\//, $dir2, -1);
  pop (@parts1) if (@parts1 and ($parts1[-1] eq ""));
  pop (@parts2) if (@parts2 and ($parts2[-1] eq ""));

  my @common;
  while (@parts1 and @parts2 and ($parts1[0] eq $parts2[0]))
  {
    push (@common, shift (@parts1));
    shift (@parts2);
  }

  return "" unless (@common);
  return join ("/", @common) . "/";
}
1054 |
|
---|
# Tidy up a log message for output.
#
# Takes the raw message text and returns the cleaned-up text.  With
# $XML_Output set the text is escaped and wrapped in <msg>...</msg>;
# otherwise, unless $No_Wrap is set, lone newlines are folded so that
# the message can be refilled later.
sub pretty_msg_text ()
{
  my $text = shift;

  # Strip out carriage returns (as they probably result from DOSsy editors).
  $text =~ s/\r\n/\n/g;

  # If it *looks* like two newlines, make it *be* two newlines:
  $text =~ s/\n\s*\n/\n\n/g;

  if ($XML_Output)
  {
    $text = &xml_escape ($text);
    $text = "<msg>${text}</msg>\n";
  }
  elsif (! $No_Wrap)
  {
    # Strip off lone newlines, but only for lines that don't begin with
    # whitespace or a mail-quoting character, since we want to preserve
    # that kind of formatting.  Also don't strip newlines that follow a
    # period; we handle those specially next.
    1 while ($text =~ s/(^|\n)([^>\s].*[^.\n])\n([^>\n])/$1$2 $3/g);

    # If a newline follows a period, make sure that when we bring up the
    # bottom sentence, it begins with two spaces.  The spaces are written
    # as \x20\x20 so that the pair survives whitespace-collapsing tools.
    1 while ($text =~ s/(^|\n)([^>\s].*)\n([^>\n])/$1$2\x20\x20$3/g);
  }

  return $text;
}
1085 |
|
---|
# Escape the characters that are special in XML character data.
#
# Takes a string and returns a copy in which "&", "<" and ">" have been
# replaced by their entity references.  An undefined argument is
# returned as is.
sub xml_escape ()
{
  my $txt = shift;
  return $txt unless (defined ($txt));

  # The ampersand has to go first, or the ampersands introduced by the
  # other two replacements would themselves get escaped.
  $txt =~ s/&/&amp;/g;
  $txt =~ s/</&lt;/g;
  $txt =~ s/>/&gt;/g;
  return $txt;
}
1094 |
|
---|
# Read the username -> expansion map named by $User_Map_File, if any.
#
# Each useful line of the file looks like "username:expansion", where
# the expansion may be wrapped in single or double quotes.  Returns a
# hash (as a list) mapping usernames to what should be printed for
# them; the hash is empty when no map file was requested.  Dies if the
# file cannot be opened.
sub maybe_read_user_map_file ()
{
  my %expansions;

  if ($User_Map_File)
  {
    open (MAPFILE, "<$User_Map_File")
        or die ("Unable to open $User_Map_File ($!)");

    while (my $line = <MAPFILE>)
    {
      chomp ($line);

      # Split at the first colon only, so that an expansion which itself
      # contains a colon is kept whole.
      my ($username, $expansion) = split (/:/, $line, 2);

      # Blank lines and lines without a usable expansion carry no
      # mapping; skip them instead of recording junk.
      next unless ((defined ($username)) and ($username ne ""));
      next unless ((defined ($expansion)) and ($expansion ne ""));

      $expansion =~ s/^'(.*)'$/$1/;
      $expansion =~ s/^"(.*)"$/$1/;

      # If it looks like the expansion has a real name already, then
      # we toss the username we got from CVS log.  Otherwise, keep
      # it to use in combination with the email address.

      if ($expansion =~ /^\s*<{0,1}\S+@.*/) {
        # Also, add angle brackets if none present
        if (! ($expansion =~ /<\S+@\S+>/)) {
          $expansions{$username} = "$username <$expansion>";
        }
        else {
          $expansions{$username} = "$username $expansion";
        }
      }
      else {
        $expansions{$username} = $expansion;
      }
    }

    close (MAPFILE);
  }

  return %expansions;
}
1134 |
|
---|
# Process the command line in @ARGV, setting the option globals.
#
# Every argument is consumed.  Anything that is not a recognized option
# is appended to $Log_Source_Command as a file name.  Prints usage or
# version information and exits when asked to, and exits with status 1
# after complaining about contradictory options.  Dies if an option
# that needs a value is the last thing on the command line.
sub parse_options ()
{
  # Check this internally before setting the global variable.
  my $output_file;

  # If this gets set, we found contradictory options and will exit at
  # the end of this subroutine.
  my $exit_with_admonishment = 0;

  # Fetch the value of an option that requires one.  Only a missing or
  # empty value is an error; "0" is a perfectly good value (think
  # "-W 0").
  my $value_for = sub {
    my $option = shift;
    my $value = shift (@ARGV);
    unless ((defined ($value)) and ($value ne "")) {
      die "$option needs argument.\n";
    }
    return $value;
  };

  # Test definedness, not truth, so that an argument of "0" does not
  # silently end option processing.
  while (defined (my $arg = shift (@ARGV)))
  {
    if ($arg =~ /^-h$|^-help$|^--help$|^--usage$|^-\?$/) {
      $Print_Usage = 1;
    }
    elsif ($arg =~ /^--debug$/) {        # unadvertised option, heh
      $Debug = 1;
    }
    elsif ($arg =~ /^--version$/) {
      $Print_Version = 1;
    }
    elsif ($arg =~ /^-g$|^--global-opts$/) {
      my $narg = &$value_for ($arg);
      # Don't assume CVS is called "cvs" on the user's system:
      $Log_Source_Command =~ s/(^\S*)/$1 $narg/;
    }
    elsif ($arg =~ /^-l$|^--log-opts$/) {
      my $narg = &$value_for ($arg);
      $Log_Source_Command .= " $narg";
    }
    elsif ($arg =~ /^-f$|^--file$/) {
      $output_file = &$value_for ($arg);
    }
    elsif ($arg =~ /^-U$|^--usermap$/) {
      $User_Map_File = &$value_for ($arg);
    }
    elsif ($arg =~ /^-W$|^--window$/) {
      $Max_Checkin_Duration = &$value_for ($arg);
    }
    elsif ($arg =~ /^-I$|^--ignore$/) {
      push (@Ignore_Files, &$value_for ($arg));
    }
    elsif ($arg =~ /^-C$|^--case-insensitive$/) {
      $Case_Insensitive = 1;
    }
    elsif ($arg =~ /^-R$|^--regexp$/) {
      $Regexp_Gate = &$value_for ($arg);
    }
    elsif ($arg =~ /^--stdout$/) {
      $Output_To_Stdout = 1;
    }
    elsif ($arg =~ /^-d$|^--distributed$/) {
      $Distributed = 1;
    }
    elsif ($arg =~ /^-P$|^--prune$/) {
      $Prune_Empty_Msgs = 1;
    }
    elsif ($arg =~ /^-S$|^--separate-header$/) {
      $After_Header = "\n\n";
    }
    elsif ($arg =~ /^--no-wrap$/) {
      $No_Wrap = 1;
    }
    elsif ($arg =~ /^--gmt$|^--utc$/) {
      $UTC_Times = 1;
    }
    elsif ($arg =~ /^-w$|^--day-of-week$/) {
      $Show_Day_Of_Week = 1;
    }
    elsif ($arg =~ /^-r$|^--revisions$/) {
      $Show_Revisions = 1;
    }
    elsif ($arg =~ /^-t$|^--tags$/) {
      $Show_Tags = 1;
    }
    elsif ($arg =~ /^-b$|^--branches$/) {
      $Show_Branches = 1;
    }
    elsif ($arg =~ /^-F$|^--follow$/) {
      push (@Follow_Branches, &$value_for ($arg));
    }
    elsif ($arg =~ /^--stdin$/) {
      $Input_From_Stdin = 1;
    }
    elsif ($arg =~ /^--header$/) {
      my $narg = &$value_for ($arg);
      $ChangeLog_Header = &slurp_file ($narg);
      if (! defined ($ChangeLog_Header)) {
        $ChangeLog_Header = "";
      }
    }
    elsif ($arg =~ /^--xml$/) {
      $XML_Output = 1;
    }
    else {
      # Just add a filename as argument to the log command
      $Log_Source_Command .= " $arg";
    }
  }

  ## Check for contradictions...

  if ($Output_To_Stdout && $Distributed) {
    print STDERR "cannot pass both --stdout and --distributed\n";
    $exit_with_admonishment = 1;
  }

  if ($Output_To_Stdout && $output_file) {
    print STDERR "cannot pass both --stdout and --file\n";
    $exit_with_admonishment = 1;
  }

  # Or if any other error message has already been printed out, we
  # just leave now:
  if ($exit_with_admonishment) {
    &usage ();
    exit (1);
  }
  elsif ($Print_Usage) {
    &usage ();
    exit (0);
  }
  elsif ($Print_Version) {
    &version ();
    exit (0);
  }

  ## Else no problems, so proceed.

  if ($Output_To_Stdout) {
    undef $Log_File_Name;       # not actually necessary
  }
  elsif ($output_file) {
    $Log_File_Name = $output_file;
  }
}
1279 |
|
---|
# Return the entire contents of the named file as one string.
#
# Dies when no file name is given or when the file cannot be opened.
# The two-argument open is deliberate: it is what lets a name of "-"
# stand for standard input.
sub slurp_file ()
{
  my $filename = shift || die ("no filename passed to slurp_file()");

  open (SLURPEE, "<${filename}") or die ("unable to open $filename ($!)");

  # With the input record separator undefined, a single read returns
  # everything; local() puts the separator back when the block exits.
  my $contents = do { local $/; <SLURPEE> };

  close (SLURPEE);
  return $contents;
}
1293 |
|
---|
# Print the given message on standard error, but only when $Debug is
# set.  The message is printed exactly as passed; no newline is added.
sub debug ()
{
  print STDERR $_[0] if ($Debug);
}
1301 |
|
---|
# Print a one-line banner giving the program version ($VERSION) and
# license on standard output.
sub version ()
{
  my $banner = "cvs2cl.pl version " . $VERSION
    . "; distributed under the GNU GPL.\n";
  print $banner;
}
1306 |
|
---|
# Print the version banner followed by the full help text on standard
# output.  The help text sits in a non-interpolating here-document, so
# "@" and "$" in it are literal.
sub usage ()
{
  &version ();
  print <<'END_OF_INFO';
Generate GNU-style ChangeLogs in CVS working copies.

Notes about the output format(s):

The default output of cvs2cl.pl is designed to be compact, formally
unambiguous, but still easy for humans to read. It is largely
self-explanatory, I hope; the one abbreviation that might not be
obvious is "utags". That stands for "universal tags" -- a
universal tag is one held by all the files in a given change entry.

If you need output that's easy for a program to parse, use the
--xml option. Note that with XML output, just about all available
information is included with each change entry, whether you asked
for it or not, on the theory that your parser can ignore anything
it's not looking for.

Notes about the options and arguments (the actual options are listed
last in this usage message):

* The -I and -F options may appear multiple times.

* To follow trunk revisions, use "-F trunk" ("-F TRUNK" also works).
This is okay because no one would ever, ever be crazy enough to name a
branch "trunk", right? Right.

* For the -U option, the UFILE should be formatted like
CVSROOT/users. That is, each line of UFILE looks like this
jrandom:jrandom@red-bean.com
or maybe even like this
jrandom:'Jesse Q. Random <jrandom@red-bean.com>'
Don't forget to quote the portion after the colon if necessary.

* Many people want to filter by date. To do so, invoke cvs2cl.pl
like this:
cvs2cl.pl -l "-d'DATESPEC'"
where DATESPEC is any date specification valid for "cvs log -d".
(Note that CVS 1.10.7 and below requires there be no space between
-d and its argument).

Options/Arguments:

-h, -help, --help, or -? Show this usage and exit
--version Show version and exit
-r, --revisions Show revision numbers in output
-b, --branches Show branch names in revisions when possible
-t, --tags Show tags (symbolic names) in output
--stdin Read from stdin, don't run cvs log
--stdout Output to stdout not to ChangeLog
-d, --distributed Put ChangeLogs in subdirs
-f FILE, --file FILE Write to FILE instead of "ChangeLog"
-W SECS, --window SECS Window of time within which log entries unify
-U UFILE, --usermap UFILE Expand usernames to email addresses from UFILE
-R REGEXP, --regexp REGEXP Include only entries that match REGEXP
-I REGEXP, --ignore REGEXP Ignore files whose names match REGEXP
-C, --case-insensitive Any regexp matching is done case-insensitively
-F BRANCH, --follow BRANCH Show only revisions on or ancestral to BRANCH
-S, --separate-header Blank line between each header and log message
--no-wrap Don't auto-wrap log message (recommend -S also)
--gmt, --utc Show times in GMT/UTC instead of local time
-w, --day-of-week Show day of week
--header FILE Get ChangeLog header from FILE ("-" means stdin)
--xml Output XML instead of ChangeLog format
-P, --prune Don't show empty log messages
-g OPTS, --global-opts OPTS Invoke like this "cvs OPTS log ..."
-l OPTS, --log-opts OPTS Invoke like this "cvs ... log OPTS"
FILE1 [FILE2 ...] Show only log information for the named FILE(s)

See http://www.red-bean.com/~kfogel/cvs2cl.shtml for maintenance and bug info.
END_OF_INFO
}
1381 |
|
---|
1382 | __END__
|
---|
1383 |
|
---|
1384 | =head1 NAME
|
---|
1385 |
|
---|
1386 | cvs2cl.pl - produces GNU-style ChangeLogs in CVS working copies, by
|
---|
1387 | running "cvs log" and parsing the output. Shared log entries are
|
---|
1388 | unified in an intuitive way.
|
---|
1389 |
|
---|
1390 | =head1 DESCRIPTION
|
---|
1391 |
|
---|
1392 | This script generates GNU-style ChangeLog files from CVS log
|
---|
1393 | information. Basic usage: just run it inside a working copy and a
|
---|
1394 | ChangeLog will appear. It requires repository access (i.e., 'cvs log'
|
---|
1395 | must work). Run "cvs2cl.pl --help" to see more advanced options.
|
---|
1396 |
|
---|
1397 | See http://www.red-bean.com/~kfogel/cvs2cl.shtml for updates, and
|
---|
1398 | for instructions on getting anonymous CVS access to this script.
|
---|
1399 |
|
---|
1400 | Maintainer: Karl Fogel <kfogel@red-bean.com>
|
---|
1401 | Please report bugs to <bug-cvs2cl@red-bean.com>.
|
---|
1402 |
|
---|
1403 | =head1 README
|
---|
1404 |
|
---|
1405 | This script generates GNU-style ChangeLog files from CVS log
|
---|
1406 | information. Basic usage: just run it inside a working copy and a
|
---|
1407 | ChangeLog will appear. It requires repository access (i.e., 'cvs log'
|
---|
1408 | must work). Run "cvs2cl.pl --help" to see more advanced options.
|
---|
1409 |
|
---|
1410 | See http://www.red-bean.com/~kfogel/cvs2cl.shtml for updates, and
|
---|
1411 | for instructions on getting anonymous CVS access to this script.
|
---|
1412 |
|
---|
1413 | Maintainer: Karl Fogel <kfogel@red-bean.com>
|
---|
1414 | Please report bugs to <bug-cvs2cl@red-bean.com>.
|
---|
1415 |
|
---|
1416 | =head1 PREREQUISITES
|
---|
1417 |
|
---|
1418 | This script requires C<Text::Wrap>, C<Time::Local>, and
|
---|
1419 | C<File::Basename>.
|
---|
1420 | It also seems to require C<Perl 5.004_04> or higher.
|
---|
1421 |
|
---|
1422 | =pod OSNAMES
|
---|
1423 |
|
---|
1424 | any
|
---|
1425 |
|
---|
1426 | =pod SCRIPT CATEGORIES
|
---|
1427 |
|
---|
1428 | Version_Control/CVS
|
---|
1429 |
|
---|
1430 | =cut
|
---|
1431 |
|
---|
1432 | -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*-
|
---|
1433 |
|
---|
1434 | Note about a bug-slash-opportunity:
|
---|
1435 | -----------------------------------
|
---|
1436 |
|
---|
1437 | There's a bug in Text::Wrap, which affects cvs2cl. This script
|
---|
1438 | reveals it:
|
---|
1439 |
|
---|
1440 | #!/usr/bin/perl -w
|
---|
1441 |
|
---|
1442 | use Text::Wrap;
|
---|
1443 |
|
---|
1444 | my $test_text =
|
---|
1445 | "This script demonstrates a bug in Text::Wrap. The very long line
|
---|
1446 | following this paragraph will be relocated relative to the surrounding
|
---|
1447 | text:
|
---|
1448 |
|
---|
1449 | ====================================================================
|
---|
1450 |
|
---|
1451 | See? When the bug happens, we'll get the line of equal signs below
|
---|
1452 | this paragraph, even though it should be above.";
|
---|
1453 |
|
---|
1454 |
|
---|
1455 | # Print out the test text with no wrapping:
|
---|
1456 | print "$test_text";
|
---|
1457 | print "\n";
|
---|
1458 | print "\n";
|
---|
1459 |
|
---|
1460 | # Now print it out wrapped, and see the bug:
|
---|
1461 | print wrap ("\t", " ", "$test_text");
|
---|
1462 | print "\n";
|
---|
1463 | print "\n";
|
---|
1464 |
|
---|
1465 | If the line of equal signs were one shorter, then the bug wouldn't
|
---|
1466 | happen. Interesting.
|
---|
1467 |
|
---|
1468 | Anyway, rather than fix this in Text::Wrap, we might as well write a
|
---|
1469 | new wrap() which has the following much-needed features:
|
---|
1470 |
|
---|
1471 | * initial indentation, like current Text::Wrap()
|
---|
1472 | * subsequent line indentation, like current Text::Wrap()
|
---|
1473 | * user chooses among: force-break long words, leave them alone, or die()?
|
---|
1474 | * preserve existing indentation: chopped chunks from an indented line
|
---|
1475 | are indented by same (like this line, not counting the asterisk!)
|
---|
1476 | * optional list of things to preserve on line starts, default ">"
|
---|
1477 |
|
---|
1478 | Note that the last two are essentially the same concept, so unify in
|
---|
1479 | implementation and give a good interface to controlling them.
|
---|
1480 |
|
---|
1481 | And how about:
|
---|
1482 |
|
---|
1483 | Optionally, when encountering a line pre-indented by the same amount
|
---|
1484 | as the previous line, strip the newline and refill, but keep the
|
---|
1485 | same indentation. Yeah...
|
---|