Skip to content
Snippets Groups Projects
cloc-2.00.pl 726 KiB
Newer Older
#!/usr/bin/env perl
# cloc -- Count Lines of Code                  {{{1
# Copyright (C) 2006-2024 Al Danial <al.danial@gmail.com>
# First release August 2006
#
# Includes code from:
#   - SLOCCount v2.26
#     http://www.dwheeler.com/sloccount/
#     by David Wheeler.
#   - Regexp::Common v2017060201
#     https://metacpan.org/pod/Regexp::Common
#     by Damian Conway and Abigail.
#   - Win32::Autoglob 1.01
#     https://metacpan.org/pod/Win32::Autoglob
#     by Sean M. Burke.
#   - Algorithm::Diff 1.1902
#     https://metacpan.org/pod/Algorithm::Diff
#     by Tye McQueen.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details:
# <http://www.gnu.org/licenses/gpl.txt>.
#
# 1}}}
# Program version string.
my $VERSION = "2.00";  # odd number == beta; even number == stable
# Project home page, stored without a scheme; text that needs a full
# link prepends 'https://' itself.
my $URL     = "github.com/AlDanial/cloc";  # 'https://' pushes header too wide
# Run-time check: abort unless the interpreter is at least Perl 5.10.0.
require 5.10.0;
# use modules                                  {{{1
use warnings;
use strict;

use Getopt::Long;
use File::Basename;
use File::Temp qw { tempfile tempdir };
use File::Find;
use File::Path;
use File::Spec;
use IO::File;
use List::Util qw( min max );
use Cwd;
use POSIX qw { strftime ceil};
# ---- Optional module probing -------------------------------------------
# Each optional module is probed the same way: attempt to load it inside
# a string eval (so a missing module cannot abort the script) and then
# look at the module's $VERSION variable to learn whether the load worked.

# Parallel::ForkManager is not part of the standard distribution.  It is
# not probed here; the flag starts out false and the module load happens
# in get_max_processes(), and only if --processes=N is given.
my $HAVE_Parallel_ForkManager = 0;

# Digest::MD5 is not part of the standard distribution.  When it is
# absent the user is told that file uniqueness checks will be skipped.
eval "use Digest::MD5;";
my $HAVE_Digest_MD5 = defined $Digest::MD5::VERSION ? 1 : 0;
warn "Digest::MD5 not installed; will skip file uniqueness checks.\n"
    unless $HAVE_Digest_MD5;

# Time::HiRes has been a standard module since Perl 5.8.
eval "use Time::HiRes;";
my $HAVE_Time_HiRes = defined $Time::HiRes::VERSION ? 1 : 0;

# Regexp::Common is not part of the standard distribution.  It will be
# installed in a temp directory if necessary.  (The "Rexexp" spelling of
# the flag name is kept as-is; presumably the rest of the file refers to
# it under this name.)
eval "use Regexp::Common qw ( comment ) ";
my $HAVE_Rexexp_Common = defined $Regexp::Common::VERSION ? 1 : 0;

# Algorithm::Diff is not part of the standard distribution.  It will be
# installed in a temp directory if necessary.  The flag must be declared
# before Install_Algorithm_Diff() is called.
my $HAVE_Algorithm_Diff = 0;
eval "use Algorithm::Diff qw ( sdiff ) ";
if (!defined $Algorithm::Diff::VERSION) {
    Install_Algorithm_Diff();
} else {
    $HAVE_Algorithm_Diff = 1;
}

# print "2 HAVE_Algorithm_Diff = $HAVE_Algorithm_Diff\n";
# test_alg_diff($ARGV[$#ARGV - 1], $ARGV[$#ARGV]); die;
# die "Hre=$HAVE_Rexexp_Common  Had=$HAVE_Algorithm_Diff";

# Uncomment next two lines when building Windows executable with perl2exe
# or if running on a system that already has Regexp::Common.
#use Regexp::Common;
#$HAVE_Rexexp_Common = 1;

#perl2exe_include "Regexp/Common/whitespace.pm"
#perl2exe_include "Regexp/Common/URI.pm"
#perl2exe_include "Regexp/Common/URI/fax.pm"
#perl2exe_include "Regexp/Common/URI/file.pm"
#perl2exe_include "Regexp/Common/URI/ftp.pm"
#perl2exe_include "Regexp/Common/URI/gopher.pm"
#perl2exe_include "Regexp/Common/URI/http.pm"
#perl2exe_include "Regexp/Common/URI/pop.pm"
#perl2exe_include "Regexp/Common/URI/prospero.pm"
#perl2exe_include "Regexp/Common/URI/news.pm"
#perl2exe_include "Regexp/Common/URI/tel.pm"
#perl2exe_include "Regexp/Common/URI/telnet.pm"
#perl2exe_include "Regexp/Common/URI/tv.pm"
#perl2exe_include "Regexp/Common/URI/wais.pm"
#perl2exe_include "Regexp/Common/CC.pm"
#perl2exe_include "Regexp/Common/SEN.pm"
#perl2exe_include "Regexp/Common/number.pm"
#perl2exe_include "Regexp/Common/delimited.pm"
#perl2exe_include "Regexp/Common/profanity.pm"
#perl2exe_include "Regexp/Common/net.pm"
#perl2exe_include "Regexp/Common/zip.pm"
#perl2exe_include "Regexp/Common/comment.pm"
#perl2exe_include "Regexp/Common/balanced.pm"
#perl2exe_include "Regexp/Common/lingua.pm"
#perl2exe_include "Regexp/Common/list.pm"
#perl2exe_include "File/Glob.pm"

use Text::Tabs qw { expand };
use Cwd qw { cwd };
use File::Glob;
# 1}}}
# Usage information, options processing.       {{{1
# Decide whether to behave as if running on Windows.  The first guess
# comes from the OS name Perl was built for.
my $ON_WINDOWS = ($^O =~ /^MSWin/ || $^O eq "Windows_NT") ? 1 : 0;

# Refine the guess using $SHELL, which is only consulted when the OS
# name already said "Windows" and SHELL holds a true value:
#   - a value starting with '/' means a Unix-like layer such as Cygwin,
#     so act like Unix;
#   - anything else (e.g. MKS, which defines $SHELL but still acts like
#     Windows) keeps the Windows behavior.
if ($ON_WINDOWS and $ENV{'SHELL'}) {
    $ON_WINDOWS = ($ENV{'SHELL'} =~ m{^/}) ? 0 : 1;
}

# Win32::LongPath is an optional dependency.  When it is available on
# Windows it is used to support reading files whose paths exceed the
# 255 character path length limit.  The load is attempted only on
# Windows; everywhere else the flag simply stays false.
my $HAVE_Win32_Long_Path = 0;
if ($ON_WINDOWS) {
    eval "use Win32::LongPath;";
    $HAVE_Win32_Long_Path = 1 if defined $Win32::LongPath::VERSION;
}
# Default location of the options file:
#   - $HOME set:                     $HOME/.config/cloc/options.txt
#   - else, on Windows with APPDATA: %APPDATA%/cloc  (no file name
#                                    component is appended in this case)
#   - otherwise:                     empty string
my $config_file =
      $ENV{'HOME'}
          ? File::Spec->catfile( $ENV{'HOME'}, '.config', 'cloc', 'options.txt')
    : ($ENV{'APPDATA'} and $ON_WINDOWS)
          ? File::Spec->catfile( $ENV{'APPDATA'}, 'cloc')
    :       '';
# $config_file may be updated by check_alternate_config_files()

# ANSI escape sequences used to decorate the help text:
#   $NN resets to normal text, $BB switches to bold.
# Both collapse to empty strings on Windows or when STDOUT is not a
# terminal (-t STDOUT), so that no escape codes are emitted there.
my ($NN, $BB) = ($ON_WINDOWS or !(-t STDOUT))
              ? ("", "")
              : (chr(27) . "[0m", chr(27) . "[1m");

# Name this program was invoked as, without any leading directories.
my $script = basename($0);

#  Intended for v1.88:
#  --git-diff-simindex       Git diff strategy #3:  use git's similarity index
#                            (git diff -M --name-status) to identify file pairs
#                            to compare.  This is especially useful to compare
#                            files that were renamed between the commits.

# Short help text.  $script and $URL are interpolated into it.  The text
# deliberately begins with an empty line and ends with a newline.
my $brief_usage = <<"END_OF_BRIEF_USAGE";

                       cloc -- Count Lines of Code

Usage:
    $script [options] <file(s)/dir(s)/git hash(es)>
        Count physical lines of source code and comments in the given files
        (may be archives such as compressed tarballs or zip files) and/or
        recursively below the given directories or git commit hashes.
        Example:    cloc src/ include/ main.c

    $script [options] --diff <set1>  <set2>
        Compute differences of physical lines of source code and comments
        between any pairwise combination of directory names, archive
        files or git commit hashes.
        Example:    cloc --diff Python-3.5.tar.xz python-3.6/

$script --help  shows full documentation on the options.
https://$URL has numerous examples and more information.
END_OF_BRIEF_USAGE
my $usage  = "
Usage: $script [options] <file(s)/dir(s)/git hash(es)> | <set 1> <set 2> | <report files>

 Count, or compute differences of, physical lines of source code in the
 given files (may be archives such as compressed tarballs or zip files,
 or git commit hashes or branch names) and/or recursively below the
 given directories.

 ${BB}Input Options${NN}
   --extract-with=<cmd>      This option is only needed if cloc is unable
                             to figure out how to extract the contents of
                             the input file(s) by itself.
                             Use <cmd> to extract binary archive files (e.g.:
Loading
Loading full blame...