summaryrefslogtreecommitdiff
path: root/scripts/git-archive-all.py
diff options
context:
space:
mode:
authorMarius Kintel <marius@kintel.net>2013-06-17 04:45:51 (GMT)
committerMarius Kintel <marius@kintel.net>2013-06-17 04:45:51 (GMT)
commit894276c6c1de5e8ed8d19c470624fe67e9ee1649 (patch)
tree0a1d8fb5b6659e0870532b68e2e47bdae1052d60 /scripts/git-archive-all.py
parent867b43bb40afe286c0f8b701953192121fb2f84a (diff)
Updated to latest version from https://github.com/Kentzo/git-archive-all
Diffstat (limited to 'scripts/git-archive-all.py')
-rwxr-xr-xscripts/git-archive-all.py571
1 files changed, 432 insertions, 139 deletions
diff --git a/scripts/git-archive-all.py b/scripts/git-archive-all.py
index ccfb08a..0088e1a 100755
--- a/scripts/git-archive-all.py
+++ b/scripts/git-archive-all.py
@@ -1,171 +1,464 @@
#! /usr/bin/env python
+# coding=utf-8
+
+from __future__ import print_function
+from __future__ import unicode_literals
+
+__version__ = "1.7"
import sys
-from os import path, chdir
-from subprocess import Popen, PIPE
-from sys import argv, stdout
-from fnmatch import fnmatch
+from os import path, extsep
+from subprocess import Popen, PIPE, CalledProcessError
class GitArchiver(object):
"""
GitArchiver
-
+
Scan a git repository and export all tracked files, and submodules.
Checks for .gitattributes files in each directory and uses 'export-ignore'
pattern entries for ignore files in the archive.
-
- Automatically detects output format extension: zip, tar, bz2, or gz
+
+ Automatically detects output format extension: zip, tar, bz2, or gz.
"""
-
- def __init__(self, prefix='', verbose=False, exclude=True, extra=[]):
- self.prefix = prefix
- self.verbose = verbose
- self.exclude = exclude
- self.extra = extra
-
- self._excludes = []
+ def __init__(self, prefix='', verbose=False, exclude=True, force_sub=False, extra=None, main_repo_abspath=None):
+ """
+ @type prefix: string
+ @param prefix: Prefix used to prepend all paths in the resulting archive.
+
+ @type verbose: bool
+ @param verbose: Determines verbosity of the output (stdout).
+
+ @type exclude: bool
+ @param exclude: Determines whether archiver should follow rules specified in .gitattributes files.
+ Defaults to True.
+
+ @type force_sub: bool
+ @param force_sub: Determines whether submodules are initialized and updated before archiving.
+ Defaults to False
- def create(self, output_file):
+ @type extra: list
+ @param extra: List of extra paths to include in the resulting archive.
+
+ @type main_repo_abspath: string
+ @param main_repo_abspath: Absolute path to the main repository (or one of subdirectories).
+ If None, current cwd is used.
+ If given path is path to a subdirectory (but not a submodule directory!)
+ it will be replaced with abspath to toplevel directory of the repository.
"""
- create(str output_file) -> None
-
- Creates the archive, written to the given output_file
- Filetype may be one of: gz, zip, bz2, tar
+ if extra is None:
+ extra = []
+
+ if main_repo_abspath is None:
+ main_repo_abspath = path.abspath('')
+ elif not path.isabs(main_repo_abspath):
+ raise ValueError("You MUST pass absolute path to the main git repository.")
+
+ # Raises an exception if there is no repo under main_repo_abspath.
+ try:
+ self.run_shell("[ -d .git ] || git rev-parse --git-dir > /dev/null 2>&1", main_repo_abspath)
+ except Exception as e:
+ raise ValueError("Not a git repository (or any of the parent directories).".format(path=main_repo_abspath))
+
+ # Detect toplevel directory of the repo.
+ main_repo_abspath = path.abspath(self.read_git_shell('git rev-parse --show-toplevel', main_repo_abspath).rstrip())
+
+ self.prefix = prefix
+ self.verbose = verbose
+ self.exclude = exclude
+ self.extra = extra
+ self.force_sub = force_sub
+ self.main_repo_abspath = main_repo_abspath
+
+ def create(self, output_path, dry_run=False, output_format=None):
"""
- #
- # determine the format
- #
- _, _, format = output_file.rpartition(".")
+ Creates the archive, written to the given output_file_path
+
+ Type of the archive is determined either by extension of output_file_path or by the format argument.
+ Supported formats are: gz, zip, bz2, tar, tgz
+
+ @type output_path: string
+ @param output_path: Output file path.
- if format.lower() == 'zip':
+ @type dry_run: bool
+ @param dry_run: Determines whether create should do nothing but print what it would archive.
+
+ @type output_format: string
+ @param output_format: Determines format of the output archive.
+ If None, format is determined from extension of output_file_path.
+ """
+ if output_format is None:
+ file_name, file_ext = path.splitext(output_path)
+ output_format = file_ext[len(extsep):].lower()
+
+ if output_format == 'zip':
from zipfile import ZipFile, ZIP_DEFLATED
- output_archive = ZipFile(path.abspath(output_file), 'w')
- add = lambda name, arcname: output_archive.write(name, self.prefix + arcname, ZIP_DEFLATED)
-
- elif format.lower() in ['tar', 'bz2', 'gz']:
+
+ if not dry_run:
+ archive = ZipFile(path.abspath(output_path), 'w')
+ add = lambda file_path, file_name: archive.write(file_path, path.join(self.prefix, file_name), ZIP_DEFLATED)
+ elif output_format in ['tar', 'bz2', 'gz', 'tgz']:
import tarfile
- t_mode = ('w:%s' % format) if format != 'tar' else 'w'
-
- output_archive = tarfile.open(path.abspath(output_file), t_mode)
- add = lambda name, arcname: output_archive.add(name, self.prefix + arcname)
+
+ if output_format == 'tar':
+ t_mode = 'w'
+ elif output_format == 'tgz':
+ t_mode = 'w:gz'
+ else:
+ t_mode = 'w:{f}'.format(f=output_format)
+
+ if not dry_run:
+ archive = tarfile.open(path.abspath(output_path), t_mode)
+ add = lambda file_path, file_name: archive.add(file_path, path.join(self.prefix, file_name))
+ else:
+ raise RuntimeError("Unknown format: {f}".format(f=output_format))
+
+ for file_path in self.extra:
+ if not dry_run:
+ if self.verbose:
+ print("Compressing {f} => {a}...".format(f=file_path,
+ a=path.join(self.prefix, file_path)))
+ add(file_path, file_path)
+ else:
+ print("{f} => {a}".format(f=file_path,
+ a=path.join(self.prefix, file_path)))
+
+ for file_path in self.list_files():
+ if not dry_run:
+ if self.verbose:
+ print("Compressing {f} => {a}...".format(f=path.join(self.main_repo_abspath, file_path),
+ a=path.join(self.prefix, file_path)))
+ add(path.join(self.main_repo_abspath, file_path), file_path)
+ else:
+ print("{f} => {a}".format(f=path.join(self.main_repo_abspath, file_path),
+ a=path.join(self.prefix, file_path)))
+
+ if not dry_run:
+ archive.close()
+
+ def get_path_components(self, repo_abspath, abspath):
+ """
+ Splits given abspath into components until repo_abspath is reached.
+
+ E.g. if repo_abspath is '/Documents/Hobby/ParaView/' and abspath is
+ '/Documents/Hobby/ParaView/Catalyst/Editions/Base/', function will return:
+ ['.', 'Catalyst', 'Editions', 'Base']
+
+ First element is always '.' (concrete symbol depends on OS).
+
+ @type repo_abspath: string
+ @param repo_abspath: Absolute path to the git repository.
+
+ @type abspath: string
+ @param abspath: Absolute path to within repo_abspath.
+
+ @rtype: list
+ @return: List of path components.
+ """
+ components = []
+
+ while not path.samefile(abspath, repo_abspath):
+ abspath, tail = path.split(abspath)
+
+ if len(tail):
+ components.insert(0, tail)
+
+ components.insert(0, path.relpath(repo_abspath, repo_abspath))
+ return components
+
+ def get_exclude_patterns(self, repo_abspath, repo_file_paths):
+ """
+ Returns exclude patterns for a given repo. It looks for .gitattributes files in repo_file_paths.
+
+ Resulting dictionary will contain exclude patterns per path (relative to the repo_abspath).
+ E.g. {('.', 'Catalyst', 'Editions', 'Base'), ['Foo*', '*Bar']}
+
+ @type repo_abspath: string
+ @param repo_abspath: Absolute path to the git repository.
+
+ @type repo_file_paths: list
+ @param repo_file_paths: List of paths relative to the repo_abspath that are under git control.
+
+ @rtype: dict
+ @return: Dictionary representing exclude patterns.
+ Keys are tuples of strings. Values are lists of strings.
+ Returns None if self.exclude is not set.
+ """
+ if not self.exclude:
+ return None
+
+ def read_attributes(attributes_abspath):
+ patterns = []
+ if path.isfile(attributes_abspath):
+ attributes = open(attributes_abspath, 'r').readlines()
+ patterns = []
+ for line in attributes:
+ tokens = line.strip().split()
+ if "export-ignore" in tokens[1:]:
+ patterns.append(tokens[0])
+ return patterns
+
+ exclude_patterns = {(): []}
+
+ # There may be no gitattributes.
+ try:
+ global_attributes_abspath = self.read_shell("git config --get core.attributesfile", repo_abspath).rstrip()
+ exclude_patterns[()] = read_attributes(global_attributes_abspath)
+ except:
+ # And valid to not have them.
+ pass
+
+ for attributes_abspath in [path.join(repo_abspath, f) for f in repo_file_paths if f.endswith(".gitattributes")]:
+ # Each .gitattributes affects only files within its directory.
+ key = tuple(self.get_path_components(repo_abspath, path.dirname(attributes_abspath)))
+ exclude_patterns[key] = read_attributes(attributes_abspath)
+
+ local_attributes_abspath = path.join(repo_abspath, ".git", "info", "attributes")
+ key = tuple(self.get_path_components(repo_abspath, repo_abspath))
+
+ if key in exclude_patterns:
+ exclude_patterns[key].extend(read_attributes(local_attributes_abspath))
else:
- raise RuntimeError("Unknown format: '%s'" % format)
-
- #
- # compress
- #
-
- # extra files first (we may change folder later)
- for name in self.extra:
- if self.verbose:
- toPath = '=> %s%s' % (self.prefix, name) if self.prefix else ""
- print 'Compressing %s %s ...' % (name, toPath)
- add(name, name)
-
- self._excludes = []
-
- for name, arcname in self.listFiles(path.abspath('')):
- if self.verbose:
- toPath = '=> %s%s' % (self.prefix, arcname) if self.prefix else ""
- print 'Compressing %s %s ...' % (arcname, toPath)
- add(name, arcname)
-
- output_archive.close()
-
-
- def listFiles(self, git_repositary_path, baselevel=''):
- """
- listFiles(str git_repository_path, str baselevel='') -> iterator
-
- An iterator method that yields a tuple(filepath, fullpath)
+ exclude_patterns[key] = read_attributes(local_attributes_abspath)
+
+ return exclude_patterns
+
+ def is_file_excluded(self, repo_abspath, repo_file_path, exclude_patterns):
+ """
+ Checks whether file at a given path is excluded.
+
+ @type repo_abspath: string
+ @param repo_abspath: Absolute path to the git repository.
+
+ @type repo_file_path: string
+ @param repo_file_path: Path to a file within repo_abspath.
+
+ @type exclude_patterns: dict
+ @param exclude_patterns: Exclude patterns with format specified for get_exclude_patterns.
+
+ @rtype: bool
+ @return: True if file should be excluded. Otherwise False.
+ """
+ if exclude_patterns is None or not len(exclude_patterns):
+ return False
+
+ from fnmatch import fnmatch
+
+ file_name = path.basename(repo_file_path)
+ components = self.get_path_components(repo_abspath, path.join(repo_abspath, path.dirname(repo_file_path)))
+
+ is_excluded = False
+ # We should check all patterns specified in intermediate directories to the given file.
+ # At the end we should also check for the global patterns (key '()' or empty tuple).
+ while not is_excluded:
+ key = tuple(components)
+ if key in exclude_patterns:
+ patterns = exclude_patterns[key]
+ for p in patterns:
+ if fnmatch(file_name, p) or fnmatch(repo_file_path, p):
+ if self.verbose:
+ print("Exclude pattern matched {pattern}: {path}".format(pattern=p, path=repo_file_path))
+ is_excluded = True
+
+ if not len(components):
+ break
+
+ components.pop()
+
+ return is_excluded
+
+ def list_files(self, repo_path=''):
+ """
+ An iterator method that yields a file path relative to main_repo_abspath
for each file that should be included in the archive.
Skips those that match the exclusion patterns found in
any discovered .gitattributes files along the way.
-
- Recurses into submodules as well.
- """
- for filepath in self.runShell('git ls-files --cached --full-name --no-empty-directory'):
- fullpath = path.join(baselevel, filepath)
- filename = path.basename(filepath)
-
- if self.exclude and filename == '.gitattributes':
- self._excludes = []
- fh = open(filepath, 'r')
- for line in fh:
- if not line: break
- tokens = line.strip().split()
- if 'export-ignore' in tokens[1:]:
- self._excludes.append(tokens[0])
- fh.close()
-
- if not filename.startswith('.git') and not path.isdir(filepath):
-
- # check the patterns first
- ignore = False
- for pattern in self._excludes:
- if fnmatch(fullpath, pattern) or fnmatch(filename, pattern):
- if self.verbose: print 'Exclude pattern matched (%s): %s' % (pattern, fullpath)
- ignore = True
- break
- if ignore:
- continue
-
- # baselevel is needed to tell the arhiver where it have to extract file
- yield filepath, fullpath
-
- # get paths for every submodule
- for submodule in self.runShell("git submodule --quiet foreach 'pwd'"):
- chdir(submodule)
- # in order to get output path we need to exclude repository path from the submodule path
- submodule = submodule[len(git_repositary_path)+1:]
- # recursion allows us to process repositories with more than one level of submodules
- for git_file in self.listFiles(git_repositary_path, submodule):
- yield git_file
-
-
-
+
+ Recurs into submodules as well.
+
+ @type repo_path: string
+ @param repo_path: Path to the git submodule repository within the main git repository.
+
+ @rtype: iterator
+ @return: Iterator to traverse files under git control relative to main_repo_abspath.
+ """
+ repo_abspath = path.join(self.main_repo_abspath, repo_path)
+ repo_file_paths = self.read_git_shell("git ls-files --cached --full-name --no-empty-directory", repo_abspath).splitlines()
+ exclude_patterns = self.get_exclude_patterns(repo_abspath, repo_file_paths)
+
+ for repo_file_path in repo_file_paths:
+ # Git puts path in quotes if file path has unicode characters.
+ repo_file_path = repo_file_path.strip('"') # file path relative to current repo
+ file_name = path.basename(repo_file_path)
+
+ # Only list symlinks and files that don't start with git.
+ if file_name.startswith(".git") or (not path.islink(repo_file_path) and path.isdir(repo_file_path)):
+ continue
+
+ main_repo_file_path = path.join(repo_path, repo_file_path) # file path relative to the main repo
+
+ if self.is_file_excluded(repo_abspath, repo_file_path, exclude_patterns):
+ continue
+
+ # Yield both repo_file_path and main_repo_file_path to preserve structure of the repo.
+ yield main_repo_file_path
+
+ if self.force_sub:
+ self.run_shell("git submodule init", repo_abspath)
+ self.run_shell("git submodule update", repo_abspath)
+
+ # List files of every submodule.
+ for submodule_path in self.read_shell("git submodule --quiet foreach 'pwd'", repo_abspath).splitlines():
+ # In order to get output path we need to exclude repository path from submodule_path.
+ submodule_path = path.relpath(submodule_path, self.main_repo_abspath)
+ for file_path in self.list_files(submodule_path):
+ yield file_path
+
@staticmethod
- def runShell(cmd):
- return Popen(cmd, shell=True, stdout=PIPE).stdout.read().splitlines()
-
-
-
-if __name__ == "__main__":
+ def run_shell(cmd, cwd=None):
+ """
+ Runs shell command.
+
+ @type cmd: string
+ @param cmd: Command to be executed.
+
+ @type cwd: string
+ @param cwd: Working directory.
+
+ @rtype: int
+ @return: Return code of the command.
+
+ @raise CalledProcessError: Raises exception if return code of the command is non-zero.
+ """
+ p = Popen(cmd, shell=True, cwd=cwd)
+ p.wait()
+
+ if p.returncode:
+ raise CalledProcessError(returncode=p.returncode, cmd=cmd)
+
+ return p.returncode
+
+ @staticmethod
+ def read_shell(cmd, cwd=None, encoding='utf-8'):
+ """
+ Runs shell command and reads output.
+
+ @type cmd: string
+ @param cmd: Command to be executed.
+
+ @type cwd: string
+ @param cwd: Working directory.
+
+ @type encoding: string
+ @param encoding: Encoding used to decode bytes returned by Popen into string.
+
+ @rtype: string
+ @return: Output of the command.
+
+ @raise CalledProcessError: Raises exception if return code of the command is non-zero.
+ """
+ p = Popen(cmd, shell=True, stdout=PIPE, cwd=cwd)
+ output, _ = p.communicate()
+ output = output.decode(encoding)
+
+ if p.returncode:
+ raise CalledProcessError(returncode=p.returncode, cmd=cmd, output=output)
+
+ return output
+
+ @staticmethod
+ def read_git_shell(cmd, cwd=None):
+ """
+ Runs git shell command, reads output and decodes it into unicode string
+
+ @type cmd: string
+ @param cmd: Command to be executed.
+
+ @type cwd: string
+ @param cwd: Working directory.
+
+ @rtype: string
+ @return: Output of the command.
+
+ @raise CalledProcessError: Raises exception if return code of the command is non-zero.
+ """
+ p = Popen(cmd, shell=True, stdout=PIPE, cwd=cwd)
+ output, _ = p.communicate()
+ output = output.decode('unicode_escape').encode('raw_unicode_escape').decode('utf-8')
+
+ if p.returncode:
+ raise CalledProcessError(returncode=p.returncode, cmd=cmd, output=output)
+
+ return output
+
+
+if __name__ == '__main__':
from optparse import OptionParser
- parser = OptionParser(usage="usage: %prog [-v] [--prefix PREFIX] [--no-exclude] OUTPUT_FILE", version="%prog 1.0")
-
- parser.add_option('--prefix', type='string', dest='prefix',
- default='', help="prepend PREFIX to each filename in the archive")
-
- parser.add_option('-v', '--verbose', action='store_true', dest='verbose', help='enable verbose mode')
-
- parser.add_option('--no-exclude', action='store_false', dest='exclude',
- default=True, help="Dont read .gitattributes files for patterns containing export-ignore attrib")
-
- parser.add_option('--extra', action='append', dest='extra', default=[],
+ parser = OptionParser(usage="usage: %prog [-v] [--prefix PREFIX] [--no-exclude] [--force-submodules] [--dry-run] OUTPUT_FILE",
+ version="%prog {version}".format(version=__version__))
+
+ parser.add_option('--prefix',
+ type='string',
+ dest='prefix',
+ default='',
+ help="Prepend PREFIX to each filename in the archive. OUTPUT_FILE name is used by default to avoid tarbomb.")
+
+ parser.add_option('-v', '--verbose',
+ action='store_true',
+ dest='verbose',
+ help='Enable verbose mode.')
+
+ parser.add_option('--no-exclude',
+ action='store_false',
+ dest='exclude',
+ default=True,
+ help="Don't read .gitattributes files for patterns containing export-ignore attrib.")
+
+ parser.add_option('--force-submodules',
+ action='store_true',
+ dest='force_sub',
+ help="Force a git submodule init && git submodule update at each level before iterating submodules.")
+
+ parser.add_option('--extra',
+ action='append',
+ dest='extra',
+ default=[],
help="Any additional files to include in the archive.")
+ parser.add_option('--dry-run',
+ action='store_true',
+ dest='dry_run',
+ help="Don't actually archive anything, just show what would be done.")
options, args = parser.parse_args()
-
+
if len(args) != 1:
- parser.error('You must specify exactly one output file')
-
- outFile = args[0]
-
- if path.isdir(outFile):
- parser.error('You cannot use directory as output')
-
- archiver = GitArchiver(options.prefix,
- options.verbose,
- options.exclude,
- options.extra)
-
+ parser.error("You must specify exactly one output file")
+
+ output_file_path = args[0]
+
+ if path.isdir(output_file_path):
+ parser.error("You cannot use directory as output")
+
+ # avoid tarbomb
+ if options.prefix:
+ options.prefix = path.join(options.prefix, '')
+ else:
+ import re
+
+ output_name = path.basename(output_file_path)
+ output_name = re.sub('(\.zip|\.tar|\.tgz|\.gz|\.bz2|\.tar\.gz|\.tar\.bz2)$', '', output_name) or "Archive"
+ options.prefix = path.join(output_name, '')
+
try:
- archiver.create(outFile)
- except Exception, e:
- parser.exit(2, "%s\n" % e)
-
+ archiver = GitArchiver(options.prefix,
+ options.verbose,
+ options.exclude,
+ options.force_sub,
+ options.extra)
+ archiver.create(output_file_path, options.dry_run)
+ except Exception as e:
+ parser.exit(2, "{exception}\n".format(exception=e))
+
sys.exit(0)
contact: Jan Huwald // Impressum