From 1988ee3819677a026d2bfb5a734b47a7c5fe8316 Mon Sep 17 00:00:00 2001 From: Andrew Ayer Date: Sun, 24 May 2015 18:55:56 -0700 Subject: [PATCH] Speed up lock/unlock by using single git check-attr process Previously, lock/unlock needed to spawn a separate `git check-attr` process for every single file in the repository (whether encrypted or not). This was extremely inefficient, especially on Windows. Now, git-crypt spawns a single `git check-attr` process and communicates with it over stdin. In a repository with thousands of files, this results in a speedup of nearly 100x. This relies on the --stdin and -z options to `git check-attr`, which were only added in Git 1.8.5 (released 27 Nov 2013). With older versions of Git, git-crypt falls back to the old and slower code. --- commands.cpp | 102 +++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 86 insertions(+), 16 deletions(-) diff --git a/commands.cpp b/commands.cpp index f8e9f23..19111e3 100644 --- a/commands.cpp +++ b/commands.cpp @@ -34,6 +34,7 @@ #include "key.hpp" #include "gpg.hpp" #include "parse_options.hpp" +#include "coprocess.hpp" #include #include #include @@ -326,7 +327,6 @@ static void get_git_status (std::ostream& output) static std::pair<std::string, std::string> get_file_attributes (const std::string& filename) { // git check-attr filter diff -- filename - // TODO: pass -z to get machine-parseable output (this requires Git 1.8.5 or higher, which was released on 27 Nov 2013) std::vector<std::string> command; command.push_back("git"); command.push_back("check-attr"); @@ -375,6 +375,36 @@ static std::pair<std::string, std::string> get_file_attributes (const std::strin return std::make_pair(filter_attr, diff_attr); } +// returns filter and diff attributes as a pair +static std::pair<std::string, std::string> get_file_attributes (const std::string& filename, std::ostream& check_attr_stdin, std::istream& check_attr_stdout) +{ + check_attr_stdin << filename << '\0' << std::flush; + + std::string filter_attr; + std::string diff_attr; + + // Example output: + // 
filename\0filter\0git-crypt\0filename\0diff\0git-crypt\0 + for (int i = 0; i < 2; ++i) { + std::string filename; + std::string attr_name; + std::string attr_value; + std::getline(check_attr_stdout, filename, '\0'); + std::getline(check_attr_stdout, attr_name, '\0'); + std::getline(check_attr_stdout, attr_value, '\0'); + + if (attr_value != "unspecified" && attr_value != "unset" && attr_value != "set") { + if (attr_name == "filter") { + filter_attr = attr_value; + } else if (attr_name == "diff") { + diff_attr = attr_value; + } + } + } + + return std::make_pair(filter_attr, diff_attr); +} + static bool check_if_blob_is_encrypted (const std::string& object_id) { // git cat-file blob object_id @@ -430,32 +460,72 @@ static bool is_git_file_mode (const std::string& mode) static void get_encrypted_files (std::vector<std::string>& files, const char* key_name) { // git ls-files -cz -- path_to_top - std::vector<std::string> command; - command.push_back("git"); - command.push_back("ls-files"); - command.push_back("-csz"); - command.push_back("--"); + std::vector<std::string> ls_files_command; + ls_files_command.push_back("git"); + ls_files_command.push_back("ls-files"); + ls_files_command.push_back("-csz"); + ls_files_command.push_back("--"); const std::string path_to_top(get_path_to_top()); if (!path_to_top.empty()) { - command.push_back(path_to_top); + ls_files_command.push_back(path_to_top); } - std::stringstream output; - if (!successful_exit(exec_command(command, output))) { - throw Error("'git ls-files' failed - is this a Git repository?"); + Coprocess ls_files; + std::istream* ls_files_stdout = ls_files.stdout_pipe(); + ls_files.spawn(ls_files_command); + + Coprocess check_attr; + std::ostream* check_attr_stdin = NULL; + std::istream* check_attr_stdout = NULL; + if (git_version() >= make_version(1, 8, 5)) { + // In Git 1.8.5 (released 27 Nov 2013) and higher, we use a single `git check-attr` process + // to get the attributes of all files at once. 
In prior versions, we have to fork and exec + // a separate `git check-attr` process for each file, since -z and --stdin aren't supported. + // In a repository with thousands of files, this results in an almost 100x speedup. + std::vector<std::string> check_attr_command; + check_attr_command.push_back("git"); + check_attr_command.push_back("check-attr"); + check_attr_command.push_back("--stdin"); + check_attr_command.push_back("-z"); + check_attr_command.push_back("filter"); + check_attr_command.push_back("diff"); + + check_attr_stdin = check_attr.stdin_pipe(); + check_attr_stdout = check_attr.stdout_pipe(); + check_attr.spawn(check_attr_command); } - while (output.peek() != -1) { + while (ls_files_stdout->peek() != -1) { std::string mode; std::string object_id; std::string stage; std::string filename; - output >> mode >> object_id >> stage >> std::ws; - std::getline(output, filename, '\0'); + *ls_files_stdout >> mode >> object_id >> stage >> std::ws; + std::getline(*ls_files_stdout, filename, '\0'); - // TODO: get file attributes en masse for efficiency... 
unfortunately this requires machine-parseable output from git check-attr to be workable, and this is only supported in Git 1.8.5 and above (released 27 Nov 2013) - if (is_git_file_mode(mode) && get_file_attributes(filename).first == attribute_name(key_name)) { - files.push_back(filename); + if (is_git_file_mode(mode)) { + std::string filter_attribute; + + if (check_attr_stdin) { + filter_attribute = get_file_attributes(filename, *check_attr_stdin, *check_attr_stdout).first; + } else { + filter_attribute = get_file_attributes(filename).first; + } + + if (filter_attribute == attribute_name(key_name)) { + files.push_back(filename); + } + } + } + + if (!successful_exit(ls_files.wait())) { + throw Error("'git ls-files' failed - is this a Git repository?"); + } + + if (check_attr_stdin) { + check_attr.close_stdin(); + if (!successful_exit(check_attr.wait())) { + throw Error("'git check-attr' failed - is this a Git repository?"); } } }