diff options
author | Andreas Fankhauser (@tux-six) | 2022-05-30 16:13:08 +0200 |
---|---|---|
committer | Andreas Fankhauser (@tux-six) | 2022-05-30 16:13:08 +0200 |
commit | 3f8ac6551aa0f53fd14b216e97e79939635b8b82 (patch) | |
tree | d71d6b71cd5781692ca95fe938de630bd286e647 | |
parent | 4d54d900cd2cfb880b5cada3b9baef6a30a04332 (diff) | |
download | bulk-ln-3f8ac6551aa0f53fd14b216e97e79939635b8b82.zip bulk-ln-3f8ac6551aa0f53fd14b216e97e79939635b8b82.tar.gz |
Add from existing sources after some cleanup.
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | Makefile | 81 | ||||
-rw-r--r-- | README.md | 8 | ||||
-rw-r--r-- | src/bulk_ln/bulk_ln.c | 422 | ||||
-rw-r--r-- | src/bulk_ln/bulk_ln.h | 10 | ||||
-rw-r--r-- | src/bulk_ln/bulk_ln_main.c | 10 | ||||
-rw-r--r-- | src/common/commonbase.h | 23 |
7 files changed, 555 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..796b96d --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/build diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..f3abfb7 --- /dev/null +++ b/Makefile @@ -0,0 +1,81 @@ + +CC=gcc +LD=ld +AR=ar +TAR=tar +BINEXT= +TOOLS=lxGcc64 + +ifndef PROJECT_VERSION + # We just provide a primitive version string so we can see which build + # we're using while debugging. For a release we will override the version + # by providing it from cli args to 'make' as in: + # make clean package PROJECT_VERSION=1.2.3 + PROJECT_VERSION=$(shell date +0.0.0-%Y%m%d.%H%M%S) +endif + +CFLAGS= --std=c99 \ + -Wall -Wextra -Werror -fmax-errors=3 \ + -Wno-error=unused-function -Wno-error=unused-label \ + -Wno-error=unused-variable -Wno-error=unused-parameter \ + -Wno-error=unused-const-variable \ + -Werror=implicit-fallthrough=1 \ + -Wno-error=unused-but-set-variable \ + -Wno-unused-function -Wno-unused-parameter \ + -DPROJECT_VERSION=$(PROJECT_VERSION) + +LDFLAGS= -Wl,--no-demangle,--fatal-warnings + +INCDIRS= -Isrc/bulk_ln -Isrc/common + +ifndef NDEBUG + CFLAGS := $(CFLAGS) -ggdb -O0 -g3 +else + CFLAGS := $(CFLAGS) -ffunction-sections -fdata-sections -Os "-DNDEBUG=1" + LDFLAGS := $(LDFLAGS) -Wl,--gc-sections,--as-needed +endif + + +default: link package + +.PHONY: clean +clean: + @echo "\n[INFO ] Clean" + rm -rf build dist + +.PHONY: link +link: build/bin/bulk-ln$(BINEXT) + +build/obj/%.o: src/%.c + @echo "\n[INFO ] Compile '$@'" + @mkdir -p $(shell dirname build/obj/$*) + $(CC) -c -o $@ $< $(CFLAGS) $(INCDIRS) + +build/bin/bulk-ln$(BINEXT): \ + build/obj/bulk_ln/bulk_ln.o \ + build/obj/bulk_ln/bulk_ln_main.o + @echo "\n[INFO ] Link '$@'" + @mkdir -p $(shell dirname $@) + $(CC) -o $@ $(LDFLAGS) $^ $(LIBSDIR) + +.PHONY: package +package: link + @echo "\n[INFO ] Package" + @rm -rf build/dist-* dist + @mkdir dist + @echo + @bash -c 'if [[ -n `git status --porcelain` ]]; then echo "[ERROR] Worktree not clean as it should be (see: git status)"; exit 1; fi' + @# Create Executable bundle. + @rm -rf build/dist-bin && mkdir -p build/dist-bin + @cp -t build/dist-bin \ + README* + @mkdir build/dist-bin/bin + @cp -t build/dist-bin/bin \ + build/bin/*$(BINEXT) + @(cd build/dist-bin && find . -type f -not -name MD5SUM -exec md5sum -b {} \;) > build/MD5SUM + @mv build/MD5SUM build/dist-bin/. + @(cd build/dist-bin && $(TAR) --owner=0 --group=0 -czf ../../dist/BulkLn-$(PROJECT_VERSION)-$(TOOLS).tgz *) + @echo "\n[INFO ] DONE: Artifacts created and placed in 'dist'." + @echo + @echo See './dist/' for result. + diff --git a/README.md b/README.md new file mode 100644 index 0000000..57904bd --- /dev/null +++ b/README.md @@ -0,0 +1,8 @@ + +BulkLn +====== + +'ln' like tool to handle large amount of links. + +Usage: bulk-ln --help + diff --git a/src/bulk_ln/bulk_ln.c b/src/bulk_ln/bulk_ln.c new file mode 100644 index 0000000..7c7e98a --- /dev/null +++ b/src/bulk_ln/bulk_ln.c @@ -0,0 +1,422 @@ + +/* Header for this file */ +#include "bulk_ln.h" + +/* System */ +#include <assert.h> +#include <errno.h> +#include <stdbool.h> // TODO remove +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <unistd.h> + +/* Other packages from this project */ +/*#include ""*/ + + +#define DATA_FILE_FIELD_SEP_CHR '\t' + + +typedef struct BulkLn BulkLn; + + +/* Root context */ +struct BulkLn { + + /** File (path) where we read the paths-to-link from. The special + * value - (dash) means to use the stdin stream. */ + char *dataFilePath; + + FILE *dataFd; + + /** If a dry-run got requested */ + bool dryRun; + + /** If true, we will print to stdout for every link we create (like we do + * in dry run) */ + bool isPrintEachCreateLink; + + /** print every call we make to mkdir to stdout. Usually only useful for + * debugging */ + bool isPrintEachMkdir; + + /** if true, we periodically print some kind of progress to stderr */ + bool isPrintStatus; + + /** if true, we print a summary what we did at the end of the run to stderr */ + bool isPrintSummary; + + /** if true, we will override existing files. This will be on for example + * if --force got specified. */ + bool isRelinkExistingFiles; + + /** Count of links we created */ + int createdLinksCount; + + /** Count of direcrories we created */ + int createdDirsCount; +}; + + + +void printHelp(){ + printf("\n %s%s\n", strrchr(__FILE__, '/') + 1, " @ " STR_QUOT(PROJECT_VERSION) "\n" + "\n" + "Utility to create links. Writing a custom implementation of 'ln'\n" + "got necessary as we found no way to instruct 'ln' to create a few thousand\n" + "links in an acceptable amount of time. So we just wrote our own ;)\n" + "\n" + "Takes paths (pairwise) from stdin (see --stdin for details) and creates a\n" + "hardlink for each pair from the 1st path to the 2nd.\n" + "\n" + "Options:\n" + "\n" + " --stdin\n" + " Read the path pairs to link from stdin. The format is like:\n" + "\n" + " <src-path> <tab> <dst-path> <newline>\n" + "\n" + " Example:\n" + "\n" + " origin/foo.txt\tnew/gugg.txt\n" + " origin/bar.txt\tnew/da.txt\n" + "\n" + " HINT: Preferred <newline> is LF. But CRLF should work too.\n" + "\n" + " --quiet\n" + " Don't print status or similar stuff. Errors will still be printed to\n" + " stderr.\n" + "\n" + " --verbose\n" + " Print stupid amount of logs. Usually only helpful for debugging. Should\n" + " NOT be combined with --quiet as this would be nonsense anyway.\n" + "\n" + " --dry-run\n" + " Will print the actions to stdout instead executing them.\n" + " HINT: The directory count in the summary will be implicitly set to zero,\n" + " as our used counting strategy would deliver wrong results when we not\n" + " actually creating the dirs.\n" + "\n" + " --force\n" + " Same meaning as in original 'ln' command.\n" + "\n"); +} + + +/** returns non-zero on errors. Error messages will already be printed + * internally. */ +int parseArgs( int argc, char**argv, BulkLn*bulkLn ){ + /* init (aka set defaults) */ + bulkLn->dataFilePath = NULL; + bulkLn->dryRun = 0; + bulkLn->isPrintEachCreateLink = 0; + bulkLn->isPrintEachMkdir = 0; + bulkLn->isPrintStatus = !0; + bulkLn->isPrintSummary = !0; + bulkLn->isRelinkExistingFiles = 0; + + // Parse args + for( int i=1 ; i<argc ; ++i ) { + char *arg = argv[i]; + if( !strcmp(arg, "--help") ){ + printHelp(); + return -1; + } + else if( !strcmp(arg, "--dry-run") ){ + bulkLn->dryRun = !0; + } + else if( !strcmp(arg, "--force") ){ + bulkLn->isRelinkExistingFiles = !0; + } + else if( !strcmp(arg, "--quiet") ){ + bulkLn->isPrintStatus = false; + bulkLn->isPrintSummary = false; + } + else if( !strcmp(arg, "--stdin") ){ + bulkLn->dataFilePath = "-"; + } + else if( !strcmp(arg, "--verbose") ){ + bulkLn->isPrintStatus = !0; + bulkLn->isPrintSummary = !0; + bulkLn->isPrintEachCreateLink = !0; + bulkLn->isPrintEachMkdir = !0; + } + else{ + fprintf(stderr, "Unknown arg '%s'.\n", arg); + return -1; + } + } + + /* MUST specify input method. Yes there is only one input method. But + * requiring args is the simplest way to prevent damage in case someone (eg + * accidentally) invokes the utility wihout args. Further this also makes + * the utility easier to extend wihout breaking everything. */ + if( bulkLn->dataFilePath == NULL ){ + fprintf(stderr, "Arg '--stdin' missing. Try --help\n"); + return -1; + } + + return 0; +} + + +/** + * Like: mkdirs -p path + * + * WARN: Passed 'path' might be temporarily changed during execution. + * Nevertheless will be in original state after return. + */ +static int mkdirs( char*path, BulkLn*bulkLn ){ + int err; + char *tmpEnd = path; + /* Backup original length so we later still can recognize the original end + * as we are going to place zeroes in the path during the run. */ + char *pathEnd = path + strlen(path); + + for(;;){ + /* Loop for each slash in the path beginning from the topmost dir toward the + * innermost dir. + * This way our path gets longer by one segment each time we call 'mkdir' below. + * This is to have the same effect as we would call 'mkdir --parents' from cli. + * In other words we make sure every parent dir exists before creating the next + * one */ + + tmpEnd = strchr(tmpEnd + 1, '/'); + if( tmpEnd == NULL ){ + /* The last (innermost) segment to create a dir for */ + tmpEnd = pathEnd; + } + + /* Temporarily zero-terminate the path so we can create the parent dir + * up to there */ + tmpEnd[0] = '\0'; + + /* Print if requested */ + if( bulkLn->dryRun || bulkLn->isPrintEachMkdir ){ + printf("mkdir(%s)\n", path); + } + + /* Create dir up to that found path */ + if( ! bulkLn->dryRun ){ + /* Perform the real action */ + /* mode gets masked by umask and after that we have the defaults + * (aka what we want) */ + err = mkdir(path, 0777); + if( err ){ + if( errno == EEXIST ){ + // Fine :) So just continue with the next one. + }else{ + fprintf(stderr, "mkdir(%s): %s\n", path, strerror(errno)); + err = -1; goto finally; + } + }else{ + /* Only increment the directory counter if we really created + * the dir. Eg if it did NOT already exist */ + bulkLn->createdDirsCount += 1; + } + } + + if( tmpEnd == pathEnd ){ + /* Nothing to restore as we point to end-of-string. + * This also means we're done. So end the loop */ + break; + }else{ + /* Restore where we did cut-off the path */ + tmpEnd[0] = '/'; + /* Then loop to add and process one more segment */ + } + } + + err = 0; + finally: + return err; +} + + +/** Like: ln -f srcPath dstPath */ +static int createHardlink( char*srcPath, char*dstPath, BulkLn*bulkLn ){ + int err; + + if( bulkLn->dryRun || bulkLn->isPrintEachCreateLink ){ + printf("link('%s', '%s')\n", srcPath, dstPath); + } + + if( ! bulkLn->dryRun ){ + /* Perform the real action */ + if( bulkLn->isRelinkExistingFiles ){ + /* Delete beforehand so we can be sure 'link' does not fail due + * already existing file */ + err = unlink(dstPath); + if( err ){ + if( errno == ENOENT ){ + /* There is no such entry we could delete. So we're already + * fine :) */ + }else{ + /* Some other (unepxected) error */ + fprintf(stderr, "unlink(%s): %s\n", dstPath, strerror(errno)); + err = -1; goto finally; + } + } + } + err = link(srcPath, dstPath); + if( err ){ + fprintf(stderr, "link('%s', '%s'): %s\n", srcPath, dstPath, strerror(errno)); + err = -1; goto finally; + } + } + + bulkLn->createdLinksCount += 1; + + err = 0; + finally: + return err; +} + + +static int onPathPair( char*srcPath, char*dstPath, BulkLn*bulkLn ){ + assert(srcPath != NULL); + assert(dstPath != NULL); + assert(bulkLn != NULL); + int err; + + if( bulkLn->isPrintStatus && bulkLn->createdLinksCount % 10000 == 0 ){ + fprintf(stderr, "Created %7d links so far.\n", bulkLn->createdLinksCount); + } + + /* Search end of parent dir path */ + char *tmpEnd = strrchr(dstPath, '/'); + if( tmpEnd != NULL ){ + /* Temporarily cut-off the last segment (filename) to create the + * parent-dirs */ + tmpEnd[0] = '\0'; + /* Create missing parent dirs */ + err = mkdirs(dstPath, bulkLn); + if (err) { err = -1; goto finally; } + /* Restore path */ + tmpEnd[0] = '/'; + } + + err = createHardlink(srcPath, dstPath, bulkLn); + if( err ){ err = -1; goto finally; } + + err = 0; + finally: + return err; +} + + +static int parseDataFileAsPairPerLine( BulkLn*bulkLn ){ + int err; + size_t buf_cap = 0; + size_t buf_len = 0; + char *buf = NULL; + size_t lineNum = 0; + + for(;;){ + lineNum += 1; + + /* Read input line-by-line. Not the most elegant way to parse stuff, + * but should suffice for our use-case */ + err = getline(&buf, &buf_cap, bulkLn->dataFd); + if(unlikely( err < 0 )){ + /* Error handling */ + if( feof(bulkLn->dataFd) ){ + break; /* End-Of-File. Just break off the loop */ + }else if( ferror(bulkLn->dataFd) ){ + fprintf(stderr, "getline(%s): %s\n", bulkLn->dataFilePath, strerror(errno)); + err = -1; goto finally; + }else{ + abort(); /* I don't know how this could happen */ + } + } + buf_len = err; + + /* Extract the two paths from our line */ + char *srcPath = buf; + char *tab = memchr(buf, DATA_FILE_FIELD_SEP_CHR, buf_len); + if( tab == NULL ){ + fprintf(stderr, "Too few field separators (tab) in '%s' @ %lu", + bulkLn->dataFilePath, lineNum); + err = -1; goto finally; + } + char *unwantedTab = memchr(tab + 1, DATA_FILE_FIELD_SEP_CHR, tab + 1 - buf); + if( unwantedTab != NULL ){ + fprintf(stderr, "Too many field separators (tab) in '%s' @ %lu", + bulkLn->dataFilePath, lineNum); + err = -1; goto finally; + } + char *dstPath = tab + 1; /* <- path starts one char after the separator */ + char *dstPath_end = buf + buf_len; + for(;; --dstPath_end ){ + if( dstPath_end < buf ){ + fprintf(stderr, "IMHO cannot happen (@%s:%d)\n", __FILE__, __LINE__); + err = -1; goto finally; + } + if( dstPath_end[0]=='\n' || dstPath_end[0]=='\0' || dstPath_end[0]=='\r' ){ + continue; /* last char not found yet */ + } + /* 'dstPath_end' now points to the last char of our line. So add + * one to point to the 'end' */ + dstPath_end += 1; + break; + } + + /* Zero-Terminate the two strings */ + tab[0] = '\0'; + dstPath_end[0] = '\0'; + + /* Publish this pair for processing */ + err = onPathPair(srcPath, dstPath, bulkLn); + if( err ){ err = -1; goto finally; } + } + + if( bulkLn->isPrintStatus ){ + fprintf(stderr, "Parsed %lu records from '%s'\n", lineNum, bulkLn->dataFilePath); + } + + err = 0; + finally: + free(buf); + return err; +} + + +int bulk_ln_main( int argc, char**argv ){ + int err; + BulkLn bulkLn = {0}; + #define bulkLn (&bulkLn) + + /* parse args */ + err = parseArgs(argc, argv, bulkLn); + if( err ){ err = -1; goto finally; } + + /* Open data source */ + if( !strcmp(bulkLn->dataFilePath, "-") ){ + bulkLn->dataFd = stdin; + }else{ + bulkLn->dataFd = fopen(bulkLn->dataFilePath, "rb"); + if( bulkLn->dataFd == NULL ){ + fprintf(stderr, "fopen(%s): %s", bulkLn->dataFilePath, strerror(errno)); + err = -1; goto finally; + } + } + + err = parseDataFileAsPairPerLine(bulkLn); + if( err ){ err = -1; goto finally; } + + if( bulkLn->isPrintSummary ){ + fprintf(stderr, "Created %d directories and linked %d files.\n", + bulkLn->createdDirsCount, bulkLn->createdLinksCount); + } + + err = 0; + finally: + if( bulkLn->dataFd != NULL && bulkLn->dataFd != stdin ){ + fclose(bulkLn->dataFd); bulkLn->dataFd = NULL; + } + return err; + #undef bulkLn +} + diff --git a/src/bulk_ln/bulk_ln.h b/src/bulk_ln/bulk_ln.h new file mode 100644 index 0000000..6054386 --- /dev/null +++ b/src/bulk_ln/bulk_ln.h @@ -0,0 +1,10 @@ +#ifndef INCGUARD_87768d11a9f7d831f2a8b9d0ab7efa9c +#define INCGUARD_87768d11a9f7d831f2a8b9d0ab7efa9c + +#include "commonbase.h" + + +int bulk_ln_main( int argc, char**argv ); + + +#endif /* INCGUARD_87768d11a9f7d831f2a8b9d0ab7efa9c */ diff --git a/src/bulk_ln/bulk_ln_main.c b/src/bulk_ln/bulk_ln_main.c new file mode 100644 index 0000000..a3ac228 --- /dev/null +++ b/src/bulk_ln/bulk_ln_main.c @@ -0,0 +1,10 @@ + +#include "bulk_ln.h" + +int main( int argc, char**argv ){ + int err = bulk_ln_main(argc, argv); + /* Ensure to return 7-bit value only (See POSIX) */ + if( err < 0 ) err = -err; + return err > 127 ? 1 : err; +} + diff --git a/src/common/commonbase.h b/src/common/commonbase.h new file mode 100644 index 0000000..497d3fa --- /dev/null +++ b/src/common/commonbase.h @@ -0,0 +1,23 @@ + +/* + * common config for project. Here goes stuff like feature-test-macros etc. + * + * Every header file MUST include this file AS THE 1ST include. + */ + + +#define _POSIX_C_SOURCE 200809L + + +#define STR_QUOT_IAHGEWIH(s) #s +#define STR_QUOT(s) STR_QUOT_IAHGEWIH(s) + +#define STR_CAT(a, b) a ## b + +#ifndef likely +# define likely(a) (a) +#endif +#ifndef unlikely +# define unlikely(a) (a) +#endif + |