summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authormidipix <writeonce@midipix.org>2024-07-13 05:03:48 +0000
committermidipix <writeonce@midipix.org>2024-07-14 20:18:46 +0000
commit5ea1d7cd4b630a91b4a72fb722a937a107d76fc0 (patch)
tree034f126157e388b09a615989d959d6ac39402824
parentc1438ffaef9c2f9ae116ebc375db2cc1468c341c (diff)
downloadtpax-5ea1d7cd4b630a91b4a72fb722a937a107d76fc0.tar.bz2
tpax-5ea1d7cd4b630a91b4a72fb722a937a107d76fc0.tar.xz
driver: -s <replstr> support: implementation and integration.
-rw-r--r--include/tpax/tpax.h4
-rw-r--r--project/common.mk1
-rw-r--r--src/driver/tpax_driver_ctx.c151
-rw-r--r--src/internal/tpax_driver_impl.h15
-rw-r--r--src/logic/tpax_archive_write.c44
-rw-r--r--src/skin/tpax_skin_default.c15
-rw-r--r--src/util/tpax_path_replstr.c120
7 files changed, 346 insertions, 4 deletions
diff --git a/include/tpax/tpax.h b/include/tpax/tpax.h
index 00271cb..fa685c3 100644
--- a/include/tpax/tpax.h
+++ b/include/tpax/tpax.h
@@ -1,6 +1,7 @@
#ifndef TPAX_H
#define TPAX_H
+#include <regex.h>
#include <stdint.h>
#include <stddef.h>
#include <sys/stat.h>
@@ -172,6 +173,9 @@ tpax_api int tpax_archive_seal (const struct tpax_driver_ctx *);
tpax_api int tpax_util_path_copy (char *, const char *, size_t, uint32_t, size_t *);
tpax_api int tpax_util_stat_compare (const struct stat *, const struct stat *);
+tpax_api int tpax_util_path_replstr (char * dstpath, const char * srcpath, const char * replstr,
+ const regex_t * regex, size_t buflen, int flags);
+
/* utility api */
tpax_api int tpax_main (char **, char **,
const struct tpax_fd_ctx *);
diff --git a/project/common.mk b/project/common.mk
index 26e171c..03e35ee 100644
--- a/project/common.mk
+++ b/project/common.mk
@@ -12,6 +12,7 @@ API_SRCS = \
src/output/tpax_output_error.c \
src/skin/tpax_skin_default.c \
src/util/tpax_path_copy.c \
+ src/util/tpax_path_replstr.c \
src/util/tpax_stat_compare.c \
INTERNAL_SRCS = \
diff --git a/src/driver/tpax_driver_ctx.c b/src/driver/tpax_driver_ctx.c
index dff6ef6..5593ec6 100644
--- a/src/driver/tpax_driver_ctx.c
+++ b/src/driver/tpax_driver_ctx.c
@@ -6,6 +6,7 @@
#define _DEFAULT_SOURCE 1
+#include <regex.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
@@ -322,6 +323,109 @@ static void tpax_set_archive_block_size(struct tpax_common_ctx * cctx)
cctx->blksize = TPAX_USTAR_BLOCK_SIZE;
}
+/*
+ * tpax_add_replstr: parse one -s option argument of the form
+ * <sep><regex><sep><replstr><sep>[gp] and record it in *replstr.
+ *
+ * entry:   the argv entry whose arg member holds the option argument.
+ * replstr: the vector slot to fill in.
+ * mark:    in/out cursor into the shared string buffer; the unescaped
+ *          regular expression and replacement string are stored there
+ *          as two consecutive null-terminated strings, and the cursor
+ *          is advanced past them on success.
+ *
+ * An occurrence of <backslash><sep> inside the regex or the replacement
+ * string stands for a literal separator character.
+ *
+ * Returns 0 on success, -1 on a malformed argument or a regcomp() error.
+ * The slot's regexp member, which marks a slot as holding a compiled
+ * regex, is only set after regcomp() has succeeded, so that a slot
+ * rejected here is never handed to regfree().
+ */
+static int tpax_add_replstr(
+	struct argv_entry *	entry,
+	struct tpax_replstr *	replstr,
+	char **			mark)
+{
+	const char *	src;
+	char *		dst;
+	char *		regexp;
+	char *		subst;
+	char		sep;
+	int		nsep;
+	uint32_t	flags;
+	uint32_t	bit;
+
+	/* non-null separator character */
+	if (!(sep = entry->arg[0]))
+		return -1;
+
+	/* exactly three separator characters */
+	for (nsep=1,src=&entry->arg[1]; *src; src++) {
+		if ((src[0] == '\\') && (src[1] == sep)) {
+			src++;
+
+		} else if (src[0] == sep) {
+			nsep++;
+		}
+	}
+
+	if (nsep != 3)
+		return -1;
+
+	/* regexp (the separator count above guarantees termination) */
+	regexp = dst = *mark;
+
+	for (src=&entry->arg[1]; (*src != sep); src++) {
+		if ((src[0] == '\\') && (src[1] == sep))
+			src++;
+
+		*dst++ = *src;
+	}
+
+	*dst++ = '\0';
+
+	/* replstr */
+	subst = dst;
+
+	for (++src; (*src != sep); src++) {
+		if ((src[0] == '\\') && (src[1] == sep))
+			src++;
+
+		*dst++ = *src;
+	}
+
+	*dst++ = '\0';
+
+	/* flags: any of 'g' and 'p', each at most once */
+	for (flags=0,++src; *src; src++) {
+		if (*src == 'g') {
+			bit = TPAX_REPL_GLOBAL;
+
+		} else if (*src == 'p') {
+			bit = TPAX_REPL_PRINT;
+
+		} else {
+			return -1;
+		}
+
+		if (flags & bit)
+			return -1;
+
+		flags |= bit;
+	}
+
+	/* regex */
+	if (regcomp(&replstr->regex,regexp,0))
+		return -1;
+
+	/* the argument is valid: publish the entry and advance the cursor */
+	replstr->replarg = entry->arg;
+	replstr->replstr = subst;
+	replstr->regexp  = regexp;
+	replstr->flags  |= flags;
+
+	*mark = dst;
+
+	return 0;
+}
+
+/*
+ * tpax_init_replstr_vector: walk the parsed command-line entries and
+ * fill one slot of ctx->replstrv for every TAG_REPLSTR entry, in the
+ * order of appearance; the strings of all slots are packed into
+ * ctx->replstrs.
+ *
+ * Returns 0 on success (or when no vector was allocated), and -1 as
+ * soon as tpax_add_replstr() rejects an argument.
+ */
+static int tpax_init_replstr_vector(
+	struct tpax_driver_ctx_impl *	ctx,
+	struct argv_meta *		meta)
+{
+	struct argv_entry *	parg;
+	struct tpax_replstr *	slot;
+	char *			strbuf;
+
+	/* nothing to do unless a vector was allocated */
+	slot = ctx->replstrv;
+
+	if (!slot)
+		return 0;
+
+	strbuf = ctx->replstrs;
+
+	for (parg=meta->entries; parg->fopt || parg->arg; parg++) {
+		if (parg->tag != TAG_REPLSTR)
+			continue;
+
+		if (tpax_add_replstr(parg,slot,&strbuf) < 0)
+			return -1;
+
+		slot++;
+	}
+
+	return 0;
+}
+
static int tpax_driver_is_valid_keyval(struct argv_keyval * keyval)
{
(void)keyval;
@@ -332,7 +436,9 @@ static struct tpax_driver_ctx_impl * tpax_driver_ctx_alloc(
struct argv_meta * meta,
const struct tpax_fd_ctx * fdctx,
const struct tpax_common_ctx * cctx,
- size_t nunits)
+ size_t nunits,
+ size_t nreplstr,
+ size_t sreplstr)
{
struct tpax_driver_ctx_alloc * ictx;
size_t size;
@@ -379,6 +485,19 @@ static struct tpax_driver_ctx_impl * tpax_driver_ctx_alloc(
return 0;
}
+ if (nreplstr && !(ictx->ctx.replstrv = calloc(++nreplstr,sizeof(*ictx->ctx.replstrv)))) {
+ free(ictx->ctx.keyvalv);
+ free(ictx);
+ return 0;
+ }
+
+ if (sreplstr && !(ictx->ctx.replstrs = calloc(sreplstr,1))) {
+ free(ictx->ctx.replstrv);
+ free(ictx->ctx.keyvalv);
+ free(ictx);
+ return 0;
+ }
+
if ((pkeyval = ictx->ctx.keyvalv))
for (entry=meta->entries; entry->fopt || entry->arg; entry++)
if (entry->keyv)
@@ -474,6 +593,8 @@ int tpax_lib_get_driver_ctx(
struct argv_keyval ** pkeyval;
struct tpax_fd_ctx lfdctx;
size_t nunits;
+ size_t nreplstr;
+ size_t sreplstr;
const char * program;
int fddst;
const char * ch;
@@ -497,6 +618,9 @@ int tpax_lib_get_driver_ctx(
program = argv_program_name(argv[0]);
memset(&cctx,0,sizeof(cctx));
+ nreplstr = 0;
+ sreplstr = 0;
+
cctx.drvflags = flags;
fddst = fdctx->fddst;
@@ -581,6 +705,12 @@ int tpax_lib_get_driver_ctx(
meta);
break;
+ case TAG_REPLSTR:
+ sreplstr += strlen(entry->arg);
+ sreplstr++;
+ nreplstr++;
+ break;
+
case TAG_RECURSE:
cctx.drvflags |= TPAX_DRIVER_DIR_MEMBER_RECURSE;
break;
@@ -751,13 +881,19 @@ int tpax_lib_get_driver_ctx(
}
/* driver ctx */
- if (!(ctx = tpax_driver_ctx_alloc(meta,fdctx,&cctx,nunits))) {
+ if (!(ctx = tpax_driver_ctx_alloc(meta,fdctx,&cctx,nunits,nreplstr,sreplstr))) {
if (cctx.drvflags & TPAX_DRIVER_EXEC_MODE_COPY)
close(fddst);
return tpax_get_driver_ctx_fail(meta);
}
+ /* replstr validation and vector initialization */
+ if (tpax_init_replstr_vector(ctx,meta) < 0) {
+ tpax_lib_free_driver_ctx(&ctx->ctx);
+ return TPAX_ERROR;
+ }
+
/* keyval validation */
for (pkeyval=ctx->keyvalv; pkeyval && *pkeyval; pkeyval++)
if (!tpax_driver_is_valid_keyval(*pkeyval))
@@ -781,6 +917,8 @@ static void tpax_free_driver_ctx_impl(struct tpax_driver_ctx_alloc * ictx)
size_t size;
char ** ppref;
+ struct tpax_replstr * replstrv;
+
for (; ictx->ctx.dirents; ) {
next = ictx->ctx.dirents->next;
size = ictx->ctx.dirents->size;
@@ -789,6 +927,15 @@ static void tpax_free_driver_ctx_impl(struct tpax_driver_ctx_alloc * ictx)
ictx->ctx.dirents = (struct tpax_dirent_buffer *)next;
}
+ for (replstrv=ictx->ctx.replstrv; replstrv && replstrv->regexp; replstrv++)
+ regfree(&replstrv->regex);
+
+ if (ictx->ctx.replstrv)
+ free(ictx->ctx.replstrv);
+
+ if (ictx->ctx.replstrs)
+ free(ictx->ctx.replstrs);
+
if (ictx->ctx.keyvalv)
free(ictx->ctx.keyvalv);
diff --git a/src/internal/tpax_driver_impl.h b/src/internal/tpax_driver_impl.h
index d340748..3df8244 100644
--- a/src/internal/tpax_driver_impl.h
+++ b/src/internal/tpax_driver_impl.h
@@ -7,6 +7,7 @@
#ifndef TPAX_DRIVER_IMPL_H
#define TPAX_DRIVER_IMPL_H
+#include <regex.h>
#include <stdint.h>
#include <dirent.h>
#include <stdio.h>
@@ -31,6 +32,9 @@
#define TPAX_ITEM_SYMLINK 0X4
#define TPAX_ITEM_NAMEREF 0x8
+#define TPAX_REPL_GLOBAL 0x01
+#define TPAX_REPL_PRINT 0x02
+
extern const struct argv_option tpax_default_options[];
enum app_tags {
@@ -45,6 +49,7 @@ enum app_tags {
TAG_FORMAT,
TAG_BLKSIZE,
TAG_OPTIONS,
+ TAG_REPLSTR,
TAG_RECURSE,
TAG_NORECURSE,
TAG_STRICT_PATH,
@@ -74,6 +79,14 @@ struct tpax_dirent_buffer {
struct tpax_dirent dbuf[];
};
+struct tpax_replstr { /* one parsed -s <replstr> option argument */
+ const char * replarg; /* the option argument as found in the argv entry */
+ const char * replstr; /* unescaped replacement string */
+ const char * regexp; /* unescaped regex text; null in the vector's terminating slot */
+ regex_t regex; /* regexp as compiled by regcomp() */
+ uint32_t flags; /* TPAX_REPL_GLOBAL and/or TPAX_REPL_PRINT */
+};
+
struct tpax_driver_ctx_impl {
const char * file;
struct tpax_common_ctx cctx;
@@ -82,6 +95,8 @@ struct tpax_driver_ctx_impl {
const struct tpax_unit_ctx * euctx;
const char * eunit;
struct argv_keyval ** keyvalv;
+ struct tpax_replstr * replstrv;
+ char * replstrs;
struct tpax_error_info ** errinfp;
struct tpax_error_info ** erricap;
struct tpax_error_info * erriptr[64];
diff --git a/src/logic/tpax_archive_write.c b/src/logic/tpax_archive_write.c
index 32a5f2c..a881bd9 100644
--- a/src/logic/tpax_archive_write.c
+++ b/src/logic/tpax_archive_write.c
@@ -75,6 +75,35 @@ static int tpax_archive_write_ret(
return ret;
}
+/*
+ * tpax_apply_string_replacement: try each replacement rule of the
+ * driver context against path, in order, stopping at the first rule
+ * for which tpax_util_path_replstr() returns a non-zero value.
+ *
+ * Returns that rule's result: a positive value after replbuf has been
+ * filled in, or a negative value on error. Returns 0 when no rule is
+ * defined or no rule matched. When a rule that carries TPAX_REPL_PRINT
+ * yields a positive result, the original and the substituted path are
+ * printed to the driver's error descriptor.
+ */
+static int tpax_apply_string_replacement(
+	const struct tpax_driver_ctx *	dctx,
+	const char *			path,
+	char *				replbuf,
+	size_t				buflen)
+{
+	struct tpax_driver_ctx_impl *	ictx;
+	struct tpax_replstr *		rule;
+	int				slen;
+
+	ictx = tpax_get_driver_ictx(dctx);
+	rule = ictx->replstrv;
+
+	if (!rule)
+		return 0;
+
+	for (; rule->regexp; rule++) {
+		slen = tpax_util_path_replstr(
+			replbuf,path,
+			rule->replstr,
+			&rule->regex,
+			buflen,rule->flags);
+
+		/* error: give up */
+		if (slen < 0)
+			return slen;
+
+		/* first successful match wins */
+		if (slen > 0) {
+			if (rule->flags & TPAX_REPL_PRINT)
+				tpax_dprintf(tpax_driver_fderr(dctx),"%s >> %s\n",path,replbuf);
+
+			return slen;
+		}
+	}
+
+	return 0;
+}
+
static int tpax_archive_write_impl(
const struct tpax_driver_ctx * dctx,
const struct tpax_dirent * cdent,
@@ -85,12 +114,14 @@ static int tpax_archive_write_impl(
struct tpax_ustar_header uhdr;
const struct stat * st;
struct stat stbuf;
+ const char * apath;
const char * path;
const char * slnk;
const char * mlnk;
off_t hpos;
off_t dpos;
int fdtmp;
+ int slen;
ssize_t nread;
ssize_t nbytes;
void * buf;
@@ -98,6 +129,7 @@ static int tpax_archive_write_impl(
size_t cmplen;
void * membuf;
char * ch;
+ char replbuf[PATH_MAX];
char pathbuf[PATH_MAX];
/* followed symlink? */
@@ -110,9 +142,17 @@ static int tpax_archive_write_impl(
dctx,
TPAX_ERR_FLOW_ERROR);
+ /* regex matching and pattern substitution */
+ if ((slen = tpax_apply_string_replacement(dctx,path,replbuf,PATH_MAX)) < 0)
+ return TPAX_CUSTOM_ERROR(
+ dctx,
+ TPAX_ERR_FLOW_ERROR);
+
+ apath = slen ? replbuf : path;
+
/* verbose mode */
if (dctx->cctx->drvflags & TPAX_DRIVER_VERBOSE)
- tpax_dprintf(tpax_driver_fderr(dctx),"%s",path);
+ tpax_dprintf(tpax_driver_fderr(dctx),"%s",apath);
/* uctx */
if (tpax_lib_get_unit_ctx(dctx,fdcwd,path,&uctx) < 0)
@@ -171,7 +211,7 @@ static int tpax_archive_write_impl(
/* header */
if (tpax_meta_init_ustar_header(
- dctx,path,st,
+ dctx,apath,st,
slnk,&uhdr) < 0)
return tpax_archive_write_ret(
TPAX_NESTED_ERROR(dctx),
diff --git a/src/skin/tpax_skin_default.c b/src/skin/tpax_skin_default.c
index a618645..dd62b86 100644
--- a/src/skin/tpax_skin_default.c
+++ b/src/skin/tpax_skin_default.c
@@ -93,6 +93,21 @@ const tpax_hidden struct argv_option tpax_default_options[] = {
"a user-provided, format-specific keyval array of the form "
"keyword[[:]=value][,keyword[[:]=value], ...]"},
+ {"Wreplstr", 's',TAG_REPLSTR,ARGV_OPTARG_REQUIRED,
+ ARGV_OPTION_HYBRID_ONLY|ARGV_OPTION_HYBRID_SPACE,0,0,
+ "rename files and archive members as they are being added to "
+ "or extracted from the archive according to the specified "
+ "ed(1) style replacement string, which should be in the format "
+ "<sep><regex><sep><replstr><sep>[gp]; as an example, "
+ "-s ',^/git/tpax/,tpax-1.2.3/,' uses <comma> as the separator "
+ "character, and instructs pax to prefix all files rooted in "
+ "'/git/tpax/' with 'tpax-1.2.3/' while leaving the names of files which "
+ "do not match the regex expression unchanged. "
+ "When this option is repeated, pax shall attempt to match each file or "
+ "member name against all of the provided repalcement-string arguments "
+ "in the order of appearnce on the command line until the first "
+ "successful match."},
+
{"Wstrict-device-id",
'X',TAG_STRICT_DEVICE_ID,ARGV_OPTARG_NONE,
ARGV_OPTION_HYBRID_ONLY,0,0,
diff --git a/src/util/tpax_path_replstr.c b/src/util/tpax_path_replstr.c
new file mode 100644
index 0000000..1935628
--- /dev/null
+++ b/src/util/tpax_path_replstr.c
@@ -0,0 +1,120 @@
+/**************************************************************/
+/* tpax: a topological pax implementation */
+/* Copyright (C) 2020--2024 SysDeer Technologies, LLC */
+/* Released under GPLv2 and GPLv3; see COPYING.TPAX. */
+/**************************************************************/
+
+#include <regex.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include <tpax/tpax.h>
+#include "tpax_driver_impl.h"
+
+/* map the digits '1' through '9' to 1..9, and anything else to 0 */
+static int tpax_backref_idx(const char c)
+{
+	if ((c < '1') || (c > '9'))
+		return 0;
+
+	return c - '0';
+}
+
+/*
+ * append len bytes from src at *dst and advance *dst; cap points at
+ * the last byte of the output buffer, which is reserved for the null
+ * terminator and is therefore never written here.
+ */
+static int tpax_replstr_append(
+	char **		dst,
+	const char *	cap,
+	const char *	src,
+	size_t		len)
+{
+	if ((size_t)(cap - *dst) < len) {
+		errno = ENOBUFS;
+		return -1;
+	}
+
+	memcpy(*dst,src,len);
+	*dst += len;
+
+	return 0;
+}
+
+/*
+ * expand replstr for a single match: <ampersand> stands for the entire
+ * matched string, \1 through \9 stand for the corresponding matched
+ * subexpression (or the empty string if it did not participate in the
+ * match), and any other escaped character stands for itself.
+ */
+static int tpax_replstr_expand(
+	char **			dst,
+	const char *		cap,
+	const char *		src,
+	const char *		replstr,
+	const regmatch_t *	pmatch)
+{
+	const char *	ch;
+	int		idx;
+
+	for (ch=replstr; *ch; ch++) {
+		if (ch[0] == '&') {
+			idx = 0;
+
+		} else if ((ch[0] == '\\') && (idx = tpax_backref_idx(ch[1]))) {
+			ch++;
+
+		} else {
+			/* a trailing backslash is copied as is, so that */
+			/* the cursor never moves past the terminator    */
+			if ((ch[0] == '\\') && ch[1])
+				ch++;
+
+			if (tpax_replstr_append(dst,cap,ch,1) < 0)
+				return -1;
+
+			continue;
+		}
+
+		/* unmatched subexpression: empty string */
+		if (pmatch[idx].rm_so < 0)
+			continue;
+
+		if (tpax_replstr_append(
+				dst,cap,
+				&src[pmatch[idx].rm_so],
+				(size_t)(pmatch[idx].rm_eo - pmatch[idx].rm_so)) < 0)
+			return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * tpax_util_path_replstr: ed(1) style substitution on a path name.
+ *
+ * dstpath: output buffer of buflen bytes, null-terminated on success.
+ * srcpath: the path name to match against regex.
+ * replstr: the replacement string (see tpax_replstr_expand above).
+ * regex:   a regex compiled by regcomp().
+ * flags:   TPAX_REPL_GLOBAL to replace all non-overlapping matches
+ *          rather than just the first one.
+ *
+ * Returns 0 when srcpath does not match (dstpath is left untouched),
+ * -1 on error (with errno set to ENOBUFS when the result does not fit
+ * in dstpath), and otherwise the length of the resulting string.
+ * Note that a match which substitutes to the empty string accordingly
+ * also yields 0, with dstpath set to the empty string.
+ */
+int tpax_util_path_replstr(
+	char *			dstpath,
+	const char *		srcpath,
+	const char *		replstr,
+	const regex_t *		regex,
+	size_t			buflen,
+	int			flags)
+{
+	int		ret;
+	int		eflags;
+	int		fmatched;
+	int		fadjacent;
+	size_t		mlen;
+	const char *	src;
+	const char *	cap;
+	char *		dst;
+	regmatch_t	pmatch[10];
+
+	src       = srcpath;
+	dst       = dstpath;
+	cap       = dstpath;
+	eflags    = 0;
+	fmatched  = 0;
+	fadjacent = 0;
+
+	for (;;) {
+		/* attempt to match */
+		ret = regexec(
+			regex,src,
+			sizeof(pmatch)/sizeof(pmatch[0]),
+			pmatch,eflags);
+
+		if (ret == REG_NOMATCH)
+			break;
+
+		if (ret)
+			return -1;
+
+		/* the output buffer is first needed here */
+		if (!fmatched) {
+			if (!buflen) {
+				errno = ENOBUFS;
+				return -1;
+			}
+
+			cap      = &dstpath[buflen - 1];
+			fmatched = 1;
+		}
+
+		mlen = (size_t)(pmatch[0].rm_eo - pmatch[0].rm_so);
+
+		/* copy bytes leading up to match */
+		if (tpax_replstr_append(&dst,cap,src,(size_t)pmatch[0].rm_so) < 0)
+			return -1;
+
+		/* copy replacement string, unless this is an empty match */
+		/* that immediately follows the previous, non-empty match */
+		if (mlen || pmatch[0].rm_so || !fadjacent)
+			if (tpax_replstr_expand(&dst,cap,src,replstr,pmatch) < 0)
+				return -1;
+
+		src      += pmatch[0].rm_eo;
+		fadjacent = (mlen > 0);
+
+		/* replace further occurrences as needed */
+		if (!(flags & TPAX_REPL_GLOBAL))
+			break;
+
+		/* guarantee forward progress past an empty match */
+		if (!mlen) {
+			if (!*src)
+				break;
+
+			if (tpax_replstr_append(&dst,cap,src,1) < 0)
+				return -1;
+
+			src++;
+		}
+
+		/* src no longer points at the beginning of the path */
+		eflags = REG_NOTBOL;
+	}
+
+	if (!fmatched)
+		return 0;
+
+	/* copy remaining, non-matching bytes as needed */
+	if (tpax_replstr_append(&dst,cap,src,strlen(src)) < 0)
+		return -1;
+
+	*dst = '\0';
+
+	/* all done */
+	return (int)(dst - dstpath);
+}