diff options
| -rw-r--r-- | cache.c | 355 | ||||
| -rw-r--r-- | cache.h | 33 | ||||
| -rw-r--r-- | cgit.c | 166 | ||||
| -rw-r--r-- | cgit.h | 1 | ||||
| -rwxr-xr-x | tests/setup.sh | 2 | ||||
| -rwxr-xr-x | tests/t0020-validate-cache.sh | 67 | 
6 files changed, 416 insertions, 208 deletions
| @@ -4,117 +4,340 @@   *   * Licensed under GNU General Public License v2   *   (see COPYING for full license text) + * + * + * The cache is just a directory structure where each file is a cache slot, + * and each filename is based on the hash of some key (e.g. the cgit url). + * Each file contains the full key followed by the cached content for that + * key. + *   */  #include "cgit.h"  #include "cache.h" -const int NOLOCK = -1; +#define CACHE_BUFSIZE (1024 * 4) + +struct cache_slot { +	const char *key; +	int keylen; +	int ttl; +	cache_fill_fn fn; +	void *cbdata; +	int cache_fd; +	int lock_fd; +	const char *cache_name; +	const char *lock_name; +	int match; +	struct stat cache_st; +	struct stat lock_st; +	int bufsize; +	char buf[CACHE_BUFSIZE]; +}; -char *cache_safe_filename(const char *unsafe) +/* Open an existing cache slot and fill the cache buffer with + * (part of) the content of the cache file. Return 0 on success + * and errno otherwise. + */ +static int open_slot(struct cache_slot *slot)  { -	static char buf[4][PATH_MAX]; -	static int bufidx; -	char *s; -	char c; +	char *bufz; +	int bufkeylen = -1; -	bufidx++; -	bufidx &= 3; -	s = buf[bufidx]; +	slot->cache_fd = open(slot->cache_name, O_RDONLY); +	if (slot->cache_fd == -1) +		return errno; -	while(unsafe && (c = *unsafe++) != 0) { -		if (c == '/' || c == ' ' || c == '&' || c == '|' || -		    c == '>' || c == '<' || c == '.') -			c = '_'; -		*s++ = c; -	} -	*s = '\0'; -	return buf[bufidx]; +	if (fstat(slot->cache_fd, &slot->cache_st)) +		return errno; + +	slot->bufsize = read(slot->cache_fd, slot->buf, sizeof(slot->buf)); +	if (slot->bufsize == 0) +		return errno; + +	bufz = memchr(slot->buf, 0, slot->bufsize); +	if (bufz) +		bufkeylen = bufz - slot->buf; + +	slot->match = bufkeylen == slot->keylen && +	    !memcmp(slot->key, slot->buf, bufkeylen + 1); + +	return 0;  } -int cache_exist(struct cacheitem *item) +/* Close the active cache slot */ +static void close_slot(struct cache_slot *slot)  { -	if (stat(item->name, &item->st)) { -		item->st.st_mtime = 0; -		return 0; +	if (slot->cache_fd > 0) { +		close(slot->cache_fd); +		slot->cache_fd = -1;  	} -	return 1;  } -int cache_create_dirs() +/* Print the content of the active cache slot (but skip the key). */ +static int print_slot(struct cache_slot *slot)  { -	char *path; +	ssize_t i, j = 0; + +	i = lseek(slot->cache_fd, slot->keylen + 1, SEEK_SET); +	if (i != slot->keylen + 1) +		return errno; -	path = fmt("%s", ctx.cfg.cache_root); -	if (mkdir(path, S_IRWXU) && errno!=EEXIST) +	while((i=read(slot->cache_fd, slot->buf, sizeof(slot->buf))) > 0) +		j = write(STDOUT_FILENO, slot->buf, i); + +	if (j < 0) +		return errno; +	else  		return 0; +} -	if (!ctx.repo) +/* Check if the slot has expired */ +static int is_expired(struct cache_slot *slot) +{ +	if (slot->ttl < 0)  		return 0; +	else +		return slot->cache_st.st_mtime + slot->ttl*60 < time(NULL); +} -	path = fmt("%s/%s", ctx.cfg.cache_root, -		   cache_safe_filename(ctx.repo->url)); +/* Check if the slot has been modified since we opened it. + * NB: If stat() fails, we pretend the file is modified. + */ +static int is_modified(struct cache_slot *slot) +{ +	struct stat st; -	if (mkdir(path, S_IRWXU) && errno!=EEXIST) -		return 0; +	if (stat(slot->cache_name, &st)) +		return 1; +	return (st.st_ino != slot->cache_st.st_ino || +		st.st_mtime != slot->cache_st.st_mtime || +		st.st_size != slot->cache_st.st_size); +} -	if (ctx.qry.page) { -		path = fmt("%s/%s/%s", ctx.cfg.cache_root, -			   cache_safe_filename(ctx.repo->url), -			   ctx.qry.page); -		if (mkdir(path, S_IRWXU) && errno!=EEXIST) -			return 0; +/* Close an open lockfile */ +static void close_lock(struct cache_slot *slot) +{ +	if (slot->lock_fd > 0) { +		close(slot->lock_fd); +		slot->lock_fd = -1;  	} -	return 1;  } -int cache_refill_overdue(const char *lockfile) +/* Create a lockfile used to store the generated content for a cache + * slot, and write the slot key + \0 into it. + * Returns 0 on success and errno otherwise. + */ +static int lock_slot(struct cache_slot *slot)  { -	struct stat st; +	slot->lock_fd = open(slot->lock_name, O_RDWR|O_CREAT|O_EXCL, +			     S_IRUSR|S_IWUSR); +	if (slot->lock_fd == -1) +		return errno; +	write(slot->lock_fd, slot->key, slot->keylen + 1); +	return 0; +} -	if (stat(lockfile, &st)) -		return 0; +/* Release the current lockfile. If `replace_old_slot` is set the + * lockfile replaces the old cache slot, otherwise the lockfile is + * just deleted. + */ +static int unlock_slot(struct cache_slot *slot, int replace_old_slot) +{ +	int err; + +	if (replace_old_slot) +		err = rename(slot->lock_name, slot->cache_name);  	else -		return (time(NULL) - st.st_mtime > ctx.cfg.cache_max_create_time); +		err = unlink(slot->lock_name); +	return err;  } -int cache_lock(struct cacheitem *item) +/* Generate the content for the current cache slot by redirecting + * stdout to the lock-fd and invoking the callback function + */ +static int fill_slot(struct cache_slot *slot)  { -	int i = 0; -	char *lockfile = xstrdup(fmt("%s.lock", item->name)); +	int tmp; - top: -	if (++i > ctx.cfg.max_lock_attempts) -		die("cache_lock: unable to lock %s: %s", -		    item->name, strerror(errno)); +	/* Preserve stdout */ +	tmp = dup(STDOUT_FILENO); +	if (tmp == -1) +		return errno; -       	item->fd = open(lockfile, O_WRONLY|O_CREAT|O_EXCL, S_IRUSR|S_IWUSR); +	/* Redirect stdout to lockfile */ +	if (dup2(slot->lock_fd, STDOUT_FILENO) == -1) +		return errno; -	if (item->fd == NOLOCK && errno == ENOENT && cache_create_dirs()) -		goto top; +	/* Generate cache content */ +	slot->fn(slot->cbdata); -	if (item->fd == NOLOCK && errno == EEXIST && -	    cache_refill_overdue(lockfile) && !unlink(lockfile)) -			goto top; +	/* Restore stdout */ +	if (dup2(tmp, STDOUT_FILENO) == -1) +		return errno; -	free(lockfile); -	return (item->fd > 0); +	/* Close the temporary filedescriptor */ +	close(tmp); +	return 0;  } -int cache_unlock(struct cacheitem *item) +/* Crude implementation of 32-bit FNV-1 hash algorithm, + * see http://www.isthe.com/chongo/tech/comp/fnv/ for details + * about the magic numbers. + */ +#define FNV_OFFSET 0x811c9dc5 +#define FNV_PRIME  0x01000193 + +unsigned long hash_str(const char *str)  { -	close(item->fd); -	return (rename(fmt("%s.lock", item->name), item->name) == 0); +	unsigned long h = FNV_OFFSET; +	unsigned char *s = (unsigned char *)str; + +	if (!s) +		return h; + +	while(*s) { +		h *= FNV_PRIME; +		h ^= *s++; +	} +	return h;  } -int cache_cancel_lock(struct cacheitem *item) +static int process_slot(struct cache_slot *slot)  { -	return (unlink(fmt("%s.lock", item->name)) == 0); +	int err; + +	err = open_slot(slot); +	if (!err && slot->match) { +		if (is_expired(slot)) { +			if (!lock_slot(slot)) { +				/* If the cachefile has been replaced between +				 * `open_slot` and `lock_slot`, we'll just +				 * serve the stale content from the original +				 * cachefile. This way we avoid pruning the +				 * newly generated slot. The same code-path +				 * is chosen if fill_slot() fails for some +				 * reason. +				 * +				 * TODO? check if the new slot contains the +				 * same key as the old one, since we would +				 * prefer to serve the newest content. +				 * This will require us to open yet another +				 * file-descriptor and read and compare the +				 * key from the new file, so for now we're +				 * lazy and just ignore the new file. +				 */ +				if (is_modified(slot) || fill_slot(slot)) { +					unlock_slot(slot, 0); +					close_lock(slot); +				} else { +					close_slot(slot); +					unlock_slot(slot, 1); +					slot->cache_fd = slot->lock_fd; +				} +			} +		} +		print_slot(slot); +		close_slot(slot); +		return 0; +	} + +	/* If the cache slot does not exist (or its key doesn't match the +	 * current key), lets try to create a new cache slot for this +	 * request. If this fails (for whatever reason), lets just generate +	 * the content without caching it and fool the caller to belive +	 * everything worked out (but print a warning on stdout). +	 */ + +	close_slot(slot); +	if ((err = lock_slot(slot)) != 0) { +		cache_log("[cgit] Unable to lock slot %s: %s (%d)\n", +			  slot->lock_name, strerror(err), err); +		slot->fn(slot->cbdata); +		return 0; +	} + +	if ((err = fill_slot(slot)) != 0) { +		cache_log("[cgit] Unable to fill slot %s: %s (%d)\n", +			  slot->lock_name, strerror(err), err); +		unlock_slot(slot, 0); +		close_lock(slot); +		slot->fn(slot->cbdata); +		return 0; +	} +	// We've got a valid cache slot in the lock file, which +	// is about to replace the old cache slot. But if we +	// release the lockfile and then try to open the new cache +	// slot, we might get a race condition with a concurrent +	// writer for the same cache slot (with a different key). +	// Lets avoid such a race by just printing the content of +	// the lock file. +	slot->cache_fd = slot->lock_fd; +	unlock_slot(slot, 1); +	err = print_slot(slot); +	close_slot(slot); +	return err;  } -int cache_expired(struct cacheitem *item) +/* Print cached content to stdout, generate the content if necessary. */ +int cache_process(int size, const char *path, const char *key, int ttl, +		  cache_fill_fn fn, void *cbdata)  { -	if (item->ttl < 0) +	unsigned long hash; +	int len, i; +	char filename[1024]; +	char lockname[1024 + 5];  /* 5 = ".lock" */ +	struct cache_slot slot; + +	/* If the cache is disabled, just generate the content */ +	if (size <= 0) { +		fn(cbdata); +		return 0; +	} + +	/* Verify input, calculate filenames */ +	if (!path) { +		cache_log("[cgit] Cache path not specified, caching is disabled\n"); +		fn(cbdata); +		return 0; +	} +	len = strlen(path); +	if (len > sizeof(filename) - 10) { /* 10 = "/01234567\0" */ +		cache_log("[cgit] Cache path too long, caching is disabled: %s\n", +			  path); +		fn(cbdata);  		return 0; -	return item->st.st_mtime + item->ttl * 60 < time(NULL); +	} +	if (!key) +		key = ""; +	hash = hash_str(key) % size; +	strcpy(filename, path); +	if (filename[len - 1] != '/') +		filename[len++] = '/'; +	for(i = 0; i < 8; i++) { +		sprintf(filename + len++, "%x", +			(unsigned char)(hash & 0xf)); +		hash >>= 4; +	} +	filename[len] = '\0'; +	strcpy(lockname, filename); +	strcpy(lockname + len, ".lock"); +	slot.fn = fn; +	slot.cbdata = cbdata; +	slot.ttl = ttl; +	slot.cache_name = filename; +	slot.lock_name = lockname; +	slot.key = key; +	slot.keylen = strlen(key); +	return process_slot(&slot);  } + +/* Print a message to stdout */ +void cache_log(const char *format, ...) +{ +	va_list args; +	va_start(args, format); +	vfprintf(stderr, format, args); +	va_end(args); +} + @@ -6,18 +6,27 @@  #ifndef CGIT_CACHE_H  #define CGIT_CACHE_H -struct cacheitem { -	char *name; -	struct stat st; -	int ttl; -	int fd; -}; +typedef void (*cache_fill_fn)(void *cbdata); -extern char *cache_safe_filename(const char *unsafe); -extern int cache_lock(struct cacheitem *item); -extern int cache_unlock(struct cacheitem *item); -extern int cache_cancel_lock(struct cacheitem *item); -extern int cache_exist(struct cacheitem *item); -extern int cache_expired(struct cacheitem *item); + +/* Print cached content to stdout, generate the content if necessary. + * + * Parameters + *   size    max number of cache files + *   path    directory used to store cache files + *   key     the key used to lookup cache files + *   ttl     max cache time in seconds for this key + *   fn      content generator function for this key + *   cbdata  user-supplied data to the content generator function + * + * Return value + *   0 indicates success, everyting else is an error + */ +extern int cache_process(int size, const char *path, const char *key, int ttl, +			 cache_fill_fn fn, void *cbdata); + + +/* Print a message to stdout */ +extern void cache_log(const char *format, ...);  #endif /* CGIT_CACHE_H */ @@ -45,6 +45,8 @@ void config_cb(const char *name, const char *value)  		ctx.cfg.enable_log_filecount = atoi(value);  	else if (!strcmp(name, "enable-log-linecount"))  		ctx.cfg.enable_log_linecount = atoi(value); +	else if (!strcmp(name, "cache-size")) +		ctx.cfg.cache_size = atoi(value);  	else if (!strcmp(name, "cache-root"))  		ctx.cfg.cache_root = xstrdup(value);  	else if (!strcmp(name, "cache-root-ttl")) @@ -143,6 +145,8 @@ static void prepare_context(struct cgit_context *ctx)  {  	memset(ctx, 0, sizeof(ctx));  	ctx->cfg.agefile = "info/web/last-modified"; +	ctx->cfg.nocache = 0; +	ctx->cfg.cache_size = 0;  	ctx->cfg.cache_dynamic_ttl = 5;  	ctx->cfg.cache_max_create_time = 5;  	ctx->cfg.cache_repo_ttl = 5; @@ -163,47 +167,8 @@ static void prepare_context(struct cgit_context *ctx)  	ctx->page.mimetype = "text/html";  	ctx->page.charset = PAGE_ENCODING;  	ctx->page.filename = NULL; -} - -static int cgit_prepare_cache(struct cacheitem *item) -{ -	if (!ctx.repo && ctx.qry.repo) { -		ctx.page.title = fmt("%s - %s", ctx.cfg.root_title, -				      "Bad request"); -		cgit_print_http_headers(&ctx); -		cgit_print_docstart(&ctx); -		cgit_print_pageheader(&ctx); -		cgit_print_error(fmt("Unknown repo: %s", ctx.qry.repo)); -		cgit_print_docend(); -		return 0; -	} - -	if (!ctx.repo) { -		item->name = xstrdup(fmt("%s/index.%s.html", -					 ctx.cfg.cache_root, -					 cache_safe_filename(ctx.qry.raw))); -		item->ttl = ctx.cfg.cache_root_ttl; -		return 1; -	} - -	if (!ctx.qry.page) { -		item->name = xstrdup(fmt("%s/%s/index.%s.html", ctx.cfg.cache_root, -					 cache_safe_filename(ctx.repo->url), -					 cache_safe_filename(ctx.qry.raw))); -		item->ttl = ctx.cfg.cache_repo_ttl; -	} else { -		item->name = xstrdup(fmt("%s/%s/%s/%s.html", ctx.cfg.cache_root, -					 cache_safe_filename(ctx.repo->url), -					 ctx.qry.page, -					 cache_safe_filename(ctx.qry.raw))); -		if (ctx.qry.has_symref) -			item->ttl = ctx.cfg.cache_dynamic_ttl; -		else if (ctx.qry.has_sha1) -			item->ttl = ctx.cfg.cache_static_ttl; -		else -			item->ttl = ctx.cfg.cache_repo_ttl; -	} -	return 1; +	ctx->page.modified = time(NULL); +	ctx->page.expires = ctx->page.modified;  }  struct refmatch { @@ -288,8 +253,9 @@ static int prepare_repo_cmd(struct cgit_context *ctx)  	return 0;  } -static void process_request(struct cgit_context *ctx) +static void process_request(void *cbdata)  { +	struct cgit_context *ctx = cbdata;  	struct cgit_cmd *cmd;  	cmd = cgit_get_cmd(ctx); @@ -319,82 +285,6 @@ static void process_request(struct cgit_context *ctx)  		cgit_print_docend();  } -static long ttl_seconds(long ttl) -{ -	if (ttl<0) -		return 60 * 60 * 24 * 365; -	else -		return ttl * 60; -} - -static void cgit_fill_cache(struct cacheitem *item, int use_cache) -{ -	int stdout2; - -	if (use_cache) { -		stdout2 = chk_positive(dup(STDOUT_FILENO), -				       "Preserving STDOUT"); -		chk_zero(close(STDOUT_FILENO), "Closing STDOUT"); -		chk_positive(dup2(item->fd, STDOUT_FILENO), "Dup2(cachefile)"); -	} - -	ctx.page.modified = time(NULL); -	ctx.page.expires = ctx.page.modified + ttl_seconds(item->ttl); -	process_request(&ctx); - -	if (use_cache) { -		chk_zero(close(STDOUT_FILENO), "Close redirected STDOUT"); -		chk_positive(dup2(stdout2, STDOUT_FILENO), -			     "Restoring original STDOUT"); -		chk_zero(close(stdout2), "Closing temporary STDOUT"); -	} -} - -static void cgit_check_cache(struct cacheitem *item) -{ -	int i = 0; - - top: -	if (++i > ctx.cfg.max_lock_attempts) { -		die("cgit_refresh_cache: unable to lock %s: %s", -		    item->name, strerror(errno)); -	} -	if (!cache_exist(item)) { -		if (!cache_lock(item)) { -			sleep(1); -			goto top; -		} -		if (!cache_exist(item)) { -			cgit_fill_cache(item, 1); -			cache_unlock(item); -		} else { -			cache_cancel_lock(item); -		} -	} else if (cache_expired(item) && cache_lock(item)) { -		if (cache_expired(item)) { -			cgit_fill_cache(item, 1); -			cache_unlock(item); -		} else { -			cache_cancel_lock(item); -		} -	} -} - -static void cgit_print_cache(struct cacheitem *item) -{ -	static char buf[4096]; -	ssize_t i; - -	int fd = open(item->name, O_RDONLY); -	if (fd<0) -		die("Unable to open cached file %s", item->name); - -	while((i=read(fd, buf, sizeof(buf))) > 0) -		write(STDOUT_FILENO, buf, i); - -	close(fd); -} -  static void cgit_parse_args(int argc, const char **argv)  {  	int i; @@ -429,13 +319,29 @@ static void cgit_parse_args(int argc, const char **argv)  	}  } +static int calc_ttl() +{ +	if (!ctx.repo) +		return ctx.cfg.cache_root_ttl; + +	if (!ctx.qry.page) +		return ctx.cfg.cache_repo_ttl; + +	if (ctx.qry.has_symref) +		return ctx.cfg.cache_dynamic_ttl; + +	if (ctx.qry.has_sha1) +		return ctx.cfg.cache_static_ttl; + +	return ctx.cfg.cache_repo_ttl; +} +  int main(int argc, const char **argv)  { -	struct cacheitem item;  	const char *cgit_config_env = getenv("CGIT_CONFIG"); +	int err, ttl;  	prepare_context(&ctx); -	item.st.st_mtime = time(NULL);  	cgit_repolist.length = 0;  	cgit_repolist.count = 0;  	cgit_repolist.repos = NULL; @@ -449,13 +355,15 @@ int main(int argc, const char **argv)  		ctx.qry.raw = xstrdup(getenv("QUERY_STRING"));  	cgit_parse_args(argc, argv);  	http_parse_querystring(ctx.qry.raw, querystring_cb); -	if (!cgit_prepare_cache(&item)) -		return 0; -	if (ctx.cfg.nocache) { -		cgit_fill_cache(&item, 0); -	} else { -		cgit_check_cache(&item); -		cgit_print_cache(&item); -	} -	return 0; + +	ttl = calc_ttl(); +	ctx.page.expires += ttl*60; +	if (ctx.cfg.nocache) +		ctx.cfg.cache_size = 0; +	err = cache_process(ctx.cfg.cache_size, ctx.cfg.cache_root, +			    ctx.qry.raw, ttl, process_request, &ctx); +	if (err) +		cache_log("[cgit] error %d - %s\n", +			  err, strerror(err)); +	return err;  } @@ -134,6 +134,7 @@ struct cgit_config {  	char *root_title;  	char *script_name;  	char *virtual_root; +	int cache_size;  	int cache_dynamic_ttl;  	int cache_max_create_time;  	int cache_repo_ttl; diff --git a/tests/setup.sh b/tests/setup.sh index 66bf406..e37306e 100755 --- a/tests/setup.sh +++ b/tests/setup.sh @@ -44,7 +44,7 @@ setup_repos()  virtual-root=/  cache-root=$PWD/trash/cache -nocache=0 +cache-size=1021  snapshots=tar.gz tar.bz zip  enable-log-filecount=1  enable-log-linecount=1 diff --git a/tests/t0020-validate-cache.sh b/tests/t0020-validate-cache.sh new file mode 100755 index 0000000..53ec2eb --- /dev/null +++ b/tests/t0020-validate-cache.sh @@ -0,0 +1,67 @@ +#!/bin/sh + +. ./setup.sh + +prepare_tests 'Validate cache' + +run_test 'verify cache-size=0' ' + +	rm -f trash/cache/* && +	sed -i -e "s/cache-size=1021$/cache-size=0/" trash/cgitrc && +	cgit_url "" && +	cgit_url "foo" && +	cgit_url "foo/refs" && +	cgit_url "foo/tree" && +	cgit_url "foo/log" && +	cgit_url "foo/diff" && +	cgit_url "foo/patch" && +	cgit_url "bar" && +	cgit_url "bar/refs" && +	cgit_url "bar/tree" && +	cgit_url "bar/log" && +	cgit_url "bar/diff" && +	cgit_url "bar/patch" && +	test 0 -eq $(ls trash/cache | wc -l) +' + +run_test 'verify cache-size=1' ' + +	rm -f trash/cache/* && +	sed -i -e "s/cache-size=0$/cache-size=1/" trash/cgitrc && +	cgit_url "" && +	cgit_url "foo" && +	cgit_url "foo/refs" && +	cgit_url "foo/tree" && +	cgit_url "foo/log" && +	cgit_url "foo/diff" && +	cgit_url "foo/patch" && +	cgit_url "bar" && +	cgit_url "bar/refs" && +	cgit_url "bar/tree" && +	cgit_url "bar/log" && +	cgit_url "bar/diff" && +	cgit_url "bar/patch" && +	test 1 -eq $(ls trash/cache | wc -l) +' + +run_test 'verify cache-size=1021' ' + +	rm -f trash/cache/* && +	sed -i -e "s/cache-size=1$/cache-size=1021/" trash/cgitrc && +	cgit_url "" && +	cgit_url "foo" && +	cgit_url "foo/refs" && +	cgit_url "foo/tree" && +	cgit_url "foo/log" && +	cgit_url "foo/diff" && +	cgit_url "foo/patch" && +	cgit_url "bar" && +	cgit_url "bar/refs" && +	cgit_url "bar/tree" && +	cgit_url "bar/log" && +	cgit_url "bar/diff" && +	cgit_url "bar/patch" && +	test 13 -eq $(ls trash/cache | wc -l) +' + +tests_done | 
