From 25105d7ecaba474d4b7c364ebb586aac3dfc5abb Mon Sep 17 00:00:00 2001 From: Lars Hjemli Date: Sun, 10 Dec 2006 22:31:36 +0100 Subject: Add caching infrastructure This enables internal caching of page output. Page requests are split into four groups: 1) repo listing (front page) 2) repo summary 3) repo pages w/symbolic references in query string 4) repo pages w/constant sha1's in query string Each group has a TTL specified in minutes. When a page is requested, a cached filename is stat(2)'ed and st_mtime is compared to time(2). If TTL has expired (or the file didn't exist), the cached file is regenerated. When generating a cached file, locking is used to avoid parallel processing of the request. If multiple processes try to acquire the same lock, the ones who fail to get the lock serve the (expired) cached file. If the cached file doesn't exist, the process instead calls sched_yield(2) before restarting the request processing. Signed-off-by: Lars Hjemli --- .gitignore | 1 + Makefile | 6 ++-- README | 54 ++++++++++++++++++++++++++++ cache.c | 86 +++++++++++++++++++++++++++++++++++++++++++++ cgit.c | 117 +++++++++++++++++++++++++++++++++++++++++++++++++++++-------- cgit.h | 47 +++++++++++++++++++++++-- config.c | 4 +-- git.h | 60 ++++++++++++++++++++++++++++--- html.c | 6 ++-- 9 files changed, 353 insertions(+), 28 deletions(-) create mode 100644 README create mode 100644 cache.c diff --git a/.gitignore b/.gitignore index 4eaec97..c4c9ac3 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ # Files I don't care to see in git-status/commit cgit *.o +*~ diff --git a/Makefile b/Makefile index 4e72b07..243f590 100644 --- a/Makefile +++ b/Makefile @@ -3,7 +3,9 @@ INSTALL_BIN = /var/www/htdocs/cgit.cgi INSTALL_CSS = /var/www/htdocs/cgit.css EXTLIBS = ../git/libgit.a ../git/xdiff/lib.a -lz -lcrypto -OBJECTS = cgit.o config.o html.o +OBJECTS = cgit.o config.o html.o cache.o + +CFLAGS += -Wall all: cgit @@ -15,6 +17,6 @@ clean: rm -f cgit *.o cgit: $(OBJECTS) - $(CC) -o 
cgit $(OBJECTS) $(EXTLIBS) + $(CC) $(CFLAGS) -o cgit $(OBJECTS) $(EXTLIBS) $(OBJECTS): cgit.h git.h diff --git a/README b/README new file mode 100644 index 0000000..5917c37 --- /dev/null +++ b/README @@ -0,0 +1,54 @@ +Cache algorithm +=============== + +Cgit normally returns cached pages when invoked. If there is no cache file, or +the cache file has expired, it is regenerated. Finally, the cache file is +printed on stdout. + +When it is decided that a cache file needs to be regenerated, an attempt is +made to create a corresponding lockfile. If this fails, the process gives up +and uses the expired cache file instead. + +When there is no cache file for a request, an attempt is made to create a +corresponding lockfile. If this fails, the process calls sched_yield(2) before +restarting the request handling. + +In pseudocode: + + name = generate_cache_name(request); +top: + if (!exists(name)) { + if (lock_cache(name)) { + generate_cache(request, name); + unlock_cache(name); + } else { + sched_yield(); + goto top; + } + } else if (expired(name)) { + if (lock_cache(name)) { + generate_cache(request, name); + unlock_cache(name); + } + } + print_file(name); + + +The following options can be set in /etc/cgitrc to control cache behaviour: + cache-root: root directory for cache files + cache-root-ttl: TTL for the repo listing page + cache-repo-ttl: TTL for any repo's summary page + cache-dynamic-ttl: TTL for pages with symbolic references (not SHA1) + cache-static-ttl: TTL for pages with sha1 references + +TTL is specified in minutes, -1 meaning "infinite caching". + + +Naming of cache files +--------------------- +Repository listing: <cachedir>/index.html +Repository summary: <cachedir>/<repo>/index.html +Repository subpage: <cachedir>/<repo>/<page>/<querystring>.html + +The corresponding lock files have a ".lock" suffix. 
+ diff --git a/cache.c b/cache.c new file mode 100644 index 0000000..1be1ea4 --- /dev/null +++ b/cache.c @@ -0,0 +1,86 @@ +#include "cgit.h" + +const int NOLOCK = -1; + +int cache_lookup(struct cacheitem *item) +{ + if (!cgit_query_repo) { + item->name = xstrdup(fmt("%s/index.html", cgit_cache_root)); + item->ttl = cgit_cache_root_ttl; + } else if (!cgit_query_page) { + item->name = xstrdup(fmt("%s/%s/index.html", cgit_cache_root, + cgit_query_repo)); + item->ttl = cgit_cache_repo_ttl; + } else { + item->name = xstrdup(fmt("%s/%s/%s/%s.html", cgit_cache_root, + cgit_query_repo, cgit_query_page, + cgit_querystring)); + if (cgit_query_has_symref) + item->ttl = cgit_cache_dynamic_ttl; + else if (cgit_query_has_sha1) + item->ttl = cgit_cache_static_ttl; + else + item->ttl = cgit_cache_repo_ttl; + } + if (stat(item->name, &item->st)) { + item->st.st_mtime = 0; + return 0; + } + return 1; +} + +int cache_create_dirs() +{ + char *path; + + if (!cgit_query_repo) + return 0; + + path = fmt("%s/%s", cgit_cache_root, cgit_query_repo); + if (mkdir(path, S_IRWXU) && errno!=EEXIST) + return 0; + + if (cgit_query_page) { + path = fmt("%s/%s/%s", cgit_cache_root, cgit_query_repo, + cgit_query_page); + if (mkdir(path, S_IRWXU) && errno!=EEXIST) + return 0; + } + return 1; +} + +int cache_lock(struct cacheitem *item) +{ + int ret; + char *lockfile = fmt("%s.lock", item->name); + + top: + item->fd = open(lockfile, O_WRONLY | O_CREAT | O_EXCL, S_IRUSR|S_IWUSR); + if (item->fd == NOLOCK && errno == ENOENT && cache_create_dirs()) + goto top; + if (item->fd == NOLOCK && errno == EEXIST) { + struct stat st; + time_t t; + if (stat(lockfile, &st)) + return ret; + t = time(NULL); + if (t-st.st_mtime > cgit_cache_max_create_time && + !unlink(lockfile)) + goto top; + return 0; + } + return (item->fd > 0); +} + +int cache_unlock(struct cacheitem *item) +{ + close(item->fd); + return (rename(fmt("%s.lock", item->name), item->name) == 0); +} + +int cache_expired(struct cacheitem *item) +{ + if 
(item->ttl < 0) + return 0; + return item->st.st_mtime + item->ttl * 60 < time(NULL); +} diff --git a/cgit.c b/cgit.c index 4c14f77..09c857c 100644 --- a/cgit.c +++ b/cgit.c @@ -10,29 +10,47 @@ static const char cgit_error[] = static const char cgit_lib_error[] = "
%s: %s
"; +int htmlfd = 0; -char *cgit_root = "/var/git"; +char *cgit_root = "/usr/src/git"; char *cgit_root_title = "Git repository browser"; char *cgit_css = "/cgit.css"; char *cgit_logo = "/git-logo.png"; char *cgit_logo_link = "http://www.kernel.org/pub/software/scm/git/docs/"; char *cgit_virtual_root = NULL; +char *cgit_cache_root = "/var/cache/cgit"; + +int cgit_cache_root_ttl = 5; +int cgit_cache_repo_ttl = 5; +int cgit_cache_dynamic_ttl = 5; +int cgit_cache_static_ttl = -1; +int cgit_cache_max_create_time = 5; + char *cgit_repo_name = NULL; char *cgit_repo_desc = NULL; char *cgit_repo_owner = NULL; +int cgit_query_has_symref = 0; +int cgit_query_has_sha1 = 0; + +char *cgit_querystring = NULL; char *cgit_query_repo = NULL; char *cgit_query_page = NULL; char *cgit_query_head = NULL; +char *cgit_query_sha1 = NULL; + +struct cacheitem cacheitem; int cgit_parse_query(char *txt, configfn fn) { - char *t = txt, *value = NULL, c; + char *t, *value = NULL, c; if (!txt) return 0; + t = txt = xstrdup(txt); + while((c=*t) != '\0') { if (c=='=') { *t = '\0'; @@ -82,8 +100,13 @@ void cgit_querystring_cb(const char *name, const char *value) cgit_query_repo = xstrdup(value); else if (!strcmp(name, "p")) cgit_query_page = xstrdup(value); - else if (!strcmp(name, "h")) + else if (!strcmp(name, "h")) { cgit_query_head = xstrdup(value); + cgit_query_has_symref = 1; + } else if (!strcmp(name, "id")) { + cgit_query_sha1 = xstrdup(value); + cgit_query_has_sha1 = 1; + } } char *cgit_repourl(const char *reponame) @@ -136,9 +159,32 @@ static int cgit_print_branch_cb(const char *refname, const unsigned char *sha1, return 0; } +/* Sun, 06 Nov 1994 08:49:37 GMT */ +static char *http_date(time_t t) +{ + static char day[][4] = {"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"}; + static char month[][4] = {"Jan", "Feb", "Mar", "Apr", "May", "Jun", + "Jul", "Aug", "Sep", "Oct", "Now", "Dec"}; + struct tm *tm = gmtime(&t); + return fmt("%s, %02d %s %04d %02d:%02d:%02d GMT", day[tm->tm_wday], + 
tm->tm_mday, month[tm->tm_mon], 1900+tm->tm_year, + tm->tm_hour, tm->tm_min, tm->tm_sec); +} + +static int ttl_seconds(int ttl) +{ + if (ttl<0) + return 60 * 60 * 24 * 365; + else + return ttl * 60; +} + static void cgit_print_docstart(char *title) { html("Content-Type: text/html; charset=utf-8\n"); + htmlf("Last-Modified: %s\n", http_date(cacheitem.st.st_mtime)); + htmlf("Expires: %s\n", http_date(cacheitem.st.st_mtime + + ttl_seconds(cacheitem.ttl))); html("\n"); html(cgit_doctype); html("\n"); @@ -175,6 +221,7 @@ static void cgit_print_repolist() struct stat st; char *name; + chdir(cgit_root); cgit_print_docstart(cgit_root_title); cgit_print_pageheader(cgit_root_title); @@ -197,7 +244,7 @@ static void cgit_print_repolist() continue; cgit_repo_name = cgit_repo_desc = cgit_repo_owner = NULL; - name = fmt("%s/.git/info/cgit", de->d_name); + name = fmt("%s/info/cgit", de->d_name); if (cgit_read_config(name, cgit_repo_config_cb)) continue; @@ -291,7 +338,7 @@ static void cgit_print_commit_shortlog(struct commit *commit) strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M:%S", time); html_txt(buf); html(""); - char *qry = fmt("h=%s", sha1_to_hex(commit->object.sha1)); + char *qry = fmt("id=%s", sha1_to_hex(commit->object.sha1)); char *url = cgit_pageurl(cgit_query_repo, "view", qry); html_link_open(url, NULL, NULL); html_txt(subject); @@ -371,8 +418,8 @@ static void cgit_print_object(char *hex) static void cgit_print_repo_page() { - if (chdir(cgit_query_repo) || - cgit_read_config(".git/info/cgit", cgit_repo_config_cb)) { + if (chdir(fmt("%s/%s", cgit_root, cgit_query_repo)) || + cgit_read_config("info/cgit", cgit_repo_config_cb)) { char *title = fmt("%s - %s", cgit_root_title, "Bad request"); cgit_print_docstart(title); cgit_print_pageheader(title); @@ -381,7 +428,7 @@ static void cgit_print_repo_page() cgit_print_docend(); return; } - + setenv("GIT_DIR", fmt("%s/%s", cgit_root, cgit_query_repo), 1); char *title = fmt("%s - %s", cgit_repo_name, cgit_repo_desc); 
cgit_print_docstart(title); cgit_print_pageheader(title); @@ -390,21 +437,61 @@ static void cgit_print_repo_page() else if (!strcmp(cgit_query_page, "log")) { cgit_print_log(cgit_query_head, 0, 100); } else if (!strcmp(cgit_query_page, "view")) { - cgit_print_object(cgit_query_head); + cgit_print_object(cgit_query_sha1); } cgit_print_docend(); } -int main(int argc, const char **argv) +static void cgit_fill_cache(struct cacheitem *item) { - if (cgit_read_config("/etc/cgitrc", cgit_global_config_cb)) - die("Error reading config: %d %s", errno, strerror(errno)); - - chdir(cgit_root); - cgit_parse_query(getenv("QUERY_STRING"), cgit_querystring_cb); + htmlfd = item->fd; + item->st.st_mtime = time(NULL); if (cgit_query_repo) cgit_print_repo_page(); else cgit_print_repolist(); +} + +static void cgit_refresh_cache(struct cacheitem *item) +{ + top: + if (!cache_lookup(item)) { + if (cache_lock(item)) { + cgit_fill_cache(item); + cache_unlock(item); + } else { + sched_yield(); + goto top; + } + } else if (cache_expired(item)) { + if (cache_lock(item)) { + cgit_fill_cache(item); + cache_unlock(item); + } + } +} + +static void cgit_print_cache(struct cacheitem *item) +{ + static char buf[4096]; + ssize_t i; + + int fd = open(item->name, O_RDONLY); + if (fd<0) + die("Unable to open cached file %s", item->name); + + while((i=read(fd, buf, sizeof(buf))) > 0) + write(STDOUT_FILENO, buf, i); + + close(fd); +} + +int main(int argc, const char **argv) +{ + cgit_read_config("/etc/cgitrc", cgit_global_config_cb); + cgit_querystring = xstrdup(getenv("QUERY_STRING")); + cgit_parse_query(cgit_querystring, cgit_querystring_cb); + cgit_refresh_cache(&cacheitem); + cgit_print_cache(&cacheitem); return 0; } diff --git a/cgit.h b/cgit.h index 19f7ba7..1e084d4 100644 --- a/cgit.h +++ b/cgit.h @@ -3,6 +3,46 @@ #include "git.h" #include +#include +#include + +typedef void (*configfn)(const char *name, const char *value); + +struct cacheitem { + char *name; + struct stat st; + int ttl; + int fd; 
+}; + +extern char *cgit_root; +extern char *cgit_root_title; +extern char *cgit_css; +extern char *cgit_logo; +extern char *cgit_logo_link; +extern char *cgit_virtual_root; +extern char *cgit_cache_root; + +extern int cgit_cache_root_ttl; +extern int cgit_cache_repo_ttl; +extern int cgit_cache_dynamic_ttl; +extern int cgit_cache_static_ttl; +extern int cgit_cache_max_create_time; + +extern char *cgit_repo_name; +extern char *cgit_repo_desc; +extern char *cgit_repo_owner; + +extern int cgit_query_has_symref; +extern int cgit_query_has_sha1; + +extern char *cgit_querystring; +extern char *cgit_query_repo; +extern char *cgit_query_page; +extern char *cgit_query_head; +extern char *cgit_query_sha1; + +extern int htmlfd; extern char *fmt(const char *format,...); @@ -10,12 +50,15 @@ extern void html(const char *txt); extern void htmlf(const char *format,...); extern void html_txt(char *txt); extern void html_attr(char *txt); - extern void html_link_open(char *url, char *title, char *class); extern void html_link_close(void); -typedef void (*configfn)(const char *name, const char *value); extern int cgit_read_config(const char *filename, configfn fn); +extern int cache_lookup(struct cacheitem *item); +extern int cache_lock(struct cacheitem *item); +extern int cache_unlock(struct cacheitem *item); +extern int cache_expired(struct cacheitem *item); + #endif /* CGIT_H */ diff --git a/config.c b/config.c index 858ab69..ee49b62 100644 --- a/config.c +++ b/config.c @@ -32,7 +32,7 @@ int read_config_line(FILE *f, char *line, const char **value, int bufsize) skip_line(f); continue; } - if (!isname && isblank(c)) + if (!isname && isspace(c)) continue; if (c=='=' && !*value) { @@ -64,7 +64,7 @@ int cgit_read_config(const char *filename, configfn fn) if (!f) return -1; - while(len = read_config_line(f, line, &value, sizeof(line))) + while((len = read_config_line(f, line, &value, sizeof(line))) > 0) (*fn)(line, value); fclose(f); diff --git a/git.h b/git.h index 443f216..dfa3542 
100644 --- a/git.h +++ b/git.h @@ -33,6 +33,26 @@ #include +/* On most systems <limits.h> would have given us this, but + * not on some systems (e.g. GNU/Hurd). + */ +#ifndef PATH_MAX +#define PATH_MAX 4096 +#endif + +#ifdef __GNUC__ +#define NORETURN __attribute__((__noreturn__)) +#else +#define NORETURN +#ifndef __attribute__ +#define __attribute__(x) +#endif +#endif + + +extern void die(const char *err, ...) NORETURN __attribute__((format (printf, 1, 2))); + + static inline char* xstrdup(const char *str) { char *ret = strdup(str); @@ -108,9 +128,13 @@ static inline ssize_t xwrite(int fd, const void *buf, size_t len) #define MINIMUM_ABBREV 4 #define DEFAULT_ABBREV 7 +extern int sha1_object_info(const unsigned char *, char *, unsigned long *); extern void * read_sha1_file(const unsigned char *sha1, char *type, unsigned long *size); +extern int get_sha1(const char *str, unsigned char *sha1); +extern int get_sha1_hex(const char *hex, unsigned char *sha1); +extern char *sha1_to_hex(const unsigned char *sha1); /* static buffer result! 
*/ @@ -183,6 +207,21 @@ struct commit { }; +struct commit *lookup_commit(const unsigned char *sha1); +struct commit *lookup_commit_reference(const unsigned char *sha1); +struct commit *lookup_commit_reference_gently(const unsigned char *sha1, + int quiet); + +int parse_commit_buffer(struct commit *item, void *buffer, unsigned long size); +int parse_commit(struct commit *item); + +struct commit_list * commit_list_insert(struct commit *item, struct commit_list **list_p); +struct commit_list * insert_by_date(struct commit *item, struct commit_list **list); + +void free_commit_list(struct commit_list *list); + +void sort_by_date(struct commit_list **list); + /* Commit formats */ enum cmit_fmt { CMIT_FMT_RAW, @@ -197,13 +236,9 @@ enum cmit_fmt { CMIT_FMT_UNSPECIFIED, }; +extern unsigned long pretty_print_commit(enum cmit_fmt fmt, const struct commit *, unsigned long len, char *buf, unsigned long space, int abbrev, const char *subject, const char *after_subject, int relative_date); -struct commit *lookup_commit(const unsigned char *sha1); -struct commit *lookup_commit_reference(const unsigned char *sha1); -struct commit *lookup_commit_reference_gently(const unsigned char *sha1, - int quiet); - typedef void (*topo_sort_set_fn_t)(struct commit*, void *data); typedef void* (*topo_sort_get_fn_t)(struct commit*); @@ -306,6 +341,16 @@ enum color_diff { +/* + * from git:refs.g + */ + +typedef int each_ref_fn(const char *refname, const unsigned char *sha1, int flags, void *cb_data); +extern int head_ref(each_ref_fn, void *); +extern int for_each_ref(each_ref_fn, void *); +extern int for_each_tag_ref(each_ref_fn, void *); +extern int for_each_branch_ref(each_ref_fn, void *); +extern int for_each_remote_ref(each_ref_fn, void *); @@ -391,6 +436,11 @@ struct rev_info { }; +extern void init_revisions(struct rev_info *revs, const char *prefix); +extern int setup_revisions(int argc, const char **argv, struct rev_info *revs, const char *def); +extern int handle_revision_arg(const char 
*arg, struct rev_info *revs,int flags,int cant_be_filename); + +extern void prepare_revision_walk(struct rev_info *revs); extern struct commit *get_revision(struct rev_info *revs); diff --git a/html.c b/html.c index 5780dc1..bf1490f 100644 --- a/html.c +++ b/html.c @@ -20,16 +20,18 @@ char *fmt(const char *format, ...) void html(const char *txt) { - fputs(txt, stdout); + write(htmlfd, txt, strlen(txt)); } void htmlf(const char *format, ...) { + static char buf[65536]; va_list args; va_start(args, format); - vprintf(format, args); + vsnprintf(buf, sizeof(buf), format, args); va_end(args); + html(buf); } void html_txt(char *txt) -- cgit