summary | shortlog | log | commit | commitdiff | tree
raw | patch | inline | side by side (parent: d1bf0e0)
raw | patch | inline | side by side (parent: d1bf0e0)
author | Junio C Hamano <gitster@pobox.com> | |
Fri, 20 May 2011 21:33:31 +0000 (14:33 -0700) | ||
committer | Junio C Hamano <gitster@pobox.com> | |
Thu, 26 May 2011 23:47:15 +0000 (16:47 -0700) |
This introduces an API to plug custom filters to an input stream.
The caller gets get_stream_filter("path") to obtain an appropriate
filter for the path, and then uses it when opening an input stream
via open_istream(). After that, the caller can read from the stream
with read_istream(), and close it with close_istream(), just like an
unfiltered stream.
This only adds a "null" filter that is a pass-thru filter, but later
changes can add LF-to-CRLF and other filters, and the callers of the
streaming API do not have to change.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
The caller gets get_stream_filter("path") to obtain an appropriate
filter for the path, and then uses it when opening an input stream
via open_istream(). After that, the caller can read from the stream
with read_istream(), and close it with close_istream(), just like an
unfiltered stream.
This only adds a "null" filter that is a pass-thru filter, but later
changes can add LF-to-CRLF and other filters, and the callers of the
streaming API do not have to change.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
convert.c | patch | blob | history | |
convert.h | patch | blob | history | |
entry.c | patch | blob | history | |
streaming.c | patch | blob | history | |
streaming.h | patch | blob | history |
diff --git a/convert.c b/convert.c
index 264af1d5ba0924bd867962d2337bb5c019b87d81..1ec91a370e98c9bcaa2d539935ae45005b9f7d68 100644 (file)
--- a/convert.c
+++ b/convert.c
@@ -814,12 +814,69 @@ int renormalize_buffer(const char *path, const char *src, size_t len, struct str
return ret | convert_to_git(path, src, len, dst, 0);
}
+/*****************************************************************
+ *
+ * Streaming converison support
+ *
+ *****************************************************************/
+
+typedef int (*filter_fn)(struct stream_filter *,
+ const char *input, size_t *isize_p,
+ char *output, size_t *osize_p);
+typedef void (*free_fn)(struct stream_filter *);
+
+struct stream_filter_vtbl {
+ filter_fn filter;
+ free_fn free;
+};
+
+struct stream_filter {
+ struct stream_filter_vtbl *vtbl;
+};
+
+static int null_filter_fn(struct stream_filter *filter,
+ const char *input, size_t *isize_p,
+ char *output, size_t *osize_p)
+{
+ size_t count = *isize_p;
+ if (*osize_p < count)
+ count = *osize_p;
+ if (count) {
+ memmove(output, input, count);
+ *isize_p -= count;
+ *osize_p -= count;
+ }
+ return 0;
+}
+
+static void null_free_fn(struct stream_filter *filter)
+{
+ ; /* nothing -- null instances are shared */
+}
+
+static struct stream_filter_vtbl null_vtbl = {
+ null_filter_fn,
+ null_free_fn,
+};
+
+static struct stream_filter null_filter_singleton = {
+ &null_vtbl,
+};
+
+int is_null_stream_filter(struct stream_filter *filter)
+{
+ return filter == &null_filter_singleton;
+}
+
/*
- * You would be crazy to set CRLF, smuge/clean or ident to
- * a large binary blob you would want us not to slurp into
- * the memory!
+ * Return an appropriately constructed filter for the path, or NULL if
+ * the contents cannot be filtered without reading the whole thing
+ * in-core.
+ *
+ * Note that you would be crazy to set CRLF, smuge/clean or ident to a
+ * large binary blob you would want us not to slurp into the memory!
*/
-int can_bypass_conversion(const char *path)
+struct stream_filter *get_stream_filter(const char *path, const unsigned char *sha1)
{
struct conv_attrs ca;
enum crlf_action crlf_action;
if (ca.ident ||
(ca.drv && (ca.drv->smudge || ca.drv->clean)))
- return 0;
+ return NULL;
crlf_action = input_crlf_action(ca.crlf_action, ca.eol_attr);
if ((crlf_action == CRLF_BINARY) || (crlf_action == CRLF_INPUT) ||
(crlf_action == CRLF_GUESS && auto_crlf == AUTO_CRLF_FALSE))
- return 1;
- return 0;
+ return &null_filter_singleton;
+
+ return NULL;
+}
+
+void free_stream_filter(struct stream_filter *filter)
+{
+ filter->vtbl->free(filter);
+}
+
+int stream_filter(struct stream_filter *filter,
+ const char *input, size_t *isize_p,
+ char *output, size_t *osize_p)
+{
+ return filter->vtbl->filter(filter, input, isize_p, output, osize_p);
}
diff --git a/convert.h b/convert.h
index b1b4a382dfc2606a1a64fc19caf8b233da174a5e..17d75098328c65638c435d451564999db3cad281 100644 (file)
--- a/convert.h
+++ b/convert.h
size_t len, struct strbuf *dst);
extern int renormalize_buffer(const char *path, const char *src, size_t len,
struct strbuf *dst);
-extern int can_bypass_conversion(const char *path);
+
+/*****************************************************************
+ *
+ * Streaming converison support
+ *
+ *****************************************************************/
+
+struct stream_filter; /* opaque */
+
+extern struct stream_filter *get_stream_filter(const char *path, const unsigned char *);
+extern void free_stream_filter(struct stream_filter *);
+extern int is_null_stream_filter(struct stream_filter *);
+
+/*
+ * Use as much input up to *isize_p and fill output up to *osize_p;
+ * update isize_p and osize_p to indicate how much buffer space was
+ * consumed and filled. Return 0 on success, non-zero on error.
+ */
+extern int stream_filter(struct stream_filter *,
+ const char *input, size_t *isize_p,
+ char *output, size_t *osize_p);
+
#endif /* CONVERT_H */
index e2dc16c13143b97b28e4bc82238bda3c899306f3..852fea13955475c1e2fda9cfc25a63a54a1f61c7 100644 (file)
--- a/entry.c
+++ b/entry.c
}
static int streaming_write_entry(struct cache_entry *ce, char *path,
+ struct stream_filter *filter,
const struct checkout *state, int to_tempfile,
int *fstat_done, struct stat *statbuf)
{
ssize_t kept = 0;
int fd = -1;
- st = open_istream(ce->sha1, &type, &sz);
+ st = open_istream(ce->sha1, &type, &sz, filter);
if (!st)
return -1;
if (type != OBJ_BLOB)
@@ -186,11 +187,14 @@ static int write_entry(struct cache_entry *ce, char *path, const struct checkout
size_t wrote, newsize = 0;
struct stat st;
- if ((ce_mode_s_ifmt == S_IFREG) &&
- can_bypass_conversion(path) &&
- !streaming_write_entry(ce, path, state, to_tempfile,
- &fstat_done, &st))
- goto finish;
+ if (ce_mode_s_ifmt == S_IFREG) {
+ struct stream_filter *filter = get_stream_filter(path, ce->sha1);
+ if (filter &&
+ !streaming_write_entry(ce, path, filter,
+ state, to_tempfile,
+ &fstat_done, &st))
+ goto finish;
+ }
switch (ce_mode_s_ifmt) {
case S_IFREG:
diff --git a/streaming.c b/streaming.c
index 06029266443f3e49516abbdaf8afbb4663de9f98..565f000790b482a8977a36ca9a9cfcbd633f056d 100644 (file)
--- a/streaming.c
+++ b/streaming.c
static open_method_decl(incore);
static open_method_decl(loose);
static open_method_decl(pack_non_delta);
+static struct git_istream *attach_stream_filter(struct git_istream *st,
+ struct stream_filter *filter);
+
static open_istream_fn open_istream_tbl[] = {
open_istream_incore,
open_istream_pack_non_delta,
};
+#define FILTER_BUFFER (1024*16)
+
+struct filtered_istream {
+ struct git_istream *upstream;
+ struct stream_filter *filter;
+ char ibuf[FILTER_BUFFER];
+ char obuf[FILTER_BUFFER];
+ int i_end, i_ptr;
+ int o_end, o_ptr;
+};
+
struct git_istream {
const struct stream_vtbl *vtbl;
unsigned long size; /* inflated size of full object */
struct packed_git *pack;
off_t pos;
} in_pack;
+
+ struct filtered_istream filtered;
} u;
};
struct git_istream *open_istream(const unsigned char *sha1,
enum object_type *type,
- unsigned long *size)
+ unsigned long *size,
+ struct stream_filter *filter)
{
struct git_istream *st;
struct object_info oi;
return NULL;
}
}
+ if (st && filter) {
+ /* Add "&& !is_null_stream_filter(filter)" for performance */
+ struct git_istream *nst = attach_stream_filter(st, filter);
+ if (!nst)
+ close_istream(st);
+ st = nst;
+ }
+
*size = st->size;
return st;
}
}
+/*****************************************************************
+ *
+ * Filtered stream
+ *
+ *****************************************************************/
+
+static close_method_decl(filtered)
+{
+ free_stream_filter(st->u.filtered.filter);
+ return close_istream(st->u.filtered.upstream);
+}
+
+static read_method_decl(filtered)
+{
+ struct filtered_istream *fs = &(st->u.filtered);
+ size_t filled = 0;
+
+ while (sz) {
+ /* do we already have filtered output? */
+ if (fs->o_ptr < fs->o_end) {
+ size_t to_move = fs->o_end - fs->o_ptr;
+ if (sz < to_move)
+ to_move = sz;
+ memcpy(buf + filled, fs->obuf + fs->o_ptr, to_move);
+ fs->o_ptr += to_move;
+ sz -= to_move;
+ filled += to_move;
+ continue;
+ }
+ fs->o_end = fs->o_ptr = 0;
+
+ /* do we have anything to feed the filter with? */
+ if (fs->i_ptr < fs->i_end) {
+ size_t to_feed = fs->i_end - fs->i_ptr;
+ size_t to_receive = FILTER_BUFFER;
+ if (stream_filter(fs->filter,
+ fs->ibuf + fs->i_ptr, &to_feed,
+ fs->obuf, &to_receive))
+ return -1;
+ fs->i_ptr = fs->i_end - to_feed;
+ fs->o_end = FILTER_BUFFER - to_receive;
+ continue;
+ }
+ fs->i_end = fs->i_ptr = 0;
+
+ /* refill the input from the upstream */
+ fs->i_end = read_istream(fs->upstream, fs->ibuf, FILTER_BUFFER);
+ if (fs->i_end <= 0)
+ break;
+ }
+ return filled;
+}
+
+static struct stream_vtbl filtered_vtbl = {
+ close_istream_filtered,
+ read_istream_filtered,
+};
+
+static struct git_istream *attach_stream_filter(struct git_istream *st,
+ struct stream_filter *filter)
+{
+ struct git_istream *ifs = xmalloc(sizeof(*ifs));
+ struct filtered_istream *fs = &(ifs->u.filtered);
+
+ ifs->vtbl = &filtered_vtbl;
+ fs->upstream = st;
+ fs->filter = filter;
+ fs->i_end = fs->i_ptr = 0;
+ fs->o_end = fs->o_ptr = 0;
+ ifs->size = -1; /* unknown */
+ return ifs;
+}
+
/*****************************************************************
*
* Loose object stream
diff --git a/streaming.h b/streaming.h
index 18cbe68ac169cdeba709412cc5dcbf2686505dbc..589e857b8c4ad68e30b91da2eb29a076b98ef903 100644 (file)
--- a/streaming.h
+++ b/streaming.h
/* opaque */
struct git_istream;
-extern struct git_istream *open_istream(const unsigned char *, enum object_type *, unsigned long *);
+extern struct git_istream *open_istream(const unsigned char *, enum object_type *, unsigned long *, struct stream_filter *);
extern int close_istream(struct git_istream *);
extern ssize_t read_istream(struct git_istream *, char *, size_t);