diff --git a/configure.ac b/configure.ac
index 51dd1594ad..47112fb7c1 100644
--- a/configure.ac
+++ b/configure.ac
@@ -79,6 +79,8 @@ AX_PTHREAD(
     [ CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
       LDFLAGS="$LDFLAGS $PTHREAD_CFLAGS"
       CC="$PTHREAD_CC"],[])
+AC_CHECK_FUNCS(posix_memalign, [AC_DEFINE(HAVE_POSIX_MEMALIGN, 1, [Define to 1 if you have the posix_memalign() function.])])
+
 # Not all compilers include /usr/local in the include and link path
 if test -d /usr/local/include; then
     CFLAGS="$CFLAGS -I/usr/local/include"
diff --git a/tsk3/fs/fs_dir.c b/tsk3/fs/fs_dir.c
index 88b57a7b5a..0203f5208a 100644
--- a/tsk3/fs/fs_dir.c
+++ b/tsk3/fs/fs_dir.c
@@ -607,6 +607,10 @@ tsk_fs_dir_walk_lcl(TSK_FS_INFO * a_fs, DENT_DINFO * a_dinfo,
                 if (depth_added)
                     *a_dinfo->didx[a_dinfo->depth] = '\0';
             }
+            else {
+                if (tsk_verbose)
+                    fprintf(stderr, "tsk_fs_dir_walk_lcl: Loop detected with address %" PRIuINUM "\n", fs_file->name->meta_addr);
+            }
         }
 
         // remove the pointer to name buffer
@@ -821,6 +825,9 @@ tsk_fs_dir_load_inum_named(TSK_FS_INFO * a_fs)
     }
     tsk_release_lock(&a_fs->list_inum_named_lock);
 
+    if (tsk_verbose)
+        fprintf(stderr, "tsk_fs_dir_load_inum_named: Performing dir walk to find named files\n");
+
     /* Do a dir_walk.  There is internal caching code that will populate
      * the structure.  The callback is really a dummy call.  This could
      * be made more effecient in the future (not do callbacks...).  We
@@ -861,6 +868,17 @@ load_orphan_dir_walk_cb(TSK_FS_FILE * a_fs_file, const char *a_path,
 
     // add this entry to the orphan list
     if (a_fs_file->meta) {
+
+        /* check if we have already added it as an orphan (in a subdirectory)
+         * Not entirely sure how possible this is, but it was added while
+         * debugging an infinite loop problem. */
+        if (tsk_list_find(data->orphan_subdir_list, a_fs_file->meta->addr)) {
+            if (tsk_verbose)
+                fprintf(stderr, "load_orphan_dir_walk_cb: Detected loop with address %" PRIuINUM "\n",
+                    a_fs_file->meta->addr);
+            return TSK_WALK_STOP;
+        }
+
         tsk_list_add(&data->orphan_subdir_list, a_fs_file->meta->addr);
 
         /* FAT file systems spend a lot of time hunting for parent
@@ -928,12 +946,15 @@ find_orphan_meta_walk_cb(TSK_FS_FILE * a_fs_file, void *a_ptr)
 
     /* Go into directories to mark their contents as "seen" */
     if (a_fs_file->meta->type == TSK_FS_META_TYPE_DIR) {
+        if (tsk_verbose)
+            fprintf(stderr, "find_orphan_meta_walk_cb: Going into directory %"PRIuINUM" to mark contents as seen\n", a_fs_file->meta->addr);
+
         if (tsk_fs_dir_walk(fs, a_fs_file->meta->addr,
                 TSK_FS_DIR_WALK_FLAG_UNALLOC | TSK_FS_DIR_WALK_FLAG_RECURSE
                 | TSK_FS_DIR_WALK_FLAG_NOORPHAN, load_orphan_dir_walk_cb,
                 data)) {
             tsk_error_errstr2_concat
-                (" - tsk_fs_dir_load_inum_named: identifying inodes allocated by file names");
+                (" - find_orphan_meta_walk_cb: identifying inodes allocated by file names");
             return TSK_ERR;
         }
     }
@@ -994,6 +1015,9 @@ tsk_fs_dir_find_orphans(TSK_FS_INFO * a_fs, TSK_FS_DIR * a_fs_dir)
         return TSK_OK;
     }
 
+    if (tsk_verbose)
+        fprintf(stderr, "tsk_fs_dir_find_orphans: Searching for orphan files\n");
+
     memset(&data, 0, sizeof(FIND_ORPHAN_DATA));
 
     /* We first need to determine which of the unallocated meta structures
@@ -1016,6 +1040,9 @@ tsk_fs_dir_find_orphans(TSK_FS_INFO * a_fs, TSK_FS_DIR * a_fs_dir)
         return TSK_ERR;
     }
 
+    if (tsk_verbose)
+        fprintf(stderr, "tsk_fs_dir_find_orphans: Performing inode_walk to find unnamed metadata structures\n");
+
     if (tsk_fs_meta_walk(a_fs, a_fs->first_inum, a_fs->last_inum,
             TSK_FS_META_FLAG_UNALLOC | TSK_FS_META_FLAG_USED,
             find_orphan_meta_walk_cb, &data)) {
@@ -1027,6 +1054,10 @@ tsk_fs_dir_find_orphans(TSK_FS_INFO * a_fs, TSK_FS_DIR * a_fs_dir)
     tsk_fs_name_free(data.fs_name);
     data.fs_name = NULL;
 
+
+    if (tsk_verbose)
+        fprintf(stderr, "tsk_fs_dir_find_orphans: De-duping orphan files and directories\n");
+
     /* do some cleanup on the final list. This cleanup will compare the
      * entries in the root orphan directory with files that can be accessed
      * from subdirectories of the orphan directory.  These entries will exist if
diff --git a/tsk3/img/img_io.c b/tsk3/img/img_io.c
index d90b42da6d..72bcfb9f45 100644
--- a/tsk3/img/img_io.c
+++ b/tsk3/img/img_io.c
@@ -12,6 +12,99 @@
 
 #include "tsk_img_i.h"
 
+#define MIN(x,y) ( (x) < (y) ? (x) : (y) )
+
+/**
+ * \internal
+ * Promotes the selected cache entry, since it has been recently requested.
+ * This must be called while already under the cache lock.
+ * @param a_img_info Disk image containing cache
+ * @param ent Index of the cache entry to promote
+ * @returns New index of the cache entry (currently always zero)
+ */
+static inline int
+tsk_cache_promote(TSK_IMG_INFO * a_img_info, int ent)
+{
+    if (ent == 0)
+        return 0;
+
+    struct TSK_IMG_INFO_CACHE_ENTRY temp;
+    memcpy(&temp, &(a_img_info->cache_info[ent]), sizeof(struct TSK_IMG_INFO_CACHE_ENTRY));
+    memmove(&(a_img_info->cache_info[1]), &(a_img_info->cache_info[0]), sizeof(struct TSK_IMG_INFO_CACHE_ENTRY) * ent);
+    memcpy(&(a_img_info->cache_info[0]), &temp, sizeof(struct TSK_IMG_INFO_CACHE_ENTRY));
+
+    return 0;
+}
+
+/**
+ * \internal
+ * Ensures that the disk block at the specified offset is in the cache,
+ * either by finding the already-cached block or by reading it from disk.
+ * This must be called while already under the cache lock.
+ * @param a_img_info Disk image to read from
+ * @param a_off Byte offset of the disk block; required to be a multiple of
+ *              TSK_IMG_INFO_CACHE_LEN
+ * @param a_entry Output: address of a pointer to a cache info entry that will
+ *                be set by this function.  (Address should not be used if
+ *                the function returns an error.)
+ * @returns 0 on error or 1 on success
+ */
+static inline int
+tsk_get_cache_block(TSK_IMG_INFO * a_img_info,
+    TSK_OFF_T a_off, struct TSK_IMG_INFO_CACHE_ENTRY ** a_entry)
+{
+    // we require that we're called with a page-aligned offset
+    if ( ( a_off & (TSK_IMG_INFO_CACHE_LEN - 1) ) != 0 ) {
+        fprintf(stderr, "Internal error: request cache page %" PRIuOFF "\n", a_off);
+        exit(-1);
+    }
+
+    int ent;
+
+    // find existing cache page
+    for (ent = 0; ent < a_img_info->cache_used; ent++) {
+        if (a_img_info->cache_info[ent].offset == a_off) {
+            ent = tsk_cache_promote(a_img_info, ent);
+            *a_entry = &(a_img_info->cache_info[ent]);
+            return 1;
+        }
+    }
+
+    // did not find existing cache page
+
+    if (a_img_info->cache_used < TSK_IMG_INFO_CACHE_NUM) {
+        // if we have not yet filled the cache, add a new cache page
+        ent = (a_img_info->cache_used)++;
+        a_img_info->cache_info[ent].page = ent;
+    }
+    else {
+        // otherwise, use the last (lowest-priority) cache page
+        ent = a_img_info->cache_used - 1;
+    }
+
+    // keep the result signed: read() reports errors as negative values, which
+    // would otherwise wrap to a huge "valid" length in the size_t field
+    ssize_t cnt = a_img_info->read(a_img_info, a_off,
+        &(a_img_info->cache[a_img_info->cache_info[ent].page * TSK_IMG_INFO_CACHE_LEN]),
+        TSK_IMG_INFO_CACHE_LEN);
+
+    if (cnt <= 0) {
+        // the page buffer may have been partly overwritten, so tag the slot
+        // with an unaligned offset that no later lookup can match
+        a_img_info->cache_info[ent].offset = -1;
+        a_img_info->cache_info[ent].length = 0;
+        *a_entry = &(a_img_info->cache_info[ent]);
+        return 0;
+    }
+
+    a_img_info->cache_info[ent].offset = a_off;
+    a_img_info->cache_info[ent].length = (size_t) cnt;
+
+    ent = tsk_cache_promote(a_img_info, ent);
+    *a_entry = &(a_img_info->cache_info[ent]);
+    return 1;
+}
+
 /**
  * \ingroup imglib
  * Reads data from an open disk image
@@ -25,11 +118,8 @@ ssize_t
 tsk_img_read(TSK_IMG_INFO * a_img_info, TSK_OFF_T a_off,
     char *a_buf, size_t a_len)
 {
-#define CACHE_AGE   1000
-    ssize_t retval = 0;
-    int i;
-    int cache_next = 0;         // index to lowest age cache (to use next)
-    size_t len2;
+
+    size_t len2;
 
     if (a_img_info == NULL) {
         tsk_error_reset();
@@ -44,13 +134,7 @@ tsk_img_read(TSK_IMG_INFO * a_img_info, TSK_OFF_T a_off,
      */
     tsk_take_lock(&(a_img_info->cache_lock));
 
-    // if they ask for more than the cache length, skip the cache
-    if (a_len > TSK_IMG_INFO_CACHE_LEN) {
-        ssize_t nbytes = a_img_info->read(a_img_info, a_off, a_buf, a_len);
-        tsk_release_lock(&(a_img_info->cache_lock));
-        return nbytes;
-    }
-
+    /* Error: read request starts after the end of the image file. */
     if (a_off >= a_img_info->size) {
         tsk_release_lock(&(a_img_info->cache_lock));
         tsk_error_reset();
@@ -58,108 +142,65 @@ tsk_img_read(TSK_IMG_INFO * a_img_info, TSK_OFF_T a_off,
         tsk_error_set_errstr("tsk_img_read - %" PRIuOFF, a_off);
         return -1;
     }
-
-    /* See if the requested length is going to be too long.
-     * we'll use this length when checking the cache. */
+
+    /* See if the requested length is going to be too long.
+     * we'll use this length when checking the cache.
+     * In other words, truncate the read request so that it
+     * does not pass the end of the image file. */
     len2 = a_len;
     if (a_off + len2 > a_img_info->size)
         len2 = (size_t) (a_img_info->size - a_off);
 
-    // check if it is in the cache
-    for (i = 0; i < TSK_IMG_INFO_CACHE_NUM; i++) {
-
-        // Look into the in-use cache entries
-        if (a_img_info->cache_len[i] > 0) {
-
-            // the retval check makes sure we don't go back in after data was read
-            if ((retval == 0) && (a_img_info->cache_off[i] <= a_off) &&
-                (a_img_info->cache_off[i] + a_img_info->cache_len[i] >=
-                    a_off + len2)) {
-
-                /*
-                   if (tsk_verbose)
-                   fprintf(stderr,
-                   "tsk_img_read: Read found in cache %d\n", i);
-                 */
-
-                // We found it...
-                memcpy(a_buf,
-                    &a_img_info->cache[i][a_off -
-                        a_img_info->cache_off[i]], len2);
-                retval = (ssize_t) len2;
-
-                // reset its "age" since it was useful
-                a_img_info->cache_age[i] = CACHE_AGE;
-
-                // we don't break out of the loop so that we update all ages
-            }
-            else {
-                /* decrease its "age" since it was not useful.
-                 * We don't let used ones go below 1 so that they are not
-                 * confused with entries that have never been used. */
-                a_img_info->cache_age[i]--;
-
-                // see if this is the most eligible replacement
-                if ((a_img_info->cache_len[cache_next] > 0)
-                    && (a_img_info->cache_age[i] <
-                        a_img_info->cache_age[cache_next]))
-                    cache_next = i;
-            }
-        }
-        else {
-            cache_next = i;
-        }
-    }
-
-    // if we didn't find it, then load it into the cache_next entry
-    if (retval == 0) {
-        size_t rlen;
-
-        // round the offset down to a sector boundary
-        a_img_info->cache_off[cache_next] = (a_off / 512) * 512;
-
-        /*
-           if (tsk_verbose)
-           fprintf(stderr,
-           "tsk_img_read: Loading data into cache %d (%" PRIuOFF
-           ")\n", cache_next, a_img_info->cache_off[cache_next]);
-         */
-
-        // figure out the length to read into the cache
-        rlen = TSK_IMG_INFO_CACHE_LEN;
-        if (a_img_info->cache_off[cache_next] + rlen > a_img_info->size) {
-            rlen =
-                (size_t) (a_img_info->size -
-                a_img_info->cache_off[cache_next]);
-        }
-
-        retval =
-            a_img_info->read(a_img_info, a_img_info->cache_off[cache_next],
-            a_img_info->cache[cache_next], rlen);
-
-        // if no error, then set the variables and copy the data
-        if (retval != -1) {
-            a_img_info->cache_age[cache_next] = CACHE_AGE;
-            a_img_info->cache_len[cache_next] = retval;
-
-            // update the length we can actually copy (in case we did not get to read all that we wanted)
-            if (a_off + len2 > a_img_info->cache_off[cache_next] + retval)
-                len2 =
-                    (size_t) (a_img_info->cache_off[cache_next] + retval -
-                    a_off);
-
-            memcpy(a_buf,
-                &a_img_info->cache[cache_next][a_off -
-                    a_img_info->cache_off[cache_next]], len2);
-            retval = (ssize_t) len2;
-        }
-        else {
-            a_img_info->cache_len[cache_next] = 0;
-            a_img_info->cache_age[cache_next] = 0;
-            a_img_info->cache_off[cache_next] = 0;
-        }
-    }
+    if (tsk_verbose > 2)
+        tsk_fprintf(stderr, "tsk_img_read: offset %" PRIuOFF ", length %lx\n", a_off, (unsigned long) len2);
+
+    TSK_OFF_T block_addr;       // block to read
+    TSK_OFF_T block_offs;       // offset within block
+    size_t rlen;                // remaining bytes to read
+    size_t clen;                // bytes to copy from the current cache block
+
+    struct TSK_IMG_INFO_CACHE_ENTRY * cache_entry;
+
+    rlen = len2;
+    block_offs = a_off & (TSK_IMG_INFO_CACHE_LEN - 1);
+    block_addr = a_off & ~(TSK_IMG_INFO_CACHE_LEN - 1);
+
+    while (rlen > 0) {
+        // get the current block from cache (possibly reading from disk)
+        if (! tsk_get_cache_block(a_img_info, block_addr, & cache_entry)) {
+            tsk_release_lock(&(a_img_info->cache_lock));
+            return len2 - rlen;
+        }
+
+        // a short page may end before the requested offset; treat that as
+        // end-of-data instead of letting the subtraction below wrap around
+        if ((size_t) block_offs >= cache_entry->length) {
+            tsk_release_lock(&(a_img_info->cache_lock));
+            return len2 - rlen;
+        }
+
+        // copy into the buffer the lesser of how much the block
+        // holds and how much data we still need
+        clen = MIN(MIN(TSK_IMG_INFO_CACHE_LEN, cache_entry->length) - block_offs, rlen);
+
+        memcpy(a_buf,
+            &(a_img_info->cache[cache_entry->page * TSK_IMG_INFO_CACHE_LEN]) + block_offs,
+            clen);
+        a_buf += clen;
+        rlen -= clen;
+
+        if ( (rlen > 0) && (cache_entry->length < TSK_IMG_INFO_CACHE_LEN) ) {
+            // cache had a short read, but we requested data beyond this
+            // return a short read
+            tsk_release_lock(&(a_img_info->cache_lock));
+            return len2 - rlen;
+        }
+
+        // advance to the next block
+        block_offs = 0;
+        block_addr += TSK_IMG_INFO_CACHE_LEN;
+    }
 
     tsk_release_lock(&(a_img_info->cache_lock));
-    return retval;
-}
+    return len2;
+}
diff --git a/tsk3/img/raw.c b/tsk3/img/raw.c
index ac3c6b4309..c3f15687eb 100644
--- a/tsk3/img/raw.c
+++ b/tsk3/img/raw.c
@@ -717,9 +717,46 @@ tsk_img_malloc(size_t a_len)
     TSK_IMG_INFO *imgInfo;
     if ((imgInfo = (TSK_IMG_INFO *) tsk_malloc(a_len)) == NULL)
         return NULL;
+
+    imgInfo->tag = TSK_IMG_INFO_TAG;
+
+    size_t cachesz = (size_t) TSK_IMG_INFO_CACHE_NUM * TSK_IMG_INFO_CACHE_LEN;
+
+#ifdef TSK_WIN32
+    imgInfo->cache = VirtualAlloc(NULL, cachesz,
+        MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
+#else // ! TSK_WIN32
+
+#ifdef HAVE_POSIX_MEMALIGN
+
+#ifdef _SC_PAGESIZE
+    long pagesz = sysconf(_SC_PAGESIZE);
+#else
+#ifdef _SC_PAGE_SIZE
+    long pagesz = sysconf(_SC_PAGE_SIZE);
+#else
+    long pagesz = TSK_IMG_INFO_CACHE_LEN;
+#endif
+#endif // PAGESIZE
+
+    void *aligned = NULL;       // posix_memalign() wants a void **, not a char **
+    if (posix_memalign(&aligned, (size_t) pagesz, cachesz) != 0)
+        aligned = NULL;
+    imgInfo->cache = (char *) aligned;
+
+#else // ! POSIX_MEMALIGN
+    imgInfo->cache = malloc(cachesz);
+#endif // POSIX_MEMALIGN
+#endif // TSK_WIN32
+
+    if (imgInfo->cache == NULL) {
+        tsk_fprintf(stderr, "tsk_img_malloc: unable to allocate cache\n");
+        free(imgInfo);          // do not leak the structure allocated above
+        return NULL;
+    }
+
     //init lock
     tsk_init_lock(&(imgInfo->cache_lock));
-    imgInfo->tag = TSK_IMG_INFO_TAG;
     return (void *) imgInfo;
 }
 
@@ -736,6 +773,14 @@ tsk_img_free(void *a_ptr)
     //deinit lock
     tsk_deinit_lock(&(imgInfo->cache_lock));
     imgInfo->tag = 0;
+
+    // free cache
+#ifdef TSK_WIN32
+    // MEM_RELEASE requires a size of 0 and must not be combined with MEM_DECOMMIT
+    VirtualFree(imgInfo->cache, 0, MEM_RELEASE);
+#else
+    free(imgInfo->cache);
+#endif
     free(imgInfo);
 }
 
diff --git a/tsk3/img/tsk_img.h b/tsk3/img/tsk_img.h
index bc7e36a8fb..02326d8dcb 100644
--- a/tsk3/img/tsk_img.h
+++ b/tsk3/img/tsk_img.h
@@ -69,12 +69,18 @@ extern "C" {
         TSK_IMG_TYPE_UNSUPP = 0xffff,   ///< Unsupported disk image type
     } TSK_IMG_TYPE_ENUM;
 
-#define TSK_IMG_INFO_CACHE_NUM  4
-#define TSK_IMG_INFO_CACHE_LEN  65536
+#define TSK_IMG_INFO_CACHE_NUM  32
+#define TSK_IMG_INFO_CACHE_LEN  32768
 
     typedef struct TSK_IMG_INFO TSK_IMG_INFO;
 #define TSK_IMG_INFO_TAG 0x39204231
 
+    struct TSK_IMG_INFO_CACHE_ENTRY {
+        int page;               ///< offset into cache is page * TSK_IMG_INFO_CACHE_LEN
+        TSK_OFF_T offset;       ///< offset in image from which cached data was obtained
+        size_t length;          ///< amount of data in cached page; never more than TSK_IMG_INFO_CACHE_LEN
+    };
+
     /**
      * Created when a disk image has been opened and stores general information and handles.
      */
@@ -85,12 +91,11 @@ extern "C" {
         unsigned int sector_size;       ///< sector size of device in bytes (typically 512)
 
         tsk_lock_t cache_lock;  ///< Lock for cache and associated values
-        char cache[TSK_IMG_INFO_CACHE_NUM][TSK_IMG_INFO_CACHE_LEN];     ///< read cache (r/w shared - lock)
-        TSK_OFF_T cache_off[TSK_IMG_INFO_CACHE_NUM];    ///< starting byte offset of corresponding cache entry (r/w shared - lock)
-        int cache_age[TSK_IMG_INFO_CACHE_NUM];  ///< "Age" of corresponding cache entry, higher means more recently used (r/w shared - lock)
-        size_t cache_len[TSK_IMG_INFO_CACHE_NUM];       ///< Length of cache entry used (0 if never used) (r/w shared - lock)
+        struct TSK_IMG_INFO_CACHE_ENTRY cache_info[TSK_IMG_INFO_CACHE_NUM];     ///< read cache (r/w shared - lock)
+        int cache_used;         ///< number of entries used in the cache entry array (r/w shared - lock)
+        char * cache;           ///< buffer of TSK_IMG_INFO_CACHE_NUM pages, each TSK_IMG_INFO_CACHE_LEN bytes (r/w shared - lock)
 
-         ssize_t(*read) (TSK_IMG_INFO * img, TSK_OFF_T off, char *buf, size_t len);     ///< \internal External progs should call tsk_img_read()
+        ssize_t(*read) (TSK_IMG_INFO * img, TSK_OFF_T off, char *buf, size_t len);     ///< \internal External progs should call tsk_img_read()
         void (*close) (TSK_IMG_INFO *); ///< \internal Progs should call tsk_img_close()
         void (*imgstat) (TSK_IMG_INFO *, FILE *);       ///< Pointer to file type specific function
     };