Working multi-segment blerg database
author Chip Black <bytex64@bytex64.net>
Tue, 23 Nov 2010 07:29:04 +0000 (01:29 -0600)
committer Chip Black <bytex64@bytex64.net>
Tue, 23 Nov 2010 07:29:04 +0000 (01:29 -0600)
.gitignore [new file with mode: 0644]
Makefile [new file with mode: 0644]
blergtool.c [new file with mode: 0644]
config.h [new file with mode: 0644]
database.c [new file with mode: 0644]
database.h [new file with mode: 0644]

diff --git a/.gitignore b/.gitignore
new file mode 100644 (file)
index 0000000..e1c975d
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+*~
+*.o
+blergtool
diff --git a/Makefile b/Makefile
new file mode 100644 (file)
index 0000000..0b99b0d
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,16 @@
+# Build configuration for blergtool.
+# CC, CFLAGS and LIBS may be overridden on the make command line.
+CC = gcc
+CFLAGS = -g
+LIBS =
+
+targets = blergtool
+blergtool_objects = blergtool.o database.o
+
+# all and clean do not name real files.
+.PHONY: all clean
+
+all: $(targets)
+
+clean:
+       rm -f $(targets) $(blergtool_objects)
+
+# Libraries belong on the link line, after the objects that use them.
+blergtool: $(blergtool_objects)
+       $(CC) $^ $(LIBS) -o $@
+
+# Rebuild objects when the headers they include change:
+# both sources include database.h, and database.c also includes config.h.
+blergtool.o: database.h
+database.o: database.h config.h
+
+%.o: %.c
+       $(CC) $(CFLAGS) -c $< -o $@
diff --git a/blergtool.c b/blergtool.c
new file mode 100644 (file)
index 0000000..a0148c0
--- /dev/null
+++ b/blergtool.c
@@ -0,0 +1,58 @@
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "database.h"
+
+/* Print the command-line usage summary to standard output. */
+void help() {
+       puts("Usage: blergtool <store|fetch> <storename> [record]");
+}
+
+/*
+ * blergtool entry point.
+ *
+ *   blergtool store <storename>           read one record from stdin and store it
+ *   blergtool fetch <storename> <record>  write the given record to stdout
+ *
+ * Returns 0 on success.  Usage errors and database failures end the
+ * program with status 1.
+ */
+int main(int argc, char *argv[]) {
+       /* struct record keeps the length in a uint16_t, so this is the
+        * largest record whose length survives being stored in the index. */
+       enum { MAX_RECORD_SIZE = 65535 };
+
+       /* Both commands need a command word and a store name. */
+       if (argc < 3) {
+               help();
+               exit(1);
+       }
+
+       if (strcmp(argv[1], "store") == 0) {
+               struct blerg *f = blerg_open(argv[2]);
+               if (!f) {
+                       printf("Blerg open failed\n");
+                       exit(1);
+               }
+
+               char *data = malloc(MAX_RECORD_SIZE);
+               if (!data) {
+                       perror("Could not allocate record buffer");
+                       blerg_close(f);
+                       exit(1);
+               }
+
+               /* Read until the buffer is full or fread() stops making
+                * progress.  A zero return means EOF or a read error, so the
+                * loop cannot spin forever on a failing stream. */
+               size_t bytes_read = 0;
+               while (bytes_read < MAX_RECORD_SIZE) {
+                       size_t n = fread(data + bytes_read, 1, MAX_RECORD_SIZE - bytes_read, stdin);
+                       if (n == 0)
+                               break;
+                       bytes_read += n;
+               }
+               if (ferror(stdin)) {
+                       fprintf(stderr, "Could not read record from stdin\n");
+                       blerg_close(f);
+                       free(data);
+                       exit(1);
+               }
+
+               int record = blerg_store(f, data, (int)bytes_read);
+               blerg_close(f);
+               free(data);
+
+               if (record < 0) {
+                       fprintf(stderr, "Could not store\n");
+                       exit(1);
+               } else {
+                       fprintf(stderr, "Stored record %d\n", record);
+               }
+       } else if (strcmp(argv[1], "fetch") == 0) {
+               /* fetch additionally needs the record number. */
+               if (argc < 4) {
+                       help();
+                       exit(1);
+               }
+
+               /* Reject anything that is not a plain non-negative int. */
+               char *end;
+               long value = strtol(argv[3], &end, 10);
+               if (end == argv[3] || *end != '\0' || value < 0 || value > INT_MAX) {
+                       fprintf(stderr, "Invalid record number\n");
+                       exit(1);
+               }
+               int record = (int)value;
+
+               struct blerg *f = blerg_open(argv[2]);
+               if (!f) {
+                       printf("Blerg open failed\n");
+                       exit(1);
+               }
+
+               /* blerg_fetch() allocates the buffer; it is only valid (and
+                * only ours to free) when the call reports success. */
+               char *data = NULL;
+               int size = 0;
+               int fetched = blerg_fetch(f, record, &data, &size);
+               blerg_close(f);
+               if (!fetched) {
+                       fprintf(stderr, "Could not fetch record %d\n", record);
+                       exit(1);
+               }
+
+               fwrite(data, 1, size, stdout);
+               free(data);
+       } else {
+               /* Unknown command: report it through the exit status too. */
+               help();
+               return 1;
+       }
+
+       return 0;
+}
diff --git a/config.h b/config.h
new file mode 100644 (file)
index 0000000..c3b8610
--- /dev/null
+++ b/config.h
@@ -0,0 +1,6 @@
+#ifndef _CONFIG_H
+#define _CONFIG_H
+
+// Directory under which blerg_open() builds each store's path ("DATA_PATH/<name>").
+#define DATA_PATH "data"
+
+#endif //_CONFIG_H
diff --git a/database.c b/database.c
new file mode 100644 (file)
index 0000000..ce03171
--- /dev/null
+++ b/database.c
@@ -0,0 +1,279 @@
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <sys/file.h>
+#include <fcntl.h>
+#include "database.h"
+#include "config.h"
+
+#define RECORDS_PER_SEGMENT 65536
+
+/*
+ * Read the store's record counter (meta->sequence) while holding a shared
+ * flock() on the metadata file, and return the value that was read.
+ */
+uint64_t blerg_get_record_count(struct blerg *blerg) {
+       const int fd = blerg->meta_fd;
+
+       flock(fd, LOCK_SH);
+       const uint64_t sequence = blerg->meta->sequence;
+       flock(fd, LOCK_UN);
+
+       return sequence;
+}
+
+// Bumps the record counter by one under an exclusive flock() on the
+// metadata file.  Returns the counter's value from before the increment,
+// i.e. the record number the caller may now use.
+uint64_t blerg_increment_record_count(struct blerg *blerg) {
+       struct meta *meta = blerg->meta;
+       uint64_t allocated;
+
+       flock(blerg->meta_fd, LOCK_EX);
+       allocated = meta->sequence;
+       meta->sequence = allocated + 1;
+       flock(blerg->meta_fd, LOCK_UN);
+
+       return allocated;
+}
+
+/*
+ * Release the currently open segment: unmap the data and index mappings
+ * and close their file descriptors.
+ *
+ * Every field is reset afterwards (pointers to NULL, descriptors to -1,
+ * data_size to 0), so calling this twice, or calling it on a handle whose
+ * segment was never opened, is harmless.
+ */
+void blerg_segment_close(struct blerg *blerg) {
+       /* data is only mapped when the data file was non-empty. */
+       if (blerg->data != NULL && blerg->data != MAP_FAILED && blerg->data_size > 0)
+               munmap((void *)blerg->data, blerg->data_size);
+       blerg->data = NULL;
+       blerg->data_size = 0;
+
+       if (blerg->data_fd != -1)
+               close(blerg->data_fd);
+       blerg->data_fd = -1;
+
+       /* The index mapping always spans a full segment's worth of records;
+        * the length must be that byte count, not the sizeof of it. */
+       if (blerg->index != NULL && blerg->index != MAP_FAILED)
+               munmap((void *)blerg->index, RECORDS_PER_SEGMENT * sizeof(struct record));
+       blerg->index = NULL;
+
+       if (blerg->index_fd != -1)
+               close(blerg->index_fd);
+       blerg->index_fd = -1;
+}
+
+/*
+ * Close the current segment and open segment `new_segment` in its place.
+ *
+ * The index file "<base_path>/index<N>" is created and zero-filled to a
+ * full segment if needed, then mapped read/write.  The data file
+ * "<base_path>/data<N>" is opened for appending and, when non-empty,
+ * mapped read-only.
+ *
+ * Returns 1 on success and records new_segment in blerg->current_segment.
+ * Returns 0 on failure.  If the request is rejected up front (segment
+ * negative or beyond the last record) the handle is left untouched;
+ * otherwise the handle is left with no segment open and current_segment
+ * set to -1 so that a later call will try to switch again.
+ */
+int blerg_segment_switch(struct blerg *blerg, int new_segment) {
+       char filename[512];
+       const size_t index_size = RECORDS_PER_SEGMENT * sizeof(struct record);
+       uint64_t max_sequence = blerg_get_record_count(blerg);
+       struct stat st;
+
+       if (new_segment < 0 || (uint64_t)new_segment > max_sequence / RECORDS_PER_SEGMENT) {
+               fprintf(stderr, "Cannot switch to sequence beyond last record\n");
+               return 0;
+       }
+
+       blerg_segment_close(blerg);
+       /* Start from a known-empty state so no failure path below can leave
+        * a stale descriptor or mapping behind. */
+       blerg->index_fd = blerg->data_fd = -1;
+       blerg->index = NULL;
+       blerg->data = NULL;
+       blerg->data_size = 0;
+       blerg->current_segment = -1;
+
+       // Load and map the index
+       snprintf(filename, sizeof(filename), "%s/index%d", blerg->base_path, new_segment);
+       blerg->index_fd = open(filename, O_RDWR | O_CREAT, 0600);
+       if (blerg->index_fd == -1) {
+               perror("Could not open index");
+               goto open_failed_index_open;
+       }
+       flock(blerg->index_fd, LOCK_EX);
+       if (fstat(blerg->index_fd, &st) == -1) {
+               perror("Could not stat index");
+               flock(blerg->index_fd, LOCK_UN);
+               goto open_failed_index_mmap;
+       }
+       /* A new (or partially initialised) index is extended to full size;
+        * ftruncate() fills the added range with zero bytes, which reads as
+        * records with no flags set.  Touching a mapping past end-of-file
+        * would otherwise raise SIGBUS. */
+       if ((uint64_t)st.st_size < index_size) {
+               if (ftruncate(blerg->index_fd, (off_t)index_size) == -1) {
+                       perror("Could not initialize index");
+                       flock(blerg->index_fd, LOCK_UN);
+                       goto open_failed_index_mmap;
+               }
+       }
+       flock(blerg->index_fd, LOCK_UN);
+
+       blerg->index = (struct record *) mmap(NULL, index_size, PROT_READ | PROT_WRITE, MAP_SHARED, blerg->index_fd, 0);
+       if (blerg->index == MAP_FAILED) {
+               perror("Could not mmap index");
+               goto open_failed_index_mmap;
+       }
+
+       // Load data file
+       snprintf(filename, sizeof(filename), "%s/data%d", blerg->base_path, new_segment);
+       blerg->data_fd = open(filename, O_RDWR | O_APPEND | O_CREAT, 0600);
+       if (blerg->data_fd == -1) {
+               perror("Could not open data");
+               goto open_failed_data_open;
+       }
+       if (fstat(blerg->data_fd, &st) == -1) {
+               perror("Could not stat data");
+               goto open_failed_data_mmap;
+       }
+       blerg->data_size = st.st_size;
+
+       /* A zero-length mapping is invalid, so an empty data file simply
+        * leaves data == NULL. */
+       if (blerg->data_size > 0) {
+               blerg->data = (char *) mmap(NULL, blerg->data_size, PROT_READ, MAP_SHARED, blerg->data_fd, 0);
+               if (blerg->data == MAP_FAILED) {
+                       perror("Could not mmap data");
+                       goto open_failed_data_mmap;
+               }
+       }
+
+       blerg->current_segment = new_segment;
+       return 1;
+
+open_failed_data_mmap:
+       close(blerg->data_fd);
+open_failed_data_open:
+       munmap((void *)blerg->index, index_size);
+open_failed_index_mmap:
+       close(blerg->index_fd);
+open_failed_index_open:
+       blerg->index_fd = blerg->data_fd = -1;
+       blerg->index = NULL;
+       blerg->data = NULL;
+       blerg->data_size = 0;
+       return 0;
+}
+
+/*
+ * Open the store called `name`, creating it if it does not exist yet.
+ *
+ * The store lives in the directory "DATA_PATH/<name>".  Its "meta" file is
+ * created (zero-filled) when missing and mapped read/write, and the
+ * segment that holds the next record to be written is opened.
+ *
+ * name must be a string of at most 32 characters.
+ *
+ * Returns a heap-allocated handle to be released with blerg_close(), or
+ * NULL on failure.  Nothing is leaked on the failure paths.
+ */
+struct blerg *blerg_open(const char *name) {
+       char filename[512];
+       struct stat st;
+
+       if (name == NULL) {
+               fprintf(stderr, "No name given\n");
+               return NULL;
+       }
+       /* Not a system-call failure, so errno is meaningless here and
+        * perror() would append an unrelated message. */
+       if (strlen(name) > 32) {
+               fprintf(stderr, "Name too long\n");
+               return NULL;
+       }
+       struct blerg *blerg = malloc(sizeof(struct blerg));
+       if (!blerg) {
+               perror("Cannot allocate memory for blerg");
+               goto open_failed_blerg_malloc;
+       }
+       blerg->meta_fd = blerg->index_fd = blerg->data_fd = -1;
+       blerg->meta = NULL;
+       blerg->index = NULL;
+       blerg->data = NULL;
+       blerg->current_segment = 0;
+       blerg->data_size = 0;
+
+       // Make the directory if it doesn't exist
+       blerg->base_path = malloc(512);
+       if (!blerg->base_path) {
+               perror("Cannot allocate memory for base path");
+               goto open_failed_path_malloc;
+       }
+       snprintf(blerg->base_path, 512, "%s/%s", DATA_PATH, name);
+       /* The store directory cannot be created unless its parent exists. */
+       if (access(DATA_PATH, F_OK) == -1)
+               mkdir(DATA_PATH, 0755);
+       if (access(blerg->base_path, F_OK) == -1) {
+               /* Re-check after a failed mkdir(): another process may have
+                * created the directory in between. */
+               if (mkdir(blerg->base_path, 0755) == -1 && access(blerg->base_path, F_OK) == -1) {
+                       perror("Could not create store directory");
+                       goto open_failed_meta_open;
+               }
+       }
+
+       // Open and map metadata
+       snprintf(filename, sizeof(filename), "%s/meta", blerg->base_path);
+       blerg->meta_fd = open(filename, O_RDWR | O_CREAT, 0600);
+       if (blerg->meta_fd == -1) {
+               perror("Could not open metadata");
+               goto open_failed_meta_open;
+       }
+       if (fstat(blerg->meta_fd, &st) == -1) {
+               perror("Could not stat metadata");
+               goto open_failed_meta_mmap;
+       }
+       if (st.st_size == 0) {
+               /* Fresh store: the file must be at least sizeof(struct meta)
+                * long before it can be mapped and dereferenced. */
+               struct meta empty;
+               memset(&empty, 0, sizeof(empty));
+               if (write(blerg->meta_fd, &empty, sizeof(empty)) != (ssize_t)sizeof(empty)) {
+                       perror("Could not initialize metadata");
+                       goto open_failed_meta_mmap;
+               }
+       }
+       blerg->meta = (struct meta *) mmap(NULL, sizeof(struct meta), PROT_READ | PROT_WRITE, MAP_SHARED, blerg->meta_fd, 0);
+       if (blerg->meta == MAP_FAILED) {
+               perror("Could not map metadata");
+               goto open_failed_meta_mmap;
+       }
+
+       // Open and map index and data for the current segment
+       blerg->current_segment = blerg_get_record_count(blerg) / RECORDS_PER_SEGMENT;
+       if (!blerg_segment_switch(blerg, blerg->current_segment)) {
+               fprintf(stderr, "Could not switch segment\n");
+               goto open_failed_segment_switch;
+       }
+
+       return blerg;
+
+open_failed_segment_switch:
+       munmap((void *)blerg->meta, sizeof(struct meta));
+open_failed_meta_mmap:
+       close(blerg->meta_fd);
+open_failed_meta_open:
+       free(blerg->base_path);
+open_failed_path_malloc:
+       free(blerg);
+open_failed_blerg_malloc:
+       return NULL;
+}
+
+/*
+ * Tear down a handle returned by blerg_open(): close the open segment,
+ * unmap and close the metadata file, then free the path buffer and the
+ * handle itself.  Always returns 1.
+ */
+int blerg_close(struct blerg *blerg) {
+       struct meta *mapped_meta;
+       int fd;
+
+       blerg_segment_close(blerg);
+
+       mapped_meta = blerg->meta;
+       fd = blerg->meta_fd;
+       munmap((void *)mapped_meta, sizeof(struct meta));
+       close(fd);
+
+       free(blerg->base_path);
+       free(blerg);
+
+       return 1;
+}
+
+/*
+ * Append `len` bytes from `data` to the store as a new record.
+ *
+ * A record number is allocated from the metadata counter, the segment
+ * that owns it is opened if necessary, the bytes are appended to that
+ * segment's data file and the index entry is filled in (flag 0x0001,
+ * offset, length).  Index and data files are flock()ed exclusively while
+ * they are modified and are unlocked again on every exit path.
+ *
+ * len must be in 0..65535: the index stores the length in a uint16_t.
+ *
+ * Returns the new record number, or -1 on failure.  A record number that
+ * was allocated before the failure is not reused; its index entry keeps
+ * no valid flag.
+ */
+int blerg_store(struct blerg *blerg, const char *data, int len) {
+       int result = -1;
+       int written = 0;
+
+       if (len < 0 || len > UINT16_MAX) {
+               printf("len must be between 0 and 65535\n");
+               return -1;
+       }
+
+       /* Allocate the record number first: it decides which segment's
+        * files must be open, and therefore which descriptors to lock. */
+       uint64_t seq = blerg_increment_record_count(blerg);
+       if (seq > (uint64_t)INT32_MAX) {
+               /* The record number is returned as an int. */
+               printf("Record number out of range\n");
+               return -1;
+       }
+       int segment = seq / RECORDS_PER_SEGMENT;
+       if (segment != blerg->current_segment && !blerg_segment_switch(blerg, segment)) {
+               fprintf(stderr, "Could not switch segment\n");
+               return -1;
+       }
+       uint64_t rec = seq % RECORDS_PER_SEGMENT;
+
+       flock(blerg->index_fd, LOCK_EX);
+       flock(blerg->data_fd, LOCK_EX);
+
+       // Get the position for the new data
+       off_t curpos = lseek(blerg->data_fd, 0, SEEK_END);
+       if (curpos == -1) {
+               perror("Could not find end of data");
+               goto store_unlock;
+       }
+       /* The index stores the offset in a uint32_t. */
+       if ((uint64_t)curpos + (uint64_t)len > UINT32_MAX) {
+               printf("Data file is full\n");
+               goto store_unlock;
+       }
+
+       /* write() may accept fewer bytes than requested; continue from
+        * where it stopped and only ask for what is still outstanding. */
+       while (written < len) {
+               ssize_t n = write(blerg->data_fd, data + written, len - written);
+               if (n == -1) {
+                       perror("Could not write data");
+                       // Truncate anything we may have written
+                       ftruncate(blerg->data_fd, curpos);
+                       goto store_unlock;
+               }
+               written += n;
+       }
+       blerg->index[rec].flags = 0x0001;
+       blerg->index[rec].offset = curpos;
+       blerg->index[rec].length = len;
+
+       result = (int)seq;
+
+store_unlock:
+       flock(blerg->data_fd, LOCK_UN);
+       flock(blerg->index_fd, LOCK_UN);
+
+       return result;
+}
+
+/*
+ * Copy record number `rec` out of the store.
+ *
+ * On success returns 1, stores a malloc()ed copy of the record in *data
+ * (the caller frees it) and its size in bytes in *length.
+ * On failure returns 0 with *data set to NULL and *length set to 0.
+ *
+ * Failure cases: rec is negative or was never allocated, its segment
+ * cannot be opened, its index entry lacks the valid flag (0x1), the record
+ * lies outside the data file, or memory cannot be allocated or mapped.
+ */
+int blerg_fetch(struct blerg *blerg, int rec, char **data, int *length) {
+       *data = NULL;
+       *length = 0;
+
+       if (rec < 0 || (uint64_t)rec >= blerg_get_record_count(blerg)) {
+               printf("Invalid record\n");
+               return 0;
+       }
+
+       int segment = rec / RECORDS_PER_SEGMENT;
+       if (segment != blerg->current_segment && !blerg_segment_switch(blerg, segment)) {
+               fprintf(stderr, "Could not switch segment\n");
+               return 0;
+       }
+       rec = rec % RECORDS_PER_SEGMENT;
+
+       if ((blerg->index[rec].flags & 0x1) == 0) {
+               printf("Invalid record\n");
+               return 0;
+       }
+
+       /* Wide enough that a uint32_t offset plus a uint16_t length cannot
+        * overflow. */
+       int64_t rec_offset = blerg->index[rec].offset;
+       int64_t rec_length = blerg->index[rec].length;
+       int64_t rec_end = rec_offset + rec_length;
+       if (rec_end > blerg->data_size) {
+               // The record extends past our mmap (the file has grown since
+               // it was mapped).  Recheck size and remap.
+               struct stat st;
+               if (fstat(blerg->data_fd, &st) == -1) {
+                       perror("Could not stat data");
+                       return 0;
+               }
+               if (rec_end > (int64_t)st.st_size) {
+                       printf("Record offset outside of data!?");
+                       return 0;
+               }
+               /* data_size is an int. */
+               if ((int64_t)st.st_size > INT32_MAX) {
+                       printf("Data file too large to map\n");
+                       return 0;
+               }
+
+               /* Drop the old mapping using the OLD length, before
+                * data_size is overwritten with the new one. */
+               if (blerg->data != NULL && blerg->data_size > 0)
+                       munmap(blerg->data, blerg->data_size);
+               blerg->data = NULL;
+               blerg->data_size = 0;
+
+               if (st.st_size > 0) {
+                       char *remapped = (char *) mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, blerg->data_fd, 0);
+                       if (remapped == MAP_FAILED) {
+                               perror("Could not remap data");
+                               return 0;
+                       }
+                       blerg->data = remapped;
+                       blerg->data_size = st.st_size;
+               }
+       }
+
+       /* malloc(0) may legitimately return NULL, so never ask for zero
+        * bytes; an empty record is still a successful fetch. */
+       char *copy = malloc(rec_length > 0 ? (size_t)rec_length : 1);
+       if (copy == NULL) {
+               perror("Could not allocate string in fetch");
+               return 0;
+       }
+
+       if (rec_length > 0)
+               memcpy(copy, blerg->data + rec_offset, (size_t)rec_length);
+
+       *data = copy;
+       *length = (int)rec_length;
+
+       return 1;
+}
diff --git a/database.h b/database.h
new file mode 100644 (file)
index 0000000..69043e6
--- /dev/null
+++ b/database.h
@@ -0,0 +1,33 @@
+#ifndef _DATABASE_H
+#define _DATABASE_H
+
+#include <stdint.h>
+
+/* One entry of a segment's index file, describing a single stored record. */
+struct record {
+       uint32_t offset;   /* position of the record's bytes in the segment's data file */
+       uint16_t length;   /* size of the record in bytes */
+       uint16_t flags;    /* bit 0x0001 is set by blerg_store() and required by blerg_fetch() */
+};
+
+/* Contents of a store's "meta" file, mapped into memory by blerg_open(). */
+struct meta {
+       uint64_t sequence; /* record counter; incremented once per blerg_store() call */
+};
+
+/* Handle for an open store, created by blerg_open() and freed by blerg_close(). */
+struct blerg {
+       int meta_fd;            /* descriptor of the "meta" file, -1 when not open */
+       int index_fd;           /* descriptor of the open segment's index file, -1 when not open */
+       int data_fd;            /* descriptor of the open segment's data file, -1 when not open */
+       char *base_path;        /* heap-allocated "DATA_PATH/<name>" directory path */
+       struct meta *meta;      /* shared read/write mapping of the "meta" file */
+       struct record *index;   /* shared read/write mapping of the segment's index file */
+       char *data;             /* read-only mapping of the segment's data file */
+       int current_segment;    /* segment number computed when the store was opened */
+       int data_size;          /* size in bytes of the data file when it was last mapped */
+};
+
+/* Open (creating if necessary) the store with the given name.
+ * Returns a handle, or NULL on failure. */
+struct blerg *blerg_open(const char *);
+/* Release a handle returned by blerg_open().  Returns 1. */
+int blerg_close(struct blerg *);
+/* Store the given number of bytes from the buffer as a new record.
+ * Returns the new record number, or -1 on failure. */
+int blerg_store(struct blerg *, const char *, int);
+/* Fetch the record with the given number.  On success returns 1 and
+ * stores a malloc()ed copy of the record and its length through the two
+ * output pointers; the caller frees the copy.  Returns 0 on failure. */
+int blerg_fetch(struct blerg *, int, char **, int *);
+
+#endif //_DATABASE_H