/* MAKETOC.C */
/* For use with TextView and MakeZHDB on the Helio */
/* Earle F. Philhower, III */
/* earle@ziplabel.com   http://www.ziplabel.com/helio */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include "zlib.h"
#include "regex.h"

/* MakeZHDB V1.1 */
/* To be used with my_stdio file applications */
/* Released to Public Domain December 25, 2001 */

/* Short integer aliases.  None of these three names is referenced by the
   rest of the code visible in this file (the HDB section below uses its
   own BYTE/USHORT/UWORD macros instead). */
typedef unsigned char byte;
typedef unsigned long DWORD;
typedef unsigned short WORD;


#ifndef __HDB_H_
#define __HDB_H_

/* Value this tool stores in HDBHeader.version. */
#define HDB_VERSION_TEXT 0x8939

/* Scalar aliases used by the structure layouts below. */
#define BYTE		unsigned char
#define USHORT		unsigned short
#define UWORD		unsigned int

/* Identifier types; all are plain unsigned int. */
#define DatabaseID	unsigned int
#define RecordID	unsigned int
#define AppID		unsigned int

/* Byte counts used when the structures below are written to a file.
   They equal the sum of the member sizes when unsigned short is 2 bytes,
   unsigned int is 4 bytes and (for the TOC struct) pointers are 4 bytes. */
#define SIZE_OF_HDB_HEADER_STRUCT		92
#define SIZE_OF_HDB_REC_HEADER_STRUCT	16
#define SIZE_OF_HDB_DOC_HEADER_STRUCT	16
#define SIZE_OF_HDB_DOC_BOOMARK_STRUCT	20
#define SIZE_OF_HDB_DOC_TOC_STRUCT		40

/* Database header: the first SIZE_OF_HDB_HEADER_STRUCT bytes of an HDB file. */
typedef struct HDB_HEADER_STRUCT 
{
    BYTE  name[32];			//31 chars + NULL
	BYTE  owner[32];		//31 chars + NULL
	USHORT version;			//used to distinguish db version on device
	USHORT dbtype;			//RFU
	UWORD attributes;		//RFU
	UWORD creation_date;	//updated after db is added to device
	UWORD backup_date;		//updated after db is backed up from device
	DatabaseID id;			//updated after db is added to device
	AppID appid;			//updated after db is added to device
	UWORD record_count;		//total records
} HDBHeader;

/* Per-record directory entry; an array of these follows the database header. */
typedef struct HDB_REC_HEADER_STRUCT
{
	UWORD offset;			//offset to record from beginning of file
	RecordID id;			//user defined must be 0x80000000 - 0x8fffffff, else updated after db is added to device
	UWORD modify_date;		//updated after db is modified from device
	USHORT total_field;		//total fields
	BYTE category;			//record category
	BYTE attribute;			//record attributes
} HDBRecordHeader;

/* HelioDOC document header (not used by the code visible in this file). */
typedef struct HDB_DOC_HEADER_STRUCT
{
	USHORT version;			//HelioDOC version
	USHORT reserved1;		//RFU
	UWORD  uncomp_size;		//total size of doc
	USHORT total_recs;		//number of text records
	USHORT rec_size;		//max size of records
	UWORD  reserved2;		//RFU
} HDBDocHeader;

/* One bookmark / TOC entry (not used by the code visible in this file). */
typedef struct HDB_DOC_BOOMARK_STRUCT
{
	BYTE name[16];			//bookmark identifier, 15 chars + NULL
	UWORD offset;			//char position from beginning of text (decoded)
} HDBDocBookmark;

/* TOC list header (not used by the code visible in this file). */
typedef struct HDB_DOC_TOC_STRUCT
{
	BYTE title[32];				//TOC title, 31 chars + NULL
	UWORD record_count;			//number of entries in TOC
	HDBDocBookmark* toc_item;	//pointer to list of TOC entries, which immediately follow this struct
} HDBTocList;

/*
 * EnFieldSize(word, size, byte_use)
 *
 * Encodes `size` into the leading bytes of `word`, seven bits at a time
 * starting with the least-significant group.  Each output byte carries one
 * group in bits 7..1; bit 0 is set only on the last byte written.
 * `byte_use` receives the number of bytes written.
 *
 * All three arguments must be modifiable lvalues and each is evaluated more
 * than once.  `size` is consumed (shifted down to 0), so pass a scratch
 * copy.  `word` must be at least as large as the encoded length.
 *
 * The body is wrapped in do { } while (0) so the macro behaves as a single
 * statement, including inside an unbraced if/else.
 */
#define EnFieldSize(word, size, byte_use)												\
do {																					\
	(byte_use) = 0;																		\
	do {																				\
		*((BYTE *)&(word) + (byte_use)++) = (BYTE)(((BYTE)(size)) << 1);				\
		(size) >>= 7;																	\
	} while ((size) != 0);																\
	*((BYTE *)&(word) + (byte_use) - 1) |= 0x01;										\
} while (0)

#endif





/* Number of uncompressed bytes that MakeOneZHDB() puts into each compressed
   record (the last record takes whatever remains).  The compression output
   buffer there is sized at 2*BLOCKSIZE. */
#define BLOCKSIZE 2048





void MakeOneZHDB(char *recordName, char *owner, int size, unsigned char *data, char *outFile)
{
	FILE *out;
	unsigned int i;
	UWORD offset,record_size,encoded_bytes,encoded_size,temp;

	HDBHeader hdb_header_buf;
	HDBRecordHeader *hdb_recheader_buf;
	BYTE** rec_data;
	UWORD* rec_size;
	BYTE *dout;
	int outsz;
	char buff[9];
	int totout;

	totout = 0;
	dout = malloc(2*BLOCKSIZE);

	// Create header
	memset(hdb_header_buf.name, 0x00, 32);
	strncpy(hdb_header_buf.name, recordName, 31);
	hdb_header_buf.name[31] = 0;

	memset(hdb_header_buf.owner, 0x00, 32);
	if (NULL==owner) strcpy(hdb_header_buf.owner, "<");
	else strncpy(hdb_header_buf.owner, owner, 31);
	hdb_header_buf.owner[31] = 0;

	hdb_header_buf.version = HDB_VERSION_TEXT;
	hdb_header_buf.dbtype = 0;
	hdb_header_buf.attributes = 0;
	hdb_header_buf.creation_date = 0;
	hdb_header_buf.backup_date = 0;
	hdb_header_buf.id = 0;
	hdb_header_buf.appid = 0;
	hdb_header_buf.record_count = 1+1+(int)(floor(size/BLOCKSIZE)); //pdb_recheader_buf.num_records;

	//create record list
	hdb_recheader_buf = (HDBRecordHeader*)malloc(hdb_header_buf.record_count * SIZE_OF_HDB_REC_HEADER_STRUCT);
	rec_data = (BYTE**)malloc(hdb_header_buf.record_count * sizeof(BYTE*));
	rec_size = (UWORD*)malloc(hdb_header_buf.record_count * sizeof(UWORD));
	offset = SIZE_OF_HDB_HEADER_STRUCT + (hdb_header_buf.record_count * SIZE_OF_HDB_REC_HEADER_STRUCT);

	// Write header record
	i=0;
	hdb_recheader_buf[i].attribute = 0;
	hdb_recheader_buf[i].category = 0;
	hdb_recheader_buf[i].id = 0x80000000;
	hdb_recheader_buf[i].modify_date = 0;
	hdb_recheader_buf[i].offset = offset;
	hdb_recheader_buf[i].total_field = 1;
	record_size = 8;
	encoded_size = 0;
	temp = record_size;
	EnFieldSize(encoded_size, temp, encoded_bytes);
	rec_size[i] = (record_size + encoded_bytes) * sizeof(BYTE);
	rec_data[i] = (BYTE*)malloc((record_size + encoded_bytes) * sizeof(BYTE));
	memcpy(rec_data[i],&encoded_size,encoded_bytes);
	strcpy(buff, "ZDOC");
	buff[4] = size&255;
	buff[5] = (size>>8)&255;
	buff[6] = (size>>16)&255;
	buff[7] = (size>>24)&255;
	memcpy(rec_data[i]+encoded_bytes, buff, 8*sizeof(BYTE));
	offset += (record_size + encoded_bytes);
	// Write compressed sectors
	for(i=1;i<hdb_header_buf.record_count;i++)
	{
		hdb_recheader_buf[i].attribute = 0;
		hdb_recheader_buf[i].category = 0;
		hdb_recheader_buf[i].id = 0x80000000 | (/*1+*/i);//0;
		hdb_recheader_buf[i].modify_date = 0;
		hdb_recheader_buf[i].offset = offset;
		hdb_recheader_buf[i].total_field = 1;
		record_size = (i==(hdb_header_buf.record_count - 1))?size-(BLOCKSIZE*(hdb_header_buf.record_count - 2)):BLOCKSIZE;
		encoded_size = 0;
		
		outsz=2*BLOCKSIZE;
		compress(dout, &outsz, data, record_size);
		totout += outsz;
		temp = outsz; //record_size;
	    EnFieldSize(encoded_size, temp, encoded_bytes);
		rec_size[i] = (outsz/*record_size*/ + encoded_bytes) * sizeof(BYTE);
		rec_data[i] = (BYTE*)malloc((outsz/*record_size*/ + encoded_bytes) * sizeof(BYTE));
		memcpy(rec_data[i],&encoded_size,encoded_bytes);
		memcpy(rec_data[i]+encoded_bytes, dout/*data*/, outsz/*record_size*sizeof(BYTE)*/);
		offset += (outsz/*record_size*/ + encoded_bytes);
		data += record_size;
	}

	out = fopen(outFile, "wb");
	fwrite(&hdb_header_buf, sizeof(char), SIZE_OF_HDB_HEADER_STRUCT, out);
	fwrite(hdb_recheader_buf, sizeof(char), hdb_header_buf.record_count * SIZE_OF_HDB_REC_HEADER_STRUCT, out);
	for(i=0; i<hdb_header_buf.record_count; i++)
	{
		record_size = rec_size[i];
		fwrite(rec_data[i], sizeof(char), record_size, out);
	}
	fclose(out);
	printf("Compressed %d into %d (%2.1f percent savings)\n", size, totout, 100.0*(1.0-(1.0*totout)/(1.0*size)));
}


/* Print the usage/help text to stdout and terminate the process with
   exit status -1.  Never returns. */
void err()
{
	static const char *const usage_lines[] = {
		"MAKETOC v1.0:                  Earle F. Philhower, III - earle@ziplabel.com\n",
		"Creates table of contents files (TOC.*) for use with TextView on the Helio.\n\n",
		" Uses file \"regexp.txt\" in current directory to perform a pattern match and\n",
		" generate a simple text file containing character offset and TOC entry name.\n",
		" This file should then be uploaded to the Helio under the name TOC.xxxx where\n",
		" xxxx is replaced by the exact same name as used for the main text file on the\n",
		" Helio.  It's really not as hard as it seems.\n\n",
		"USAGE: maketoc [-regexp c:\\books\\regexp.txt] filename.txt\n",
		"       This will generate a file called \"TOC.filename.txt\" that you can edit in\n",
		"       any text editor.  Convert and upload to the Helio using:\n",
		"          MAKEZHDB toc.filename.txt \"TOC.HELIO FILENAME\" TOC tochdbname.hdb\n\n",
		"or, if you don't want to edit the TOC file you may run maketoc as:\n\n",
		"maketoc [-regexp c:\\regexp.txt] -zhdb filename.txt \"Helio Filename\" tocfile.hdb\n\n",
		"Do not specify the \"TOC.\" part of the filename when run with -zhdb switch.\n\n"
	};
	size_t line;

	/* None of the lines contains a conversion specifier, so writing them
	   verbatim produces the same output as printf() would. */
	for (line = 0; line < sizeof usage_lines / sizeof usage_lines[0]; line++)
		fputs(usage_lines[line], stdout);

	exit(-1);
}

/* One table-of-contents entry.  Entries form a singly linked list that
   AddTOC() keeps in ascending `offset` order. */
typedef struct TOC {
	unsigned int offset;		// offset value supplied by the caller of AddTOC()
	unsigned char title[64];	// entry text: at most 63 bytes plus a terminating NUL
	struct TOC *next;			// next entry, or NULL at the tail
} TOC;

/* Number of entries added through AddTOC() so far. */
int tocs=0;

/*
 * AddTOC - insert a new entry into an offset-sorted TOC list.
 *
 *   title     start of the entry text (need not be NUL-terminated)
 *   titlelen  number of bytes of `title` to use; anything beyond 63 is dropped
 *   offset    sort key / position stored in the entry
 *   curhead   current head of the list, or NULL for an empty list
 *
 * Returns the (possibly new) head of the list.  An entry whose offset equals
 * that of existing entries is placed after them.  `tocs` is incremented for
 * every entry added, wherever in the list it lands.  If memory cannot be
 * allocated a message is printed and the process exits with status -1.
 */
TOC *AddTOC( unsigned char *title, unsigned int titlelen, unsigned int offset, TOC *curhead )
{
	TOC *newtoc, *cur, *last;
	size_t copylen;

	newtoc = calloc(1, sizeof *newtoc);
	if (newtoc == NULL) {
		printf("Out of memory adding TOC entry\n");
		exit(-1);
	}

	// Leave room for the NUL that calloc() already put in place.
	copylen = sizeof newtoc->title - 1;
	if (titlelen < copylen)
		copylen = titlelen;
	if (title != NULL && copylen > 0)
		memcpy(newtoc->title, title, copylen);
	newtoc->offset = offset;
	tocs++;

	// Empty list, or the new entry sorts before the current head.
	if (curhead == NULL || curhead->offset > offset) {
		newtoc->next = curhead;
		return newtoc;
	}

	// Walk to the last entry whose offset is <= the new one and link in after it.
	last = curhead;
	for (cur = curhead->next; cur != NULL && cur->offset <= offset; cur = cur->next)
		last = cur;
	newtoc->next = cur;
	last->next = newtoc;

	return curhead;
}
	
/*
 * findfirsteol - count the bytes from text[off] up to, but not including,
 * the first '\r' or '\n', never looking at index `max` or beyond.
 *
 * Returns 0 when text[off] is itself a line terminator or when off >= max.
 */
int findfirsteol(unsigned char *text, int off, int max)
{
	int pos;

	for (pos = off; pos < max; pos++) {
		unsigned char ch = text[pos];

		if (ch == '\n' || ch == '\r')
			return pos - off;
	}
	return pos - off;
}


/*
 * MakeTOC - build a TOC list by matching regular expressions against a text.
 *
 *   text        the document bytes
 *   len         number of bytes in `text`
 *   regexpfile  path of a text file holding one POSIX-extended regular
 *               expression per line; blank lines are skipped
 *
 * Returns the head of the list built with AddTOC(), or NULL when nothing
 * matched.  Exits with status -1 if the pattern file cannot be opened.
 * Patterns that fail to compile are reported and skipped.
 *
 * For each pattern the text is walked in windows that start at `off` and
 * span the distance to the next line terminator.  Each hit found by
 * re_search() is measured with re_match() (limited to the window end) and
 * stored with the matched text as title and the window's current `off` as
 * its offset.
 *
 * Empty or failed matches are skipped by stepping one byte past the hit, so
 * `off` always moves forward and the scan terminates.
 */
TOC *MakeTOC(unsigned char *text, int len, unsigned char *regexpfile )
{
	TOC *toc;
	const char *compile_err;	// named so it does not hide the function err()
	struct re_registers regs;
	FILE *in;
	char regexp[1024];
	struct re_pattern_buffer patt;
	size_t pattlen;
	int off;


	toc = NULL;
	in = fopen((const char *)regexpfile, "rt");
	if (!in) {printf("Unable to read regexp file %s\n", (const char *)regexpfile); exit(-1);}

	re_syntax_options = RE_SYNTAX_POSIX_EXTENDED;


	while (fgets(regexp, sizeof regexp, in)) {
		// Strip the line terminator explicitly: the last line of the file may
		// not have one, and blindly dropping one byte would eat a pattern char.
		pattlen = strlen(regexp);
		while (pattlen > 0 && (regexp[pattlen-1] == '\n' || regexp[pattlen-1] == '\r'))
			regexp[--pattlen] = 0;
		if (pattlen == 0) continue;

		patt.buffer = NULL;
		patt.allocated = 0;
		patt.translate = NULL;
		patt.fastmap = (char *)malloc(1024);

		compile_err = re_compile_pattern(regexp, pattlen, &patt);
		if (compile_err) {
			printf("Regular expression ERROR:\n%s\n\n\t%s\n", regexp, compile_err);
			free(patt.fastmap);
			continue;
        }

		printf("Checking for matches against: %s\n", regexp);
		off = 0;
		while (off < len) {
			int hit_pos, eollen;
			eollen = findfirsteol(text, off, len);
			hit_pos = re_search(&patt, (const char *)text, len, off, eollen, &regs);
			while (hit_pos >= 0 && off<len) {
				int matchlen;
				matchlen = re_match(&patt, (const char *)text, off+eollen, hit_pos, &regs);
				if (matchlen > 0) {
					toc = AddTOC( text+hit_pos, matchlen, off, toc );
					off = hit_pos+matchlen;
				} else {
					// Empty match, or the match did not fit inside the window:
					// nothing to record.  Step past the hit so we make progress.
					off = hit_pos+1;
				}
				hit_pos = re_search(&patt, (const char *)text, len, off, eollen, &regs);
			}
			off += eollen+1;
		}
		regfree(&patt);
		free(patt.fastmap);
	}

	fclose(in);
	return toc;
}

/* Command-line state, filled in by ProcessArgs() and read by main(). */

/* Output mode: MODE_TEXT leaves the TOC as a text file; MODE_ZHDB (selected
   by the -zhdb switch) additionally packs that text into an HDB file. */
enum {MODE_TEXT=0, MODE_ZHDB} mode;
/* iname:   path of the source text file.
   oname:   path of the output file.
   hdbname: "TOC." + the Helio file name given with -zhdb.
   regexp:  path of the pattern file; ProcessArgs() defaults it to "regexp.txt". */
char iname[256], oname[256], hdbname[256], regexp[256];

/* Store prefix followed by src in dst (capacity dstsize bytes, including the
   NUL).  If the result would not fit, print a message and exit with status
   -1 rather than overrun the buffer or silently shorten a file name. */
static void StoreArg(char *dst, size_t dstsize, const char *prefix, const char *src)
{
	size_t plen = strlen(prefix);
	size_t slen = strlen(src);

	if (plen + slen >= dstsize) {
		printf("Argument too long: %s\n", src);
		exit(-1);
	}
	memcpy(dst, prefix, plen);
	memcpy(dst + plen, src, slen + 1);
}

/*
 * ProcessArgs - parse the command line into mode/iname/oname/hdbname/regexp.
 *
 * Accepted forms:
 *   [-regexp FILE] SOURCE
 *       text mode; the output name becomes "TOC." + SOURCE
 *   [-regexp FILE] -zhdb SOURCE HELIONAME OUTFILE
 *       HDB mode; the database name becomes "TOC." + HELIONAME
 *
 * -regex and -regexpfile are synonyms for -regexp; switches are matched
 * without regard to case.  Unrecognised arguments before the final one are
 * skipped.  Any other arrangement prints the usage text through err(),
 * which does not return.  Arguments too long for their 256-byte buffers
 * are rejected with a message and exit status -1.
 */
void ProcessArgs(int argc, char **argv)
{
	int off;

	mode = MODE_TEXT;
	strcpy(regexp, "regexp.txt");
	iname[0]=0; oname[0]=0; hdbname[0] = 0;
	off = 1;
	while (off<argc) {
		if (!stricmp(argv[off], "-regexp") || !stricmp(argv[off], "-regex") || !stricmp(argv[off], "-regexpfile")) {
			if (off+1<argc) StoreArg(regexp, sizeof regexp, "", argv[++off]);
			else err();
			off++;
		} else if (!stricmp(argv[off], "-zhdb")) {
			mode = MODE_ZHDB;
			off++;
			// Exactly three arguments must follow the switch.
			if (off+3==argc) {
				StoreArg(iname, sizeof iname, "", argv[off++]);
				StoreArg(hdbname, sizeof hdbname, "TOC.", argv[off++]);
				StoreArg(oname, sizeof oname, "", argv[off++]);
				return;
			} else
				err();
		} else if (off+1==argc) {
			// The last argument is the source file in text mode.
			StoreArg(iname, sizeof iname, "", argv[off++]);
			StoreArg(oname, sizeof oname, "TOC.", iname);
			return;
		} else
			off++;
	}
	err();
}


/* Read the whole of `path` into a freshly malloc'd buffer and store its
   length in *size_out.  `what` names the file's role in error messages.
   On any failure a message is printed and the process exits with status
   -1.  The caller owns, and must free(), the returned buffer, which is
   non-NULL even for an empty file. */
static char *ReadWholeFile(const char *path, const char *what, int *size_out)
{
	FILE *fp;
	long sz;
	char *buff;

	fp = fopen(path, "rb");
	if (fp == NULL) {printf("Unable to read %s %s\n", what, path); exit(-1);}

	sz = -1;
	if (fseek(fp, 0, SEEK_END) == 0)
		sz = ftell(fp);
	// Reject ftell() failure and sizes that do not fit the int used downstream.
	if (sz < 0 || (long)(int)sz != sz || fseek(fp, 0, SEEK_SET) != 0) {
		printf("Unable to read %s %s\n", what, path);
		exit(-1);
	}

	// malloc(0) may return NULL; ask for at least one byte.
	buff = malloc(sz > 0 ? (size_t)sz : 1);
	if (buff == NULL) {printf("Out of memory reading %s\n", path); exit(-1);}
	if (sz > 0 && fread(buff, 1, (size_t)sz, fp) != (size_t)sz) {
		printf("Unable to read %s %s\n", what, path);
		exit(-1);
	}
	fclose(fp);

	*size_out = (int)sz;
	return buff;
}

/*
 * Program entry point.
 *
 * Parses the command line, loads the source text, builds the TOC with
 * MakeTOC() and writes it to `oname` as "<offset> <title>" lines.  In
 * MODE_ZHDB that text file is then read back and replaced, under the same
 * name, by the HDB file produced by MakeOneZHDB().
 *
 * Returns 0 on success; every failure path exits with status -1.
 */
int main(int argc, char **argv)
{
	FILE *fp;
	int sz;
	char *buff;
	TOC *toc, *next;

	ProcessArgs(argc, argv);
	if (mode==MODE_TEXT)
		printf("Source Text File: %s\nDestination TOC File: %s\n", iname, oname);
	else
		printf("Source Text File: %s\nHelio File Name: %s\nHDB Destination File: %s\n", iname, hdbname, oname);
	printf("Regular Expression File: %s\n\n", regexp);

	printf("Reading source...\n");
	buff = ReadWholeFile(iname, "source", &sz);

	printf("Generating TOC...\n");
	toc = MakeTOC((unsigned char *)buff, sz, (unsigned char *)regexp);
	// AddTOC() copies the title text into each entry, so the source is no longer needed.
	free(buff);

	printf("Saving TOC...\n");
	fp = fopen(oname, "wt");
	if (!fp) {printf("Unable to write dest file %s\n", oname); exit(-1); }
	while (toc) {
		next = toc->next;
		fprintf(fp, "%u %s\n", toc->offset, (char *)toc->title);
		free(toc);
		toc = next;
	}
	if (fclose(fp) != 0) {printf("Unable to write dest file %s\n", oname); exit(-1); }

	if (mode==MODE_ZHDB) {
		// The text TOC just written serves as the payload; it is read back
		// byte-for-byte and then overwritten by the HDB file.
		buff = ReadWholeFile(oname, "temp source", &sz);
		MakeOneZHDB(hdbname, "TextView TOC", sz, (unsigned char *)buff, oname);
		free(buff);
	}

	return 0;
}

