#include #include #include #include #include struct IPDesc { // char label[32]; unsigned long long ip; // uncompressed raw ip value unsigned long long cnt; // count of occurences unsigned long long cip; // compressed ip value // (region number, page number, offset) unsigned long long rn; // region number unsigned long long pn; // page number unsigned long long po; // page offset unsigned long long rcnt; // count of occurances in region unsigned long long pcnt; // count of occurances in page unsigned int rid; // dense region id unsigned int pid; // dense page id unsigned short fid; // id ordered by inverse frequency rank unsigned short id; // id ordered by value UT_hash_handle hh; }; struct IPDesc *Dict = NULL; int encode_by_ip(struct IPDesc *ipd, FILE *fp) { return fwrite(&(ipd->ip), sizeof(ipd->ip), 1, fp); } int decode_by_ip(FILE *fp, struct IPDesc **ipd) { unsigned long long key; int num; *ipd = NULL; num = fread(&key, sizeof(key), 1, fp); if (num==1) { HASH_FIND(hh, Dict, &key, sizeof(key), *ipd); } return num; } int dump_by_ip(struct IPDesc *ipd, FILE *fp) { return fprintf(fp, "%016llx\n", ipd->ip); } void hash_by_ip(struct IPDesc *ipd) { HASH_ADD(hh, Dict, ip, sizeof(ipd->ip), ipd); } int encode_by_id(struct IPDesc *ipd, FILE *fp) { return fwrite(&(ipd->id), sizeof(ipd->id), 1, fp); } int decode_by_id(FILE *fp, struct IPDesc **ipd) { unsigned short key; int num; *ipd = NULL; num = fread(&key, sizeof(key), 1, fp); if (num==1) { HASH_FIND(hh, Dict, &key, sizeof(key), *ipd); } return num; } int dump_by_id(struct IPDesc *ipd, FILE *fp) { return fprintf(fp, "%hu\n", ipd->id); } void hash_by_id(struct IPDesc *ipd) { HASH_ADD(hh, Dict, id, sizeof(ipd->id), ipd); } int encode_by_fid(struct IPDesc *ipd, FILE *fp) { return fwrite(&(ipd->fid), sizeof(ipd->fid), 1, fp); } int decode_by_fid(FILE *fp, struct IPDesc **ipd) { unsigned short key; int num; *ipd = NULL; num = fread(&key, sizeof(key), 1, fp); if (num==1) { HASH_FIND(hh, Dict, &key, sizeof(key), *ipd); } return num; } int dump_by_fid(struct IPDesc *ipd, FILE *fp) { return fprintf(fp, "%hu\n", ipd->fid); } void hash_by_fid(struct IPDesc *ipd) { HASH_ADD(hh, Dict, fid, sizeof(ipd->fid), ipd); } int encode_by_cip(struct IPDesc *ipd, FILE *fp) { return fwrite(&(ipd->cip), sizeof(ipd->cip), 1, fp); } int decode_by_cip(FILE *fp, struct IPDesc **ipd) { unsigned long long key; int num; *ipd = NULL; num = fread(&key, sizeof(key), 1, fp); if (num==1) { HASH_FIND(hh, Dict, &key, sizeof(key), *ipd); } return num; } int dump_by_cip(struct IPDesc *ipd, FILE *fp) { return fprintf(fp, "%llu\n", ipd->cip); } void hash_by_cip(struct IPDesc *ipd) { HASH_ADD(hh, Dict, cip, sizeof(ipd->cip), ipd); } void dump_iphdr(FILE *fp) { fprintf(fp,"id\tfid\tcip\tip\tcnt\tregion\trid\trcnt\tpage\tpid" "\tpcnt\tpo\n"); } void dump_ipdesc(struct IPDesc *ipd, FILE *fp) { fprintf(fp, "%5hu\t%5hu\t%llu\t%16llx\t%llu\t%16llx\t%4u\t%llu\t%16llx" "\t%4u\t%llu\t%16llx\n", ipd->id, ipd->fid, ipd->cip, ipd->ip, ipd->cnt, ipd->rn, ipd->rid, ipd->rcnt, ipd->pn, ipd->pid, ipd->pcnt,ipd->po); } #define FIELDNAMELEN 16 #define DESCLEN 80 enum { ID=0, FID=1, CIP=2, IP=3 }; #define DEFAULT_FIELD FID struct FieldDesc { char description[DESCLEN]; char name[FIELDNAMELEN]; int (*encode)(struct IPDesc *, FILE *fp); int (*decode)(FILE *fp, struct IPDesc **); int (*dump)(struct IPDesc *, FILE *fp); void (*hashby)(struct IPDesc *); int idx; } Fields[] = { {.name = "id", .description = "uniq id value in ascending order", .encode = encode_by_id, .decode = decode_by_id, .dump = dump_by_id, .hashby = hash_by_id, .idx = ID }, {.name = "fid", .description = "uniq frequency id in reverse order", .encode = encode_by_fid, .decode = decode_by_fid, .dump = dump_by_fid, .hashby = hash_by_fid, .idx = FID }, {.name = "cip", .description = "compressed value", .encode = encode_by_cip, .decode = decode_by_cip, .dump = dump_by_cip, .hashby = hash_by_cip, .idx = CIP }, {.name = "ip", .description = "raw 64 bit value from trace", .encode = encode_by_ip, .decode = decode_by_ip, .dump = dump_by_ip, .hashby = hash_by_ip, .idx = IP}, // terminator {.name = { 0x0 }, .description = { 0x0}, .encode = NULL, .dump = NULL } }; void dump_fields() { for (struct FieldDesc *f=Fields; f->name[0]!=0; f++) { printf("%s\t%s\n", f->name, f->description); } } struct FieldDesc * field(char *name) { for (struct FieldDesc *f=Fields; f!=NULL; f++) { if (strncmp(name,f->name, FIELDNAMELEN)==0) return f; } return NULL; } int ip_sort_by_ip(struct IPDesc *a, struct IPDesc *b) { if (a->ip < b->ip) return -1; if (a->ip == b->ip) return 0; return 1; }; int ip_sort_by_reverse_cnt(struct IPDesc *a, struct IPDesc *b) { if (a->cnt > b->cnt) return -1; if (a->cnt == b->cnt) return ip_sort_by_ip(a, b); return 1; }; struct PageDesc { // char label[32]; unsigned long long int pn; unsigned int id; unsigned long long cnt; UT_hash_handle hh; }; int pg_sort(struct PageDesc *a, struct PageDesc *b) { if (a->pn < b->pn) return -1; if (a->pn == b->pn) return 0; return 1; } struct RegionDesc { // char label[32]; unsigned long long int rn; unsigned int id; unsigned long long cnt; struct PageDesc *pages; UT_hash_handle hh; }; int rn_sort(struct RegionDesc *a, struct RegionDesc *b) { if (a->rn < b->rn) return -1; if (a->rn == b->rn) return 0; return 1; } struct RegionDesc *Regions = NULL; union Value { unsigned long long raw; struct { unsigned long long po:12; unsigned long long pn:20; unsigned long long rn:32; } bits; }; unsigned short binToken_t; struct Stats { unsigned long long numuniq; // number of dictionary items unsigned long long seqlen; // number of items in the trace } Stats; struct Args { FILE *infile; FILE *dfile; FILE *outfile; struct FieldDesc *field; char *dfilename; char *outfilename; int aflag; int bflag; int dflag; int Dflag; int verbose; } Args; int create_dict() { union Value v; struct RegionDesc *r; struct PageDesc *p; struct IPDesc *ip; unsigned long long rn, pn; unsigned int rval, pval; unsigned long long maxpcnt=0; unsigned short fid; unsigned short id; unsigned long long count = 0; while (fscanf(Args.infile, "%llx", &v.raw)!=EOF) { count++; // printf("%llx\n", v.raw); // First deal with dict HASH_FIND(hh, Dict, &v.raw, sizeof(ip->ip), ip); if (ip==NULL) { ip = (struct IPDesc *)malloc(sizeof(struct IPDesc)); bzero(ip, sizeof(struct IPDesc)); ip->ip = v.raw; // snprintf(ip->label, sizeof(ip->label), "%016llx", v.raw); hash_by_ip(ip); } ip->cnt++; // Now update spatial breakdown region and pages rn = v.bits.rn; pn = v.bits.pn; ip->rn = rn; ip->pn = pn; ip->po = v.bits.po; HASH_FIND(hh, Regions, &rn, sizeof(r->rn), r); if (r==NULL) { r = (struct RegionDesc *)malloc(sizeof(struct RegionDesc)); bzero(r, sizeof(struct RegionDesc)); r->rn = rn; // snprintf(r->label, sizeof(r->label), "%x", rn); HASH_ADD(hh, Regions, rn, sizeof(r->rn), r); } r->cnt++; HASH_FIND(hh, r->pages, &pn, sizeof(p->pn), p); if (p==NULL) { p = (struct PageDesc *)malloc(sizeof(struct PageDesc)); bzero(p, sizeof(struct PageDesc)); p->pn=pn; // snprintf(p->label, sizeof(p->label), "%x", pn); HASH_ADD(hh, r->pages, pn, sizeof(p->pn), p); } p->cnt++; } HASH_SORT(Regions, rn_sort); // printf("maxpcnt=%lld pndigits=%d\n", maxpcnt, pdigits); for (rval=0, r=Regions; r!=NULL; r=r->hh.next, rval++) { r->id = rval; unsigned long long pcnt = HASH_COUNT(r->pages); if (pcnt>maxpcnt) maxpcnt = pcnt; HASH_SORT(r->pages, pg_sort); for (pval=0,p=r->pages; p!=NULL; p=p->hh.next,pval++) { p->id = pval; // fprintf(stderr, "%u%0*u:%s%s:%llu:%llu\n", rval, pdigits, pval, r->label, // p->label, p->cnt, r->cnt); } } int pdigits = (int)log10(maxpcnt); pdigits++; // assign numerically ip ordered id and create compressed ip (cip) value // update region and page info char cipStr[32]; HASH_SORT(Dict, ip_sort_by_ip); for (id=0,ip=Dict; ip!=NULL; ip=ip->hh.next,id++) { ip->id=id; v.raw = ip->ip; rn = v.bits.rn; pn = v.bits.pn; HASH_FIND(hh, Regions, &rn, sizeof(r->rn), r); assert(r); HASH_FIND(hh, r->pages, &pn, sizeof(p->pn), p); snprintf(cipStr,sizeof(cipStr), "%u%0*u%04u\n", r->id, pdigits, p->id, v.bits.po); ip->cip = strtoull(cipStr, NULL, 10); ip->rid = r->id; ip->rcnt = r->cnt; ip->pid = p->id; ip->pcnt = p->cnt; } // assign frequency ids (fids) HASH_SORT(Dict, ip_sort_by_reverse_cnt); for (fid=0,ip=Dict; ip!=NULL; ip=ip->hh.next,fid++) { ip->fid = fid; if (Args.aflag) dump_ipdesc(ip,stdout); if (Args.bflag) { if (fwrite(ip, sizeof(struct IPDesc), 1, Args.dfile)!=1) { perror("fwrite faild for dictionary"); fclose(Args.dfile); return 0; } } } if (Args.bflag) fclose(Args.dfile); Stats.seqlen = count; Stats.numuniq = HASH_COUNT(Dict); return (int)id; } void dump_dict() { struct IPDesc *ip; for (ip=Dict; ip!=NULL; ip=ip->hh.next) { dump_ipdesc(ip,stdout); } } void dump_stats() { struct RegionDesc *r; fprintf(stderr, "numuniq:%lld\tseqlen:%lld\tnumregs:%d\n", Stats.numuniq, Stats.seqlen, HASH_COUNT(Regions)); for (r=Regions; r!=NULL; r=r->hh.next) { fprintf(stderr, "rn:%llx id:%u numpages:%d cnt:%lld\n", r->rn, r->id, HASH_COUNT(r->pages), r->cnt); // add dump of details of pages in region here if we want to } } int load_dict(void (*dicthashby)(struct IPDesc *ipd)) { assert(Dict==NULL); struct IPDesc *ip; long dictsize; unsigned long numentries; struct IPDesc *ips; unsigned long long int rn, pn; struct RegionDesc *r; struct PageDesc *p; fseek(Args.dfile, 0 , SEEK_END); dictsize = ftell(Args.dfile); fseek(Args.dfile, 0 , SEEK_SET); numentries = (dictsize/sizeof(struct IPDesc)); // file should be an integral number of complete records assert(numentries * sizeof(struct IPDesc) == dictsize); ips = (struct IPDesc *)malloc(dictsize); assert(numentries == fread(ips, sizeof(struct IPDesc), numentries, Args.dfile)); for (unsigned long i=0; irn; pn = ip->pn; HASH_FIND(hh, Regions, &rn, sizeof(r->rn), r); if (r==NULL) { r = (struct RegionDesc *)malloc(sizeof(struct RegionDesc)); bzero(r, sizeof(struct RegionDesc)); r->rn = rn; r->id = ip->rid; r->cnt = ip->rcnt; // snprintf(r->label, sizeof(r->label), "%x", rn); HASH_ADD(hh, Regions, rn, sizeof(r->rn), r); } HASH_FIND(hh, r->pages, &pn, sizeof(p->pn), p); if (p==NULL) { p = (struct PageDesc *)malloc(sizeof(struct PageDesc)); bzero(p, sizeof(struct PageDesc)); p->pn=pn; p->id = ip->pid; p->cnt = ip->pcnt; // snprintf(p->label, sizeof(p->label), "%x", pn); HASH_ADD(hh, r->pages, pn, sizeof(p->pn), p); } // if (Args.aflag) dump_ipdesc(ip, stdout); } Stats.numuniq = HASH_COUNT(Dict); return Stats.numuniq; } int encode_trc(FILE *ofile) { unsigned long long count=0; union Value v; struct IPDesc *ip; int (*encode)(struct IPDesc *, FILE *) = Args.field->encode; int (*dump)(struct IPDesc *, FILE *) = Args.field->dump; FILE *infile = Args.infile; FILE *outfile = Args.outfile; if (Args.bflag && !Args.aflag) { // binary only while (fscanf(infile, "%llx", &v.raw)!=EOF) { count++; HASH_FIND(hh, Dict, &v.raw, sizeof(ip->ip),ip); if (ip==NULL) { fprintf(stderr, "ERROR: could not find %llx in dictionary\n", v.raw); return 0; } if (encode(ip,ofile)!=1) { perror("ERROR: failed during encoding"); return 0; } } } else if (!Args.bflag && Args.aflag) { // ascii only while (fscanf(infile, "%llx", &v.raw)!=EOF) { count++; HASH_FIND(hh, Dict, &v.raw, sizeof(ip->ip), ip); if (ip==NULL) { fprintf(stderr, "ERROR: could not find %llx in dictionary\n", v.raw); return 0; } if (dump(ip,ofile)<=0) { perror("ERROR: failed during encoding"); return 0; } } } else { // binary and ascii if (outfile == NULL) { fprintf(stderr, "To have both binary and ascii encoding you must specify" " the optional outfile\n"); return 0; } while (fscanf(infile, "%llx", &v.raw)!=EOF) { count++; HASH_FIND(hh, Dict, &v.raw, sizeof(ip->ip), ip); if (ip==NULL) { fprintf(stderr, "ERROR: could not find %llx in dictionary\n", v.raw); return 0; } if (encode(ip,ofile)!=1) { perror("ERROR: failed during encoding"); return 0; } if (dump(ip,outfile)<=1) { perror("ERROR: failed during encoding"); return 0; } } } Stats.seqlen = count; return count; } int decode_seq(FILE *ofile) { unsigned long long count=0; struct IPDesc *ipd = NULL; int (*decode)(FILE *,struct IPDesc **) = Args.field->decode; FILE *infile = Args.infile; FILE *outfile = Args.outfile; if (Args.bflag && !Args.aflag) { while (decode(infile,&ipd)==1) { if (ipd) { encode_by_ip(ipd,ofile); count++; } else { fprintf(ofile,"UNKNOWN\n"); } } } else if (!Args.bflag && Args.aflag) { if (Args.verbose) { while (decode(infile,&ipd)==1) { if (ipd) { dump_ipdesc(ipd,ofile); count++; } else { fprintf(ofile,"UNKNOWN\n"); } } } else { while (decode(infile,&ipd)==1) { if (ipd) { dump_by_ip(ipd,ofile); count++; } else { fprintf(ofile,"UNKNOWN\n"); } } } } else { // ascii and binary if (outfile == NULL) { fprintf(stderr, "To have both binary and ascii decoding you must specify" " the optional outfile\n"); return 0; } while (decode(infile,&ipd)==1) { if (ipd) { encode_by_ip(ipd, ofile); dump_by_ip(ipd, outfile); count++; } else { fprintf(outfile,"UNKNOWN\n"); } } } Stats.seqlen = count; return count; } void usage(char *name) { fprintf(stderr, "USAGE: %s [-d] [-v] [-f dictionay field #] [-B] [-A] [-D] [-F] [-i ] [extra outfile]\n" " Reads trace from stdin unless -t is used to specify a file\n" " creates a dictionary file if the specified dectionary file\n" " does not exist. Otherwise loads the dictionary and encodes\n" " the trace using '%s' field of the dictionary unless\n" " a different fieldis specified with -f.\n" " By default an ascii version of the dictionary output is printed" " to stdout" " (-A can be used to suppress this output).\n" " By default a binary encoded seq is written to the stdout and" " if a seq file is specified then an ascii version printed to " "the file." " Ascii meta information is sent to stderr.\n" " -f dictionary field to encode trace with" " (default behaviour is to encode a trace to a sequence)\n" " -d decode a sequence back to a trace\n" " -v verbose ascii decode dumps fully ascii dictionary\n" " -B suppress binary output of encoded trace and dictionary\n" " -A supress ascii output of dictonary of dictionary\n" " -D dump existing binary dictionary [skip encoding]\n" " -F dump dictionary fields\n" " -i read trace from infile rather than stdin\n" "Supported Feilds:\n", name, Fields[DEFAULT_FIELD].name); dump_fields(); } int main(int argc, char **argv) { char opt; bzero(&Stats, sizeof(Stats)); Args.infile = stdin; Args.dfile = NULL; Args.dfilename = NULL; Args.outfilename = NULL; Args.aflag = 1; Args.bflag = 1; Args.Dflag = 0; Args.dflag = 0; Args.field = &(Fields[DEFAULT_FIELD]); Args.verbose = 0; while (optind < argc) { if ((opt = getopt(argc, argv, "vi:hBdf:ADF")) != -1) { switch (opt) { case 'v': Args.verbose=1; break; case 'i': if ((Args.infile = fopen(optarg, "r")) == NULL) { perror("fopen on tracefile"); usage(argv[0]); return -1; } break; case 'B': Args.bflag = 0; break; case 'd': Args.dflag = 1; break; case 'f': Args.field = field(optarg); if (Args.field == NULL) { fprintf(stderr, "Bad Field. Supported Fields are:"); dump_fields(); return -1; } break; case 'A': Args.aflag = 0; break; case 'D': Args.Dflag = 1; break; break; case 'F': dump_fields(); return 0; break; case 'h': usage(argv[0]); return -1; default: abort(); } } else { if (Args.dfilename==NULL) { Args.dfilename = argv[optind]; if ((Args.dfile = fopen(Args.dfilename, "r")) == NULL) { // fprintf(stderr, "Dictionary\n"); dump_iphdr(stderr); } } else if (Args.outfilename==NULL) { // printf("x\n"); Args.outfilename = argv[optind]; if ((Args.outfile = fopen(Args.outfilename, "w")) == NULL) { perror("fopen creating seq file"); usage(argv[0]); return -1; } // printf("%p\n", Args.outfile); } else { fprintf(stderr, "ERROR: unsupported argumeng %s\n", argv[optind]); usage(argv[0]); return -1; } optind++; } } if (Args.dfilename==NULL) { usage(argv[0]); return -1; } if (Args.dfile == NULL) { // dictionary does not exist must be creating the dictionary if ((Args.dfile = fopen(Args.dfilename, "w")) == NULL) { perror("fopen creating dictionary file"); usage(argv[0]); return -1; } create_dict(); goto doneOK; } else { if (!Args.dflag) { load_dict(hash_by_ip); if (Args.Dflag) { dump_iphdr(stderr); dump_dict(); goto doneOK; } } else { load_dict(Args.field->hashby); fprintf(stderr, "dictionary loaded by field: %s\n", Args.field->name); // dump_dict(); } } if (!Args.dflag) { if (!encode_trc(stdout)) { return -1; } } else { if (!decode_seq(stdout)) { return -1; } } if (Args.dfile) fclose(Args.dfile); if (Args.outfile) fclose(Args.outfile); doneOK: dump_stats(); return 0; }