--- /dev/null
+/*
+* Copyright (C) 1995-2005, Index Data ApS
+* See the file LICENSE for details.
+*
+* $Id$
+*/
+
+#if HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <libxml/parser.h>
+#include <libxml/tree.h>
+
+#include <libxml/xpath.h>
+#include <libxml/xpathInternals.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <assert.h>
+
+#if HAVE_LOCALE_H
+#include <locale.h>
+#endif
+#if HAVE_LANGINFO_H
+#include <langinfo.h>
+#endif
+
+#include <yaz/marcdisp.h>
+#include <yaz/yaz-util.h>
+#include <yaz/xmalloc.h>
+#include <yaz/options.h>
+
+#ifndef SEEK_SET
+#define SEEK_SET 0
+#endif
+#ifndef SEEK_END
+#define SEEK_END 2
+#endif
+
+#include <fcntl.h>
+
+/* XPath matching every element, at any depth, whose tag attribute is "999". */
+char *clean_marc_xpath =
+    "//*[@tag=\"999\"]";
+
+/*
+ * XPath matching the children of the root element that are either not
+ * named "datafield", or are datafields whose tag is neither 035 nor 999.
+ * NOTE(review): neither expression is referenced by the code in this file;
+ * presumably they are kept as ready-made values for -r -- confirm.
+ */
+char *holdings_xpath =
+    "/*/*["
+    "(local-name()='datafield' and (@tag!='035' and @tag!='999'))"
+    " or local-name()!='datafield'"
+    "]";
+
+/*
+ * Deletes from doc the nodes selected by the XPath expression xpath, then
+ * removes comment and processing-instruction nodes that are direct
+ * children of the document. Defined further down in this file.
+ */
+void prune_doc( xmlDocPtr doc, char* xpath );
+/*
+ * Serializes doc and returns a heap-allocated string with everything up to
+ * and including the first '>' (the XML declaration) dropped and one
+ * trailing newline or carriage return trimmed. The caller releases the
+ * result with free().
+ */
+char* _xml_to_string( xmlDocPtr doc );
+
+/*
+ * Write the command-line synopsis to stderr.
+ * prog is the program name substituted into the message.
+ */
+static void usage(const char *prog) {
+    fprintf(stderr,
+            "Usage: %s -r [xpath] -c [cfile] [-f from] [-t to]"
+            " [-x] [-O] [-X] [-I] [-v] file...\n",
+            prog);
+}
+
+/*
+ * Command-line entry point.
+ *
+ * Arguments are handled strictly in the order given, so options only affect
+ * the input files that follow them:
+ *   -r xpath   prune every decoded record with prune_doc() using xpath and
+ *              print the serialized result instead of the raw decoder output
+ *   -c cfile   additionally dump each raw record into cfile as a C string
+ *              array named marc_records
+ *   -f from    source character set (conversion needs both -f and a target)
+ *   -t to      target character set (defaults to the locale codeset when
+ *              langinfo is available)
+ *   -x -O -e -X -I   select the yaz_marc output format
+ *   -p         print the offset of each record
+ *   -2         accepted; only sets a flag that nothing here reads
+ *   -v         increase decoder verbosity
+ *   file       an ISO2709 file; each record is read, decoded and written
+ *              to stdout
+ *
+ * Exits with 1 on usage or file-open errors, 2 when the requested character
+ * conversion is unsupported, and 0 otherwise after reporting the number of
+ * decoded records on stderr.
+ */
+int main (int argc, char **argv) {
+    int counter = 0;            /* records decoded, across all input files */
+
+    int r;
+    int libxml_dom_test = 0;
+    int print_offset = 0;
+    char *arg = NULL;
+    int verbose = 0;
+    FILE *inf;
+    char buf[100001];           /* one raw record; length is capped at 100000 */
+    char *prog = *argv;
+    int no = 0;                 /* number of arguments seen */
+    int xml = 0;
+    FILE *cfile = 0;
+    char *from = 0, *to = 0;
+    int num = 1;                /* 1-based record number used by -p */
+    char *prune = NULL;
+
+#if HAVE_LOCALE_H
+    setlocale(LC_CTYPE, "");
+#endif
+#if HAVE_LANGINFO_H
+#ifdef CODESET
+    to = nl_langinfo(CODESET);
+#endif
+#endif
+
+    /*
+     * "c:" -- the colon is required so that -c receives its file name; the
+     * code below passes arg straight to fopen().
+     */
+    while ((r = options("pvc:r:xOeXIf:t:2", argv, argc, &arg)) != -2) {
+
+        int count;              /* records dumped to cfile for this input */
+        no++;
+
+        switch (r) {
+        case 'r':
+            prune = arg;
+            xmlKeepBlanksDefault(0);
+            break;
+        case 'f':
+            from = arg;
+            break;
+        case 't':
+            to = arg;
+            break;
+        case 'c':
+            if (cfile)
+                fclose (cfile);
+            cfile = fopen (arg, "w");
+            if (!cfile) {
+                /* same reporting as for an unreadable input file */
+                fprintf (stderr, "%s: cannot open %s:%s\n",
+                         prog, arg, strerror (errno));
+                exit(1);
+            }
+            break;
+        case 'x':
+            xml = YAZ_MARC_SIMPLEXML;
+            break;
+        case 'O':
+            xml = YAZ_MARC_OAIMARC;
+            break;
+        case 'e': /* not supported on older versions of yaz */
+            xml = YAZ_MARC_XCHANGE;
+            break;
+        case 'X':
+            xml = YAZ_MARC_MARCXML;
+            break;
+        case 'I':
+            xml = YAZ_MARC_ISO2709;
+            break;
+        case 'p':
+            print_offset = 1;
+            break;
+        case '2':
+            libxml_dom_test = 1;
+            break;
+        case 0:
+            /* a non-option argument: an input file to convert */
+            inf = fopen (arg, "rb");
+            count = 0;
+            if (!inf) {
+                fprintf (stderr, "%s: cannot open %s:%s\n",
+                         prog, arg, strerror (errno));
+                exit(1);
+            }
+            if (cfile)
+                fprintf (cfile, "char *marc_records[] = {\n");
+
+            {
+                yaz_marc_t mt = yaz_marc_create();
+                yaz_iconv_t cd = 0;
+
+                if (from && to) {
+                    cd = yaz_iconv_open(to, from);
+                    if (!cd) {
+                        fprintf(stderr, "conversion from %s to %s "
+                                "unsupported\n", from, to);
+                        exit(2);
+                    }
+                    yaz_marc_iconv(mt, cd);
+                }
+                yaz_marc_xml(mt, xml);
+                yaz_marc_debug(mt, verbose);
+
+                while (1) {
+                    int len;
+                    char *result;
+                    int rlen;
+
+                    /* the first 5 bytes hold the record length in decimal */
+                    r = fread (buf, 1, 5, inf);
+
+                    if (r < 5) {
+                        if (r && print_offset)
+                            printf ("Extra %d bytes", r);
+                        break;
+                    }
+
+                    if (print_offset) {
+                        long off = ftell(inf);
+                        printf ("Record %d offset %ld\n", num, (long) off);
+                    }
+
+                    len = atoi_n(buf, 5);
+
+                    /* reject lengths that cannot be valid or overflow buf */
+                    if (len < 25 || len > 100000)
+                        break;
+
+                    len = len - 5;
+                    r = fread (buf + 5, 1, len, inf);
+
+                    if (r < len)
+                        break;
+
+                    r = yaz_marc_decode_buf (mt, buf, -1, &result, &rlen);
+
+                    if (r <= 0)
+                        break;
+
+                    /* count only records that were actually decoded */
+                    counter++;
+
+                    if (!prune) {
+
+                        fwrite (result, rlen, 1, stdout);
+
+                    } else {
+
+                        char *marc;
+                        xmlDocPtr doc = xmlParseMemory(result, rlen);
+
+                        if (!doc) {
+                            fprintf(stderr, "xmLParseMemory failed\n");
+                            /* keep -p record numbers aligned with the file */
+                            num++;
+                            continue;
+                        }
+
+                        prune_doc( doc, prune );
+
+                        marc = _xml_to_string(doc);
+                        if (marc) {
+                            fprintf(stdout, "%s", marc);
+                            free(marc);
+                        } else {
+                            fprintf(stderr, "%s: cannot serialize record %d\n",
+                                    prog, num);
+                        }
+                        xmlFreeDoc(doc);
+                    }
+
+                    if (cfile) {
+
+                        /* r is the byte count returned by the decoder */
+                        char *p = buf;
+                        int i;
+                        if (count)
+                            fprintf (cfile, ",");
+                        fprintf (cfile, "\n");
+                        for (i = 0; i < r; i++) {
+                            /* 16 escaped bytes per string-literal line */
+                            if ((i & 15) == 0)
+                                fprintf (cfile, " \"");
+                            fprintf (cfile, "\\x%02X", p[i] & 255);
+
+                            if (i < r - 1 && (i & 15) == 15)
+                                fprintf (cfile, "\"\n");
+                        }
+                        fprintf (cfile, "\"\n");
+                    }
+                    num++;
+                    /*
+                     * Must advance per record: the "," separator above is
+                     * only written when count is non-zero.
+                     */
+                    count++;
+                }
+
+                if (cd)
+                    yaz_iconv_close(cd);
+                yaz_marc_destroy(mt);
+            }
+
+            if (cfile)
+                fprintf (cfile, "};\n");
+            fclose(inf);
+            break;
+        case 'v':
+            verbose++;
+            break;
+        default:
+            usage(prog);
+            exit (1);
+        }
+    }
+
+    if (cfile)
+        fclose (cfile);
+    if (!no) {
+        usage(prog);
+        exit (1);
+    }
+
+    fprintf(stderr, "\nProcessed %d Records\n", counter);
+    exit (0);
+}
+
+
+/*
+ * Remove from doc every node selected by the XPath expression xpath, then
+ * remove the comment and processing-instruction nodes that are direct
+ * children of the document.
+ *
+ * doc    document to modify in place; a NULL doc is ignored
+ * xpath  XPath expression to evaluate against doc; a NULL xpath is ignored
+ *
+ * Nothing is returned. If the XPath context cannot be created a message is
+ * written to stderr; if the expression cannot be evaluated the document is
+ * left untouched.
+ */
+void prune_doc( xmlDocPtr doc, char* xpath ) {
+
+    xmlXPathContextPtr xpathctx;
+    xmlXPathObjectPtr object;
+    xmlNodePtr cur;
+    xmlNodePtr next;
+    int i;
+
+    if (doc == NULL || xpath == NULL)
+        return;
+
+    xpathctx = xmlXPathNewContext(doc);
+    if (xpathctx == NULL) {
+        fprintf(stderr, "XPATH FAILED");
+        return;
+    }
+
+    object = xmlXPathEvalExpression( BAD_CAST xpath, xpathctx);
+    if (object == NULL) {
+        /* the context must still be released on this path */
+        xmlXPathFreeContext(xpathctx);
+        return;
+    }
+
+    /* nodesetval is NULL when the result is empty or not a node set */
+    if (object->nodesetval != NULL) {
+        /*
+         * Walk the set backwards so that, with the set in document order,
+         * descendants are released before their ancestors; freeing an
+         * ancestor first would leave later entries dangling. This mirrors
+         * the approach of the libxml2 xpath2.c example.
+         */
+        for (i = object->nodesetval->nodeNr - 1; i >= 0; i--) {
+            xmlNodePtr cur_node = object->nodesetval->nodeTab[i];
+
+            if (cur_node == NULL)
+                continue;
+            /*
+             * Namespace entries are not tree nodes and are released with
+             * the XPath object; the document itself is owned by the caller.
+             */
+            if (cur_node->type == XML_NAMESPACE_DECL ||
+                cur_node->type == XML_DOCUMENT_NODE ||
+                cur_node->type == XML_HTML_DOCUMENT_NODE)
+                continue;
+
+            xmlUnlinkNode( cur_node );
+            xmlFreeNode( cur_node );
+            /* do not leave a dangling pointer inside the result set */
+            object->nodesetval->nodeTab[i] = NULL;
+        }
+    }
+
+    /* remove all comments and PI nodes */
+    for (cur = doc->children; cur != NULL; cur = next) {
+        /* fetch the sibling first: cur may be freed below */
+        next = cur->next;
+        if( cur->type == XML_COMMENT_NODE || cur->type == XML_PI_NODE ) {
+            xmlUnlinkNode( cur );
+            xmlFreeNode( cur );
+        }
+    }
+
+    xmlXPathFreeObject(object);
+    xmlXPathFreeContext(xpathctx);
+
+}
+
+/*
+ * Serialize doc to a string without its XML declaration.
+ *
+ * The document is dumped unformatted; everything up to and including the
+ * first '>' is dropped (the declaration that leads the dump), and a single
+ * trailing newline or carriage return is trimmed.
+ *
+ * Returns a string allocated with malloc() that the caller must free(), or
+ * NULL if the document could not be serialized or memory ran out.
+ */
+char* _xml_to_string( xmlDocPtr doc ) {
+
+    int bufsize = 0;
+    xmlChar* xmlbuf = NULL;
+    const char* start;
+    const char* decl_end;
+    size_t len;
+    char* xml;
+
+    xmlDocDumpFormatMemory( doc, &xmlbuf, &bufsize, 0 );
+    if (xmlbuf == NULL)
+        return NULL;
+
+    /* when we reach the first >, take everything after it */
+    start = (const char*) xmlbuf;
+    decl_end = strchr(start, '>');
+    if (decl_end != NULL)
+        start = decl_end + 1;
+
+    /* trim one trailing line terminator; guard against an empty remainder */
+    len = strlen(start);
+    if (len > 0 && (start[len - 1] == '\n' || start[len - 1] == '\r'))
+        len--;
+
+    /*
+     * Copy only the part that is kept, straight into heap memory; the
+     * document can be far too large for a stack buffer.
+     */
+    xml = malloc(len + 1);
+    if (xml != NULL) {
+        memcpy(xml, start, len);
+        xml[len] = '\0';
+    }
+
+    xmlFree(xmlbuf);
+    return xml;
+
+}