/*
 * join F1 F2 on stuff
 *
 * Relational join of two text files on one field of each.  Both files are
 * read line by line and walked in step on the join field (compared with
 * strcmp), so the merge logic expects both inputs to be ordered on that
 * field.  File 2 is repositioned with fseek() and therefore must be a
 * seekable file; file 1 may be "-" for standard input.
 *
 * Options handled by main():
 *	-a[12]		also print unpairable lines from file 1, 2 or both
 *	-e str		string printed in place of empty/missing -o fields
 *	-tc		use character c as the field separator
 *	-o list		output exactly the fields n.m (file n, field m)
 *	-j[12] m	join on field m (of file 1, of file 2, or of both)
 */

#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define F1 0
#define F2 1
#define NFLD 20 /* max field per line */
#define comp() cmp(ppi[F1][j1], ppi[F2][j2])

/* Read the next line of file 1 / file 2 into main()'s n1 / n2. */
#define get1() n1 = input(F1)
#define get2() n2 = input(F2)

FILE *f[2];
char buf[2][BUFSIZ];		/* input lines */
char *ppi[2][NFLD + 1];		/* pointers to fields in lines (+1 end slot) */
char *s1, *s2;			/* not referenced by the code in this file */
int j1 = 1;			/* join of this field of file 1 */
int j2 = 1;			/* join of this field of file 2 */
int olist[2*NFLD];		/* output these fields */
int olistf[2*NFLD];		/* from these files */
int no;				/* number of entries in olist */
int sep1 = ' ';			/* default field separator */
int sep2 = '\t';
char *null = "";		/* printed for empty or missing -o fields */
int unpub1;			/* not referenced by the code in this file */
int unpub2;			/* not referenced by the code in this file */
int aflg;			/* bit 0: -a1 given, bit 1: -a2 given */

/* Stand-in for every field a line does not have, so that comp() and
 * output() always see a valid (empty) string instead of a stale pointer. */
static char nofield[1] = "";

int input(int n);
void output(int on1, int on2);
void error(const char *fmt, ...);
int cmp(char *s1, char *s2);

/*
 * Parse a 1-origin field number and return it unchanged.
 * Exits through error() unless the number lies in 1..NFLD; trailing
 * characters after the digits are ignored, as atoi() did.
 */
static int
fieldno(const char *s)
{
	char *end;
	long v;

	v = strtol(s, &end, 10);
	if (end == s || v < 1 || v > NFLD)
		error("bad field number %s", s);
	return (int)v;
}

/*
 * Parse the options, open the two files and run the merge.
 * Returns 0 on completion; usage and open failures exit through error().
 */
int
main(int argc, char *argv[])
{
	int n1, n2;		/* field counts of the current lines; 0 = none */
	long top2 = 0;		/* offset of the F2 line that ended the last run */
	long bot2;		/* offset of the F2 line that starts the current run */

	while (argc > 1 && argv[1][0] == '-') {
		if (argv[1][1] == '\0')
			break;		/* bare "-" is the file 1 operand */
		switch (argv[1][1]) {
		case 'a':
			switch (argv[1][2]) {
			case '1':
				aflg |= 1;
				break;
			case '2':
				aflg |= 2;
				break;
			default:
				aflg |= 3;
			}
			break;
		case 'e':
			if (argc < 3)
				error("usage: join [-j1 x -j2 y] [-o list] file1 file2");
			null = argv[2];
			argv++;
			argc--;
			break;
		case 't':
			sep1 = sep2 = argv[1][2];
			break;
		case 'o':
			/* Consume following "1.m" / "2.m" arguments; stop at the
			 * first argument of another shape or at end of argv. */
			for (no = 0; no < 2*NFLD && argc > 2; no++) {
				if (argv[2][0] == '1' && argv[2][1] == '.')
					olistf[no] = F1;
				else if (argv[2][0] == '2' && argv[2][1] == '.')
					olistf[no] = F2;
				else
					break;
				olist[no] = fieldno(&argv[2][2]) - 1;	/* 0 origin */
				argc--;
				argv++;
			}
			break;
		case 'j':
			if (argc < 3)
				error("usage: join [-j1 x -j2 y] [-o list] file1 file2");
			if (argv[1][2] == '1')
				j1 = fieldno(argv[2]);
			else if (argv[1][2] == '2')
				j2 = fieldno(argv[2]);
			else
				j1 = j2 = fieldno(argv[2]);
			argc--;
			argv++;
			break;
		default:
			break;		/* unrecognised options are ignored */
		}
		argc--;
		argv++;
	}
	if (argc != 3)
		error("usage: join [-j1 x -j2 y] [-o list] file1 file2");
	j1--;
	j2--;	/* everyone else believes in 0 origin */
	if (argv[1][0] == '-')
		f[F1] = stdin;
	else if ((f[F1] = fopen(argv[1], "r")) == NULL)
		error("can't open %s", argv[1]);
	if ((f[F2] = fopen(argv[2], "r")) == NULL)
		error("can't open %s", argv[2]);

	get1();
	bot2 = ftell(f[F2]);
	get2();
	while ((n1 > 0 && n2 > 0) || (aflg != 0 && n1 + n2 > 0)) {
		if ((n1 > 0 && n2 > 0 && comp() > 0) || n1 == 0) {
			/* F2 line sorts first, or F1 has no line: advance F2 */
			if (aflg & 2)
				output(0, n2);
			bot2 = ftell(f[F2]);
			get2();
		} else if ((n1 > 0 && n2 > 0 && comp() < 0) || n2 == 0) {
			/* F1 line sorts first, or F2 has no line: advance F1 */
			if (aflg & 1)
				output(n1, 0);
			get1();
		} else /*(n1>0 && n2>0 && comp()==0)*/ {
			/* Pair the current F1 line with every F2 line of the run;
			 * top2 ends up at the first F2 line that did not match. */
			while (n2 > 0 && comp() == 0) {
				output(n1, n2);
				top2 = ftell(f[F2]);
				get2();
			}
			/* Back to the start of the run, then take the next F1 line. */
			fseek(f[F2], bot2, SEEK_SET);
			get2();
			get1();
			for (;;) {
				if (n1 > 0 && n2 > 0 && comp() == 0) {
					output(n1, n2);
					get2();
				} else if ((n1 > 0 && n2 > 0 && comp() < 0) || n2 == 0) {
					/* F2 ran off the run: replay it for the next F1 line */
					fseek(f[F2], bot2, SEEK_SET);
					get2();
					get1();
				} else /*(n1>0 && n2>0 && comp()>0 || n1==0)*/ {
					/* F1 is past the run: resume F2 just after it */
					fseek(f[F2], top2, SEEK_SET);
					bot2 = top2;
					get2();
					break;
				}
			}
		}
	}
	return (0);
}

/*
 * Get the next input line of file n (F1 or F2) and split it into fields.
 *
 * The line is read into buf[n] and cut up in place: each separator that
 * ends a field is overwritten with '\0' and ppi[n][k] is pointed at the
 * start of field k.  With the default separator (sep1 == ' ') runs of
 * blanks and tabs before a field are skipped.  Every slot of ppi[n] after
 * the last field is pointed at an empty string.
 *
 * Returns the number of fields found, or 0 when fgets() reports end of
 * file or an error.  NOTE: a line without any field also returns 0, which
 * callers cannot tell apart from end of file.  A line with more than NFLD
 * fields exits through error().
 */
int
input(int n)
{
	char *bp = buf[n];
	char **pp = ppi[n];
	int nf = 0;
	int c;
	int k;

	if (fgets(bp, BUFSIZ, f[n]) == NULL)
		return (0);
	for (;;) {
		if (sep1 == ' ') {	/* strip multiples */
			while ((c = *bp) == sep1 || c == sep2)
				bp++;	/* skip blanks */
		} else {
			c = *bp;
		}
		if (c == '\n' || c == '\0')
			break;
		if (nf >= NFLD)
			error("too many fields in line");
		pp[nf++] = bp;		/* record beginning */
		while ((c = *bp) != sep1 && c != '\n' && c != sep2 && c != '\0')
			bp++;
		if (c == '\0')
			break;		/* no trailing newline: never step past the terminator */
		*bp++ = '\0';		/* mark end by overwriting blank */
	}
	for (k = nf; k <= NFLD; k++)
		pp[k] = nofield;	/* no stale pointers from an earlier line */
	return (nf);
}

/*
 * Print one output line built from the current lines of both files.
 *
 * on1 and on2 are the field counts of the current file 1 / file 2 lines;
 * 0 means that file contributes nothing (an unpaired line of the other).
 * Without an -o list: the join field first, then the remaining fields of
 * file 1, then those of file 2, separated by sep1.  With an -o list:
 * exactly the listed fields, with `null' substituted for a field that is
 * empty or not present on the line.
 */
void
output(int on1, int on2)
{
	int i;
	char *temp;

	if (no <= 0) {	/* default case */
		printf("%s", on1 ? ppi[F1][j1] : ppi[F2][j2]);
		for (i = 0; i < on1; i++)
			if (i != j1)
				printf("%c%s", sep1, ppi[F1][i]);
		for (i = 0; i < on2; i++)
			if (i != j2)
				printf("%c%s", sep1, ppi[F2][i]);
		printf("\n");
	} else {
		for (i = 0; i < no; i++) {
			temp = ppi[olistf[i]][olist[i]];
			if ((olistf[i] == F1 && on1 <= olist[i]) ||
			    (olistf[i] == F2 && on2 <= olist[i]) ||
			    *temp == 0)
				temp = null;
			printf("%s", temp);
			if (i == no - 1)
				printf("\n");
			else
				printf("%c", sep1);
		}
	}
}

/*
 * Print "join: " followed by a printf-style message and a newline on
 * stderr, then exit with status 1.  Never returns.
 */
void
error(const char *fmt, ...)
{
	va_list ap;

	fprintf(stderr, "join: ");
	va_start(ap, fmt);
	vfprintf(stderr, fmt, ap);
	va_end(ap);
	fprintf(stderr, "\n");
	exit(1);
}

/*
 * Compare two join fields; returns the strcmp() result (<0, 0 or >0).
 */
int
cmp(char *s1, char *s2)
{
	return (strcmp(s1, s2));
}