ceptr
 All Data Structures Files Functions Variables Typedefs Macros Modules Pages
semtrex.c
Go to the documentation of this file.
1 
16 #include "semtrex.h"
17 #include "def.h"
18 #include "debug.h"
19 
21 SState matchstate = {NULL,0,StateMatch}; /* only one instance of the match state*/
22 
23 char *G_s_str[]={"StateSymbol","StateAny","StateValue","StateSplit","StateMatch","StateGroupOpen","StateGroupClose","StateDescend","StateWalk","StateNot"};
24 
25 
31 typedef union Ptrlist Ptrlist;
32 union Ptrlist
33 {
34  Ptrlist *next;
35  SState *s;
36 };
37 
41 Ptrlist*
42 list1(SState **outp)
43 {
44  Ptrlist *l;
45 
46  l = (Ptrlist*)outp;
47  l->next = NULL;
48  return l;
49 }
50 
54 void
55 patch(Ptrlist *l, SState *s,int level)
56 {
57  Ptrlist *next;
58 
59  for(; l; l=next){
60  next = l->next;
61 
62  // This is a wicked trick: The Ptrlist will always be the address of the
63  // SState *out element of the SState structure. Thus, we can get access to the
64  // transition by simply adding 1 to the pointer. Then we can adjust the the transition
65  // pop value by adding in the current level.
66  TransitionType *tr = (TransitionType *)(l+1);
67  // @todo fix this bogus pointer math (maybe by converting TransitionType to a struct?)
68  StateType type = *(StateType *)(sizeof(TransitionType)+(char *)tr);
69 debug(D_STX_BUILD,"patching %s to %s with input:%d states tr:%d\n",G_s_str[type],G_s_str[s->type],(int)level,(int)*tr);
70  if (*tr != TransitionNone) {
71  // if (isTransitionUp(transition)) {
72  // adjust the level so we pop to the right place
73  *tr += -level;
74  // if transition is to next, make sure it's because the source state actually
75  // consumed a node
76  if (*tr==0 && ((type != StateSymbol) && (type != StateAny) && (type != StateValue)))
77  *tr = TransitionNone;
78  }
79  l->s = s;
80  }
81 }
82 
86 Ptrlist*
88 {
89  Ptrlist *oldl1;
90 
91  oldl1 = l1;
92  while(l1->next)
93  l1 = l1->next;
94  l1->next = l2;
95  return oldl1;
96 }
97 
98 
99 //#define state(t,sP) _state(t,sP,0)
103 SState *state(StateType type,int *statesP,int level) {
104  SState *s = malloc(sizeof(SState));
105  s->out = NULL;
106  s->out1 = NULL;
107  s->transition = level;
108  s->transition1 = level;
109  s->type_ = s->type = type;
110  s->_did = 0;
111  (*statesP)++;
112  return s;
113 }
114 
115 int G_group_id;
119 char * __stx_makeFA(T *t,SState **in,Ptrlist **out,int level,int *statesP) {
120  SState *s,*i,*last,*s1,*s2;
121  Ptrlist *o,*o1;
122  char *err;
123  int state_type = -1;
124  int x;
125  SemanticID group_symbol;
126  int group_id;
127  T *v;
128 
129  TransitionType tr;
130 
131  int c = _t_children(t);
132  Symbol sym = _t_symbol(t);
133  switch(sym.id) {
134  case SEMTREX_VALUE_LITERAL_ID:
135  case SEMTREX_VALUE_LITERAL_NOT_ID:
136  debug(D_STX_BUILD,"SYM=\n");
137  state_type = StateValue;
138  s = state(state_type,statesP,level);
139  s->data.value.flags = (sym.id == SEMTREX_VALUE_LITERAL_NOT_ID) ? LITERAL_NOT : 0;
140  // copy the value set (which must be the first child) from the semtrex into the state
141  v = _t_child(t,1);
142  if (!v) {
143  raise_error("expecting value or SEMTREX_VALUE_SET as first child of SEMTREX_VALUE_LITERAL");
144  }
145  if (semeq(_t_symbol(v),SEMTREX_VALUE_SET)) s->data.value.flags |= LITERAL_SET;
146 
147  s->data.value.values = _t_clone(v);
148  *in = s;
149  *out = list1(&s->out);
150  break;
151  case SEMTREX_SYMBOL_LITERAL_ID:
152  case SEMTREX_SYMBOL_LITERAL_NOT_ID:
153  debug(D_STX_BUILD,"SYM\n");
154  state_type = StateSymbol;
155 
156  v = _t_child(t,1);
157  int is_set;
158  Symbol vsym = _t_symbol(v);
159  if (!v || !((is_set = semeq(SEMTREX_SYMBOL_SET,vsym)) || semeq(SEMTREX_SYMBOL,vsym))) {
160  raise_error("expecting SEMTREX_SYMBOL_SET or SEMTREX_SYMBOL as first child of SEMTREX_SYMBOL_LITERAL");
161  }
162  if (c > 2) return "Symbol literal must have 0 or 1 children other than the symbol/set";
163  s = state(state_type,statesP,level);
164  s->data.symbol.flags = (sym.id == SEMTREX_SYMBOL_LITERAL_NOT_ID) ? LITERAL_NOT : 0;
165  if (is_set) s->data.symbol.flags |= LITERAL_SET;
166  s->data.symbol.symbols = _t_clone(v);
167  *in = s;
168  if (c > 1) {
169  err = __stx_makeFA(_t_child(t,2),&i,&o,level-1,statesP);
170  if (err) return err;
171  s->out = i;
172  s->transition = TransitionDown;
173  *out = o;
174  }
175  else {
176  *out = list1(&s->out);
177  }
178  break;
179  case SEMTREX_SYMBOL_ANY_ID:
180  debug(D_STX_BUILD,"ANY\n");
181  state_type = StateAny;
182  if (c > 1) return "Symbol any must have 0 or 1 children";
183 
184  s = state(state_type,statesP,level);
185 
186  *in = s;
187  if (c > 0) {
188  err = __stx_makeFA(_t_child(t,1),&i,&o,level-1,statesP);
189  if (err) return err;
190  s->out = i;
191  s->transition = TransitionDown;
192  *out = o;
193  }
194  else {
195  *out = list1(&s->out);
196  }
197  break;
198  case SEMTREX_SEQUENCE_ID:
199  debug(D_STX_BUILD,"SEQ\n");
200  if (c == 0) return "Sequence must have children";
201  last = 0;
202  for(x=c;x>=1;x--) {
203  err = __stx_makeFA(_t_child(t,x),&i,&o,level,statesP);
204  if (err) return err;
205  if (last) patch(o,last,level);
206  else *out = o;
207  last = i;
208  *in = i;
209  }
210  break;
211  case SEMTREX_OR_ID:
212  debug(D_STX_BUILD,"OR\n");
213  if (c != 2) return "Or must have 2 children";
214  s = state(StateSplit,statesP,TransitionNone);
215  *in = s;
216  err = __stx_makeFA(_t_child(t,1),&i,&o,level,statesP);
217  if (err) return err;
218  s->out = i;
219  err = __stx_makeFA(_t_child(t,2),&i,&o1,level,statesP);
220  if (err) return err;
221  s->out1 = i;
222  *out = append(o,o1);
223  break;
224  case SEMTREX_ZERO_OR_MORE_ID:
225  debug(D_STX_BUILD,"*\n");
226  if (c != 1) raise_error("expecting 1 child for SEMTREX_ZERO_OR_MORE got %d, %s\n",c,t2s(t));
227 
228  if (c != 1) return "Star must have 1 child";
229  s = state(StateSplit,statesP,level);
230  *in = s;
231  err = __stx_makeFA(_t_child(t,1),&i,&o,level,statesP);
232  if (err) return err;
233  s->out = i;
234  s->transition = TransitionNone;
235  patch(o,s,level);
236  *out = list1(&s->out1);
237  break;
238  case SEMTREX_ONE_OR_MORE_ID:
239  debug(D_STX_BUILD,"+\n");
240  if (c != 1) return "Plus must have 1 child";
241  s = state(StateSplit,statesP,level);
242  err = __stx_makeFA(_t_child(t,1),&i,&o,level,statesP);
243  if (err) return err;
244  *in = i;
245  s->out = i;
246  s->transition = TransitionNone;
247  patch(o,s,level);
248  *out = list1(&s->out1);
249  break;
250  case SEMTREX_ZERO_OR_ONE_ID:
251  debug(D_STX_BUILD,"?\n");
252  if (c != 1) return "Question must have 1 child";
253  s = state(StateSplit,statesP,level);
254  *in = s;
255  err = __stx_makeFA(_t_child(t,1),&i,&o,level,statesP);
256  if (err) return err;
257  s->out = i;
258  s->transition = TransitionNone;
259  *out = append(o,list1(&s->out1));
260  break;
261  case SEMTREX_GROUP_ID:
262  debug(D_STX_BUILD,"GROUP\n");
263  if (c != 1) return "Group must have 1 child";
264  s = state(StateGroupOpen,statesP,TransitionNone);
265  *in = s;
266  group_symbol = *(SemanticID *)_t_surface(t);
267  group_id = ++G_group_id;
268  s->data.groupo.symbol = group_symbol;
269  s->data.groupo.uid = group_id;
270  err = __stx_makeFA(_t_child(t,1),&i,&o,level,statesP);
271  if (err) return err;
272  s->out = i;
273  s1 = state(StateGroupClose,statesP,TransitionNone);
274  patch(o,s1,level);
275  s1->data.groupc.openP = s;
276  *out = list1(&s1->out);
277  break;
278  case SEMTREX_DESCEND_ID:
279  debug(D_STX_BUILD,"DESCEND\n");
280  if (c != 1) return "Descend must have 1 child";
281  s = state(StateDescend,statesP,TransitionDown);
282  *in = s;
283  err = __stx_makeFA(_t_child(t,1),&i,&o,level-1,statesP);
284  if (err) return err;
285  s->out = i;
286  *out = o;
287  break;
288  case SEMTREX_NOT_ID:
289  debug(D_STX_BUILD,"NOT\n");
290  if (c != 1) return "Not must have 1 child";
291  s = state(StateNot,statesP,TransitionNone);
292  *in = s;
293  err = __stx_makeFA(_t_child(t,1),&i,&o,level,statesP);
294  if (err) return err;
295  s->out = i;
296  *out = append(o,list1(&s->out1));
297  break;
298  case SEMTREX_WALK_ID:
299  debug(D_STX_BUILD,"WALK\n");
300  if (c != 1) return "Walk must have 1 child";
301  s = state(StateWalk,statesP,TransitionNone);
302  *in = s;
303  err = __stx_makeFA(_t_child(t,1),&i,&o,level,statesP);
304  if (err) return err;
305  s->out = i;
306  *out = o;
307  break;
308  default:
309  return "Unknown SEMTREX SYMBOL";
310  }
311  if (debugging(D_STX_BUILD)) {
312  char buf[20000];
313  int x = level*-1;
314  debug(D_STX_BUILD,"%d:%.*s%s\n",x,x,"_______________________",_stx_dump(*in,buf));
315  }
316 
317  return 0;
318 }
319 
323 SState * _stx_makeFA(T *t,int *statesP) {
324  SState *in;
325  Ptrlist *o;
326  G_group_id = 0;
327  char *err = __stx_makeFA(t,&in,&o,0,statesP);
328  if (err != 0) {raise_error("%s",err);}
329  patch(o,&matchstate,0);
330  // printf("\n");_stx_dump(in);
331  return in;
332 }
333 
334 static int free_id = 0;
335 
339 int __stx_freeFA(SState *s,int id) {
340  if ((s->_did != id) && (s != &matchstate)) {
341  s->_did = id;
342  if (s->out) if (__stx_freeFA(s->out,id)) s->out = 0;
343  if (s->out1) if (__stx_freeFA(s->out1,id)) s->out1 = 0;
344  return 0;
345  }
346  return 1;
347 }
348 
353  if (s->out) __stx_freeFA2(s->out);
354  if (s->out1) __stx_freeFA2(s->out1);
355  if (s->type == StateValue) {
356  _t_free(s->data.value.values);
357  }
358  if (s->type == StateSymbol) {
359  _t_free(s->data.symbol.symbols);
360  }
361  free(s);
362 }
363 
367 void _stx_freeFA(SState *s) {
368  __stx_freeFA(s,++free_id);
369  __stx_freeFA2(s);
370 }
371 
379  if (!t) return 0;
380  int i,c = _t_children(s);
381  Symbol sym = _t_symbol(t);
382  for (i=1;i<=c;i++) {
383  if (semeq(sym,*(Symbol *)_t_surface(_t_child(s,i)))) return 1;
384  }
385  return 0;
386 }
387 
395  if (!t) return 0;
396  int i,c = _t_children(s);
397  Symbol sym = _t_symbol(t);
398  for (i=1;i<=c;i++) {
399  if (semeq(sym,*(Symbol *)_t_surface(_t_child(s,i)))) return 0;
400  }
401  return 1;
402 }
403 
404 /* advance the cursor according to the instructions in the state*/
405 T *__transition(TransitionType transition,T *source_t,int *cursor) {
406  int i;
407  i = 0;
408  char buf[1000];
409  debug(D_STX_MATCH,"transition: cursor %s\n",_t_sprint_path(cursor,buf));
410  while(cursor[i] != TREE_PATH_TERMINATOR) i++;
411 // if (!t) debug(D_STX_MATCH,"transition: t is null\n");
412 // if (!t) return 0;
413  if (transition == TransitionDown) {
414  debug(D_STX_MATCH,"transition: down\n");
415  //@todo add max checking
416  cursor[i++] = 1;
417  cursor[i]= TREE_PATH_TERMINATOR;
418  }
419  else if (isTransitionPop(transition)) {
420  debug(D_STX_MATCH,"transition: popping %d\n",transition);
421  if (i+transition <0) {
422  raise_error("transition: would pop above root!!\n");
423  }
424  i = i+transition;
425  cursor[i] = TREE_PATH_TERMINATOR;
426  i--;
427  // popping always means also moving to next child after the pop
428  if (i >= 0)
429  cursor[i]++;
430  }
431  else if (isTransitionNext(transition)) {
432  debug(D_STX_MATCH,"transition: next\n");
433  i--;
434  if (i >= 0) cursor[i]++;
435  else {
436  // @todo this is weird because we are setting the path to something that
437  // we know will be invalid, but we don't have some true ontological representation
438  // of a "NULL_PATH" which we probably should really have, but that means adding
439  // checks for null path all over the place in tree.c
440  cursor[0] = -2;
441  cursor[1]= TREE_PATH_TERMINATOR;
442  }
443  }
444  T *t = _t_get(source_t,cursor);
445  debug(D_STX_MATCH,"transition: result %s %s\n",_t_sprint_path(cursor,buf),!t ? "NULL":t2s(t));
446  return t;
447 }
448 
449 // helper to see if the surface of given tree nodes matched
451 int _val_match(T *t,T *t1) {
452 
453  int i;
454  size_t l = _t_size(t1);
455  debug(D_STX_MATCH,"comparing sizes %ld,%ld\n",l,_t_size(t));
456  if (l != _t_size(t)) return 0;
457  // @todo this is dangerous because some surfaces, if they come from
458  // c sructures, might have extra bytes in them that aren't cleared/set by the compiler
459  // and will thus be different to a memcmp even though the values being stored
460  // are actually the same.
461  i = memcmp(_t_surface(t),_t_surface(t1),l);
462  debug(D_STX_MATCH,"compare result: %d\n",i);
463  return i==0;
464 }
465 
466 // convert cpointer SEMTREX_MATCH_CURSOR elements to MATCHED_PATH and SIBLING COUNT elements
467 void __fix(T *source_t,T *r) {
468  T *m1,*m2;
469 
470  // get the start and end cursors
471  T *start_c = *(T **)_t_surface(m1 = _t_child(r,2));
472  T *end_c = *(T **)_t_surface(m2 = _t_child(r,3));
473 
474  // morph the start cursor in the match path
475  int *p = _t_get_path(start_c);
476  __t_morph(m1,SEMTREX_MATCH_PATH,p,sizeof(int)*(_t_path_depth(p)+1),1);
477 
478  // now figure out how many children were matched
479  int d = _t_path_depth(p);
480  int i;
481 
482  d--;
483  if (d < 0) { i = 1;}
484  else if (!end_c) {
485  T *parent = _t_parent(start_c);
486  if (!parent) i = 1;
487  else {
488  int pc = _t_children(parent);
489  i = pc - p[d] + 1;
490  }
491  }
492  else {
493  int* p_end;
494  p_end = _t_get_path(end_c);
495  if (_t_path_depth(p_end) < d) {
496  raise_error("whoa! Mismatched path depths!");
497  }
498  if (debugging(D_STX_MATCH)) {
499  char buf[255];
500  _t_sprint_path(p,buf);
501  debug(D_STX_MATCH,"start path:%s\n",buf);
502  _t_sprint_path(p_end,buf);
503  debug(D_STX_MATCH," end path:%s\n",buf);
504  }
505  i = p_end[d]- p[d];
506  free(p_end);
507  }
508  free(p);
509  __t_morph(m2,SEMTREX_MATCH_SIBLINGS_COUNT,&i,sizeof(int),0);
510  int c = _t_children(r);
511  for(i=4;i<=c;i++) {
512  end_c = _t_child(r,i);
513  __fix(source_t,end_c);
514  }
515 }
516 
517 #define MAX_BRANCH_DEPTH 5000
518 #define CURSOR_MAX_DEPTH 100
519 // structure to hold backtracking data for match algorithm
520 typedef struct BranchPoint {
521  T *walk_root;
522  int *walk_cursor;
523  int walk_len;
524  SState *s;
525  TransitionType transition;
526  int cursor[CURSOR_MAX_DEPTH];
527 // T *cursor;
528 // T *cursor_prev;
529  T *match;
530  int *r_path;
531 } BranchPoint;
532 
533 char * __stx_dump_state(SState *s,char *buf);
534 char G_stx_debug_buf[1000];
535 #define _PUSH_BRANCH(state,t,crs,c,w) { \
536  G_stx_debug_buf[0]=0;debug(D_STX_MATCH,"pushing split branch for backtracking to state %s\n with cursor:%s \n",__stx_dump_state(state,G_stx_debug_buf),c?t2s(c):"NULL"); \
537  if((depth+1)>=MAX_BRANCH_DEPTH) {raise_error("MAX branch depth exceeded");} \
538  stack[depth].s = state; \
539  stack[depth].transition = t; \
540  _t_pathcpy(stack[depth].cursor,crs); \
541  stack[depth].walk_root = w; \
542  if (w) stack[depth].walk_cursor = NULL; \
543  if (rP) { \
544  if (*rP) { \
545  stack[depth].match = _t_clone(*rP); \
546  stack[depth].r_path = _t_get_path(r); \
547  } \
548  else stack[depth].match = 0; \
549  } \
550  depth++; \
551 }
552 
553 #define PUSH_BRANCH(state,t,crs,c) _PUSH_BRANCH(state,t,crs,c,0)
554 #define PUSH_WALK_POINT(state,t,crs,c) _PUSH_BRANCH(state,t,crs,c,c)
555 
556 #define FAIL {s=0;break;}
557 #define TRANSITION(x) if (!t) {FAIL;}; if (!x) {FAIL;}; t=__transition(s->transition,source_t,cursor); s = s->out;
558 
567 int __t_match(T *semtrex,T *source_t,T **rP) {
568  int states;
569  char buf[5000];
570  BranchPoint stack[MAX_BRANCH_DEPTH];
571 
572  int depth = 0;
573  T *t = source_t;
574  int matched;
575  T *r = 0,*x;
576  if (rP) *rP = 0;
577 
578  SgroupOpen *o;
579 
580  SState *fa = _stx_makeFA(semtrex,&states);
581  SState *s = fa;
582 
583  int cursor[100] = {TREE_PATH_TERMINATOR};
584 
585  while (s && s != &matchstate) {
586  t = _t_get(source_t,cursor);
587  debug(D_STX_MATCH,"IN:%s\n",G_s_str[s->type]);
588  debug(D_STX_MATCH," CURSOR: %s\n",_t_sprint_path(cursor,buf));
589  if (s->type == StateGroupOpen) {
590  o = &s->data.groupo;
591  debug(D_STX_MATCH," for %s\n",_sem_get_name(G_sem,o->symbol));
592  }
593  if (s->type == StateGroupClose) {
594  // get the match structure from the GroupOpen state pointed to by this state
595  o = &s->data.groupc.openP->data.groupo;
596  debug(D_STX_MATCH," for %s\n",_sem_get_name(G_sem,o->symbol));
597  }
598  if (debugging(D_STX_MATCH)) {G_cursor=t;G_cur_stx_state=s;debug(D_STX_MATCH," FSA:%s\n",_stx_dump(fa,G_stx_dump_buf));debug(D_STX_MATCH," tree:%s\n",!t ? "NULL" : _t2s(G_sem,_t_root(t)));}
599  if (rP && *rP) {debug(D_STX_MATCH,"MATCH:\n%s\n",__t2s(G_sem,*rP,INDENT));}
600 
601 
602  switch(s->type) {
603  case StateValue:
604  if (!t) {FAIL;}
605  else {
606  T *v = s->data.value.values;
607 
608  if (!t) FAIL;
609  int count = _t_children(v);
610  int i;
611  debug(D_STX_MATCH," seeking:%s%s\n",s->data.value.flags & LITERAL_NOT ? " ~":"",__t_dump(G_sem,v,0,buf));
612  Symbol ts = _t_symbol(t);
613  if (s->data.value.flags & LITERAL_NOT) {
614  if (s->data.value.flags & LITERAL_SET) {
615  // all in the set must not match
616  matched = 1;
617  for(i=1;i<=count && matched;i++) {
618  x = _t_child(v,i);
619  matched = !(semeq(ts,_t_symbol(x)) && _val_match(t,x));
620  }
621  }
622  else {
623  matched = !(semeq(ts,_t_symbol(v)) && _val_match(t,v));
624  }
625  }
626  else {
627  if (s->data.value.flags & LITERAL_SET) {
628  // at least one in the set much match
629  matched = 0;
630  for(i=1;i<=count && !matched; i++) {
631  x = _t_child(v,i);
632  matched = semeq(ts,_t_symbol(x)) && _val_match(t,x);
633  }
634  }
635  else {
636  matched = semeq(ts,_t_symbol(v)) && _val_match(t,v);
637  }
638  }
639 
640  if (!matched) FAIL;
641  }
642  t = __transition(s->transition,source_t,cursor);
643  s = s->out;
644  break;
645  case StateSymbol:
646  if (s->data.symbol.flags & LITERAL_SET) {
647  TRANSITION((s->data.symbol.flags & LITERAL_NOT) ?
649  __symbol_set_contains(s->data.symbol.symbols,t));
650  }
651  else {
652  if (!t) FAIL;
653  int matched = semeq(_t_symbol(t),*(Symbol *)_t_surface(s->data.symbol.symbols));
654  TRANSITION(s->data.symbol.flags & LITERAL_NOT ? !matched : matched);
655  }
656  break;
657  case StateAny:
658  TRANSITION(1);
659  break;
660  case StateSplit:
661  PUSH_BRANCH(s->out1,s->transition1,cursor,t);
662  s = s->out;
663  break;
664  case StateWalk:
665  s = s->out;
666  // the walk point branch only gets pushed once because if the branch fails
667  // it just gets restarted with the cursor advanced one step from the last
668  // time through. This is why we push on the destination state from the walk
669  // state instead of on the walk state itself.
670  PUSH_WALK_POINT(s,s->transition,cursor,t);
671  break;
672  case StateGroupOpen:
673  o = &s->data.groupo;
674  if (!rP) {
675  // if we aren't collecting up match results simply follow groups through
676  s = s->out;
677  }
678  else {
679  if (!t) FAIL;
680 
681  r = _t_newi(r,SEMTREX_MATCH,o->uid);
682  if (!*rP) *rP = r; // save the root match
683  T *x = _t_news(r,SEMTREX_MATCH_SYMBOL,o->symbol);
684  // save the current cursor as a c cpointer. This will get converted to
685  // an actual MATCH_PATH later in __fix if it turns out that this particular
686  // part of the tree actually does match.
687  _t_new(r,SEMTREX_MATCH_CURSOR,&t,sizeof(t));
688  s = s->out;
689  }
690  break;
691  case StateGroupClose:
692  if (rP) {
693 
694  int pt[2] = {3,TREE_PATH_TERMINATOR};
695  T *x = _t_new(0,SEMTREX_MATCH_CURSOR,&t,sizeof(t));
696  _t_insert_at(r, pt, x);
697 
698  T *pp = _t_parent(r);
699  if (pp) r = pp;
700  }
701  s = s->out;
702  break;
703  case StateDescend:
704  t = _t_child(t,1);
705  {
706  int i = 0;
707  //@todo add max checking
708  while(cursor[i] != TREE_PATH_TERMINATOR)i++;
709  cursor[i++] = 1;
710  cursor[i]= TREE_PATH_TERMINATOR;
711  }
712  s = s->out;
713  break;
714  case StateMatch:
715  break;
716  }
717  // if we just had a fail see if there is some backtracking we can do
718  if (!s && depth) {
719  --depth;
720  debug(D_STX_MATCH,"Fail & backtracking possible\n");
721  if (rP) {
722  if (*rP) _t_free(*rP);
723  if ((*rP = stack[depth].match)) {
724  r = _t_get(*rP,stack[depth].r_path);
725  free(stack[depth].r_path);
726  }
727  else r = 0;
728  }
729 
730  // pop back to the state in the FSA where we failed
731  s = stack[depth].s;
732 
733  T *walk = stack[depth].walk_root;
734  if (!walk) {
735  // if this isn't a walk branch then:
736 
737  // restore the saved cursor
738  _t_pathcpy(cursor,stack[depth].cursor);
739  t = _t_get(source_t,cursor);
740  debug(D_STX_MATCH," popping to--%s %s\n",_t_sprint_path(cursor,buf), t ? t2s(t) : "NULL");
741  debug(D_STX_MATCH," running transition:%d\n",stack[depth].transition);
742 
743  // run the transition that we saved for
744  // moving to that state that normally would have been run in the TRANSITION macro
745  t = __transition(stack[depth].transition,source_t,cursor);
746  }
747  else {
748  // if it is a walk branch, then take the next step in the walk.
749  t = _t_path_walk(walk,&stack[depth].walk_cursor,&stack[depth].walk_len);
750  // if there is one then restart the branch otherwise we failed
751  if (t) {
752  _t_pathcpy(cursor,stack[depth].walk_cursor);
753  depth++;
754  debug(D_STX_MATCH," walking to--%s %s\n",_t_sprint_path(cursor,buf), t ? t2s(t) : "NULL");
755  }
756  else s = 0;
757  }
758  }
759  }
760  if (rP) {
761  if (s) {
762  debug(D_STX_MATCH,"FIXING RESULTS:\n%s\n",__t2s(G_sem,*rP,INDENT));
763  // convert the cursor pointers to matched paths/sibling counts
764  __fix(source_t,*rP);
765  }
766  else if(*rP) {
767  _t_free(*rP);
768  }
769  }
770  // clean up any remaining stack frames
771  while (depth--) {
772  if (stack[depth].walk_root) {
773  if (stack[depth].walk_cursor) free(stack[depth].walk_cursor);
774  }
775  if (rP) {
776  if ((r = stack[depth].match)) {
777  _t_free(r);
778  free(stack[depth].r_path);
779  }
780  }
781  }
782  _stx_freeFA(fa);
783  if (s == &matchstate) {
784  debug(D_STX_MATCH,"Matched!\n");
785  return true;
786  }
787  return false;
788 }
789 
798 int _t_matchr(T *semtrex,T *t,T **rP) {
799  return __t_match(semtrex,t,rP);
800 }
801 
809 int _t_match(T *semtrex,T *t) {
810  return __t_match(semtrex,t,NULL);
811 }
812 
813 T *_stx_get_matched_node(Symbol s,T *match_results,T *match_tree,int *sibs) {
814  T *m = _t_get_match(match_results,s);
815  if (!m) {
816  raise_error("expected to have match!");
817  }
818  int *path = (int *)_t_surface(_t_child(m,SemtrexMatchPathIdx));
819  if (sibs)
820  *sibs = *(int*)_t_surface(_t_child(m,SemtrexMatchSibsIdx));
821  T *x = _t_get(match_tree,path);
822 
823  if (!x) {
824  raise_error("expecting to get a value from match!!");
825  }
826  return x;
827 }
828 
829 void _stx_replace(T *semtrex,T *t,T *replace){
830  T *r;
831  Symbol sym = _t_symbol(replace);
832  while(_t_matchr(semtrex,t,&r)) {
833  int sibs;
834  T *x = _stx_get_matched_node(sym,r,t,&sibs);
835  if (sibs > 1) raise_error("not implemented for sibs > 1");
836  _t_replace_node(x,_t_clone(replace));
837  _t_free(r);
838  }
839 }
840 
848 T *_t_get_match(T *match,Symbol group)
849 {
850  if (!match) return 0;
851  T *s = _t_child(match,1);
852  if (semeq(*(Symbol *)_t_surface(s),group)) {
853  return match;
854  };
855  int i = 4,c = _t_children(match);
856  for(i=4;i<=c;i++) {
857  s =_t_child(match,i);
858  s = _t_get_match(s,group);
859  if (s) return s;
860  }
861  return 0;
862 }
863 
873 T *_t_embody_from_match(SemTable *sem,T *match,Symbol group,T *t) {
874  return __t_embody_from_match(sem,_t_get_match(match,group),t);
875 }
876 
877 T *__t_embody_from_match(SemTable *sem,T *match,T *t) {
878  Symbol s = *(Symbol *)_t_surface(_t_child(match,1));
879  if (semeq(s,NULL_SYMBOL)) return 0;
880  T *e;
881  int i,j = _t_children(match);
882  if (j > 3) {
883  e = _t_new_root(s);
884  for(i=4;i<=j;i++) {
885  T *c = _t_child(match,i);
886  if (c) {
887  T *r = __t_embody_from_match(sem,c,t);
888  if (r) _t_add(e,r);
889  }
890  }
891  }
892  else {
893  int *p;
894  int children = *(int *)_t_surface(_t_child(match,3));
895  Structure st = _sem_get_symbol_structure(sem,s);
896  T *x;
897  switch(st.id) {
898  case CSTRING_ID:
899  return asciiT_tos(t,match,0,s);
900  case INTEGER_ID:
901  return asciiT_toi(t,match,0, s);
902  case INTEGER64_ID:
903  return asciiT_tol(t,match,0, s);
904  case FLOAT_ID:
905  return asciiT_tof(t,match,0,s);
906  case CHAR_ID:
907  return asciiT_toc(t,match,0,s);
908  default:
909  p = (int *)_t_surface(_t_child(match,2));
910  x = _t_get(t,p);
911  e = _t_clone(x);
912  }
913  }
914  return e;
915 }
916 
917 // semtrex dumping code
918 char * __dump_semtrex(SemTable *sem,T *s,char *buf);
919 
920 void __stxd_multi(SemTable *sem,char *x,T *s,char *buf) {
921  char b[4000];
922  T *sub = _t_child(s,1);
923  Symbol ss = _t_symbol(sub);
924  int has_child = (semeq(ss,SEMTREX_SYMBOL_LITERAL_NOT) || semeq(ss,SEMTREX_SYMBOL_LITERAL)) ? 2 : 1;
925  sprintf(buf,(_t_children(s)>has_child || _t_symbol(sub).id==SEMTREX_SEQUENCE_ID) ? "(%s)%s" : "%s%s",__dump_semtrex(sem,sub,b),x);
926 }
927 void __stxd_descend(SemTable *sem,T *s,char *v,char *buf,int skip) {
928  if((_t_children(s)-skip)>0) {
929  char b[4000];
930  T *sub = _t_child(s,1+skip);
931  Symbol ss = _t_symbol(sub);
932  int has_child = (semeq(ss,SEMTREX_SYMBOL_LITERAL_NOT) || semeq(ss,SEMTREX_SYMBOL_LITERAL)) ? 2 : 1;
933  sprintf(buf,_t_children(sub)>has_child?"%s/(%s)":"%s/%s",v,__dump_semtrex(sem,sub,b));
934  }
935  else sprintf(buf,"%s",v);
936 }
937 
938 char * __dump_semtrex(SemTable *sem,T *s,char *buf) {
939  Symbol sym = _t_symbol(s);
940  char b[5000];
941  char b1[5000];
942  char *sn,*bx;
943  T *t,*v,*v1;
944  int i,c,count;
945  SemanticID sid;
946  switch(sym.id) {
947  case SEMTREX_VALUE_LITERAL_ID:
948  case SEMTREX_VALUE_LITERAL_NOT_ID:
949  v = _t_child(s,1); //get the value or set
950  if (semeq(_t_symbol(v),SEMTREX_VALUE_SET)) {
951  count = _t_children(v);
952  v1 = _t_child(v,1);
953  if (!v1) {raise_error("no values in set!");}
954  }
955  else {
956  count = 1;
957  v1 = v;
958  v = s;
959  }
960  sid = _t_symbol(v1); // if set assume values are all the same type
961  sn = _sem_get_name(sem,sid);
962  if (*sn=='<')
963  sprintf(b,"%d.%d.%d",sid.context,sid.semtype,sid.id);
964  else
965  sprintf(b,"%s",sn);
966  Structure st = _sem_get_symbol_structure(sem,sid);
967  if (sym.id == SEMTREX_VALUE_LITERAL_NOT_ID) {
968  sprintf(b+strlen(b),"!");
969  }
970  sprintf(b+strlen(b),"=");
971  if (count > 1)
972  sprintf(b+strlen(b),"{");
973  for(i=1;i<=count;i++) {
974  T *x = _t_child(v,i);
975  if (semeq(st,CSTRING))
976  sprintf(b+strlen(b),"\"%s\"",(char *)(_t_surface(x)));
977  else if (semeq(st,CHAR))
978  sprintf(b+strlen(b),"'%c'",*(char *)(_t_surface(x)));
979  else if (semeq(st,INTEGER))
980  sprintf(b+strlen(b),"%d",*(int *)(_t_surface(x)));
981  else if (semeq(st,FLOAT))
982  sprintf(b+strlen(b),"%f",*(float *)(_t_surface(x)));
983  else sprintf(b+strlen(b),"???x");
984  if (i < count)
985  sprintf(b+strlen(b),",");
986  }
987  if (count > 1)
988  sprintf(b+strlen(b),"}");
989  sprintf(buf,"%s",b);
990  break;
991  case SEMTREX_SYMBOL_LITERAL_NOT_ID:
992  case SEMTREX_SYMBOL_LITERAL_ID:
993 
994  if (semeq(sym, SEMTREX_SYMBOL_LITERAL_NOT)) {
995  b[0] = '!';
996  b[1] = 0;
997  }
998  else b[0] = 0;
999 
1000  v = _t_child(s,1); //get the symbol value or set
1001 
1002  if (semeq(_t_symbol(v),SEMTREX_SYMBOL_SET)) {
1003  count = _t_children(v);
1004  v1 = _t_child(v,1);
1005  if (!v1) {raise_error("no symbols in set!");}
1006  }
1007  else {
1008  count = 1;
1009  v1 = v;
1010  v = s;
1011  }
1012 
1013  if (count > 1) {
1014  sprintf(b+strlen(b),"{");
1015  }
1016  for(i=1;i<=count;i++) {
1017  sid = *(Symbol *)_t_surface(v1);
1018  sn = _sem_get_name(sem,sid);
1019  // ignore "<unknown symbol"
1020  if (*sn=='<')
1021  sprintf(b+strlen(b),"%d.%d.%d",sid.context,sid.semtype,sid.id);
1022  else
1023  sprintf(b+strlen(b),"%s",sn);
1024  v1 = _t_next_sibling(v1);
1025  if (i!=count)
1026  sprintf(b+strlen(b),",");
1027  }
1028  if (count > 1)
1029  sprintf(b+strlen(b),"}");
1030 
1031  __stxd_descend(sem,s,b,buf,1);
1032  break;
1033  case SEMTREX_SYMBOL_ANY_ID:
1034  sprintf(b,".");
1035  __stxd_descend(sem,s,b,buf,0);
1036  break;
1037  case SEMTREX_SEQUENCE_ID:
1038  sn = buf;
1039  DO_KIDS(s,
1040  sprintf(sn,i<_c ? "%s,":"%s",__dump_semtrex(sem,_t_child(s,i),b));
1041  sn += strlen(sn);
1042  );
1043  break;
1044  case SEMTREX_OR_ID:
1045  t = _t_child(s,1);
1046  sn = __dump_semtrex(sem,t,b);
1047  Symbol ss = _t_symbol(t);
1048  int has_child = (semeq(ss,SEMTREX_SYMBOL_LITERAL_NOT) || semeq(ss,SEMTREX_SYMBOL_LITERAL)) ? 2 : 1;
1049  sprintf(buf,(_t_children(t) > has_child) ? "(%s)|":"%s|",sn);
1050  t = _t_child(s,2);
1051  sn = __dump_semtrex(sem,t,b);
1052  ss = _t_symbol(t);
1053  has_child = (semeq(ss,SEMTREX_SYMBOL_LITERAL_NOT) || semeq(ss,SEMTREX_SYMBOL_LITERAL)) ? 2 : 1;
1054  sprintf(buf+strlen(buf),(_t_children(t) > has_child) ? "(%s)":"%s",sn);
1055  break;
1056  case SEMTREX_NOT_ID:
1057  t = _t_child(s,1);
1058  sn = __dump_semtrex(sem,t,b);
1059  sprintf(buf,"~%s",sn);
1060  break;
1061  case SEMTREX_ZERO_OR_MORE_ID:
1062  __stxd_multi(sem,"*",s,buf);
1063  break;
1064  case SEMTREX_ONE_OR_MORE_ID:
1065  __stxd_multi(sem,"+",s,buf);
1066  break;
1067  case SEMTREX_ZERO_OR_ONE_ID:
1068  __stxd_multi(sem,"?",s,buf);
1069  break;
1070  case SEMTREX_GROUP_ID:
1071  sn = _sem_get_name(sem,*(Symbol *)_t_surface(s));
1072  // ignore "<unknown symbol"
1073  if (*sn=='<')
1074  sprintf(buf, "<%s>",__dump_semtrex(sem,_t_child(s,1),b));
1075  else
1076  sprintf(buf, "<%s:%s>",sn,__dump_semtrex(sem,_t_child(s,1),b));
1077  break;
1078  case SEMTREX_DESCEND_ID:
1079  sprintf(buf, "/%s",__dump_semtrex(sem,_t_child(s,1),b));
1080  break;
1081  case SEMTREX_WALK_ID:
1082  sprintf(buf, "(%%%s)",__dump_semtrex(sem,_t_child(s,1),b));
1083  break;
1084  }
1085  return buf;
1086 }
1087 
1096 char * _dump_semtrex(SemTable *sem,T *s,char *buf) {
1097  buf[0] = '/';
1098  __dump_semtrex(sem,s,buf+1);
1099  return buf;
1100 }
1101 
1102 // helper to add a stx_char value literal to a semtrex
1103 T *__stxcv(T *p,char c) {
1104  T *t = _t_newr(p,SEMTREX_VALUE_LITERAL);
1105  _t_newc(t,ASCII_CHAR,c);
1106  return t;
1107 }
1108 
1109 // helper to to add a semtrex literal value set of ascii chars to a semtrex
1110 T *__stxcvm(T *p,int not,int count,...) {
1111  va_list chars;
1112  T *t = _t_newr(p,not?SEMTREX_VALUE_LITERAL_NOT:SEMTREX_VALUE_LITERAL);
1113  T *v = _t_newr(t,SEMTREX_VALUE_SET);
1114 
1115  va_start(chars,count);
1116  int i;
1117  for(i=0;i<count;i++) {
1118  _t_newc(v,ASCII_CHAR,va_arg(chars,int));
1119  }
1120  va_end(chars);
1121 
1122  return t;
1123 }
1124 
1125 // helper to add a bunch of semtrex ors that match a character set
1126 void _stxcs(T *stxx,char *an) {
1127  T *label = _t_newr(stxx,SEMTREX_ONE_OR_MORE);
1128  label = _t_newr(label,SEMTREX_OR);
1129  while(*an) {
1130  __stxcv(label,*an);
1131  an++;
1132  if (*an) label = _t_newr(label,SEMTREX_OR);
1133  }
1134  __stxcv(label,'_');
1135 }
1136 
1137 void _stxl(T *stxx) {
1138  _stxcs(stxx,"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789._");
1139 }
1140 
1141 // temporary function until we get system label table operational
1142 Symbol get_symbol(char *symbol_name,SemTable *sem) {
1143  int ctx;
1144  for (ctx=0;ctx<sem->contexts;ctx++) {
1145  ContextStore *cs = __sem_context(sem,ctx);
1146  if (!cs->definitions) continue;
1147  T *symbols = __sem_get_defs(sem,SEM_TYPE_SYMBOL,ctx);
1148  int i,c = _t_children(symbols);
1149  for(i=1;i<=c;i++) {
1150  T *t = _t_child(symbols,i);
1151  T *c = _t_child(_t_child(t,DefLabelIdx),1);
1152  if (!strcmp(symbol_name,(char *)_t_surface(c))) {
1153  Symbol r = {ctx,SEM_TYPE_SYMBOL,i};
1154  return r;
1155  }
1156  }
1157  }
1158  return NULL_SYMBOL;
1159 }
1160 
// Uncomment to have parseSemtrex print the token tree after every parsing phase.
//#define DUMP_TOKENS
// dump_tokens(str): prints the heading str followed by a dump of the local
// variable `tokens` (so it may only be used where `tokens` is in scope).
// Both variants expand to a single statement so the macro is safe inside an
// unbraced if/else.
#ifdef DUMP_TOKENS
#define dump_tokens(str) do { puts(str); puts(_t2s(G_sem,tokens)); } while (0)
#else
#define dump_tokens(str) do { } while (0)
#endif
1167 
1168 /*
1169  a utility function to move the contents of paren/group tokens as children of the
1170  open token.
1171  Assumes the semtrex results was from a semtrex of the form:
1172  ... <STX_OP:STX_OP,<STX_SIBS:!STX_CP+>,STX_CP>
1173  where the contents is marked by one group (in the case above STX_SIBS) and the
1174  the whole thing is marked by an "open" group
1175 
1176  */
1177 T *wrap(T *tokens,T *results, Symbol contents_s, Symbol open_s) {
1178  T *m = _t_get_match(results,contents_s);
1179  T *om = _t_get_match(results,open_s);
1180 
1181  // transfer the contents nodes to the open node
1182  int count = *(int *)_t_surface(_t_child(m,3));
1183  int *cpath = (int *)_t_surface(_t_child(m,2));
1184  int *opath = (int *)_t_surface(_t_child(om,2));
1185  T *o = _t_get(tokens,opath);
1186  T *parent = _t_parent(o);
1187  int x = cpath[_t_path_depth(cpath)-1];
1188  T *t;
1189  while(count--) {
1190  t = _t_child(parent,x);
1191  _t_detach_by_ptr(parent,t);
1192  _t_add(o,t);
1193  }
1194  // free the close token
1195  t = _t_child(parent,x);
1196  _t_detach_by_ptr(parent,t);
1197  _t_free(t);
1198  return o;
1199 }
1200 
1206 T *makeASCIITree(char *c) {
1207  T *s = _t_new_root(ASCII_CHARS);
1208  while(*c) {
1209  _t_newc(s,ASCII_CHAR,*c);
1210  c++;
1211  }
1212  return s;
1213 }
1214 
1215 char *_asciiT2str(T* asciiT,T* match,T *t,char *buf) {
1216  int path[100];
1217  int sibs = *(int *)_t_surface(_t_child(match,SemtrexMatchSibsIdx));
1218  int *p = (int *)_t_surface(_t_child(match,SemtrexMatchPathIdx));
1219  int j,d = _t_path_depth(p);
1220  if (d>=100) {raise_error("path too deep!");}
1221  memcpy(path,p,sizeof(int)*(d+1));
1222  for(j=0;j<sibs;j++) {
1223  buf[j] = *(char *)_t_surface(_t_get(asciiT,path));
1224  path[d-1]++;
1225  }
1226  buf[j]=0;
1227  return buf;
1228 }
1229 
1233 T *asciiT_toi(T* asciiT,T* match,T *t,Symbol s) {
1234  char buf[10];
1235  _asciiT2str(asciiT,match,t,buf);
1236  return _t_newi(t,s,atoi(buf));
1237 }
1238 
1242 T *asciiT_tol(T* asciiT,T* match,T *t,Symbol s) {
1243  char buf[12];
1244  _asciiT2str(asciiT,match,t,buf);
1245  return _t_newi64(t,s,atol(buf));
1246 }
1247 
1251 T *asciiT_tof(T* asciiT,T* match,T *t,Symbol s) {
1252  char buf[10];
1253  _asciiT2str(asciiT,match,t,buf);
1254  float f = atof(buf);
1255  return _t_new(t,s,&f,sizeof(float));
1256 }
1257 
1261 T *asciiT_tos(T* asciiT,T* match,T *t,Symbol s) {
1262  char buf[255];
1263  _asciiT2str(asciiT,match,t,buf);
1264  return _t_new_str(t,s,buf);
1265 }
1266 
1270 T *asciiT_toc(T* asciiT,T* match,T *t,Symbol s) {
1271  int *path = (int *)_t_surface(_t_child(match,2));
1272  int c = *(int *)_t_surface(_t_get(asciiT,path));
1273  return _t_newc(t,s,c);
1274 }
1275 
1279 T *__sl(T *p, bool not,int count, ...) {
1280  va_list symbols;
1281  T *t = _t_newr(p,not ? SEMTREX_SYMBOL_LITERAL_NOT : SEMTREX_SYMBOL_LITERAL);
1282  T *ss = count > 1 ? _t_newr(t,SEMTREX_SYMBOL_SET) : t;
1283  va_start(symbols,count);
1284  int i;
1285  for(i=0;i<count;i++) {
1286  _t_news(ss,SEMTREX_SYMBOL,va_arg(symbols,Symbol));
1287  }
1288  va_end(symbols);
1289  return t;
1290 }
1291 
1298 T *parseSemtrex(SemTable *sem,char *stx) {
1299  // convert the string into a tree
1300  #ifdef DUMP_TOKENS
1301  printf("\nPARSING:%s\n",stx);
1302  #endif
1303  T *t,*s = makeASCIITree(stx);
1304 
1306  // build the token stream out of an ascii stream
1307  // PATTERN
1308  // "/{STX_TOKENS:(ASCII_CHARS/({STX_SL:ASCII_CHAR='/'})|(({STX_OP:ASCII_CHAR='('})|(({STX_CP:ASCII_CHAR=')'})|(({STX_PLUS:ASCII_CHAR='+'})|(({STX_COMMA:ASCII_CHAR=','})|((ASCII_CHAR='!',{STX_EXCEPT:[a-zA-Z0-9_]+})|(({STX_CG:ASCII_CHAR='}'})|(({STX_STAR:ASCII_CHAR='*'})|(({STX_LABEL:[a-zA-Z0-9_]+})|(ASCII_CHAR='{',{STX_OG:[a-zA-Z0-9_]+},ASCII_CHAR=':')))))))))+)}
1309  T *ts = _t_news(0,SEMTREX_GROUP,STX_TOKENS);
1310  T *g = _sl(ts,ASCII_CHARS);
1311  T *sq = _t_newr(g,SEMTREX_SEQUENCE);
1312  T *p = _t_newr(sq,SEMTREX_ONE_OR_MORE);
1313  T *o = _t_newr(p,SEMTREX_OR);
1314  t = _t_news(o,SEMTREX_GROUP,STX_WALK);
1315  __stxcv(t,'%');
1316  o = _t_newr(o,SEMTREX_OR);
1317  t = _t_news(o,SEMTREX_GROUP,STX_SL);
1318  __stxcv(t,'/');
1319  o = _t_newr(o,SEMTREX_OR);
1320  t = _t_news(o,SEMTREX_GROUP,STX_OP);
1321  __stxcv(t,'(');
1322  o = _t_newr(o,SEMTREX_OR);
1323  t = _t_news(o,SEMTREX_GROUP,STX_CP);
1324  __stxcv(t,')');
1325 
1326  o = _t_newr(o,SEMTREX_OR);
1327  t = _t_news(o,SEMTREX_GROUP,STX_OR);
1328  __stxcv(t,'|');
1329  o = _t_newr(o,SEMTREX_OR);
1330  t = _t_news(o,SEMTREX_GROUP,STX_COMMA);
1331  __stxcv(t,',');
1332 
1333  o = _t_newr(o,SEMTREX_OR);
1334  t = _t_news(o,SEMTREX_GROUP,STX_CG);
1335  __stxcv(t,'>');
1336  o = _t_newr(o,SEMTREX_OR);
1337  t = _t_news(o,SEMTREX_GROUP,SEMTREX_SYMBOL_ANY);
1338  __stxcv(t,'.');
1339  o = _t_newr(o,SEMTREX_OR);
1340  t = _t_news(o,SEMTREX_GROUP,STX_STAR);
1341  __stxcv(t,'*');
1342  o = _t_newr(o,SEMTREX_OR);
1343  t = _t_news(o,SEMTREX_GROUP,STX_PLUS);
1344  __stxcv(t,'+');
1345  o = _t_newr(o,SEMTREX_OR);
1346  t = _t_news(o,SEMTREX_GROUP,STX_Q);
1347  __stxcv(t,'?');
1348  o = _t_newr(o,SEMTREX_OR);
1349  t = _t_news(o,SEMTREX_GROUP,STX_NOT);
1350  __stxcv(t,'~');
1351 
1352  o = _t_newr(o,SEMTREX_OR);
1353  sq = _t_newr(o,SEMTREX_SEQUENCE);
1354  t = _t_news(sq,SEMTREX_GROUP,STX_EQ);
1355  _stxl(t);
1356  __stxcv(sq,'=');
1357 
1358  o = _t_newr(o,SEMTREX_OR);
1359  sq = _t_newr(o,SEMTREX_SEQUENCE);
1360  t = _t_news(sq,SEMTREX_GROUP,STX_NEQ);
1361  _stxl(t);
1362  __stxcv(sq,'!');
1363  __stxcv(sq,'=');
1364 
1365  o = _t_newr(o,SEMTREX_OR);
1366  sq = _t_newr(o,SEMTREX_SEQUENCE);
1367  __stxcv(sq,'\'');
1368  t = _t_news(sq,SEMTREX_GROUP,STX_VAL_C);
1369  _sl(t,ASCII_CHAR);
1370  __stxcv(sq,'\'');
1371 
1372  o = _t_newr(o,SEMTREX_OR);
1373  sq = _t_newr(o,SEMTREX_SEQUENCE);
1374  __stxcv(sq,'"');
1375  t = _t_news(sq,SEMTREX_GROUP,STX_VAL_S);
1376  _stxl(t);
1377  __stxcv(sq,'"');
1378 
1379  o = _t_newr(o,SEMTREX_OR);
1380  t = _t_news(o,SEMTREX_GROUP,STX_OS);
1381  __stxcv(t,'{');
1382  o = _t_newr(o,SEMTREX_OR);
1383  t = _t_news(o,SEMTREX_GROUP,STX_CS);
1384  __stxcv(t,'}');
1385 
1386  o = _t_newr(o,SEMTREX_OR);
1387  sq = _t_newr(o,SEMTREX_SEQUENCE);
1388  __stxcv(sq,'!');
1389  t = _t_news(sq,SEMTREX_GROUP,STX_EXCEPT);
1390  _stxl(t);
1391 
1392  o = _t_newr(o,SEMTREX_OR);
1393  sq = _t_newr(o,SEMTREX_SEQUENCE);
1394  t = _t_news(sq,SEMTREX_GROUP,STX_EXCEPT);
1395  __stxcv(t,'!');
1396 
1397  o = _t_newr(o,SEMTREX_OR);
1398  sq = _t_newr(o,SEMTREX_SEQUENCE);
1399  t = _t_news(sq,SEMTREX_GROUP,STX_VAL_F);
1400  T *sq2 = _t_newr(t,SEMTREX_SEQUENCE);
1401  t = _t_newr(sq2,SEMTREX_ZERO_OR_MORE);
1402  _stxcs(t,"0123456789");
1403  __stxcv(sq2,'.');
1404  t = _t_newr(sq2,SEMTREX_ONE_OR_MORE);
1405  _stxcs(t,"0123456789");
1406 
1407  o = _t_newr(o,SEMTREX_OR);
1408  sq = _t_newr(o,SEMTREX_SEQUENCE);
1409  t = _t_news(sq,SEMTREX_GROUP,STX_VAL_I);
1410  t = _t_newr(t,SEMTREX_ONE_OR_MORE);
1411  _stxcs(t,"0123456789");
1412 
1413  o = _t_newr(o,SEMTREX_OR);
1414  t = _t_news(o,SEMTREX_GROUP,STX_LABEL);
1415  _stxl(t);
1416 
1417  // o = _t_newr(o,SEMTREX_OR);
1418  sq = _t_newr(o,SEMTREX_SEQUENCE);
1419  __stxcv(sq,'<');
1420  t = _t_news(sq,SEMTREX_GROUP,STX_OG);
1421  _stxl(t);
1422  __stxcv(sq,':');
1423 
1424 
1425  T *results,*tokens;
1426  if (_t_matchr(ts,s,&results)) {
1427  char buf[10000];
1428 
1429  //----------------
1430  // ACTION
1432  tokens = _t_new_root(STX_TOKENS);
1433  int i,m = _t_children(results);
1434  for(i=4;i<=m;i++) {
1435  T *c = _t_child(results,i);
1436  T *sn = _t_child(c,1);
1437  Symbol ts = *(Symbol *)_t_surface(sn);
1438  if (semeq(ts,STX_VAL_S) || semeq(ts,STX_LABEL) || semeq(ts,STX_OG) || semeq(ts,STX_EXCEPT) || semeq(ts,STX_EQ) || semeq(ts,STX_NEQ)){
1439  asciiT_tos(s,c,tokens,ts);
1440  }
1441  else if (semeq(ts,STX_VAL_C)) {
1442  asciiT_toc(s,c,tokens,ts);
1443  }
1444  else if (semeq(ts,STX_VAL_I)) {
1445  asciiT_toi(s,c,tokens,ts);
1446  }
1447  else if (semeq(ts,STX_VAL_F)) {
1448  asciiT_tof(s,c,tokens,ts);
1449  }
1450  else
1451  _t_newi(tokens,ts,0);
1452  }
1453  _t_free(results);
1454 
1455  dump_tokens("TOKENS:");
1456  T *sxx,*sq;
1457 
1459  // convert STX_OS/STX_CS to STX_SET groups
1460  // /%<STX_OS:STX_OS,<STX_SET:!{STX_OS,STX_CS}+>,STX_CS>
1461 
1462  sxx = _t_new_root(SEMTREX_WALK);
1463  g = _t_news(sxx,SEMTREX_GROUP,STX_OS);
1464  sq = _t_newr(g,SEMTREX_SEQUENCE);
1465  _sl(sq,STX_OS);
1466  t = _t_news(sq,SEMTREX_GROUP,STX_SET);
1467  t = _t_newr(t,SEMTREX_ONE_OR_MORE);
1468 
1469  __sl(t,1,2,STX_OS,STX_CS);
1470  _sl(sq,STX_CS);
1471 
1472  //----------------
1473  // ACTION
1474  while (_t_matchr(sxx,tokens,&results)) {
1475  g = wrap(tokens,results,STX_SET,STX_OS);
1476  // convert the STX_OS to STX_SET and free the STX_CS
1477  g->contents.symbol = STX_SET;
1478  // zap STX_COMMAs
1479  int cc = _t_children(g);
1480  T *v = _t_child(g,1);
1481  while(cc--) {
1482  T *x = _t_next_sibling(v);
1483  Symbol is = _t_symbol(v);
1484  if (semeq(is,STX_COMMA)) {
1485  _t_detach_by_ptr(g,v);
1486  _t_free(v);
1487  }
1488  v = x;
1489  }
1490  _t_free(results);
1491  }
1492  _t_free(sxx);
1493 
1494  dump_tokens("TOKENS_AFTER_SETS:");
1495 
1497  // convert STX_EQ/STX_NEQ to SEMTREX_VALUE_LITERALS
1498  // PATTERN
1499  // /%<SEMTREX_VALUE_LITERAL:STX_EQ|STX_NEQ,<SEMTREX_VALUE_SET:STX_VAL_I|STX_VAL_F|STX_VAL_S|STX_VAL_C|STX_SET)>>
1500  sxx = _t_new_root(SEMTREX_WALK);
1501  g = _t_news(sxx,SEMTREX_GROUP,SEMTREX_VALUE_LITERAL);
1502  sq = _t_newr(g,SEMTREX_SEQUENCE);
1503  o = _t_newr(sq,SEMTREX_OR);
1504  _sl(o,STX_EQ);
1505  _sl(o,STX_NEQ);
1506 
1507  g = _t_news(sq,SEMTREX_GROUP,SEMTREX_VALUE_SET);
1508  o = _t_newr(g,SEMTREX_OR);
1509  _sl(o,STX_VAL_I);
1510  o = _t_newr(o,SEMTREX_OR);
1511  _sl(o,STX_VAL_F);
1512  o = _t_newr(o,SEMTREX_OR);
1513  _sl(o,STX_VAL_S);
1514  o = _t_newr(o,SEMTREX_OR);
1515  _sl(o,STX_VAL_C);
1516  _sl(o,STX_SET);
1517 
1518  //----------------
1519  // ACTION
1520  while (_t_matchr(sxx,tokens,&results)) {
1521 
1522  T *m = _t_get_match(results,SEMTREX_VALUE_LITERAL);
1523  int *path = (int *)_t_surface(_t_child(m,2));
1524  t = _t_get(tokens,path);
1525  Symbol val_type = _t_symbol(t);
1526  t->contents.symbol = semeq(val_type,STX_EQ) ? SEMTREX_VALUE_LITERAL : SEMTREX_VALUE_LITERAL_NOT;
1527 
1528  T *p = _t_parent(t);
1529  T *v = _t_next_sibling(t);
1530  _t_detach_by_ptr(p,v);
1531 
1532  int set_count;
1533  T *set;
1534 
1535  if (semeq(_t_symbol(v),STX_SET)) {
1536  set = v;
1537  v->contents.symbol = SEMTREX_VALUE_SET;
1538  set_count = _t_children(v);
1539  v = _t_child(v,1);
1540  while(set_count--) {
1541  char *symbol_name = (char *)_t_surface(t);
1542  Symbol vs = get_symbol(symbol_name,sem);
1543  // convert the STX_VAL structure token to the semantic type specified by the value literal
1544  v->contents.symbol = vs;
1545  v = _t_next_sibling(v);
1546  }
1547  _t_add(t,set);
1548  }
1549  else {
1550  // set = _t_newr(t,SEMTREX_VALUE_SET);
1551  char *symbol_name = (char *)_t_surface(t);
1552  Symbol vs = get_symbol(symbol_name,sem);
1553  // convert the STX_VAL structure token to the semantic type specified by the value literal
1554  v->contents.symbol = vs;
1555  _t_add(t,v);
1556  }
1557 
1558 
1559  _t_free(results);
1560  }
1561  _t_free(sxx);
1562 
1563  dump_tokens("TOKENS_AFTER_VALUE_LITERAL:");
1564 
1566  // replace paren groups with STX_SIBS list
1567  // PATTERN
1568  // /STX_TOKENS/.*,<STX_OP:STX_OP,<STX_SIBS:!{STX_CP,STX_OP}+>,STX_CP>
1569  sxx = _sl(0,STX_TOKENS);
1570  sq = _t_newr(sxx,SEMTREX_SEQUENCE);
1571  T *st = _t_newr(sq,SEMTREX_ZERO_OR_MORE);
1572  _t_newr(st,SEMTREX_SYMBOL_ANY);
1573  T *gg = _t_news(sq,SEMTREX_GROUP,STX_OP);
1574  T *sq1 = _t_newr(gg,SEMTREX_SEQUENCE);
1575  _sl(sq1,STX_OP);
1576  T *g = _t_news(sq1,SEMTREX_GROUP,STX_SIBS);
1577  T *any = _t_newr(g,SEMTREX_ONE_OR_MORE);
1578  __sl(any,1,2,STX_OP,STX_CP);
1579  _sl(sq1,STX_CP);
1580 
1581  //----------------
1582  // ACTION
1583  while (_t_matchr(sxx,tokens,&results)) {
1584  g = wrap(tokens,results,STX_SIBS,STX_OP);
1585  // convert the STX_OP to STX_SIBS and free the STX_CP
1586  g->contents.symbol = STX_SIBS;
1587  _t_free(results);
1588  }
1589  _t_free(sxx);
1590 
1591  dump_tokens("TOKENS_AFTER_SIBS:");
1592 
1594  // find groups
1595  // PATTERN
1596  // /%,<STX_OG:STX_OG,<SEMTREX_GROUP:!{STX_CG,STX_OG}+>,STX_CG>
1597  sxx = _t_new_root(SEMTREX_WALK);
1598  g = _t_news(sxx,SEMTREX_GROUP,STX_OG);
1599  sq = _t_newr(g,SEMTREX_SEQUENCE);
1600  _sl(sq,STX_OG);
1601  gg = _t_news(sq,SEMTREX_GROUP,SEMTREX_GROUP);
1602  any = _t_newr(gg,SEMTREX_ONE_OR_MORE);
1603 
1604  __sl(any,1,2,STX_OG,STX_CG);
1605  _sl(sq,STX_CG);
1606 
1607  //----------------
1608  // ACTION
1609  while (_t_matchr(sxx,tokens,&results)) {
1610  g = wrap(tokens,results,SEMTREX_GROUP,STX_OG);
1611 
1612  // convert the STX_OG to SEMTREX_GROUP children and free the STX_CG
1613  char *symbol_name = (char *)_t_surface(g);
1614  Symbol sy = get_symbol(symbol_name,sem);
1615  __t_morph(g,SEMTREX_GROUP,&sy,sizeof(Symbol),1);
1616 
1617  _t_free(results);
1618  }
1619  _t_free(sxx);
1620 
1621  dump_tokens("TOKENS_AFTER_GROUPS:");
1622 
1624  // if there are any parens left we raise mismatch!
1625  // PATTERN
1626  // /(STX_TOKENS/.*,(STX_OP)|(STX_CP))
1627  sxx = _sl(0,STX_TOKENS);
1628  sq = _t_newr(sxx,SEMTREX_SEQUENCE);
1629  st = _t_newr(sq,SEMTREX_ZERO_OR_MORE);
1630  _t_newr(st,SEMTREX_SYMBOL_ANY);
1631  o = _t_newr(sq,SEMTREX_OR);
1632  _sl(o,STX_OP);
1633  _sl(o,STX_CP);
1634 
1635  //----------------
1636  // ACTION
1637  if (_t_match(sxx,tokens)) {
1638  raise_error("mismatched parens! [tokens:%s]",_t2s(G_sem,tokens));
1639  }
1640  _t_free(sxx);
1641 
1643  // convert postfix groups
1644  // PATTERN
1645  // /*<STX_POSTFIX:.,STX_PLUS|STX_STAR|STX_Q>
1646  sxx = _t_new_root(SEMTREX_WALK);
1647  g = _t_news(sxx,SEMTREX_GROUP,STX_POSTFIX);
1648  sq = _t_newr(g,SEMTREX_SEQUENCE);
1649  _t_newr(sq,SEMTREX_SYMBOL_ANY);
1650  o = _t_newr(sq,SEMTREX_OR);
1651  _sl(o,STX_PLUS);
1652  o = _t_newr(o,SEMTREX_OR);
1653  _sl(o,STX_STAR);
1654  _sl(o,STX_Q);
1655 
1656  //----------------
1657  // ACTION
1658  while (_t_matchr(sxx,tokens,&results)) {
1659  T *m = _t_get_match(results,STX_POSTFIX);
1660  int *path = (int *)_t_surface(_t_child(m,2));
1661  int x = path[_t_path_depth(path)-1];
1662  t = _t_get(tokens,path);
1663  T *parent = _t_parent(t);
1664  t = _t_child(parent,x);
1665  _t_detach_by_ptr(parent,t);
1666  T *c = _t_get(tokens,path);
1667  _t_add(c,t);
1668  if (semeq(_t_symbol(c),STX_PLUS ))
1669  c->contents.symbol = SEMTREX_ONE_OR_MORE;
1670  else if (semeq(_t_symbol(c),STX_STAR ))
1671  c->contents.symbol = SEMTREX_ZERO_OR_MORE;
1672  else if (semeq(_t_symbol(c),STX_Q ))
1673  c->contents.symbol = SEMTREX_ZERO_OR_ONE;
1674 
1675  _t_free(results);
1676  }
1677  _t_free(sxx);
1678 
1679  dump_tokens("TOKENS_AFTER_POSTFIX:");
1680 
1682  // convert not
1683  // PATTERN
1684  // /%<SEMTREX_NOT:STX_NOT,.>
1685  sxx = _t_new_root(SEMTREX_WALK);
1686  g = _t_news(sxx,SEMTREX_GROUP,SEMTREX_NOT);
1687  sq = _t_newr(g,SEMTREX_SEQUENCE);
1688  _sl(sq,STX_NOT);
1689  _t_newr(sq,SEMTREX_SYMBOL_ANY);
1690 
1691  //----------------
1692  // ACTION
1693  while (_t_matchr(sxx,tokens,&results)) {
1694  T *m = _t_get_match(results,SEMTREX_NOT);
1695  int *path = (int *)_t_surface(_t_child(m,2));
1696  int x = path[_t_path_depth(path)-1];
1697  t = _t_get(tokens,path);
1698  T *parent = _t_parent(t);
1699  // detach the node to be negated
1700  T *c = _t_child(parent,x+1);
1701  _t_detach_by_ptr(parent,c);
1702  // reatach it to to the morphed STX_NOT
1703  T *n = _t_child(parent,x);
1704  n->contents.symbol = SEMTREX_NOT;
1705  _t_add(n,c);
1706  _t_free(results);
1707  }
1708  _t_free(sxx);
1709 
1710  dump_tokens("TOKENS_AFTER_NOT:");
1711 
1713  // convert things following slashes to children of things preceeding slashes
1714  // PATTERN
1715  // /%.*,<STX_CHILD:STX_LABEL,STX_SL,!STX_SL>
1716  sxx = _t_new_root(SEMTREX_WALK);
1717  sq = _t_newr(sxx,SEMTREX_SEQUENCE);
1718  any = _t_newr(sq,SEMTREX_ZERO_OR_MORE);
1719  _t_newr(any,SEMTREX_SYMBOL_ANY);
1720  g = _t_news(sq,SEMTREX_GROUP,STX_CHILD);
1721  sq = _t_newr(g,SEMTREX_SEQUENCE);
1722  _sl(sq,STX_LABEL);
1723  _sl(sq,STX_SL);
1724  _sln(sq,STX_SL);
1725 
1726  //----------------
1727  // ACTION
1728  while (_t_matchr(sxx,tokens,&results)) {
1729  T *m = _t_get_match(results,STX_CHILD);
1730  int *path = (int *)_t_surface(_t_child(m,2));
1731  int x = path[_t_path_depth(path)-1];
1732  t = _t_get(tokens,path);
1733  T *parent = _t_parent(t);
1734  // detach and free the slash token
1735  T *c = _t_child(parent,++x);
1736  _t_detach_by_ptr(parent,c);
1737  _t_free(c);
1738  // detach and add the element following the slash as a child
1739  c = _t_child(parent,x);
1740  _t_detach_by_ptr(parent,c);
1741  _t_add(t,c);
1742 
1743  _t_free(results);
1744  }
1745  _t_free(sxx);
1746 
1747  dump_tokens("TOKENS_AFTER_SLASH:");
1748 
1749 
1751  // convert STX_SET to SEMTREX_LITERALS
1752  // PATTERN
1753  // /%<SEMTREX_SYMBOL_LITERAL:STX_EXCEPT="!"?,STX_SET>
1754 
1755  sxx = _t_new_root(SEMTREX_WALK);
1756  g = _t_news(sxx,SEMTREX_GROUP,SEMTREX_SYMBOL_LITERAL);
1757  sq = _t_newr(g,SEMTREX_SEQUENCE);
1758  t = _t_newr(sq,SEMTREX_ZERO_OR_ONE);
1759  t = _t_newr(t,SEMTREX_VALUE_LITERAL);
1760  t = _t_newr(t,SEMTREX_VALUE_SET);
1761  _t_new(t,STX_EXCEPT,"!",2);
1762  _sl(sq,STX_SET);
1763  //----------------
1764  // ACTION
1765  while (_t_matchr(sxx,tokens,&results)) {
1766  T *m = _t_get_match(results,SEMTREX_SYMBOL_LITERAL);
1767  int *path = (int *)_t_surface(_t_child(m,2));
1768  t = _t_get(tokens,path);
1769  int not = semeq(_t_symbol(t),STX_EXCEPT);
1770  T *parent = _t_parent(t);
1771  if (not) {
1772  // throw away the EXCEPT token
1773  T *x = t;
1774  t = _t_next_sibling(t);
1775  _t_detach_by_ptr(parent,x);
1776  _t_free(x);
1777  }
1778  DO_KIDS(t,
1779  T *x = _t_child(t,i);
1780  char *symbol_name = (char *)_t_surface(x);
1781  Symbol sy = get_symbol(symbol_name,sem);
1782  __t_morph(x,SEMTREX_SYMBOL,&sy,sizeof(Symbol),1);
1783  );
1784  t->contents.symbol = SEMTREX_SYMBOL_SET;
1785  T *x = _t_new_root(not?SEMTREX_SYMBOL_LITERAL_NOT:SEMTREX_SYMBOL_LITERAL);
1786  _t_detach_by_ptr(parent,t);
1787  _t_add(x,t);
1788  _t_insert_at(tokens,path,x);
1789 
1790  _t_free(results);
1791  }
1792  _t_free(sxx);
1793  dump_tokens("TOKENS_AFTER_LITERAL_STX_SET:");
1794 
1796  // convert labels to SEMTREX_LITERALS
1797  // PATTERN
1798  // /%<SEMTREX_SYMBOL_LITERAL:STX_LABEL|STX_EXCEPT>
1799  sxx = _t_new_root(SEMTREX_WALK);
1800  g = _t_news(sxx,SEMTREX_GROUP,SEMTREX_SYMBOL_LITERAL);
1801  o = _t_newr(g,SEMTREX_OR);
1802  _sl(o,STX_LABEL);
1803  _sl(o,STX_EXCEPT);
1804  //----------------
1805  // ACTION
1806  while (_t_matchr(sxx,tokens,&results)) {
1807  T *m = _t_get_match(results,SEMTREX_SYMBOL_LITERAL);
1808  int *path = (int *)_t_surface(_t_child(m,2));
1809  t = _t_get(tokens,path);
1810  char *symbol_name = (char *)_t_surface(t);
1811  Symbol sy = get_symbol(symbol_name,sem);
1812  t->contents.symbol = semeq(t->contents.symbol,STX_LABEL)?SEMTREX_SYMBOL_LITERAL:SEMTREX_SYMBOL_LITERAL_NOT;
1813  T *ss = _t_news(0,SEMTREX_SYMBOL,sy);
1814  int pp[2] = {1,TREE_PATH_TERMINATOR};
1815  _t_insert_at(t,pp,ss);
1816 
1817  _t_free(results);
1818  }
1819  _t_free(sxx);
1820 
1821  dump_tokens("TOKENS_AFTER_LITERAL:");
1822 
1824  // convert comma tokens to sequences
1825  // PATTERN
1826  // /*<SEMTREX_SEQUENCE:(!STX_COMMA,STX_COMMA)+,!STX_COMMA> -> SEMTREX_SEQUENCE
1827  sxx = _t_new_root(SEMTREX_WALK);
1828  g = _t_news(sxx,SEMTREX_GROUP,SEMTREX_SEQUENCE);
1829  sq = _t_newr(g,SEMTREX_SEQUENCE);
1830  o = _t_newr(sq,SEMTREX_ONE_OR_MORE);
1831  _sln(sq,STX_COMMA);
1832  sq = _t_newr(o,SEMTREX_SEQUENCE);
1833  _sln(sq,STX_COMMA);
1834  _sl(sq,STX_COMMA);
1835 
1836  //----------------
1837  // ACTION
1838  while (_t_matchr(sxx,tokens,&results)) {
1839  T *m = _t_get_match(results,SEMTREX_SEQUENCE);
1840  int count = *(int *)_t_surface(_t_child(m,3));
1841  int *path = (int *)_t_surface(_t_child(m,2));
1842  T *seq = _t_new_root(SEMTREX_SEQUENCE);
1843  T *parent = _t_parent(_t_get(tokens,path));
1844  int x = path[_t_path_depth(path)-1];
1845  while(count--) {
1846  t = _t_child(parent,x);
1847  _t_detach_by_ptr(parent,t);
1848  if (semeq(STX_COMMA,_t_symbol(t)))
1849  _t_free(t);
1850  else
1851  _t_add(seq,t);
1852  }
1853  if (_t_children(parent) == 0) {
1854  _t_add(parent,seq);
1855  }
1856  else {
1857  _t_insert_at(tokens,path,seq);
1858  }
1859  _t_free(results);
1860  }
1861  _t_free(sxx);
1862 
1863  dump_tokens("TOKENS_AFTER_COMMA:");
1864 
1865 
1867  // convert ors
1868  // PATTERN
1869  // /%<SEMTREX_OR:!STX_OR,STX_OR,!STX_OR>
1870  sxx = _t_new_root(SEMTREX_WALK);
1871  g = _t_news(sxx,SEMTREX_GROUP,SEMTREX_OR);
1872  sq = _t_newr(g,SEMTREX_SEQUENCE);
1873  _sln(sq,STX_OR);
1874  _sl(sq,STX_OR);
1875  _sln(sq,STX_OR);
1876 
1877  //----------------
1878  // ACTION
1879  while (_t_matchr(sxx,tokens,&results)) {
1880  T *m = _t_get_match(results,SEMTREX_OR);
1881  int *path = (int *)_t_surface(_t_child(m,2));
1882  int x = path[_t_path_depth(path)-1];
1883  t = _t_get(tokens,path);
1884  T *parent = _t_parent(t);
1885  // detach the or's children
1886  T *c1 = _t_child(parent,x);
1887  _t_detach_by_ptr(parent,c1);
1888  T *c2 = _t_child(parent,x+1);
1889  _t_detach_by_ptr(parent,c2);
1890  o = _t_child(parent,x);
1891  _t_add(o,c1);
1892  _t_add(o,c2);
1893  o->contents.symbol = SEMTREX_OR;
1894  _t_free(results);
1895  }
1896  _t_free(sxx);
1897 
1898  dump_tokens("TOKENS_AFTER_ORS:");
1899 
1901  // fixup STX_WALK
1902  // PATTERN
1903  // /%<SEMTREX_WALK:STX_WALK,.>
1904  sxx = _t_new_root(SEMTREX_WALK);
1905  g = _t_news(sxx,SEMTREX_GROUP,SEMTREX_WALK);
1906  sq = _t_newr(g,SEMTREX_SEQUENCE);
1907  _sl(sq,STX_WALK);
1908  _t_newr(sq,SEMTREX_SYMBOL_ANY);
1909  //----------------
1910  // ACTION
1911  while (_t_matchr(sxx,tokens,&results)) {
1912  T *m = _t_get_match(results,SEMTREX_WALK);
1913  int *path = (int *)_t_surface(_t_child(m,2));
1914  t = _t_get(tokens,path);
1915  T *parent = _t_parent(t);
1916  int x = path[_t_path_depth(path)-1];
1917  T *c = _t_child(parent,x+1);
1918  _t_detach_by_ptr(parent,c);
1919  _t_add(t,c);
1920  t->contents.symbol = SEMTREX_WALK;
1921 
1922  _t_free(results);
1923  }
1924  _t_free(sxx);
1925 
1926  dump_tokens("TOKENS_AFTER_WALK:");
1927 
1929  // remove stray STX_SIBS
1930  // PATTERN
1931  // /%<STX_SIBS:STX_SIBS>
1932  sxx = _t_new_root(SEMTREX_WALK);
1933  g = _t_news(sxx,SEMTREX_GROUP,STX_SIBS);
1934  _sl(g,STX_SIBS);
1935  //----------------
1936  // ACTION
1937  while (_t_matchr(sxx,tokens,&results)) {
1938  T *m = _t_get_match(results,STX_SIBS);
1939  int *path = (int *)_t_surface(_t_child(m,2));
1940  t = _t_get(tokens,path);
1941  T *parent = _t_parent(t);
1942  if (_t_children(t) > 1) {
1943  __t_dump(G_sem,tokens,0,buf);
1944  raise_error("sibs with more than one child! [tokens:%s]",buf);
1945  }
1946  int x = path[_t_path_depth(path)-1];
1947  T *c = _t_child(t,1);
1948  _t_detach_by_ptr(t,c);
1949  _t_replace(parent,x,c);
1950  _t_free(results);
1951  }
1952  _t_free(sxx);
1953 
1954  dump_tokens("TOKENS_FINAL:");
1955 
1956  int c = _t_children(tokens);
1957  t =_t_child(tokens,1);
1958  Symbol sy = _t_symbol(t);
1959  if (c == 2 && (semeq(STX_SL,sy))) {
1960  t = _t_child(tokens,2);
1961  _t_detach_by_ptr(tokens,t);
1962  }
1963  else {
1964  __t_dump(G_sem,tokens,0,buf);
1965  raise_error("unexpected tokens! [tokens:%s]",buf);
1966  }
1967  _t_free(tokens);
1968 
1969  }
1970  _t_free(ts);
1971  _t_free(s);
1972 
1973  return t;
1974 }
1975 
1976 // recursable implementation of _stx_results2sem_map
1977 void __stx_r2fi(SemTable *sem,T *mr,T *mt, T *sem_map) {
1978  T *t = _t_newr(sem_map,SEMANTIC_LINK);
1979  T *match_symbol = _t_child(mr,SemtrexMatchSymbolIdx);
1980  Symbol msym = *(Symbol*)_t_surface(match_symbol);
1981  _t_news(t,USAGE,msym);
1982  T *r = _t_newr(t,REPLACEMENT_VALUE);
1983 
1984  int *path = (int *)_t_surface(_t_child(mr,SemtrexMatchPathIdx));
1985  // will just use the first sibling... *sibs = *(int*)_t_surface(_t_child(m,SemtrexMatchSibsIdx));
1986  T *x = _t_get(mt,path);
1987  if (!x) {
1988  raise_error("expecting to get a value from match!!");
1989  }
1990  x = _t_clone(x);
1991 
1992  int c = _t_children(mr);
1993 
1994  if (!semeq(msym,NULL_SYMBOL)) {
1995  Symbol xsym = _t_symbol(x);
1996  Structure mrs = _sem_get_symbol_structure(sem,msym);
1997  Structure xs = _sem_get_symbol_structure(sem,xsym);
1998 
1999  // the structures are the same then we can just set the symbol type
2000  if (semeq(mrs,xs)) {
2001  x->contents.symbol = msym;
2002  }
2003  else {
2004  // otherwise try embody from match
2005  T *e = _t_embody_from_match(sem,mr,msym,mt);
2006  if (!e)
2007  raise_error("unable to embody from match: %s(%s) -> %s(%s)",
2008  _sem_get_name(sem,xsym),_sem_get_name(sem,xs),
2009  _sem_get_name(sem,msym),_sem_get_name(sem,msym)
2010  );
2011  _t_free(x);
2012  x = e;
2013  }
2014  }
2015  _t_add(r,x);
2016  int i;
2017  for (i=SemtrexMatchSibsIdx+1;i<=c;i++) {
2018  __stx_r2fi(sem,_t_child(mr,i),mt,sem_map);
2019  }
2020 }
2021 
2029 T *_stx_results2sem_map(SemTable *sem,T *match_results,T *match_tree) {
2030  T *sem_map = _t_new_root(SEMANTIC_MAP);
2031  __stx_r2fi(sem,match_results,match_tree,sem_map);
2032  return sem_map;
2033 }
2034 
2035 char trbuf[255];
2036 char *transition2Str(TransitionType transition) {
2037  trbuf[0]=0;
2038  if (transition == TransitionDown) sprintf(trbuf,"Down");
2039  else if (isTransitionPop(transition)) sprintf(trbuf,"Up%d",transition*-1);
2040  if (isTransitionNext(transition)) {
2041  sprintf(trbuf+strlen(trbuf),"%sNext",trbuf[0]?"+":"");
2042  }
2043  return trbuf;
2044 }
2045 // debugging code to dump out an ascii representation of the stx fsa
2046 #include "ansicolor.h"
     // id of the dump in progress: _stx_dump increments it and __stx_dump stamps
     // it into each visited state's _did so a state is only printed once
2047 static int dump_id = 99;
     // when a dumped state equals this pointer, __stx_dump wraps it in KRED/KNRM
2048 SState *G_cur_stx_state = NULL;
     // output buffer used by stx_dump
2049 char G_stx_dump_buf[100000];
     // appends printf-style formatted text to the end of the string in the
     // caller's local variable `buf` (no bounds checking)
2050 #define pbuf(...) sprintf(buf+strlen(buf),__VA_ARGS__)
2051 
2052 char * __stx_dump_state(SState *s,char *buf) {
2053  switch (s->type) {
2054  case StateMatch:
2055  pbuf("(M)");
2056  break;
2057  case StateGroupOpen:
2058  pbuf("{%d:%s",s->data.groupo.uid,_sem_get_name(G_sem,s->data.groupo.symbol));
2059  break;
2060  case StateGroupClose:
2061  pbuf("%d:%s}",s->data.groupc.openP->data.groupo.uid,_sem_get_name(G_sem,s->data.groupc.openP->data.groupo.symbol));
2062  break;
2063  case StateSymbol:
2064  {
2065  Symbol sym;
2066  T *x;
2067  x = s->data.symbol.symbols;
2068  if (semeq(_t_symbol(x),SEMTREX_SYMBOL))
2069  sym = *(Symbol *)_t_surface(x);
2070  else raise_error("unimplemented state data type in stx_dump\n");
2071  pbuf("(%s%s)",(s->data.symbol.flags & LITERAL_NOT) ? "!" : "",
2072  _sem_get_name(G_sem,sym));
2073  }
2074  break;
2075  case StateValue:
2076  pbuf("(%sV",(s->data.value.flags & LITERAL_NOT) ? "!" : "");
2077  if (s->data.value.flags & LITERAL_SET) {
2078  T *x = s->data.value.values;
2079  int c = _t_children(x);
2080  int i;
2081 
2082  for(i=1;i<=c;i++) {
2083  pbuf("%s;",t2s(_t_child(x,i)));
2084  }
2085  }
2086  else {
2087  T *x = _t_child(s->data.value.values,1);
2088  pbuf("%s",x ? t2s(_t_child(x,1)) : "_nil_");
2089  }
2090  pbuf(")");
2091  break;
2092  case StateAny:
2093  pbuf("(.)");
2094  break;
2095  case StateDescend:
2096  printf("(/)");
2097  break;
2098  case StateNot:
2099  pbuf("(~)");
2100  break;
2101  case StateSplit:
2102  pbuf("(S)");
2103  break;
2104  case StateWalk:
2105  pbuf("(%%)");
2106  break;
2107  default:
2108  raise_error("unknown state");
2109  pbuf("(\?\?)");
2110  }
2111  return buf;
2112 }
2113 
2114 void __stx_dump(SState *s,char *buf) {
2115  if (s->_did == dump_id) {pbuf("X");return;}
2116  s->_did = dump_id;
2117  if (s == G_cur_stx_state)
2118  pbuf(KRED);
2119  __stx_dump_state(s,buf);
2120  if (s == G_cur_stx_state)
2121  pbuf(KNRM);
2122  if (s->out) {
2123  pbuf("-%s->",transition2Str(s->transition));
2124  __stx_dump(s->out,buf);
2125  }
2126  if (s->out1) {
2127  pbuf("[-%s->",transition2Str(s->transition1));
2128  __stx_dump(s->out1,buf);
2129  pbuf("]");
2130  }
2131  // printf("\n");
2132 }
2133 
2134 char * _stx_dump(SState *s,char *buf) {
2135  ++dump_id;
2136  buf[0] = 0;
2137  __stx_dump(s,buf);
2138  return buf;
2139 }
2140 
2141 void stx_dump(T *s) {
2142  int l;
2143 
2144  SState *f = _stx_makeFA(s,&l); _stx_dump(f,G_stx_dump_buf);
2145  puts(G_stx_dump_buf);
2146  _stx_freeFA(f);
2147 }
2148 
T * _t_new_root(Symbol symbol)
Definition: tree.c:160
int _did
used to hold a mark when freeing and printing out FSA to prevent looping.
Definition: semtrex.h:91
char * _sem_get_name(SemTable *sem, SemanticID s)
Definition: semtable.c:85
Definition: ceptr_types.h:114
T * _t_next_sibling(T *t)
Definition: tree.c:1306
SemanticID symbol
the symbol that describes the group semantically
Definition: semtrex.h:42
T * asciiT_toc(T *asciiT, T *match, T *t, Symbol s)
Definition: semtrex.c:1270
T * asciiT_tos(T *asciiT, T *match, T *t, Symbol s)
Definition: semtrex.c:1261
T * _t_get(T *t, int *p)
Definition: tree.c:1441
T * _t_path_walk(T *t, int **pathP, int *lenP)
Definition: tree.c:1543
header file for symbol and structure definition functions
int _t_path_depth(int *p)
Definition: tree.c:1365
SgroupOpen groupo
Group data for matching for StateGroup type states.
Definition: semtrex.h:74
Semantic tree regular expression header file.
Ptrlist * list1(SState **outp)
Definition: semtrex.c:42
int __t_match(T *semtrex, T *source_t, T **rP)
Definition: semtrex.c:567
Svalue value
Value data to match on for StateValue type states.
Definition: semtrex.h:73
int * _t_get_path(T *t)
Definition: tree.c:1384
T * _t_clone(T *t)
Definition: tree.c:589
void patch(Ptrlist *l, SState *s, int level)
Definition: semtrex.c:55
T * _t_root(T *t)
Definition: tree.c:1272
T * __sl(T *p, bool not, int count,...)
Definition: semtrex.c:1279
Symbol _t_symbol(T *t)
Definition: tree.c:1228
T * asciiT_tof(T *asciiT, T *match, T *t, Symbol s)
Definition: semtrex.c:1251
T * _t_child(T *t, int i)
Definition: tree.c:1251
T * makeASCIITree(char *c)
Definition: semtrex.c:1206
StateType type_
copy of state type needed for patch to grab (far too tricky)
Definition: semtrex.h:89
void _t_pathcpy(int *dst_p, int *src_p)
Definition: tree.c:1424
void _t_insert_at(T *t, int *path, T *i)
Definition: tree.c:438
TransitionType transition1
will be: TransitionNextChild=0,TransitionUp=-1,TransitionDown=1
Definition: semtrex.h:88
SState * _stx_makeFA(T *t, int *statesP)
Definition: semtrex.c:323
int _t_match(T *semtrex, T *t)
Definition: semtrex.c:809
void * _t_surface(T *t)
Definition: tree.c:1215
T * _t_get_match(T *match, Symbol group)
Definition: semtrex.c:848
SgroupClose groupc
Group data for matching for StateGroup type states.
Definition: semtrex.h:75
T * _t_embody_from_match(SemTable *sem, T *match, Symbol group, T *t)
Definition: semtrex.c:873
void __stx_freeFA2(SState *s)
Definition: semtrex.c:352
void _t_replace_node(T *t, T *r)
Definition: tree.c:391
SState * state(StateType type, int *statesP, int level)
Definition: semtrex.c:103
int __stx_freeFA(SState *s, int id)
Definition: semtrex.c:339
T * _t_parent(T *t)
Definition: tree.c:1262
int _t_matchr(T *semtrex, T *t, T **rP)
Definition: semtrex.c:798
char * __stx_makeFA(T *t, SState **in, Ptrlist **out, int level, int *statesP)
Definition: semtrex.c:119
void _stx_freeFA(SState *s)
Definition: semtrex.c:367
STypeData data
a union to hold the data for which ever type of SState this is
Definition: semtrex.h:92
T * asciiT_toi(T *asciiT, T *match, T *t, Symbol s)
Definition: semtrex.c:1233
struct SState * out
which state to go to next
Definition: semtrex.h:84
Ptrlist * append(Ptrlist *l1, Ptrlist *l2)
Definition: semtrex.c:87
TransitionType transition
will be: TransitionNextChild=0,TransitionDown=1, or a negative number which means pop up that many le...
Definition: semtrex.h:85
#define _sl(t, s)
macro to add a single symbol literal to semtrex tree
Definition: semtrex.h:122
char * __t2s(SemTable *sem, T *t, int indent)
Definition: def.c:518
Definition: semtrex.h:83
Sliteral symbol
Symbol to match on for StateSymbol type states.
Definition: semtrex.h:72
T * parseSemtrex(SemTable *sem, char *stx)
Definition: semtrex.c:1298
T * asciiT_tol(T *asciiT, T *match, T *t, Symbol s)
Definition: semtrex.c:1242
int _val_match(T *t, T *t1)
Definition: semtrex.c:451
void _t_add(T *t, T *c)
Definition: tree.c:261
int __symbol_set_does_not_contain(T *s, T *t)
Definition: semtrex.c:394
T * _stx_results2sem_map(SemTable *sem, T *match_results, T *match_tree)
Definition: semtrex.c:2029
int __symbol_set_contains(T *s, T *t)
Definition: semtrex.c:378
void __t_morph(T *t, Symbol s, void *surface, size_t size, int allocate)
Definition: tree.c:325
char * _dump_semtrex(SemTable *sem, T *s, char *buf)
Definition: semtrex.c:1096
int _t_children(T *t)
Definition: tree.c:1205
StateType type
what type of state this is
Definition: semtrex.h:86
void _t_replace(T *t, int i, T *r)
Definition: tree.c:372
void _t_detach_by_ptr(T *t, T *c)
Definition: tree.c:291
int uid
unique id for the group
Definition: semtrex.h:43
void _t_free(T *t)
Definition: tree.c:526
size_t _t_size(T *t)
Definition: tree.c:1238
#define _sln(t, s)
macro to add a single symbol literal not to semtrex tree
Definition: semtrex.h:125
SState matchstate
the final matching state in the FSA can be declared statically and globally
Definition: semtrex.c:21
char * _t_sprint_path(int *fp, char *buf)
Definition: tree.c:1508
struct SState * out1
which alternate state to go to next in the case this is a Split state
Definition: semtrex.h:87