/* Printable names of the FSA state types, used in debug output.
 * The table is indexed with a StateType value (e.g. G_s_str[type],
 * G_s_str[s->type]), so the entries presumably have to stay in the same
 * order as the StateType enum, which is declared elsewhere -- confirm
 * against that declaration before reordering or adding entries. */
23 char *G_s_str[]={
"StateSymbol",
"StateAny",
"StateValue",
"StateSplit",
"StateMatch",
"StateGroupOpen",
"StateGroupClose",
"StateDescend",
"StateWalk",
"StateNot"};
66 TransitionType *tr = (TransitionType *)(l+1);
68 StateType type = *(StateType *)(
sizeof(TransitionType)+(
char *)tr);
69 debug(D_STX_BUILD,
"patching %s to %s with input:%d states tr:%d\n",G_s_str[type],G_s_str[s->
type],(
int)level,(
int)*tr);
70 if (*tr != TransitionNone) {
76 if (*tr==0 && ((type != StateSymbol) && (type != StateAny) && (type != StateValue)))
120 SState *s,*i,*last,*s1,*s2;
134 case SEMTREX_VALUE_LITERAL_ID:
135 case SEMTREX_VALUE_LITERAL_NOT_ID:
136 debug(D_STX_BUILD,
"SYM=\n");
137 state_type = StateValue;
138 s =
state(state_type,statesP,level);
139 s->
data.
value.flags = (sym.id == SEMTREX_VALUE_LITERAL_NOT_ID) ? LITERAL_NOT : 0;
143 raise_error(
"expecting value or SEMTREX_VALUE_SET as first child of SEMTREX_VALUE_LITERAL");
151 case SEMTREX_SYMBOL_LITERAL_ID:
152 case SEMTREX_SYMBOL_LITERAL_NOT_ID:
153 debug(D_STX_BUILD,
"SYM\n");
154 state_type = StateSymbol;
159 if (!v || !((is_set = semeq(SEMTREX_SYMBOL_SET,vsym)) || semeq(SEMTREX_SYMBOL,vsym))) {
160 raise_error(
"expecting SEMTREX_SYMBOL_SET or SEMTREX_SYMBOL as first child of SEMTREX_SYMBOL_LITERAL");
162 if (c > 2)
return "Symbol literal must have 0 or 1 children other than the symbol/set";
163 s =
state(state_type,statesP,level);
164 s->
data.
symbol.flags = (sym.id == SEMTREX_SYMBOL_LITERAL_NOT_ID) ? LITERAL_NOT : 0;
165 if (is_set) s->
data.
symbol.flags |= LITERAL_SET;
179 case SEMTREX_SYMBOL_ANY_ID:
180 debug(D_STX_BUILD,
"ANY\n");
181 state_type = StateAny;
182 if (c > 1)
return "Symbol any must have 0 or 1 children";
184 s =
state(state_type,statesP,level);
198 case SEMTREX_SEQUENCE_ID:
199 debug(D_STX_BUILD,
"SEQ\n");
200 if (c == 0)
return "Sequence must have children";
205 if (last)
patch(o,last,level);
212 debug(D_STX_BUILD,
"OR\n");
213 if (c != 2)
return "Or must have 2 children";
214 s =
state(StateSplit,statesP,TransitionNone);
224 case SEMTREX_ZERO_OR_MORE_ID:
225 debug(D_STX_BUILD,
"*\n");
226 if (c != 1) raise_error(
"expecting 1 child for SEMTREX_ZERO_OR_MORE got %d, %s\n",c,t2s(t));
228 if (c != 1)
return "Star must have 1 child";
229 s =
state(StateSplit,statesP,level);
238 case SEMTREX_ONE_OR_MORE_ID:
239 debug(D_STX_BUILD,
"+\n");
240 if (c != 1)
return "Plus must have 1 child";
241 s =
state(StateSplit,statesP,level);
250 case SEMTREX_ZERO_OR_ONE_ID:
251 debug(D_STX_BUILD,
"?\n");
252 if (c != 1)
return "Question must have 1 child";
253 s =
state(StateSplit,statesP,level);
261 case SEMTREX_GROUP_ID:
262 debug(D_STX_BUILD,
"GROUP\n");
263 if (c != 1)
return "Group must have 1 child";
264 s =
state(StateGroupOpen,statesP,TransitionNone);
267 group_id = ++G_group_id;
273 s1 =
state(StateGroupClose,statesP,TransitionNone);
278 case SEMTREX_DESCEND_ID:
279 debug(D_STX_BUILD,
"DESCEND\n");
280 if (c != 1)
return "Descend must have 1 child";
281 s =
state(StateDescend,statesP,TransitionDown);
289 debug(D_STX_BUILD,
"NOT\n");
290 if (c != 1)
return "Not must have 1 child";
291 s =
state(StateNot,statesP,TransitionNone);
298 case SEMTREX_WALK_ID:
299 debug(D_STX_BUILD,
"WALK\n");
300 if (c != 1)
return "Walk must have 1 child";
301 s =
state(StateWalk,statesP,TransitionNone);
309 return "Unknown SEMTREX SYMBOL";
311 if (debugging(D_STX_BUILD)) {
314 debug(D_STX_BUILD,
"%d:%.*s%s\n",x,x,
"_______________________",_stx_dump(*in,buf));
328 if (err != 0) {raise_error(
"%s",err);}
329 patch(o,&matchstate,0);
334 static int free_id = 0;
340 if ((s->
_did !=
id) && (s != &matchstate)) {
355 if (s->
type == StateValue) {
358 if (s->
type == StateSymbol) {
405 T *__transition(TransitionType transition,
T *source_t,
int *cursor) {
409 debug(D_STX_MATCH,
"transition: cursor %s\n",
_t_sprint_path(cursor,buf));
410 while(cursor[i] != TREE_PATH_TERMINATOR) i++;
413 if (transition == TransitionDown) {
414 debug(D_STX_MATCH,
"transition: down\n");
417 cursor[i]= TREE_PATH_TERMINATOR;
419 else if (isTransitionPop(transition)) {
420 debug(D_STX_MATCH,
"transition: popping %d\n",transition);
421 if (i+transition <0) {
422 raise_error(
"transition: would pop above root!!\n");
425 cursor[i] = TREE_PATH_TERMINATOR;
431 else if (isTransitionNext(transition)) {
432 debug(D_STX_MATCH,
"transition: next\n");
434 if (i >= 0) cursor[i]++;
441 cursor[1]= TREE_PATH_TERMINATOR;
444 T *t =
_t_get(source_t,cursor);
445 debug(D_STX_MATCH,
"transition: result %s %s\n",
_t_sprint_path(cursor,buf),!t ?
"NULL":t2s(t));
455 debug(D_STX_MATCH,
"comparing sizes %ld,%ld\n",l,
_t_size(t));
462 debug(D_STX_MATCH,
"compare result: %d\n",i);
467 void __fix(
T *source_t,
T *r) {
496 raise_error(
"whoa! Mismatched path depths!");
498 if (debugging(D_STX_MATCH)) {
501 debug(D_STX_MATCH,
"start path:%s\n",buf);
503 debug(D_STX_MATCH,
" end path:%s\n",buf);
509 __t_morph(m2,SEMTREX_MATCH_SIBLINGS_COUNT,&i,
sizeof(
int),0);
513 __fix(source_t,end_c);
/* Upper bound on the backtracking stack: _PUSH_BRANCH raises
 * "MAX branch depth exceeded" once depth+1 reaches this value. */
517 #define MAX_BRANCH_DEPTH 5000
/* Number of ints in the cursor[] path array declared just below
 * (apparently the per-branch cursor that _PUSH_BRANCH fills with
 * _t_pathcpy -- confirm against the full struct).
 * NOTE(review): other code in this file sizes path buffers with a literal
 * 100 (an `int cursor[100]` declaration and a `d>=100` "path too deep!"
 * check); presumably the same limit -- confirm. */
518 #define CURSOR_MAX_DEPTH 100
525 TransitionType transition;
526 int cursor[CURSOR_MAX_DEPTH];
/* Forward declaration: __stx_dump_state is defined further down in this file
 * but is already needed by the debug output inside _PUSH_BRANCH. */
533 char * __stx_dump_state(
SState *s,
char *buf);
/* Scratch buffer that _PUSH_BRANCH passes to __stx_dump_state for its debug
 * message; _PUSH_BRANCH resets it to the empty string before each use. */
534 char G_stx_debug_buf[1000];
535 #define _PUSH_BRANCH(state,t,crs,c,w) { \
536 G_stx_debug_buf[0]=0;debug(D_STX_MATCH,"pushing split branch for backtracking to state %s\n with cursor:%s \n",__stx_dump_state(state,G_stx_debug_buf),c?t2s(c):"NULL"); \
537 if((depth+1)>=MAX_BRANCH_DEPTH) {raise_error("MAX branch depth exceeded");} \
538 stack[depth].s = state; \
539 stack[depth].transition = t; \
540 _t_pathcpy(stack[depth].cursor,crs); \
541 stack[depth].walk_root = w; \
542 if (w) stack[depth].walk_cursor = NULL; \
545 stack[depth].match = _t_clone(*rP); \
546 stack[depth].r_path = _t_get_path(r); \
548 else stack[depth].match = 0; \
/* Save a plain backtracking point: expands _PUSH_BRANCH with its last
 * argument (w, stored in stack[depth].walk_root) set to 0. */
553 #define PUSH_BRANCH(state,t,crs,c) _PUSH_BRANCH(state,t,crs,c,0)
/* Save a walk point: expands _PUSH_BRANCH with c passed both as the c
 * argument and as w, so stack[depth].walk_root is set to c. */
554 #define PUSH_WALK_POINT(state,t,crs,c) _PUSH_BRANCH(state,t,crs,c,c)
/* Abandon the current match attempt: clears the current state pointer `s`
 * and leaves the enclosing switch/loop. Must be expanded where `s` is in
 * scope and a `break` is legal. */
#define FAIL {s=0;break;}
/* Advance the matcher by one step when condition x holds.
 *
 * Expands to statements (not an expression) and relies on these names being
 * in scope at the expansion site: t (current tree node), s (current state),
 * source_t and cursor (both handed to __transition).
 *   - if t is NULL, or x evaluates to false, FAIL runs (s=0; break);
 *   - otherwise t becomes the result of __transition(s->transition,...) and
 *     s moves on to s->out.
 *
 * x is parenthesized so the negation applies to the whole argument. Callers
 * pass compound expressions such as
 *     s->data.symbol.flags & LITERAL_NOT ? !matched : matched
 * which, without the parentheses, parse as
 *     ((!s->data.symbol.flags) & LITERAL_NOT) ? !matched : matched
 * i.e. the `!` binds only to the first operand.
 *
 * Because FAIL uses `break`, this macro cannot be wrapped in
 * do { } while (0); always expand it inside a braced block. */
#define TRANSITION(x) if (!t) {FAIL;}; if (!(x)) {FAIL;}; t=__transition(s->transition,source_t,cursor); s = s->out;
583 int cursor[100] = {TREE_PATH_TERMINATOR};
585 while (s && s != &matchstate) {
586 t =
_t_get(source_t,cursor);
587 debug(D_STX_MATCH,
"IN:%s\n",G_s_str[s->
type]);
589 if (s->
type == StateGroupOpen) {
593 if (s->
type == StateGroupClose) {
598 if (debugging(D_STX_MATCH)) {G_cursor=t;G_cur_stx_state=s;debug(D_STX_MATCH,
" FSA:%s\n",_stx_dump(fa,G_stx_dump_buf));debug(D_STX_MATCH,
" tree:%s\n",!t ?
"NULL" : _t2s(G_sem,
_t_root(t)));}
599 if (rP && *rP) {debug(D_STX_MATCH,
"MATCH:\n%s\n",
__t2s(G_sem,*rP,INDENT));}
611 debug(D_STX_MATCH,
" seeking:%s%s\n",s->
data.
value.flags & LITERAL_NOT ?
" ~":
"",__t_dump(G_sem,v,0,buf));
617 for(i=1;i<=count && matched;i++) {
630 for(i=1;i<=count && !matched; i++) {
642 t = __transition(s->
transition,source_t,cursor);
647 TRANSITION((s->
data.
symbol.flags & LITERAL_NOT) ?
654 TRANSITION(s->
data.
symbol.flags & LITERAL_NOT ? !matched : matched);
681 r = _t_newi(r,SEMTREX_MATCH,o->
uid);
683 T *x = _t_news(r,SEMTREX_MATCH_SYMBOL,o->
symbol);
687 _t_new(r,SEMTREX_MATCH_CURSOR,&t,
sizeof(t));
691 case StateGroupClose:
694 int pt[2] = {3,TREE_PATH_TERMINATOR};
695 T *x = _t_new(0,SEMTREX_MATCH_CURSOR,&t,
sizeof(t));
708 while(cursor[i] != TREE_PATH_TERMINATOR)i++;
710 cursor[i]= TREE_PATH_TERMINATOR;
720 debug(D_STX_MATCH,
"Fail & backtracking possible\n");
723 if ((*rP = stack[depth].match)) {
724 r =
_t_get(*rP,stack[depth].r_path);
725 free(stack[depth].r_path);
733 T *walk = stack[depth].walk_root;
739 t =
_t_get(source_t,cursor);
740 debug(D_STX_MATCH,
" popping to--%s %s\n",
_t_sprint_path(cursor,buf), t ? t2s(t) :
"NULL");
741 debug(D_STX_MATCH,
" running transition:%d\n",stack[depth].transition);
745 t = __transition(stack[depth].transition,source_t,cursor);
749 t =
_t_path_walk(walk,&stack[depth].walk_cursor,&stack[depth].walk_len);
754 debug(D_STX_MATCH,
" walking to--%s %s\n",
_t_sprint_path(cursor,buf), t ? t2s(t) :
"NULL");
762 debug(D_STX_MATCH,
"FIXING RESULTS:\n%s\n",
__t2s(G_sem,*rP,INDENT));
772 if (stack[depth].walk_root) {
773 if (stack[depth].walk_cursor) free(stack[depth].walk_cursor);
776 if ((r = stack[depth].match)) {
778 free(stack[depth].r_path);
783 if (s == &matchstate) {
784 debug(D_STX_MATCH,
"Matched!\n");
813 T *_stx_get_matched_node(
Symbol s,
T *match_results,
T *match_tree,
int *sibs) {
816 raise_error(
"expected to have match!");
821 T *x =
_t_get(match_tree,path);
824 raise_error(
"expecting to get a value from match!!");
829 void _stx_replace(
T *semtrex,
T *t,
T *replace){
834 T *x = _stx_get_matched_node(sym,r,t,&sibs);
835 if (sibs > 1) raise_error(
"not implemented for sibs > 1");
850 if (!match)
return 0;
874 return __t_embody_from_match(sem,
_t_get_match(match,group),t);
877 T *__t_embody_from_match(
SemTable *sem,
T *match,
T *t) {
879 if (semeq(s,NULL_SYMBOL))
return 0;
887 T *r = __t_embody_from_match(sem,c,t);
895 Structure st = _sem_get_symbol_structure(sem,s);
918 char * __dump_semtrex(
SemTable *sem,
T *s,
char *buf);
920 void __stxd_multi(
SemTable *sem,
char *x,
T *s,
char *buf) {
924 int has_child = (semeq(ss,SEMTREX_SYMBOL_LITERAL_NOT) || semeq(ss,SEMTREX_SYMBOL_LITERAL)) ? 2 : 1;
925 sprintf(buf,(
_t_children(s)>has_child ||
_t_symbol(sub).
id==SEMTREX_SEQUENCE_ID) ?
"(%s)%s" :
"%s%s",__dump_semtrex(sem,sub,b),x);
927 void __stxd_descend(
SemTable *sem,
T *s,
char *v,
char *buf,
int skip) {
932 int has_child = (semeq(ss,SEMTREX_SYMBOL_LITERAL_NOT) || semeq(ss,SEMTREX_SYMBOL_LITERAL)) ? 2 : 1;
933 sprintf(buf,
_t_children(sub)>has_child?
"%s/(%s)":
"%s/%s",v,__dump_semtrex(sem,sub,b));
935 else sprintf(buf,
"%s",v);
938 char * __dump_semtrex(
SemTable *sem,
T *s,
char *buf) {
947 case SEMTREX_VALUE_LITERAL_ID:
948 case SEMTREX_VALUE_LITERAL_NOT_ID:
950 if (semeq(
_t_symbol(v),SEMTREX_VALUE_SET)) {
953 if (!v1) {raise_error(
"no values in set!");}
963 sprintf(b,
"%d.%d.%d",sid.context,sid.semtype,sid.id);
966 Structure st = _sem_get_symbol_structure(sem,sid);
967 if (sym.id == SEMTREX_VALUE_LITERAL_NOT_ID) {
968 sprintf(b+strlen(b),
"!");
970 sprintf(b+strlen(b),
"=");
972 sprintf(b+strlen(b),
"{");
973 for(i=1;i<=count;i++) {
975 if (semeq(st,CSTRING))
976 sprintf(b+strlen(b),
"\"%s\"",(
char *)(
_t_surface(x)));
977 else if (semeq(st,CHAR))
978 sprintf(b+strlen(b),
"'%c'",*(
char *)(
_t_surface(x)));
979 else if (semeq(st,INTEGER))
980 sprintf(b+strlen(b),
"%d",*(
int *)(
_t_surface(x)));
981 else if (semeq(st,FLOAT))
982 sprintf(b+strlen(b),
"%f",*(
float *)(
_t_surface(x)));
983 else sprintf(b+strlen(b),
"???x");
985 sprintf(b+strlen(b),
",");
988 sprintf(b+strlen(b),
"}");
991 case SEMTREX_SYMBOL_LITERAL_NOT_ID:
992 case SEMTREX_SYMBOL_LITERAL_ID:
994 if (semeq(sym, SEMTREX_SYMBOL_LITERAL_NOT)) {
1002 if (semeq(
_t_symbol(v),SEMTREX_SYMBOL_SET)) {
1005 if (!v1) {raise_error(
"no symbols in set!");}
1014 sprintf(b+strlen(b),
"{");
1016 for(i=1;i<=count;i++) {
1021 sprintf(b+strlen(b),
"%d.%d.%d",sid.context,sid.semtype,sid.id);
1023 sprintf(b+strlen(b),
"%s",sn);
1026 sprintf(b+strlen(b),
",");
1029 sprintf(b+strlen(b),
"}");
1031 __stxd_descend(sem,s,b,buf,1);
1033 case SEMTREX_SYMBOL_ANY_ID:
1035 __stxd_descend(sem,s,b,buf,0);
1037 case SEMTREX_SEQUENCE_ID:
1040 sprintf(sn,i<_c ?
"%s,":
"%s",__dump_semtrex(sem,
_t_child(s,i),b));
1046 sn = __dump_semtrex(sem,t,b);
1048 int has_child = (semeq(ss,SEMTREX_SYMBOL_LITERAL_NOT) || semeq(ss,SEMTREX_SYMBOL_LITERAL)) ? 2 : 1;
1049 sprintf(buf,(
_t_children(t) > has_child) ?
"(%s)|":
"%s|",sn);
1051 sn = __dump_semtrex(sem,t,b);
1053 has_child = (semeq(ss,SEMTREX_SYMBOL_LITERAL_NOT) || semeq(ss,SEMTREX_SYMBOL_LITERAL)) ? 2 : 1;
1054 sprintf(buf+strlen(buf),(
_t_children(t) > has_child) ?
"(%s)":
"%s",sn);
1056 case SEMTREX_NOT_ID:
1058 sn = __dump_semtrex(sem,t,b);
1059 sprintf(buf,
"~%s",sn);
1061 case SEMTREX_ZERO_OR_MORE_ID:
1062 __stxd_multi(sem,
"*",s,buf);
1064 case SEMTREX_ONE_OR_MORE_ID:
1065 __stxd_multi(sem,
"+",s,buf);
1067 case SEMTREX_ZERO_OR_ONE_ID:
1068 __stxd_multi(sem,
"?",s,buf);
1070 case SEMTREX_GROUP_ID:
1074 sprintf(buf,
"<%s>",__dump_semtrex(sem,
_t_child(s,1),b));
1076 sprintf(buf,
"<%s:%s>",sn,__dump_semtrex(sem,
_t_child(s,1),b));
1078 case SEMTREX_DESCEND_ID:
1079 sprintf(buf,
"/%s",__dump_semtrex(sem,
_t_child(s,1),b));
1081 case SEMTREX_WALK_ID:
1082 sprintf(buf,
"(%%%s)",__dump_semtrex(sem,
_t_child(s,1),b));
1098 __dump_semtrex(sem,s,buf+1);
1103 T *__stxcv(
T *p,
char c) {
1104 T *t = _t_newr(p,SEMTREX_VALUE_LITERAL);
1105 _t_newc(t,ASCII_CHAR,c);
1110 T *__stxcvm(
T *p,
int not,
int count,...) {
1112 T *t = _t_newr(p,not?SEMTREX_VALUE_LITERAL_NOT:SEMTREX_VALUE_LITERAL);
1113 T *v = _t_newr(t,SEMTREX_VALUE_SET);
1115 va_start(chars,count);
1117 for(i=0;i<count;i++) {
1118 _t_newc(v,ASCII_CHAR,va_arg(chars,
int));
1126 void _stxcs(
T *stxx,
char *an) {
1127 T *label = _t_newr(stxx,SEMTREX_ONE_OR_MORE);
1128 label = _t_newr(label,SEMTREX_OR);
1132 if (*an) label = _t_newr(label,SEMTREX_OR);
1137 void _stxl(
T *stxx) {
1138 _stxcs(stxx,
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789._");
1144 for (ctx=0;ctx<sem->contexts;ctx++) {
1146 if (!cs->definitions)
continue;
1147 T *symbols = __sem_get_defs(sem,SEM_TYPE_SYMBOL,ctx);
1152 if (!strcmp(symbol_name,(
char *)
_t_surface(c))) {
1153 Symbol r = {ctx,SEM_TYPE_SYMBOL,i};
1163 #define dump_tokens(str) puts(str);puts(_t2s(G_sem,tokens));
1165 #define dump_tokens(str)
1209 _t_newc(s,ASCII_CHAR,*c);
1215 char *_asciiT2str(
T* asciiT,
T* match,
T *t,
char *buf) {
1220 if (d>=100) {raise_error(
"path too deep!");}
1221 memcpy(path,p,
sizeof(
int)*(d+1));
1222 for(j=0;j<sibs;j++) {
1235 _asciiT2str(asciiT,match,t,buf);
1236 return _t_newi(t,s,atoi(buf));
1244 _asciiT2str(asciiT,match,t,buf);
1245 return _t_newi64(t,s,atol(buf));
1253 _asciiT2str(asciiT,match,t,buf);
1254 float f = atof(buf);
1255 return _t_new(t,s,&f,
sizeof(
float));
1263 _asciiT2str(asciiT,match,t,buf);
1264 return _t_new_str(t,s,buf);
1273 return _t_newc(t,s,c);
1281 T *t = _t_newr(p,not ? SEMTREX_SYMBOL_LITERAL_NOT : SEMTREX_SYMBOL_LITERAL);
1282 T *ss = count > 1 ? _t_newr(t,SEMTREX_SYMBOL_SET) : t;
1283 va_start(symbols,count);
1285 for(i=0;i<count;i++) {
1286 _t_news(ss,SEMTREX_SYMBOL,va_arg(symbols,
Symbol));
1301 printf(
"\nPARSING:%s\n",stx);
1309 T *ts = _t_news(0,SEMTREX_GROUP,STX_TOKENS);
1310 T *g =
_sl(ts,ASCII_CHARS);
1311 T *sq = _t_newr(g,SEMTREX_SEQUENCE);
1312 T *p = _t_newr(sq,SEMTREX_ONE_OR_MORE);
1313 T *o = _t_newr(p,SEMTREX_OR);
1314 t = _t_news(o,SEMTREX_GROUP,STX_WALK);
1316 o = _t_newr(o,SEMTREX_OR);
1317 t = _t_news(o,SEMTREX_GROUP,STX_SL);
1319 o = _t_newr(o,SEMTREX_OR);
1320 t = _t_news(o,SEMTREX_GROUP,STX_OP);
1322 o = _t_newr(o,SEMTREX_OR);
1323 t = _t_news(o,SEMTREX_GROUP,STX_CP);
1326 o = _t_newr(o,SEMTREX_OR);
1327 t = _t_news(o,SEMTREX_GROUP,STX_OR);
1329 o = _t_newr(o,SEMTREX_OR);
1330 t = _t_news(o,SEMTREX_GROUP,STX_COMMA);
1333 o = _t_newr(o,SEMTREX_OR);
1334 t = _t_news(o,SEMTREX_GROUP,STX_CG);
1336 o = _t_newr(o,SEMTREX_OR);
1337 t = _t_news(o,SEMTREX_GROUP,SEMTREX_SYMBOL_ANY);
1339 o = _t_newr(o,SEMTREX_OR);
1340 t = _t_news(o,SEMTREX_GROUP,STX_STAR);
1342 o = _t_newr(o,SEMTREX_OR);
1343 t = _t_news(o,SEMTREX_GROUP,STX_PLUS);
1345 o = _t_newr(o,SEMTREX_OR);
1346 t = _t_news(o,SEMTREX_GROUP,STX_Q);
1348 o = _t_newr(o,SEMTREX_OR);
1349 t = _t_news(o,SEMTREX_GROUP,STX_NOT);
1352 o = _t_newr(o,SEMTREX_OR);
1353 sq = _t_newr(o,SEMTREX_SEQUENCE);
1354 t = _t_news(sq,SEMTREX_GROUP,STX_EQ);
1358 o = _t_newr(o,SEMTREX_OR);
1359 sq = _t_newr(o,SEMTREX_SEQUENCE);
1360 t = _t_news(sq,SEMTREX_GROUP,STX_NEQ);
1365 o = _t_newr(o,SEMTREX_OR);
1366 sq = _t_newr(o,SEMTREX_SEQUENCE);
1368 t = _t_news(sq,SEMTREX_GROUP,STX_VAL_C);
1372 o = _t_newr(o,SEMTREX_OR);
1373 sq = _t_newr(o,SEMTREX_SEQUENCE);
1375 t = _t_news(sq,SEMTREX_GROUP,STX_VAL_S);
1379 o = _t_newr(o,SEMTREX_OR);
1380 t = _t_news(o,SEMTREX_GROUP,STX_OS);
1382 o = _t_newr(o,SEMTREX_OR);
1383 t = _t_news(o,SEMTREX_GROUP,STX_CS);
1386 o = _t_newr(o,SEMTREX_OR);
1387 sq = _t_newr(o,SEMTREX_SEQUENCE);
1389 t = _t_news(sq,SEMTREX_GROUP,STX_EXCEPT);
1392 o = _t_newr(o,SEMTREX_OR);
1393 sq = _t_newr(o,SEMTREX_SEQUENCE);
1394 t = _t_news(sq,SEMTREX_GROUP,STX_EXCEPT);
1397 o = _t_newr(o,SEMTREX_OR);
1398 sq = _t_newr(o,SEMTREX_SEQUENCE);
1399 t = _t_news(sq,SEMTREX_GROUP,STX_VAL_F);
1400 T *sq2 = _t_newr(t,SEMTREX_SEQUENCE);
1401 t = _t_newr(sq2,SEMTREX_ZERO_OR_MORE);
1402 _stxcs(t,
"0123456789");
1404 t = _t_newr(sq2,SEMTREX_ONE_OR_MORE);
1405 _stxcs(t,
"0123456789");
1407 o = _t_newr(o,SEMTREX_OR);
1408 sq = _t_newr(o,SEMTREX_SEQUENCE);
1409 t = _t_news(sq,SEMTREX_GROUP,STX_VAL_I);
1410 t = _t_newr(t,SEMTREX_ONE_OR_MORE);
1411 _stxcs(t,
"0123456789");
1413 o = _t_newr(o,SEMTREX_OR);
1414 t = _t_news(o,SEMTREX_GROUP,STX_LABEL);
1418 sq = _t_newr(o,SEMTREX_SEQUENCE);
1420 t = _t_news(sq,SEMTREX_GROUP,STX_OG);
1438 if (semeq(ts,STX_VAL_S) || semeq(ts,STX_LABEL) || semeq(ts,STX_OG) || semeq(ts,STX_EXCEPT) || semeq(ts,STX_EQ) || semeq(ts,STX_NEQ)){
1441 else if (semeq(ts,STX_VAL_C)) {
1444 else if (semeq(ts,STX_VAL_I)) {
1447 else if (semeq(ts,STX_VAL_F)) {
1451 _t_newi(tokens,ts,0);
1455 dump_tokens(
"TOKENS:");
1463 g = _t_news(sxx,SEMTREX_GROUP,STX_OS);
1464 sq = _t_newr(g,SEMTREX_SEQUENCE);
1466 t = _t_news(sq,SEMTREX_GROUP,STX_SET);
1467 t = _t_newr(t,SEMTREX_ONE_OR_MORE);
1469 __sl(t,1,2,STX_OS,STX_CS);
1474 while (
_t_matchr(sxx,tokens,&results)) {
1475 g = wrap(tokens,results,STX_SET,STX_OS);
1477 g->contents.symbol = STX_SET;
1484 if (semeq(is,STX_COMMA)) {
1494 dump_tokens(
"TOKENS_AFTER_SETS:");
1501 g = _t_news(sxx,SEMTREX_GROUP,SEMTREX_VALUE_LITERAL);
1502 sq = _t_newr(g,SEMTREX_SEQUENCE);
1503 o = _t_newr(sq,SEMTREX_OR);
1507 g = _t_news(sq,SEMTREX_GROUP,SEMTREX_VALUE_SET);
1508 o = _t_newr(g,SEMTREX_OR);
1510 o = _t_newr(o,SEMTREX_OR);
1512 o = _t_newr(o,SEMTREX_OR);
1514 o = _t_newr(o,SEMTREX_OR);
1520 while (
_t_matchr(sxx,tokens,&results)) {
1526 t->contents.symbol = semeq(val_type,STX_EQ) ? SEMTREX_VALUE_LITERAL : SEMTREX_VALUE_LITERAL_NOT;
1537 v->contents.symbol = SEMTREX_VALUE_SET;
1540 while(set_count--) {
1542 Symbol vs = get_symbol(symbol_name,sem);
1544 v->contents.symbol = vs;
1552 Symbol vs = get_symbol(symbol_name,sem);
1554 v->contents.symbol = vs;
1563 dump_tokens(
"TOKENS_AFTER_VALUE_LITERAL:");
1569 sxx =
_sl(0,STX_TOKENS);
1570 sq = _t_newr(sxx,SEMTREX_SEQUENCE);
1571 T *st = _t_newr(sq,SEMTREX_ZERO_OR_MORE);
1572 _t_newr(st,SEMTREX_SYMBOL_ANY);
1573 T *gg = _t_news(sq,SEMTREX_GROUP,STX_OP);
1574 T *sq1 = _t_newr(gg,SEMTREX_SEQUENCE);
1576 T *g = _t_news(sq1,SEMTREX_GROUP,STX_SIBS);
1577 T *any = _t_newr(g,SEMTREX_ONE_OR_MORE);
1578 __sl(any,1,2,STX_OP,STX_CP);
1583 while (
_t_matchr(sxx,tokens,&results)) {
1584 g = wrap(tokens,results,STX_SIBS,STX_OP);
1586 g->contents.symbol = STX_SIBS;
1591 dump_tokens(
"TOKENS_AFTER_SIBS:");
1598 g = _t_news(sxx,SEMTREX_GROUP,STX_OG);
1599 sq = _t_newr(g,SEMTREX_SEQUENCE);
1601 gg = _t_news(sq,SEMTREX_GROUP,SEMTREX_GROUP);
1602 any = _t_newr(gg,SEMTREX_ONE_OR_MORE);
1604 __sl(any,1,2,STX_OG,STX_CG);
1609 while (
_t_matchr(sxx,tokens,&results)) {
1610 g = wrap(tokens,results,SEMTREX_GROUP,STX_OG);
1614 Symbol sy = get_symbol(symbol_name,sem);
1621 dump_tokens(
"TOKENS_AFTER_GROUPS:");
1627 sxx =
_sl(0,STX_TOKENS);
1628 sq = _t_newr(sxx,SEMTREX_SEQUENCE);
1629 st = _t_newr(sq,SEMTREX_ZERO_OR_MORE);
1630 _t_newr(st,SEMTREX_SYMBOL_ANY);
1631 o = _t_newr(sq,SEMTREX_OR);
1638 raise_error(
"mismatched parens! [tokens:%s]",_t2s(G_sem,tokens));
1647 g = _t_news(sxx,SEMTREX_GROUP,STX_POSTFIX);
1648 sq = _t_newr(g,SEMTREX_SEQUENCE);
1649 _t_newr(sq,SEMTREX_SYMBOL_ANY);
1650 o = _t_newr(sq,SEMTREX_OR);
1652 o = _t_newr(o,SEMTREX_OR);
1658 while (
_t_matchr(sxx,tokens,&results)) {
1669 c->contents.symbol = SEMTREX_ONE_OR_MORE;
1671 c->contents.symbol = SEMTREX_ZERO_OR_MORE;
1673 c->contents.symbol = SEMTREX_ZERO_OR_ONE;
1679 dump_tokens(
"TOKENS_AFTER_POSTFIX:");
1686 g = _t_news(sxx,SEMTREX_GROUP,SEMTREX_NOT);
1687 sq = _t_newr(g,SEMTREX_SEQUENCE);
1689 _t_newr(sq,SEMTREX_SYMBOL_ANY);
1693 while (
_t_matchr(sxx,tokens,&results)) {
1704 n->contents.symbol = SEMTREX_NOT;
1710 dump_tokens(
"TOKENS_AFTER_NOT:");
1717 sq = _t_newr(sxx,SEMTREX_SEQUENCE);
1718 any = _t_newr(sq,SEMTREX_ZERO_OR_MORE);
1719 _t_newr(any,SEMTREX_SYMBOL_ANY);
1720 g = _t_news(sq,SEMTREX_GROUP,STX_CHILD);
1721 sq = _t_newr(g,SEMTREX_SEQUENCE);
1728 while (
_t_matchr(sxx,tokens,&results)) {
1747 dump_tokens(
"TOKENS_AFTER_SLASH:");
1756 g = _t_news(sxx,SEMTREX_GROUP,SEMTREX_SYMBOL_LITERAL);
1757 sq = _t_newr(g,SEMTREX_SEQUENCE);
1758 t = _t_newr(sq,SEMTREX_ZERO_OR_ONE);
1759 t = _t_newr(t,SEMTREX_VALUE_LITERAL);
1760 t = _t_newr(t,SEMTREX_VALUE_SET);
1761 _t_new(t,STX_EXCEPT,
"!",2);
1765 while (
_t_matchr(sxx,tokens,&results)) {
1769 int not = semeq(
_t_symbol(t),STX_EXCEPT);
1781 Symbol sy = get_symbol(symbol_name,sem);
1784 t->contents.symbol = SEMTREX_SYMBOL_SET;
1785 T *x =
_t_new_root(not?SEMTREX_SYMBOL_LITERAL_NOT:SEMTREX_SYMBOL_LITERAL);
1793 dump_tokens(
"TOKENS_AFTER_LITERAL_STX_SET:");
1800 g = _t_news(sxx,SEMTREX_GROUP,SEMTREX_SYMBOL_LITERAL);
1801 o = _t_newr(g,SEMTREX_OR);
1806 while (
_t_matchr(sxx,tokens,&results)) {
1811 Symbol sy = get_symbol(symbol_name,sem);
1812 t->contents.symbol = semeq(t->contents.symbol,STX_LABEL)?SEMTREX_SYMBOL_LITERAL:SEMTREX_SYMBOL_LITERAL_NOT;
1813 T *ss = _t_news(0,SEMTREX_SYMBOL,sy);
1814 int pp[2] = {1,TREE_PATH_TERMINATOR};
1821 dump_tokens(
"TOKENS_AFTER_LITERAL:");
1828 g = _t_news(sxx,SEMTREX_GROUP,SEMTREX_SEQUENCE);
1829 sq = _t_newr(g,SEMTREX_SEQUENCE);
1830 o = _t_newr(sq,SEMTREX_ONE_OR_MORE);
1832 sq = _t_newr(o,SEMTREX_SEQUENCE);
1838 while (
_t_matchr(sxx,tokens,&results)) {
1863 dump_tokens(
"TOKENS_AFTER_COMMA:");
1871 g = _t_news(sxx,SEMTREX_GROUP,SEMTREX_OR);
1872 sq = _t_newr(g,SEMTREX_SEQUENCE);
1879 while (
_t_matchr(sxx,tokens,&results)) {
1893 o->contents.symbol = SEMTREX_OR;
1898 dump_tokens(
"TOKENS_AFTER_ORS:");
1905 g = _t_news(sxx,SEMTREX_GROUP,SEMTREX_WALK);
1906 sq = _t_newr(g,SEMTREX_SEQUENCE);
1908 _t_newr(sq,SEMTREX_SYMBOL_ANY);
1911 while (
_t_matchr(sxx,tokens,&results)) {
1920 t->contents.symbol = SEMTREX_WALK;
1926 dump_tokens(
"TOKENS_AFTER_WALK:");
1933 g = _t_news(sxx,SEMTREX_GROUP,STX_SIBS);
1937 while (
_t_matchr(sxx,tokens,&results)) {
1943 __t_dump(G_sem,tokens,0,buf);
1944 raise_error(
"sibs with more than one child! [tokens:%s]",buf);
1954 dump_tokens(
"TOKENS_FINAL:");
1959 if (c == 2 && (semeq(STX_SL,sy))) {
1964 __t_dump(G_sem,tokens,0,buf);
1965 raise_error(
"unexpected tokens! [tokens:%s]",buf);
1977 void __stx_r2fi(
SemTable *sem,
T *mr,
T *mt,
T *sem_map) {
1978 T *t = _t_newr(sem_map,SEMANTIC_LINK);
1979 T *match_symbol =
_t_child(mr,SemtrexMatchSymbolIdx);
1981 _t_news(t,USAGE,msym);
1982 T *r = _t_newr(t,REPLACEMENT_VALUE);
1988 raise_error(
"expecting to get a value from match!!");
1994 if (!semeq(msym,NULL_SYMBOL)) {
1996 Structure mrs = _sem_get_symbol_structure(sem,msym);
1997 Structure xs = _sem_get_symbol_structure(sem,xsym);
2000 if (semeq(mrs,xs)) {
2001 x->contents.symbol = msym;
2007 raise_error(
"unable to embody from match: %s(%s) -> %s(%s)",
2017 for (i=SemtrexMatchSibsIdx+1;i<=c;i++) {
2018 __stx_r2fi(sem,
_t_child(mr,i),mt,sem_map);
2031 __stx_r2fi(sem,match_results,match_tree,sem_map);
2036 char *transition2Str(TransitionType transition) {
2038 if (transition == TransitionDown) sprintf(trbuf,
"Down");
2039 else if (isTransitionPop(transition)) sprintf(trbuf,
"Up%d",transition*-1);
2040 if (isTransitionNext(transition)) {
2041 sprintf(trbuf+strlen(trbuf),
"%sNext",trbuf[0]?
"+":
"");
2046 #include "ansicolor.h"
/* Mark value that __stx_dump compares with a state's _did field; a state
 * whose _did already equals it is printed as "X" and not followed again,
 * which keeps the dump from looping. */
2047 static int dump_id = 99;
/* State that __stx_dump compares each visited state against
 * (s == G_cur_stx_state); assigned by the matcher's debug output before it
 * dumps the FSA. */
2048 SState *G_cur_stx_state = NULL;
/* Global output buffer for FSA dumps: handed to _stx_dump by the matcher's
 * debug output and printed with puts() by stx_dump. */
2049 char G_stx_dump_buf[100000];
/* Append printf-style formatted text to the end of the string in `buf`.
 * Requires a char *buf holding a NUL-terminated string to be in scope at the
 * expansion site. Uses sprintf, so nothing checks that the result fits. */
2050 #define pbuf(...) sprintf(buf+strlen(buf),__VA_ARGS__)
2052 char * __stx_dump_state(
SState *s,
char *buf) {
2057 case StateGroupOpen:
2060 case StateGroupClose:
2070 else raise_error(
"unimplemented state data type in stx_dump\n");
2071 pbuf(
"(%s%s)",(s->
data.
symbol.flags & LITERAL_NOT) ?
"!" :
"",
2076 pbuf(
"(%sV",(s->
data.
value.flags & LITERAL_NOT) ?
"!" :
"");
2077 if (s->
data.
value.flags & LITERAL_SET) {
2088 pbuf(
"%s",x ? t2s(
_t_child(x,1)) :
"_nil_");
2108 raise_error(
"unknown state");
2114 void __stx_dump(
SState *s,
char *buf) {
2115 if (s->
_did == dump_id) {pbuf(
"X");
return;}
2117 if (s == G_cur_stx_state)
2119 __stx_dump_state(s,buf);
2120 if (s == G_cur_stx_state)
2124 __stx_dump(s->
out,buf);
2128 __stx_dump(s->
out1,buf);
2134 char * _stx_dump(
SState *s,
char *buf) {
2141 void stx_dump(
T *s) {
2145 puts(G_stx_dump_buf);
T * _t_new_root(Symbol symbol)
int _did
used to hold a mark when freeing and printing out FSA to prevent looping.
char * _sem_get_name(SemTable *sem, SemanticID s)
T * _t_next_sibling(T *t)
SemanticID symbol
the symbol that describes the group semantically
T * asciiT_toc(T *asciiT, T *match, T *t, Symbol s)
T * asciiT_tos(T *asciiT, T *match, T *t, Symbol s)
T * _t_path_walk(T *t, int **pathP, int *lenP)
header file for symbol and structure definition functions
int _t_path_depth(int *p)
SgroupOpen groupo
Group data for matching for StateGroup type states.
Semantic tree regular expression header file.
Ptrlist * list1(SState **outp)
int __t_match(T *semtrex, T *source_t, T **rP)
Svalue value
Value data to match on for StateValue type states.
void patch(Ptrlist *l, SState *s, int level)
T * __sl(T *p, bool not, int count,...)
T * asciiT_tof(T *asciiT, T *match, T *t, Symbol s)
T * _t_child(T *t, int i)
T * makeASCIITree(char *c)
StateType type_
copy of state type needed for patch to grab (far too tricky)
void _t_pathcpy(int *dst_p, int *src_p)
void _t_insert_at(T *t, int *path, T *i)
TransitionType transition1
will be: TransitionNextChild=0,TransitionUp=-1,TransitionDown=1
SState * _stx_makeFA(T *t, int *statesP)
int _t_match(T *semtrex, T *t)
T * _t_get_match(T *match, Symbol group)
SgroupClose groupc
Group data for matching for StateGroup type states.
T * _t_embody_from_match(SemTable *sem, T *match, Symbol group, T *t)
void __stx_freeFA2(SState *s)
void _t_replace_node(T *t, T *r)
SState * state(StateType type, int *statesP, int level)
int __stx_freeFA(SState *s, int id)
int _t_matchr(T *semtrex, T *t, T **rP)
char * __stx_makeFA(T *t, SState **in, Ptrlist **out, int level, int *statesP)
void _stx_freeFA(SState *s)
STypeData data
a union to hold the data for which ever type of SState this is
T * asciiT_toi(T *asciiT, T *match, T *t, Symbol s)
struct SState * out
which state to go to next
Ptrlist * append(Ptrlist *l1, Ptrlist *l2)
TransitionType transition
will be: TransitionNextChild=0,TransitionDown=1, or a negative number which means pop up that many levels
#define _sl(t, s)
macro to add a single symbol literal to semtrex tree
char * __t2s(SemTable *sem, T *t, int indent)
Sliteral symbol
Symbol to match on for StateSymbol type states.
T * parseSemtrex(SemTable *sem, char *stx)
T * asciiT_tol(T *asciiT, T *match, T *t, Symbol s)
int _val_match(T *t, T *t1)
int __symbol_set_does_not_contain(T *s, T *t)
T * _stx_results2sem_map(SemTable *sem, T *match_results, T *match_tree)
int __symbol_set_contains(T *s, T *t)
void __t_morph(T *t, Symbol s, void *surface, size_t size, int allocate)
char * _dump_semtrex(SemTable *sem, T *s, char *buf)
StateType type
what type of state this is
void _t_replace(T *t, int i, T *r)
void _t_detach_by_ptr(T *t, T *c)
int uid
unique id for the group
#define _sln(t, s)
macro to add a single symbol literal not to semtrex tree
SState matchstate
the final matching state in the FSA can be declared statically and globally
char * _t_sprint_path(int *fp, char *buf)
struct SState * out1
which alternate state to go to next in the case this is a Split state