1 package oqube.muse;
2
3 import java.util.List;
4 import java.util.ArrayList;
5 import java.util.regex.Pattern;
6 import java.util.regex.Matcher;
7 import java.util.Iterator;
8 import fr.lifl.parsing.ParserListenerDelegate;
9 import java.io.Reader;
10 import fr.lifl.parsing.ParserConfiguration;
11 import fr.lifl.parsing.ParserListener;
12 import java.io.BufferedReader;
13 import java.util.Stack;
14 import fr.lifl.parsing.ParserException;
15 import fr.lifl.parsing.Parser;
16 import fr.lifl.parsing.ParserPosition;
17 import fr.lifl.parsing.Namespace;
18 import java.io.IOException;
19 import java.io.InputStream;
20 import java.io.InputStreamReader;
21 import org.apache.commons.logging.Log;
22 import org.apache.commons.logging.LogFactory;
23
24 /***
25 * Muse parser implementation. Current implementation is closed but future
26 * evolutions shall allow some customization of parsing process.
27 *
28 * @author abailly@oqube.muse.com
29 * @version $Id$
30 */
31 public class MuseParser implements Parser {
32
/** Platform line separator, appended after each raw line buffered into {@code tagContent}. */
private static final String EOL = System.getProperty("line.separator");


/** Delegate holding the listeners registered through {@code addParserListener}. */
private ParserListenerDelegate delegate = new ParserListenerDelegate();


/** Character source consumed by {@code start()}. */
private Reader reader;


/** Raw text collected while inside a tag block; a fresh buffer is created at each opening tag. */
private StringBuffer tagContent;
43
44
45
46
/** Name of the tag most recently opened by the start-tag lexer; null until a tag is seen. */
private String currentTag;

/**
 * @return the name of the tag most recently opened, or null if none was opened yet
 */
public String getCurrentTag() {
return currentTag;
}

/**
 * @param currentTag
 *          name of the tag whose closing line ends the current tag block
 */
public void setCurrentTag(String currentTag) {
this.currentTag = currentTag;
}
56
/**
 * @return the reader this parser takes its input from, or null if none was set
 */
public Reader getReader() {
return reader;
}
60
61
/** Debug flag; never read in this class. */
private boolean debug = false;


/** Logger shared by all parser instances; replaceable through {@code setLog}. */
private static Log log = LogFactory.getLog(MuseParser.class);

/**
 * @return the logger used by all parser instances
 */
public static Log getLog() {
return log;
}

/**
 * Replaces the logger used by all parser instances.
 *
 * @param log
 *          the new logger
 */
public static void setLog(Log log) {
MuseParser.log = log;
}
74
75
76 private State state;
77
78
79 private Stack
80
81
/** Receiver of the events emitted by the line lexers. */
private MuseSink sink;

/**
 * @return the sink the line lexers write to. After a call to
 *         {@code setSink} this is the {@code strong} flow lexer, not the
 *         object that was passed to the setter.
 */
public MuseSink getSink() {
return sink;
}

/**
 * Installs the destination of parse events. The {@code strong} flow lexer
 * becomes this parser's sink and the given sink is handed to it through
 * {@code setWrappedSink}.
 *
 * @param sink
 *          the sink to wrap
 */
public void setSink(MuseSink sink) {
this.sink = strong;
strong.setWrappedSink(sink);
}
92
93
94
95
96 private State push(int st, int col) {
97 states.push(state);
98
99 if (state != null) {
100 StringBuffer sb = new StringBuffer();
101 for (int i = 0; i < col; i++)
102 sb.append(' ');
103 sb.append(state.state).append(" > ").append(st);
104 if (log.isDebugEnabled())
105 log.debug(sb);
106 }
107 return state = new State(st, col);
108 }
109
110
111
112
113 private State pop() {
114 State s = state;
115 state = (State) states.pop();
116 StringBuffer sb = new StringBuffer();
117 for (int i = 0; i < state.col; i++)
118 sb.append(' ');
119 sb.append(state.state).append(" < ").append(s.state);
120 if (log.isDebugEnabled()) {
121 log.debug(sb);
122 }
123 return s;
124 }
125
126
/** Position of the line being parsed; used in error and debug messages. */
private ParserPosition pos;

/**
 * @return the current position, or null if none was set and parsing has not
 *         started
 */
public ParserPosition getPos() {
return pos;
}

/**
 * @param pos
 *          the position object this parser should update and report
 */
public void setPos(ParserPosition pos) {
this.pos = pos;
}
136
137 /***
138 * Handles paragraph liens
139 */
140 private AbstractLexer paralex = new AbstractLexer("^(//S+.*)$") {
141
142
143
144
145
146
147 public void handler() {
148 switch (state.state) {
149 case para:
150 sink.text(matcher.group(1));
151 break;
152 case top:
153
154 sink.startPara();
155 sink.text(matcher.group(1));
156 push(para, 0);
157 break;
158 case tab:
159 pop();
160 handler();
161 break;
162 case item:
163 pop();
164 sink.endItem();
165 handler();
166 break;
167 case list:
168 pop();
169 sink.endList();
170 handler();
171 break;
172 case enums:
173 pop();
174 sink.endEnums();
175 handler();
176 break;
177 case quote:
178 pop();
179 sink.endQuote();
180 handler();
181 break;
182 case table:
183 pop();
184 sink.endTable();
185 handler();
186 break;
187 case center:
188 pop();
189 sink.endCenter();
190 handler();
191 break;
192 case head:
193 pop();
194 sink.endHeader();
195 sink.startBody();
196 handler();
197 break;
198 case tag:
199 tagContent.append(matcher.group(0)).append(EOL);
200 return;
201 default:
202 throw new ParserException("Invalid paragraph line " + matcher.group(1)
203 + " @(" + pos + ")");
204 }
205 }
206 };
207
208 abstract class ItemLexer extends AbstractLexer {
209
210 ItemLexer(String pat) {
211 super(pat);
212 }
213
214 protected void handle(int type) {
215 if (state.state == tag) {
216 tagContent.append(matcher.group(0)).append(EOL);
217 return;
218 }
219 int c = matcher.group(1).length();
220 if (c > state.col) {
221
222 push(type, c);
223 push(item, c);
224 if (type == list)
225 sink.startList();
226 else if (type == enums)
227 sink.startEnums();
228
229 c += matcher.group(2).length();
230 push(tab, c);
231 sink.startItem();
232 sink.text(matcher.group(3));
233 } else if (c < state.col) {
234 State s = pop();
235 switch (s.state) {
236 case para:
237 sink.endPara();
238 break;
239 case head:
240 sink.endHeader();
241 sink.startBody();
242 break;
243 case tab:
244 break;
245 case item:
246 sink.endItem();
247 break;
248 case list:
249 sink.endList();
250 break;
251 case enums:
252 sink.endEnums();
253 break;
254 case quote:
255 sink.endQuote();
256 break;
257 case center:
258 sink.endCenter();
259 case table:
260 sink.endTable();
261 break;
262 default:
263 throw new ParserException("invalid indentation of list item "
264 + matcher.group(0) + " @(" + pos + ")");
265 }
266
267 handler();
268 } else {
269 assert c == state.col;
270 while (state.state != item) {
271 switch (state.state) {
272 case tab:
273 pop();
274 handler();
275 return;
276 default:
277 throw new ParserException("Invalid indentation of list item "
278 + matcher.group(0) + " at " + pos
279 + ": should be at least one space further right");
280 }
281 }
282 sink.endItem();
283 c += matcher.group(2).length();
284 push(tab, c);
285 sink.startItem();
286 sink.text(matcher.group(3));
287 }
288 }
289 };
290
291
292 private AbstractLexer listlex = new ItemLexer("(//s+)(-//s+)(.*)") {
293 public void handler() {
294 handle(list);
295 }
296 };
297
298
299 private AbstractLexer enumlex = new ItemLexer("(//s+)(//d+//.//s+)(.*)") {
300 public void handler() {
301 handle(enums);
302 }
303 };
304
305
306 private AbstractLexer blanklex = new AbstractLexer("(//s+)(//S.*)") {
307 public void handler() {
308
309 int ws = matcher.group(1).length();
310 switch (state.state) {
311 case top:
312 case item:
313 case para:
314 case tab:
315 if (ws >= 6 + state.col) {
316 sink.startCenter();
317 push(center, ws);
318 } else if (ws > state.col) {
319 sink.startQuote();
320 push(quote, ws);
321 }
322 if (ws < state.col)
323 throw new ParserException("Incorrect indentation line "
324 + matcher.group(1) + "in state " + state + " @(" + pos + ")");
325 sink.text(matcher.group(2));
326 break;
327 case quote:
328 case center:
329 if (ws < state.col)
330 throw new ParserException("Incorrect indentation line "
331 + matcher.group(1) + "in state " + state + " @(" + pos + ")");
332 sink.text(matcher.group(2));
333 break;
334 case head:
335 pop();
336 sink.endHeader();
337 sink.startBody();
338 handler();
339 break;
340 case tag:
341 tagContent.append(matcher.group(0)).append(EOL);
342 return;
343 default:
344 throw new ParserException("Unexpected whitespace starting line "
345 + matcher.group(1) + "in state " + state + " @(" + pos + ")");
346 }
347 }
348 };
349
350
351
352
353
354 private AbstractLexer emptylex = new AbstractLexer("^//s*$") {
355 public void handler() {
356 while (state.state != top) {
357 assert !states.isEmpty();
358
359 switch (state.state) {
360 case para:
361 sink.endPara();
362 break;
363 case list:
364 sink.endList();
365 break;
366 case quote:
367 sink.endQuote();
368 break;
369 case center:
370 sink.endCenter();
371 break;
372 case enums:
373 sink.endEnums();
374 break;
375 case item:
376 sink.endItem();
377 break;
378 case head:
379 sink.endHeader();
380 sink.startBody();
381 break;
382 case tab:
383 break;
384 case table:
385 sink.endTable();
386 break;
387 case tag:
388 tagContent.append(EOL).append(EOL);
389 return;
390 default:
391 throw new ParserException("Unexpected empty line in state " + state
392 + " @" + pos + "");
393 }
394 pop();
395 }
396 assert state.state == top;
397 }
398 };
399
400
401 private AbstractLexer headerlex = new AbstractLexer("^(//*+)//s+(//S+.*)$") {
402
403 public void handler() {
404 switch (state.state) {
405 case head:
406 pop();
407 sink.endHeader();
408 sink.startBody();
409 break;
410 case tag:
411 tagContent.append(matcher.group(0)).append(EOL);
412 return;
413 }
414 if (state.state != top)
415 throw new ParserException("Cannot use header inside blocks: "
416 + matcher.group(0) + " in state " + state.state + " @" + pos + "");
417
418 int lvl = matcher.group(1).length();
419 switch (lvl) {
420 case 1:
421 sink.startTitle1();
422 sink.text(matcher.group(2));
423 sink.endTitle1();
424 break;
425 case 2:
426 sink.startTitle2();
427 sink.text(matcher.group(2));
428 sink.endTitle2();
429 break;
430 case 3:
431 sink.startTitle3();
432 sink.text(matcher.group(2));
433 sink.endTitle3();
434 break;
435 default:
436 sink.startTitle4();
437 sink.text(matcher.group(2));
438 sink.endTitle4();
439 break;
440 }
441 }
442 };
443
444
445 private AbstractLexer metalex = new AbstractLexer("^#(//S+)(:?//s+(.*))?$") {
446
447 public void handler() {
448 switch (state.state) {
449 case head:
450
451 sink.addMetadata(matcher.group(1), matcher.group(3));
452 break;
453 case tag:
454 tagContent.append(matcher.group(0)).append(EOL);
455 return;
456 default:
457
458 if (matcher.group(3) == null || "".equals(matcher.group(3)))
459 sink.anchor(matcher.group(1));
460 else
461 throw new ParserException("Cannot use meta data inside blocks: "
462 + matcher.group(0) + " @(" + pos + ")");
463 }
464 }
465 };
466
467
468 private AbstractLexer sepalex = new AbstractLexer("^----+") {
469
470 public void handler() {
471 switch (state.state) {
472 case top:
473 sink.separator();
474 break;
475 case tag:
476 tagContent.append(matcher.group(0));
477 return;
478 default:
479 throw new ParserException("Cannot use separator inside blocks: "
480 + matcher.group(0) + " @" + pos + "");
481 }
482 }
483 };
484
485
486 private AbstractLexer sttaglex = new AbstractLexer("^<(//w+)>//s*$") {
487
488 public void handler() {
489 String tg = matcher.group(1);
490
491 if (state.state == tag) {
492 tagContent.append(matcher.group(0)).append(EOL);
493 return;
494 }
495 setCurrentTag(tg);
496 push(tag, 0);
497 tagContent = new StringBuffer();
498 }
499 };
500
501 private AbstractLexer endtaglex = new AbstractLexer("^</(//w+)>//s*$") {
502
503 public void handler() {
504 String tg = matcher.group(1);
505
506 if (!getCurrentTag().equals(tg)) {
507 tagContent.append(matcher.group(0)).append(EOL);
508 return;
509 }
510 if (state.state != tag)
511 throw new ParserException("Found ending tag " + tag + " @" + pos);
512 pop();
513
514 sink.block(tg, tagContent.toString());
515 }
516 };
517
518 private AbstractLexer tableHdrLex = new AbstractLexer("^[^|]+(//|//|[^|]+)+$") {
519
520 public void handler() {
521 String th = matcher.group(0);
522 if (state.state == tag) {
523 tagContent.append(matcher.group(0)).append(EOL);
524 return;
525 }
526 if (state.state == table)
527 throw new ParserException("Cannot nest headers inside table");
528 push(table, state.col);
529 sink.startTable();
530
531 sink.startTableRow();
532 String[] hdrs = th.split("//|//|");
533 for (int i = 0; i < hdrs.length; i++) {
534 sink.startTableHeader();
535 sink.text(hdrs[i]);
536 sink.endTableHeader();
537 }
538 sink.endTableRow();
539 }
540
541 };
542
543 private AbstractLexer tableDataLex = new AbstractLexer("^[^|]+(//|[^|]*)+$") {
544
545 public void handler() {
546 String th = matcher.group(0);
547 if (state.state == tag) {
548 tagContent.append(matcher.group(0)).append(EOL);
549 return;
550 }
551 if (state.state != table) {
552
553 push(table, state.col);
554 sink.startTable();
555 }
556
557 sink.startTableRow();
558 String[] hdrs = th.split("//|");
559 for (int i = 0; i < hdrs.length; i++) {
560 sink.startTableData();
561 sink.text(hdrs[i]);
562 sink.endTableData();
563 }
564 sink.endTableRow();
565 }
566
567 };
568
569
// Codes of the parser states, passed as first argument to push().

/** Top level of the document; the first state pushed by start(). */
private static final int top = 0;

/** Inside a paragraph. */
private static final int para = 1;

/** Inside a bullet list. */
private static final int list = 2;

/** Inside an item of a bullet list or enumeration. */
private static final int item = 3;

/** Inside a quotation. */
private static final int quote = 4;

/** Inside a centered block. */
private static final int center = 5;

/** Inside an enumeration. */
private static final int enums = 6;

/** Marker pushed after an item marker; its column is where the item text starts. */
private static final int tab = 7;

/** Inside the document header; pushed by start() right after the top state. */
private static final int head = 8;

/** Not referenced anywhere in this class. */
private static final int foot = 9;

/** Inside a tag block, where lines are buffered verbatim. */
private static final int tag = 10;

/** Inside a table. */
private static final int table = 11;
593
594
595
596
597 private FlowLexer emph = new FlowLexer("//*([^*]+)//*") {
598 public void handler() {
599 startEmph();
600
601 assert next != null;
602 ((FlowLexer) next).format(matcher.group(1));
603 endEmph();
604 }
605 };
606
607 private FlowLexer strong = new FlowLexer("//*//*([^*]+)//*//*") {
608 public void handler() {
609 startStrong();
610
611 assert next != null;
612 ((FlowLexer) next).format(matcher.group(1));
613 endStrong();
614 }
615 };
616
617 private FlowLexer verb = new FlowLexer("//=([^=]+)//=") {
618 public void handler() {
619 startVerb();
620
621 assert next != null;
622 ((FlowLexer) next).format(matcher.group(1));
623 endVerb();
624 }
625 };
626
627 private FlowLexer uline = new FlowLexer("_([^_]+)_") {
628 public void handler() {
629 startUline();
630
631 assert next != null;
632 ((FlowLexer) next).format(matcher.group(1));
633 endUline();
634 }
635 };
636
637 private FlowLexer link = new FlowLexer(
638 "//[//[([^]]*)//](?://[([^]]*)//])?//]") {
639 public void handler() {
640 link(matcher.group(1), matcher.group(2));
641 }
642 };
643
/**
 * Sets the configuration object to be used by this parser. This
 * implementation ignores it.
 *
 * @param config
 *          the configuration; not used
 */
public void setParserConfiguration(ParserConfiguration config) {
}
651
/**
 * Sets the reader object providing the stream of characters to parse. The
 * reader is not closed by this method.
 *
 * @param reader
 *          a valid Reader object
 */
public void setReader(Reader reader) {
this.reader = reader;
}
662
/**
 * Sets an input stream to be parsed by this parser. The stream is wrapped in
 * an {@code InputStreamReader} created without an explicit charset, so bytes
 * are decoded with the platform default charset.
 *
 * @param is
 *          the stream; may not be null
 */
public void setStream(InputStream is) {
this.reader = new InputStreamReader(is);
}
673
/**
 * Adds a listener for parse events to this parser. Parse events notify
 * listeners of warnings and recoverable errors; unrecoverable errors are
 * reported through {@link ParserException}. The listener is registered with
 * the internal delegate.
 *
 * @param listener
 *          the listener to add to this parser
 */
public void addParserListener(ParserListener listener) {
delegate.addParserListener(listener);
}
686
/**
 * Gives this parser the position object to use while parsing. If this method
 * is not called before {@link #start()}, a new position at line 1, column 1
 * is created.
 * <p>
 * NOTE(review): {@link #start()} currently resets the line and column of the
 * given position to 1, so only the object identity is kept, not its
 * coordinates.
 *
 * @param pos
 *          the start position - may not be null
 */
public void setStartPosition(ParserPosition pos) {
this.pos = pos;
}
698
/**
 * Gives the parser information about the enclosing Namespace this parsing is
 * part of. This implementation ignores it.
 *
 * @param scope
 *          the enclosing namespace; not used
 */
public void setStartScope(Namespace scope) {
}
706
/**
 * Default constructor. Wires the two lexer chains through successive
 * {@code setNext} calls: the line lexers, starting with {@code headerlex}
 * and ending with {@code paralex}, and the inline flow lexers, starting with
 * {@code strong} and ending with {@code IdentityLexer.instance}.
 */
public MuseParser() {

// line-level chain; parse(String) feeds each line to headerlex
headerlex.setNext(tableHdrLex).setNext(tableDataLex).setNext(sttaglex)
.setNext(endtaglex).setNext(emptylex).setNext(sepalex).setNext(listlex)
.setNext(enumlex).setNext(blanklex).setNext(metalex).setNext(paralex);

// inline chain; strong comes before emph in the chain
strong.setNext(emph).setNext(verb).setNext(uline).setNext(link).setNext(
IdentityLexer.instance);
}
719
720 /***
721 * Asks this Parser to start parsing. This method is normally blocking and
722 * Parser should return when finished. This method must be called after a call
723 * to {@see #setReader(java.io.Reader)} or else it will throw immediatly a
724 * ParserException.
725 * <p>
726 * Recoverable parse events are notified through registered ParserListener
727 * interface, while non recoverable errors throw a PArserException.
728 *
729 * @throws ParserException
730 */
731 public void start() throws ParserException {
732 String line = null;
733 BufferedReader br = new BufferedReader(reader);
734 if (pos == null)
735 pos = new ParserPosition(1, 1);
736 else {
737 pos.setLine(1);
738 pos.setColumn(1);
739 }
740 push(top, 0);
741 push(head, 0);
742 sink.startHeader();
743
744
745
746
747 try {
748 while ((line = br.readLine()) != null) {
749 parse(line);
750 pos.setLine(pos.getLine() + 1);
751 }
752
753 if (state.state != top)
754 emptylex.handler();
755 assert state.state == top;
756 sink.endBody();
757 } catch (IOException e) {
758 throw new ParserException(e);
759 }
760 }
761
/**
 * Parses a single line by handing it to {@code headerlex}, the first lexer
 * of the line chain. Logs the line and the current position before, and a
 * completion message after, when debug logging is enabled.
 *
 * @param line
 *          the line to parse, without its line terminator
 */
public void parse(String line) {
if (log.isDebugEnabled())
log.debug("Parsing line " + line + " @" + pos);
headerlex.parse(line);
if (log.isDebugEnabled())
log.debug("Done");
}
772
773 }