1    
2    /* ====================================================================
3     * The Apache Software License, Version 1.1
4     *
5     * Copyright (c) 2002 The Apache Software Foundation.  All rights
6     * reserved.
7     *
8     * Redistribution and use in source and binary forms, with or without
9     * modification, are permitted provided that the following conditions
10    * are met:
11    *
12    * 1. Redistributions of source code must retain the above copyright
13    *    notice, this list of conditions and the following disclaimer.
14    *
15    * 2. Redistributions in binary form must reproduce the above copyright
16    *    notice, this list of conditions and the following disclaimer in
17    *    the documentation and/or other materials provided with the
18    *    distribution.
19    *
20    * 3. The end-user documentation included with the redistribution,
21    *    if any, must include the following acknowledgment:
22    *       "This product includes software developed by the
23    *        Apache Software Foundation (http://www.apache.org/)."
24    *    Alternately, this acknowledgment may appear in the software itself,
25    *    if and wherever such third-party acknowledgments normally appear.
26    *
27    * 4. The names "Apache" and "Apache Software Foundation" and
28    *    "Apache POI" must not be used to endorse or promote products
29    *    derived from this software without prior written permission. For
30    *    written permission, please contact apache@apache.org.
31    *
32    * 5. Products derived from this software may not be called "Apache",
33    *    "Apache POI", nor may "Apache" appear in their name, without
34    *    prior written permission of the Apache Software Foundation.
35    *
36    * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
37    * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38    * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39    * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
40    * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41    * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
42    * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
43    * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
44    * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
45    * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
46    * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47    * SUCH DAMAGE.
48    * ====================================================================
49    *
50    * This software consists of voluntary contributions made by many
51    * individuals on behalf of the Apache Software Foundation.  For more
52    * information on the Apache Software Foundation, please see
53    * <http://www.apache.org/>.
54    */
55   
56   
57   package org.apache.poi.hssf.model;
58   
59   import org.apache.poi.hssf.record.formula.*;
60   import org.apache.poi.hssf.util.SheetReferences;
61   
62   import java.util.ArrayList;
63   import java.util.List;
64   
65   
66   /**
67    * This class parses a formula string into a List of tokens in RPN order.
68    * Inspired by 
69    *           Lets Build a Compiler, by Jack Crenshaw
70    * BNF for the formula expression is :
71    * <expression> ::= <term> [<addop> <term>]*
72    * <term> ::= <factor>  [ <mulop> <factor> ]*
73    * <factor> ::= <number> | (<expression>) | <cellRef> | <function>
74    * <function> ::= <functionName> ([expression [, expression]*])
75    *
76    *  @author Avik Sengupta <avik AT Avik Sengupta DOT com>
77    *  @author Andrew C. oliver (acoliver at apache dot org)
78    *  @author Eric Ladner (eladner at goldinc dot com)
79    */
80   public class FormulaParser {
81       
82       public static int FORMULA_TYPE_CELL = 0;
83       public static int FORMULA_TYPE_SHARED = 1;
84       public static int FORMULA_TYPE_ARRAY =2;
85       public static int FORMULA_TYPE_CONDFOMRAT = 3;
86       public static int FORMULA_TYPE_NAMEDRANGE = 4;
87       
88       private String formulaString;
89       private int pointer=0;
90       private int formulaLength;
91       
92       private List tokens = new java.util.Stack();
93       //private Stack tokens = new java.util.Stack();
94       private List result = new ArrayList();
95       private int numParen;
96       
97       private static char TAB = '\t';
98       private static char CR = '\n';
99       
100     private char look;              // Lookahead Character
101     
102     private Workbook book;
103      
104      
105      /** create the parser with the string that is to be parsed
106       *    later call the parse() method to return ptg list in rpn order
107       *    then call the getRPNPtg() to retrive the parse results
108       *  This class is recommended only for single threaded use
109       */
110      public FormulaParser(String formula, Workbook book){
111          formulaString = formula;
112          pointer=0;
113          this.book = book;
114      	formulaLength = formulaString.length();
115      }
116      
117  
118      /** Read New Character From Input Stream */
119      private void GetChar() {
120          // Check to see if we've walked off the end of the string.
121  	// Just return if so and reset Look to smoething to keep 
122  	// SkipWhitespace from spinning
123          if (pointer == formulaLength) {
124              look = (char)0;
125  	    return;
126  	}
127          look=formulaString.charAt(pointer++);
128          //System.out.println("Got char: "+Look);
129      }
130      
131  
132      /** Report an Error */
133      private void Error(String s) {
134          System.out.println("Error: "+s);
135      }
136      
137      
138   
139      /** Report Error and Halt */
140      private void Abort(String s) {
141          Error(s);
142          //System.exit(1);  //throw exception??
143          throw new RuntimeException("Cannot Parse, sorry : "+s);
144      }
145      
146      
147  
148      /** Report What Was Expected */
149      private void Expected(String s) {
150          Abort(s + " Expected");
151      }
152      
153      
154   
155      /** Recognize an Alpha Character */
156      private boolean IsAlpha(char c) {
157          return Character.isLetter(c) || c == '$';
158      }
159      
160      
161   
162      /** Recognize a Decimal Digit */
163      private boolean IsDigit(char c) {
164          //System.out.println("Checking digit for"+c);
165          return Character.isDigit(c);
166      }
167      
168      
169  
170      /** Recognize an Alphanumeric */
171      private boolean  IsAlNum(char c) {
172          return  (IsAlpha(c) || IsDigit(c));
173      }
174      
175      
176  
177      /** Recognize an Addop */
178      private boolean IsAddop( char c) {
179          return (c =='+' || c =='-');
180      }
181      
182  
183      /** Recognize White Space */
184      private boolean IsWhite( char c) {
185          return  (c ==' ' || c== TAB);
186      }
187      
188      
189  
190      /** Skip Over Leading White Space */
191      private void SkipWhite() {
192          while (IsWhite(look)) {
193              GetChar();
194          }
195      }
196      
197      
198  
199      /** Match a Specific Input Character */
200      private void Match(char x) {
201          if (look != x) {
202              Expected("" + x + "");
203          }else {
204              GetChar();
205              SkipWhite();
206          }
207      }
208      
209      /** Get an Identifier */
210      private String GetName() {
211          StringBuffer Token = new StringBuffer();
212          if (!IsAlpha(look)) {
213              Expected("Name");
214          }
215          while (IsAlNum(look)) {
216              Token = Token.append(Character.toUpperCase(look));
217              GetChar();
218          }
219          SkipWhite();
220          return Token.toString();
221      }
222      
223      /**Get an Identifier AS IS, without stripping white spaces or 
224         converting to uppercase; used for literals */
225      private String GetNameAsIs() {
226          StringBuffer Token = new StringBuffer();
227          if (!IsAlpha(look)) {
228              Expected("Name");
229          }
230          while (IsAlNum(look) || IsWhite(look)) {
231              Token = Token.append(look);
232              GetChar();
233          }
234          return Token.toString();
235      }
236      
237      
238      /** Get a Number */
239      private String GetNum() {
240          String Value ="";
241          if  (!IsDigit(look)) Expected("Integer");
242          while (IsDigit(look)){
243              Value = Value + look;
244              GetChar();
245          }
246          SkipWhite();
247          return Value;
248      }
249  
250      /** Output a String with Tab */
251      private void  Emit(String s){
252          System.out.print(TAB+s);
253      }
254  
255      /** Output a String with Tab and CRLF */
256      private void EmitLn(String s) {
257          Emit(s);
258          System.out.println();;
259      }
260      
261      /** Parse and Translate a String Identifier */
262      private void Ident() {
263          String name;
264          name = GetName();
265          if (look == '('){
266              //This is a function
267              function(name);
268          } else if (look == ':') { // this is a AreaReference
269              String first = name;
270              Match(':');
271              String second = GetName();
272              tokens.add(new AreaPtg(first+":"+second));
273          } else if (look == '!') {
274              Match('!');
275              String sheetName = name;
276              String first = GetName();
277              short externIdx = book.checkExternSheet(book.getSheetIndex(sheetName));
278              if (look == ':') {
279                  Match(':');
280                  String second=GetName();
281                  
282                  tokens.add(new Area3DPtg(first+":"+second,externIdx));
283              } else {
284                  tokens.add(new Ref3DPtg(first,externIdx));
285              }
286          } else {
287              //this can be either a cell ref or a named range !!
288              boolean cellRef = true ; //we should probably do it with reg exp??
289              boolean boolLit = (name.equals("TRUE") || name.equals("FALSE"));
290              if (boolLit) {
291                  tokens.add(new BoolPtg(name));
292              } else if (cellRef) {
293                  tokens.add(new ReferencePtg(name));
294              }else {
295                  //handle after named range is integrated!!
296              }
297          }
298      }
299      
300      private void function(String name) {
301          Match('(');
302          int numArgs = Arguments();
303          Match(')');
304          tokens.add(getFunction(name,(byte)numArgs));
305      }
306      
307      private Ptg getFunction(String name,byte numArgs) {
308          Ptg retval = null;
309          //retval = new FuncVarPtg(name,numArgs);
310         if (name.equals("IF")) {
311              AttrPtg ptg = new AttrPtg();
312              ptg.setData((short)6); //sums don't care but this is what excel does.
313              ptg.setOptimizedIf(true);
314              retval = ptg;
315          } else {
316              retval = new FuncVarPtg(name,numArgs);
317          }
318          
319          return retval; 
320      }
321      
322      /** get arguments to a function */
323      private int Arguments() {
324          int numArgs = 0;
325          if (look != ')')  {
326              numArgs++; 
327              Expression();
328          }
329          while (look == ','  || look == ';') { //TODO handle EmptyArgs
330              if(look == ',') {
331                Match(',');
332              }
333              else {
334                Match(';');
335              }
336              Expression();
337              numArgs++;
338          }
339          return numArgs;
340      }
341  
342     /** Parse and Translate a Math Factor  */
343      private void Factor() {
344          if (look == '(' ) {
345              Match('(');
346              Expression();
347              Match(')');
348              tokens.add(new ParenthesisPtg());
349              return;
350          } else if (IsAlpha(look)){
351              Ident();
352          } else if(look == '"') {
353             StringLiteral();
354          } else {
355               
356              String number = GetNum();
357              if (look=='.') {
358                  Match('.');
359                  String decimalPart = null;
360                  if (IsDigit(look)) number = number +"."+ GetNum(); //this also takes care of someone entering "1234."
361                  tokens.add(new NumberPtg(number));
362              } else {
363                  tokens.add(new IntPtg(number));  //TODO:what if the number is too big to be a short? ..add factory to return Int or Number!
364              }
365          }
366      }
367      
368      private void StringLiteral() {
369          Match('"');
370          String name= GetNameAsIs();
371          Match('"');
372          tokens.add(new StringPtg(name));
373      }
374      
375      /** Recognize and Translate a Multiply */
376      private void Multiply(){
377          Match('*');
378          Factor();
379          tokens.add(new MultiplyPtg());
380    
381      }
382      
383      
384      /** Recognize and Translate a Divide */
385      private void Divide() {
386          Match('/');
387          Factor();
388          tokens.add(new DividePtg());
389  
390      }
391      
392      
393      /** Parse and Translate a Math Term */
394      private void  Term(){
395          Factor();
396          while (look == '*' || look == '/' || look == '^' || look == '&' || look == '=' ) {
397              ///TODO do we need to do anything here??
398              if (look == '*') Multiply();
399              if (look == '/') Divide();
400              if (look == '^') Power();
401              if (look == '&') Concat();
402              if (look == '=') Equal();
403          }
404      }
405      
406      /** Recognize and Translate an Add */
407      private void Add() {
408          Match('+');
409          Term();
410          tokens.add(new AddPtg());
411      }
412      
413      /** Recognize and Translate a Concatination */
414      private void Concat() {
415          Match('&');
416          Term();
417          tokens.add(new ConcatPtg());
418      }
419      
420      /** Recognize and Translate a test for Equality  */
421      private void Equal() {
422          Match('=');
423          Term();
424          tokens.add(new EqualPtg());
425      }
426      
427      /** Recognize and Translate a Subtract */
428      private void Subtract() {
429          Match('-');
430          Term();
431          tokens.add(new SubtractPtg());
432      }
433      
434      private void Power() {
435          Match('^');
436          Term();
437          tokens.add(new PowerPtg());
438      }
439      
440      
441      /** Parse and Translate an Expression */
442      private void Expression() {
443          if (IsAddop(look)) {
444              EmitLn("CLR D0");  //unaryAdd ptg???
445          } else {
446              Term();
447          }
448          while (IsAddop(look)) {
449              if ( look == '+' )  Add();
450              if (look == '-') Subtract();
451              if (look == '*') Multiply();
452              if (look == '/') Divide();
453          }
454      }
455      
456      
457      //{--------------------------------------------------------------}
458      //{ Parse and Translate an Assignment Statement }
459      /**
460  procedure Assignment;
461  var Name: string[8];
462  begin
463     Name := GetName;
464     Match('=');
465     Expression;
466  
467  end;
468       **/
469      
470   
471      /** Initialize */
472      
473      private void  init() {
474          GetChar();
475          SkipWhite();
476      }
477      
478      /** API call to execute the parsing of the formula
479       *
480       */
481      public void parse() {
482          synchronized (tokens) {
483              init();
484              Expression();
485          }
486      }
487      
488      
489      /*********************************
490       * PARSER IMPLEMENTATION ENDS HERE
491       * EXCEL SPECIFIC METHODS BELOW
492       *******************************/
493      
494      /** API call to retrive the array of Ptgs created as 
495       * a result of the parsing
496       */
497      public Ptg[] getRPNPtg() {
498       return getRPNPtg(FORMULA_TYPE_CELL);
499      }
500      
501      public Ptg[] getRPNPtg(int formulaType) {
502          Node node = createTree();
503          setRootLevelRVA(node, formulaType);
504          setParameterRVA(node,formulaType);
505          return (Ptg[]) tokens.toArray(new Ptg[0]);
506      }
507      
508      private void setRootLevelRVA(Node n, int formulaType) {
509          //Pg 16, excelfileformat.pdf @ openoffice.org
510          Ptg p = (Ptg) n.getValue();
511              if (formulaType == this.FORMULA_TYPE_NAMEDRANGE) {
512                  if (p.getDefaultOperandClass() == Ptg.CLASS_REF) {
513                      setClass(n,Ptg.CLASS_REF);
514                  } else {
515                      setClass(n,Ptg.CLASS_ARRAY);
516                  }
517              } else {
518                  setClass(n,Ptg.CLASS_VALUE);
519              }
520          
521      }
522      
523      private void setParameterRVA(Node n, int formulaType) {
524          Ptg p = (Ptg) n.getValue();
525          if (p instanceof AbstractFunctionPtg) {
526              int numOperands = n.getNumChildren();
527              for (int i =0;i<n.getNumChildren();i++) {
528                  setParameterRVA(n.getChild(i),((AbstractFunctionPtg)p).getParameterClass(i),formulaType);
529                  if (n.getChild(i).getValue() instanceof AbstractFunctionPtg) {
530                      setParameterRVA(n.getChild(i),formulaType);
531                  }
532              }  
533          } else {
534              for (int i =0;i<n.getNumChildren();i++) {
535                  setParameterRVA(n.getChild(i),formulaType);
536              }
537          } 
538      }
539      private void setParameterRVA(Node n, int expectedClass,int formulaType) {
540          Ptg p = (Ptg) n.getValue();
541          if (expectedClass == Ptg.CLASS_REF) { //pg 15, table 1 
542              if (p.getDefaultOperandClass() == Ptg.CLASS_REF ) {
543                  setClass(n, Ptg.CLASS_REF);
544              }
545              if (p.getDefaultOperandClass() == Ptg.CLASS_VALUE) {
546                  if (formulaType==FORMULA_TYPE_CELL || formulaType == FORMULA_TYPE_SHARED) {
547                      setClass(n,Ptg.CLASS_VALUE);
548                  } else {
549                      setClass(n,Ptg.CLASS_ARRAY);
550                  }
551              }
552              if (p.getDefaultOperandClass() == Ptg.CLASS_ARRAY ) {
553                  setClass(n, Ptg.CLASS_ARRAY);
554              }
555          } else if (expectedClass == Ptg.CLASS_VALUE) { //pg 15, table 2
556              if (formulaType == FORMULA_TYPE_NAMEDRANGE) {
557                  setClass(n,Ptg.CLASS_ARRAY) ;
558              } else {
559                  setClass(n,Ptg.CLASS_VALUE);
560              }
561          } else { //Array class, pg 16. 
562              if (p.getDefaultOperandClass() == Ptg.CLASS_VALUE &&
563                   (formulaType==FORMULA_TYPE_CELL || formulaType == FORMULA_TYPE_SHARED)) {
564                   setClass(n,Ptg.CLASS_VALUE);
565              } else {
566                  setClass(n,Ptg.CLASS_ARRAY);
567              }
568          }
569      }
570      
571       private void setClass(Node n, byte theClass) {
572          Ptg p = (Ptg) n.getValue();
573          if (p instanceof AbstractFunctionPtg || !(p instanceof OperationPtg)) {
574              p.setClass(theClass);
575          } else {
576              for (int i =0;i<n.getNumChildren();i++) {
577                  setClass(n.getChild(i),theClass);
578              }
579          }
580       }
581      /**
582       * Convience method which takes in a list then passes it to the other toFormulaString
583       * signature. 
584       * @param lptgs - list of ptgs, can be null
585       */
586      public static String toFormulaString(SheetReferences refs, List lptgs) {
587          String retval = null;
588          if (lptgs == null || lptgs.size() == 0) return "#NAME";
589          Ptg[] ptgs = new Ptg[lptgs.size()];
590          ptgs = (Ptg[])lptgs.toArray(ptgs);
591          retval = toFormulaString(refs, ptgs);
592          return retval;
593      }
594      
595      /** Static method to convert an array of Ptgs in RPN order 
596       *  to a human readable string format in infix mode
597       *  @param ptgs - array of ptgs, can be null or empty
598       */
599      public static String toFormulaString(SheetReferences refs, Ptg[] ptgs) {
600          if (ptgs == null || ptgs.length == 0) return "#NAME";
601          java.util.Stack stack = new java.util.Stack();
602          int numPtgs = ptgs.length;
603          OperationPtg o;
604          int numOperands;
605          String result=null;
606          String[] operands;
607          AttrPtg ifptg = null;
608          for (int i=0;i<numPtgs;i++) {
609             // Excel allows to have AttrPtg at position 0 (such as Blanks) which
610             // do not have any operands. Skip them.
611              if (ptgs[i] instanceof OperationPtg && i>0) {
612                    o = (OperationPtg) ptgs[i];
613                    
614                    if (o instanceof AttrPtg && ((AttrPtg)o).isOptimizedIf()) {
615                          ifptg=(AttrPtg)o;
616                    } else {
617                        
618                        numOperands = o.getNumberOfOperands();
619                        operands = new String[numOperands];
620                        
621                        for (int j=0;j<numOperands;j++) {
622                            operands[numOperands-j-1] = (String) stack.pop(); //TODO: catch stack underflow and throw parse exception. 
623                        }  
624  
625                        if ( (o instanceof AbstractFunctionPtg) && 
626                              ((AbstractFunctionPtg)o).getName().equals("specialflag") &&
627                              ifptg != null
628                              ) {
629                               // this special case will be way different.
630                               result = ifptg.toFormulaString(
631                                    new String[] {(o.toFormulaString(operands))}
632                                                             );
633                               ifptg = null;
634                        } else {                      
635                          result = o.toFormulaString(operands);                                              
636                        }
637                        stack.push(result);                                        
638                    }
639                        
640                    
641              } else {
642                  stack.push(ptgs[i].toFormulaString(refs));
643              }
644          }
645          return (String) stack.pop(); //TODO: catch stack underflow and throw parse exception. 
646      }
647      
648      private Node createTree() {
649          java.util.Stack stack = new java.util.Stack();
650          int numPtgs = tokens.size();
651          OperationPtg o;
652          int numOperands;
653          Node[] operands;
654          for (int i=0;i<numPtgs;i++) {
655              if (tokens.get(i) instanceof OperationPtg) {
656                  
657                  o = (OperationPtg) tokens.get(i);
658                  numOperands = o.getNumberOfOperands();
659                  operands = new Node[numOperands];
660                  for (int j=0;j<numOperands;j++) {
661                      operands[numOperands-j-1] = (Node) stack.pop(); 
662                  }
663                  Node result = new Node(o);
664                  result.setChildren(operands);
665                  stack.push(result);
666              } else {
667                  stack.push(new Node((Ptg)tokens.get(i)));
668              }
669          }
670          return (Node) stack.pop();
671      }
672     
673      /** toString on the parser instance returns the RPN ordered list of tokens
674       *   Useful for testing
675       */
676      public String toString() {
677          SheetReferences refs = book.getSheetReferences();
678          StringBuffer buf = new StringBuffer();
679             for (int i=0;i<tokens.size();i++) {
680              buf.append( ( (Ptg)tokens.get(i)).toFormulaString(refs));
681              buf.append(' ');
682          } 
683          return buf.toString();
684      }
685      
686  }    
687      class Node {
688          private Ptg value=null;
689          private Node[] children=new Node[0];
690          private int numChild=0;
691          public Node(Ptg val) {
692              value = val; 
693          }
694          public void setChildren(Node[] child) {children = child;numChild=child.length;}
695          public int getNumChildren() {return numChild;}
696          public Node getChild(int number) {return children[number];}
697          public Ptg getValue() {return value;}
698      }
699