Frames | No Frames |
1: /* textPreProcessor.java -- 2: Copyright (C) 2005 Free Software Foundation, Inc. 3: 4: This file is part of GNU Classpath. 5: 6: GNU Classpath is free software; you can redistribute it and/or modify 7: it under the terms of the GNU General Public License as published by 8: the Free Software Foundation; either version 2, or (at your option) 9: any later version. 10: 11: GNU Classpath is distributed in the hope that it will be useful, but 12: WITHOUT ANY WARRANTY; without even the implied warranty of 13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14: General Public License for more details. 15: 16: You should have received a copy of the GNU General Public License 17: along with GNU Classpath; see the file COPYING. If not, write to the 18: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 19: 02110-1301 USA. 20: 21: Linking this library statically or dynamically with other modules is 22: making a combined work based on this library. Thus, the terms and 23: conditions of the GNU General Public License cover the whole 24: combination. 25: 26: As a special exception, the copyright holders of this library give you 27: permission to link this library with independent modules to produce an 28: executable, regardless of the license terms of these independent 29: modules, and to copy and distribute the resulting executable under 30: terms of your choice, provided that you also meet, for each linked 31: independent module, the terms and conditions of the license of that 32: module. An independent module is a module which is not derived from 33: or based on this library. If you modify this library, you may extend 34: this exception to your version of the library, but you are not 35: obligated to do so. If you do not wish to do so, delete this 36: exception statement from your version. */ 37: 38: 39: package gnu.javax.swing.text.html.parser.support; 40: 41: import gnu.javax.swing.text.html.parser.support.low.Constants; 42: 43: /** 44: * Pre - processes text in text parts of the html document. 45: * Not thread - safe. 46: * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org) 47: */ 48: public class textPreProcessor 49: { 50: /** 51: * Pre - process non-preformatted text. 52: * \t, \r and \n mutate into spaces, then multiple spaces mutate 53: * into single one, all whitespace around tags is consumed. 54: * The content of the passed buffer is destroyed. 55: * @param text A text to pre-process. 56: */ 57: public char[] preprocess(StringBuffer a_text) 58: { 59: if (a_text.length() == 0) 60: return null; 61: 62: char[] text = toCharArray(a_text); 63: 64: int a = 0; 65: int b = text.length - 1; 66: 67: try 68: { 69: while (Constants.bWHITESPACE.get(text [ a ])) 70: a++; 71: while (Constants.bWHITESPACE.get(text [ b ])) 72: b--; 73: } 74: catch (ArrayIndexOutOfBoundsException sx) 75: { 76: // A text fragment, consisting from line breaks only. 77: return null; 78: } 79: 80: a_text.setLength(0); 81: 82: boolean spacesWere = false; 83: boolean spaceNow; 84: char c; 85: 86: chars: 87: for (int i = a; i <= b; i++) 88: { 89: c = text [ i ]; 90: spaceNow = Constants.bWHITESPACE.get(c); 91: if (spacesWere && spaceNow) 92: continue chars; 93: if (spaceNow) 94: a_text.append(' '); 95: else 96: a_text.append(c); 97: spacesWere = spaceNow; 98: } 99: 100: if (a_text.length() == text.length) 101: { 102: a_text.getChars(0, a_text.length(), text, 0); 103: return text; 104: } 105: else 106: return toCharArray(a_text); 107: } 108: 109: /** 110: * Pre - process pre-formatted text. 111: * Heading/closing spaces and tabs preserved. 112: * ONE bounding \r, \n or \r\n is removed. 113: * \r or \r\n mutate into \n. Tabs are 114: * preserved. 115: * The content of the passed buffer is destroyed. 116: * @param text 117: * @return 118: */ 119: public char[] preprocessPreformatted(StringBuffer a_text) 120: { 121: if (a_text.length() == 0) 122: return null; 123: 124: char[] text = toCharArray(a_text); 125: 126: int a = 0; 127: int n = text.length - 1; 128: int b = n; 129: 130: if (text [ 0 ] == '\n') 131: a++; 132: else 133: { 134: if (text [ 0 ] == '\r') 135: { 136: a++; 137: if (text.length > 1 && text [ 1 ] == '\n') 138: a++; 139: } 140: } 141: 142: if (text [ n ] == '\r') 143: b--; 144: else 145: { 146: if (text [ n ] == '\n') 147: { 148: b--; 149: if (n > 0 && text [ n - 1 ] == '\r') 150: b--; 151: } 152: } 153: 154: a_text.setLength(0); 155: 156: if (a > b) 157: return null; 158: 159: char c; 160: 161: for (int i = a; i <= b; i++) 162: { 163: c = text [ i ]; 164: if (c == '\r') 165: { 166: if (i == b || text [ i + 1 ] != '\n') 167: a_text.append('\n'); 168: } 169: else 170: a_text.append(c); 171: } 172: 173: if (a_text.length() == text.length) 174: { 175: a_text.getChars(0, a_text.length(), text, 0); 176: return text; 177: } 178: else 179: return toCharArray(a_text); 180: } 181: 182: /** 183: * Return array of chars, present in the given buffer. 184: * @param a_text The buffer 185: * @return 186: */ 187: private static char[] toCharArray(StringBuffer a_text) 188: { 189: char[] text = new char[ a_text.length() ]; 190: a_text.getChars(0, text.length, text, 0); 191: return text; 192: } 193: }