////////////////////////////////////////////////////////////////////////////////
// checkstyle: Checks Java source code for adherence to a set of rules.
// Copyright (C) 2001-2015 the original author or authors.
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
////////////////////////////////////////////////////////////////////////////////

package com.puppycrawl.tools.checkstyle.api;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.UnsupportedCharsetException;
import java.util.AbstractList;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang3.ArrayUtils;

import com.google.common.io.Closeables;

/**
 * Represents the text contents of a file of arbitrary plain text type.
 * <p>
 * This class will be passed to instances of class FileSetCheck by
 * Checker. It implements a string list to ensure backwards
 * compatibility, but can be extended in the future to allow more
 * flexible, more powerful or more efficient handling of certain
 * situations.
 * </p>
 *
 * @author Martin von Gagern
 */
public final class FileText extends AbstractList<String> {

    /**
     * The number of characters to read in one go.
     */
    private static final int READ_BUFFER_SIZE = 1024;

    /**
     * Regular expression pattern matching all line terminators:
     * {@code \n}, {@code \r\n} and a lone {@code \r}.
     */
    private static final Pattern LINE_TERMINATOR = Pattern.compile("\\n|\\r\\n?");

    // For now, we always keep both full text and lines array.
    // In the long run, however, the one passed at initialization might be
    // enough, while the other could be lazily created when requested.
    // This would save memory but cost CPU cycles.

    /**
     * The name of the file.
     * {@code null} if no file name is available for whatever reason.
     */
    private final File file;

    /**
     * The charset used to read the file.
     * {@code null} if the file was reconstructed from a list of lines.
     */
    private final Charset charset;

    /**
     * The full text contents of the file.
     */
    private final String fullText;

    /**
     * The lines of the file, without terminators.
     */
    private final String[] lines;

    /**
     * The first position of each line within the full text.
     * Computed on first use by {@link #findLineBreaks()};
     * {@code null} until then.
     */
    private int[] lineBreaks;

    /**
     * Creates a new file text representation by reading a file from disk.
     *
     * <p>The file will be read using the specified encoding, replacing
     * malformed input and unmappable characters with the default
     * replacement character.
     *
     * @param file the name of the file
     * @param charsetName the encoding to use when reading the file
     * @throws NullPointerException if {@code file} is null
     * @throws IllegalArgumentException if {@code charsetName} is null or
     *         is not a legal charset name (propagated unchanged from
     *         {@link Charset#forName(String)})
     * @throws IOException if the file could not be read; an
     *         {@link UnsupportedEncodingException} is thrown when the
     *         named charset is not supported
     */
    public FileText(File file, String charsetName) throws IOException {
        this.file = file;

        // We use our own decoder, to be sure we have complete control
        // about replacements.
        final CharsetDecoder decoder;
        try {
            charset = Charset.forName(charsetName);
            decoder = charset.newDecoder();
            decoder.onMalformedInput(CodingErrorAction.REPLACE);
            decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
        }
        catch (final UnsupportedCharsetException ex) {
            // Translate to the checked exception while keeping the cause.
            final String message = "Unsupported charset: " + charsetName;
            final UnsupportedEncodingException ex2 = new UnsupportedEncodingException(message);
            ex2.initCause(ex);
            throw ex2;
        }

        fullText = readFile(file, decoder);

        // Use the BufferedReader to break down the lines as this
        // is about 30% faster than using the
        // LINE_TERMINATOR.split(fullText, -1) method
        final List<String> textLines = new ArrayList<>();
        try (BufferedReader reader = new BufferedReader(new StringReader(fullText))) {
            String line = reader.readLine();
            while (line != null) {
                textLines.add(line);
                line = reader.readLine();
            }
        }
        lines = textLines.toArray(new String[textLines.size()]);
    }

    /**
     * Compatibility constructor.
     *
     * <p>This constructor reconstructs the text of the file by joining
     * lines with linefeed characters. This process does not restore
     * the original line terminators and should therefore be avoided.
     *
     * @param file the name of the file
     * @param lines the lines of the text, without terminators
     * @throws NullPointerException if {@code lines} is null
     */
    private FileText(File file, List<String> lines) {
        // Every line, including the last one, is followed by '\n'.
        final StringBuilder buf = new StringBuilder();
        for (final String line : lines) {
            buf.append(line).append('\n');
        }

        this.file = file;
        charset = null;
        fullText = buf.toString();
        this.lines = lines.toArray(new String[lines.size()]);
    }

    /**
     * Copy constructor.
     * The lines array and the line break positions are cloned, so the
     * copy does not share mutable arrays with the source object.
     * @param fileText to make copy of
     */
    public FileText(FileText fileText) {
        file = fileText.file;
        charset = fileText.charset;
        fullText = fileText.fullText;
        lines = fileText.lines.clone();
        // The source may not have computed its line breaks yet, in which
        // case the field is null; ArrayUtils.clone is used for that reason.
        lineBreaks = ArrayUtils.clone(fileText.lineBreaks);
    }

    /**
     * Reads file using specific decoder and returns all its content as a String.
     * @param inputFile File to read
     * @param decoder Charset decoder
     * @return File's text
     * @throws IOException Unable to open or read the file
     */
    private static String readFile(final File inputFile, final CharsetDecoder decoder)
            throws IOException {
        if (!inputFile.exists()) {
            throw new FileNotFoundException(inputFile.getPath() + " (No such file or directory)");
        }
        final StringBuilder buf = new StringBuilder();
        final FileInputStream stream = new FileInputStream(inputFile);
        final Reader reader = new InputStreamReader(stream, decoder);
        try {
            final char[] chars = new char[READ_BUFFER_SIZE];
            int len = reader.read(chars);
            // read() reports end of input with a negative count.
            while (len >= 0) {
                buf.append(chars, 0, len);
                len = reader.read(chars);
            }
        }
        finally {
            // Best-effort close: a failure while closing must not hide
            // the outcome of the read itself.
            Closeables.closeQuietly(reader);
        }
        return buf.toString();
    }

    /**
     * Compatibility conversion.
     *
     * <p>This method can be used to convert the arguments passed to
     * {@link FileSetCheck#process(File,List)} to a FileText
     * object. If the list of lines already is a FileText, it is
     * returned as is. Otherwise, a new FileText is constructed by
     * joining the lines using line feed characters.
     *
     * @param file the name of the file
     * @param lines the lines of the text, without terminators
     * @return an object representing the denoted text file
     */
    public static FileText fromLines(File file, List<String> lines) {
        if (lines instanceof FileText) {
            return (FileText) lines;
        }
        return new FileText(file, lines);
    }

    /**
     * Get the name of the file.
     * @return an object containing the name of the file
     */
    public File getFile() {
        return file;
    }

    /**
     * Get the character set which was used to read the file.
     * Will be {@code null} for a file reconstructed from its lines.
     * @return the charset used when the file was read
     */
    public Charset getCharset() {
        return charset;
    }

    /**
     * Retrieve the full text of the file.
     * @return the full text of the file
     */
    public CharSequence getFullText() {
        return fullText;
    }

    /**
     * Returns an array of all lines.
     * {@code text.toLinesArray()} is equivalent to
     * {@code text.toArray(new String[text.size()])}.
     * The returned array is a copy; modifying it does not affect this object.
     * @return an array of all lines of the text
     */
    public String[] toLinesArray() {
        return lines.clone();
    }

    /**
     * Find positions of line breaks in the full text.
     * The result is computed on the first call and cached afterwards.
     * @return an array giving the first positions of each line; it holds
     *         {@code size() + 1} entries, the first of which is zero
     */
    private int[] findLineBreaks() {
        if (lineBreaks == null) {
            final int[] lineBreakPositions = new int[size() + 1];
            lineBreakPositions[0] = 0;
            int lineNo = 1;
            final Matcher matcher = LINE_TERMINATOR.matcher(fullText);
            // A line starts right after the end of each terminator.
            while (matcher.find()) {
                lineBreakPositions[lineNo] = matcher.end();
                lineNo++;
            }
            // The last line had no terminator: close the table with the
            // end of the text.
            if (lineNo < lineBreakPositions.length) {
                lineBreakPositions[lineNo] = fullText.length();
            }
            lineBreaks = lineBreakPositions;
        }
        return lineBreaks;
    }

    /**
     * Determine line and column numbers in full text.
     * The reported line number starts at one, the column number at zero.
     * @param pos the character position in the full text
     * @return the line and column numbers of this character
     */
    public LineColumn lineColumn(int pos) {
        final int[] lineBreakPositions = findLineBreaks();
        int lineNo = Arrays.binarySearch(lineBreakPositions, pos);
        if (lineNo < 0) {
            // we have: lineNo = -(insertion point) - 1
            // we want: lineNo =  (insertion point) - 1
            lineNo = -lineNo - 2;
        }
        final int startOfLine = lineBreakPositions[lineNo];
        final int columnNo = pos - startOfLine;
        // now we have lineNo and columnNo, both starting at zero.
        return new LineColumn(lineNo + 1, columnNo);
    }

    /**
     * Retrieves a line of the text by its number.
     * The returned line will not contain a trailing terminator.
     * @param lineNo the number of the line to get, starting at zero
     * @return the line with the given number
     */
    @Override
    public String get(final int lineNo) {
        return lines[lineNo];
    }

    /**
     * Counts the lines of the text.
     * @return the number of lines in the text
     */
    @Override
    public int size() {
        return lines.length;
    }

}