////////////////////////////////////////////////////////////////////////////////
// checkstyle: Checks Java source code for adherence to a set of rules.
// Copyright (C) 2001-2015 the original author or authors.
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
////////////////////////////////////////////////////////////////////////////////
019
020package com.puppycrawl.tools.checkstyle.checks.javadoc;
021
022import java.util.ArrayDeque;
023import java.util.Deque;
024import java.util.List;
025import java.util.Locale;
026import java.util.Set;
027import java.util.regex.Pattern;
028
029import com.google.common.collect.ImmutableSortedSet;
030import com.puppycrawl.tools.checkstyle.api.Check;
031import com.puppycrawl.tools.checkstyle.api.DetailAST;
032import com.puppycrawl.tools.checkstyle.api.FileContents;
033import com.puppycrawl.tools.checkstyle.api.Scope;
034import com.puppycrawl.tools.checkstyle.api.TextBlock;
035import com.puppycrawl.tools.checkstyle.api.TokenTypes;
036import com.puppycrawl.tools.checkstyle.utils.CheckUtils;
037import com.puppycrawl.tools.checkstyle.utils.ScopeUtils;
038
039/**
040 * Custom Checkstyle Check to validate Javadoc.
041 *
042 * @author Chris Stillwell
043 * @author Daniel Grenner
044 * @author Travis Schneeberger
045 */
046public class JavadocStyleCheck
047    extends Check {
048
049    /** Message property key for the Unclosed HTML message. */
050    public static final String JAVADOC_MISSING = "javadoc.missing";
051
052    /** Message property key for the Unclosed HTML message. */
053    public static final String EMPTY = "javadoc.empty";
054
055    /** Message property key for the Unclosed HTML message. */
056    public static final String NO_PERIOD = "javadoc.noPeriod";
057
058    /** Message property key for the Unclosed HTML message. */
059    public static final String INCOMPLETE_TAG = "javadoc.incompleteTag";
060
061    /** Message property key for the Unclosed HTML message. */
062    public static final String UNCLOSED_HTML = "javadoc.unclosedHtml";
063
064    /** Message property key for the Extra HTML message. */
065    public static final String EXTRA_HTML = "javadoc.extraHtml";
066
067    /** HTML tags that do not require a close tag. */
068    private static final Set<String> SINGLE_TAGS = ImmutableSortedSet.of(
069            "br", "li", "dt", "dd", "hr", "img", "p", "td", "tr", "th");
070
071    /** HTML tags that are allowed in java docs.
072     * From http://www.w3schools.com/tags/default.asp
073     * The forms and structure tags are not allowed
074     */
075    private static final Set<String> ALLOWED_TAGS = ImmutableSortedSet.of(
076            "a", "abbr", "acronym", "address", "area", "b", "bdo", "big",
077            "blockquote", "br", "caption", "cite", "code", "colgroup", "dd",
078            "del", "div", "dfn", "dl", "dt", "em", "fieldset", "font", "h1",
079            "h2", "h3", "h4", "h5", "h6", "hr", "i", "img", "ins", "kbd",
080            "li", "ol", "p", "pre", "q", "samp", "small", "span", "strong",
081            "style", "sub", "sup", "table", "tbody", "td", "tfoot", "th",
082            "thead", "tr", "tt", "u", "ul");
083
084    /** The scope to check. */
085    private Scope scope = Scope.PRIVATE;
086
087    /** The visibility scope where Javadoc comments shouldn't be checked. **/
088    private Scope excludeScope;
089
090    /** Format for matching the end of a sentence. */
091    private String endOfSentenceFormat = "([.?!][ \t\n\r\f<])|([.?!]$)";
092
093    /** Regular expression for matching the end of a sentence. */
094    private Pattern endOfSentencePattern;
095
096    /**
097     * Indicates if the first sentence should be checked for proper end of
098     * sentence punctuation.
099     */
100    private boolean checkFirstSentence = true;
101
102    /**
103     * Indicates if the HTML within the comment should be checked.
104     */
105    private boolean checkHtml = true;
106
107    /**
108     * Indicates if empty javadoc statements should be checked.
109     */
110    private boolean checkEmptyJavadoc;
111
112    @Override
113    public int[] getDefaultTokens() {
114        return getAcceptableTokens();
115    }
116
117    @Override
118    public int[] getAcceptableTokens() {
119        return new int[] {
120            TokenTypes.INTERFACE_DEF,
121            TokenTypes.CLASS_DEF,
122            TokenTypes.ANNOTATION_DEF,
123            TokenTypes.ENUM_DEF,
124            TokenTypes.METHOD_DEF,
125            TokenTypes.CTOR_DEF,
126            TokenTypes.VARIABLE_DEF,
127            TokenTypes.ENUM_CONSTANT_DEF,
128            TokenTypes.ANNOTATION_FIELD_DEF,
129            TokenTypes.PACKAGE_DEF,
130        };
131    }
132
133    @Override
134    public int[] getRequiredTokens() {
135        return getAcceptableTokens();
136    }
137
138    @Override
139    public void visitToken(DetailAST ast) {
140        if (shouldCheck(ast)) {
141            final FileContents contents = getFileContents();
142            // Need to start searching for the comment before the annotations
143            // that may exist. Even if annotations are not defined on the
144            // package, the ANNOTATIONS AST is defined.
145            final TextBlock textBlock =
146                contents.getJavadocBefore(ast.getFirstChild().getLineNo());
147
148            checkComment(ast, textBlock);
149        }
150    }
151
152    /**
153     * Whether we should check this node.
154     * @param ast a given node.
155     * @return whether we should check a given node.
156     */
157    private boolean shouldCheck(final DetailAST ast) {
158        boolean check = false;
159
160        if (ast.getType() == TokenTypes.PACKAGE_DEF) {
161            check = getFileContents().inPackageInfo();
162        }
163        else if (!ScopeUtils.isInCodeBlock(ast)) {
164            final Scope customScope;
165
166            if (ScopeUtils.isInInterfaceOrAnnotationBlock(ast)
167                    || ast.getType() == TokenTypes.ENUM_CONSTANT_DEF) {
168                customScope = Scope.PUBLIC;
169            }
170            else {
171                customScope = ScopeUtils.getScopeFromMods(ast.findFirstToken(TokenTypes.MODIFIERS));
172            }
173            final Scope surroundingScope = ScopeUtils.getSurroundingScope(ast);
174
175            check = customScope.isIn(scope)
176                    && (surroundingScope == null || surroundingScope.isIn(scope))
177                    && (excludeScope == null
178                        || !customScope.isIn(excludeScope)
179                        || surroundingScope != null
180                            && !surroundingScope.isIn(excludeScope));
181        }
182        return check;
183    }
184
185    /**
186     * Performs the various checks against the Javadoc comment.
187     *
188     * @param ast the AST of the element being documented
189     * @param comment the source lines that make up the Javadoc comment.
190     *
191     * @see #checkFirstSentenceEnding(DetailAST, TextBlock)
192     * @see #checkHtmlTags(DetailAST, TextBlock)
193     */
194    private void checkComment(final DetailAST ast, final TextBlock comment) {
195        if (comment == null) {
196            /*checking for missing docs in JavadocStyleCheck is not consistent
197            with the rest of CheckStyle...  Even though, I didn't think it
198            made sense to make another check just to ensure that the
199            package-info.java file actually contains package Javadocs.*/
200            if (getFileContents().inPackageInfo()) {
201                log(ast.getLineNo(), JAVADOC_MISSING);
202            }
203            return;
204        }
205
206        if (checkFirstSentence) {
207            checkFirstSentenceEnding(ast, comment);
208        }
209
210        if (checkHtml) {
211            checkHtmlTags(ast, comment);
212        }
213
214        if (checkEmptyJavadoc) {
215            checkJavadocIsNotEmpty(comment);
216        }
217    }
218
219    /**
220     * Checks that the first sentence ends with proper punctuation.  This method
221     * uses a regular expression that checks for the presence of a period,
222     * question mark, or exclamation mark followed either by whitespace, an
223     * HTML element, or the end of string. This method ignores {_AT_inheritDoc}
224     * comments for TokenTypes that are valid for {_AT_inheritDoc}.
225     *
226     * @param ast the current node
227     * @param comment the source lines that make up the Javadoc comment.
228     */
229    private void checkFirstSentenceEnding(final DetailAST ast, TextBlock comment) {
230        final String commentText = getCommentText(comment.getText());
231
232        if (!commentText.isEmpty()
233            && !getEndOfSentencePattern().matcher(commentText).find()
234            && !(commentText.startsWith("{@inheritDoc}")
235            && JavadocTagInfo.INHERIT_DOC.isValidOn(ast))) {
236            log(comment.getStartLineNo(), NO_PERIOD);
237        }
238    }
239
240    /**
241     * Checks that the Javadoc is not empty.
242     *
243     * @param comment the source lines that make up the Javadoc comment.
244     */
245    private void checkJavadocIsNotEmpty(TextBlock comment) {
246        final String commentText = getCommentText(comment.getText());
247
248        if (commentText.isEmpty()) {
249            log(comment.getStartLineNo(), EMPTY);
250        }
251    }
252
253    /**
254     * Returns the comment text from the Javadoc.
255     * @param comments the lines of Javadoc.
256     * @return a comment text String.
257     */
258    private static String getCommentText(String... comments) {
259        final StringBuilder builder = new StringBuilder();
260        for (final String line : comments) {
261            final int textStart = findTextStart(line);
262
263            if (textStart != -1) {
264                if (line.charAt(textStart) == '@') {
265                    //we have found the tag section
266                    break;
267                }
268                builder.append(line.substring(textStart));
269                trimTail(builder);
270                builder.append('\n');
271            }
272        }
273
274        return builder.toString().trim();
275    }
276
277    /**
278     * Finds the index of the first non-whitespace character ignoring the
279     * Javadoc comment start and end strings (&#47** and *&#47) as well as any
280     * leading asterisk.
281     * @param line the Javadoc comment line of text to scan.
282     * @return the int index relative to 0 for the start of text
283     *         or -1 if not found.
284     */
285    private static int findTextStart(String line) {
286        int textStart = -1;
287        for (int i = 0; i < line.length();) {
288            if (!Character.isWhitespace(line.charAt(i))) {
289                if (line.regionMatches(i, "/**", 0, "/**".length())) {
290                    i += 2;
291                }
292                else if (line.regionMatches(i, "*/", 0, 2)) {
293                    i++;
294                }
295                else if (line.charAt(i) != '*') {
296                    textStart = i;
297                    break;
298                }
299            }
300            i++;
301        }
302        return textStart;
303    }
304
305    /**
306     * Trims any trailing whitespace or the end of Javadoc comment string.
307     * @param builder the StringBuilder to trim.
308     */
309    private static void trimTail(StringBuilder builder) {
310        int index = builder.length() - 1;
311        while (true) {
312            if (Character.isWhitespace(builder.charAt(index))) {
313                builder.deleteCharAt(index);
314            }
315            else if (builder.charAt(index) == '/'
316                    && builder.charAt(index - 1) == '*') {
317                builder.deleteCharAt(index);
318                builder.deleteCharAt(index - 1);
319                index--;
320                while (builder.charAt(index - 1) == '*') {
321                    builder.deleteCharAt(index - 1);
322                    index--;
323                }
324            }
325            else {
326                break;
327            }
328            index--;
329        }
330    }
331
332    /**
333     * Checks the comment for HTML tags that do not have a corresponding close
334     * tag or a close tag that has no previous open tag.  This code was
335     * primarily copied from the DocCheck checkHtml method.
336     *
337     * @param ast the node with the Javadoc
338     * @param comment the {@code TextBlock} which represents
339     *                 the Javadoc comment.
340     */
341    private void checkHtmlTags(final DetailAST ast, final TextBlock comment) {
342        final int lineNo = comment.getStartLineNo();
343        final Deque<HtmlTag> htmlStack = new ArrayDeque<>();
344        final String[] text = comment.getText();
345
346        final TagParser parser = new TagParser(text, lineNo);
347
348        while (parser.hasNextTag()) {
349            final HtmlTag tag = parser.nextTag();
350
351            if (tag.isIncompleteTag()) {
352                log(tag.getLineNo(), INCOMPLETE_TAG,
353                    text[tag.getLineNo() - lineNo]);
354                return;
355            }
356            if (tag.isClosedTag()) {
357                //do nothing
358                continue;
359            }
360            if (tag.isCloseTag()) {
361                // We have found a close tag.
362                if (isExtraHtml(tag.getId(), htmlStack)) {
363                    // No corresponding open tag was found on the stack.
364                    log(tag.getLineNo(),
365                        tag.getPosition(),
366                        EXTRA_HTML,
367                        tag);
368                }
369                else {
370                    // See if there are any unclosed tags that were opened
371                    // after this one.
372                    checkUnclosedTags(htmlStack, tag.getId());
373                }
374            }
375            else {
376                //We only push html tags that are allowed
377                if (isAllowedTag(tag)) {
378                    htmlStack.push(tag);
379                }
380            }
381        }
382
383        // Identify any tags left on the stack.
384        // Skip multiples, like <b>...<b>
385        String lastFound = "";
386        final List<String> typeParameters = CheckUtils.getTypeParameterNames(ast);
387        for (final HtmlTag htmlTag : htmlStack) {
388            if (!isSingleTag(htmlTag)
389                && !htmlTag.getId().equals(lastFound)
390                && !typeParameters.contains(htmlTag.getId())) {
391                log(htmlTag.getLineNo(), htmlTag.getPosition(), UNCLOSED_HTML, htmlTag);
392                lastFound = htmlTag.getId();
393            }
394        }
395    }
396
397    /**
398     * Checks to see if there are any unclosed tags on the stack.  The token
399     * represents a html tag that has been closed and has a corresponding open
400     * tag on the stack.  Any tags, except single tags, that were opened
401     * (pushed on the stack) after the token are missing a close.
402     *
403     * @param htmlStack the stack of opened HTML tags.
404     * @param token the current HTML tag name that has been closed.
405     */
406    private void checkUnclosedTags(Deque<HtmlTag> htmlStack, String token) {
407        final Deque<HtmlTag> unclosedTags = new ArrayDeque<>();
408        HtmlTag lastOpenTag = htmlStack.pop();
409        while (!token.equalsIgnoreCase(lastOpenTag.getId())) {
410            // Find unclosed elements. Put them on a stack so the
411            // output order won't be back-to-front.
412            if (isSingleTag(lastOpenTag)) {
413                lastOpenTag = htmlStack.pop();
414            }
415            else {
416                unclosedTags.push(lastOpenTag);
417                lastOpenTag = htmlStack.pop();
418            }
419        }
420
421        // Output the unterminated tags, if any
422        // Skip multiples, like <b>..<b>
423        String lastFound = "";
424        for (final HtmlTag htag : unclosedTags) {
425            lastOpenTag = htag;
426            if (lastOpenTag.getId().equals(lastFound)) {
427                continue;
428            }
429            lastFound = lastOpenTag.getId();
430            log(lastOpenTag.getLineNo(),
431                lastOpenTag.getPosition(),
432                UNCLOSED_HTML,
433                lastOpenTag);
434        }
435    }
436
437    /**
438     * Determines if the HtmlTag is one which does not require a close tag.
439     *
440     * @param tag the HtmlTag to check.
441     * @return {@code true} if the HtmlTag is a single tag.
442     */
443    private static boolean isSingleTag(HtmlTag tag) {
444        // If its a singleton tag (<p>, <br>, etc.), ignore it
445        // Can't simply not put them on the stack, since singletons
446        // like <dt> and <dd> (unhappily) may either be terminated
447        // or not terminated. Both options are legal.
448        return SINGLE_TAGS.contains(tag.getId().toLowerCase(Locale.ENGLISH));
449    }
450
451    /**
452     * Determines if the HtmlTag is one which is allowed in a javadoc.
453     *
454     * @param tag the HtmlTag to check.
455     * @return {@code true} if the HtmlTag is an allowed html tag.
456     */
457    private static boolean isAllowedTag(HtmlTag tag) {
458        return ALLOWED_TAGS.contains(tag.getId().toLowerCase(Locale.ENGLISH));
459    }
460
461    /**
462     * Determines if the given token is an extra HTML tag. This indicates that
463     * a close tag was found that does not have a corresponding open tag.
464     *
465     * @param token an HTML tag id for which a close was found.
466     * @param htmlStack a Stack of previous open HTML tags.
467     * @return {@code false} if a previous open tag was found
468     *         for the token.
469     */
470    private static boolean isExtraHtml(String token, Deque<HtmlTag> htmlStack) {
471        boolean isExtra = true;
472        for (final HtmlTag td : htmlStack) {
473            // Loop, looking for tags that are closed.
474            // The loop is needed in case there are unclosed
475            // tags on the stack. In that case, the stack would
476            // not be empty, but this tag would still be extra.
477            if (token.equalsIgnoreCase(td.getId())) {
478                isExtra = false;
479                break;
480            }
481        }
482
483        return isExtra;
484    }
485
486    /**
487     * Sets the scope to check.
488     * @param from string to get the scope from
489     */
490    public void setScope(String from) {
491        scope = Scope.getInstance(from);
492    }
493
494    /**
495     * Set the excludeScope.
496     * @param excludeScope a {@code String} value
497     */
498    public void setExcludeScope(String excludeScope) {
499        this.excludeScope = Scope.getInstance(excludeScope);
500    }
501
502    /**
503     * Set the format for matching the end of a sentence.
504     * @param format format for matching the end of a sentence.
505     */
506    public void setEndOfSentenceFormat(String format) {
507        endOfSentenceFormat = format;
508    }
509
510    /**
511     * Returns a regular expression for matching the end of a sentence.
512     *
513     * @return a regular expression for matching the end of a sentence.
514     */
515    private Pattern getEndOfSentencePattern() {
516        if (endOfSentencePattern == null) {
517            endOfSentencePattern = Pattern.compile(endOfSentenceFormat);
518        }
519        return endOfSentencePattern;
520    }
521
522    /**
523     * Sets the flag that determines if the first sentence is checked for
524     * proper end of sentence punctuation.
525     * @param flag {@code true} if the first sentence is to be checked
526     */
527    public void setCheckFirstSentence(boolean flag) {
528        checkFirstSentence = flag;
529    }
530
531    /**
532     * Sets the flag that determines if HTML checking is to be performed.
533     * @param flag {@code true} if HTML checking is to be performed.
534     */
535    public void setCheckHtml(boolean flag) {
536        checkHtml = flag;
537    }
538
539    /**
540     * Sets the flag that determines if empty Javadoc checking should be done.
541     * @param flag {@code true} if empty Javadoc checking should be done.
542     */
543    public void setCheckEmptyJavadoc(boolean flag) {
544        checkEmptyJavadoc = flag;
545    }
546}