#!/bin/bash
# Extract a stream of keywords from a file.
#
# This is an application of Marcus Ranum's principle of "Artificial Ignorance":
# http://www.ranum.com/security/computer_security/papers/ai/
#
# Usage: ai input-file term1 [term2 ...]
#
# Input:
#   - input-file: the file to split, given first on the command line.
#   - terms: one or more, given after the input file.
#
# Each term is a case-insensitive extended regular expression (grep -E -i).
# A term can contain "|" to match more than one keyword, e.g. 'lint|make'.
# The part of the term before the first "|" (the whole term if there is no
# "|") is used to name that term's output file.
#
# To give an output file a particular name, put that name first in the term;
# it does not have to match anything in the input.
#
# Terms are applied in order. Each term only sees the lines that no earlier
# term matched, so a line ends up in at most one output file.
#
# Output (written to the current directory):
#   - ai-TERM1.txt
#   - ai-TERM2.txt
#   - ...
#   - ai-TERMn.txt
#   - ai-leftovers.txt   (lines that matched none of the terms)
#
# Example
#
# I wrapped this in a shell script as follows to write a report.
#
#   #!/bin/bash
#   artificial-ignorance.sh completed-only.org \
#     'research-projects|clustering|heatmaps' \
#     'external|USENIX' \
#     'products|firewall' \
#     'development|lint|make|python'
#

# 'set -e' is deliberately left off: grep exits with status 1 when no line
# matches, which is an ordinary outcome for a term here.
set -u

PREFIX=ai-
LEFTOVERS=ai-leftovers.txt
INPUT=${1:-""}

if [[ -z "$INPUT" ]]; then
  echo "Need file on command line" >&2
  exit 1
fi
shift

# Intermediate results live in a private scratch directory that is removed on
# every exit path, so nothing else in the working directory is touched.
SCRATCH_DIR=$(mktemp -d) || exit 1
cleanup() { rm -rf -- "${SCRATCH_DIR:?}"; }
trap cleanup EXIT

# Snapshot the input once. Every pass below reads its source twice (once for
# the matches, once for the rest), which would not work on a pipe or stdin.
remaining="${SCRATCH_DIR}/leftovers.0"
if ! cat -- "$INPUT" > "$remaining"; then
  printf 'Cannot read input file: %s\n' "$INPUT" >&2
  exit 1
fi

step=0
for PATTERN in "$@"; do
  # Allow patterns of A|B|C; the first element names the output file.
  FIRST_ELEMENT=${PATTERN%%|*}
  OUTPUT="${PREFIX}${FIRST_ELEMENT}.txt"

  step=$((step + 1))
  next="${SCRATCH_DIR}/leftovers.${step}"

  # Two sequential greps instead of 'tee >(grep ...)': bash does not wait for
  # a process substitution, so the output file could still be incomplete when
  # the script moves on or exits.
  grep -Ei -- "$PATTERN" "$remaining" > "$OUTPUT"
  grep -Eiv -- "$PATTERN" "$remaining" > "$next"

  # The next term only sees what this one did not match.
  rm -f -- "$remaining"
  remaining=$next
done

# With no terms at all, nothing was filtered, so every input line is a leftover.
cp -- "$remaining" "$LEFTOVERS"