Close

How to apply Java Regex to any Command Line Output?

[Last Updated: Jan 6, 2019]

Java Tools & Commands 

This example shows how to apply Java Regex to any native command line output. Although commands like findstr (Windows) and find/grep (Linux) have built-in pattern-matching facilities, sometimes they are not powerful enough to express the desired pattern.

The following class shows how to use our familiar Java regex on the command line:

package com.logicbig.example;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

/**
 * Command-line utility that applies a Java regular expression to a piece of
 * text and prints every match on its own line.
 *
 * <p>The text is taken from the third argument when present, otherwise it is
 * read from standard input. An optional format argument rewrites each match
 * using {@code $N} group references and {@code \x} escapes.
 */
public class JRegex {

    /**
     * Dispatches on the number of arguments.
     *
     * <ul>
     *   <li>1 argument: pattern; text is read from standard input.</li>
     *   <li>2 arguments: pattern and format; text is read from standard input.</li>
     *   <li>3 arguments: pattern, format and text.</li>
     *   <li>anything else: prints the usage help.</li>
     * </ul>
     *
     * @param args the command-line arguments
     */
    public static void main(String[] args) {
        switch (args.length) {
            case 1:
                applyRegex(args[0], null, getInput());
                break;
            case 2:
                applyRegex(args[0], args[1], getInput());
                break;
            case 3:
                applyRegex(args[0], args[1], args[2]);
                break;
            default:
                printUsage();
                break;
        }
    }

    /** Prints the error banner to stderr and the list of valid invocations to stdout. */
    private static void printUsage() {
        System.err.println("Wrong usage!");
        System.out.println("Valid uses:");
        System.out.println(" (1)  jregex \"pattern\" \"format\" \"text\"");
        System.out.println(" (2)  externalCommand | jregex \"pattern\"");
        System.out.println(" (3)  externalCommand | jregex \"pattern\" \"format\"");
        System.out.println(" (4)  jregex \"pattern\" < myfile.txt");
        // Previously a verbatim copy of (4); the stdin + format form was never shown.
        System.out.println(" (5)  jregex \"pattern\" \"format\" < myfile.txt");
    }

    /**
     * Reads all of standard input, joining lines with {@code '\n'}.
     *
     * @return the text read so far, with leading and trailing whitespace
     *         trimmed; an I/O failure is reported on stderr and whatever was
     *         read before it is returned
     */
    private static String getInput() {
        // StringBuilder avoids the quadratic cost of String += on large inputs.
        StringBuilder input = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(System.in))) {
            String line;
            while ((line = reader.readLine()) != null) {
                input.append(line).append('\n');
            }
        } catch (IOException e) {
            System.err.println(e);
        }
        return input.toString().trim();
    }

    /**
     * Finds every match of {@code patternString} in {@code input} and prints
     * one line per match to stdout.
     *
     * <p>An invalid pattern or an invalid format is reported on stderr instead
     * of terminating the program with a stack trace.
     *
     * @param patternString the regular expression to compile
     * @param groupFormat   the output format, or {@code null} to print the whole match
     * @param input         the text to search
     */
    private static void applyRegex(String patternString, String groupFormat, String input) {
        final Pattern pattern;
        try {
            pattern = Pattern.compile(patternString);
        } catch (PatternSyntaxException e) {
            System.err.println(e);
            return;
        }

        Matcher matcher = pattern.matcher(input);
        try {
            while (matcher.find()) {
                String line = (groupFormat == null)
                        ? matcher.group()
                        : getFormattedOutput(matcher, input, groupFormat);
                System.out.println(line);
            }
        } catch (IllegalArgumentException e) {
            // Raised by getFormattedOutput for a malformed format string.
            System.err.println("Invalid format: " + e.getMessage());
        }
    }

    /**
     * Expands a format string for the matcher's current match.
     *
     * <p>{@code \x} emits {@code x} literally. {@code $N} emits the text of
     * capturing group {@code N}; a group that did not participate in the match
     * contributes nothing. Further digits are consumed only while the resulting
     * number is still an existing group, so with a single group {@code $10} is
     * group 1 followed by a literal {@code 0}. Named groups are not supported.
     *
     * @param matcher              a matcher positioned on a successful match
     * @param inputText            the text the matcher was created for
     * @param formattedReplacement the format string to expand
     * @return the expanded text
     * @throws IllegalArgumentException if the format ends in a lone {@code \}
     *         or {@code $}, if {@code $} is not followed by a digit, or if the
     *         referenced group does not exist in the pattern
     */
    private static String getFormattedOutput(final Matcher matcher,
                                             final String inputText,
                                             final String formattedReplacement) {
        final int length = formattedReplacement.length();
        final int groupCount = matcher.groupCount();
        StringBuilder result = new StringBuilder();
        int cursor = 0;
        while (cursor < length) {
            char current = formattedReplacement.charAt(cursor++);
            if (current == '\\') {
                if (cursor == length) {
                    throw new IllegalArgumentException("character to be escaped is missing");
                }
                result.append(formattedReplacement.charAt(cursor++));
            } else if (current == '$') {
                if (cursor == length) {
                    throw new IllegalArgumentException(
                            "Illegal group reference: group index is missing");
                }
                // The first digit is always part of the group number.
                int refNum = formattedReplacement.charAt(cursor) - '0';
                if (refNum < 0 || refNum > 9) {
                    throw new IllegalArgumentException("Illegal group reference");
                }
                cursor++;
                // Greedily extend the number while it still names an existing group.
                while (cursor < length) {
                    int nextDigit = formattedReplacement.charAt(cursor) - '0';
                    if (nextDigit < 0 || nextDigit > 9) {
                        break;
                    }
                    int candidate = refNum * 10 + nextDigit;
                    if (candidate > groupCount) {
                        break;
                    }
                    refNum = candidate;
                    cursor++;
                }
                // Without this check Matcher.start would throw IndexOutOfBoundsException.
                if (refNum > groupCount) {
                    throw new IllegalArgumentException(
                            "Illegal group reference: no group " + refNum);
                }
                int start = matcher.start(refNum);
                int end = matcher.end(refNum);
                if (start != -1 && end != -1) {
                    result.append(inputText, start, end);
                }
            } else {
                result.append(current);
            }
        }
        return result.toString();
    }
}

Compile the above class and create a batch file:

jregex.bat

@echo off
rem Runs JRegex with the classes under target\classes (a path relative to the
rem current directory) and forwards all command-line arguments to it.
java.exe  -classpath target\classes com.logicbig.example.JRegex %*

If you are using Linux, create the following bash script:

jregex.sh

#!/bin/bash
# Runs JRegex with the classes under target/classes (a path relative to the
# current directory).
# "$@" forwards each argument exactly as the caller quoted it; an unquoted $*
# would re-split a pattern or format containing spaces and glob-expand
# characters such as * before Java ever sees them.
java -classpath target/classes com.logicbig.example.JRegex "$@"

Let's see how to use it.

The normal 'dir' command in windows:

D:\java-command-line-regex>dir
Volume in drive D is Data
Volume Serial Number is 68F9-EDFA

Directory of D:\java-command-line-regex

12/28/2018 11:17 AM <DIR> .
12/28/2018 11:17 AM <DIR> ..
12/28/2018 11:17 AM <DIR> .idea
01/04/2019 10:47 PM 669 java-command-line-regex.iml
12/27/2018 11:06 PM 96 jregex.bat
12/27/2018 10:35 PM 907 pom.xml
12/27/2018 10:35 PM <DIR> src
12/27/2018 11:04 PM <DIR> target
3 File(s) 1,672 bytes
5 Dir(s) 20,447,772,672 bytes free

Let's use our program to list only directories:

D:\java-command-line-regex>dir | jregex .*DIR.*
01/05/2019 01:47 AM <DIR> .
01/05/2019 01:47 AM <DIR> ..
12/28/2018 11:17 AM <DIR> .idea
12/27/2018 10:35 PM <DIR> src
12/27/2018 11:04 PM <DIR> target

Let's view only directory names and their modified time in our desired format:

D:\java-command-line-regex>dir | jregex "(\d.*\w\w).*DIR.*    [ ]*(.*)" "modified: $1, dir: $2"
modified: 01/05/2019 01:47 AM, dir: .
modified: 01/05/2019 01:47 AM, dir: ..
modified: 12/28/2018 11:17 AM, dir: .idea
modified: 12/27/2018 10:35 PM, dir: src
modified: 12/27/2018 11:04 PM, dir: target

Finding Java Programs in tasklist:

D:\java-command-line-regex>tasklist | jregex  .*java.*
java.exe 28808 Console 7 84,192 K
java.exe 26672 Console 7 126,640 K
java.exe 18604 Console 7 2,730,368 K
java.exe 16852 Console 7 318,860 K
java.exe 22052 Console 7 31,772 K

Finding only Java programs in tasklist whose current memory consumption is 100,000 K or more (the pattern requires a run of at least seven digits and commas):

D:\java-command-line-regex>tasklist | jregex  .*java.*[\d,]{7,}.*
java.exe 26672 Console 7 126,640 K
java.exe 18604 Console 7 2,732,076 K
java.exe 16852 Console 7 318,860 K

Finding <plugins> section of our example project's pom.xml:

D:\java-command-line-regex>jregex "(?s)<plugins>.*</plugins>" < pom.xml
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.8.0</version>
<configuration>
<source>11</source>
<target>11</target>
<encoding>UTF-8</encoding>
</configuration>
</plugin>
</plugins>

Listing plugins' artifacts in effective pom:

D:\java-command-line-regex>mvn help:effective-pom | jregex "(?s)<plugin>.*?<artifactId>(.*?)</artifactId>.*?</plugin>" $1
maven-antrun-plugin
maven-assembly-plugin
maven-dependency-plugin
maven-release-plugin
maven-compiler-plugin
maven-clean-plugin
maven-resources-plugin
maven-jar-plugin
maven-surefire-plugin
maven-install-plugin
maven-deploy-plugin
maven-site-plugin

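To use this utility program anywhere, add the directory containing jregex.bat/jregex.sh to the system PATH. Because the script's -classpath (target/classes) is resolved relative to the current directory, also change it to an absolute path.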

On Linux you have to escape $ (as \$) in the format argument, or wrap it in single quotes, e.g.:

$ ls -la | ./jregex.sh ".*(\d\d:\d\d).*" '$1'
01:47
22:34
11:17
22:47
01:39
01:47
22:35
22:35
23:04

Also check out our Java Regex tutorials to learn to apply more complex regex patterns.

Example Project

Dependencies and Technologies Used:

  • JDK 11
  • Maven 3.5.4

Command line + Java Regex Select All Download
  • java-command-line-regex
    • src
      • main
        • java
          • com
            • logicbig
              • example
                • JRegex.java

    See Also