Strip ANSI escape codes from a string
Remove every ANSI / VT escape sequence from text — for log scrubbing, plain-text emails, terminals that don't support color, or pipe-friendly CLI output. The regex below matches CSI, OSC, and standalone ESC sequences across the most common forms.
The regex (Perl-compatible)
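PCRE
\x1b(?:\[[0-?]*[ -/]*[@-~]|[\]PX^_][^\x07\x1b]*(?:\x07|\x1b\\)|[@-Z\\-_])
Matches `ESC` (0x1b) followed by one of: a CSI sequence (`ESC [` + parameter bytes 0x30–0x3f + intermediate bytes 0x20–0x2f + final byte 0x40–0x7e); an OSC / DCS / SOS / PM / APC string (`ESC` + one of `]` `P` `X` `^` `_` + any bytes + terminator `BEL` or `ESC \`); or a two-byte escape (`ESC X` where X is in 0x40–0x5f other than `[`, e.g. RI `\033M`). The string alternative must come before the two-byte alternative: `]`, `P`, `X`, `^` and `_` are themselves in 0x40–0x5f, so with the two-byte class first a leftmost-first engine strips only the introducer and leaves the payload in the text. An unterminated string falls back to the two-byte form, so only its introducer is removed. Sequences whose second byte is outside these ranges, such as RIS `\033c`, are not matched.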
By language
import re
# Matches, after ESC (0x1b), in this order:
#   1. CSI:     "[" + parameter bytes 0x30-0x3f + intermediate bytes 0x20-0x2f + final byte 0x40-0x7e
#   2. strings: OSC "]", DCS "P", SOS "X", PM "^", APC "_" + payload + BEL or ESC-backslash (ST)
#   3. any other two-byte escape whose second byte is in 0x40-0x5f (except "[")
# Order matters: "]", "P", "X", "^" and "_" are also members of the two-byte
# class, and `re` takes the first alternative that matches. With the two-byte
# class first, only the introducer was removed and the payload stayed in the text.
# An unterminated string still falls through to alternative 3.
ANSI = re.compile(r'\x1b(?:\[[0-?]*[ -/]*[@-~]|[\]PX^_][^\x07\x1b]*(?:\x07|\x1b\\)|[@-Z\\-_])')
def strip_ansi(s: str) -> str:
return ANSI.sub('', s)const ANSI = /\x1b(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~]|\][^\x07\x1b]*(?:\x07|\x1b\\))/g;
export function stripAnsi(s) {
return s.replace(ANSI, '');
}
// npm alternative:
// npm i strip-ansi
// import stripAnsi from 'strip-ansi';
// stripAnsi('\u001b[31mhi\u001b[0m'); // 'hi'
package ansi
import "regexp"
// ansiRe matches, after ESC (0x1b), in this order: a CSI sequence; an OSC / DCS /
// SOS / PM / APC string terminated by BEL or by ESC + backslash; any other
// two-byte escape whose second byte is in 0x40-0x5f.
//
// The string alternative must precede the two-byte class: "]", "P", "X", "^"
// and "_" are members of that class too, and package regexp prefers the
// alternative a backtracking engine would find first. With the class first,
// only the introducer was removed and the payload stayed in the text.
var ansiRe = regexp.MustCompile(`\x1b(?:\[[0-?]*[ -/]*[@-~]|[\]PX^_][^\x07\x1b]*(?:\x07|\x1b\\)|[@-Z\\-_])`)

// Strip returns s with every escape sequence matched by ansiRe removed.
func Strip(s string) string {
	return ansiRe.ReplaceAllString(s, "")
}

// Cargo.toml: regex = "1"
use regex::Regex;
use std::sync::OnceLock;
/// Returns the escape-sequence regex, compiled once on first use and cached
/// in a `OnceLock`.
///
/// After ESC (0x1b) the pattern accepts, in this order: a CSI sequence; an
/// OSC / DCS / SOS / PM / APC string terminated by BEL or by ESC + backslash;
/// any other two-byte escape whose second byte is in 0x40-0x5f.
///
/// The string alternative must precede the two-byte class: `]`, `P`, `X`, `^`
/// and `_` are members of that class too, and the `regex` crate uses
/// leftmost-first alternation. With the class first, only the introducer was
/// removed and the payload stayed in the text.
fn ansi() -> &'static Regex {
    static R: OnceLock<Regex> = OnceLock::new();
    R.get_or_init(|| {
        Regex::new(r"\x1b(?:\[[0-?]*[ -/]*[@-~]|[\]PX^_][^\x07\x1b]*(?:\x07|\x1b\\)|[@-Z\\-_])")
            .expect("escape-sequence pattern is a valid regex")
    })
}
/// Returns a copy of `s` with every match of the `ansi()` regex removed.
pub fn strip_ansi(s: &str) -> String {
    let cleaned = ansi().replace_all(s, "");
    cleaned.into_owned()
}
# GNU sed with -E / -r:
# Strips CSI sequences, OSC/DCS/SOS/PM/APC strings (ended by BEL or ESC + backslash)
# and other two-byte escapes from input.log, writing the result to clean.log.
sed -E 's/\x1b(\[[0-?]*[ -\/]*[@-~]|[]PX^_][^\x07\x1b]*(\x07|\x1b\\)|[@-Z\\\\-_])//g' input.log > clean.log
# perl one-liner (more portable across BSD/macOS sed):
# Perl takes the first alternative that matches, so the string alternative must
# come before the two-byte class: "]", "P", "X", "^" and "_" are in that class
# too, and with the class first only the introducer was removed.
perl -pe 's/\e(?:\[[0-?]*[ -\/]*[@-~]|[\]PX^_][^\a\e]*(?:\a|\e\\)|[@-Z\\-_])//g' input.log
#include <stdio.h>
// Strip every ESC sequence from stdin to stdout.
int main(void) {
int c;
while ((c = getchar()) != EOF) {
if (c != 0x1b) { putchar(c); continue; }
int next = getchar();
if (next == EOF) break;
if (next == '[') { // CSI
int b;
while ((b = getchar()) != EOF && (b < 0x40 || b > 0x7e)) {}
continue;
}
if (next == ']' || next == 'P' || next == 'X' ||
next == '^' || next == '_') { // OSC, DCS, SOS, PM, APC
int b;
while ((b = getchar()) != EOF) {
if (b == 0x07) break; // BEL terminator
if (b == 0x1b) { getchar(); break; } // ST = ESC \\
}
continue;
}
// ESC X (two-byte) — drop both bytes
}
return 0;
}What this does NOT strip
Lone control characters that aren't escape sequences (BEL `\x07`, BS `\x08`, raw `\r` / `\n`, etc.) are preserved by the recipes above — those usually belong in the text. Add a second pass like `s/[\x00-\x08\x0b\x0c\x0e-\x1f]//g` if you also want to strip the remaining C0 controls; that class deliberately keeps tab (`\x09`), line feed (`\x0a`) and carriage return (`\x0d`).
Related sequences
Canonical pages for the escape families the regex above matches and removes.