String Compression
Overview
String compression involves reducing the size of a string by encoding repeated characters or patterns more efficiently. Compression algorithms are essential for data storage, transmission, and optimization. String compression problems test your understanding of two pointers, character counting, and efficient encoding techniques.
Common compression problems include:
- Run-length encoding (RLE)
- Compress string in-place
- Decompress compressed string
- String compression with constraints
- Optimal compression strategies
- Compression ratio analysis
Pattern 1: Run-Length Encoding (RLE)
Encode consecutive repeated characters as character + count.
Basic RLE:
function compressRLE(s: string): string {
if (s.length === 0) return '';
const result: string[] = [];
let count = 1;
for (let i = 1; i < s.length; i++) {
if (s[i] === s[i - 1]) {
count++;
} else {
result.push(s[i - 1]);
if (count > 1) {
result.push(count.toString());
}
count = 1;
}
}
// Handle last character
result.push(s[s.length - 1]);
if (count > 1) {
result.push(count.toString());
}
return result.join('');
}Example: "aaabbbcc" ā "a3b3c2"
Time: O(n), Space: O(n)
RLE with two pointers:
function compressRLETwoPointers(s: string): string {
const result: string[] = [];
let i = 0;
while (i < s.length) {
const char = s[i];
let count = 0;
while (i < s.length && s[i] === char) {
count++;
i++;
}
result.push(char);
if (count > 1) {
result.push(count.toString());
}
}
return result.join('');
}Pattern 2: In-Place Compression
Compress string in-place, modifying the input array.
function compressInPlace(chars: string[]): number {
let write = 0; // Position to write compressed result
let read = 0; // Position to read from input
while (read < chars.length) {
const char = chars[read];
let count = 0;
// Count consecutive characters
while (read < chars.length && chars[read] === char) {
count++;
read++;
}
// Write character
chars[write++] = char;
// Write count if > 1
if (count > 1) {
const countStr = count.toString();
for (const digit of countStr) {
chars[write++] = digit;
}
}
}
return write; // New length
}Example: ['a','a','b','b','c','c','c'] ā ['a','2','b','2','c','3'] (length 6)
Time: O(n), Space: O(1) excluding output
Pattern 3: Decompress RLE String
Decompress a run-length encoded string back to original.
function decompressRLE(s: string): string {
const result: string[] = [];
let i = 0;
while (i < s.length) {
const char = s[i++];
let count = 0;
// Read count
while (i < s.length && s[i] >= '0' && s[i] <= '9') {
count = count * 10 + (s.charCodeAt(i) - '0'.charCodeAt(0));
i++;
}
// If no count, count is 1
if (count === 0) count = 1;
// Append character count times
for (let j = 0; j < count; j++) {
result.push(char);
}
}
return result.join('');
}Example: "a3b2c1" ā "aaabbc"
Time: O(n), Space: O(n)
Handling multi-digit counts:
function decompressRLEMultiDigit(s: string): string {
const result: string[] = [];
let i = 0;
while (i < s.length) {
if (i + 1 < s.length && s[i + 1] >= '0' && s[i + 1] <= '9') {
const char = s[i++];
let count = 0;
while (i < s.length && s[i] >= '0' && s[i] <= '9') {
count = count * 10 + (s.charCodeAt(i) - '0'.charCodeAt(0));
i++;
}
for (let j = 0; j < count; j++) {
result.push(char);
}
} else {
result.push(s[i++]);
}
}
return result.join('');
}Pattern 4: Compression with Minimum Length
Compress only if it reduces length, otherwise return original.
function compressIfShorter(s: string): string {
const compressed = compressRLE(s);
return compressed.length < s.length ? compressed : s;
}Calculate compression ratio:
function compressionRatio(original: string, compressed: string): number {
if (original.length === 0) return 0;
return compressed.length / original.length;
}Pattern 5: Compression with Character Limit
Compress string but limit count representation (e.g., max 2 digits).
function compressWithLimit(chars: string[]): number {
let write = 0;
let read = 0;
while (read < chars.length) {
const char = chars[read];
let count = 0;
while (read < chars.length && chars[read] === char) {
count++;
read++;
}
chars[write++] = char;
if (count > 1) {
const countStr = count.toString();
// Limit to 2 digits
const limitedCount = countStr.length > 2 ? countStr.substring(0, 2) : countStr;
for (const digit of limitedCount) {
chars[write++] = digit;
}
}
}
return write;
}Pattern 6: Group Compression
Compress groups of characters (not just single characters).
function compressGroups(s: string, groupSize: number): string {
const result: string[] = [];
let i = 0;
while (i < s.length) {
const group = s.substring(i, Math.min(i + groupSize, s.length));
let count = 1;
let j = i + groupSize;
// Count consecutive groups
while (j <= s.length - groupSize && s.substring(j, j + groupSize) === group) {
count++;
j += groupSize;
}
result.push(group);
if (count > 1) {
result.push(count.toString());
}
i = j;
}
return result.join('');
}Example: "abcabcabc" with groupSize=3 ā "abc3"
Pattern 7: Compression with Escape Characters
Handle special characters that might conflict with compression format.
function compressWithEscape(s: string): string {
const result: string[] = [];
let i = 0;
while (i < s.length) {
const char = s[i];
// If digit or escape character, escape it
if (char >= '0' && char <= '9' || char === '\\') {
result.push('\\');
result.push(char);
i++;
continue;
}
let count = 1;
i++;
while (i < s.length && s[i] === char) {
count++;
i++;
}
result.push(char);
if (count > 1) {
result.push(count.toString());
}
}
return result.join('');
}Decompress with escape handling:
function decompressWithEscape(s: string): string {
const result: string[] = [];
let i = 0;
while (i < s.length) {
if (s[i] === '\\' && i + 1 < s.length) {
// Escaped character
result.push(s[i + 1]);
i += 2;
} else {
const char = s[i++];
let count = 0;
while (i < s.length && s[i] >= '0' && s[i] <= '9') {
count = count * 10 + (s.charCodeAt(i) - '0'.charCodeAt(0));
i++;
}
if (count === 0) count = 1;
for (let j = 0; j < count; j++) {
result.push(char);
}
}
}
return result.join('');
}Pattern 8: Optimal Compression Strategy
Choose compression strategy based on string characteristics.
function optimalCompress(s: string): string {
// Analyze string
const avgRunLength = analyzeRunLength(s);
if (avgRunLength < 2) {
// No benefit from compression
return s;
} else if (avgRunLength < 4) {
// Simple RLE
return compressRLE(s);
} else {
// Group compression might be better
return compressGroups(s, 2);
}
}
function analyzeRunLength(s: string): number {
if (s.length === 0) return 0;
let totalRuns = 0;
let totalLength = 0;
let i = 0;
while (i < s.length) {
const char = s[i];
let count = 0;
while (i < s.length && s[i] === char) {
count++;
i++;
}
totalRuns++;
totalLength += count;
}
return totalLength / totalRuns;
}Pattern 9: Compression Validation
Validate if a compressed string is valid.
function isValidCompression(compressed: string): boolean {
let i = 0;
while (i < compressed.length) {
// Must start with a character
if (i >= compressed.length || !/[a-zA-Z]/.test(compressed[i])) {
return false;
}
i++; // Skip character
// Optional digits
while (i < compressed.length && compressed[i] >= '0' && compressed[i] <= '9') {
i++;
}
}
return true;
}Check if compression is lossless (can be decompressed correctly):
function isLosslessCompression(original: string, compressed: string): boolean {
const decompressed = decompressRLE(compressed);
return decompressed === original;
}Pattern 10: Compression Statistics
Analyze compression effectiveness.
interface CompressionStats {
originalLength: number;
compressedLength: number;
ratio: number;
savings: number;
savingsPercent: number;
avgRunLength: number;
}
function getCompressionStats(original: string): CompressionStats {
const compressed = compressRLE(original);
const originalLength = original.length;
const compressedLength = compressed.length;
const ratio = compressedLength / originalLength;
const savings = originalLength - compressedLength;
const savingsPercent = (savings / originalLength) * 100;
// Calculate average run length
let totalRuns = 0;
let totalLength = 0;
let i = 0;
while (i < original.length) {
const char = original[i];
let count = 0;
while (i < original.length && original[i] === char) {
count++;
i++;
}
totalRuns++;
totalLength += count;
}
const avgRunLength = totalRuns > 0 ? totalLength / totalRuns : 0;
return {
originalLength,
compressedLength,
ratio,
savings,
savingsPercent,
avgRunLength
};
}Pattern 11: Selective Compression
Compress only runs longer than a threshold.
function compressSelective(s: string, threshold: number): string {
const result: string[] = [];
let i = 0;
while (i < s.length) {
const char = s[i];
let count = 0;
while (i < s.length && s[i] === char) {
count++;
i++;
}
if (count >= threshold) {
// Compress
result.push(char);
result.push(count.toString());
} else {
// Don't compress
for (let j = 0; j < count; j++) {
result.push(char);
}
}
}
return result.join('');
}Example: "aaabbcc" with threshold=3 ā "a3bbcc"
Pattern 12: Multi-Character Compression
Compress patterns of multiple different characters.
function compressPatterns(s: string, patternLength: number): string {
const patterns = new Map<string, number>();
const patternList: string[] = [];
// Find all patterns
for (let i = 0; i <= s.length - patternLength; i++) {
const pattern = s.substring(i, i + patternLength);
if (!patterns.has(pattern)) {
patterns.set(pattern, patternList.length);
patternList.push(pattern);
}
}
// Compress using pattern indices
const result: string[] = [];
let i = 0;
while (i < s.length) {
let matched = false;
// Try to match longest pattern
for (let len = patternLength; len >= 1 && i + len <= s.length; len--) {
const pattern = s.substring(i, i + len);
if (patterns.has(pattern)) {
const index = patterns.get(pattern)!;
result.push(`[${index}]`);
i += len;
matched = true;
break;
}
}
if (!matched) {
result.push(s[i++]);
}
}
return result.join('');
}When to Use String Compression
Use compression techniques when:
- Problem asks about "compression", "encoding", or "RLE"
- Need to reduce string size efficiently
- Handling repeated characters or patterns
- In-place modification is required
- Storage optimization is needed
- Transmission efficiency matters
Template (Basic RLE)
function compressTemplate(s: string): string {
const result: string[] = [];
let i = 0;
while (i < s.length) {
const char = s[i];
let count = 0;
while (i < s.length && s[i] === char) {
count++;
i++;
}
result.push(char);
if (count > 1) {
result.push(count.toString());
}
}
return result.join('');
}Template (In-Place Compression)
function compressInPlaceTemplate(chars: string[]): number {
let write = 0;
let read = 0;
while (read < chars.length) {
const char = chars[read];
let count = 0;
while (read < chars.length && chars[read] === char) {
count++;
read++;
}
chars[write++] = char;
if (count > 1) {
const countStr = count.toString();
for (const digit of countStr) {
chars[write++] = digit;
}
}
}
return write;
}Time and Space Complexity Summary
- Basic RLE: O(n) time, O(n) space
- In-place compression: O(n) time, O(1) space (excluding output)
- Decompression: O(n) time, O(n) space
- Group compression: O(n * m) time where m is group size
- Pattern compression: O(n²) time worst case
Practice Tips
- Use two pointers ā Efficient for counting consecutive characters
- Handle edge cases ā Empty strings, single character, all same characters
- Consider in-place ā When space optimization is critical
- Validate compression ā Ensure lossless decompression
- Optimize count representation ā Use string conversion carefully
- Handle special characters ā Escape sequences if needed
Common Mistakes
- Off-by-one errors ā Be careful with array indices
- Not handling single characters ā Don't compress if count is 1
- Multi-digit counts ā Handle counts > 9 correctly
- Edge cases ā Empty string, single character runs
- In-place modification ā Don't overwrite before reading
Related Concepts
- Two Pointers ā Core technique for compression
- Character Counting ā Essential for RLE
- String Manipulation ā Building compressed strings
- In-Place Algorithms ā Space-efficient compression
String compression is a practical technique with many real-world applications. Master RLE and in-place compression, and you'll be well-prepared for compression-related interview questions.