Text Manipulation for Developers: Essential Techniques and Tools
A comprehensive guide to common text processing operations developers encounter daily, with code examples and practical solutions for case conversion, deduplication, sorting, and more.
Case Conversion
Case conversion is fundamental in programming for naming conventions, display formatting, and data normalization.
Common Case Styles
| Style | Example | Use Case |
|---|---|---|
| lowercase | my variable name | Search normalization, URLs |
| UPPERCASE | MY VARIABLE NAME | Constants, headings |
| Title Case | My Variable Name | Headlines, titles |
| Sentence case | My variable name | UI text, descriptions |
| camelCase | myVariableName | JavaScript, Java variables |
| PascalCase | MyVariableName | Classes, React components |
| snake_case | my_variable_name | Python, databases, Ruby |
| kebab-case | my-variable-name | URLs, CSS classes, filenames |
| CONSTANT_CASE | MY_VARIABLE_NAME | Environment variables, constants |
JavaScript Examples
// Basic case conversion
const str = "Hello World";
str.toLowerCase(); // "hello world"
str.toUpperCase(); // "HELLO WORLD"
// Title Case
/**
 * Convert a string to Title Case.
 *
 * Each run that starts at a word character and extends to the next whitespace
 * gets its first character upper-cased and the remainder lower-cased.
 *
 * @param {string} str - Text to convert.
 * @returns {string} The title-cased text.
 */
function toTitleCase(str) {
  // slice() replaces the deprecated String.prototype.substr().
  return str.replace(
    /\w\S*/g,
    (word) => word.charAt(0).toUpperCase() + word.slice(1).toLowerCase(),
  );
}
toTitleCase("hello world"); // "Hello World"
// camelCase from spaces/kebab/snake
/**
 * Convert space-, hyphen-, or underscore-separated text to camelCase.
 *
 * The input is lower-cased first, so existing capitals are not preserved.
 *
 * @param {string} str - Text such as "hello-world", "hello_world" or "hello world".
 * @returns {string} The camelCased text, e.g. "helloWorld".
 */
function toCamelCase(str) {
  return str
    .toLowerCase()
    // Drop each separator run and upper-case the character that follows it;
    // a trailing separator run has no follower and is simply removed.
    .replace(/[-_\s]+(.)?/g, (_match, next) => (next ? next.toUpperCase() : ''));
}
toCamelCase("hello-world"); // "helloWorld"
toCamelCase("hello_world"); // "helloWorld"
// snake_case from camelCase
/**
 * Convert camelCase (or PascalCase) text to snake_case.
 *
 * @param {string} str - Text such as "helloWorld".
 * @returns {string} The snake_cased text, e.g. "hello_world".
 */
function toSnakeCase(str) {
  return str
    // Prefix every ASCII capital with an underscore...
    .replace(/([A-Z])/g, '_$1')
    .toLowerCase()
    // ...then drop the leading underscore produced by an initial capital.
    .replace(/^_/, '');
}
toSnakeCase("helloWorld"); // "hello_world"
// kebab-case from camelCase
/**
 * Convert camelCase (or PascalCase) text to kebab-case.
 *
 * @param {string} str - Text such as "helloWorld".
 * @returns {string} The kebab-cased text, e.g. "hello-world".
 */
function toKebabCase(str) {
  // Put a hyphen in front of every ASCII capital letter.
  const hyphenated = str.replace(/([A-Z])/g, '-$1');
  const lowered = hyphenated.toLowerCase();
  // A single leading hyphen (from an initial capital) is removed.
  return lowered.replace(/^-/, '');
}
toKebabCase("helloWorld"); // "hello-world"
Python Examples
# Basic case conversion
text = "Hello World"
text.lower() # "hello world"
text.upper() # "HELLO WORLD"
text.title() # "Hello World"
text.capitalize() # "Hello world"
text.swapcase() # "hELLO wORLD"
# snake_case to camelCase
def to_camel_case(snake_str):
    """Convert a snake_case string to camelCase.

    The first component is kept as-is; every following component is
    title-cased and appended.
    """
    components = snake_str.split('_')
    return components[0] + ''.join(x.title() for x in components[1:])
to_camel_case("hello_world") # "helloWorld"
# camelCase to snake_case
import re
def to_snake_case(camel_str):
    """Convert a camelCase string to snake_case.

    An underscore is inserted before every ASCII capital that is not at the
    start of the string, then the result is lower-cased.
    """
    return re.sub(r'(?<!^)(?=[A-Z])', '_', camel_str).lower()
to_snake_case("helloWorld") # "hello_world"
Removing Duplicates
Deduplication is essential for cleaning data imports, log analysis, and list processing.
JavaScript
// Remove duplicate lines from text
/**
 * Drop repeated lines from a newline-separated block of text.
 *
 * @param {string} text - Input text, one entry per line.
 * @returns {string} The text with each distinct line kept once, in first-seen order.
 */
function removeDuplicateLines(text) {
  // A Set keeps insertion order, so the first occurrence of each line wins.
  return Array.from(new Set(text.split('\n'))).join('\n');
}
// Preserve original order
/**
 * Remove duplicate lines, keeping the first occurrence of each in its
 * original position.
 *
 * @param {string} text - Input text, one entry per line.
 * @returns {string} The de-duplicated text.
 */
function removeDuplicatesPreserveOrder(text) {
  const encountered = new Set();
  const kept = [];
  for (const line of text.split('\n')) {
    if (!encountered.has(line)) {
      encountered.add(line);
      kept.push(line);
    }
  }
  return kept.join('\n');
}
// Case-insensitive deduplication
/**
 * Remove lines that duplicate an earlier line when compared
 * case-insensitively. The first occurrence is kept with its original casing.
 *
 * @param {string} text - Input text, one entry per line.
 * @returns {string} The de-duplicated text.
 */
function removeDuplicatesCaseInsensitive(text) {
  const keys = new Set();
  const isFirstOccurrence = (line) => {
    const sizeBefore = keys.size;
    keys.add(line.toLowerCase());
    // The set only grows when the lower-cased key was not present yet.
    return keys.size > sizeBefore;
  };
  return text.split('\n').filter(isFirstOccurrence).join('\n');
}
// Remove duplicates while trimming whitespace
/**
 * Remove blank lines and lines that duplicate an earlier line once
 * surrounding whitespace is ignored.
 *
 * Lines are compared by their trimmed form, but the kept lines are emitted
 * exactly as they appeared in the input.
 *
 * @param {string} text - Input text, one entry per line.
 * @returns {string} The filtered text.
 */
function removeDuplicatesTrimmed(text) {
  const seenKeys = new Set();
  const kept = [];
  for (const line of text.split('\n')) {
    const key = line.trim();
    if (key && !seenKeys.has(key)) {
      seenKeys.add(key);
      kept.push(line);
    }
  }
  return kept.join('\n');
}
Python
# Remove duplicates (unordered)
lines = text.split('\n')
unique = list(set(lines))
# Remove duplicates (preserve order)
def remove_duplicates(lines):
    """Return the items of ``lines`` with duplicates dropped, keeping first-seen order."""
    seen = set()
    # set.add() returns None (falsy), so the `or` records x as seen while
    # still letting its first occurrence through the filter.
    return [x for x in lines if not (x in seen or seen.add(x))]
# Using dict.fromkeys (Python 3.7+, preserves order)
unique = list(dict.fromkeys(lines))
# Case-insensitive with original case preserved
def remove_duplicates_case_insensitive(lines):
    """Drop items that repeat an earlier item when compared case-insensitively.

    The first occurrence is kept with its original casing and position.
    """
    seen = set()
    result = []
    for line in lines:
        lower = line.lower()
        if lower not in seen:
            seen.add(lower)
            result.append(line)
    return result
Sorting Lines
Sorting is crucial for organizing lists, configuration files, and data comparison.
Sorting Strategies
| Type | Example Result | Use Case |
|---|---|---|
| Alphabetical (A-Z) | Apple, Banana, Cherry | General lists |
| Reverse (Z-A) | Cherry, Banana, Apple | Reverse order |
| Numeric | 1, 2, 10, 20, 100 | Numbered lists |
| Natural | file1, file2, file10 | Filenames, versions |
| By Length | a, ab, abc, abcd | Size analysis |
| Random | Varies | Shuffling, testing |
JavaScript
// Basic alphabetical sort
const lines = text.split('\n');
// Array.prototype.sort() sorts in place and returns the same array, so copy
// first; otherwise `sorted` and `reversed` would both alias `lines` and end
// up in the same (reversed) order.
const sorted = [...lines].sort();
const reversed = [...lines].sort().reverse();
// The comparators below are independent alternatives; each call re-sorts
// `lines` in place.
// Case-insensitive sort
lines.sort((a, b) => a.toLowerCase().localeCompare(b.toLowerCase()));
// Numeric sort
lines.sort((a, b) => parseFloat(a) - parseFloat(b));
// Natural sort (file1, file2, file10 instead of file1, file10, file2)
lines.sort((a, b) => a.localeCompare(b, undefined, { numeric: true }));
// Sort by line length
lines.sort((a, b) => a.length - b.length);
// Random shuffle
/**
 * Shuffle an array in place by walking from the last index down to 1 and
 * swapping each element with one at a random index at or below it.
 *
 * @param {Array} array - The array to shuffle; it is modified.
 * @returns {Array} The same array instance, now shuffled.
 */
function shuffle(array) {
  let i = array.length - 1;
  while (i > 0) {
    // Pick a partner anywhere in [0, i], inclusive.
    const j = Math.floor(Math.random() * (i + 1));
    const held = array[i];
    array[i] = array[j];
    array[j] = held;
    i -= 1;
  }
  return array;
}
Python
import random
from natsort import natsorted # pip install natsort
lines = text.split('\n')
# Alphabetical
sorted_lines = sorted(lines)
reversed_lines = sorted(lines, reverse=True)
# Case-insensitive
sorted_lines = sorted(lines, key=str.lower)
# Numeric (if lines are numbers)
sorted_lines = sorted(lines, key=float)
# Natural sort
sorted_lines = natsorted(lines)
# By length
sorted_lines = sorted(lines, key=len)
# Random shuffle
random.shuffle(lines)
URL Slug Generation
Slugs are URL-friendly versions of strings, essential for SEO-friendly URLs and file naming.
Slug Rules
- Lowercase only
- No spaces (use hyphens or underscores)
- No special characters
- ASCII only (convert accented characters)
- Trim leading/trailing hyphens
- Collapse multiple hyphens
JavaScript
/**
 * Build a URL-friendly, hyphen-separated slug from arbitrary text.
 *
 * @param {string} text - Source text, e.g. a title.
 * @returns {string} The slug, e.g. "hello-world".
 */
function generateSlug(text) {
  const lowered = text.toLowerCase();
  // NFD splits accented letters into base letter + combining mark, and the
  // combining marks (U+0300-U+036F) are then stripped.
  const unaccented = lowered.normalize('NFD').replace(/[\u0300-\u036f]/g, '');
  // Keep only word characters, whitespace and hyphens.
  const cleaned = unaccented.replace(/[^\w\s-]/g, '');
  // Whitespace runs become hyphens; hyphen runs collapse to one.
  const hyphenated = cleaned.replace(/\s+/g, '-').replace(/-+/g, '-');
  // Strip hyphens left at either end.
  return hyphenated.replace(/^-+|-+$/g, '');
}
// Examples
generateSlug("Hello World!"); // "hello-world"
generateSlug("Café Résumé"); // "cafe-resume"
generateSlug(" Multiple Spaces "); // "multiple-spaces"
generateSlug("Special @#$% Characters"); // "special-characters"
generateSlug("Accénted Chàracters"); // "accented-characters"
// With custom separator
/**
 * Build a URL-friendly slug using a caller-chosen separator.
 *
 * @param {string} text - Source text, e.g. a title.
 * @param {string} [separator='-'] - String placed between words.
 * @returns {string} The slug, e.g. "hello_world" for separator "_".
 */
function generateSlugCustom(text, separator = '-') {
  const joined = text
    .toLowerCase()
    .normalize('NFD') // Decompose accents
    .replace(/[\u0300-\u036f]/g, '') // Remove accent marks
    .replace(/[^\w\s-]/g, '') // Remove special chars
    // A function replacer keeps `$` sequences in the separator literal.
    .replace(/\s+/g, () => separator);

  // With no separator there is nothing to collapse or trim.
  if (separator === '') {
    return joined;
  }

  // Escape regex metacharacters so separators such as "." or "+" are matched
  // literally instead of being interpreted as pattern syntax.
  const sep = separator.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  return joined
    .replace(new RegExp(`(?:${sep})+`, 'g'), () => separator) // Collapse repeats
    .replace(new RegExp(`^(?:${sep})+|(?:${sep})+$`, 'g'), ''); // Trim ends
}
generateSlugCustom("Hello World", "_"); // "hello_world"
Python
import re
import unicodedata
def generate_slug(text, separator='-'):
    """Build a URL-friendly slug from ``text``.

    Accented characters are reduced to their ASCII base letters, the text is
    lower-cased, characters other than word characters, whitespace and
    hyphens are removed, and runs of whitespace/underscores/hyphens become a
    single ``separator``.
    """
    # Normalize unicode and convert to ASCII
    text = unicodedata.normalize('NFKD', text)
    text = text.encode('ascii', 'ignore').decode('ascii')
    # Lowercase and replace non-alphanumerics
    text = text.lower()
    text = re.sub(r'[^\w\s-]', '', text)
    text = re.sub(r'[\s_-]+', separator, text)
    text = text.strip(separator)
    return text
# Examples
generate_slug("Hello World!") # "hello-world"
generate_slug("Café Résumé") # "cafe-resume"
generate_slug("Multiple Spaces") # "multiple-spaces"
# Using python-slugify (pip install python-slugify)
from slugify import slugify
slugify("Hello World!") # "hello-world"
slugify("Café Résumé") # "cafe-resume"
slugify("Hello World", separator="_") # "hello_world"
Data Extraction (Emails, URLs)
Extracting structured data from unstructured text is a common parsing task.
Email Extraction
// JavaScript - Extract emails
/**
 * Find every email-like token in a block of text.
 *
 * This is a pragmatic pattern for extraction, not a full address validator.
 *
 * @param {string} text - Text to scan.
 * @returns {string[]} All matches in order of appearance; empty if none.
 */
function extractEmails(text) {
  // The dot before the top-level domain must be escaped; a bare `.` would
  // match any character and accept strings such as "user@hostXcom".
  const regex = /[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/g;
  return text.match(regex) || [];
}
// Sample input containing three addresses (string delimiters restored).
const text = `Contact us at hello@toolsdock.com or support@toolsdock.com. You can also reach sales@toolsdock.com for inquiries.`;
extractEmails(text);
// ["hello@toolsdock.com", "support@toolsdock.com", "sales@toolsdock.com"]
// Python - Extract emails
import re
def extract_emails(text):
    """Return every email-like token found in ``text``, in order of appearance.

    This is a pragmatic extraction pattern, not a full address validator.
    """
    # The dot before the top-level domain is escaped; a bare `.` would match
    # any character and accept strings such as "user@hostXcom".
    pattern = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}'
    return re.findall(pattern, text)
# Extract unique emails only
def extract_unique_emails(text):
    """Return the distinct emails found in ``text``, in first-seen order."""
    # dict.fromkeys de-duplicates while keeping insertion order, so the result
    # is deterministic (a plain set() would yield an arbitrary order).
    return list(dict.fromkeys(extract_emails(text)))
URL Extraction
// JavaScript - Extract URLs
/**
 * Find every http:// or https:// URL in a block of text.
 *
 * A URL runs until the first whitespace character or one of the delimiters
 * < > " { } | \ ^ ` [ ].
 *
 * @param {string} text - Text to scan.
 * @returns {string[]} All matches in order of appearance; empty if none.
 */
function extractUrls(text) {
  // Slashes are escaped for the regex literal; the backslash and square
  // brackets are escaped inside the character class.
  const regex = /https?:\/\/[^\s<>"{}|\\^`\[\]]+/gi;
  return text.match(regex) || [];
}
// Sample input containing three URLs (string delimiters restored).
const text = `Visit https://toolsdock.com for tools. More info at https://toolsdock.com/articles/ and documentation at https://toolsdock.com/docs/`;
extractUrls(text);
// ["https://toolsdock.com", "https://toolsdock.com/articles/", "https://toolsdock.com/docs/"]
// Python - Extract URLs
import re
def extract_urls(text):
    """Return every http:// or https:// URL found in ``text``.

    A URL runs until the first whitespace character or one of the delimiters
    < > " { } | \\ ^ ` [ ].
    """
    # The backslash and square brackets are escaped inside the character
    # class; unescaped, the class would end at the first `]`.
    pattern = r'https?://[^\s<>"{}|\\^`\[\]]+'
    return re.findall(pattern, text)
Phone Number Extraction
// JavaScript - Basic phone extraction (US format)
/**
 * Find US-style phone numbers in a block of text.
 *
 * Accepts an optional "+1"/"1" country prefix, an area code with or without
 * parentheses, and "-", "." or whitespace between the digit groups.
 *
 * @param {string} text - Text to scan.
 * @returns {string[]} All matches in order of appearance; empty if none.
 */
function extractPhones(text) {
  // `+`, `(` and `)` are regex metacharacters and must be escaped to match
  // the literal characters.
  const regex = /(?:\+?1[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}/g;
  return text.match(regex) || [];
}
// Python
import re
def extract_phones(text):
    """Return US-style phone numbers found in ``text``, in order of appearance.

    Accepts an optional "+1"/"1" country prefix, an area code with or without
    parentheses, and "-", "." or whitespace between the digit groups.
    """
    # `+`, `(` and `)` are escaped to match literally. The country-code group
    # is non-capturing because re.findall returns the group's text instead of
    # the whole match when the pattern contains a capturing group.
    pattern = r'(?:\+?1[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}'
    return re.findall(pattern, text)
Text Reversal
Reversing text has uses in palindrome checking, encryption, and creative applications.
JavaScript
// Reverse entire string
/**
 * Reverse a string one UTF-16 code unit at a time.
 *
 * @param {string} str - Text to reverse.
 * @returns {string} The reversed text, e.g. "dlroW olleH".
 */
function reverseString(str) {
  let reversed = '';
  for (let i = str.length - 1; i >= 0; i -= 1) {
    reversed += str[i];
  }
  return reversed;
}
reverseString("Hello World"); // "dlroW olleH"
// Reverse words order (keep word spelling)
/**
 * Reverse the order of space-separated words, leaving each word's spelling
 * untouched.
 *
 * @param {string} str - Text whose words are separated by single spaces.
 * @returns {string} The words in reverse order, e.g. "World Hello".
 */
function reverseWords(str) {
  const words = str.split(' ');
  words.reverse();
  return words.join(' ');
}
reverseWords("Hello World"); // "World Hello"
// Reverse each word individually
/**
 * Reverse the characters of every space-separated word while keeping the
 * words in their original order.
 *
 * @param {string} str - Text whose words are separated by spaces.
 * @returns {string} The transformed text, e.g. "olleH dlroW".
 */
function reverseEachWord(str) {
  const flipped = [];
  for (const word of str.split(' ')) {
    // Fold the code units right-to-left to build the mirrored word.
    flipped.push(word.split('').reduceRight((acc, ch) => acc + ch, ''));
  }
  return flipped.join(' ');
}
reverseEachWord("Hello World"); // "olleH dlroW"
// Reverse lines
/**
 * Reverse the order of the lines in a newline-separated block of text.
 *
 * @param {string} text - Input text.
 * @returns {string} The same lines, last first.
 */
function reverseLines(text) {
  const rows = text.split('\n');
  const flipped = [];
  for (let i = rows.length - 1; i >= 0; i -= 1) {
    flipped.push(rows[i]);
  }
  return flipped.join('\n');
}
// Unicode-safe reverse (handles emojis)
/**
 * Reverse a string by code point, so characters stored as surrogate pairs
 * (such as emoji) stay intact.
 *
 * @param {string} str - Text to reverse.
 * @returns {string} The reversed text.
 */
function reverseStringUnicode(str) {
  // Array.from uses the string iterator, which yields whole code points.
  const codePoints = Array.from(str);
  codePoints.reverse();
  return codePoints.join('');
}
reverseStringUnicode("Hello 👋"); // "👋 olleH"
Python
# Reverse string
text = "Hello World"
reversed_text = text[::-1] # "dlroW olleH"
# Reverse words
reversed_words = ' '.join(text.split()[::-1]) # "World Hello"
# Reverse each word
reverse_each = ' '.join(word[::-1] for word in text.split()) # "olleH dlroW"
# Reverse lines
lines = text.split('\n')
reversed_lines = '\n'.join(lines[::-1])
Whitespace Handling
Proper whitespace handling is critical for data cleaning and text normalization.
Common Operations
// JavaScript
const text = " Hello World ";
// Trim leading/trailing whitespace
text.trim(); // "Hello World"
text.trimStart(); // "Hello World "
text.trimEnd(); // " Hello World"
// Collapse multiple spaces to single
text.replace(/\s+/g, ' '); // " Hello World "
// Trim and collapse
text.trim().replace(/\s+/g, ' '); // "Hello World"
// Remove all whitespace
text.replace(/\s/g, ''); // "HelloWorld"
// Remove empty lines
/**
 * Drop lines that are empty or contain only whitespace.
 *
 * @param {string} text - Input text, one entry per line.
 * @returns {string} The remaining lines, unchanged, joined by newlines.
 */
function removeEmptyLines(text) {
  const kept = [];
  for (const line of text.split('\n')) {
    if (line.trim().length > 0) {
      kept.push(line);
    }
  }
  return kept.join('\n');
}
// Trim each line
/**
 * Strip leading and trailing whitespace from every line.
 *
 * @param {string} text - Input text, one entry per line.
 * @returns {string} The text with each line trimmed; the line count is unchanged.
 */
function trimEachLine(text) {
  const trimmed = Array.from(text.split('\n'), (line) => line.trim());
  return trimmed.join('\n');
}
Python
import re
text = " Hello World "
# Trim
text.strip() # "Hello World"
text.lstrip() # "Hello World "
text.rstrip() # " Hello World"
# Collapse multiple spaces
' '.join(text.split()) # "Hello World"
re.sub(r'\s+', ' ', text) # " Hello World "
# Remove all whitespace
text.replace(' ', '') # "HelloWorld"
re.sub(r'\s', '', text) # "HelloWorld"
# Remove empty lines
'\n'.join(line for line in text.split('\n') if line.strip())
# Trim each line
'\n'.join(line.strip() for line in text.split('\n'))
Command Line Tools
Unix/Linux provides powerful text manipulation tools for scripting.
Essential Commands
| Command | Purpose | Example |
|---|---|---|
| sort | Sort lines | sort file.txt |
| uniq | Remove duplicates | sort file.txt \| uniq |
| tr | Transform chars | tr 'a-z' 'A-Z' |
| sed | Stream edit | sed 's/old/new/g' |
| awk | Pattern processing | awk '{print $1}' |
| rev | Reverse lines | rev file.txt |
| cut | Extract columns | cut -d',' -f1 |
Common Recipes
# Sort and remove duplicates
sort file.txt | uniq
# Sort unique with count
sort file.txt | uniq -c | sort -rn
# Convert to lowercase
tr '[:upper:]' '[:lower:]' < file.txt
# Convert to uppercase
tr '[:lower:]' '[:upper:]' < file.txt
# Remove blank lines
sed '/^$/d' file.txt
grep -v '^$' file.txt
# Trim whitespace from each line
# (the * quantifier is required to strip whole runs of leading/trailing
# whitespace; [[:space:]] is used instead of \t because \t inside a bracket
# expression is not portable across sed implementations)
sed 's/^[[:space:]]*//;s/[[:space:]]*$//' file.txt
# Extract emails (the dot before the TLD is escaped so it matches literally)
grep -oE '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}' file.txt
# Extract URLs
grep -oE 'https?://[^[:space:]]+' file.txt
# Reverse line order
tac file.txt
# Reverse each line
rev file.txt
# Number lines
nl file.txt
cat -n file.txt
# Replace text in-place
sed -i 's/old/new/g' file.txt
Online Tools
For quick text manipulation without writing code, use these browser-based tools:
Case Converter
Convert text between lowercase, UPPERCASE, Title Case, camelCase, and more.
Convert Case
Quick Reference Cheatsheet
JavaScript
str.toLowerCase()
str.toUpperCase()
str.trim()
str.split('\n')
arr.join('\n')
[...new Set(arr)]
arr.sort()
arr.reverse()
Python
str.lower()
str.upper()
str.strip()
str.split('\n')
'\n'.join(arr)
list(set(arr))
sorted(arr)
arr[::-1]