Text Manipulation for Developers: Essential Techniques and Tools

A comprehensive guide to common text processing operations developers encounter daily, with code examples and practical solutions for case conversion, deduplication, sorting, and more.

Case Conversion

Case conversion is fundamental in programming for naming conventions, display formatting, and data normalization.

Common Case Styles

Style	Example	Use Case
lowercase	my variable name	Search normalization, URLs
UPPERCASE	MY VARIABLE NAME	Constants, headings
Title Case	My Variable Name	Headlines, titles
Sentence case	My variable name	UI text, descriptions
camelCase	myVariableName	JavaScript, Java variables
PascalCase	MyVariableName	Classes, React components
snake_case	my_variable_name	Python, databases, Ruby
kebab-case	my-variable-name	URLs, CSS classes, filenames
CONSTANT_CASE	MY_VARIABLE_NAME	Environment variables, constants

JavaScript Examples

// Basic case conversion
// Both methods return a new string; `str` itself is left unchanged.
const str = "Hello World";
str.toLowerCase();  // "hello world"
str.toUpperCase();  // "HELLO WORLD"
// Title Case
/**
 * Capitalizes the first character of every word and lowercases the rest.
 *
 * A "word" starts at a letter, digit or underscore and runs up to the next
 * whitespace character. Unicode property escapes are used so that words
 * beginning with a non-ASCII letter ("élan") are capitalized correctly.
 *
 * @param {string} str - Text to convert.
 * @returns {string} The title-cased text.
 */
function toTitleCase(str) {
  return str.replace(/[\p{L}\p{N}_]\S*/gu, (word) =>
    // slice() replaces the deprecated substr()
    word.charAt(0).toUpperCase() + word.slice(1).toLowerCase()
  );
}
toTitleCase("hello world"); // "Hello World"
// camelCase from spaces/kebab/snake
/**
 * Converts space-, hyphen- or underscore-separated text to camelCase.
 *
 * The input is lowercased first, so existing capitals are not preserved.
 *
 * @param {string} str - Text such as "hello-world", "hello_world" or "hello world".
 * @returns {string} The camelCased text.
 */
function toCamelCase(str) {
  return str
    .toLowerCase()
    // Drop each run of separators and uppercase the character that follows it;
    // a trailing separator has no following character and is simply removed.
    .replace(/[-_\s]+(.)?/g, (_, c) => (c ? c.toUpperCase() : ''));
}
toCamelCase("hello-world"); // "helloWorld"
toCamelCase("hello_world"); // "helloWorld"
// snake_case from camelCase
/**
 * Converts camelCase or PascalCase text to snake_case.
 *
 * @param {string} str - Text such as "helloWorld".
 * @returns {string} The snake_cased text, e.g. "hello_world".
 */
function toSnakeCase(str) {
  return str
    .replace(/([A-Z])/g, '_$1') // underscore before every capital letter
    .toLowerCase()
    .replace(/^_/, '');         // PascalCase input would otherwise start with "_"
}
toSnakeCase("helloWorld"); // "hello_world"
// kebab-case from camelCase
/**
 * Converts camelCase or PascalCase text to kebab-case.
 *
 * @param {string} str - Text such as "helloWorld".
 * @returns {string} The kebab-cased text, e.g. "hello-world".
 */
function toKebabCase(str) {
  const hyphenated = str.replace(/([A-Z])/g, '-$1');
  const lowered = hyphenated.toLowerCase();
  // A leading capital produces a leading hyphen; strip it.
  return lowered.replace(/^-/, '');
}
toKebabCase("helloWorld"); // "hello-world"

Python Examples

# Basic case conversion
# Each method returns a new string; `text` itself is never modified.
text = "Hello World"
text.lower()      # "hello world"
text.upper()      # "HELLO WORLD"
text.title()      # "Hello World"  (first letter of every word)
text.capitalize() # "Hello world"  (first letter of the string only)
text.swapcase()   # "hELLO wORLD"
# snake_case to camelCase
def to_camel_case(snake_str):
    """Convert a snake_case string to camelCase.

    The first underscore-separated component is kept as written; every
    later component is title-cased and appended.
    """
    components = snake_str.split('_')
    return components[0] + ''.join(x.title() for x in components[1:])
to_camel_case("hello_world")  # "helloWorld"
# camelCase to snake_case
import re
def to_snake_case(camel_str):
    """Convert a camelCase or PascalCase string to snake_case.

    An underscore is inserted before every capital letter except one at the
    very start of the string, then the result is lowercased.
    """
    # (?<!^) skips position 0; (?=[A-Z]) is a zero-width match before a capital
    return re.sub(r'(?<!^)(?=[A-Z])', '_', camel_str).lower()
to_snake_case("helloWorld")  # "hello_world"

Removing Duplicates

Deduplication is essential for cleaning data imports, log analysis, and list processing.

JavaScript

// Remove duplicate lines from text
/**
 * Removes repeated lines from newline-separated text.
 *
 * A Set iterates in insertion order, so the first occurrence of each line
 * keeps its position.
 *
 * @param {string} text - Newline-separated input.
 * @returns {string} The text with duplicate lines removed.
 */
function removeDuplicateLines(text) {
  const distinctLines = new Set(text.split('\n'));
  return Array.from(distinctLines).join('\n');
}
// Preserve original order
/**
 * Removes repeated lines, keeping the first occurrence of each in its
 * original position.
 *
 * @param {string} text - Newline-separated input.
 * @returns {string} The text with later duplicates dropped.
 */
function removeDuplicatesPreserveOrder(text) {
  const alreadyKept = new Set();
  const kept = [];
  for (const line of text.split('\n')) {
    if (!alreadyKept.has(line)) {
      alreadyKept.add(line);
      kept.push(line);
    }
  }
  return kept.join('\n');
}
// Case-insensitive deduplication
/**
 * Removes lines that repeat an earlier line when compared case-insensitively.
 * The first occurrence is kept with its original casing.
 *
 * @param {string} text - Newline-separated input.
 * @returns {string} The deduplicated text.
 */
function removeDuplicatesCaseInsensitive(text) {
  const loweredSeen = new Set();
  const survivors = [];
  for (const line of text.split('\n')) {
    const key = line.toLowerCase();
    if (loweredSeen.has(key)) continue;
    loweredSeen.add(key);
    survivors.push(line);
  }
  return survivors.join('\n');
}
// Remove duplicates while trimming whitespace
/**
 * Removes blank lines and lines that duplicate an earlier line once
 * surrounding whitespace is ignored.
 *
 * Lines are compared in trimmed form, but the surviving lines are returned
 * exactly as they appeared in the input (untrimmed).
 *
 * @param {string} text - Newline-separated input.
 * @returns {string} The filtered text.
 */
function removeDuplicatesTrimmed(text) {
  const trimmedSeen = new Set();
  const output = [];
  text.split('\n').forEach((rawLine) => {
    const key = rawLine.trim();
    if (key === '' || trimmedSeen.has(key)) return;
    trimmedSeen.add(key);
    output.push(rawLine);
  });
  return output.join('\n');
}

Python

# Remove duplicates (unordered)
# A set does not keep the original line order, so `unique` may come back in any order.
lines = text.split('\n')
unique = list(set(lines))
# Remove duplicates (preserve order)
def remove_duplicates(lines):
    """Return the items of `lines` with later duplicates removed, in original order."""
    seen = set()
    # set.add() returns None, so `x in seen or seen.add(x)` is falsy exactly
    # when x is new, and records x as a side effect.
    return [x for x in lines if not (x in seen or seen.add(x))]
# Using dict.fromkeys (Python 3.7+, preserves order)
# The lines become dict keys, so repeats collapse onto their first occurrence.
unique = list(dict.fromkeys(lines))
# Case-insensitive with original case preserved
def remove_duplicates_case_insensitive(lines):
    """Drop items that repeat an earlier item when compared case-insensitively.

    The first occurrence of each item is kept with its original casing and
    position.
    """
    seen = set()
    result = []
    for line in lines:
        lower = line.lower()
        if lower not in seen:
            seen.add(lower)
            result.append(line)
    return result

Sorting Lines

Sorting is crucial for organizing lists, configuration files, and data comparison.

Sorting Strategies

Type	Example Result	Use Case
Alphabetical (A-Z)	Apple, Banana, Cherry	General lists
Reverse (Z-A)	Cherry, Banana, Apple	Reverse order
Numeric	1, 2, 10, 20, 100	Numbered lists
Natural	file1, file2, file10	Filenames, versions
By Length	a, ab, abc, abcd	Size analysis
Random	Varies	Shuffling, testing

JavaScript

// Basic alphabetical sort
// Array.prototype.sort() sorts in place and returns the same array, so each
// result below is built from a copy; otherwise `sorted` and `reversed` would
// both be references to `lines` and end up in the same (reversed) order.
// Note: the default sort compares UTF-16 code units, so "B" sorts before "a".
const lines = text.split('\n');
const sorted = [...lines].sort();
const reversed = [...lines].sort().reverse();
// The remaining examples reorder `lines` itself, each replacing the previous order.
// Case-insensitive sort
lines.sort((a, b) => a.toLowerCase().localeCompare(b.toLowerCase()));
// Numeric sort
lines.sort((a, b) => parseFloat(a) - parseFloat(b));
// Natural sort (file1, file2, file10 instead of file1, file10, file2)
lines.sort((a, b) => a.localeCompare(b, undefined, { numeric: true }));
// Sort by line length
lines.sort((a, b) => a.length - b.length);
// Random shuffle
/**
 * Shuffles an array in place by walking from the last index down to 1 and
 * swapping each element with one at a random index at or below it.
 *
 * @param {Array} array - Array to shuffle; it is modified.
 * @returns {Array} The same array instance, now shuffled.
 */
function shuffle(array) {
  let last = array.length - 1;
  while (last > 0) {
    const pick = Math.floor(Math.random() * (last + 1));
    const held = array[last];
    array[last] = array[pick];
    array[pick] = held;
    last -= 1;
  }
  return array;
}

Python

import random
from natsort import natsorted  # pip install natsort
lines = text.split('\n')
# Alphabetical
# sorted() returns a new list and leaves `lines` untouched.
sorted_lines = sorted(lines)
reversed_lines = sorted(lines, reverse=True)
# Case-insensitive
sorted_lines = sorted(lines, key=str.lower)
# Numeric (if lines are numbers)
# float() raises ValueError on any line that is not a number.
sorted_lines = sorted(lines, key=float)
# Natural sort
sorted_lines = natsorted(lines)
# By length
sorted_lines = sorted(lines, key=len)
# Random shuffle
# Unlike sorted(), random.shuffle() reorders `lines` in place and returns None.
random.shuffle(lines)

URL Slug Generation

Slugs are URL-friendly versions of strings, essential for SEO-friendly URLs and file naming.

Slug Rules

Lowercase only
No spaces (use hyphens or underscores)
No special characters
ASCII only (convert accented characters)
Trim leading/trailing hyphens
Collapse multiple hyphens

JavaScript

/**
 * Builds a lowercase, hyphen-separated slug from arbitrary text.
 *
 * @param {string} text - Text to convert.
 * @returns {string} The slug, e.g. "Café Résumé" -> "cafe-resume".
 */
function generateSlug(text) {
  const lowered = text.toLowerCase();
  // NFD splits accented letters into base letter + combining mark,
  // then the combining marks (U+0300–U+036F) are removed.
  const withoutAccents = lowered.normalize('NFD').replace(/[\u0300-\u036f]/g, '');
  // Keep only word characters, whitespace and hyphens.
  const cleaned = withoutAccents.replace(/[^\w\s-]/g, '');
  // Whitespace runs become hyphens; hyphen runs collapse to one.
  const hyphenated = cleaned.replace(/\s+/g, '-').replace(/-+/g, '-');
  // Strip hyphens left at either end.
  return hyphenated.replace(/^-+|-+$/g, '');
}
// Examples
generateSlug("Hello World!");            // "hello-world"
generateSlug("Café Résumé");             // "cafe-resume"
generateSlug("  Multiple   Spaces  ");   // "multiple-spaces"
generateSlug("Special @#$% Characters"); // "special-characters"
generateSlug("Accénted Chàracters");     // "accented-characters"
// With custom separator
/**
 * Builds a lowercase slug using a caller-chosen separator.
 *
 * @param {string} text - Text to convert.
 * @param {string} [separator='-'] - String placed between words. It is
 *   treated literally, so regex metacharacters such as "." are safe.
 * @returns {string} The slug, e.g. ("Hello World", "_") -> "hello_world".
 */
function generateSlugCustom(text, separator = '-') {
  const joined = text
    .toLowerCase()
    .normalize('NFD')
    .replace(/[\u0300-\u036f]/g, '')
    .replace(/[^\w\s-]/g, '')
    // Replacer function so "$" sequences in the separator are inserted as-is.
    .replace(/\s+/g, () => separator);

  // With an empty separator there is nothing to collapse or trim, and an
  // empty pattern followed by "+" would be an invalid regular expression.
  if (separator === '') return joined;

  // Escape regex metacharacters so the separator matches itself literally.
  const sep = separator.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  return joined
    .replace(new RegExp(`(?:${sep})+`, 'g'), () => separator)
    .replace(new RegExp(`^(?:${sep})+|(?:${sep})+$`, 'g'), '');
}
generateSlugCustom("Hello World", "_"); // "hello_world"

Python

import re
import unicodedata
def generate_slug(text, separator='-'):
    """Build a lowercase ASCII slug from `text`.

    Accented characters are decomposed and reduced to their ASCII base
    letters; runs of whitespace, underscores and hyphens become a single
    `separator`; leading and trailing separators are stripped.
    """
    # Normalize unicode and convert to ASCII
    text = unicodedata.normalize('NFKD', text)
    text = text.encode('ascii', 'ignore').decode('ascii')
    # Lowercase and replace non-alphanumerics
    text = text.lower()
    text = re.sub(r'[^\w\s-]', '', text)
    text = re.sub(r'[\s_-]+', separator, text)
    text = text.strip(separator)
    return text
# Examples
generate_slug("Hello World!")        # "hello-world"
generate_slug("Café Résumé")         # "cafe-resume"
generate_slug("Multiple   Spaces")   # "multiple-spaces"
# Using python-slugify (pip install python-slugify)
from slugify import slugify
slugify("Hello World!")              # "hello-world"
slugify("Café Résumé")               # "cafe-resume"
slugify("Hello World", separator="_") # "hello_world"

Data Extraction (Emails, URLs)

Extracting structured data from unstructured text is a common parsing task.

Email Extraction

// JavaScript - Extract emails
/**
 * Finds every email-like substring in the text.
 *
 * @param {string} text - Text to scan.
 * @returns {string[]} Matches in order of appearance; empty array if none.
 */
function extractEmails(text) {
  // The dot before the TLD must be escaped; a bare "." matches any character
  // and would accept dot-less hosts such as "user@localhost".
  const regex = /[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/g;
  return text.match(regex) ?? [];
}
const text = "Contact us at hello@toolsdock.com or support@toolsdock.com. You can also reach sales@toolsdock.com for inquiries.";
extractEmails(text);
// ["hello@toolsdock.com", "support@toolsdock.com", "sales@toolsdock.com"]
# Python - Extract emails
import re
def extract_emails(text):
    """Return every email-like substring of `text`, in order of appearance."""
    # The dot before the TLD is escaped; a bare "." matches any character
    # and would accept dot-less hosts such as "user@localhost".
    pattern = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}'
    return re.findall(pattern, text)
# Extract unique emails only
def extract_unique_emails(text):
    """Return the distinct email addresses found in `text`, in first-seen order."""
    # dict.fromkeys drops repeats but, unlike set(), keeps first-seen order,
    # so the result is deterministic.
    return list(dict.fromkeys(extract_emails(text)))

URL Extraction

// JavaScript - Extract URLs
/**
 * Finds every http:// or https:// URL in the text.
 *
 * A URL runs until the first whitespace character or one of
 * < > " { } | \ ^ ` [ ]. Trailing punctuation such as "." or ","
 * is not excluded and stays part of the match.
 *
 * @param {string} text - Text to scan.
 * @returns {string[]} Matches in order of appearance; empty array if none.
 */
function extractUrls(text) {
  // Slashes, the backslash and both brackets must be escaped inside the literal.
  const regex = /https?:\/\/[^\s<>"{}|\\^`\[\]]+/gi;
  return text.match(regex) ?? [];
}
const text = "Visit https://toolsdock.com for tools. More info at https://toolsdock.com/articles/ and documentation at https://toolsdock.com/docs/";
extractUrls(text);
// ["https://toolsdock.com", "https://toolsdock.com/articles/", "https://toolsdock.com/docs/"]
# Python - Extract URLs
import re
def extract_urls(text):
    """Return every http:// or https:// URL in `text`, in order of appearance.

    A URL runs until the first whitespace character or one of
    < > " { } | \\ ^ ` [ ].
    """
    # The backslash and both brackets are escaped so they are literal
    # members of the excluded-character class.
    pattern = r'https?://[^\s<>"{}|\\^`\[\]]+'
    return re.findall(pattern, text)

Phone Number Extraction

// JavaScript - Basic phone extraction (US format)
/**
 * Finds US-style phone numbers: an optional "+1"/"1" country prefix, a
 * 3-digit area code with optional parentheses, then 3 and 4 digits, with
 * "-", "." or whitespace allowed between the groups.
 *
 * @param {string} text - Text to scan.
 * @returns {string[]} Matches in order of appearance; empty array if none.
 */
function extractPhones(text) {
  // "+", "(" and ")" are escaped so they match literally.
  const regex = /(\+?1[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}/g;
  return text.match(regex) ?? [];
}
# Python
import re
def extract_phones(text):
    """Return US-style phone numbers found in `text`, in order of appearance.

    Accepts an optional "+1"/"1" prefix, a 3-digit area code with optional
    parentheses, then 3 and 4 digits, separated by "-", "." or whitespace.
    """
    # The prefix group is non-capturing: with a capturing group, re.findall
    # would return the group's text instead of the whole phone number.
    pattern = r'(?:\+?1[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}'
    return re.findall(pattern, text)

Text Reversal

Reversing text has uses in palindrome checking, encryption, and creative applications.

JavaScript

// Reverse entire string
/**
 * Reverses a string one UTF-16 code unit at a time.
 *
 * @param {string} str - Text to reverse.
 * @returns {string} The reversed text.
 */
function reverseString(str) {
  let reversed = '';
  for (let i = str.length - 1; i >= 0; i -= 1) {
    reversed += str[i];
  }
  return reversed;
}
reverseString("Hello World"); // "dlroW olleH"
// Reverse words order (keep word spelling)
/**
 * Reverses the order of space-separated words, leaving each word intact.
 *
 * @param {string} str - Text whose words are separated by single spaces.
 * @returns {string} The words in reverse order, joined by single spaces.
 */
function reverseWords(str) {
  const words = str.split(' ');
  words.reverse();
  return words.join(' ');
}
reverseWords("Hello World"); // "World Hello"
// Reverse each word individually
/**
 * Reverses the characters of every space-separated word while keeping the
 * words in their original order.
 *
 * @param {string} str - Text whose words are separated by single spaces.
 * @returns {string} The text with each word reversed.
 */
function reverseEachWord(str) {
  const flipped = [];
  for (const word of str.split(' ')) {
    flipped.push(word.split('').reverse().join(''));
  }
  return flipped.join(' ');
}
reverseEachWord("Hello World"); // "olleH dlroW"
// Reverse lines
/**
 * Reverses the order of the lines in newline-separated text.
 *
 * @param {string} text - Newline-separated input.
 * @returns {string} The lines in reverse order.
 */
function reverseLines(text) {
  const rows = text.split('\n');
  rows.reverse();
  return rows.join('\n');
}
// Unicode-safe reverse (handles emojis)
/**
 * Reverses a string by code point rather than by UTF-16 code unit, so a
 * character stored as a surrogate pair (such as an emoji) stays intact.
 *
 * @param {string} str - Text to reverse.
 * @returns {string} The reversed text.
 */
function reverseStringUnicode(str) {
  // Array.from walks the string iterator, which yields whole code points.
  const codePoints = Array.from(str);
  codePoints.reverse();
  return codePoints.join('');
}
reverseStringUnicode("Hello 👋"); // "👋 olleH"

Python

# Reverse string
text = "Hello World"
# A slice with step -1 walks the string backwards.
reversed_text = text[::-1]  # "dlroW olleH"
# Reverse words
# split() with no argument splits on any run of whitespace, so repeated
# spaces in the input are not preserved in the result.
reversed_words = ' '.join(text.split()[::-1])  # "World Hello"
# Reverse each word
reverse_each = ' '.join(word[::-1] for word in text.split())  # "olleH dlroW"
# Reverse lines
lines = text.split('\n')
reversed_lines = '\n'.join(lines[::-1])

Whitespace Handling

Proper whitespace handling is critical for data cleaning and text normalization.

Common Operations

// JavaScript
// Every call below returns a new string; `text` is never modified.
const text = "  Hello    World  ";
// Trim leading/trailing whitespace
text.trim();                    // "Hello    World"
text.trimStart();               // "Hello    World  "
text.trimEnd();                 // "  Hello    World"
// Collapse multiple spaces to single
// \s also matches tabs and newlines, so any whitespace run becomes one space.
text.replace(/\s+/g, ' ');      // " Hello World "
// Trim and collapse
text.trim().replace(/\s+/g, ' '); // "Hello World"
// Remove all whitespace
text.replace(/\s/g, '');        // "HelloWorld"
// Remove empty lines
/**
 * Drops lines that are empty or contain only whitespace. Remaining lines
 * are returned unchanged.
 *
 * @param {string} text - Newline-separated input.
 * @returns {string} The text without blank lines.
 */
function removeEmptyLines(text) {
  const nonBlank = [];
  for (const line of text.split('\n')) {
    if (line.trim().length > 0) {
      nonBlank.push(line);
    }
  }
  return nonBlank.join('\n');
}
// Trim each line
/**
 * Strips leading and trailing whitespace from every line. Blank lines are
 * kept (as empty lines).
 *
 * @param {string} text - Newline-separated input.
 * @returns {string} The text with each line trimmed.
 */
function trimEachLine(text) {
  const trimmedLines = Array.from(text.split('\n'), (line) => line.trim());
  return trimmedLines.join('\n');
}

Python

import re
text = "  Hello    World  "
# Trim
text.strip()                    # "Hello    World"
text.lstrip()                   # "Hello    World  "
text.rstrip()                   # "  Hello    World"
# Collapse multiple spaces
# split() with no argument also discards leading/trailing whitespace,
# which is why the first form trims and the regex form does not.
' '.join(text.split())          # "Hello World"
re.sub(r'\s+', ' ', text)       # " Hello World "
# Remove all whitespace
# str.replace(' ', '') removes space characters only; the regex form also
# removes tabs and newlines.
text.replace(' ', '')           # "HelloWorld"
re.sub(r'\s', '', text)         # "HelloWorld"
# Remove empty lines
'\n'.join(line for line in text.split('\n') if line.strip())
# Trim each line
'\n'.join(line.strip() for line in text.split('\n'))

Command Line Tools

Unix/Linux provides powerful text manipulation tools for scripting.

Essential Commands

Command	Purpose	Example
`sort`	Sort lines	`sort file.txt`
`uniq`	Remove duplicates	`sort file.txt | uniq`
`tr`	Transform chars	`tr 'a-z' 'A-Z'`
`sed`	Stream edit	`sed 's/old/new/g'`
`awk`	Pattern processing	`awk '{print $1}'`
`rev`	Reverse lines	`rev file.txt`
`cut`	Extract columns	`cut -d',' -f1`

Common Recipes

# Sort and remove duplicates
# (uniq only collapses adjacent repeats, hence the sort first)
sort file.txt | uniq
# Sort unique with count
sort file.txt | uniq -c | sort -rn
# Convert to lowercase
tr '[:upper:]' '[:lower:]' < file.txt
# Convert to uppercase
tr '[:lower:]' '[:upper:]' < file.txt
# Remove blank lines
sed '/^$/d' file.txt
grep -v '^$' file.txt
# Trim whitespace from each line
# The * quantifier strips the whole run of blanks, not just one character.
# \t inside brackets is a GNU sed extension; use [[:space:]] for portability.
sed 's/^[ \t]*//;s/[ \t]*$//' file.txt
# Extract emails
# The dot before the TLD is escaped so it matches a literal "." only.
grep -oE '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}' file.txt
# Extract URLs
grep -oE 'https?://[^[:space:]]+' file.txt
# Reverse line order
tac file.txt
# Reverse each line
rev file.txt
# Number lines
nl file.txt
cat -n file.txt
# Replace text in-place
# GNU sed syntax; BSD/macOS sed requires an explicit suffix argument: sed -i '' ...
sed -i 's/old/new/g' file.txt

Online Tools

For quick text manipulation without writing code, use these browser-based tools:

Case Converter

Convert text between lowercase, UPPERCASE, Title Case, camelCase, and more.

Convert Case

Remove Duplicates

Deduplicate lines with options for case sensitivity and trimming.

Remove Duplicates

Sort Lines

Sort text alphabetically, numerically, by length, or randomly.

Sort Lines

Slug Generator

Generate URL-friendly slugs from any text with accent handling.

Generate Slug

Email Extractor

Extract all email addresses from any text with deduplication.

Extract Emails

Text Reverser

Reverse text, words, or lines with Unicode support.

Reverse Text

Quick Reference Cheatsheet

JavaScript

str.toLowerCase()
str.toUpperCase()
str.trim()
str.split('\n')
arr.join('\n')
[...new Set(arr)]
arr.sort()
arr.reverse()

Python

str.lower()
str.upper()
str.strip()
str.split('\n')
'\n'.join(arr)
list(set(arr))
sorted(arr)
arr[::-1]

Frequently Asked Questions

camelCase starts lowercase with each word capitalized (myVariableName)—used for JavaScript/Java variables. PascalCase capitalizes every word including first (MyVariableName)—used for classes and React components. snake_case uses underscores between lowercase words (my_variable_name)—used in Python and databases.

Use regex to insert underscores before capital letters: str.replace(/([A-Z])/g, '_$1').toLowerCase().replace(/^_/, ''). This finds each capital letter, adds an underscore before it, converts to lowercase, and removes any leading underscore.

Use a Set to track seen lines while filtering. In JavaScript: const seen = new Set(); lines.filter(line => { if (seen.has(line)) return false; seen.add(line); return true; }). This keeps the first occurrence of each line.

Natural sorting treats numbers within strings numerically. Regular sort: file1, file10, file2. Natural sort: file1, file2, file10. Use when sorting filenames, version numbers, or any strings with embedded numbers. JavaScript: arr.sort((a,b) => a.localeCompare(b, undefined, {numeric: true})).

Convert to lowercase, normalize accents (NFD), remove accent marks, strip remaining special characters, replace whitespace with hyphens, collapse multiple hyphens, and trim edge hyphens. Example: 'Café Résumé!' becomes 'cafe-resume'. Most frameworks have built-in slugify functions.

Use regex: text.match(/[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/g) returns an array of all email addresses. For unique emails only, wrap result with [...new Set(matches)]. This pattern handles common email formats.

Split by newline, trim each line, rejoin: text.split('\n').map(line => line.trim()).join('\n'). In Python: '\n'.join(line.strip() for line in text.split('\n')). Command line: sed 's/^[ \t]*//;s/[ \t]*$//' file.txt.

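Simple string reversal can break emojis and combined characters. Use the spread operator for code-point-safe reversal: [...str].reverse().join(''). This keeps characters stored as surrogate pairs (most emojis) intact, where split('').reverse().join('') would corrupt them. Sequences built from several code points, such as a letter followed by a combining accent or an emoji joined with zero-width joiners, are still reordered; reverse by grapheme cluster (for example with Intl.Segmenter) when those must be preserved.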

sort (sort lines), uniq (remove duplicates, requires sorted input), tr (transform characters like case), sed (search and replace), awk (column processing), grep (search), cut (extract columns). Combine with pipes: sort file.txt | uniq -c | sort -rn shows unique lines with counts.

Use regex replace: str.replace(/\s+/g, ' '). This replaces any sequence of whitespace (spaces, tabs, newlines) with a single space. Add .trim() to also remove leading/trailing whitespace. In Python: ' '.join(text.split()) handles both.