listminer/listminer.py at main · awillard1/listminer

History

2565 lines (2152 loc) · 102 KB

Raw

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283

284

285

286

287

288

289

290

291

292

293

294

295

296

297

298

299

300

301

302

303

304

305

306

307

308

309

310

311

312

313

314

315

316

317

318

319

320

321

322

323

324

325

326

327

328

329

330

331

332

333

334

335

336

337

338

339

340

341

342

343

344

345

346

347

348

349

350

351

352

353

354

355

356

357

358

359

360

361

362

363

364

365

366

367

368

369

370

371

372

373

374

375

376

377

378

379

380

381

382

383

384

385

386

387

388

389

390

391

392

393

394

395

396

397

398

399

400

401

402

403

404

405

406

407

408

409

410

411

412

413

414

415

416

417

418

419

420

421

422

423

424

425

426

427

428

429

430

431

432

433

434

435

436

437

438

439

440

441

442

443

444

445

446

447

448

449

450

451

452

453

454

455

456

457

458

459

460

461

462

463

464

465

466

467

468

469

470

471

472

473

474

475

476

477

478

479

480

481

482

483

484

485

486

487

488

489

490

491

492

493

494

495

496

497

498

499

500

501

502

503

504

505

506

507

508

509

510

511

512

513

514

515

516

517

518

519

520

521

522

523

524

525

526

527

528

529

530

531

532

533

534

535

536

537

538

539

540

541

542

543

544

545

546

547

548

549

550

551

552

553

554

555

556

557

558

559

560

561

562

563

564

565

566

567

568

569

570

571

572

573

574

575

576

577

578

579

580

581

582

583

584

585

586

587

588

589

590

591

592

593

594

595

596

597

598

599

600

601

602

603

604

605

606

607

608

609

610

611

612

613

614

615

616

617

618

619

620

621

622

623

624

625

626

627

628

629

630

631

632

633

634

635

636

637

638

639

640

641

642

643

644

645

646

647

648

649

650

651

652

653

654

655

656

657

658

659

660

661

662

663

664

665

666

667

668

669

670

671

672

673

674

675

676

677

678

679

680

681

682

683

684

685

686

687

688

689

690

691

692

693

694

695

696

697

698

699

700

701

702

703

704

705

706

707

708

709

710

711

712

713

714

715

716

717

718

719

720

721

722

723

724

725

726

727

728

729

730

731

732

733

734

735

736

737

738

739

740

741

742

743

744

745

746

747

748

749

750

751

752

753

754

755

756

757

758

759

760

761

762

763

764

765

766

767

768

769

770

771

772

773

774

775

776

777

778

779

780

781

782

783

784

785

786

787

788

789

790

791

792

793

794

795

796

797

798

799

800

801

802

803

804

805

806

807

808

809

810

811

812

813

814

815

816

817

818

819

820

821

822

823

824

825

826

827

828

829

830

831

832

833

834

835

836

837

838

839

840

841

842

843

844

845

846

847

848

849

850

851

852

853

854

855

856

857

858

859

860

861

862

863

864

865

866

867

868

869

870

871

872

873

874

875

876

877

878

879

880

881

882

883

884

885

886

887

888

889

890

891

892

893

894

895

896

897

898

899

900

901

902

903

904

905

906

907

908

909

910

911

912

913

914

915

916

917

918

919

920

921

922

923

924

925

926

927

928

929

930

931

932

933

934

935

936

937

938

939

940

941

942

943

944

945

946

947

948

949

950

951

952

953

954

955

956

957

958

959

960

961

962

963

964

965

966

967

968

969

970

971

972

973

974

975

976

977

978

979

980

981

982

983

984

985

986

987

988

989

990

991

992

993

994

995

996

997

998

999

1000

#!/usr/bin/env python3

# -*- coding: utf-8 -*-

"""

PasswordRuleMiner — Password Artifact Generator

Features:

- Single or multiple potfiles (directory recursion)

- Single or multiple hash files (directory recursion)

- Generates Hashcat prepend/append rules, masks, and year/season rules

- Fully compatible with complex usernames and special characters

- Robust logging for every processed file and generation step

- Levenshtein distance-based scoring for transformation effort

- Advanced Hashcat operations (T, L, R, swap, insert, etc.)

- Custom wordlist integration with spell-checking

- Statistical analysis and rule pruning

"""

import argparse

import hashlib

import logging

import os

import pickle

import re

import signal

import sys

from collections import Counter, defaultdict, deque

from concurrent.futures import ThreadPoolExecutor, as_completed

from datetime import datetime

from pathlib import Path

from threading import Lock

from typing import List, Iterable, Dict, Set, Tuple, Optional

from typing import Any

# =============================================

# SCORING CONSTANTS

# =============================================

# Rule scoring constants for prioritization
# NOTE(review): presumably a larger score ranks a rule earlier in the output;
# confirm against the code that sorts and writes the rules.
SCORE_USER_CONTEXT_HIGH = 10_000_000
SCORE_USER_CONTEXT_MEDIUM = 9_900_000
SCORE_USER_CONTEXT_LOW = 9_800_000
# Base score for BFS-generated rules; BFSRuleGenerator.generate divides it by
# (depth + 1), so deeper rule chains score lower.
SCORE_BFS_BASE = 500_000
SCORE_ROTATION_SWAP_HIGH = 400_000
SCORE_ROTATION_SWAP_MEDIUM = 380_000
SCORE_ROTATION_SWAP_LOW = 350_000
SCORE_ADVANCED_SWAP = 390_000
SCORE_SWAP_CASE = 370_000
SCORE_NUMERIC = 320_000
SCORE_COMBINED_RULES = 310_000
SCORE_PREPEND_APPEND = 300_000
SCORE_SURROUND = 250_000
SCORE_CUSTOM_WORDLIST = 200_000

# Levenshtein distance smoothing factor
# (added to the average distance in the effectiveness denominator, which also
# keeps that denominator non-zero when the average distance is 0)
LEVENSHTEIN_SMOOTHING = 1.0

# Maximum transformation history per rule to prevent memory issues
MAX_TRANSFORMATION_HISTORY = 10

# =============================================

# PROGRESS BAR

# =============================================

# tqdm is optional: TQDM records whether the import succeeded.
try:
    from tqdm import tqdm as _tqdm
except ImportError:
    TQDM = False
else:
    TQDM = True


def progress(it, **kw):
    """
    Wrap *it* in a tqdm progress bar when possible.

    The bar is only used when tqdm was imported successfully and stdout is
    attached to a terminal; otherwise *it* is returned unchanged. Keyword
    arguments are forwarded to tqdm.
    """
    if TQDM and sys.stdout.isatty():
        return _tqdm(it, **kw)
    return it

# =============================================

# PARALLEL PROCESSING CONFIGURATION

# =============================================

# Fallback CPU count when os.cpu_count() returns None (unknown system)
FALLBACK_CPU_COUNT = 4

# Default worker count: the detected CPU count, capped at 8
DEFAULT_WORKERS = min(8, (os.cpu_count() or FALLBACK_CPU_COUNT))


def _resolve_max_workers(raw_value: Optional[str], default: int) -> int:
    """
    Turn a LISTMINER_MAX_WORKERS override into a usable worker count.

    Returns *default* when the variable is unset. A value that is not an
    integer, or is smaller than 1, is reported on stderr and replaced by
    *default* instead of aborting the import or handing an unusable worker
    count to the thread pool.
    """
    if raw_value is None:
        return default
    try:
        workers = int(raw_value)
    except (TypeError, ValueError):
        workers = 0
    if workers < 1:
        # The module logger is configured further down the file, so report
        # directly on stderr rather than touching the logging system here.
        print(
            f"Ignoring invalid LISTMINER_MAX_WORKERS={raw_value!r}; using {default}",
            file=sys.stderr,
        )
        return default
    return workers


# Effective worker count (environment override, else DEFAULT_WORKERS)
MAX_WORKERS = _resolve_max_workers(os.environ.get('LISTMINER_MAX_WORKERS'), DEFAULT_WORKERS)

# Batch multiplier for load balancing across workers
# Higher values create more batches for better distribution and progress tracking
BATCH_MULTIPLIER = 4

# Minimum batch sizes for different operation types
MIN_PASSWORD_BATCH_SIZE = 1000  # For password processing operations
MIN_WORD_BATCH_SIZE = 10  # For word-level operations (smaller datasets)

# =============================================

# Logging

# =============================================

# Configure the root logger: INFO level, lines formatted as "[HH:MM:SS] message".
logging.basicConfig(
    level=logging.INFO,
    format="[%(asctime)s] %(message)s",
    datefmt="%H:%M:%S"
)

# Module-level logger used by the helpers in this file.
log = logging.getLogger(__name__)

# Thread-safe logging lock for parallel operations
_log_lock = Lock()

def parallel_log(message: str):
    """Log *message* at INFO level while holding the module-wide log lock."""
    _log_lock.acquire()
    try:
        log.info(message)
    finally:
        _log_lock.release()

def sigint_handler(signum, frame):
    """
    SIGINT (Ctrl-C) handler.

    Logs a warning and terminates the process with exit status 0. Both
    arguments are supplied by the signal machinery and are not used.
    """
    log.warning("\nInterrupted by user — exiting cleanly")
    raise SystemExit(0)


# Install the handler as soon as the module is loaded.
signal.signal(signal.SIGINT, sigint_handler)

# =============================================

# SPELL-CHECKING LIBRARY (OPTIONAL)

# =============================================

# Optional dependency: ENCHANT_AVAILABLE records whether the `enchant`
# spell-checking package could be imported.
try:
    import enchant
    ENCHANT_AVAILABLE = True
except ImportError:
    ENCHANT_AVAILABLE = False

# =============================================

# LEVENSHTEIN DISTANCE

# =============================================

def levenshtein_distance(s1: str, s2: str) -> int:
    """
    Return the Levenshtein (edit) distance between *s1* and *s2*.

    The distance is the smallest number of single-character insertions,
    deletions or substitutions that turns one string into the other. It is
    used to score how much effort a transformation between two password
    candidates takes.
    """
    # Work with the longer string on the outside so each row is as short as possible.
    longer, shorter = (s2, s1) if len(s1) < len(s2) else (s1, s2)
    if not shorter:
        return len(longer)

    prev = list(range(len(shorter) + 1))
    for row, ch_long in enumerate(longer, start=1):
        curr = [row]
        for col, ch_short in enumerate(shorter):
            mismatch = 0 if ch_long == ch_short else 1
            curr.append(min(
                prev[col + 1] + 1,     # insertion
                curr[col] + 1,         # deletion
                prev[col] + mismatch,  # substitution (free when the characters match)
            ))
        prev = curr
    return prev[-1]

def score_transformation_effort(base: str, password: str) -> int:
    """
    Score the effort of turning *base* into *password*.

    The score is the Levenshtein distance between the lower-cased strings, so
    pure case changes cost nothing. A lower score means an easier
    transformation (a more effective rule).
    """
    base_folded, password_folded = base.lower(), password.lower()
    return levenshtein_distance(base_folded, password_folded)

# =============================================

# RULE EFFECTIVENESS TRACKING

# =============================================

class RuleEffectivenessTracker:
    """
    Track and analyze the effectiveness of generated rules.

    Provides statistical analysis and pruning capabilities.
    """

    def __init__(self):
        # Per-rule aggregates, created on first write by record_transformation.
        self.rule_stats: Dict[str, Dict] = defaultdict(lambda: {
            'count': 0,
            'total_score': 0,
            'transformations': [],
            'levenshtein_sum': 0
        })
        # (base, password, rule, levenshtein distance) samples, capped at 10k entries.
        self.transformation_history: List[Tuple[str, str, str, int]] = []

    def record_transformation(self, base: str, password: str, rule: str, score: int):
        """Record a successful transformation of *base* into *password* by *rule*."""
        lev_dist = levenshtein_distance(base, password)
        stats = self.rule_stats[rule]
        stats['count'] += 1
        stats['total_score'] += score
        stats['levenshtein_sum'] += lev_dist

        # Limit transformation history to prevent memory issues
        if len(stats['transformations']) < MAX_TRANSFORMATION_HISTORY:
            stats['transformations'].append((base, password))

        # Limit overall transformation history size to prevent unbounded growth
        if len(self.transformation_history) < 10000:  # Max 10k transformations stored
            self.transformation_history.append((base, password, rule, lev_dist))

    def get_rule_effectiveness(self, rule: str) -> float:
        """
        Calculate the effectiveness score for *rule*.

        Returns 0.0 for a rule with no recorded transformations. The lookup
        is read-only: asking about an unknown rule does not register it in
        ``rule_stats``.
        """
        # .get() instead of indexing: indexing the defaultdict would insert an
        # empty record and make every queried rule look "tracked".
        stats = self.rule_stats.get(rule)
        if stats is None or stats['count'] == 0:
            return 0.0

        avg_score = stats['total_score'] / stats['count']
        avg_levenshtein = stats['levenshtein_sum'] / stats['count']

        # Effective rules have high scores and low transformation effort
        return avg_score / (LEVENSHTEIN_SMOOTHING + avg_levenshtein)

    def prune_ineffective_rules(self, rules: List[Tuple[int, str]], min_effectiveness: float = 100.0) -> List[Tuple[int, str]]:
        """
        Drop tracked rules whose effectiveness is below *min_effectiveness*.

        Rules that were never tracked are always kept, because there is no
        evidence against them. The input order is preserved.
        """
        return [
            (score, rule)
            for score, rule in rules
            if rule not in self.rule_stats
            or self.get_rule_effectiveness(rule) >= min_effectiveness
        ]

    def get_statistics_report(self) -> str:
        """Generate a statistical report of rule effectiveness."""
        if not self.rule_stats:
            return "No rule statistics available."

        sorted_rules = sorted(
            self.rule_stats.items(),
            key=lambda x: self.get_rule_effectiveness(x[0]),
            reverse=True
        )

        report = ["Rule Effectiveness Statistics:", "=" * 50]
        report.append(f"Total unique rules tracked: {len(self.rule_stats)}")
        # Counts the stored history samples, which stop growing at 10,000.
        report.append(f"Total transformations: {len(self.transformation_history)}")
        report.append("")
        report.append("Top 20 Most Effective Rules:")
        report.append("-" * 50)

        for i, (rule, stats) in enumerate(sorted_rules[:20], 1):
            effectiveness = self.get_rule_effectiveness(rule)
            avg_lev = stats['levenshtein_sum'] / stats['count'] if stats['count'] > 0 else 0
            report.append(f"{i:2d}. Rule: {rule[:50]}")
            report.append(f" Count: {stats['count']}, Effectiveness: {effectiveness:.2f}, Avg Levenshtein: {avg_lev:.2f}")

        return "\n".join(report)

# =============================================

# FILE CACHE

# =============================================

class FileCache:
    """
    Caching system for processed potfiles and hashfiles.

    Uses file path, size and modification time for cache validation: any
    change to one of them yields a different cache key.
    """

    # Exceptions that a damaged, truncated or stale pickle can raise while
    # loading. The pickle documentation notes that unpickling may raise more
    # than PickleError (AttributeError, EOFError, ImportError, IndexError, ...).
    _UNREADABLE_CACHE_ERRORS = (
        pickle.PickleError,
        EOFError,
        FileNotFoundError,
        AttributeError,
        ImportError,
        IndexError,
        KeyError,
        TypeError,
        ValueError,
    )

    def __init__(self, cache_dir: Path):
        """Create the cache directory (and parents) if needed; caching starts enabled."""
        self.cache_dir = cache_dir
        self.cache_dir.mkdir(parents=True, exist_ok=True)
        self.enabled = True

    def _get_file_key(self, filepath: Path) -> str:
        """Generate a unique cache key for a file."""
        stat = filepath.stat()
        # Use path, size, and mtime for cache key
        key_str = f"{filepath.resolve()}:{stat.st_size}:{stat.st_mtime}"
        # MD5 only serves as a compact file-name digest here, not as a security measure.
        return hashlib.md5(key_str.encode()).hexdigest()

    def _get_cache_path(self, filepath: Path, cache_type: str) -> Path:
        """Get the cache file path for a given file and cache type."""
        key = self._get_file_key(filepath)
        return self.cache_dir / f"{cache_type}_{key}.pkl"

    def get(self, filepath: Path, cache_type: str) -> Optional[Any]:
        """
        Retrieve cached data for a file if valid.

        Returns None when caching is disabled, when no cache entry exists, or
        when the entry cannot be unpickled; an unreadable entry is deleted so
        it gets rebuilt.
        """
        if not self.enabled:
            return None

        cache_path = self._get_cache_path(filepath, cache_type)
        if not cache_path.exists():
            return None

        # SECURITY NOTE: pickle.load executes code embedded in the file. The
        # cache directory must only be writable by trusted users.
        try:
            with cache_path.open('rb') as f:
                cached_data = pickle.load(f)
        except self._UNREADABLE_CACHE_ERRORS:
            # Cache corrupted or invalid
            cache_path.unlink(missing_ok=True)
            return None

        log.info(f" → Using cached data for {filepath.name}")
        return cached_data

    def set(self, filepath: Path, cache_type: str, data: Any) -> None:
        """Store data in cache for a file; failures are logged, never raised."""
        if not self.enabled:
            return

        cache_path = self._get_cache_path(filepath, cache_type)
        try:
            with cache_path.open('wb') as f:
                pickle.dump(data, f)
        except (pickle.PickleError, OSError) as e:
            log.warning(f"Failed to cache {filepath.name}: {e}")

    def clear(self):
        """Clear all cache files."""
        for cache_file in self.cache_dir.glob("*.pkl"):
            cache_file.unlink(missing_ok=True)
        log.info("Cache cleared")

# =============================================

# Password decoding

# =============================================

# Matches a $HEX[...] wrapper with a non-empty run of hex digits.
HEX_BRACKET_RE = re.compile(r'\$HEX\[[0-9a-fA-F]+\]')
# Matches a literal backslash-x escape followed by exactly two hex digits.
HEX_ESCAPE_RE = re.compile(r'\\x[0-9a-fA-F]{2}')


def _hex_escape_to_char(match):
    """Turn one matched \\xHH escape into the character with that code point."""
    return chr(int(match.group(0)[2:], 16))


def decode_plaintext(text: str) -> str:
    r"""
    Decode $HEX[...] and \xHH sequences.

    A string that is entirely wrapped in $HEX[...] is hex-decoded and read as
    latin-1; invalid hex yields "". Any other string has its literal \xHH
    escapes replaced by the corresponding characters. Empty input yields "".
    """
    if not text:
        return ""

    wrapped_in_hex = text.startswith("$HEX[") and text.endswith("]")
    if not wrapped_in_hex:
        return HEX_ESCAPE_RE.sub(_hex_escape_to_char, text)

    try:
        raw = bytes.fromhex(text[5:-1])
    except ValueError:
        return ""
    return raw.decode("latin-1")

def extract_password_from_pot(line: str) -> str:
    """
    Extract the decoded plaintext from one potfile line.

    Surrounding whitespace is stripped first. Blank lines and lines starting
    with "#" yield "". Otherwise everything after the last ":" (or the whole
    line when there is no ":") is passed through decode_plaintext.
    """
    stripped = line.strip()
    if stripped == "" or stripped.startswith("#"):
        return ""
    _, _, plain = stripped.rpartition(":")
    return decode_plaintext(plain)

def extract_password_from_wordlist(line: str) -> str:
    """Return the decoded password from one wordlist line, ignoring surrounding whitespace."""
    candidate = line.strip()
    return decode_plaintext(candidate)

# =============================================

# Hashcat Rule Helpers

# =============================================

def is_ascii_safe(text: str) -> bool:
    """Return True when every character of *text* is ASCII (code point below 128).

    The empty string counts as safe. Used to decide whether text may be
    embedded in a Hashcat rule.
    """
    return text.isascii()

def hashcat_prepend(word: str, reverse: bool = True, max_length: int = 6) -> Optional[str]:
    """
    Generate a Hashcat prepend rule using only Hashcat syntax.

    Emits one space-separated ``^X`` command per character.

    Args:
        word: Text to prepend.
        reverse: When True (default) the characters are emitted last-to-first.
            Each ``^X`` pushes X onto the front, so this makes the word come
            out in reading order.
        max_length: Longest word that is still turned into a rule.

    Returns:
        The rule string, or None when *word* is empty, longer than
        *max_length*, or contains non-ASCII characters.
    """
    if not word or len(word) > max_length or not is_ascii_safe(word):
        return None

    ordered = word[::-1] if reverse else word
    # A one-character word naturally yields the bare "^X" form.
    return " ".join(f"^{c}" for c in ordered)

def hashcat_append(word: str, max_length: int = 6) -> Optional[str]:
    """
    Generate a Hashcat append rule using only Hashcat syntax.

    Emits one space-separated ``$X`` command per character, in reading order.

    Args:
        word: Text to append.
        max_length: Longest word that is still turned into a rule.

    Returns:
        The rule string, or None when *word* is empty, longer than
        *max_length*, or contains non-ASCII characters.
    """
    if not word or len(word) > max_length or not is_ascii_safe(word):
        return None

    # A one-character word naturally yields the bare "$X" form.
    return " ".join(f"${c}" for c in word)

# =============================================

# JOHN THE RIPPER (JTR) RULE HELPERS

# =============================================

def jtr_prepend(word: str, max_length: int = 6) -> Optional[str]:
    """
    Generate a John the Ripper prepend rule.

    Emits one ``^X`` command per character, concatenated without separators.
    The characters are emitted last-to-first because each ``^X`` pushes X
    onto the front.

    Args:
        word: Text to prepend.
        max_length: Longest word that is still turned into a rule.

    Returns:
        The rule string, or None when *word* is empty, longer than
        *max_length*, or contains non-ASCII characters.
    """
    if not word or len(word) > max_length or not is_ascii_safe(word):
        return None

    # NOTE(review): characters that are special to the JtR rule preprocessor
    # (such as "[", "]" and "\") are emitted unescaped here - confirm whether
    # they need a backslash escape before being written to a rules file.
    return "".join(f"^{c}" for c in reversed(word))

def jtr_append(word: str, max_length: int = 6) -> Optional[str]:
    """
    Generate a John the Ripper append rule.

    Emits one ``$X`` command per character, in reading order, concatenated
    without separators.

    Args:
        word: Text to append.
        max_length: Longest word that is still turned into a rule.

    Returns:
        The rule string, or None when *word* is empty, longer than
        *max_length*, or contains non-ASCII characters.
    """
    if not word or len(word) > max_length or not is_ascii_safe(word):
        return None

    # NOTE(review): characters that are special to the JtR rule preprocessor
    # (such as "[", "]" and "\") are emitted unescaped here - confirm whether
    # they need a backslash escape before being written to a rules file.
    return "".join(f"${c}" for c in word)

# =============================================

# RULE FORMAT CONVERTER

# =============================================

class RuleConverter:
    """
    Convert between Hashcat and John the Ripper rule formats.

    Handles compatible operations and documents limitations.
    """

    # Operations that are identical between Hashcat and JtR
    IDENTICAL_OPS = {
        'l', 'u', 'c', 'C', 't', 'r', 'd',  # Case and duplication
        '{', '}',  # Rotation
        '[', ']',  # Remove first/last
        'E',  # Title case
    }

    # Character-based operations (same syntax)
    CHAR_OPS = {'^', '$', 's', '@', 'i', 'o', 'D', 'T'}

    # Hashcat operations that don't have JtR equivalents
    NO_JTR_EQUIVALENT = {
        'L': 'bitwise shift left',
        'R': 'bitwise shift right',
    }

    @staticmethod
    def _has_unsupported_op(operations: List[str]) -> bool:
        """Return True when any operation starts with a command listed in NO_JTR_EQUIVALENT."""
        return any(op[0] in RuleConverter.NO_JTR_EQUIVALENT for op in operations)

    @staticmethod
    def hashcat_to_jtr(hashcat_rule: str) -> Optional[str]:
        """
        Convert a Hashcat rule to John the Ripper format.

        Hashcat separates operations with spaces while JtR concatenates them,
        so conversion joins the operations without separators.

        Returns None if the rule is empty or contains an operation without a
        JtR equivalent. Dropping only the unsupported operation would yield a
        rule that produces different candidates, so the whole rule is
        rejected instead.
        """
        if not hashcat_rule:
            return None

        # NOTE(review): whitespace splitting cannot represent operations whose
        # parameter is itself a space (for example appending a space); such
        # rules lose that parameter here.
        operations = hashcat_rule.split()
        if not operations or RuleConverter._has_unsupported_op(operations):
            return None

        # JtR concatenates operations without spaces
        return "".join(operations)

    @staticmethod
    def is_compatible_with_jtr(hashcat_rule: str) -> bool:
        """
        Check if a Hashcat rule can be converted to JtR format.

        Returns False for an empty rule or one containing an operation listed
        in NO_JTR_EQUIVALENT, True otherwise.
        """
        if not hashcat_rule:
            return False
        return not RuleConverter._has_unsupported_op(hashcat_rule.split())

# =============================================

# JOHN THE RIPPER RULE GENERATOR

# =============================================

class JohnTheRipperRuleGenerator:
    """
    Build John the Ripper compatible password rules.

    Mirrors the Hashcat rule generation where JtR offers an equivalent.
    Every generator returns ``(score, rule)`` tuples.
    """

    def __init__(self, converter: RuleConverter = None):
        """Use *converter* for Hashcat-to-JtR conversion, or a fresh RuleConverter when none is given."""
        self.converter = converter if converter else RuleConverter()
        self.generated_rules: List[str] = []

    def generate_prepend_rule(self, word: str, score: int = SCORE_PREPEND_APPEND) -> Optional[Tuple[int, str]]:
        """Return a scored JtR rule that prepends *word*, or None when no rule can be built."""
        rule = jtr_prepend(word)
        return (score, rule) if rule else None

    def generate_append_rule(self, word: str, score: int = SCORE_PREPEND_APPEND) -> Optional[Tuple[int, str]]:
        """Return a scored JtR rule that appends *word*, or None when no rule can be built."""
        rule = jtr_append(word)
        return (score, rule) if rule else None

    def generate_surround_rule(self, prefix: str, suffix: str, score: int = SCORE_SURROUND) -> Optional[Tuple[int, str]]:
        """Return a scored JtR rule that prepends *prefix* and appends *suffix*.

        None is returned unless both halves can be built.
        """
        prep = jtr_prepend(prefix)
        app = jtr_append(suffix)
        if not (prep and app):
            return None
        return (score, f"{prep}{app}")

    def generate_case_rules(self) -> List[Tuple[int, str]]:
        """Return the basic whole-word transformation rules, all scored SCORE_BFS_BASE."""
        # l=lowercase, u=uppercase, c=capitalize, C=invert capitalize,
        # t=toggle case, r=reverse, d=duplicate, E=title case
        return [(SCORE_BFS_BASE, op) for op in ("l", "u", "c", "C", "t", "r", "d", "E")]

    def generate_toggle_rules(self, positions: List[int] = None) -> List[Tuple[int, str]]:
        """Return one toggle-case-at-position rule (``TN``) per position; defaults to positions 0-4."""
        if positions is None:
            positions = [0, 1, 2, 3, 4]
        return [(SCORE_ROTATION_SWAP_MEDIUM, f"T{pos}") for pos in positions]

    def generate_rotation_rules(self) -> List[Tuple[int, str]]:
        """Return single and double rotate-left / rotate-right rules."""
        single = [(SCORE_ROTATION_SWAP_HIGH, op) for op in ("{", "}")]
        double = [(SCORE_ROTATION_SWAP_MEDIUM, op) for op in ("{{", "}}")]
        return single + double

    def generate_leet_rules(self, word: str, max_substitutions: int = 2) -> List[Tuple[int, str]]:
        """
        Generate simple leet-speak substitution rules for John the Ripper.

        Only single-character replacements are emitted (``sXY`` commands).
        Single substitutions use the first three LEET_MAP variants of each
        letter; when *max_substitutions* allows it, pairs of nearby letters
        are combined using the first two variants of each.
        """
        leet_chars = [c for c in word.lower() if c in LEET_MAP]
        if not leet_chars:
            return []

        # Single substitutions
        rules = [
            (SCORE_NUMERIC, f"s{char}{leet_char}")
            for char in leet_chars
            for leet_char in LEET_MAP[char][:3]  # Top 3 variants
            if len(leet_char) == 1  # JtR only supports single-char replacement
        ]

        # Double substitutions (optional, limited)
        if len(leet_chars) >= 2 and max_substitutions >= 2:
            for i, char1 in enumerate(leet_chars):
                firsts = [v for v in LEET_MAP[char1][:2] if len(v) == 1]
                # Pair each letter with at most the next two leet-able letters.
                for char2 in leet_chars[i + 1:i + 3]:
                    seconds = [v for v in LEET_MAP[char2][:2] if len(v) == 1]
                    for leet1 in firsts:
                        for leet2 in seconds:
                            rules.append((SCORE_NUMERIC - 10000, f"s{char1}{leet1}s{char2}{leet2}"))

        return rules

    def convert_from_hashcat_rules(self, hashcat_rules: List[Tuple[int, str]]) -> List[Tuple[int, str]]:
        """
        Convert scored Hashcat rules to JtR format, keeping each score.

        Rules the converter reports as incompatible are counted and skipped;
        the count is logged when it is non-zero.
        """
        converted = []
        skipped_count = 0

        for score, hashcat_rule in hashcat_rules:
            if not self.converter.is_compatible_with_jtr(hashcat_rule):
                skipped_count += 1
                continue
            jtr_rule = self.converter.hashcat_to_jtr(hashcat_rule)
            if jtr_rule:
                converted.append((score, jtr_rule))

        if skipped_count > 0:
            log.info(f"Skipped {skipped_count:,} Hashcat rules without JtR equivalents")

        return converted

# =============================================

# ADVANCED HASHCAT OPERATIONS

# =============================================

# Hashcat encodes positions as a single character: 0-9, then A-Z for 10-35.
_HASHCAT_POSITION_CHARS = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"


def _encode_position(pos: int) -> str:
    """Return the single-character Hashcat encoding of position/length *pos*.

    Raises:
        ValueError: if *pos* is outside 0-35 and therefore cannot be
            expressed in a Hashcat rule.
    """
    if not 0 <= pos < len(_HASHCAT_POSITION_CHARS):
        raise ValueError(f"Hashcat rule positions must be in 0-35, got {pos!r}")
    return _HASHCAT_POSITION_CHARS[pos]


def hashcat_toggle_at_position(pos: int) -> str:
    """Generate Hashcat rule to toggle case at specific position (TN)."""
    return f"T{_encode_position(pos)}"


def hashcat_bitwise_shift_left(pos: Optional[int] = None) -> str:
    """Generate Hashcat rule for bitwise shift left (LN).

    Hashcat documents this function as taking a position (``LN``). Pass
    *pos* to get that form; without it the bare ``L`` previously returned by
    this helper is kept for existing callers.
    """
    return "L" if pos is None else f"L{_encode_position(pos)}"


def hashcat_bitwise_shift_right(pos: Optional[int] = None) -> str:
    """Generate Hashcat rule for bitwise shift right (RN).

    Hashcat documents this function as taking a position (``RN``). Pass
    *pos* to get that form; without it the bare ``R`` previously returned by
    this helper is kept for existing callers.
    """
    return "R" if pos is None else f"R{_encode_position(pos)}"


def hashcat_swap_positions(pos1: int, pos2: int) -> str:
    """Generate Hashcat rule to swap characters at two positions (*NM)."""
    return f"*{_encode_position(pos1)}{_encode_position(pos2)}"


def hashcat_insert_at_position(pos: int, char: str) -> Optional[str]:
    """Generate Hashcat rule to insert character at position (iNX).

    Returns None when *char* is not exactly one ASCII character.
    """
    if not is_ascii_safe(char) or len(char) != 1:
        return None
    return f"i{_encode_position(pos)}{char}"


def hashcat_overwrite_at_position(pos: int, char: str) -> Optional[str]:
    """Generate Hashcat rule to overwrite character at position (oNX).

    Returns None when *char* is not exactly one ASCII character.
    """
    if not is_ascii_safe(char) or len(char) != 1:
        return None
    return f"o{_encode_position(pos)}{char}"


def hashcat_delete_at_position(pos: int) -> str:
    """Generate Hashcat rule to delete character at position (DN)."""
    return f"D{_encode_position(pos)}"


def hashcat_extract_range(start: int, length: int) -> str:
    """Generate Hashcat rule to extract substring (xNM)."""
    return f"x{_encode_position(start)}{_encode_position(length)}"


def hashcat_purge_character(char: str) -> Optional[str]:
    """Generate Hashcat rule to purge all instances of character (@X).

    Returns None when *char* is not exactly one ASCII character.
    """
    if not is_ascii_safe(char) or len(char) != 1:
        return None
    return f"@{char}"

def generate_advanced_swap_rules(common_positions: Optional[List[int]] = None) -> List[str]:
    """
    Build swap rules for every pair of the given positions.

    Each position is paired with every later entry of *common_positions*
    (default ``[0, 1, 2, -1, -2]``). Pairs containing a negative position are
    skipped because Hashcat uses positive indices only.
    """
    if common_positions is None:
        common_positions = [0, 1, 2, -1, -2]

    return [
        hashcat_swap_positions(first, second)
        for idx, first in enumerate(common_positions)
        for second in common_positions[idx + 1:]
        if first >= 0 and second >= 0
    ]

def generate_numeric_sequence_rules() -> List[str]:
    """
    Build rules that manipulate numeric sequences.

    For each common digit sequence an append rule and then a prepend rule is
    emitted (when the helper can build one), followed by a fixed set of
    character-substitution rules.
    """
    # Common number sequences to append/prepend
    sequences = ('123', '1234', '321', '12345', '456', '789', '000', '111')
    # Replace numbers with other characters (common patterns)
    number_substitutions = (
        ('0', '1'), ('1', '2'), ('2', '3'),
        ('0', '!'), ('1', '!'),
        ('9', '0'), ('8', '9'),
    )

    rules = []
    for seq in sequences:
        # Append first, then prepend, for every sequence.
        for build_rule in (hashcat_append, hashcat_prepend):
            rule = build_rule(seq)
            if rule:
                rules.append(rule)

    rules.extend(f"s{old}{new}" for old, new in number_substitutions)
    return rules

def generate_combined_prepend_append_rules(
    prefixes: List[str],
    suffixes: List[str],
    case_transforms: Optional[List[str]] = None
) -> List[str]:
    """
    Generate combined prepend/append rules with case transformations.

    Only the first 20 prefixes and first 20 suffixes are considered, and
    entries that are non-ASCII, longer than 4 characters, or rejected by the
    prepend/append helpers are skipped. For every remaining prefix/suffix
    pair the plain combination is emitted, followed by two variants per case
    operation: the case operation first, and the case operation between the
    prepend and the append.

    Args:
        prefixes: Candidate prefixes, most relevant first.
        suffixes: Candidate suffixes, most relevant first.
        case_transforms: Case operations to combine with; defaults to
            ``['l', 'c', 'u']``.

    Returns:
        The rule strings, ordered by prefix, then suffix.
    """
    if case_transforms is None:
        case_transforms = ['l', 'c', 'u']

    def usable(text: str) -> bool:
        return is_ascii_safe(text) and len(text) <= 4

    # The suffix rules do not depend on the prefix, so build them once.
    suffix_rules = [
        app
        for app in (hashcat_append(s) for s in suffixes[:20] if usable(s))
        if app
    ]

    rules = []
    for prefix in prefixes[:20]:  # Limit to top 20 for performance
        if not usable(prefix):
            continue
        prep = hashcat_prepend(prefix)
        if not prep:
            continue

        for app in suffix_rules:
            # Basic combination
            rules.append(f"{prep} {app}")

            # With case transformations
            for case_op in case_transforms:
                rules.append(f"{case_op} {prep} {app}")
                rules.append(f"{prep} {case_op} {app}")

    return rules

# =============================================

# ADVANCED FEATURES: LEET MAPPING

# =============================================

# Leet-speak replacements per letter. Entries may be longer than one character.
LEET_MAP = {
    'a': ['@', '4', '^'],
    'b': ['8', '6', '13'],
    'c': ['(', '<', '{', '['],
    'd': ['6', ')'],
    'e': ['3', '&'],
    'f': ['#', 'ph'],
    'g': ['9', '6', '&'],
    'h': ['#', '|-|'],
    'i': ['1', '!', '|'],
    'j': ['_', '_|'],
    'k': ['X', '|<'],
    'l': ['1', '|', '!', '£'],
    'm': ['|\\/|', '/\\/\\'],
    'n': ['|\\|', '/\\/'],
    'o': ['0', '()'],
    'p': ['9', '|*'],
    'q': ['9', '0_'],
    'r': ['|2', '12'],
    's': ['$', '5', 'z'],
    't': ['7', '+'],
    'u': ['|_|', 'v'],
    'v': ['\\/', '|/'],
    'w': ['\\/\\/', 'vv'],
    'x': ['%', '><'],
    'y': ['j', '`/'],
    'z': ['2', '5'],
}

# Upper-case letters get their own copy of the lower-case replacements,
# added after the lower-case entries in the same alphabetical order.
LEET_MAP.update({
    letter.upper(): list(variants)
    for letter, variants in list(LEET_MAP.items())
})

# =============================================

# Multi-Character Leet Expansion Functionality

# =============================================

def preprocess_leet_expansions(passwords: List[str], leet_map: Dict[str, List[str]], max_expansions: int = 5000) -> Set[str]:
    """
    Expand passwords with (possibly multi-character) leet substitutions.

    Every character that has an entry in *leet_map* is either kept or
    replaced by one of its variants; all combinations are produced, and the
    unmodified password is always part of the result.

    Args:
        passwords: Passwords to expand.
        leet_map: Mapping from a character to its replacement strings.
        max_expansions: Number of leading *passwords* that are processed.
            It limits the input, not the size of the result.

    Returns:
        The set of all expanded passwords.

    Note:
        The number of variants per password grows exponentially with the
        number of replaceable characters.
    """
    def expand_word(word: str) -> Set[str]:
        """Expand a word with multi-character leet substitutions."""
        expanded = set()
        # (candidate, index of the next character to decide). A stack gives
        # O(1) pops; visiting order does not matter because results go to a set.
        pending = [(word, 0)]

        while pending:
            current_word, idx = pending.pop()
            if idx >= len(current_word):
                expanded.add(current_word)
                continue

            # Apply all leet transformations, resuming after the inserted
            # variant so its own characters are not expanded again.
            for variant in leet_map.get(current_word[idx], ()):
                new_word = current_word[:idx] + variant + current_word[idx + 1:]
                pending.append((new_word, idx + len(variant)))

            # Proceed without modification
            pending.append((current_word, idx + 1))

        return expanded

    expanded_set = set()
    for password in passwords[:max_expansions]:  # Limit to avoid performance issues
        expanded_set.update(expand_word(password))

    return expanded_set

def generate_leet_rules(word: str, max_substitutions: int = 2) -> List[str]:
    """
    Generate leet-speak Hashcat substitution rules for a word.

    Uses the 's' command for character substitution. Only replacements that
    are a single ASCII character are used: multi-character replacements
    cannot be expressed with 's', and non-ASCII characters are not safe in
    Hashcat rules (see is_ascii_safe).

    Args:
        word: Word whose letters are looked up (lower-cased) in LEET_MAP.
        max_substitutions: When at least 2, rules combining two substitutions
            are emitted for each letter and the next three replaceable letters.

    Returns:
        Single-substitution rules followed by the double-substitution rules.
        A letter occurring several times in *word* contributes its rules
        once per occurrence.
    """
    def usable_variants(char: str) -> List[str]:
        return [v for v in LEET_MAP[char] if len(v) == 1 and is_ascii_safe(v)]

    leet_chars = [c for c in word.lower() if c in LEET_MAP]
    if not leet_chars:
        return []

    # Single substitutions
    rules = [
        f"s{char}{variant}"
        for char in leet_chars
        for variant in usable_variants(char)
    ]

    # Double substitutions (if enough positions)
    if len(leet_chars) >= 2 and max_substitutions >= 2:
        for i, char1 in enumerate(leet_chars):
            for char2 in leet_chars[i + 1:i + 4]:
                for leet1 in usable_variants(char1):
                    for leet2 in usable_variants(char2):
                        rules.append(f"s{char1}{leet1} s{char2}{leet2}")

    return rules

# =============================================

# ADVANCED FEATURES: BFS RULE GENERATION

# =============================================

class BFSRuleGenerator:

"""

Generate complex Hashcat rules using BFS exploration.

Combines multiple operations in sequence for comprehensive coverage.

Enhanced with advanced operations: swaps, rotations, insertions.

"""

# Basic Hashcat operations (including Hashcat 7+ features)

OPERATIONS = [

('l', 'lowercase'),

('u', 'uppercase'),

('c', 'capitalize'),

('C', 'invert capitalize'),

('E', 'title case'), # Hashcat 7+

('t', 'toggle case'),

('r', 'reverse'),

('d', 'duplicate'),

('{', 'rotate left'),

('}', 'rotate right'),

('L', 'bitwise shift left'), # Advanced

('R', 'bitwise shift right'), # Advanced

]

# Advanced positional operations

POSITIONAL_OPS = [

('T0', 'toggle case at position 0'),

('T1', 'toggle case at position 1'),

('D0', 'delete at position 0'),

('*01', 'swap positions 0 and 1'),

('*02', 'swap positions 0 and 2'),

('*12', 'swap positions 1 and 2'),

]

def __init__(self, max_depth: int = 3, include_advanced: bool = True):
    """
    Configure the generator.

    max_depth: rule chains stop being extended once they contain this many
        operations (see generate).
    include_advanced: when True, generate() also combines the first three
        POSITIONAL_OPS.
    """
    self.max_depth = max_depth
    self.include_advanced = include_advanced
    # Starts empty; nothing in the visible part of the class fills it.
    self.rules: Set[str] = set()

def generate(self, base_ops: Optional[List[str]] = None) -> List[Tuple[int, str]]:

"""

Generate rules using BFS with scoring.

Returns list of (score, rule) tuples.

"""

if base_ops is None:

base_ops = [op[0] for op in self.OPERATIONS[:10]] # Use more operations

# Add positional operations if advanced mode

if self.include_advanced:

base_ops.extend([op[0] for op in self.POSITIONAL_OPS[:3]])

scored_rules = []

queue = deque([("", 0)]) # (rule, depth)

seen = {""}

while queue:

current_rule, depth = queue.popleft()

if depth > 0:

# Score based on complexity and depth

score = SCORE_BFS_BASE // (depth + 1)

scored_rules.append((score, current_rule.strip()))

if depth >= self.max_depth:

continue

# Expand with each operation

for op in base_ops:

new_rule = f"{current_rule} {op}".strip() if current_rule else op

if new_rule not in seen:

seen.add(new_rule)

queue.append((new_rule, depth + 1))

return scored_rules

def generate_rotation_swap_combos(self) -> List[Tuple[int, str]]:
    """
    Build scored rules that combine rotations and position swaps.

    The result lists, in order: each rotation alone and paired with a case
    operation (both orders), each swap alone and followed by 'l' or 'c', and
    finally the single rotations followed by one of the first three swaps.
    """
    combos = []

    # Rotations (single and double), alone and with case changes
    for rot in ('{', '}', '{{', '}}'):
        combos.append((SCORE_ROTATION_SWAP_HIGH, rot))
        for case_op in ('l', 'c', 'u'):
            combos.append((SCORE_ROTATION_SWAP_MEDIUM, f"{rot} {case_op}"))
            combos.append((SCORE_ROTATION_SWAP_MEDIUM - 5000, f"{case_op} {rot}"))

    # Swaps, alone and with case changes
    swap_rules = generate_advanced_swap_rules([0, 1, 2, 3])
    for swap in swap_rules:
        combos.extend([
            (SCORE_ADVANCED_SWAP, swap),
            (SCORE_SWAP_CASE, f"{swap} l"),
            (SCORE_SWAP_CASE - 5000, f"{swap} c"),
        ])

    # Rotation followed by a swap (only the first three swaps, to limit combinations)
    combos.extend(
        (SCORE_ROTATION_SWAP_LOW, f"{rot} {swap}")
        for rot in ('{', '}')
        for swap in swap_rules[:3]
    )

    return combos

def generate_append_prepend_combos(self, common_strings: List[str], limit: int = 100) -> List[Tuple[int, str]]:

"""

Generate BFS-style combinations of prepend and append operations.

"""

scored_rules = []

for s in common_strings[:limit]:

if len(s) < 1 or len(s) > 4:

continue

# Prepend only

prep = hashcat_prepend(s)

if not prep:

continue

View remainder of file in raw view

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

FilesExpand file tree

listminer.py

Latest commit

History

listminer.py

File metadata and controls