<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>OCR Text Extractor</title>
  <script src="https://cdnjs.cloudflare.com/ajax/libs/tesseract.js/4.1.1/tesseract.min.js"></script>
  <style>
    /* Page shell: centered column over a full-height gradient backdrop. */
    body {
      font-family: Arial, sans-serif;
      max-width: 1200px;
      margin: 0 auto;
      padding: 20px;
      background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
      min-height: 100vh;
    }

    /* Translucent card that wraps the whole UI. */
    .container {
      background: rgba(255, 255, 255, 0.95);
      border-radius: 15px;
      padding: 30px;
      box-shadow: 0 20px 40px rgba(0,0,0,0.1);
      backdrop-filter: blur(10px);
    }

    /* Gradient-filled title: the background is clipped to the glyphs and the
       text fill is made transparent; `color` remains as the fallback. */
    h1 {
      color: #333;
      text-align: center;
      margin-bottom: 30px;
      font-size: 2.5em;
      background: linear-gradient(45deg, #667eea, #764ba2);
      -webkit-background-clip: text;
      -webkit-text-fill-color: transparent;
      background-clip: text;
    }

    /* Drop zone and its hover / drag-over states. */
    .upload-area {
      border: 3px dashed #667eea;
      border-radius: 15px;
      padding: 40px;
      text-align: center;
      margin: 20px 0;
      background: linear-gradient(135deg, rgba(102, 126, 234, 0.1), rgba(118, 75, 162, 0.1));
      transition: all 0.3s ease;
    }

    .upload-area:hover {
      border-color: #764ba2;
      background: linear-gradient(135deg, rgba(102, 126, 234, 0.2), rgba(118, 75, 162, 0.2));
      transform: translateY(-2px);
    }

    /* The script toggles the `dragover` class while a file is dragged over. */
    .upload-area.dragover {
      border-color: #764ba2;
      background: linear-gradient(135deg, rgba(102, 126, 234, 0.3), rgba(118, 75, 162, 0.3));
    }

    /* The native file input stays hidden; the styled button opens it. */
    #fileInput {
      display: none;
    }

    .upload-button {
      background: linear-gradient(45deg, #667eea, #764ba2);
      color: white;
      padding: 12px 30px;
      border: none;
      border-radius: 25px;
      cursor: pointer;
      font-size: 16px;
      font-weight: bold;
      transition: all 0.3s ease;
      box-shadow: 0 5px 15px rgba(102, 126, 234, 0.3);
    }

    .upload-button:hover {
      transform: translateY(-2px);
      box-shadow: 0 10px 25px rgba(102, 126, 234, 0.4);
    }

    /* Progress UI: hidden until the script shows it. */
    .progress-container {
      margin: 20px 0;
      display: none;
    }

    .progress-bar {
      width: 100%;
      height: 20px;
      background: #e0e0e0;
      border-radius: 10px;
      overflow: hidden;
      box-shadow: inset 0 2px 5px rgba(0,0,0,0.1);
    }

    /* Width is set from the script; position: relative anchors the ::after overlay. */
    .progress-fill {
      height: 100%;
      background: linear-gradient(45deg, #667eea, #764ba2);
      width: 0%;
      transition: width 0.3s ease;
      position: relative;
    }

    /* Moving highlight layered over the filled part of the bar. */
    .progress-fill::after {
      content: '';
      position: absolute;
      top: 0;
      left: 0;
      right: 0;
      bottom: 0;
      background: linear-gradient(45deg, transparent, rgba(255,255,255,0.3), transparent);
      animation: shine 2s infinite;
    }

    @keyframes shine {
      0% { transform: translateX(-100%); }
      100% { transform: translateX(100%); }
    }

    /* Results panel: hidden until the script shows it. */
    .results {
      margin-top: 30px;
      display: none;
    }

    /* pre-wrap keeps the line breaks of the recognized text. */
    .extracted-text {
      background: #f8f9fa;
      border: 2px solid #e9ecef;
      border-radius: 10px;
      padding: 20px;
      white-space: pre-wrap;
      max-height: 400px;
      overflow-y: auto;
      font-family: 'Courier New', monospace;
      font-size: 14px;
      line-height: 1.6;
      box-shadow: inset 0 2px 10px rgba(0,0,0,0.05);
    }

    /* Status banner; the script adds one of the modifier classes below. */
    .status {
      margin: 15px 0;
      padding: 15px;
      border-radius: 8px;
      font-weight: bold;
      text-align: center;
    }

    .status.processing {
      background: linear-gradient(45deg, rgba(102, 126, 234, 0.1), rgba(118, 75, 162, 0.1));
      color: #667eea;
      border: 2px solid rgba(102, 126, 234, 0.3);
    }

    .status.success {
      background: linear-gradient(45deg, rgba(40, 167, 69, 0.1), rgba(40, 167, 69, 0.2));
      color: #28a745;
      border: 2px solid rgba(40, 167, 69, 0.3);
    }

    .status.error {
      background: linear-gradient(45deg, rgba(220, 53, 69, 0.1), rgba(220, 53, 69, 0.2));
      color: #dc3545;
      border: 2px solid rgba(220, 53, 69, 0.3);
    }

    /* OCR option selectors laid out in a wrapping row. */
    .controls {
      display: flex;
      gap: 15px;
      margin: 20px 0;
      flex-wrap: wrap;
    }

    .control-group {
      display: flex;
      flex-direction: column;
      gap: 5px;
    }

    .control-group label {
      font-weight: bold;
      color: #555;
      font-size: 14px;
    }

    .control-group select, .control-group input {
      padding: 8px 12px;
      border: 2px solid #e9ecef;
      border-radius: 8px;
      font-size: 14px;
      transition: border-color 0.3s ease;
    }

    /* The default outline is replaced by a colored border plus a focus ring. */
    .control-group select:focus, .control-group input:focus {
      outline: none;
      border-color: #667eea;
      box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.1);
    }

    /* Image preview: hidden until the script shows it. */
    .image-preview {
      max-width: 100%;
      max-height: 300px;
      border-radius: 10px;
      box-shadow: 0 10px 30px rgba(0,0,0,0.2);
      margin: 20px 0;
      display: none;
    }

    .copy-button {
      background: linear-gradient(45deg, #28a745, #20c997);
      color: white;
      padding: 10px 20px;
      border: none;
      border-radius: 20px;
      cursor: pointer;
      font-size: 14px;
      font-weight: bold;
      margin-top: 15px;
      transition: all 0.3s ease;
    }

    .copy-button:hover {
      transform: translateY(-2px);
      box-shadow: 0 5px 15px rgba(40, 167, 69, 0.3);
    }
  </style>
</head>
<body>
 <div class="container">
   <h1>🔍 OCR Text Extractor</h1>

   <!-- Drop zone. The file input is hidden by the stylesheet and opened by the button. -->
   <div class="upload-area" id="uploadArea">
     <div style="font-size: 3em; margin-bottom: 15px;" aria-hidden="true">📄</div>
     <p style="font-size: 18px; margin-bottom: 20px;">Drop your image or PDF file here, or click to select</p>
     <button class="upload-button" type="button" onclick="document.getElementById('fileInput').click()">
       Choose File
     </button>
     <input type="file" id="fileInput" accept="image/*,.pdf">
   </div>

   <!-- The script sets src and reveals this element when an image file is chosen. -->
   <img class="image-preview" id="imagePreview" alt="Preview of the selected image">

   <!-- OCR options; the script reads these values when recognition starts. -->
   <div class="controls">
     <div class="control-group">
       <label for="languageSelect">Language:</label>
       <select id="languageSelect">
         <option value="eng">English</option>
         <option value="fra">French</option>
         <option value="spa">Spanish</option>
         <option value="deu">German</option>
         <option value="ita">Italian</option>
         <option value="por">Portuguese</option>
         <option value="rus">Russian</option>
         <option value="chi_sim">Chinese (Simplified)</option>
         <option value="jpn">Japanese</option>
         <option value="kor">Korean</option>
         <option value="ara">Arabic</option>
       </select>
     </div>
     <div class="control-group">
       <label for="psmSelect">Page Segmentation Mode:</label>
       <select id="psmSelect">
         <option value="3">Fully automatic page segmentation (default)</option>
         <option value="6">Uniform block of text</option>
         <option value="7">Single text line</option>
         <option value="8">Single word</option>
         <option value="13">Raw line. Treat image as single text line</option>
       </select>
     </div>
     <div class="control-group">
       <label for="oemSelect">OCR Engine Mode:</label>
       <!-- Values follow Tesseract's OEM numbering:
            0 = legacy only, 1 = LSTM only, 2 = legacy + LSTM, 3 = default. -->
       <select id="oemSelect">
         <option value="3">Default</option>
         <option value="0">Legacy OCR Engine</option>
         <option value="1">LSTM OCR Engine</option>
         <option value="2">Legacy + LSTM combined</option>
       </select>
     </div>
   </div>

   <div class="progress-container" id="progressContainer">
     <div class="progress-bar">
       <div class="progress-fill" id="progressFill"></div>
     </div>
     <div id="progressText" style="text-align: center; margin-top: 10px; font-weight: bold;"></div>
   </div>

   <!-- Status banner; role="status" lets assistive technology announce updates. -->
   <div id="status" class="status" role="status" style="display: none;"></div>

   <div class="results" id="results">
     <h3>Extracted Text:</h3>
     <div class="extracted-text" id="extractedText"></div>
     <button class="copy-button" type="button" onclick="copyToClipboard()">📋 Copy Text</button>
   </div>
 </div>
<script>
  // Most recent OCR result; kept so the copy button can write it to the clipboard.
  let extractedTextContent = '';

  // Cached references to the page elements the script works with.
  // Each binding name matches the element id it is looked up by.
  const [
      fileInput,
      uploadArea,
      imagePreview,
      progressContainer,
      progressFill,
      progressText,
      status,
      results,
      extractedText
  ] = [
      'fileInput',
      'uploadArea',
      'imagePreview',
      'progressContainer',
      'progressFill',
      'progressText',
      'status',
      'results',
      'extractedText'
  ].map((id) => document.getElementById(id));
// Drag and drop functionality
// dragover must call preventDefault() so the element is accepted as a drop target.
uploadArea.addEventListener('dragover', (e) => {
    e.preventDefault();
    uploadArea.classList.add('dragover');
});

// Remove the highlight when the dragged item leaves the drop zone.
uploadArea.addEventListener('dragleave', () => {
    uploadArea.classList.remove('dragover');
});

// On drop, suppress the browser's default handling and process the first file only.
uploadArea.addEventListener('drop', (e) => {
    e.preventDefault();
    uploadArea.classList.remove('dragover');
    const files = e.dataTransfer.files;
    if (files.length > 0) {
        handleFile(files[0]);
    }
});
// File picker: process the first selected file, if any.
fileInput.addEventListener('change', (e) => {
    if (e.target.files.length > 0) {
        handleFile(e.target.files[0]);
    }
});
/**
 * Validate a chosen or dropped file, update the image preview, and start OCR.
 *
 * Files whose MIME type is neither image/* nor application/pdf are rejected
 * with an error status and are not passed to OCR.
 *
 * @param {File} file - The file selected by the user.
 */
function handleFile(file) {
    if (!file.type.startsWith('image/') && file.type !== 'application/pdf') {
        showStatus('Please select an image or PDF file.', 'error');
        return;
    }

    // Show image preview for images
    if (file.type.startsWith('image/')) {
        const reader = new FileReader();
        reader.onload = (e) => {
            imagePreview.src = e.target.result;
            imagePreview.style.display = 'block';
        };
        reader.readAsDataURL(file);
    } else {
        // No preview for PDFs; hide any preview left over from an earlier image.
        imagePreview.style.display = 'none';
    }

    performOCR(file);
}
/**
 * Run Tesseract.js OCR on a file and render the result.
 *
 * Reads the language, page segmentation mode and engine mode from the option
 * selects, shows progress while recognizing, then displays the extracted text
 * together with the reported confidence. Failures are logged and shown in the
 * status banner; the worker is terminated in every case.
 *
 * NOTE(review): handleFile also forwards application/pdf files here.
 * Tesseract.js recognizes image data, so PDFs likely fail unless they are
 * rasterized first — confirm.
 *
 * @param {File} file - The file to recognize.
 * @returns {Promise<void>}
 */
async function performOCR(file) {
    const language = document.getElementById('languageSelect').value;
    const psm = document.getElementById('psmSelect').value;
    // <select> values are strings; the engine mode is passed as a number.
    const oem = Number(document.getElementById('oemSelect').value);

    // Reset the progress UI so a previous run's state is not shown.
    progressFill.style.width = '0%';
    progressText.textContent = '';
    progressContainer.style.display = 'block';
    results.style.display = 'none';
    showStatus('Initializing OCR...', 'processing');

    let worker = null;
    try {
        // The page loads Tesseract.js 4.1.1, where createWorker() takes only
        // an options object and the language is loaded/initialized separately.
        worker = await Tesseract.createWorker({
            logger: (m) => {
                if (m.status === 'recognizing text') {
                    const progress = Math.round(m.progress * 100);
                    progressFill.style.width = progress + '%';
                    progressText.textContent = `Processing: ${progress}%`;
                    showStatus(`Processing: ${progress}%`, 'processing');
                }
            }
        });
        await worker.loadLanguage(language);
        await worker.initialize(language, oem);
        await worker.setParameters({
            tessedit_pageseg_mode: psm,
        });

        showStatus('Extracting text...', 'processing');
        const { data: { text, confidence } } = await worker.recognize(file);

        extractedTextContent = text;
        extractedText.textContent = text;
        results.style.display = 'block';
        progressContainer.style.display = 'none';
        showStatus(`Text extraction completed! Confidence: ${Math.round(confidence)}%`, 'success');
    } catch (error) {
        console.error('OCR Error:', error);
        // The rejection value is not always an Error object, so fall back to
        // its string form instead of printing "undefined".
        const detail = error instanceof Error ? error.message : String(error);
        showStatus('Error during text extraction: ' + detail, 'error');
        progressContainer.style.display = 'none';
    } finally {
        // Always release the worker, including when recognition failed.
        if (worker) {
            try {
                await worker.terminate();
            } catch (terminateError) {
                console.warn('Failed to terminate OCR worker:', terminateError);
            }
        }
    }
}
/**
 * Show a message in the status banner.
 *
 * @param {string} message - Text to display.
 * @param {string} type - Modifier class added next to `status`; callers in
 *   this file pass 'processing', 'success' or 'error'.
 */
function showStatus(message, type) {
    status.textContent = message;
    // Replaces the whole class list, so any previous modifier is dropped.
    status.className = `status ${type}`;
    status.style.display = 'block';
}
/**
 * Copy the most recently extracted text to the clipboard and report the
 * outcome in the status banner.
 */
function copyToClipboard() {
    // navigator.clipboard is not available in every context (for example
    // insecure origins); report that as a normal copy failure instead of throwing.
    if (!navigator.clipboard || typeof navigator.clipboard.writeText !== 'function') {
        console.error('Failed to copy text: ', new Error('Clipboard API is not available'));
        showStatus('Failed to copy text to clipboard', 'error');
        return;
    }

    navigator.clipboard.writeText(extractedTextContent).then(() => {
        showStatus('Text copied to clipboard!', 'success');
    }).catch(err => {
        console.error('Failed to copy text: ', err);
        showStatus('Failed to copy text to clipboard', 'error');
    });
}
// Auto-load PDF from file system if available
// window.fs is not a standard browser API. When it is missing, or the read
// fails, the error is caught below and the page waits for a manual upload.
window.addEventListener('load', async () => {
    try {
        // Try to read the uploaded PDF file
        const fileData = await window.fs.readFile('épreuves Polytech de yde et dla.pdf');
        const blob = new Blob([fileData], { type: 'application/pdf' });
        const file = new File([blob], 'épreuves Polytech de yde et dla.pdf', { type: 'application/pdf' });

        // Set language to French for this document
        document.getElementById('languageSelect').value = 'fra';
        showStatus('Loading PDF file...', 'processing');
        handleFile(file);
    } catch (error) {
        console.log('No PDF file found in file system, ready for manual upload');
    }
});
 </script>
</body>
</html>