Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,11 @@ Run in command line:
pip3 install manga-ocr
```

Local install (editable, from a local clone of this repository):
```commandline
pip install -e path/to/manga-ocr
```

## Troubleshooting

- `ImportError: DLL load failed while importing fugashi: The specified module could not be found.` - might be because of Python installed from Microsoft Store, try installing Python from the [official site](https://www.python.org/downloads)
Expand Down
2 changes: 1 addition & 1 deletion manga_ocr/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
__version__ = '0.1.11'
__version__ = '0.1.12'

from manga_ocr.ocr import MangaOcr
2 changes: 1 addition & 1 deletion manga_ocr/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@


def main():
    """Console entry point: expose ``run`` through python-fire.

    ``read_from`` and ``write_to`` default to ``'cli'`` for this entry point,
    but every option can still be overridden with command-line flags.
    """

    # Fire must be given a callable. Calling ``run(...)`` here directly would
    # start its loop before Fire parses any argument, so command-line flags
    # would be ignored.
    def run_cli(read_from='cli', write_to='cli', **kwargs):
        return run(read_from=read_from, write_to=write_to, **kwargs)

    fire.Fire(run_cli)


if __name__ == '__main__':
Expand Down
21 changes: 16 additions & 5 deletions manga_ocr/ocr.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,15 @@ def __init__(self, pretrained_model_name_or_path='kha-white/manga-ocr-base', for

if not force_cpu and torch.cuda.is_available():
logger.info('Using CUDA')
print('Using CUDA')
self.model.cuda()
elif not force_cpu and torch.backends.mps.is_available():
logger.info('Using MPS')
print('Using MPS')
self.model.to('mps')
else:
logger.info('Using CPU')
print('Using CPU')

example_path = Path(__file__).parent / 'assets/example.jpg'
if not example_path.is_file():
Expand All @@ -33,18 +36,24 @@ def __init__(self, pretrained_model_name_or_path='kha-white/manga-ocr-base', for

def __call__(self, img_or_path):
    """Run OCR on a single image or on a batch of images.

    Args:
        img_or_path: a path to an image file (``str`` or ``Path``), a PIL
            image, or a list/tuple of PIL images.

    Returns:
        A list with one post-processed text per input image, in input order.
        An empty list/tuple yields an empty list.

    Raises:
        ValueError: if ``img_or_path`` is none of the supported types.
    """
    if isinstance(img_or_path, (str, Path)):
        images = [Image.open(img_or_path)]
    elif isinstance(img_or_path, Image.Image):
        images = [img_or_path]
    elif isinstance(img_or_path, (tuple, list)):
        images = list(img_or_path)
    else:
        raise ValueError(
            'img_or_path must be a path, a PIL.Image or a list/tuple of PIL.Image, '
            f'instead got: {img_or_path}'
        )

    # Nothing to recognise; avoid handing an empty batch to the model.
    if not images:
        return []

    # Round-trip through grayscale so every image ends up as 3-channel gray.
    images = [image.convert('L').convert('RGB') for image in images]

    x = self._preprocess(images)

    # NOTE(review): _preprocess presumably squeezes a one-image batch down to
    # 3 dimensions; restore the batch dimension in that case - confirm.
    if x.dim() == 3:
        x = x.unsqueeze(0)

    x = self.model.generate(x.to(self.model.device), max_length=300).cpu()
    texts = [self.tokenizer.decode(sequence, skip_special_tokens=True) for sequence in x]
    return [post_process(text) for text in texts]

def _preprocess(self, img):
Expand All @@ -58,4 +67,6 @@ def post_process(text):
text = re.sub('[・.]{2,}', lambda x: (x.end() - x.start()) * '.', text)
text = jaconv.h2z(text, ascii=True, digit=True)

if not bool(text.strip()):
return "<no ocr>"
return text
31 changes: 29 additions & 2 deletions manga_ocr/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,13 @@ def process_and_write_results(mocr, img_or_path, write_to):

if write_to == 'clipboard':
pyperclip.copy(text)
elif write_to == 'cli':
for i in text:
print(i)
else:
write_to = Path(write_to)
if write_to.suffix != '.txt':
raise ValueError('write_to must be either "clipboard" or a path to a text file')
raise ValueError('write_to must be either "clipboard", "cli" or a path to a text file')

with write_to.open('a', encoding="utf-8") as f:
f.write(text + '\n')
Expand Down Expand Up @@ -101,11 +104,19 @@ def run(read_from='clipboard',
process_and_write_results(mocr, img, write_to)

time.sleep(delay_secs)
elif read_from == 'cli':
logger.info(f'Reading from cli')

while True:
path = input('Enter image path:\n')
paths = path.split(",")
images = get_images(paths)
if len(images) > 0:
process_and_write_results(mocr, images, write_to)
else:
read_from = Path(read_from)
if not read_from.is_dir():
raise ValueError('read_from must be either "clipboard" or a path to a directory')
raise ValueError('read_from must be either "clipboard", "cli" or a path to a directory')

logger.info(f'Reading from directory {read_from}')

Expand All @@ -130,5 +141,21 @@ def run(read_from='clipboard',
time.sleep(delay_secs)


def get_images(paths):
    """Load every readable image among the given file paths.

    Args:
        paths: iterable of path strings (or ``Path`` objects). Surrounding
            whitespace is ignored and blank entries are skipped.

    Returns:
        A list of fully loaded PIL images, in input order. Entries that are
        not files, or that cannot be read as images, are reported on stdout
        and left out.
    """
    images = []
    for path in paths:
        # Entries typically come from splitting user input on commas, so
        # "a.png, b.png" yields " b.png"; strip before touching the disk.
        cleaned = str(path).strip()
        if not cleaned:
            continue

        read_from = Path(cleaned)
        if not read_from.is_file():
            print(f'{cleaned} is not a file')
            continue

        try:
            img = Image.open(read_from)
            # Force decoding now so a corrupt file fails here, not later.
            img.load()
        except (UnidentifiedImageError, OSError) as e:
            print(f'Error while reading file {read_from}: {e}')
        else:
            images.append(img)
    return images


if __name__ == '__main__':
fire.Fire(run)
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setup(
name="manga-ocr",
version='0.1.11',
version='0.1.12',
description="OCR for Japanese manga",
long_description=long_description,
long_description_content_type="text/markdown",
Expand Down