Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
lukas-blecher committed Oct 2, 2023
2 parents 97dc445 + ee62830 commit 1d1394c
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 5 deletions.
8 changes: 7 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,18 @@ Install via
### Get prediction for a PDF
#### CLI

To get predictions for a PDF run
To get predictions for a PDF, run:

```
$ nougat path/to/file.pdf -o output_directory
```

Instead of a single PDF, the positional argument can also be a path to a directory (searched recursively for `*.pdf` files) or a path to a text file in which each line is the path to a PDF:

```
$ nougat path/to/directory -o output_directory
```

```
usage: nougat [-h] [--batchsize BATCHSIZE] [--checkpoint CHECKPOINT] [--model MODEL] [--out OUT]
[--recompute] [--markdown] [--no-skipping] pdf [pdf ...]
Expand Down
2 changes: 1 addition & 1 deletion nougat/_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@
LICENSE file in the root directory of this source tree.
"""

__version__ = "0.1.13"
__version__ = "0.1.14"
12 changes: 9 additions & 3 deletions predict.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import re
import argparse
import re
import os
from functools import partial
import torch
from torch.utils.data import ConcatDataset
Expand Down Expand Up @@ -91,9 +92,14 @@ def get_args():
if len(args.pdf) == 1 and not args.pdf[0].suffix == ".pdf":
# input is a list of pdfs
try:
args.pdf = [
Path(l) for l in open(args.pdf[0]).read().split("\n") if len(l) > 0
]
pdfs_path = args.pdf[0]
if pdfs_path.is_dir():
args.pdf = list(pdfs_path.rglob("*.pdf"))
else:
args.pdf = [
Path(l) for l in open(pdfs_path).read().split("\n") if len(l) > 0
]
logging.info(f"Found {len(args.pdf)} files.")
except:
pass
if args.pages and len(args.pdf) == 1:
Expand Down

0 comments on commit 1d1394c

Please sign in to comment.