Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions .github/workflows/lint.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
name: Lint & Type Check

on:
push:
branches: [main]
pull_request:
branches: [main]

jobs:
lint:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.12'
- name: Install uv
uses: astral-sh/setup-uv@v2
- name: Install dependencies
run: uv sync
- name: Run Ruff check
run: uv run ruff check pytextractor/ tests/
- name: Run Ruff format check
run: uv run ruff format --check pytextractor/ tests/
- name: Run type checking with ty
run: uv run ty check
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,4 @@ jobs:
pip install pytest
pip install -e .
- name: Run tests
run: pytest tests/
run: pytest tests/ -m "not pytesseract"
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -112,3 +112,4 @@ tags

#venv
bin
.claude/worktrees/
15 changes: 15 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,26 @@ build-backend = "hatchling.build"

[tool.pytest.ini_options]
testpaths = ["tests"]
markers = [
"pytesseract: tests that require pytesseract/tesseract binary",
]

[dependency-groups]
dev = [
"pytest",
"ruff",
"ty",
]

[tool.ruff]
line-length = 120
target-version = "py312"

[tool.ruff.lint]
select = ["E", "W", "F", "I", "UP", "B", "C4", "T10"]

[tool.ruff.format]
indent-style = "space"

[project.optional-dependencies]
dev = []
3 changes: 3 additions & 0 deletions pytextractor/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from .pytextractor import PyTextractor

__all__ = ["PyTextractor"]
Empty file added pytextractor/py.typed
Empty file.
77 changes: 36 additions & 41 deletions pytextractor/pytextractor.py
Original file line number Diff line number Diff line change
@@ -1,47 +1,48 @@
import time
import os
import time

import requests
import cv2
import numpy as np
import pytesseract
import requests
from imutils.object_detection import non_max_suppression


class PyTextractor:
layer_names = ('feature_fusion/Conv_7/Sigmoid', 'feature_fusion/concat_3',)
layer_names = (
"feature_fusion/Conv_7/Sigmoid",
"feature_fusion/concat_3",
)

def __init__(self, east=None):
pkg_east_model = os.path.join(os.path.dirname(__file__), 'data', 'frozen_east_text_detection.pb')
pkg_east_model = os.path.join(os.path.dirname(__file__), "data", "frozen_east_text_detection.pb")
self.east = east or pkg_east_model
self._load_assets()

def get_image_text(self,
image,
width=320,
height=320,
display=False,
numbers=False,
confidence=0.5,
percentage=2.0,
min_boxes=1,
max_iterations=20,
**kwargs):
def get_image_text(
self,
image,
width=320,
height=320,
display=False,
numbers=False,
confidence=0.5,
percentage=2.0,
min_boxes=1,
max_iterations=20,
**kwargs,
):
loaded_image = self._load_image(image)
image, width, height, ratio_width, ratio_height = self._resize_image(
loaded_image, width, height
)
image, width, height, ratio_width, ratio_height = self._resize_image(loaded_image, width, height)
scores, geometry = self._compute_scores_geometry(image, width, height)
(num_rows, num_cols) = scores.shape[2:4]

start = time.time()
boxes = self._get_boxes(num_rows, num_cols, confidence, geometry, scores, min_boxes, max_iterations)
end = time.time()
print('Found {boxes} ROIs {seconds:.6f} seconds'.format(boxes=len(boxes), seconds=(end - start)))
print(f"Found {len(boxes)} ROIs {end - start:.6f} seconds")

return self._extract_text(
loaded_image, boxes, percentage, display, numbers, ratio_width, ratio_height
)
return self._extract_text(loaded_image, boxes, percentage, display, numbers, ratio_width, ratio_height)

def _load_image(self, image):
    """Read the image at path *image* from disk with OpenCV.

    Args:
        image: Filesystem path of the image to read.

    Returns:
        The decoded image as returned by ``cv2.imread``.

    Raises:
        FileNotFoundError: If no file exists at *image*.
        ValueError: If the file exists but OpenCV cannot decode it.
    """
    loaded = cv2.imread(image)
    if loaded is None:
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with a clear message rather than handing None on to
        # the resize step.
        if not os.path.exists(image):
            raise FileNotFoundError(f"Image file not found: {image}")
        raise ValueError(f"Could not decode image file: {image}")
    return loaded
Expand All @@ -53,7 +54,6 @@ def _resize_image(self, image, width, height):
ratio_width = W / float(newW)
ratio_height = H / float(newH)


# resize the image and grab the new image dimensions
resized_image = cv2.resize(image, (newW, newH))
(H, W) = resized_image.shape[:2]
Expand All @@ -62,24 +62,22 @@ def _resize_image(self, image, width, height):
def _compute_scores_geometry(self, image, width, height):
# construct a blob from the image and then perform a forward pass of
# the model to obtain the two output layer sets
blob = cv2.dnn.blobFromImage(
image, 1.0, (width, height), (123.68, 116.78, 103.94), swapRB=True, crop=False
)
blob = cv2.dnn.blobFromImage(image, 1.0, (width, height), (123.68, 116.78, 103.94), swapRB=True, crop=False)
start = time.time()
self.east_net.setInput(blob)
(scores, geometry) = self.east_net.forward(self.layer_names)
end = time.time()

# show timing information on text prediction
print('[INFO] text detection took {:.6f} seconds'.format(end - start))
print(f"[INFO] text detection took {end - start:.6f} seconds")
return (scores, geometry)

def _load_assets(self):
self._get_east()
start = time.time()
self.east_net = cv2.dnn.readNet(self.east)
end = time.time()
print('[INFO] Loaded EAST text detector {:.6f} seconds ...'.format(end - start))
print(f"[INFO] Loaded EAST text detector {end - start:.6f} seconds ...")

def _get_east(self):
if os.path.exists(self.east):
Expand All @@ -88,9 +86,9 @@ def _get_east(self):
pkg_path = os.path.dirname(__file__)
data_file = os.path.join(pkg_path, self.east)
os.makedirs(os.path.dirname(data_file))
print('Downloading east data file to {}'.format(data_file))
with open(data_file, 'wb') as fp:
with requests.get('https://tinyurl.com/yxdd7kb5', stream=True) as response:
print(f"Downloading east data file to {data_file}")
with open(data_file, "wb") as fp:
with requests.get("https://tinyurl.com/yxdd7kb5", stream=True) as response:
for chunk in response.iter_content(chunk_size=2048):
fp.write(chunk)

Expand All @@ -99,7 +97,7 @@ def _get_boxes(self, num_rows, num_cols, confidence, geometry, scores, min_boxes
boxes = []
rects = []
confidences = []
while(iterations < max_iterations):
while iterations < max_iterations:
for y in range(0, num_rows):
# extract the scores (probabilities), followed by the geometrical
# data used to derive potential bounding box coordinates that
Expand Down Expand Up @@ -151,12 +149,11 @@ def _get_boxes(self, num_rows, num_cols, confidence, geometry, scores, min_boxes
return boxes
else:
confidence /= 2
print('Couldn\'t find at least {min_boxes} boxe(s), halving confidence to {confidence}'.
format(min_boxes=min_boxes, confidence=confidence))
print(f"Couldn't find at least {min_boxes} boxe(s), halving confidence to {confidence}")

def _extract_text(self, image, boxes, percent, display, numbers, ratio_width, ratio_height):
extracted_text = []
for (start_X, start_Y, end_X, end_Y) in boxes:
for start_X, start_Y, end_X, end_Y in boxes:
# scale the bounding box coordinates based on the respective
# ratios
percent = (percent / 100 + 1) if percent >= 0 else ((100 - percent) / 100)
Expand All @@ -170,16 +167,14 @@ def _extract_text(self, image, boxes, percent, display, numbers, ratio_width, ra
cv2.rectangle(image, (start_X, start_Y), (end_X, end_Y), (0, 255, 0), 2)

ROIImage = image.copy()[start_Y:end_Y, start_X:end_X]
config = '--psm 6' if numbers else ''
extracted_text.append(pytesseract.image_to_string(
ROIImage, config=config)
)
config = "--psm 6" if numbers else ""
extracted_text.append(pytesseract.image_to_string(ROIImage, config=config))
if display:
cv2.imshow('SubImage', ROIImage)
cv2.imshow("SubImage", ROIImage)

# show the output image
if display:
cv2.imshow('Text Detection', image)
cv2.imshow("Text Detection", image)
cv2.waitKey(0)

return extracted_text
30 changes: 15 additions & 15 deletions pytextractor/text_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,33 +3,33 @@
import argparse
import sys

from .pytextractor import PyTextractor
from pytextractor import PyTextractor


def text_detector():
# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser(description='Text/Number extractor from image')
ap.add_argument('images', type=str, nargs='+', help='path(s) to input image(s)')
ap.add_argument('--east', type=str, required=False, help='path to input EAST text detector')
ap = argparse.ArgumentParser(description="Text/Number extractor from image")
ap.add_argument("images", type=str, nargs="+", help="path(s) to input image(s)")
ap.add_argument("--east", type=str, required=False, help="path to input EAST text detector")
ap.add_argument(
'-c', '--confidence', type=float, default=0.5, help='minimum probability required to inspect a region'
"-c", "--confidence", type=float, default=0.5, help="minimum probability required to inspect a region"
)
ap.add_argument('-w', '--width', type=int, default=320, help='resized image width (should be multiple of 32)')
ap.add_argument('-e', '--height', type=int, default=320, help='resized image height (should be multiple of 32)')
ap.add_argument('-d', '--display', action='store_true', help='Display bounding boxes')
ap.add_argument('-n', '--numbers', action='store_true', help='Detect only numbers')
ap.add_argument('-p', '--percentage', type=float, default=2.0, help='Expand/shrink detected bound box')
ap.add_argument('-b', '--min-boxes', type=int, default=1, help='minimum number of detected boxes to return')
ap.add_argument('-i', '--max-iterations', type=int, default=20, help='max number of iterations finding min_boxes')
ap.add_argument("-w", "--width", type=int, default=320, help="resized image width (should be multiple of 32)")
ap.add_argument("-e", "--height", type=int, default=320, help="resized image height (should be multiple of 32)")
ap.add_argument("-d", "--display", action="store_true", help="Display bounding boxes")
ap.add_argument("-n", "--numbers", action="store_true", help="Detect only numbers")
ap.add_argument("-p", "--percentage", type=float, default=2.0, help="Expand/shrink detected bound box")
ap.add_argument("-b", "--min-boxes", type=int, default=1, help="minimum number of detected boxes to return")
ap.add_argument("-i", "--max-iterations", type=int, default=20, help="max number of iterations finding min_boxes")

kwargs = vars(ap.parse_args())
images = kwargs.pop('images')
extractor = PyTextractor(kwargs.pop('east'))
images = kwargs.pop("images")
extractor = PyTextractor(kwargs.pop("east"))
for image in images:
for text in extractor.get_image_text(image, **kwargs):
print(text)
return 0


if __name__ == '__main__':
if __name__ == "__main__":
sys.exit(text_detector())
63 changes: 43 additions & 20 deletions tests/test_detector.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import re
import os
import re
import subprocess

import pytest

from pytextractor.pytextractor import PyTextractor

_HERE = os.path.dirname(__file__)
Expand All @@ -11,43 +13,64 @@ def resource_filename(package, path):
return os.path.join(_HERE, path)


class TestDetector(object):
class TestDetector:
def setup_method(self, method):
self.extractor = PyTextractor()

@pytest.mark.parametrize('image', [
resource_filename(__name__, 'test_images/52.png'),
resource_filename(__name__, 'test_images/53.png'),
resource_filename(__name__, 'test_images/56.png'),
])
@pytest.mark.pytesseract
@pytest.mark.parametrize(
"image",
[
resource_filename(__name__, "test_images/52.png"),
resource_filename(__name__, "test_images/53.png"),
resource_filename(__name__, "test_images/56.png"),
],
)
def test_number_detector(self, image):
    """Check that number mode yields a non-empty result for *image*."""
    # The flag accepted by get_image_text is ``numbers``; the previous
    # ``number=True`` was absorbed by **kwargs and silently ignored, so
    # number mode was never actually exercised by this test.
    detected = self.extractor.get_image_text(image, numbers=True)
    assert len(detected)

@pytest.mark.parametrize('image,expected_number', [
(resource_filename(__name__, 'test_images/52.png'), 52),
])
@pytest.mark.pytesseract
@pytest.mark.parametrize(
"image,expected_number",
[
(resource_filename(__name__, "test_images/52.png"), 52),
],
)
def test_number_extractor(self, image, expected_number):
detected = self.extractor.get_image_text(image, number=True)
assert(int(detected[0]) == expected_number)

assert int(detected[0]) == expected_number

@pytest.mark.pytesseract
@pytest.mark.skip(reason="[LIMITATION] can't detect number")
@pytest.mark.parametrize('image,expected_number', [
(resource_filename(__name__, 'test_images/53.png'), 53),
(resource_filename(__name__, 'test_images/56.png'), 56),
(resource_filename(__name__, 'test_images/57.png'), 57),
])
@pytest.mark.parametrize(
"image,expected_number",
[
(resource_filename(__name__, "test_images/53.png"), 53),
(resource_filename(__name__, "test_images/56.png"), 56),
(resource_filename(__name__, "test_images/57.png"), 57),
],
)
def test_number_extractor_tweaked(self, image, expected_number):
detected = self.extractor.get_image_text(image, number=True, percentage=4, min_boxes=2)
for roi in detected:
try:
print(roi, ' ? ', expected_number)
number = int(re.sub(r'\D', '', roi))
print(roi, " ? ", expected_number)
number = int(re.sub(r"\D", "", roi))
if number == expected_number:
break
except ValueError:
pass
else:
assert False
raise AssertionError("No ROI detected")


def test_cli_help():
    """Test that the CLI --help flag works."""
    import sys

    # Run the module with the interpreter that is executing the tests; a
    # bare "python" may be missing from PATH or resolve to an environment
    # where pytextractor is not installed.
    result = subprocess.run(
        [sys.executable, "-m", "pytextractor.text_detection", "--help"],
        capture_output=True,
        text=True,
    )
    assert result.returncode == 0
    assert "Text/Number extractor from image" in result.stdout
Loading
Loading