In [1]:
import os
import string
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
from tqdm import tqdm

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Используемое устройство: {device}")


Используемое устройство: cuda


In [2]:
# Symbols the recogniser can emit. Index 0 is reserved for the CTC blank,
# so real symbols are numbered starting from 1.
ALPHABET = '-ABEKMHOPCTYX0123456789'
IDX_TO_CHAR = dict(enumerate(ALPHABET, start=1))
CHAR_TO_IDX = {symbol: position for position, symbol in IDX_TO_CHAR.items()}

# One output class per symbol plus one for the blank.
NUM_CLASSES = 1 + len(ALPHABET)


In [3]:
class LicensePlateDataset(Dataset):
    """License-plate images whose labels are encoded in their file names.

    Images are read from ``<root_dir>/img``. The label of an image is its file
    name without the extension, upper-cased and mapped through ``CHAR_TO_IDX``.
    """

    # Accepted image extensions, compared case-insensitively.
    IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')

    def __init__(self, root_dir, transform=None):
        """
        Args:
            root_dir (string): Path to a split directory (train, val, test)
                that contains an ``img`` sub-folder.
            transform (callable, optional): Transform applied to each image.
        """
        self.root_dir = root_dir
        self.img_dir = os.path.join(root_dir, 'img')
        self.transform = transform
        # os.listdir returns entries in arbitrary order; sorting makes the
        # index -> file mapping reproducible. Lower-casing the name keeps
        # files such as ``X.PNG`` from being silently skipped.
        self.images = sorted(
            name for name in os.listdir(self.img_dir)
            if name.lower().endswith(self.IMAGE_EXTENSIONS)
        )

    def __len__(self):
        """Return the number of image files found in the ``img`` folder."""
        return len(self.images)

    def __getitem__(self, idx):
        """Load one sample.

        Returns:
            tuple: ``(image, label)`` where ``image`` is the 'L'-mode image
            (after ``transform`` if one was given) and ``label`` is a 1-D long
            tensor of symbol indices.
        """
        img_name = self.images[idx]
        img_path = os.path.join(self.img_dir, img_name)
        image = Image.open(img_path).convert('L')
        if self.transform:
            image = self.transform(image)

        label_str = os.path.splitext(img_name)[0].upper()
        # Characters that are not in the alphabet are skipped, so the encoded
        # label can be shorter than the file name.
        label = [CHAR_TO_IDX[char] for char in label_str if char in CHAR_TO_IDX]
        label = torch.tensor(label, dtype=torch.long)

        return image, label


In [6]:
# Deterministic preprocessing used for validation, testing and inference:
# resize to 32x128, convert to a tensor, then normalise with mean 0.5 / std 0.5.
transform = transforms.Compose([
    transforms.Resize(size=(32, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5,), std=(0.5,)),
])

import random
import torchvision.transforms.functional as F
from torchvision import transforms

def random_padding(img):
    """Add a zero-filled border of random width to every side of ``img``.

    Each side gets its own margin, drawn uniformly from 5 to 15 (inclusive).
    """
    # Values are drawn in the order left, right, top, bottom so that seeded
    # runs keep producing the same margins.
    left, right, top, bottom = (random.randint(5, 15) for _ in range(4))
    # The padding tuple is passed as (left, top, right, bottom).
    return F.pad(img, (left, top, right, bottom), fill=0)

# Training-time preprocessing: the same resize/normalise steps as `transform`,
# with random augmentations in between.
transform_train = transforms.Compose([
    transforms.Lambda(random_padding),  # random margin around the image
    transforms.Resize(size=(32, 128)),  # bring the image to the network input size
    transforms.RandomAffine(degrees=10, translate=(0.1, 0.1), scale=(0.9, 1.1)),  # rotation, shift, zoom
    transforms.ColorJitter(brightness=0.2, contrast=0.2),  # brightness / contrast changes
    transforms.RandomPerspective(distortion_scale=0.2, p=0.5),  # perspective distortion
    transforms.GaussianBlur(kernel_size=3, sigma=(0.1, 2.0)),  # blur
    transforms.ToTensor(),  # convert to a tensor
    transforms.Normalize(mean=(0.5,), std=(0.5,)),  # normalisation
])

def collate_fn(batch):
    """Merge ``(image, label)`` samples into batch tensors for CTC training.

    Returns:
        tuple: the stacked images, every label joined into a single 1-D
        tensor, and a long tensor holding the length of each label.
    """
    image_seq, target_seq = zip(*batch)
    stacked_images = torch.stack(image_seq, dim=0)

    # Labels can differ in length, so they are flattened into one tensor and
    # their individual lengths are returned alongside.
    lengths = torch.tensor(list(map(len, target_seq)), dtype=torch.long)
    flat_targets = torch.cat(target_seq)

    return stacked_images, flat_targets, lengths


In [26]:
# Dataset splits. Each root directory is expected to contain an `img` sub-folder.
train_dataset = LicensePlateDataset(root_dir='dataset-ocr-new/train', transform=transform_train)
val_dataset = LicensePlateDataset(root_dir='dataset-ocr-new/val', transform=transform)
test_dataset = LicensePlateDataset(root_dir='dataset-ocr-new/test', transform=transform)
# Forward slashes are accepted on both Windows and POSIX; a backslash is a
# path separator on Windows only.
test_10 = LicensePlateDataset(root_dir='dataset-ocr/fine-tune-val', transform=transform)

batch_size = 64

# `collate_fn` is passed directly: a lambda wrapper adds nothing and cannot be
# pickled if `num_workers` is later raised on a platform that spawns workers.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0, collate_fn=collate_fn)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=0, collate_fn=collate_fn)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=0, collate_fn=collate_fn)


In [23]:
class CRNN(nn.Module):
    """Convolutional-recurrent network producing per-column class scores for CTC.

    A CNN turns the image into a feature map, every column of that map becomes
    one time step, and a bidirectional LSTM followed by a linear layer scores
    the classes at each step.

    Args:
        num_classes (int): Number of output classes, including the CTC blank.
        img_height (int): Height of the input images. The four pooling stages
            divide it by 16, so it must be at least 16. Defaults to 32.
        in_channels (int): Number of image channels. Defaults to 1.

    Raises:
        ValueError: If ``img_height`` is smaller than 16.
    """

    def __init__(self, num_classes, img_height=32, in_channels=1):
        super().__init__()

        # Every pooling layer halves the height (rounding down); there are four.
        feature_height = img_height // 16
        if feature_height < 1:
            raise ValueError(f'img_height must be at least 16, got {img_height}')

        # CNN part. Shape comments assume the default 1x32x128 input.
        self.cnn = nn.Sequential(
            nn.Conv2d(in_channels, 64, kernel_size=3, padding=1),  # (batch, 64, 32, 128)
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),  # (batch, 64, 16, 64)

            nn.Conv2d(64, 128, kernel_size=3, padding=1),  # (batch, 128, 16, 64)
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),  # (batch, 128, 8, 32)

            nn.Conv2d(128, 256, kernel_size=3, padding=1),  # (batch, 256, 8, 32)
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(256),

            nn.Conv2d(256, 256, kernel_size=3, padding=1),  # (batch, 256, 8, 32)
            nn.ReLU(inplace=True),
            nn.MaxPool2d((2, 1), (2, 1)),  # (batch, 256, 4, 32) - only the height is pooled

            nn.Conv2d(256, 512, kernel_size=3, padding=1),  # (batch, 512, 4, 32)
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(512),

            nn.Conv2d(512, 512, kernel_size=3, padding=1),  # (batch, 512, 4, 32)
            nn.ReLU(inplace=True),
            nn.MaxPool2d((2, 1), (2, 1)),  # (batch, 512, 2, 32)
        )

        # RNN part. Attribute names are kept so existing state dicts still load.
        self.linear1 = nn.Linear(512 * feature_height, 256)
        self.relu = nn.ReLU(inplace=True)
        self.lstm = nn.LSTM(256, 256, bidirectional=True, batch_first=True)
        self.linear2 = nn.Linear(512, num_classes)

    def forward(self, x):
        """Score every image column.

        Args:
            x (Tensor): Images shaped ``(batch, in_channels, img_height, width)``.

        Returns:
            Tensor: Raw class scores shaped ``(width // 4, batch, num_classes)``,
            i.e. time-major as consumed by the CTC loss.
        """
        # CNN part
        conv = self.cnn(x)  # (batch, 512, H', W')

        # Each column of the feature map becomes one time step.
        conv = conv.permute(0, 3, 1, 2)  # (batch, W', 512, H')
        # reshape (not view): the permuted tensor is non-contiguous, and view
        # only works when its strides happen to be compatible.
        conv = conv.reshape(conv.size(0), conv.size(1), -1)  # (batch, W', 512 * H')

        # RNN part
        out = self.linear1(conv)  # (batch, W', 256)
        out = self.relu(out)      # (batch, W', 256)
        out, _ = self.lstm(out)   # (batch, W', 512)
        out = self.linear2(out)   # (batch, W', num_classes)

        # Time-major layout for the CTC loss.
        out = out.permute(1, 0, 2)  # (W', batch, num_classes)
        return out


In [10]:
def train(model, loader, optimizer, criterion, device):
    """Run one optimisation pass over ``loader``.

    The model is expected to return scores shaped (seq_len, batch, num_classes).

    Returns:
        float: Sum of the per-batch losses divided by the number of batches.
    """
    model.train()
    running_loss = 0
    for images, labels, label_lengths in tqdm(loader, desc='Training'):
        images, labels, label_lengths = (
            tensor.to(device) for tensor in (images, labels, label_lengths)
        )

        optimizer.zero_grad()
        outputs = model(images)

        # Every sample in the batch uses the full output sequence length.
        seq_len, n_samples = outputs.size(0), outputs.size(1)
        input_lengths = torch.full(size=(n_samples,), fill_value=seq_len, dtype=torch.long).to(device)

        log_probs = outputs.log_softmax(2)
        loss = criterion(log_probs, labels, input_lengths, label_lengths)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
    return running_loss / len(loader)

def validate(model, loader, criterion, device):
    """Compute the mean per-batch loss over ``loader`` without updating the model.

    Returns:
        float: Sum of the per-batch losses divided by the number of batches.
    """
    model.eval()
    running_loss = 0
    with torch.no_grad():
        for images, labels, label_lengths in tqdm(loader, desc='Validation'):
            images, labels, label_lengths = (
                tensor.to(device) for tensor in (images, labels, label_lengths)
            )

            outputs = model(images)

            # Every sample in the batch uses the full output sequence length.
            seq_len, n_samples = outputs.size(0), outputs.size(1)
            input_lengths = torch.full(size=(n_samples,), fill_value=seq_len, dtype=torch.long).to(device)

            batch_loss = criterion(outputs.log_softmax(2), labels, input_lengths, label_lengths)
            running_loss += batch_loss.item()
    return running_loss / len(loader)

def decode_predictions(preds, blank=0):
    """Greedy CTC decoding of scores shaped (seq_len, batch, num_classes).

    The most likely class is taken at every step; repeated classes are
    collapsed and blanks are removed. Indices missing from ``IDX_TO_CHAR``
    contribute an empty string.

    Returns:
        list[str]: One decoded string per batch element.
    """
    best_paths = preds.argmax(2).permute(1, 0).tolist()  # batch x seq_len
    texts = []
    for path in best_paths:
        # Pair every step with its predecessor; the first step is compared to blank.
        predecessors = [blank] + path[:-1]
        symbols = [
            IDX_TO_CHAR.get(current, '')
            for before, current in zip(predecessors, path)
            if current != before and current != blank
        ]
        texts.append(''.join(symbols))
    return texts

def evaluate(model, loader, device):
    """Measure exact-match accuracy of greedy-decoded predictions.

    Args:
        model: Network returning scores accepted by ``decode_predictions``.
        loader: Iterable of ``(images, labels, label_lengths)`` batches where
            ``labels`` holds every target of the batch concatenated.
        device: Device the images are moved to before the forward pass.

    Returns:
        float: Percentage of samples whose decoded string equals the target
        string; ``0.0`` when the loader yields no samples.
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels, label_lengths in tqdm(loader, desc='Testing'):
            images = images.to(device)
            preds = decode_predictions(model(images))

            # Work with plain Python ints rather than slicing by 0-d tensors.
            flat_targets = labels.tolist()
            start = 0
            for pred_label, length in zip(preds, label_lengths.tolist()):
                target = flat_targets[start:start + length]
                true_label = ''.join(IDX_TO_CHAR.get(idx, '') for idx in target)
                start += length
                if pred_label == true_label:
                    correct += 1
                total += 1
    # An empty loader used to raise ZeroDivisionError here.
    if total == 0:
        return 0.0
    return correct / total * 100


In [27]:
# Build the recogniser and move it to the selected device before the optimiser
# collects its parameters.
model = CRNN(num_classes=NUM_CLASSES)
model = model.to(device)

# CTC objective with class 0 as the blank and zero_infinity enabled.
criterion = nn.CTCLoss(zero_infinity=True, blank=0)
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001)


In [29]:
num_epochs = 10
best_val_loss, best_accuracy = float('inf'), 0.0

for epoch in range(1, num_epochs + 1):
    print(f'Epoch {epoch}/{num_epochs}')

    # One optimisation pass, then loss and exact-match accuracy on the validation split.
    train_loss = train(model, train_loader, optimizer, criterion, device)
    val_loss = validate(model, val_loader, criterion, device)
    accuracy = evaluate(model, val_loader, device)

    print(f'Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f} | Accuracy: {accuracy:.2f}%')

    # Two checkpoints are tracked independently: lowest validation loss ...
    if best_val_loss > val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), 'best_loss_model_3.pth')
        print('Модель с лучшей потерей сохранена!')

    # ... and highest validation accuracy.
    if best_accuracy < accuracy:
        best_accuracy = accuracy
        torch.save(model.state_dict(), 'best_accuracy_model_3.pth')
        print('Модель с лучшей точностью сохранена!')


Epoch 1/10


Training: 100%|██████████| 569/569 [01:20<00:00,  7.03it/s]
Validation: 100%|██████████| 72/72 [00:04<00:00, 16.47it/s]
Testing: 100%|██████████| 72/72 [00:05<00:00, 14.37it/s]


Train Loss: 0.0850 | Val Loss: 0.0767 | Accuracy: 96.73%
Модель с лучшей потерей сохранена!
Модель с лучшей точностью сохранена!
Epoch 2/10


Training: 100%|██████████| 569/569 [01:20<00:00,  7.07it/s]
Validation: 100%|██████████| 72/72 [00:04<00:00, 17.92it/s]
Testing: 100%|██████████| 72/72 [00:04<00:00, 14.78it/s]


Train Loss: 0.0829 | Val Loss: 0.0706 | Accuracy: 97.63%
Модель с лучшей потерей сохранена!
Модель с лучшей точностью сохранена!
Epoch 3/10


Training: 100%|██████████| 569/569 [01:20<00:00,  7.09it/s]
Validation: 100%|██████████| 72/72 [00:04<00:00, 17.20it/s]
Testing: 100%|██████████| 72/72 [00:04<00:00, 14.51it/s]


Train Loss: 0.0796 | Val Loss: 0.0763 | Accuracy: 96.81%
Epoch 4/10


Training: 100%|██████████| 569/569 [01:19<00:00,  7.11it/s]
Validation: 100%|██████████| 72/72 [00:04<00:00, 17.50it/s]
Testing: 100%|██████████| 72/72 [00:04<00:00, 15.43it/s]


Train Loss: 0.0805 | Val Loss: 0.0732 | Accuracy: 97.72%
Модель с лучшей точностью сохранена!
Epoch 5/10


Training: 100%|██████████| 569/569 [01:19<00:00,  7.17it/s]
Validation: 100%|██████████| 72/72 [00:04<00:00, 17.76it/s]
Testing: 100%|██████████| 72/72 [00:04<00:00, 14.62it/s]


Train Loss: 0.0787 | Val Loss: 0.0716 | Accuracy: 97.76%
Модель с лучшей точностью сохранена!
Epoch 6/10


Training: 100%|██████████| 569/569 [01:19<00:00,  7.13it/s]
Validation: 100%|██████████| 72/72 [00:03<00:00, 18.01it/s]
Testing: 100%|██████████| 72/72 [00:04<00:00, 14.58it/s]


Train Loss: 0.0775 | Val Loss: 0.0732 | Accuracy: 97.63%
Epoch 7/10


Training: 100%|██████████| 569/569 [01:18<00:00,  7.21it/s]
Validation: 100%|██████████| 72/72 [00:04<00:00, 17.52it/s]
Testing: 100%|██████████| 72/72 [00:04<00:00, 14.53it/s]


Train Loss: 0.0731 | Val Loss: 0.0746 | Accuracy: 97.58%
Epoch 8/10


Training: 100%|██████████| 569/569 [01:19<00:00,  7.15it/s]
Validation: 100%|██████████| 72/72 [00:04<00:00, 17.64it/s]
Testing: 100%|██████████| 72/72 [00:04<00:00, 14.43it/s]


Train Loss: 0.0745 | Val Loss: 0.0753 | Accuracy: 96.77%
Epoch 9/10


Training: 100%|██████████| 569/569 [01:19<00:00,  7.19it/s]
Validation: 100%|██████████| 72/72 [00:04<00:00, 17.87it/s]
Testing: 100%|██████████| 72/72 [00:05<00:00, 14.38it/s]


Train Loss: 0.0734 | Val Loss: 0.0742 | Accuracy: 97.80%
Модель с лучшей точностью сохранена!
Epoch 10/10


Training: 100%|██████████| 569/569 [01:19<00:00,  7.15it/s]
Validation: 100%|██████████| 72/72 [00:04<00:00, 17.24it/s]
Testing: 100%|██████████| 72/72 [00:04<00:00, 14.55it/s]

Train Loss: 0.0767 | Val Loss: 0.0776 | Accuracy: 96.95%





In [30]:
# The checkpoint restore below is disabled, so the weights currently held in
# memory are the ones being scored.
#model.load_state_dict(torch.load('models/best_accuracy_model_3.pth'))
test_accuracy = evaluate(model=model, loader=test_loader, device=device)
print(f'Точность на тестовом наборе: {test_accuracy:.2f}%')

Testing: 100%|██████████| 72/72 [00:04<00:00, 14.55it/s]

Точность на тестовом наборе: 96.68%





In [31]:
import os
from PIL import Image
import torch
from torchvision import transforms
from tqdm import tqdm 

def recognize_license_plates(model, folder_path, transform, device):
    """Recognise the text on every image found directly inside a folder.

    Args:
        model: Network returning scores accepted by ``decode_predictions``.
        folder_path (str): Directory containing the images.
        transform (callable): Converts a loaded 'L'-mode image into a tensor.
        device: Device on which inference runs.

    Returns:
        dict: Maps each image file name to its decoded text, in sorted
        file-name order.
    """
    model.eval()
    # Sorted for reproducible output; extension matched case-insensitively so
    # files such as ``X.PNG`` are not skipped.
    images = sorted(
        name for name in os.listdir(folder_path)
        if name.lower().endswith(('.png', '.jpg', '.jpeg'))
    )

    results = {}

    with torch.no_grad():
        for img_name in tqdm(images, desc="Processing Images"):
            img_path = os.path.join(folder_path, img_name)
            image = Image.open(img_path).convert('L')

            # Add the batch dimension expected by the model.
            image_tensor = transform(image).unsqueeze(0).to(device)

            output = model(image_tensor)  # (seq_len, batch, num_classes)

            # The batch holds a single image, so take the first decoded string.
            results[img_name] = decode_predictions(output)[0]

    return results

folder_path = 'dataset-ocr/fine-tune-train/img_plate_image'  # folder with the images to recognise
# Checkpoint restore is disabled; the model currently in memory is used.
#model.load_state_dict(torch.load('best_accuracy_model_2.pth'))
#model.to(device)

results = recognize_license_plates(
    model=model,
    folder_path=folder_path,
    transform=transform,
    device=device,
)

# One "<file name>: <recognised text>" line per image.
for img_name, text in results.items():
    print(f"{img_name}: {text}")


Processing Images: 100%|██████████| 8/8 [00:00<00:00, 54.74it/s]

A023TY97.png: A023TY97
A413YE97.png: A413YE97
B642OT97.png: B642OT97
H702TH97.png: H702TH97
K263CO97.png: K263CO97
O571KT99.png: O571KT99
T829MK97.png: T829MK97
Y726PA97.png: Y726PA97



