应用场景
在办公自动化、文档处理、数据录入等场景中,经常需要从大量图片中提取文字信息。例如:
- 批量处理扫描的表单、合同、发票等文档
- 从图片集中提取特定区域的文字数据
- 将纸质资料快速转换为电子文本并整理归档
通过设置识别区域,可以精准定位需要提取的内容,提高识别准确率,减少干扰信息。
界面设计
应用界面采用 WPF 实现,主要包含以下部分:
- 顶部导航栏:包含功能按钮(选择图片、开始识别、保存结果)和当前状态信息,应用标题显示在窗口标题栏中
- 左侧图片预览区:显示当前选中的图片及识别区域框
- 中间参数设置区:可调整识别区域坐标和大小
- 右侧结果预览区:展示识别出的文字内容
- 状态与进度显示:状态信息显示在顶部导航栏右侧,处理进度条和进度文字位于结果预览区下方
详细代码步骤
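下面是基于 WPF 和 Tesseract OCR 的实现代码,依次为 App.xaml.cs、MainWindow.xaml 和 MainWindow.xaml.cs。代码依赖 Tesseract、Prism 和 EPPlus 三个 NuGet 包,请先在项目中安装: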
using System.Windows;
namespace OCRTextExtractor
{
/// <summary>
/// Application entry point. On startup it makes sure the Tesseract language-data
/// folder exists and tells the user when no language data is installed.
/// </summary>
public partial class App : Application
{
    /// <summary>
    /// Ensures a <c>tessdata</c> folder exists in the application directory and shows
    /// a warning when it contains no <c>.traineddata</c> files.
    /// </summary>
    /// <param name="e">Startup arguments, forwarded to the base implementation.</param>
    protected override void OnStartup(StartupEventArgs e)
    {
        base.OnStartup(e);

        // Resolve against the application directory instead of the process working
        // directory: the warning below tells the user to use the application directory,
        // and the two differ when the app is started from a shortcut or another folder.
        string tessDataDirectory = System.IO.Path.Combine(
            System.AppDomain.CurrentDomain.BaseDirectory, "tessdata");

        // CreateDirectory does nothing when the folder already exists.
        System.IO.Directory.CreateDirectory(tessDataDirectory);

        // Warn whenever no language file is present, not only when the folder was
        // missing; otherwise the empty folder created on the first run would suppress
        // the warning on every later run.
        bool hasLanguageData =
            System.IO.Directory.GetFiles(tessDataDirectory, "*.traineddata").Length > 0;
        if (!hasLanguageData)
        {
            MessageBox.Show("请将Tesseract OCR语言数据文件(.traineddata)放到应用程序目录下的tessdata文件夹中。", "缺少OCR数据",
                MessageBoxButton.OK, MessageBoxImage.Warning);
        }
    }
}
}
<Window x:Class="OCRTextExtractor.MainWindow"
        xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation"
        xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml"
        Title="图片文字识别提取工具" Height="700" Width="1000"
        WindowStartupLocation="CenterScreen">
    <!-- The root is a DockPanel so that DockPanel.Dock on the toolbar takes effect;
         inside a plain Grid the toolbar and the content would share one cell and overlap. -->
    <DockPanel>
        <!-- Top toolbar: command buttons followed by the status message -->
        <DockPanel Height="50" DockPanel.Dock="Top" Background="#333333">
            <Button Content="选择图片" Margin="10,5" Width="80" Command="{Binding SelectImagesCommand}" DockPanel.Dock="Left"/>
            <Button Content="开始识别" Margin="10,5" Width="80" Command="{Binding StartRecognitionCommand}" DockPanel.Dock="Left"/>
            <Button Content="保存结果" Margin="10,5" Width="80" Command="{Binding SaveResultsCommand}" DockPanel.Dock="Left"/>
            <Label Content="{Binding StatusMessage}" Foreground="White" VerticalContentAlignment="Center" Margin="10,0"/>
        </DockPanel>
        <!-- Main content: preview | splitter | settings | splitter | results.
             The column definitions are required; without them every Grid.Column value
             falls back to column 0. -->
        <Grid>
            <Grid.ColumnDefinitions>
                <ColumnDefinition Width="*"/>
                <ColumnDefinition Width="Auto"/>
                <ColumnDefinition Width="*"/>
                <ColumnDefinition Width="Auto"/>
                <ColumnDefinition Width="*"/>
            </Grid.ColumnDefinitions>
            <!-- Left pane: image preview -->
            <Grid Grid.Column="0" Margin="10">
                <Grid.RowDefinitions>
                    <RowDefinition Height="Auto"/>
                    <RowDefinition Height="*"/>
                    <RowDefinition Height="Auto"/>
                </Grid.RowDefinitions>
                <Label Grid.Row="0" Content="图片预览" FontWeight="Bold" Margin="0,0,0,5"/>
                <Border Grid.Row="1" BorderBrush="Gray" BorderThickness="1">
                    <!-- ClipToBounds keeps an image larger than the pane from painting over neighbouring panes -->
                    <Canvas x:Name="ImageCanvas" Background="LightGray" ClipToBounds="True"
                            MouseLeftButtonDown="Canvas_MouseLeftButtonDown"
                            MouseMove="Canvas_MouseMove" MouseLeftButtonUp="Canvas_MouseLeftButtonUp">
                        <!-- Bound to the view model's CurrentImage so the selected picture is actually displayed -->
                        <Image x:Name="PreviewImage" Source="{Binding CurrentImage}" Stretch="Uniform"/>
                        <Rectangle x:Name="SelectionRect" Stroke="Red" StrokeThickness="2" Visibility="Hidden"/>
                    </Canvas>
                </Border>
                <StackPanel Grid.Row="2" Orientation="Horizontal" Margin="0,5,0,0">
                    <Button Content="上一张" Width="70" Command="{Binding PreviousImageCommand}" Margin="0,0,5,0"/>
                    <Button Content="下一张" Width="70" Command="{Binding NextImageCommand}"/>
                    <Label Content="{Binding CurrentImageInfo}" VerticalContentAlignment="Center" Margin="10,0,0,0"/>
                </StackPanel>
            </Grid>
            <!-- A splitter in its own Auto-sized column resizes the columns on either side -->
            <GridSplitter Grid.Column="1" Width="5" HorizontalAlignment="Center" VerticalAlignment="Stretch" Background="#CCCCCC"/>
            <!-- Middle pane: recognition settings -->
            <Grid Grid.Column="2" Margin="10">
                <Grid.RowDefinitions>
                    <RowDefinition Height="Auto"/>
                    <RowDefinition Height="*"/>
                </Grid.RowDefinitions>
                <Label Grid.Row="0" Content="识别区域设置" FontWeight="Bold" Margin="0,0,0,5"/>
                <StackPanel Grid.Row="1" Orientation="Vertical">
                    <GroupBox Header="区域坐标" Margin="0,0,0,10">
                        <Grid Margin="5">
                            <Grid.RowDefinitions>
                                <RowDefinition Height="Auto"/>
                                <RowDefinition Height="Auto"/>
                            </Grid.RowDefinitions>
                            <Grid.ColumnDefinitions>
                                <ColumnDefinition Width="Auto"/>
                                <ColumnDefinition Width="*"/>
                                <ColumnDefinition Width="Auto"/>
                                <ColumnDefinition Width="*"/>
                            </Grid.ColumnDefinitions>
                            <!-- NOTE(review): RecognitionRegion is a Rect (a struct) on the view model, so values
                                 typed into these boxes are probably written to a boxed copy and never reach the
                                 view model. Confirm; dedicated X/Y/Width/Height properties may be needed. -->
                            <Label Content="X:" Grid.Row="0" Grid.Column="0" VerticalAlignment="Center"/>
                            <TextBox Text="{Binding RecognitionRegion.X, Mode=TwoWay}" Grid.Row="0" Grid.Column="1" Margin="5,0"/>
                            <Label Content="Y:" Grid.Row="0" Grid.Column="2" VerticalAlignment="Center"/>
                            <TextBox Text="{Binding RecognitionRegion.Y, Mode=TwoWay}" Grid.Row="0" Grid.Column="3" Margin="5,0"/>
                            <Label Content="宽度:" Grid.Row="1" Grid.Column="0" VerticalAlignment="Center"/>
                            <TextBox Text="{Binding RecognitionRegion.Width, Mode=TwoWay}" Grid.Row="1" Grid.Column="1" Margin="5,0"/>
                            <Label Content="高度:" Grid.Row="1" Grid.Column="2" VerticalAlignment="Center"/>
                            <TextBox Text="{Binding RecognitionRegion.Height, Mode=TwoWay}" Grid.Row="1" Grid.Column="3" Margin="5,0"/>
                        </Grid>
                    </GroupBox>
                    <GroupBox Header="OCR设置" Margin="0,0,0,10">
                        <Grid Margin="5">
                            <Grid.RowDefinitions>
                                <RowDefinition Height="Auto"/>
                                <RowDefinition Height="Auto"/>
                            </Grid.RowDefinitions>
                            <Grid.ColumnDefinitions>
                                <ColumnDefinition Width="Auto"/>
                                <ColumnDefinition Width="*"/>
                            </Grid.ColumnDefinitions>
                            <Label Content="识别语言:" Grid.Row="0" Grid.Column="0" VerticalAlignment="Center"/>
                            <ComboBox ItemsSource="{Binding AvailableLanguages}" SelectedItem="{Binding SelectedLanguage}"
                                      Grid.Row="0" Grid.Column="1" Margin="5,0"/>
                            <CheckBox Content="自动保存每张识别结果" IsChecked="{Binding AutoSaveEachResult}"
                                      Grid.Row="1" Grid.Column="0" Grid.ColumnSpan="2" Margin="5,0"/>
                        </Grid>
                    </GroupBox>
                    <Button Content="应用设置" Command="{Binding ApplySettingsCommand}" Height="30"/>
                </StackPanel>
            </Grid>
            <GridSplitter Grid.Column="3" Width="5" HorizontalAlignment="Center" VerticalAlignment="Stretch" Background="#CCCCCC"/>
            <!-- Right pane: recognised text and progress -->
            <Grid Grid.Column="4" Margin="10">
                <Grid.RowDefinitions>
                    <RowDefinition Height="Auto"/>
                    <RowDefinition Height="*"/>
                    <RowDefinition Height="Auto"/>
                </Grid.RowDefinitions>
                <Label Grid.Row="0" Content="识别结果" FontWeight="Bold" Margin="0,0,0,5"/>
                <Border Grid.Row="1" BorderBrush="Gray" BorderThickness="1">
                    <TextBox Text="{Binding RecognizedText, Mode=TwoWay}" AcceptsReturn="True" TextWrapping="Wrap"
                             VerticalScrollBarVisibility="Auto" HorizontalScrollBarVisibility="Auto" IsReadOnly="True"/>
                </Border>
                <StackPanel Grid.Row="2" Orientation="Horizontal" Margin="0,5,0,0">
                    <Label Content="处理进度:" VerticalContentAlignment="Center"/>
                    <ProgressBar Value="{Binding ProgressValue}" Maximum="{Binding ProgressMaximum}" Width="200" Margin="5,0"/>
                    <Label Content="{Binding ProgressText}" VerticalContentAlignment="Center" Margin="5,0"/>
                </StackPanel>
            </Grid>
        </Grid>
    </DockPanel>
</Window>
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Windows;
using System.Windows.Controls;
using System.Windows.Input;
using System.Windows.Media;
using System.Windows.Media.Imaging;
using Microsoft.Win32;
using Prism.Commands;
using Prism.Mvvm;
using Tesseract;
namespace OCRTextExtractor
{
/// <summary>
/// Code-behind for the main window. It creates the view model and turns a mouse drag
/// on the preview canvas into the view model's recognition region.
/// </summary>
public partial class MainWindow : Window
{
    // Canvas position where the current drag started; null while no drag is in progress.
    private Point? startPoint = null;

    /// <summary>
    /// Builds the window and installs a new <see cref="MainViewModel"/> as its data context.
    /// </summary>
    public MainWindow()
    {
        InitializeComponent();
        DataContext = new MainViewModel();
    }

    /// <summary>
    /// Starts a selection drag: records the press position, shows the selection
    /// rectangle and captures the mouse on the canvas.
    /// </summary>
    private void Canvas_MouseLeftButtonDown(object sender, MouseButtonEventArgs e)
    {
        Point pressed = e.GetPosition(ImageCanvas);
        startPoint = pressed;

        // Collapse the rectangle onto the press point first, so the outline of the
        // previous selection is not shown until the mouse moves.
        Canvas.SetLeft(SelectionRect, pressed.X);
        Canvas.SetTop(SelectionRect, pressed.Y);
        SelectionRect.Width = 0;
        SelectionRect.Height = 0;
        SelectionRect.Visibility = Visibility.Visible;

        ImageCanvas.CaptureMouse();
    }

    /// <summary>
    /// While the left button is held during a drag, resizes the selection rectangle to
    /// span the start and current positions and publishes the same rectangle (in canvas
    /// coordinates) to the view model.
    /// </summary>
    private void Canvas_MouseMove(object sender, MouseEventArgs e)
    {
        if (!startPoint.HasValue || e.LeftButton != MouseButtonState.Pressed)
        {
            return;
        }

        // Rect(Point, Point) yields the smallest rectangle containing both points, so the
        // drag works in any direction. The type is fully qualified because the file also
        // imports the Tesseract namespace, which declares its own Rect.
        var selection = new System.Windows.Rect(startPoint.Value, e.GetPosition(ImageCanvas));

        Canvas.SetLeft(SelectionRect, selection.X);
        Canvas.SetTop(SelectionRect, selection.Y);
        SelectionRect.Width = selection.Width;
        SelectionRect.Height = selection.Height;

        if (DataContext is MainViewModel viewModel)
        {
            // Rect is a struct: the property getter returns a copy, so its members cannot
            // be assigned through the property. Assign a whole new value instead, which
            // also raises the property-change notification.
            viewModel.RecognitionRegion = selection;
        }
    }

    /// <summary>
    /// Ends the selection drag and releases the mouse capture.
    /// </summary>
    private void Canvas_MouseLeftButtonUp(object sender, MouseButtonEventArgs e)
    {
        startPoint = null;
        ImageCanvas.ReleaseMouseCapture();
    }
}
/// <summary>
/// View model for the main window. It holds the selected image files, the recognition
/// settings, the recognised text and the commands bound by the view.
/// </summary>
/// <remarks>
/// Recognition runs synchronously on the thread that invokes the command, so bound
/// progress values are unlikely to repaint until the whole batch has finished.
/// </remarks>
public class MainViewModel : BindableBase
{
    // Line placed between the per-image sections of RecognizedText.
    private const string ResultSeparator = "------------------------";

    private List<string> imageFiles = new List<string>();
    private int currentImageIndex = 0;

    // Structured results of the last recognition run. The Excel export reads these
    // instead of re-parsing RecognizedText.
    private List<(string FileName, string Text)> lastResults = new List<(string FileName, string Text)>();

    private BitmapSource currentImage;
    private string recognizedText;
    private string statusMessage = "就绪";

    // Fully qualified: both System.Windows and Tesseract declare a type named Rect.
    private System.Windows.Rect recognitionRegion = new System.Windows.Rect(100, 100, 300, 200);

    private double progressValue;
    private double progressMaximum;
    private string progressText = "0/0";
    private bool autoSaveEachResult = false;
    private string selectedLanguage = "chi_sim";
    private List<string> availableLanguages = new List<string> { "eng", "chi_sim", "jpn" };

    /// <summary>Image currently shown in the preview; null when nothing is loaded.</summary>
    public BitmapSource CurrentImage
    {
        get { return currentImage; }
        set { SetProperty(ref currentImage, value); }
    }

    /// <summary>Combined recognition output for all processed images.</summary>
    public string RecognizedText
    {
        get { return recognizedText; }
        set { SetProperty(ref recognizedText, value); }
    }

    /// <summary>Status line shown to the user.</summary>
    public string StatusMessage
    {
        get { return statusMessage; }
        set { SetProperty(ref statusMessage, value); }
    }

    /// <summary>
    /// Rectangle to recognise. A rectangle with no area, or one that does not overlap
    /// the image, makes recognition fall back to the whole image.
    /// </summary>
    public System.Windows.Rect RecognitionRegion
    {
        get { return recognitionRegion; }
        set { SetProperty(ref recognitionRegion, value); }
    }

    /// <summary>Number of images processed so far in the current run.</summary>
    public double ProgressValue
    {
        get { return progressValue; }
        set { SetProperty(ref progressValue, value); }
    }

    /// <summary>Total number of images in the current run.</summary>
    public double ProgressMaximum
    {
        get { return progressMaximum; }
        set { SetProperty(ref progressMaximum, value); }
    }

    /// <summary>Progress formatted as "done/total".</summary>
    public string ProgressText
    {
        get { return progressText; }
        set { SetProperty(ref progressText, value); }
    }

    /// <summary>When true, each image's text is also written to a .txt file next to the image.</summary>
    public bool AutoSaveEachResult
    {
        get { return autoSaveEachResult; }
        set { SetProperty(ref autoSaveEachResult, value); }
    }

    /// <summary>Language code passed to the OCR engine.</summary>
    public string SelectedLanguage
    {
        get { return selectedLanguage; }
        set { SetProperty(ref selectedLanguage, value); }
    }

    /// <summary>Language codes offered for selection.</summary>
    public List<string> AvailableLanguages
    {
        get { return availableLanguages; }
        set { SetProperty(ref availableLanguages, value); }
    }

    /// <summary>Caption describing the current image's position in the list and its file name.</summary>
    public string CurrentImageInfo
    {
        get
        {
            if (imageFiles.Count == 0)
                return "未选择图片";
            return $"图片 {currentImageIndex + 1}/{imageFiles.Count}: {Path.GetFileName(imageFiles[currentImageIndex])}";
        }
    }

    public ICommand SelectImagesCommand { get; private set; }
    public ICommand PreviousImageCommand { get; private set; }
    public ICommand NextImageCommand { get; private set; }
    public ICommand StartRecognitionCommand { get; private set; }
    public ICommand SaveResultsCommand { get; private set; }
    public ICommand ApplySettingsCommand { get; private set; }

    /// <summary>
    /// Creates the commands. Navigation and recognition commands re-evaluate when
    /// <see cref="CurrentImageInfo"/> changes; the save command re-evaluates when
    /// <see cref="RecognizedText"/> changes.
    /// </summary>
    public MainViewModel()
    {
        SelectImagesCommand = new DelegateCommand(SelectImages);
        PreviousImageCommand = new DelegateCommand(PreviousImage, CanGoToPreviousImage).ObservesProperty(() => CurrentImageInfo);
        NextImageCommand = new DelegateCommand(NextImage, CanGoToNextImage).ObservesProperty(() => CurrentImageInfo);
        StartRecognitionCommand = new DelegateCommand(StartRecognition, CanStartRecognition).ObservesProperty(() => CurrentImageInfo);
        SaveResultsCommand = new DelegateCommand(SaveResults, CanSaveResults).ObservesProperty(() => RecognizedText);
        ApplySettingsCommand = new DelegateCommand(ApplySettings);
    }

    /// <summary>Folder holding the .traineddata files, resolved against the application directory.</summary>
    private static string GetTessDataPath()
    {
        // A bare "./tessdata" would depend on the process working directory.
        return Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "tessdata");
    }

    /// <summary>Asks the user for image files and loads the first one.</summary>
    private void SelectImages()
    {
        var openFileDialog = new OpenFileDialog
        {
            Filter = "图片文件|*.jpg;*.jpeg;*.png;*.bmp;*.gif|所有文件|*.*",
            Multiselect = true,
            Title = "选择图片文件"
        };
        if (openFileDialog.ShowDialog() != true)
        {
            return;
        }

        imageFiles = openFileDialog.FileNames.ToList();
        currentImageIndex = 0;

        // Set the status before loading, so a load error reported by LoadCurrentImage
        // is not immediately overwritten.
        StatusMessage = $"已选择 {imageFiles.Count} 张图片";
        LoadCurrentImage();
    }

    /// <summary>
    /// Loads the image at the current index into <see cref="CurrentImage"/> and clears
    /// the previous recognition output. Load failures are reported via
    /// <see cref="StatusMessage"/>.
    /// </summary>
    private void LoadCurrentImage()
    {
        if (imageFiles.Count == 0)
        {
            return;
        }

        try
        {
            var image = new BitmapImage();
            image.BeginInit();
            image.UriSource = new Uri(imageFiles[currentImageIndex]);
            // OnLoad reads the whole file during EndInit, so the file is not kept open
            // for as long as the preview is shown.
            image.CacheOption = BitmapCacheOption.OnLoad;
            image.EndInit();
            image.Freeze();

            CurrentImage = image;
            lastResults.Clear();
            RecognizedText = "";
        }
        catch (Exception ex)
        {
            // Do not leave the previous picture under the new file's caption.
            CurrentImage = null;
            StatusMessage = $"加载图片时出错: {ex.Message}";
        }
        finally
        {
            // The index may have changed even when loading failed, so the caption and
            // the commands observing it must refresh either way.
            RaisePropertyChanged(nameof(CurrentImageInfo));
        }
    }

    /// <summary>True when there is an image before the current one.</summary>
    private bool CanGoToPreviousImage()
    {
        return currentImageIndex > 0;
    }

    /// <summary>True when there is an image after the current one.</summary>
    private bool CanGoToNextImage()
    {
        return currentImageIndex < imageFiles.Count - 1;
    }

    /// <summary>Moves to the previous image, if any.</summary>
    private void PreviousImage()
    {
        if (currentImageIndex > 0)
        {
            currentImageIndex--;
            LoadCurrentImage();
        }
    }

    /// <summary>Moves to the next image, if any.</summary>
    private void NextImage()
    {
        if (currentImageIndex < imageFiles.Count - 1)
        {
            currentImageIndex++;
            LoadCurrentImage();
        }
    }

    /// <summary>Recognition is available once at least one image is selected.</summary>
    private bool CanStartRecognition()
    {
        return imageFiles.Count > 0;
    }

    /// <summary>
    /// Runs OCR over every selected image, optionally saving each result next to its
    /// image, and publishes the combined text. Failures that abort the run, such as an
    /// engine that cannot be created, are reported via <see cref="StatusMessage"/>.
    /// </summary>
    private void StartRecognition()
    {
        try
        {
            StatusMessage = "开始识别...";
            ProgressMaximum = imageFiles.Count;
            ProgressValue = 0;
            ProgressText = $"{ProgressValue}/{ProgressMaximum}";

            var results = new List<(string FileName, string Text)>(imageFiles.Count);
            int failedSaves = 0;

            // One engine serves the whole batch; each page is disposed before the next
            // image is processed.
            using (var engine = new TesseractEngine(GetTessDataPath(), SelectedLanguage, EngineMode.Default))
            {
                foreach (var imageFile in imageFiles)
                {
                    var text = RecognizeTextFromImage(engine, imageFile);
                    results.Add((Path.GetFileName(imageFile), text));

                    if (AutoSaveEachResult && !SaveSingleResult(imageFile, text))
                    {
                        failedSaves++;
                    }

                    ProgressValue++;
                    ProgressText = $"{ProgressValue}/{ProgressMaximum}";
                }
            }

            lastResults = results;
            // Join the per-image sections with a separator line.
            RecognizedText = string.Join(Environment.NewLine + ResultSeparator + Environment.NewLine,
                results.Select(r => $"文件名: {r.FileName}{Environment.NewLine}识别结果:{Environment.NewLine}{r.Text}"));

            StatusMessage = failedSaves == 0
                ? "识别完成"
                : $"识别完成,{failedSaves} 个结果文件自动保存失败";
        }
        catch (Exception ex)
        {
            StatusMessage = $"识别过程中出错: {ex.Message}";
        }
    }

    /// <summary>
    /// Recognises the text of one image, limited to <see cref="RecognitionRegion"/> when
    /// that region overlaps the image and to the whole image otherwise.
    /// </summary>
    /// <param name="engine">Engine to use; owned and disposed by the caller.</param>
    /// <param name="imagePath">Path of the image file.</param>
    /// <returns>The recognised text, or a bracketed error marker when processing fails.</returns>
    private string RecognizeTextFromImage(TesseractEngine engine, string imagePath)
    {
        try
        {
            using (var img = Pix.LoadFromFile(imagePath))
            {
                if (TryGetCropRegion(RecognitionRegion, img.Width, img.Height, out Tesseract.Rect crop))
                {
                    // Passing the region to Process restricts recognition without
                    // copying pixels.
                    using (var page = engine.Process(img, crop))
                    {
                        return page.GetText();
                    }
                }

                // No usable region: process the whole image.
                using (var page = engine.Process(img))
                {
                    return page.GetText();
                }
            }
        }
        catch (Exception ex)
        {
            StatusMessage = $"处理图片 {Path.GetFileName(imagePath)} 时出错: {ex.Message}";
            return $"[识别出错: {ex.Message}]";
        }
    }

    /// <summary>
    /// Intersects <paramref name="region"/> with an image of the given size.
    /// </summary>
    /// <param name="region">Requested region.</param>
    /// <param name="imageWidth">Image width in pixels.</param>
    /// <param name="imageHeight">Image height in pixels.</param>
    /// <param name="crop">The intersection in whole pixels when the method returns true.</param>
    /// <returns>False when the region has no area or lies outside the image.</returns>
    private static bool TryGetCropRegion(System.Windows.Rect region, int imageWidth, int imageHeight, out Tesseract.Rect crop)
    {
        crop = default(Tesseract.Rect);
        if (region.IsEmpty || !(region.Width > 0) || !(region.Height > 0))
        {
            return false;
        }

        // Clamp each edge into the image before casting, so coordinates far outside
        // the image cannot overflow the int conversion.
        int left = (int)Math.Min(imageWidth, Math.Max(0, region.Left));
        int top = (int)Math.Min(imageHeight, Math.Max(0, region.Top));
        int right = (int)Math.Max(0, Math.Min(imageWidth, region.Right));
        int bottom = (int)Math.Max(0, Math.Min(imageHeight, region.Bottom));
        if (right <= left || bottom <= top)
        {
            return false;
        }

        crop = new Tesseract.Rect(left, top, right - left, bottom - top);
        return true;
    }

    /// <summary>
    /// Best-effort write of one image's text to a .txt file beside the image.
    /// </summary>
    /// <returns>True when the file was written; false when writing failed.</returns>
    private bool SaveSingleResult(string imagePath, string text)
    {
        try
        {
            string resultPath = Path.ChangeExtension(imagePath, ".txt");
            File.WriteAllText(resultPath, text);
            return true;
        }
        catch (Exception ex)
        {
            // A single failed save must not abort the batch, but it is reported
            // rather than silently dropped.
            StatusMessage = $"保存 {Path.GetFileName(imagePath)} 的识别结果时出错: {ex.Message}";
            return false;
        }
    }

    /// <summary>Saving is available once there is recognised text.</summary>
    private bool CanSaveResults()
    {
        return !string.IsNullOrEmpty(RecognizedText);
    }

    /// <summary>
    /// Asks for a target file and writes the results as an Excel workbook (.xlsx) or
    /// as plain text (any other extension).
    /// </summary>
    private void SaveResults()
    {
        var saveFileDialog = new SaveFileDialog
        {
            Filter = "文本文件|*.txt|Excel文件|*.xlsx|所有文件|*.*",
            Title = "保存识别结果",
            DefaultExt = "txt"
        };
        if (saveFileDialog.ShowDialog() != true)
        {
            return;
        }

        try
        {
            // Ordinal, case-insensitive comparison: ToLower() is culture-sensitive.
            bool isExcel = string.Equals(
                Path.GetExtension(saveFileDialog.FileName), ".xlsx", StringComparison.OrdinalIgnoreCase);
            if (isExcel)
            {
                SaveToExcel(saveFileDialog.FileName);
            }
            else
            {
                File.WriteAllText(saveFileDialog.FileName, RecognizedText);
            }
            StatusMessage = $"结果已保存到 {saveFileDialog.FileName}";
        }
        catch (Exception ex)
        {
            StatusMessage = $"保存结果时出错: {ex.Message}";
        }
    }

    /// <summary>
    /// Writes one row per recognised image (file name, text) to a new workbook.
    /// </summary>
    /// <param name="filePath">Target .xlsx path; an existing file is replaced.</param>
    private void SaveToExcel(string filePath)
    {
        // Remove an existing file first. NOTE(review): ExcelPackage appears to open an
        // existing workbook rather than replace it, in which case adding a sheet with
        // the same name would fail - confirm against the EPPlus version in use.
        if (File.Exists(filePath))
        {
            File.Delete(filePath);
        }

        // NOTE(review): EPPlus 5 and later require a license context to be configured
        // before ExcelPackage is used - confirm for the referenced package version.
        using (var package = new OfficeOpenXml.ExcelPackage(new FileInfo(filePath)))
        {
            var worksheet = package.Workbook.Worksheets.Add("识别结果");

            // Header row.
            worksheet.Cells[1, 1].Value = "图片文件名";
            worksheet.Cells[1, 2].Value = "识别文本";

            // Rows come from the structured results, not from splitting RecognizedText:
            // recognised text may itself contain the separator line, and re-parsing
            // would pull the section header into the text column.
            int row = 2;
            foreach (var result in lastResults)
            {
                worksheet.Cells[row, 1].Value = result.FileName;
                worksheet.Cells[row, 2].Value = result.Text;
                row++;
            }

            if (lastResults.Count == 0)
            {
                // RecognizedText was set without a recognition run; keep the text anyway.
                worksheet.Cells[row, 2].Value = RecognizedText;
            }

            worksheet.Cells.AutoFitColumns();
            package.Save();
        }
    }

    /// <summary>Acknowledges the settings; the bound properties already hold the values.</summary>
    private void ApplySettings()
    {
        StatusMessage = "已应用识别区域设置";
    }
}
}
总结优化
这个应用实现了从图片中批量提取文字并保存到 Excel 的功能,主要特点包括:
- 用户友好的界面:直观的图片预览和识别区域设置功能
- 灵活的识别区域调整:支持鼠标绘制和手动输入坐标两种方式
- 多语言支持:内置多种语言识别选项
- 批量处理能力:支持多张图片连续处理
- 结果导出:支持将识别结果保存为文本或 Excel 格式
优化建议
性能优化:
- 可以添加多线程处理,提高批量识别效率
- 实现识别结果的缓存机制,避免重复处理
功能增强:
- 添加图片预处理功能(灰度化、二值化、降噪等)提高识别准确率
- 支持识别结果的编辑和校对
- 增加更多导出格式选项(如 CSV、PDF 等)
用户体验:
- 添加识别进度的详细信息和预估时间
- 支持保存和加载识别区域配置
- 提供快捷键支持,提高操作效率
使用时,需要在应用程序目录下创建 tessdata 文件夹,并放入所选识别语言对应的语言数据文件(.traineddata,例如 chi_sim.traineddata),这些文件可以从 Tesseract 的 GitHub 仓库下载。