Difference between revisions of "Publications/lazzara.11.icdar"
From LRDE
(5 intermediate revisions by the same user not shown) | |||
Line 6: | Line 6: | ||
| booktitle = Proceedings of the 11th International Conference on Document Analysis and Recognition (ICDAR) |
| booktitle = Proceedings of the 11th International Conference on Document Analysis and Recognition (ICDAR) |
||
| address = Beijing, China |
| address = Beijing, China |
||
+ | | pages = 252 to 258 |
||
| organization = International Association for Pattern Recognition (IAPR) |
| organization = International Association for Pattern Recognition (IAPR) |
||
− | | |
+ | | lrdeprojects = Olena |
− | | urllrde = 201109-ICDAR |
||
| abstract = Electronic documents are being more and more usable thanks to better and more affordable network, storage and computational facilities. But in order to benefit from computer-aided document management, paper documents must be digitized and analyzed. This task may be challenging at several levels. Data may be of multiple types thus requiring different adapted processing chains. The tools to be developed should also take into account the needs and knowledge of users, ranging from a simple graphical application to a complete programming framework. Finally, the data sets to process may be large. In this paper, we expose a set of features that a Document Image Analysis framework should provide to handle the previous issues. In particular, a good strategy to address both flexibility and efficiency issues is the Generic Programming (GP) paradigm. These ideas are implemented as an open source module, SCRIBO, built on top of Olena, a generic and efficient image processing platform. Our solution features services such as preprocessing filters, text detection, page segmentation and document reconstruction (as XML, PDF or HTML documents). This framework, composed of reusable software components, can be used to create full-fledged graphical applications, small utilities, or processing chains to be integrated into third-party projects. |
| abstract = Electronic documents are being more and more usable thanks to better and more affordable network, storage and computational facilities. But in order to benefit from computer-aided document management, paper documents must be digitized and analyzed. This task may be challenging at several levels. Data may be of multiple types thus requiring different adapted processing chains. The tools to be developed should also take into account the needs and knowledge of users, ranging from a simple graphical application to a complete programming framework. Finally, the data sets to process may be large. In this paper, we expose a set of features that a Document Image Analysis framework should provide to handle the previous issues. In particular, a good strategy to address both flexibility and efficiency issues is the Generic Programming (GP) paradigm. These ideas are implemented as an open source module, SCRIBO, built on top of Olena, a generic and efficient image processing platform. Our solution features services such as preprocessing filters, text detection, page segmentation and document reconstruction (as XML, PDF or HTML documents). This framework, composed of reusable software components, can be used to create full-fledged graphical applications, small utilities, or processing chains to be integrated into third-party projects. |
||
| lrdeposter = http://www.lrde.epita.fr/dload/papers/lazzara.11.icdar.poster.pdf |
| lrdeposter = http://www.lrde.epita.fr/dload/papers/lazzara.11.icdar.poster.pdf |
||
| lrdepaper = http://www.lrde.epita.fr/dload/papers/lazzara.11.icdar.pdf |
| lrdepaper = http://www.lrde.epita.fr/dload/papers/lazzara.11.icdar.pdf |
||
− | | lrdeprojects = Olena |
||
| lrdenewsdate = 2011-06-01 |
| lrdenewsdate = 2011-06-01 |
||
| type = inproceedings |
| type = inproceedings |
||
Line 28: | Line 27: | ||
address = <nowiki>{</nowiki>Beijing, China<nowiki>}</nowiki>, |
address = <nowiki>{</nowiki>Beijing, China<nowiki>}</nowiki>, |
||
month = sep, |
month = sep, |
||
⚫ | |||
organization = <nowiki>{</nowiki>International Association for Pattern Recognition (IAPR)<nowiki>}</nowiki>, |
organization = <nowiki>{</nowiki>International Association for Pattern Recognition (IAPR)<nowiki>}</nowiki>, |
||
⚫ | |||
abstract = <nowiki>{</nowiki>Electronic documents are being more and more usable thanks |
abstract = <nowiki>{</nowiki>Electronic documents are being more and more usable thanks |
||
to better and more affordable network, storage and |
to better and more affordable network, storage and |
Latest revision as of 18:57, 4 January 2018
- Authors
- Guillaume Lazzara, Roland Levillain, Thierry Géraud, Yann Jacquelet, Julien Marquegnies, Arthur Crépin-Leblond
- Where
- Proceedings of the 11th International Conference on Document Analysis and Recognition (ICDAR)
- Place
- Beijing, China
- Type
- inproceedings
- Projects
- Olena
- Date
- 2011-06-01
Abstract
Electronic documents are being more and more usable thanks to better and more affordable network, storage and computational facilities. But in order to benefit from computer-aided document management, paper documents must be digitized and analyzed. This task may be challenging at several levels. Data may be of multiple types thus requiring different adapted processing chains. The tools to be developed should also take into account the needs and knowledge of users, ranging from a simple graphical application to a complete programming framework. Finally, the data sets to process may be large. In this paper, we expose a set of features that a Document Image Analysis framework should provide to handle the previous issues. In particular, a good strategy to address both flexibility and efficiency issues is the Generic Programming (GP) paradigm. These ideas are implemented as an open source module, SCRIBO, built on top of Olena, a generic and efficient image processing platform. Our solution features services such as preprocessing filters, text detection, page segmentation and document reconstruction (as XML, PDF or HTML documents). This framework, composed of reusable software components, can be used to create full-fledged graphical applications, small utilities, or processing chains to be integrated into third-party projects.
Documents
Bibtex (lrde.bib)
% Conference paper (ICDAR 2011) on the SCRIBO module of the Olena platform.
% Accented letters are written as BibTeX special characters ({\'e}) so that
% sorting, labelling and case conversion treat each as a single letter.
@InProceedings{ lazzara.11.icdar,
  author       = {Guillaume Lazzara and Roland Levillain and Thierry G{\'e}raud
                  and Yann Jacquelet and Julien Marquegnies and Arthur
                  Cr{\'e}pin-Leblond},
  title        = {The {SCRIBO} Module of the {Olena} Platform: a Free Software
                  Framework for Document Image Analysis},
  booktitle    = {Proceedings of the 11th International Conference on Document
                  Analysis and Recognition (ICDAR)},
  year         = 2011,
  address      = {Beijing, China},
  month        = sep,
  pages        = {252--258},
  organization = {International Association for Pattern Recognition (IAPR)},
  abstract     = {Electronic documents are being more and more usable thanks
                  to better and more affordable network, storage and
                  computational facilities. But in order to benefit from
                  computer-aided document management, paper documents must be
                  digitized and analyzed. This task may be challenging at
                  several levels. Data may be of multiple types thus requiring
                  different adapted processing chains. The tools to be
                  developed should also take into account the needs and
                  knowledge of users, ranging from a simple graphical
                  application to a complete programming framework. Finally,
                  the data sets to process may be large. In this paper, we
                  expose a set of features that a Document Image Analysis
                  framework should provide to handle the previous issues. In
                  particular, a good strategy to address both flexibility and
                  efficiency issues is the Generic Programming (GP) paradigm.
                  These ideas are implemented as an open source module,
                  SCRIBO, built on top of Olena, a generic and efficient image
                  processing platform. Our solution features services such as
                  preprocessing filters, text detection, page segmentation and
                  document reconstruction (as XML, PDF or HTML documents).
                  This framework, composed of reusable software components,
                  can be used to create full-fledged graphical applications,
                  small utilities, or processing chains to be integrated into
                  third-party projects.},
  keywords     = {Document Image Analysis, Software Design, Reusability, Free
                  Software}
}