diff --git a/Document-Processing/Data-Extraction/Smart-Data-Extractor/NET/Assemblies-Required.md b/Document-Processing/Data-Extraction/Smart-Data-Extractor/NET/Assemblies-Required.md index 1f36509a8a..eb871b3030 100644 --- a/Document-Processing/Data-Extraction/Smart-Data-Extractor/NET/Assemblies-Required.md +++ b/Document-Processing/Data-Extraction/Smart-Data-Extractor/NET/Assemblies-Required.md @@ -31,6 +31,7 @@ The following assemblies need to be referenced in your application based on the Syncfusion.PdfToImageConverter.Base
Syncfusion.SmartFormRecognizer.Base
Syncfusion.SmartTableExtractor.Base
+ Syncfusion.Markdown
@@ -47,6 +48,7 @@ The following assemblies need to be referenced in your application based on the Syncfusion.PdfToImageConverter.Portable
Syncfusion.SmartFormRecognizer.Portable
Syncfusion.SmartTableExtractor.Portable
+ Syncfusion.Markdown
@@ -62,6 +64,7 @@ The following assemblies need to be referenced in your application based on the Syncfusion.PdfToImageConverter.NET
Syncfusion.SmartFormRecognizer.NET
Syncfusion.SmartTableExtractor.NET
+ Syncfusion.Markdown
diff --git a/Document-Processing/Data-Extraction/Smart-Data-Extractor/NET/Features.md b/Document-Processing/Data-Extraction/Smart-Data-Extractor/NET/Features.md index 3d6f61acb0..5ac2cc3755 100644 --- a/Document-Processing/Data-Extraction/Smart-Data-Extractor/NET/Features.md +++ b/Document-Processing/Data-Extraction/Smart-Data-Extractor/NET/Features.md @@ -113,7 +113,7 @@ using (FileStream inputStream = new FileStream("Input.pdf", FileMode.Open, FileA ## Extract Data as JSON from PDF Document -To extract form fields across a PDF document using the **ExtractDataAsJson** method of the **DataExtractor** class with form recognition options, refer to the following code example: +To extract form fields across a PDF document using the **ExtractDataAsJson** method of the **DataExtractor** class, refer to the following code example: {% tabs %} @@ -129,7 +129,7 @@ using (FileStream stream = new FileStream("Input.pdf", FileMode.Open, FileAccess { //Initialize the Smart Data Extractor. DataExtractor extractor = new DataExtractor(); - //Extract form data as JSON. + //Extract data as JSON. string data = extractor.ExtractDataAsJson(stream); //Save the extracted JSON data into an output file. File.WriteAllText("Output.json", data, Encoding.UTF8); @@ -149,7 +149,7 @@ using (FileStream stream = new FileStream("Input.pdf", FileMode.Open, FileAccess { //Initialize the Smart Data Extractor. DataExtractor extractor = new DataExtractor(); - //Extract form data as JSON. + //Extract data as JSON. string data = extractor.ExtractDataAsJson(stream); //Save the extracted JSON data into an output file. 
File.WriteAllText("Output.json", data, Encoding.UTF8); @@ -159,6 +159,54 @@ using (FileStream stream = new FileStream("Input.pdf", FileMode.Open, FileAccess {% endtabs %} +## Extract Data as Markdown from PDF Document + +To extract form fields across a PDF document using the **ExtractDataAsMarkdown** method of the **DataExtractor** class, refer to the following code example: + +{% tabs %} + +{% highlight c# tabtitle="C# [Cross-platform]" %} + +using System.IO; +using Syncfusion.SmartDataExtractor; +using Syncfusion.SmartFormRecognizer; +using System.Text; + +//Open the input PDF file as a stream. +using (FileStream stream = new FileStream("Input.pdf", FileMode.Open, FileAccess.Read)) +{ + //Initialize the Smart Data Extractor. + DataExtractor extractor = new DataExtractor(); + //Extract data as Markdown. + string data = extractor.ExtractDataAsMarkdown(stream); + //Save the extracted Markdown data into an output file. + File.WriteAllText("Output.md", data, Encoding.UTF8); +} + +{% endhighlight %} + +{% highlight c# tabtitle="C# [Windows-specific]" %} + +using System.IO; +using Syncfusion.SmartDataExtractor; +using Syncfusion.SmartFormRecognizer; +using System.Text; + +//Open the input PDF file as a stream. +using (FileStream stream = new FileStream("Input.pdf", FileMode.Open, FileAccess.Read)) +{ + //Initialize the Smart Data Extractor. + DataExtractor extractor = new DataExtractor(); + //Extract data as Markdown. + string data = extractor.ExtractDataAsMarkdown(stream); + //Save the extracted Markdown data into an output file. + File.WriteAllText("Output.md", data, Encoding.UTF8); +} + +{% endhighlight %} + +{% endtabs %} + ## Extract Data as JSON from an Image To extract structured data from an image document using the **ExtractDataAsJson** method of the **DataExtractor** class, refer to the following code examples. 
diff --git a/Document-Processing/Data-Extraction/Smart-Data-Extractor/NET/troubleshooting.md b/Document-Processing/Data-Extraction/Smart-Data-Extractor/NET/troubleshooting.md index 21fd1c1629..72552ce853 100644 --- a/Document-Processing/Data-Extraction/Smart-Data-Extractor/NET/troubleshooting.md +++ b/Document-Processing/Data-Extraction/Smart-Data-Extractor/NET/troubleshooting.md @@ -78,18 +78,21 @@ documentation: UG Solution In your MVC project file (.csproj), add the following build target to copy the native DLL from the NuGet package folder to the bin folder: -

-
-
-  
-
-      

- - +{% tabs %} +{% highlight C# %} + + + + + +{% endhighlight %} +{% endtabs %} + + diff --git a/Document-Processing/Data-Extraction/Smart-Table-Extractor/NET/Assemblies-Required.md b/Document-Processing/Data-Extraction/Smart-Table-Extractor/NET/Assemblies-Required.md index e88399385d..7e6565d838 100644 --- a/Document-Processing/Data-Extraction/Smart-Table-Extractor/NET/Assemblies-Required.md +++ b/Document-Processing/Data-Extraction/Smart-Table-Extractor/NET/Assemblies-Required.md @@ -29,6 +29,7 @@ The following assemblies need to be referenced in your application based on the Syncfusion.OCRProcessor.Base
Syncfusion.Pdf.Base
Syncfusion.PdfToImageConverter.Base
+ Syncfusion.Markdown
@@ -43,6 +44,7 @@ The following assemblies need to be referenced in your application based on the Syncfusion.Pdf.Imaging.Portable
Syncfusion.Pdf.Portable
Syncfusion.PdfToImageConverter.Portable
+ Syncfusion.Markdown
@@ -56,6 +58,7 @@ The following assemblies need to be referenced in your application based on the Syncfusion.Pdf.Imaging.NET
Syncfusion.Pdf.NET
Syncfusion.PdfToImageConverter.NET
+ Syncfusion.Markdown
diff --git a/Document-Processing/Data-Extraction/Smart-Table-Extractor/NET/Features.md b/Document-Processing/Data-Extraction/Smart-Table-Extractor/NET/Features.md index ddefb46025..cb4699cc27 100644 --- a/Document-Processing/Data-Extraction/Smart-Table-Extractor/NET/Features.md +++ b/Document-Processing/Data-Extraction/Smart-Table-Extractor/NET/Features.md @@ -26,19 +26,8 @@ using (FileStream stream = new FileStream("Input.pdf", FileMode.Open, FileAccess { // Initialize the Smart Table Extractor TableExtractor extractor = new TableExtractor(); - - //Configure table extraction options such as border-less table detection, page range, and confidence threshold. - TableExtractionOptions options = new TableExtractionOptions(); - options.DetectBorderlessTables = true; - options.PageRange = new int[,] { { 1, 5 } }; - options.ConfidenceThreshold = 0.6; - - //Assign the configured options to the extractor. - extractor.TableExtractionOptions = options; - //Extract table data from the PDF document as JSON string. string data = extractor.ExtractTableAsJson(stream); - //Save the extracted JSON data into an output file. File.WriteAllText("Output.json", data, Encoding.UTF8); } @@ -56,19 +45,8 @@ using (FileStream stream = new FileStream("Input.pdf", FileMode.Open, FileAccess { //Initialize the Smart Table Extractor. TableExtractor extractor = new TableExtractor(); - - //Configure table extraction options such as border-less table detection, page range, and confidence threshold. - TableExtractionOptions options = new TableExtractionOptions(); - options.DetectBorderlessTables = true; - options.PageRange = new int[,] { { 1, 5 } }; - options.ConfidenceThreshold = 0.6; - - //Assign the configured options to the extractor. - extractor.TableExtractionOptions = options; - //Extract table data from the PDF document as JSON string. string data = extractor.ExtractTableAsJson(stream); - //Save the extracted JSON data into an output file. 
File.WriteAllText("Output.json", data, Encoding.UTF8); } @@ -94,17 +72,14 @@ using (FileStream stream = new FileStream("Input.pdf", FileMode.Open, FileAccess { //Initialize the Smart Table Extractor. TableExtractor extractor = new TableExtractor(); - //Configure the table extraction option to detect border-less tables in the document. TableExtractionOptions options = new TableExtractionOptions(); options.DetectBorderlessTables = true; //Assign the configured options to the extractor. extractor.TableExtractionOptions = options; - //Extract table data from the PDF document as a JSON string. string data = extractor.ExtractTableAsJson(stream); - //Save the extracted JSON data into an output file. File.WriteAllText("Output.json", data, Encoding.UTF8); } @@ -122,17 +97,13 @@ using (FileStream stream = new FileStream("Input.pdf", FileMode.Open, FileAccess { //Initialize the Smart Table Extractor. TableExtractor extractor = new TableExtractor(); - //Configure the table extraction option to detect border-less tables in the document. TableExtractionOptions options = new TableExtractionOptions(); options.DetectBorderlessTables = true; - //Assign the configured options to the extractor. extractor.TableExtractionOptions = options; - //Extract table data from the PDF document as a JSON string. string data = extractor.ExtractTableAsJson(stream); - //Save the extracted JSON data into an output file. File.WriteAllText("Output.json", data, Encoding.UTF8); } @@ -158,17 +129,13 @@ using (FileStream stream = new FileStream("Input.pdf", FileMode.Open, FileAccess { //Initialize the Smart Table Extractor. TableExtractor extractor = new TableExtractor(); - //Configure table extraction options to specify the page range for detection. TableExtractionOptions options = new TableExtractionOptions(); options.PageRange = new int[,] { { 2, 4 } }; - //Assign the configured options to the extractor. 
extractor.TableExtractionOptions = options; - //Extract table data from the specified page range as a JSON string. string data = extractor.ExtractTableAsJson(stream); - //Save the extracted JSON data into an output file. File.WriteAllText("Output.json", data, Encoding.UTF8); } @@ -186,17 +153,13 @@ using (FileStream stream = new FileStream("Input.pdf", FileMode.Open, FileAccess { //Initialize the Smart Table Extractor. TableExtractor extractor = new TableExtractor(); - //Configure table extraction options to specify the page range for detection. TableExtractionOptions options = new TableExtractionOptions(); options.PageRange = new int[,] { { 2, 4 } }; - //Assign the configured options to the extractor. extractor.TableExtractionOptions = options; - //Extract table data from the specified page range as a JSON string. string data = extractor.ExtractTableAsJson(stream); - //Save the extracted JSON data into an output file. File.WriteAllText("Output.json", data, Encoding.UTF8); } @@ -222,17 +185,13 @@ using (FileStream stream = new FileStream("Input.pdf", FileMode.Open, FileAccess { //Initialize the Smart Table Extractor. TableExtractor extractor = new TableExtractor(); - //Configure table extraction options to set the confidence threshold for detection. TableExtractionOptions options = new TableExtractionOptions(); options.ConfidenceThreshold = 0.6; - //Assign the configured options to the extractor. extractor.TableExtractionOptions = options; - //Extract table data from the PDF document as a JSON string. string data = extractor.ExtractTableAsJson(stream); - //Save the extracted JSON data into an output file. File.WriteAllText("Output.json", data, Encoding.UTF8); } @@ -250,17 +209,13 @@ using (FileStream stream = new FileStream("Input.pdf", FileMode.Open, FileAccess { //Initialize the Smart Table Extractor. TableExtractor extractor = new TableExtractor(); - //Configure table extraction options to set the confidence threshold for detection. 
TableExtractionOptions options = new TableExtractionOptions(); options.ConfidenceThreshold = 0.6; - //Assign the configured options to the extractor. extractor.TableExtractionOptions = options; - //Extract table data from the PDF document as a JSON string. string data = extractor.ExtractTableAsJson(stream); - //Save the extracted JSON data into an output file. File.WriteAllText("Output.json", data, Encoding.UTF8); } @@ -285,22 +240,12 @@ using Syncfusion.SmartTableExtractor; //Open the input PDF file as a stream. using (FileStream stream = new FileStream("Input.pdf", FileMode.Open, FileAccess.Read)) { - //Declare and configure the table extraction options with border-less table detection and confidence threshold. - TableExtractionOptions extractionOptions = new TableExtractionOptions(); - extractionOptions.DetectBorderlessTables = true; - extractionOptions.ConfidenceThreshold = 0.6; - //Initialize the Smart Table Extractor and assign the configured options. TableExtractor tableExtractor = new TableExtractor(); - //Assign the configured table extraction options to the extractor. - tableExtractor.TableExtractionOptions = extractionOptions; - //Create a cancellation token with a timeout of 30 seconds to control the async operation. CancellationTokenSource cts = new CancellationTokenSource(TimeSpan.FromSeconds(30)); - //Call the asynchronous extraction API to extract table data as a JSON string. string data = await tableExtractor.ExtractTableAsJsonAsync(stream, cts.Token); - //Save the extracted JSON data into an output file. File.WriteAllText("Output.json", data, Encoding.UTF8); } @@ -317,21 +262,12 @@ using Syncfusion.SmartTableExtractor; //Open the input PDF file as a stream. using (FileStream stream = new FileStream("Input.pdf", FileMode.Open, FileAccess.Read)) { - //Declare and configure the table extraction options with border-less table detection and confidence threshold. 
- TableExtractionOptions extractionOptions = new TableExtractionOptions(); - extractionOptions.DetectBorderlessTables = true; - extractionOptions.ConfidenceThreshold = 0.6; - //Initialize the Smart Table Extractor and assign the configured options. TableExtractor tableExtractor = new TableExtractor(); - tableExtractor.TableExtractionOptions = extractionOptions; - //Create a cancellation token with a timeout of 30 seconds to control the async operation. CancellationTokenSource cts = new CancellationTokenSource(TimeSpan.FromSeconds(30)); - //Call the asynchronous extraction API to extract table data as a JSON string. string data = await tableExtractor.ExtractTableAsJsonAsync(stream, cts.Token); - //Save the extracted JSON data into an output file. File.WriteAllText("Output.json", data, Encoding.UTF8); } @@ -340,3 +276,49 @@ using (FileStream stream = new FileStream("Input.pdf", FileMode.Open, FileAccess {% endtabs %} +## Extract Table data as Markdown from a PDF Document + +To extract structured table data from a PDF document using the **ExtractTableAsMarkdown** method of the **TableExtractor** class, refer to the following code example: + +{% tabs %} + +{% highlight c# tabtitle="C# [Cross-platform]" %} + +using System.IO; +using System.Text; +using Syncfusion.SmartTableExtractor; + +//Open the input PDF file as a stream. +using (FileStream stream = new FileStream("Input.pdf", FileMode.Open, FileAccess.Read)) +{ + // Initialize the Smart Table Extractor + TableExtractor extractor = new TableExtractor(); + //Extract table data from the PDF document as markdown. + string data = extractor.ExtractTableAsMarkdown(stream); + //Save the extracted markdown data into an output file. + File.WriteAllText("Output.md", data, Encoding.UTF8); +} + +{% endhighlight %} + +{% highlight c# tabtitle="C# [Windows-specific]" %} + +using System.IO; +using System.Text; +using Syncfusion.SmartTableExtractor; + +//Open the input PDF file as a stream. 
+using (FileStream stream = new FileStream("Input.pdf", FileMode.Open, FileAccess.Read)) +{ + // Initialize the Smart Table Extractor + TableExtractor extractor = new TableExtractor(); + //Extract table data from the PDF document as markdown. + string data = extractor.ExtractTableAsMarkdown(stream); + //Save the extracted markdown data into an output file. + File.WriteAllText("Output.md", data, Encoding.UTF8); +} + +{% endhighlight %} + +{% endtabs %} + diff --git a/Document-Processing/Data-Extraction/Smart-Table-Extractor/NET/data-extraction-images/onnx.png b/Document-Processing/Data-Extraction/Smart-Table-Extractor/NET/data-extraction-images/onnx.png deleted file mode 100644 index 11ff3f4822..0000000000 Binary files a/Document-Processing/Data-Extraction/Smart-Table-Extractor/NET/data-extraction-images/onnx.png and /dev/null differ diff --git a/Document-Processing/Data-Extraction/Smart-Table-Extractor/NET/table-extraction-images/onnx-table.png b/Document-Processing/Data-Extraction/Smart-Table-Extractor/NET/table-extraction-images/onnx-table.png new file mode 100644 index 0000000000..4e67e28200 Binary files /dev/null and b/Document-Processing/Data-Extraction/Smart-Table-Extractor/NET/table-extraction-images/onnx-table.png differ diff --git a/Document-Processing/Data-Extraction/Smart-Table-Extractor/NET/troubleshooting.md b/Document-Processing/Data-Extraction/Smart-Table-Extractor/NET/troubleshooting.md index 183a0e15c9..ddc7e74962 100644 --- a/Document-Processing/Data-Extraction/Smart-Table-Extractor/NET/troubleshooting.md +++ b/Document-Processing/Data-Extraction/Smart-Table-Extractor/NET/troubleshooting.md @@ -26,7 +26,7 @@ documentation: UG

Please refer to the screenshot below:

- Runtime folder + Runtime folder

Note: If you publish your application, ensure the runtimes/models folder and ONNX files are included in the publish output. @@ -64,29 +64,36 @@ documentation: UG - - - - - - - - - - - + + + + + + + + + + + +
Exception: Microsoft.ML.ONNXRuntime.ONNXRuntimeException
Reason: The required native runtime library (ONNXRuntime.dll) is missing from your application's bin folder.
Solution - In your MVC project file (.csproj), add the following build target to copy the native DLL from the NuGet package folder to the bin folder: -

-
-
-  
-
-      
-
-
Exception +Microsoft.ML.ONNXRuntime.ONNXRuntimeException +
Reason +The required native runtime library (ONNXRuntime.dll) is missing from your application's bin folder. +
Solution + In your MVC project file (.csproj), add the following build target to copy the native DLL from the NuGet package folder to the bin folder:
+{% tabs %} +{% highlight C# %} + + + + + +{% endhighlight %} +{% endtabs %} +