我是靠谱客的博主 秀丽西牛,这篇文章主要介绍使用NPOI读取Word、Excel文档内容,现在分享给大家,希望可以做个参考。

使用NPOI读取Excel的例子很多,读取Word的例子不多。

Excel的解析方式有多种,可以使用ODBC查询,把Excel作为一个数据集对待。也可以使用文档结构模型的方式进行解析,即解析Workbook(工作簿)、Sheet、Row、Column。

Word的解析比较复杂,因为Word的文档结构模型定义较为复杂。解析Word或者Excel,关键是理解Word、Excel的文档对象模型。

Word、Excel文档对象模型的解析,可以通过COM接口调用,此类方式使用较广。(可以录制宏代码,然后替换为对应的语言)

也可以使用XML模型解析,尤其是对于2007、2010版本的文档的解析。

复制代码
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
using NPOI.POIFS.FileSystem;
using NPOI.SS.UserModel;
using NPOI.XSSF.UserModel;
using NPOI.XWPF.UserModel;
using System;
using System.Collections.Generic;
using System.Configuration;
using System.IO;
using System.Text;

namespace eyuan
{
    /// <summary>
    /// Helpers that extract text from Excel workbooks and Word documents through NPOI.
    /// Separators and capture switches are read from the application's AppSettings.
    /// </summary>
    public static class NOPIHandler
    {
        /// <summary>
        /// Reads every sheet of a workbook into nested lists: workbook -> sheet -> row -> cell text.
        /// </summary>
        /// <param name="fileName">Path of the workbook file; it is loaded with <c>XSSFWorkbook</c>.</param>
        /// <returns>
        /// One list per sheet, each holding one list of cell strings per defined row.
        /// An empty list is returned when the file cannot be opened (the failure is logged).
        /// </returns>
        public static List<List<List<string>>> ReadExcel(string fileName)
        {
            List<List<List<string>>> workBookContent = new List<List<List<string>>>();

            XSSFWorkbook workbook = OpenWorkbook(fileName);
            if (workbook == null)
            {
                // The open failure has already been logged; there is nothing to read.
                return workBookContent;
            }

            // Sheet indexes start at 0.
            int sheetsCount = workbook.NumberOfSheets;
            for (int i = 0; i < sheetsCount; i++)
            {
                ISheet sheet = workbook.GetSheetAt(i);
                List<List<string>> sheetContent = new List<List<string>>();

                // Walk the logical row range instead of 0..PhysicalNumberOfRows-1:
                // GetRow returns null for rows that were never defined, so a sheet
                // with gaps would otherwise hit a null row and miss trailing rows.
                int lastRowNum = sheet.LastRowNum;
                for (int j = sheet.FirstRowNum; j <= lastRowNum; j++)
                {
                    IRow row = sheet.GetRow(j);
                    if (row == null)
                    {
                        continue;
                    }

                    // Rows may have different numbers of cells.
                    List<string> rowContent = new List<string>();
                    foreach (ICell cell in row.Cells)
                    {
                        // "NIL" is kept as the placeholder for a missing cell.
                        rowContent.Add(cell == null ? "NIL" : cell.ToString());
                    }

                    sheetContent.Add(rowContent);
                }

                workBookContent.Add(sheetContent);
            }

            return workBookContent;
        }

        /// <summary>
        /// Reads a workbook and flattens it into a single string.
        /// </summary>
        /// <param name="fileName">Path of the workbook file, passed to <see cref="ReadExcel"/>.</param>
        /// <returns>
        /// Cell texts joined with the "ExcelCellSeparator" setting, each row followed by
        /// "ExcelRowSeparator" and each sheet followed by "ExcelSheetSeparator".
        /// </returns>
        public static string ReadExcelText(string fileName)
        {
            string cellSeparator = ConfigurationManager.AppSettings["ExcelCellSeparator"];
            string rowSeparator = ConfigurationManager.AppSettings["ExcelRowSeparator"];
            string sheetSeparator = ConfigurationManager.AppSettings["ExcelSheetSeparator"];

            StringBuilder sbFileText = new StringBuilder();

            foreach (List<List<string>> sheetContent in ReadExcel(fileName))
            {
                foreach (List<string> rowContent in sheetContent)
                {
                    sbFileText.Append(string.Join(cellSeparator, rowContent.ToArray()));
                    sbFileText.Append(rowSeparator);
                }

                sbFileText.Append(sheetSeparator);
            }

            return sbFileText.ToString();
        }

        /// <summary>
        /// Reads the text content of a Word document.
        /// </summary>
        /// <param name="fileName">Path of the document file; it is loaded with <c>XWPFDocument</c>.</param>
        /// <returns>
        /// The captured sections, each wrapped in "Capture ... Begin"/"Capture ... End" marker lines.
        /// Headers, footers, tables and images are included only when the matching
        /// "CaptureWord*" setting equals "true"; paragraphs are always included.
        /// An empty string is returned when the file cannot be opened (the failure is logged).
        /// </returns>
        public static string ReadWordText(string fileName)
        {
            string tableCellSeparator = ConfigurationManager.AppSettings["WordTableCellSeparator"];
            string tableRowSeparator = ConfigurationManager.AppSettings["WordTableRowSeparator"];
            string tableSeparator = ConfigurationManager.AppSettings["WordTableSeparator"];

            string captureHeader = ConfigurationManager.AppSettings["CaptureWordHeader"];
            string captureFooter = ConfigurationManager.AppSettings["CaptureWordFooter"];
            string captureTable = ConfigurationManager.AppSettings["CaptureWordTable"];
            string captureImage = ConfigurationManager.AppSettings["CaptureWordImage"];

            // Format string used to build the output path of each extracted image.
            string imageFileNameFormat = ConfigurationManager.AppSettings["CaptureWordImageFileName"];

            XWPFDocument document = OpenDocument(fileName);
            if (document == null)
            {
                // The open failure has already been logged; there is nothing to read.
                return string.Empty;
            }

            StringBuilder sbFileText = new StringBuilder();

            // Headers
            if (captureHeader == "true")
            {
                sbFileText.AppendLine("Capture Header Begin");
                foreach (XWPFHeader xwpfHeader in document.HeaderList)
                {
                    sbFileText.AppendLine(xwpfHeader.Text);
                }
                sbFileText.AppendLine("Capture Header End");
            }

            // Footers
            if (captureFooter == "true")
            {
                sbFileText.AppendLine("Capture Footer Begin");
                foreach (XWPFFooter xwpfFooter in document.FooterList)
                {
                    sbFileText.AppendLine(xwpfFooter.Text);
                }
                sbFileText.AppendLine("Capture Footer End");
            }

            // Tables
            if (captureTable == "true")
            {
                sbFileText.AppendLine("Capture Table Begin");
                foreach (XWPFTable table in document.Tables)
                {
                    foreach (XWPFTableRow row in table.Rows)
                    {
                        foreach (XWPFTableCell cell in row.GetTableCells())
                        {
                            sbFileText.Append(cell.GetText());
                            sbFileText.Append(tableCellSeparator);
                        }
                        sbFileText.Append(tableRowSeparator);
                    }
                    sbFileText.Append(tableSeparator);
                }
                sbFileText.AppendLine("Capture Table End");
            }

            // Images: each picture is written to disk and its path is added to the text.
            if (captureImage == "true")
            {
                sbFileText.AppendLine("Capture Image Begin");
                foreach (XWPFPictureData pictureData in document.AllPictures)
                {
                    string picExtName = pictureData.suggestFileExtension();
                    string picFileName = pictureData.GetFileName();
                    byte[] picFileContent = pictureData.GetData();

                    // A GUID prefix keeps names unique across documents.
                    // NOTE(review): GetFileName() may already carry an extension, which would
                    // produce a doubled extension here - confirm before changing the naming.
                    string picTempName = string.Format(
                        imageFileNameFormat,
                        Guid.NewGuid().ToString() + "_" + picFileName + "." + picExtName);

                    // Creates the file, or overwrites an existing one.
                    File.WriteAllBytes(picTempName, picFileContent);

                    sbFileText.AppendLine(picTempName);
                }
                sbFileText.AppendLine("Capture Image End");
            }

            // Body paragraphs
            sbFileText.AppendLine("Capture Paragraph Begin");
            foreach (XWPFParagraph paragraph in document.Paragraphs)
            {
                sbFileText.AppendLine(paragraph.ParagraphText);
            }
            sbFileText.AppendLine("Capture Paragraph End");

            return sbFileText.ToString();
        }

        /// <summary>
        /// Opens a workbook file, logging and returning null when it cannot be loaded.
        /// </summary>
        private static XSSFWorkbook OpenWorkbook(string fileName)
        {
            try
            {
                using (FileStream file = new FileStream(fileName, FileMode.Open, FileAccess.Read))
                {
                    return new XSSFWorkbook(file);
                }
            }
            catch (Exception e)
            {
                LogHandler.LogWrite(string.Format("文件{0}打开失败,错误:{1}", fileName, e.ToString()));
                return null;
            }
        }

        /// <summary>
        /// Opens a Word document file, logging and returning null when it cannot be loaded.
        /// </summary>
        private static XWPFDocument OpenDocument(string fileName)
        {
            try
            {
                using (FileStream file = new FileStream(fileName, FileMode.Open, FileAccess.Read))
                {
                    return new XWPFDocument(file);
                }
            }
            catch (Exception e)
            {
                LogHandler.LogWrite(string.Format("文件{0}打开失败,错误:{1}", fileName, e.ToString()));
                return null;
            }
        }
    }
}

以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持靠谱客。

最后

以上就是秀丽西牛最近收集整理的关于使用NPOI读取Word、Excel文档内容的全部内容,更多相关内容请搜索靠谱客的其他文章。

本图文内容来源于网友提供,作为学习参考使用,或来自网络收集整理,版权属于原作者所有。
点赞(139)

评论列表共有 0 条评论

立即
投稿
返回
顶部