Declare the "/update/extract" request handler in the SolrConfigXml file:
<!-- Minimal declaration: registers the path /update/extract with the class
     solr.extraction.ExtractingRequestHandler, marked startup="lazy", and no
     default parameters. -->
<requestHandler name="/update/extract" startup="lazy" class="solr.extraction.ExtractingRequestHandler" />
Replace the placeholder values ("solrUrl", "collectionName", ...) with your information.
Set "openSearcher" to true (SolrConfigXml file -> updateHandler -> autoCommit).
Example request to /update/extract:
final String UPDATE_EXTRACT_REQUEST_PATH = "/update/extract";
final String[] solrUrl = { "http://localhost:8983/solr" };
final String collectionName = "collection1";
// org.apache.solr.common.params.CollectionAdminParams.COLLECTION = "collection";
final CloudSolrClient cloudSolrClient = new CloudSolrClient.Builder(Arrays.asList(solrUrl)).build();
cloudSolrClient.setDefaultCollection(collectionName);
// extracting text [/update/extract] [ContentStreamUpdateRequest::addContentStream]
{
final String contentStream = "extract this text ...";
final String contentType = "text/plain;charset=UTF-8";
final ByteArrayStream byteArrayStream = new ByteArrayStream(contentStream.getBytes(StandardCharsets.UTF_8), null);
byteArrayStream.setContentType(contentType);
final ModifiableSolrParams modifiableSolrParams = new ModifiableSolrParams();
modifiableSolrParams.add(CollectionAdminParams.COLLECTION, collectionName);
final ContentStreamUpdateRequest contentStreamUpdateRequest = new ContentStreamUpdateRequest(UPDATE_EXTRACT_REQUEST_PATH);
contentStreamUpdateRequest.addContentStream(byteArrayStream);
contentStreamUpdateRequest.setParams(modifiableSolrParams);
contentStreamUpdateRequest.setMethod(METHOD.POST);
final NamedList<Object> response = cloudSolrClient.request(contentStreamUpdateRequest);
System.out.println(response);
}
cloudSolrClient.close();
xxxxxxxxxx
<doc>
<str name="id">222014cd-c96d-454c-a7f4-9fe53ea0b0bb</str>
<long name="_version_">1621391017444900864</long>
<arr name="stream_size">
<str>null</str>
</arr>
<arr name="X-Parsed-By">
<str>org.apache.tika.parser.DefaultParser</str>
<str>org.apache.tika.parser.txt.TXTParser</str>
</arr>
<arr name="stream_content_type">
<str>text/plain;charset=UTF-8</str>
</arr>
<arr name="Content-Encoding">
<str>UTF-8</str>
</arr>
<arr name="Content-Type">
<str>text/plain; charset=UTF-8</str>
</arr>
<arr name="content">
<str>
stream_size null
X-Parsed-By org.apache.tika.parser.DefaultParser
X-Parsed-By org.apache.tika.parser.txt.TXTParser
stream_content_type text/plain;charset=UTF-8
Content-Encoding UTF-8
Content-Type text/plain; charset=UTF-8
extract this text ...
</str>
</arr>
</doc>
xxxxxxxxxx
<str name="captureAttr">true</str>
xxxxxxxxxx
<str name="lowernames">true</str>
xxxxxxxxxx
<str name="fmap.meta">attr_</str>
xxxxxxxxxx
<!--
  Full declaration of the /update/extract handler
  (solr.extraction.ExtractingRequestHandler, startup="lazy") with a "defaults"
  parameter list:
    lowernames   = true
    captureAttr  = true
    fmap.meta    = attr_
    fmap.content = _text_
    update.chain = genuuid   (an update processor chain with this name must be
                              defined in the same configuration)
  NOTE(review): the exact effect of each parameter is not stated in this file;
  confirm against the Solr Cell section of the Solr Reference Guide.
-->
<requestHandler name="/update/extract" startup="lazy" class="solr.extraction.ExtractingRequestHandler">
<lst name="defaults">
<str name="lowernames">true</str>
<str name="captureAttr">true</str>
<str name="fmap.meta">attr_</str>
<str name="fmap.content">_text_</str>
<str name="update.chain">genuuid</str>
</lst>
</requestHandler>
xxxxxxxxxx
<!--
  Update processor chain named "genuuid". Processors are listed in this order:
    1. solr.UUIDUpdateProcessorFactory, configured with fieldName = id
    2. solr.LogUpdateProcessorFactory
    3. solr.RunUpdateProcessorFactory
  NOTE(review): presumably the first processor fills the "id" field with a
  generated UUID when a document has none; confirm in the Solr documentation.
-->
<updateRequestProcessorChain name="genuuid">
<processor class="solr.UUIDUpdateProcessorFactory">
<str name="fieldName">id</str>
</processor>
<processor class="solr.LogUpdateProcessorFactory" />
<processor class="solr.RunUpdateProcessorFactory" />
</updateRequestProcessorChain>
xxxxxxxxxx
<!-- Catch-all dynamic field: the name pattern is "*", with type text_general,
     indexed, stored and multiValued all enabled.
     NOTE(review): a "*" pattern presumably accepts every otherwise-undeclared
     field name; confirm this is intended outside of a demo schema. -->
<dynamicField name="*" type="text_general" indexed="true" stored="true" multiValued="true" />