Asynchronous Programming In ColdFusion (2018)

With advances in computing and the transition from single-user desktop applications to web-based applications, multi-threading has emerged as a core feature of all modern programming languages.

In ColdFusion, you can implement threads using cfthread.

However, to simplify things further, we have introduced asynchronous programming in the 2018 release of ColdFusion.

Benefits

  1. Easy to use syntax with object-oriented styles.
  2. Built-in chaining with the then() and error() constructs, which makes it easy to model your workflows.
  3. The result of an execution is returned as a Future, so it is easy to pass the result along.
  4. Near real-time processing.
  5. Uses its own configurable thread pool.
  6. ThreadPool is created on demand and can scale up based on load.

 

Syntax

// Step 1: the work that is started asynchronously.
fetchData = function() {
    //query from database
};

// Step 2: chained with then(), so it follows the first step.
processData = function() {
    //process query result
};

// Chained with error(); this sample handler simply raises an exception.
onFailure = function() {
    throw(type="error", message="Error processing data!!!");
};

// The chain yields a Future. The second runAsync argument is presumably a
// timeout (compare then(UDFMethod, timeout)) -- confirm its unit in the docs.
Future = runAsync(fetchData, 1).then(processData).error(onFailure);

 

async_syntax

 

The result of an asynchronous execution is returned as a Future.

 

You can use the following APIs on Future:

  • get()
  • get(timeout)
  • isDone()
  • cancel()
  • isCancelled()
  • then(UDFMethod)
  • then(UDFMethod, timeout)
  • error(UDFMethod)
  • error(UDFMethod, timeout)

 

We also support the use case where you want an EmptyFuture with no code execution. This construct is useful for modeling producer-consumer workflows: the producer passes an EmptyFuture to the consumer, the consumer does some processing and then marks the EmptyFuture as complete with a result, and that result serves as the acknowledgement to the producer that the task is complete.

The syntax is as follows:

// Calling runasync() with no arguments creates an EmptyFuture: no code is executed.
emptyFuture = runasync();

You can use the following APIs on EmptyFuture:

  • isDone()
  • cancel()
  • isCancelled()
  • get()
  • complete(Object)

 

Case Study

Say you work for an eCommerce company and you want to crawl your competitor’s web pages so that you can price your items competitively.

 

Suggested approach: The asynchronous programming capability helps here by letting you parallelize the task.

 

CrawlHelper.cfc

/**
 * Case-study entry point: fans a fixed list of pages out to asynchronous
 * WebCrawler runs, producing one Future per URL.
 */
component {

    /**
     * Starts one asynchronous crawl for each of the hard-coded URLs.
     * The Futures are collected locally; nothing is returned or awaited here.
     */
    remote function startCrawl() {
        // Locals are var-scoped so they are not written to the component's
        // shared variables scope.
        var urls = [
            "https://en.wikipedia.org/wiki/Adobe_ColdFusion",
            "http://www.learncfinaweek.com/week1/What_is_ColdFusion_",
            "https://www.raymondcamden.com/categories/coldfusion",
            "http://forta.com/blog/index.cfm/2008/12/14/ColdFusion-Per-Application-Settings-v2",
            "https://coldfusion.adobe.com/2018/05/coldfusion-blog-redirection-to-coldfusion-community-portal/",
            "http://cephas.net/blog/tags/coldfusion/",
            "http://blog.cfaether.com/2018/05/re-upgrade-path-to-coldfusion-2018.html"
        ];

        var futResAr = crawlWebPages(urls);
    }

    /**
     * Parallelizes the task by running a separate async call for each URL.
     *
     * @urls Array of page URLs to crawl.
     * @return Array of Futures, one per URL, in the same order as the input.
     */
    function crawlWebPages(urls) {
        var futArr = [];

        // Second stage of every chain: receives the URL produced by the first
        // stage and crawls it with a fresh WebCrawler instance. The parameter
        // is not called "url" to avoid any confusion with the built-in URL scope.
        var crawlPage = function(pageUrl) {
            var webCrawler = CreateObject("component", "WebCrawler");
            return webCrawler.crawl(pageUrl);
        };

        for (var item in arguments.urls) {
            // Bind the current URL through a factory call. A closure that reads
            // the loop variable directly could observe a later iteration's
            // value by the time the async task actually runs.
            futArr.append(runAsync(makeUrlSupplier(item)).then(crawlPage));
        }
        return futArr;
    }

    /**
     * Builds a zero-argument closure that returns the given URL. Each call has
     * its own arguments scope, so every closure keeps its own value.
     *
     * @pageUrl URL the returned closure should yield.
     */
    private function makeUrlSupplier(required string pageUrl) {
        return function() {
            return pageUrl;
        };
    }
}

 

WebCrawler.cfc

This CFC contains the core logic: it fires cfhttp calls for the page and for the URLs embedded within the document, builds up the content, and returns it.

<cfcomponent output="false">

<!---
    WebCrawler: starting from one page, fetches it over HTTP, records its
    url/title/body in container.qData, then follows the links found in the page
    that begin with the starting URL. All crawl state lives in the
    instance-level "container" struct, so use a fresh instance for every crawl.
--->

<cfset container = {} />
<cfset container.maxLinks = "0" />
<cfset container.excludeFilters = "" />
<!--- Initialised here so shouldIndex() can run even before crawl() has set it. --->
<cfset container.extensions = "" />

<!--- qData holds the indexed pages; qLinks holds every URL already visited. --->
<cfset container.qData = QueryNew('url,title,body,itemDate', 'varchar,varchar,varchar,date') />
<cfset container.qLinks = QueryNew('url', 'varchar') />


<!---
    Caches a page body under every whitespace-separated, lower-cased word of its title.
    pageData: value exposing "title" and "body" members.
--->
<cffunction name="indexPage" access="remote">
<cfargument name="pageData" default="" />

<!--- var-scoped so concurrent calls do not share these through the variables scope --->
<cfset var title = ARGUMENTS.pageData.title />
<cfset var body = ARGUMENTS.pageData.body />
<cfset var titleAr = ListToArray(title, ' ') />
<cfset var str = '' />
<cfset var strVal = '' />

<cfscript>
writedump(titleAr,"console");
for(str in titleAr){
strVal = Trim(str);
strVal = strVal.toLowerCase();
cacheput(strVal, body);
}
</cfscript>

</cffunction>

<!---
    Crawls a site and returns the collected pages.
    site:           starting URL; also the prefix every followed link must share.
    extensions:     comma-delimited list of file extensions to index (empty = all).
    excludeFilters: pipe-delimited list of filters; "*" at either end acts as a wildcard.
    maxLinks:       maximum number of visited links (0 = no limit).
    Returns the query container.qData (columns url, title, body, itemDate);
    it is empty when the site is not a valid URL or cannot be reached.
--->
<cffunction name="crawl" access="remote">
<cfargument name="site" default="" />
<cfargument name="extensions" default="" />
<cfargument name="excludeFilters" default="" />
<cfargument name="maxLinks" default="0" />

<!--- getStatus() is deliberately used as a truthy "host answered" check: it is 0 only
      when no numeric status came back. checkLinks() applies the strict 200 test itself. --->
<cfif IsValid('URL', ARGUMENTS.site) and getStatus(ARGUMENTS.site)>
<cfset container.maxLinks = Val(ARGUMENTS.maxLinks) />
<cfset container.excludeFilters = ARGUMENTS.excludeFilters />
<cfset container.extensions = ARGUMENTS.extensions />
<cfset checkLinks(ARGUMENTS.site, ARGUMENTS.site) />
</cfif>
<cfreturn container.qData />
</cffunction>

<!---
    Issues an HTTP HEAD request and returns the numeric status code
    (0 when the status text does not start with a number).
--->
<cffunction name="getStatus">
<cfargument name="link" required="true" />
<cfset var headResponse = '' />

<!--- An explicit, function-local result keeps this response separate from other requests. --->
<cfhttp method="head" url="#ARGUMENTS.link#" redirect="true" timeout="5" result="local.headResponse"></cfhttp>
<cfreturn Val(local.headResponse.statusCode) />
</cffunction>

<!---
    Decides whether a discovered link should be fetched.
    Returns false when the link was already visited, is a javascript: link,
    the maxLinks limit has been reached, or the link does not start with "domain".
--->
<cffunction name="shouldFollow">
<cfargument name="link" required="true" />
<cfargument name="domain" required="true" />
<cfset var result = true />
<cfset var qHasBeenChecked = '' />

<!--- The link comes from crawled page content, so it is bound as a parameter
      rather than interpolated into the SQL text. --->
<cfquery name="qHasBeenChecked" dbtype="query">
SELECT url
FROM container.qLinks
WHERE url = <cfqueryparam value="#ARGUMENTS.link#" cfsqltype="cf_sql_varchar" />
</cfquery>

<cfif qHasBeenChecked.recordCount>
<cfset result = false />
<cfelseif ARGUMENTS.link contains 'javascript:'>
<cfset result = false />
<cfelseif Val(container.maxLinks) and container.qLinks.recordCount gte Val(container.maxLinks)>
<cfset result = false />
<cfelseif Left(ARGUMENTS.link, Len(ARGUMENTS.domain)) neq ARGUMENTS.domain>
<cfset result = false />
</cfif>
<cfreturn result />
</cffunction>

<!---
    Decides whether a fetched page should be stored in qData, based on the
    configured extension list and exclude filters.
--->
<cffunction name="shouldIndex">
<cfargument name="link" required="true" />
<cfset var result = true />
<cfset var filter = '' />
<cfset var literalFilter = '' />

<!--- The extension is whatever follows the last "." before any query string. --->
<cfif ListLen(container.extensions) and not ListFindNoCase(container.extensions, ListLast(ListFirst(ARGUMENTS.link, '?'), '.'))>
<cfset result = false />
<cfelseif ListLen(container.excludeFilters)>
<cfloop index="filter" list="#container.excludeFilters#" delimiters="|">
<cfset literalFilter = Replace(filter, '*', '', 'ALL') />
<cfif Left(filter, 1) eq '*' and Right(filter, 1) eq '*'>
<!--- *text* : excluded when the link contains the text --->
<cfif ARGUMENTS.link contains literalFilter>
<cfset result = false />
</cfif>
<cfelseif Right(filter, 1) eq '*'>
<!--- text* : excluded when the link starts with the text --->
<cfif Left(ARGUMENTS.link, Len(literalFilter)) eq literalFilter>
<cfset result = false />
</cfif>
<cfelseif Left(filter, 1) eq '*'>
<!--- *text : excluded when the link ends with the text --->
<cfif Right(ARGUMENTS.link, Len(literalFilter)) eq literalFilter>
<cfset result = false />
</cfif>
<cfelse>
<!--- no wildcard: excluded only on an exact match --->
<cfif ARGUMENTS.link eq filter>
<cfset result = false />
</cfif>
</cfif>
</cfloop>
</cfif>
<cfreturn result />
</cffunction>

<!---
    Fetches one page, records it as visited, indexes it when appropriate, and
    recurses into every link that shouldFollow() accepts.
    page:   URL to fetch.
    domain: prefix that followed links must start with.
--->
<cffunction name="checkLinks">
<cfargument name="page" required="true" />
<cfargument name="domain" required="true" />
<cfset var link = '' />
<cfset var aLinks = [] />
<cfset var linkStatus = 0 />
<cfset var pageResponse = '' />

<!--- Get the page. The response is kept function-local so the recursive calls
      and getStatus() below cannot overwrite it. --->
<cfhttp method="get" url="#ARGUMENTS.page#" redirect="true" resolveurl="true" timeout="10" result="local.pageResponse"></cfhttp>

<!--- Record the page as visited before inspecting it, so it is never fetched twice. --->
<cfset QueryAddRow(container.qLinks) />
<cfset QuerySetCell(container.qLinks, 'url', ARGUMENTS.page) />

<cfif Val(local.pageResponse.statusCode) eq 200>
<cfif shouldIndex(ARGUMENTS.page)>
<cfset QueryAddRow(container.qData) />
<cfset QuerySetCell(container.qData, 'url', getRelativePath(ARGUMENTS.page)) />
<cfset QuerySetCell(container.qData, 'title', getPageTitle(local.pageResponse.fileContent)) />
<cfset QuerySetCell(container.qData, 'body', getBrowsableContent(local.pageResponse.fileContent)) />
<cfset QuerySetCell(container.qData, 'itemDate', '') />
</cfif>

<!--- Extract links. The scheme alternative must not contain spaces; with the stray
      spaces the pattern could never match an http:// or https:// URL. --->
<cfset aLinks = ReMatchNoCase('((((https?:|ftp:)\/\/)|(www\.|ftp\.))[-[:alnum:]\?$%,\.\/\|&##!@:=\+~_]+[A-Za-z0-9\/])', stripComments(local.pageResponse.fileContent)) />
<cfloop index="link" array="#aLinks#">

<!--- Drop the fragment, then drop an explicit default port. The port is removed only
      when followed by "/" or the end of the string, so ":8080" is left intact. --->
<cfset link = ReReplace(ListFirst(link, '##'), ':80(/|$)', '\1', 'ONE') />

<cfif shouldFollow(link, ARGUMENTS.domain)>
<cfset linkStatus = getStatus(link) />

<cfif linkStatus eq 200>
<!--- The link is live: check its contents as well --->
<cfset checkLinks(link, ARGUMENTS.domain) />
</cfif>
</cfif>
</cfloop>
</cfif>
<cfreturn />
</cffunction>

<!---
    Reduces an HTML document to its text: removes comments, script and style
    elements, and then every remaining tag.
--->
<cffunction name="getBrowsableContent">
<cfargument name="string" required="true" />

<cfset ARGUMENTS.string = stripComments(ARGUMENTS.string) />
<cfset ARGUMENTS.string = ReReplaceNoCase(ARGUMENTS.string, '<script.*?>.*?</script>', '', 'ALL') />
<cfset ARGUMENTS.string = ReReplaceNoCase(ARGUMENTS.string, '<style.*?>.*?</style>', '', 'ALL') />
<cfset ARGUMENTS.string = ReReplace(ARGUMENTS.string, '<[^>]*>', '', 'ALL') />

<cfreturn ARGUMENTS.string />
</cffunction>

<!--- Removes HTML comments from the given string. --->
<cffunction name="stripComments">
<cfargument name="string" required="true" />
<!--- Non-greedy, so each comment is removed on its own instead of everything
      between the first opening and the last closing marker. --->
<cfset ARGUMENTS.string = ReReplace(ARGUMENTS.string, '<!--.*?-->', '', 'ALL') />
<cfreturn ARGUMENTS.string />
</cffunction>

<!---
    Returns the text of the first title element (case-insensitive), or an empty
    string when the document has no title.
--->
<cffunction name="getPageTitle">
<cfargument name="string" required="true" />
<cfset var match = REFindNoCase('<title[^>]*>([^<>]*)</title>', ARGUMENTS.string, 1, true) />

<!--- pos[2]/len[2] describe the captured title text; they are absent or zero when nothing matched. --->
<cfif ArrayLen(match.pos) gte 2 and match.pos[2] gt 0 and match.len[2] gt 0>
<cfreturn Mid(ARGUMENTS.string, match.pos[2], match.len[2]) />
</cfif>
<cfreturn '' />
</cffunction>

<!---
    Strips the scheme (http or https) and host from a URL and returns the rest,
    e.g. "https://example.com/a/b" becomes "/a/b". A path that already starts
    with "/" is returned unchanged.
--->
<cffunction name="getRelativePath">
<cfargument name="path" required="true" />

<cfset ARGUMENTS.path = ReReplaceNoCase(ARGUMENTS.path, '^(https?://)?[^/]*', '', 'ONE') />

<cfreturn ARGUMENTS.path />
</cffunction>
</cfcomponent>

One Response

Leave a reply

Related